PyQuery 노트

69757 단어 python 크롤러
1. 초기화
1.1 문자열 초기화
from pyquery import PyQuery as py

html = '''
<div>
    <ul>
        <li>first item</li>
        <li>second item</li>
        <li>third item</li>
        <li>fourth item</li>
        <li>fifth item</li>
    </ul>
</div>
'''

doc = py(html)
print(doc('li'))

    1.2 URL 초기화
    from pyquery import PyQuery as py

    # URL
    doc = py(url='http://www.baidu.com')
    #
    print(type(doc('title')))
    # head
    print(doc('head'))

    1.3 파일의 초기화
    from pyquery import PyQuery as py

    #
    doc = py(filename='demo1.html')
    #
    print(type(doc('li')))
    # li
    print(doc('li'))

    2. 기본 CSS 선택자
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''

    doc = py(html)
    # id가 container인 요소 아래, class가 list인 요소 안의 li를 선택
    print(doc('#container .list li'))

    3. 요소 찾기
    3.1 자식 요소
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    # class list
    items = doc('.list')
    #
    print(type(items))
    print(items)
    # li
    lis = items.find('li')
    #
    print(type(lis))
    print(lis)
    #
    lis2 = items.children()
    print(type(lis2))
    print(lis2)
    # class active
    li3 = items.children('.active')
    print(li3)

    3.2 부모 요소
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    # class list
    items = doc('.list')
    #
    container = items.parent()
    print(type(container))
    print(container)
    print("==========================")
    #
    parents = items.parents()
    print(type(parents))
    print(parents)
    print("==========================")
    # class container
    parent = items.parents('.container')
    print(parent)

    3.3 형제 요소
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    # class list
    items = doc('.list')
    li = doc('.list .item-0.active')
    # ( )
    print(li.siblings())
    # class active ( )
    print(li.siblings('.active'))

    4. 순회
    4.1 개별 요소
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)

    #
    li = doc('.item-0.active')
    print(li)

    4.2 여러 요소
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)

    # li
    lis = doc('li').items()
    #
    print(type(lis))
    for li in lis:
        print(li)

    5. 정보 얻기
    5.1 속성 획득
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    a = doc('.item-0.active a')
    # third item
    print(a)
    # link3.html     href
    print(a.attr('href'))
    # link3.html
    print(a.attr.href)

    5.2 텍스트 가져오기
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    a = doc('.item-0.active a')
    # third item
    print(a)
    # a
    print(a.text())

    5.3 HTML 가져오기
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    li = doc('.item-1.active')
    # fourth item
    print(li)
    # li HTML
    print(li.html())

    6. DOM 작업
    6.1、addClass、removeClass
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    li = doc('.item-0.active')
    print(li)
    # class
    li.removeClass('active')
    print(li)
    # class
    li.addClass('active')
    print(li)

    6.2、attr、css
    from pyquery import PyQuery as py

    html = '''

       

           
  • first item

  •        
  • second item

  •        
  • third item

  •        
  • fourth item

  •        
  • fifth item

  •    

    '''
    doc = py(html)
    li = doc('.item-0.active')
    print(li)
    # name
    li.attr('name', 'link')
    print(li)
    # css
    li.css('font-size', '14px')
    print(li)

    6.3、remove
    from pyquery import PyQuery as py

    html = '''
    <div class="wrap">
        Hello, World
        <p>This is a paragraph.</p>
    </div>
    '''
    doc = py(html)
    wrap = doc('.wrap')
    print(wrap.text())
    # p
    wrap.find('p').remove()
    print(wrap.text())

    6.4 기타 DOM 방법
    http://pyquery.readthedocs.io/en/latest/api.html
    7. 의사 클래스 선택자
    from pyquery import PyQuery as py

    html = '''

       

           

               
  • first item

  •            
  • second item

  •            
  • third item

  •            
  • fourth item

  •            
  • fifth item

  •        
       


    '''

    doc = py(html)
    # li
    li = doc('li:first-child')
    print(li)
    # li
    li = doc('li:last-child')
    print(li)
    # li
    li = doc('li:nth-child(2)')
    print(li)
    # li
    li = doc('li:gt(2)')
    print(li)
    # li
    li = doc('li:nth-child(2n)')
    print(li)
    # second li
    li = doc('li:contains(second)')
    print(li)

    더 많은 선택자
    http://www.w3school.com.cn/css/index.asp
    8. 공식 문서
    http://pyquery.readthedocs.io/

    좋은 웹페이지 즐겨찾기