파이썬 크롤러의 셀레니움 사용법

26022 단어 파이썬 크롤러
파이썬 크롤러의 셀레니움 사용법
사용 방법은 모두 코드의 주석에 적혀 있으니 필요하신 분은 자유롭게 가져가세요. 도움이 되셨다면 좋아요를 눌러 주세요. 코드는 다음과 같습니다.
"""
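<Common WebDriver / WebElement members>
    Member:                 Description:
1.  set_window_size()       Set the size of the browser window
2.  back()                  Go back one step in the browser history
3.  forward()               Go forward one step in the browser history
4.  refresh()               Reload the current page
5.  clear()                 Clear the text of an input element
6.  send_keys(value)        Simulate typing into an element
7.  click()                 Click an element
8.  submit()                Submit a form
9.  get_attribute(name)     Get the value of an element attribute
10. is_displayed()          Whether the element is visible to the user
11. size                    Size of the element
12. text                    Text of the element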

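<Element locators>
   Single element:                        Multiple elements:                      Locate by:
1. find_element_by_id                     find_elements_by_id                     id attribute
2. find_element_by_name                   find_elements_by_name                   name attribute
3. find_element_by_xpath                  find_elements_by_xpath                  xpath expression
4. find_element_by_link_text              find_elements_by_link_text              full link text
5. find_element_by_partial_link_text      find_elements_by_partial_link_text      partial link text
6. find_element_by_tag_name               find_elements_by_tag_name               tag name
7. find_element_by_class_name             find_elements_by_class_name             class name
8. find_element_by_css_selector           find_elements_by_css_selector           css selector
Note: Selenium 4 removed these helpers; use find_element(By.XPATH, ...) / find_elements(By.ID, ...) instead.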

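<Expected conditions for explicit waits>
    Condition:                                  Description:
1.  title_is                                    The page title equals the given text
2.  title_contains                              The page title contains the given text
3.  presence_of_element_located                 The element is present in the DOM; takes a locator tuple, e.g. (By.ID, 'p')
4.  presence_of_all_elements_located            At least one matching element is present in the DOM
5.  visibility_of_element_located               The element is visible; takes a locator tuple
6.  visibility_of                               The element is visible; takes a WebElement
7.  text_to_be_present_in_element               The element's text contains the given text
8.  text_to_be_present_in_element_value         The element's value attribute contains the given text
9.  frame_to_be_available_and_switch_to_it      The frame is available; switches to it when it is
10. invisibility_of_element_located             The element is invisible or not present in the DOM
11. alert_is_present                            An alert is displayed
12. element_to_be_clickable                     The element is visible and enabled
13. element_to_be_selected                      The element is selected; takes a WebElement
14. element_located_to_be_selected              The element is selected; takes a locator tuple
15. element_selection_state_to_be               Takes a WebElement and the expected state; True if they match, otherwise False
16. element_located_selection_state_to_be       Takes a locator tuple and the expected state; True if they match, otherwise False
17. staleness_of                                The element is no longer attached to the DOM, e.g. after a page refresh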

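<Chrome startup options>
================================================================================================================
# Create the Chrome options object
chrome_options = webdriver.ChromeOptions()

# Disable image loading to speed up page loads
# NOTE: every add_experimental_option("prefs", ...) call replaces the previous "prefs" value,
# so merge all preferences into a single dict when more than one is needed.
chrome_options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})

# Exclude the enable-automation switch so the browser is less obviously automated
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])

# Run headless (no visible browser window)
chrome_options.add_argument('--headless')

# Set the default language/encoding to utf-8
chrome_options.add_argument('lang=zh_CN.UTF-8')

# Simulate a mobile user-agent; pick ONE of the following

chrome_options.add_argument('user-agent="MQQBrowser/26 Mozilla/5.0 (Linux; U; Android 2.3.7; zh-cn; MB200 Build/GRJ22; CyanogenMod-7) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1"')

chrome_options.add_argument('user-agent="Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1"')

# Set the browser window size (call this on the started driver; the argument order is width, height)
browser.set_window_size(configure.windowWidth, configure.windowHeight)  # configure is your own settings module

# Disable the "save password" prompt
prefs = {}
prefs["credentials_enable_service"] = False
prefs["profile.password_manager_enabled"] = False
chrome_options.add_experimental_option("prefs", prefs)

# Load a browser extension (.crx file)
extension_path = 'D:/extension/XPath-Helper_v2.0.2.crx'
chrome_options.add_extension(extension_path)

# Start the browser with the options above
self.driver = webdriver.Chrome(options=chrome_options)
================================================================================================================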
"""
# -*- coding:utf-8 -*-
import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

__author__ = 'Evan'


class Crawler(object):
    """Small demo wrapper around a Selenium Chrome session.

    Bundles a WebDriver, an explicit-wait helper and a few convenience
    methods (window/frame switching, scrolling, screenshots, drag and drop).
    """

    def __init__(self, url=''):
        """Launch Chrome and set up the wait helpers.

        :param url: address that :meth:`main` loads.

        To start Chrome without images, without the automation switch and
        without a visible window, build the driver like this instead::

            chrome_options = webdriver.ChromeOptions()
            chrome_options.add_experimental_option("prefs", {"profile.managed_default_content_settings.images": 2})
            chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
            chrome_options.add_argument('--headless')
            self.driver = webdriver.Chrome(options=chrome_options)

        Older Selenium releases could also drive PhantomJS::

            service_args = ['--load-images=false', '--disk-cache=true']
            self.driver = webdriver.PhantomJS(service_args=service_args)
        """
        self.source_url = url
        self.driver = webdriver.Chrome()  # launches a browser session
        # Explicit wait helper: polls a condition for up to 30 seconds.
        self.waiting = WebDriverWait(self.driver, 30)
        # Implicit wait: element lookups keep retrying for up to 30 seconds.
        # NOTE(review): Selenium's documentation advises against combining
        # implicit and explicit waits; both are kept to preserve behaviour.
        self.driver.implicitly_wait(30)
        # Kept for callers that use the attribute directly; action_chain()
        # builds its own chain for every call.
        self.actions = webdriver.ActionChains(self.driver)

    def switch_to_windows(self, to_parent_windows=False):
        """Switch the driver's focus to another browser window.

        :param to_parent_windows: when True, switch to the first handle in
            ``window_handles``; when False (default), switch to the last
            handle that is not the current window.
        :return: None
        """
        handles = self.driver.window_handles
        if to_parent_windows:
            self.driver.switch_to.window(handles[0])
            return

        current = self.driver.current_window_handle
        others = [handle for handle in handles if handle != current]
        if others:
            # One switch straight to the final target instead of hopping
            # through every other window on the way.
            self.driver.switch_to.window(others[-1])

    def switch_to_frame(self, index=0, to_parent_frame=False, to_default_frame=False):
        """Switch the driver's focus to a frame.

        :param index: frame reference (index, id, name or element) passed to
            ``switch_to.frame``
        :param to_parent_frame: when True, switch to the parent frame
        :param to_default_frame: when True (and ``to_parent_frame`` is
            False), switch back to the top-level document
        :return: None
        """
        if to_parent_frame:
            self.driver.switch_to.parent_frame()
        elif to_default_frame:
            self.driver.switch_to.default_content()
        else:
            self.driver.switch_to.frame(index)

    def open_new_windows(self, new_url='', wait_seconds=2):
        """Open a new browser window via JavaScript.

        :param new_url: URL to load in the new window; an empty string opens
            a blank window
        :param wait_seconds: pause after opening so the window can appear
        :return: None
        """
        # Pass the URL as a script argument. Formatting it into the script
        # text left it unquoted, which is a JavaScript syntax error.
        self.driver.execute_script("window.open(arguments[0])", new_url)
        if wait_seconds:
            time.sleep(wait_seconds)

    def page_scrolling(self, go_to_bottom=False, rolling_distance=(0, 1000), delay=5):
        """Scroll the page with JavaScript after an initial pause.

        :param bool go_to_bottom: when True, jump to the bottom of the page
        :param tuple rolling_distance: ``(x, y)`` offsets handed to
            ``window.scrollBy`` when ``go_to_bottom`` is False
        :param delay: seconds to sleep before scrolling
        :return: None
        """
        if delay:
            time.sleep(delay)
        if go_to_bottom:
            js = "window.scrollTo(0, document.body.scrollHeight)"
        else:
            js = "window.scrollBy({}, {})".format(rolling_distance[0], rolling_distance[1])
        self.driver.execute_script(js)

    def screen_shot(self, picture_name='example.jpg'):
        """Save a screenshot of the current window.

        :param picture_name: destination file name
        :return: None
        """
        # NOTE(review): Selenium appears to write PNG data whatever the
        # extension; confirm before relying on the '.jpg' default.
        self.driver.save_screenshot(picture_name)

    def action_chain(self, source, target):
        """Drag one element onto another.

        :param source: element to drag
        :param target: element to drop onto
        :return: None
        """
        # A fresh chain per call: a reused chain keeps previously queued
        # actions unless it is reset, so earlier drags would be replayed.
        webdriver.ActionChains(self.driver).drag_and_drop(source, target).perform()

    def close_current_windows(self):
        """Close the window that currently has focus."""
        if self.driver:
            self.driver.close()

    def quit_browser(self):
        """Quit the browser and end the WebDriver session."""
        if self.driver:
            self.driver.quit()

    def main(self):
        """Run the demo: load the page, inspect the search box, search,
        navigate the history, scroll and perform a drag and drop."""
        search_box_xpath = '//*[@id="kw"]'

        # Load the start page.
        self.driver.get(self.source_url)

        # Two ways to locate the element: a direct lookup ...
        self.driver.find_element(By.XPATH, search_box_xpath)
        # ... and an explicit wait until it is present in the DOM.
        input_box = self.waiting.until(EC.presence_of_element_located((By.XPATH, search_box_xpath)))

        # Element information.
        print(input_box.get_attribute('class'))  # value of the class attribute
        print(input_box.id)  # id Selenium assigned to the element
        print(input_box.text)  # element text
        print(input_box.location)  # position of the element on the page
        print(input_box.tag_name)  # tag name
        print(input_box.size)  # element size
        # Page information.
        print(self.driver.current_url)  # current URL
        print(self.driver.get_cookies())  # current cookies
        print(self.driver.page_source)  # page source

        # Interact with the element.
        input_box.clear()  # empty the input
        input_box.send_keys('python')  # type the query
        input_box.send_keys(Keys.ENTER)  # press Enter
        # History navigation.
        self.driver.back()
        time.sleep(1)
        self.driver.forward()
        # Scroll via JavaScript.
        self.page_scrolling()
        # Drag and drop.
        source = self.driver.find_element(By.XPATH, '//*[@id="result_logo"]/img[1]')
        target = self.driver.find_element(By.XPATH, search_box_xpath)
        self.action_chain(source=source, target=target)


if __name__ == '__main__':
    # Script entry point: run the demo against Baidu's home page.
    crawler = Crawler(url='https://www.baidu.com')
    try:
        crawler.main()
    finally:
        # Always end the WebDriver session, even when the demo raises, so no
        # browser or driver process is left running.
        crawler.quit_browser()

좋은 웹페이지 즐겨찾기