从一个启动浏览器并打开百度网页的代码开始

  1. from selenium import webdriver
  2. driver = webdriver.Chrome()
  3. driver.get('https://www.baidu.com')

from selenium import webdriver
这行代码表示从selenium导入webdriver。进入selenium包,发现webdriver是一个包,那么导入时执行的其实是webdriver包下的`__init__.py`文件

  1. from .firefox.webdriver import WebDriver as Firefox # noqa
  2. from .firefox.firefox_profile import FirefoxProfile # noqa
  3. from .firefox.options import Options as FirefoxOptions # noqa
    #实例化的是.chrome.webdriver里的webDriver
  4. from .chrome.webdriver import WebDriver as Chrome # noqa
  5. from .chrome.options import Options as ChromeOptions # noqa
  6. from .ie.webdriver import WebDriver as Ie # noqa
  7. from .ie.options import Options as IeOptions # noqa
  8. from .edge.webdriver import WebDriver as Edge # noqa
  9. from .opera.webdriver import WebDriver as Opera # noqa
  10. from .safari.webdriver import WebDriver as Safari # noqa
  11. from .blackberry.webdriver import WebDriver as BlackBerry #noqa
  12. from .phantomjs.webdriver import WebDriver as PhantomJS # noqa
  13. from .android.webdriver import WebDriver as Android # noqa
  14. from .webkitgtk.webdriver import WebDriver as WebKitGTK # noqa
  15. from .webkitgtk.options import Options as WebKitGTKOptions
  16. from .remote.webdriver import WebDriver as Remote # noqa
  17. from .common.desired_capabilities import DesiredCapabilities
  18. from .common.action_chains import ActionChains # noqa
  19. from .common.touch_actions import TouchActions # noqa
  20. from .common.proxy import Proxy # noqa

打开chrome.webdriver文件,下面只展示出相关代码

  1. #selenium/webdriver/chrome/webdriver.py
  2. import warnings
  3. from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
  4. from .remote_connection import ChromeRemoteConnection
  5. from .service import Service
  6. from .options import Options
  7. class WebDriver(RemoteWebDriver):
  8.  
  9. def __init__(self, executable_path="chromedriver", port=0,
  10. options=None, service_args=None,
  11. desired_capabilities=None, service_log_path=None,
  12. chrome_options=None):
  13. """
  14. 参数:
  15. - executable_path - chromedriver的执行路径 默认在环境变量中查找
  16. - port -http连接的端口号
  17. - desired_capabilities: 一般浏览器的字典对象
  18. - options: ChromeOptions的实例
  19. """
  20.  
  21. #………………………………省略…………………………………………
  22. #第1步 实例化一个Service对象
  23. self.service = Service(
  24. executable_path,
  25. port=port,
  26. service_args=service_args,
  27. log_path=service_log_path)
  28. #第2步 调用了service的start方法
  29. self.service.start()
  30. #………………………………省略…………………………………………

WebDriver构造方法中最先实例化Service类。我们实例化Chrome()时并没有传入参数,所以Service的参数为executable_path="chromedriver"、port=0,其余都是None。

打开Chrome目录Service文件, 只有以下代码

  1. #selenium/webdriver/chrome/service.py
  2. from selenium.webdriver.common import service
  3.  
  4. class Service(service.Service):
  5. """
  6. 实例化Service对象 管理ChromeDriver的启动和停止
  7. """
  8. def __init__(self, executable_path, port=0, service_args=None,
  9. log_path=None, env=None):
  10. """
  11. 参数:
  12. - service_args : chromedriver 的参数 列表形式
  13. - log_path : chromedriver的日志路径
  14. """
  15.  
  16. self.service_args = service_args or []
  17. if log_path:
  18. self.service_args.append('--log-path=%s' % log_path)
  19. #第1步 调用父类的构造方法
  20. service.Service.__init__(self, executable_path, port=port, env=env,
  21. start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")
  22.  
  23. #重写父类方法 获取命令行的参数
  24. def command_line_args(self):
  25. return ["--port=%d" % self.port] + self.service_args

该类继承了selenium.webdriver.common目录下service模块中的Service类,并重写了父类的command_line_args方法。构造方法中调用了父类的构造方法。

  1. #selenium/webdriver/common/service.py
  2. import errno
  3. import os
  4. import platform
  5. import subprocess
  6. from subprocess import PIPE
  7. import time
  8. from selenium.common.exceptions import WebDriverException
  9. from selenium.webdriver.common import utils
  10.  
  11. try:
  12. from subprocess import DEVNULL
  13. _HAS_NATIVE_DEVNULL = True
  14. except ImportError:
  15. DEVNULL = -3
  16. _HAS_NATIVE_DEVNULL = False
  17.  
  18. class Service(object):
  19.  
  20. def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""):
  21. self.path = executable
  22.  
  23. self.port = port
  24. #默认自动获取一个端口
  25. if self.port == 0:
  26. self.port = utils.free_port()
  27.  
  28. if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL:
  29. log_file = open(os.devnull, 'wb')
  30.  
  31. self.start_error_message = start_error_message
  32. self.log_file = log_file
  33. #默认获取系统的环境变量
  34. self.env = env or os.environ
  35.  
  36. @property
  37. def service_url(self):
  38. """
  39. Gets the url of the Service
  40. """
  41. return "http://%s" % utils.join_host_port('localhost', self.port)
  42.  
  43. def command_line_args(self):
  44. raise NotImplemented("This method needs to be implemented in a sub class")
  45.  
  46. def start(self):
  47. """
  48. Starts the Service.
  49.  
  50. :Exceptions:
  51. - WebDriverException : Raised either when it can't start the service
  52. or when it can't connect to the service
  53. """
  54. try:
  55. #启动chromedriver程序 参数为 --port=端口号 输入输出到devnull空设备
  56. cmd = [self.path]
  57. cmd.extend(self.command_line_args())
  58. self.process = subprocess.Popen(cmd, env=self.env,
  59. close_fds=platform.system() != 'Windows',
  60. stdout=self.log_file,
  61. stderr=self.log_file,
  62. stdin=PIPE)
  63. except TypeError:
  64. raise
  65. except OSError as err:
  66. if err.errno == errno.ENOENT:
  67. raise WebDriverException(
  68. "'%s' executable needs to be in PATH. %s" % (
  69. os.path.basename(self.path), self.start_error_message)
  70. )
  71. elif err.errno == errno.EACCES:
  72. raise WebDriverException(
  73. "'%s' executable may have wrong permissions. %s" % (
  74. os.path.basename(self.path), self.start_error_message)
  75. )
  76. else:
  77. raise
  78. except Exception as e:
  79. raise WebDriverException(
  80. "The executable %s needs to be available in the path. %s\n%s" %
  81. (os.path.basename(self.path), self.start_error_message, str(e)))
  82. count = 0
  83. #检测是否subprocess进程是否还在,不在则抛出异常
  84. #检测是否http协议是否链接 若无链接等待30秒抛出异常
  85. while True:
  86. self.assert_process_still_running()
  87. if self.is_connectable():
  88. break
  89. count += 1
  90. time.sleep(1)
  91. if count == 30:
  92. raise WebDriverException("Can not connect to the Service %s" % self.path)
  93.  
  94. def assert_process_still_running(self):
  95. return_code = self.process.poll()
  96. if return_code is not None:
  97. raise WebDriverException(
  98. 'Service %s unexpectedly exited. Status code was: %s'
  99. % (self.path, return_code)
  100. )
       #判断是否正在连接,等待30秒后抛出webdriver异常
  101. def is_connectable(self):
  102. return utils.is_connectable(self.port)

由上面的代码可知,Service实例化时会获取一个端口。
然后调用了service对象的start方法。该方法用subprocess启动chromedriver程序 并检测是否正在连接。
现在再来看最开始chrome 的webDriver类,  此类继承了selenium.webdriver.remote下的webdriver并调用了父类的构造方法。

  1. #selenium/webdriver/chrome/webdriver.py
  2. import warnings
  3. from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
  4. from .remote_connection import ChromeRemoteConnection
  5. from .service import Service
  6. from .options import Options
  7. class WebDriver(RemoteWebDriver):
  8. """
  9. Controls the ChromeDriver and allows you to drive the browser.
  10.  
  11. You will need to download the ChromeDriver executable from
  12. http://chromedriver.storage.googleapis.com/index.html
  13. """
  14. def __init__(self, executable_path="chromedriver", port=0,
  15. options=None, service_args=None,
  16. desired_capabilities=None, service_log_path=None,
  17. chrome_options=None):
  18. #………………………………省略…………………………………………
  19.  
  20. if options is None:
  21. # desired_capabilities stays as passed in
  22. if desired_capabilities is None:
  23. #第1步 创建一个浏览器的字典对象
  24. desired_capabilities = self.create_options().to_capabilities()
  25. else:
  26. if desired_capabilities is None:
  27. desired_capabilities = options.to_capabilities()
  28. else:
  29. desired_capabilities.update(options.to_capabilities())
  30. #………………………………省略…………………………………………
  31.  
  32. #第二步 调用父类的构造方法
  33. try:
  34. RemoteWebDriver.__init__(
  35. self,
  36. command_executor=ChromeRemoteConnection(
  37. remote_server_addr=self.service.service_url),
  38. desired_capabilities=desired_capabilities)
  39. except Exception:
  40. self.quit()
  41. raise
  42. self._is_remote = False
  43.  
  44. def create_options(self):
  45. return Options()

首先通过create_options()创建一个Options对象,然后调用它的to_capabilities()方法得到浏览器的字典对象。

Options的to_capabilities()方法是返回一个caps字典对象

chrome浏览器返回的caps字典对象为:
    {
     'browserName': 'chrome',
     'version': '',
     'platform': 'ANY',
     'goog:chromeOptions': {'extensions': [], 'args': []}
     }

接下来看看 RemoteWebDriver的构造方法

  1. RemoteWebDriver.__init__(
  2. self,
  3. command_executor=ChromeRemoteConnection(
  4. remote_server_addr=self.service.service_url),
  5. desired_capabilities=desired_capabilities)

传入了2个参数 一个是 ChromeRemoteConnection类的实例对象,  一个是前面获取到的浏览器字典对象。
来看看ChromeRemoteConnection类。它继承了RemoteConnection,调用了父类的构造方法,并往self._commands中添加了几个command键值对

  1. #selenium/webdriver/chrome/remote_connection.py
  2. from selenium.webdriver.remote.remote_connection import RemoteConnection
  3.  
  4. class ChromeRemoteConnection(RemoteConnection):
  5.  
  6. def __init__(self, remote_server_addr, keep_alive=True):
  7. RemoteConnection.__init__(self, remote_server_addr, keep_alive)
  8. self._commands["launchApp"] = ('POST', '/session/$sessionId/chromium/launch_app')
  9. self._commands["setNetworkConditions"] = ('POST', '/session/$sessionId/chromium/network_conditions')
  10. self._commands["getNetworkConditions"] = ('GET', '/session/$sessionId/chromium/network_conditions')
  1. #selenium/webdriver/remote/remote_connection.py
  2. class RemoteConnection(object):
  3. """A connection with the Remote WebDriver server.
  4.  
  5. Communicates with the server using the WebDriver wire protocol:
  6. https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol"""
  7.  
  8. def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
  9. # Attempt to resolve the hostname and get an IP address.
  10. self.keep_alive = keep_alive
  11. parsed_url = parse.urlparse(remote_server_addr)
  12. addr = parsed_url.hostname
  13. if parsed_url.hostname and resolve_ip:
  14. port = parsed_url.port or None
  15. if parsed_url.scheme == "https":
  16. ip = parsed_url.hostname
  17. elif port and not common_utils.is_connectable(port, parsed_url.hostname):
  18. ip = None
  19. LOGGER.info('Could not connect to port {} on host '
  20. '{}'.format(port, parsed_url.hostname))
  21. else:
  22. ip = common_utils.find_connectable_ip(parsed_url.hostname,
  23. port=port)
  24. if ip:
  25. netloc = ip
  26. addr = netloc
  27. if parsed_url.port:
  28. netloc = common_utils.join_host_port(netloc,
  29. parsed_url.port)
  30. if parsed_url.username:
  31. auth = parsed_url.username
  32. if parsed_url.password:
  33. auth += ':%s' % parsed_url.password
  34. netloc = '%s@%s' % (auth, netloc)
  35. remote_server_addr = parse.urlunparse(
  36. (parsed_url.scheme, netloc, parsed_url.path,
  37. parsed_url.params, parsed_url.query, parsed_url.fragment))
  38. else:
  39. LOGGER.info('Could not get IP address for host: %s' %
  40. parsed_url.hostname)
  41.  
  42. self._url = remote_server_addr
  43. if keep_alive:
  44. self._conn = httplib.HTTPConnection(
  45. str(addr), str(parsed_url.port), timeout=self._timeout)
  46.  
  47. self._commands = {
  48. Command.STATUS: ('GET', '/status'),
  49. Command.NEW_SESSION: ('POST', '/session'),
  50. Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
  51. Command.QUIT: ('DELETE', '/session/$sessionId'),
  52. Command.GET_CURRENT_WINDOW_HANDLE:
  53. ('GET', '/session/$sessionId/window_handle'),
  54. Command.W3C_GET_CURRENT_WINDOW_HANDLE:
  55. ('GET', '/session/$sessionId/window'),
  56. Command.GET_WINDOW_HANDLES:
  57. ('GET', '/session/$sessionId/window_handles'),
  58.  
  59. #................省略.................
  60. }
  61.  
  62. #最终发送命令到远程服务器的方法
  63. def execute(self, command, params):
  64. command_info = self._commands[command]
  65. assert command_info is not None, 'Unrecognised command %s' % command
  66. path = string.Template(command_info[1]).substitute(params)
  67. if hasattr(self, 'w3c') and self.w3c and isinstance(params, dict) and 'sessionId' in params:
  68. del params['sessionId']
  69. data = utils.dump_json(params)
  70. url = '%s%s' % (self._url, path)
  71. return self._request(command_info[0], url, body=data)
  72.  
  73. #返回带有JSON解析的字典
  74. def _request(self, method, url, body=None):
  75. """
  76. Send an HTTP request to the remote server.
  77.  
  78. :Args:
  79. - method - A string for the HTTP method to send the request with.
  80. - url - A string for the URL to send the request to.
  81. - body - A string for request body. Ignored unless method is POST or PUT.
  82.  
  83. :Returns:
  84. A dictionary with the server's parsed JSON response.
  85. """
  86. LOGGER.debug('%s %s %s' % (method, url, body))
  87.  
  88. parsed_url = parse.urlparse(url)
  89. headers = self.get_remote_connection_headers(parsed_url, self.keep_alive)
  90. resp = None
  91. if body and method != 'POST' and method != 'PUT':
  92. body = None
  93.  
  94. if self.keep_alive:
  95. resp = self._conn.request(method, url, body=body, headers=headers)
  96.  
  97. statuscode = resp.status
  98. else:
  99. http = urllib3.PoolManager(timeout=self._timeout)
  100. resp = http.request(method, url, body=body, headers=headers)
  101.  
  102. statuscode = resp.status
  103. if not hasattr(resp, 'getheader'):
  104. if hasattr(resp.headers, 'getheader'):
  105. resp.getheader = lambda x: resp.headers.getheader(x)
  106. elif hasattr(resp.headers, 'get'):
  107. resp.getheader = lambda x: resp.headers.get(x)
  108.  
  109. data = resp.data.decode('UTF-8')
  110. try:
  111. if 300 <= statuscode < 304:
  112. return self._request('GET', resp.getheader('location'))
  113. if 399 < statuscode <= 500:
  114. return {'status': statuscode, 'value': data}
  115. content_type = []
  116. if resp.getheader('Content-Type') is not None:
  117. content_type = resp.getheader('Content-Type').split(';')
  118. if not any([x.startswith('image/png') for x in content_type]):
  119.  
  120. try:
  121. data = utils.load_json(data.strip())
  122. except ValueError:
  123. if 199 < statuscode < 300:
  124. status = ErrorCode.SUCCESS
  125. else:
  126. status = ErrorCode.UNKNOWN_ERROR
  127. return {'status': status, 'value': data.strip()}
  128.  
  129. # Some of the drivers incorrectly return a response
  130. # with no 'value' field when they should return null.
  131. if 'value' not in data:
  132. data['value'] = None
  133. return data
  134. else:
  135. data = {'status': 0, 'value': data}
  136. return data
  137. finally:
  138. LOGGER.debug("Finished Request")
  139. resp.close()

构造方法中主要是把localhost域名换成127.0.0.1:先通过urllib.parse.urlparse把要处理的url解析为6个部分。

urlparse返回的是一个命名元组(named tuple)对象,包含scheme, netloc, path, params, query, fragment六个字段。netloc包括hostname和port。

调用 common_utils.find_connectable_ip()方法获取hostname对应的ip地址,最后urllib.parse.urlunparse()重新组成url并赋值给self._url

初始化了self._commands字典,key为命令名,value为具体执行该命令所用的(HTTP方法, URL路径)元组。

RemoteConnection类的实例方法execute调用 _request方法最终实现发送命令到远程服务器。

它们是通过wire protocol(有线协议)通信的,这种协议以点对点方式进行通信。首先客户端将操作(例如点击)转换成json格式的字符串,然后通过wire protocol协议传递给服务器。

RemoteWebDriver类的构造方法先更新capabilities字典,然后主要是调用start_session并传入capabilities字典。

start_session方法 根据capabilities字典创建一个新的会话并获取session_id。

另外还实例化了错误处理handler、文件查找file_detector(默认实例化的是LocalFileDetector),以及一个用于页面切换的SwitchTo对象。

  1. #selenium/webdriver/remote/webdriver.py
  2. class WebDriver(object):
  3.  
  4. _web_element_cls = WebElement
  5.  
  6. def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
  7. desired_capabilities=None, browser_profile=None, proxy=None,
  8. keep_alive=False, file_detector=None, options=None):
  9. """
  10. 创建一个driver使用 wire协议发送命令
  11. 参数:
  12. - command_executor - 远程服务器的url 'http://127.0.0.1:端口号'
  13. - desired_capabilities - A dictionary of capabilities to request when
  14. starting the browser session. 必选参数
  15. - proxy - 一个selenium.webdriver.common.proxy.Proxy 对象. 可选的
  16. - file_detector - 自定义文件检测器对象. 默认使用LocalFileDetector()
  17. - options - options.Options类的实例
  18. """
  19. capabilities = {}
  20. if options is not None:
  21. capabilities = options.to_capabilities()
  22. if desired_capabilities is not None:
  23. if not isinstance(desired_capabilities, dict):
  24. raise WebDriverException("Desired Capabilities must be a dictionary")
  25. else:
  26. #更新capabilities字典
  27. capabilities.update(desired_capabilities)
  28. if proxy is not None:
  29. warnings.warn("Please use FirefoxOptions to set proxy",
  30. DeprecationWarning)
  31. proxy.add_to_capabilities(capabilities)
  32. self.command_executor = command_executor
  33. if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
  34. self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
  35. self._is_remote = True
  36. #控制浏览器会话的字符串id
  37. self.session_id = None
  38. self.capabilities = {}
  39. #errorhandler.ErrorHandler 处理错误的handler
  40. self.error_handler = ErrorHandler()
  41. self.start_client()
  42. if browser_profile is not None:
  43. warnings.warn("Please use FirefoxOptions to set browser profile",
  44. DeprecationWarning)
  45. #核心代码 开始一个会话
  46. self.start_session(capabilities, browser_profile)
  47. #实例化页面切换对象
  48. self._switch_to = SwitchTo(self)
  49. #app
  50. self._mobile = Mobile(self)
  51. #默认实例化LocalFileDetector对象
  52. self.file_detector = file_detector or LocalFileDetector()
  53.  
  54. def start_session(self, capabilities, browser_profile=None):
  55. """
  56. 根据capabilities字典创建一个新的会话
  57. browser_profile FirefoxProfile的一个对象 只有火狐浏览器
  58. """
  59. if not isinstance(capabilities, dict):
  60. raise InvalidArgumentException("Capabilities must be a dictionary")
  61. if browser_profile:
  62. if "moz:firefoxOptions" in capabilities:
  63. capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded
  64. else:
  65. capabilities.update({'firefox_profile': browser_profile.encoded})
  66. """
  67. _make_w3c_caps return dict
  68. {
  69. "firstMatch": [{}],
  70. "alwaysMatch": {
  71. 'browserName': 'chrome',
  72. 'version': '',
  73. 'platformName': 'any',
  74. 'goog:chromeOptions': {'extensions': [], 'args': []}
  75. }
  76. }
  77. """
  78. w3c_caps = _make_w3c_caps(capabilities)
  79. parameters = {"capabilities": w3c_caps,
  80. "desiredCapabilities": capabilities}
  81. #Command.NEW_SESSION: ('POST', '/session'),
  82. response = self.execute(Command.NEW_SESSION, parameters)
  83. if 'sessionId' not in response:
  84. response = response['value']
  85. #获取session_id
  86. self.session_id = response['sessionId']
  87.  
  88. self.capabilities = response.get('value')
  89.  
  90. # if capabilities is none we are probably speaking to
  91. # a W3C endpoint
  92. if self.capabilities is None:
  93. self.capabilities = response.get('capabilities')
  94.  
  95. # Double check to see if we have a W3C Compliant browser
  96. self.w3c = response.get('status') is None
  97. self.command_executor.w3c = self.w3c
  98.  
  99. def _make_w3c_caps(caps):
  100. """Makes a W3C alwaysMatch capabilities object.
  101.  
  102. Filters out capability names that are not in the W3C spec. Spec-compliant
  103. drivers will reject requests containing unknown capability names.
  104.  
  105. Moves the Firefox profile, if present, from the old location to the new Firefox
  106. options object.
  107.  
  108. :Args:
  109. - caps - A dictionary of capabilities requested by the caller.
  110. """
  111. #深拷贝
  112. caps = copy.deepcopy(caps)
  113. #因为浏览器chrome 所以profile为None
  114. profile = caps.get('firefox_profile')
  115. always_match = {}
  116.  
  117. if caps.get('proxy') and caps['proxy'].get('proxyType'):
  118. caps['proxy']['proxyType'] = caps['proxy']['proxyType'].lower()
  119.  
  120. for k, v in caps.items():
  121. #如果caps的key 在_OSS_W3C_CONVERSION key中 而且caps的key对应的值不为空
  122. if v and k in _OSS_W3C_CONVERSION:
  123. #always_match的key 为_OSS_W3C_CONVERSION字典的值 value是caps字典的值
  124. always_match[_OSS_W3C_CONVERSION[k]] = v.lower() if k == 'platform' else v
  125. if k in _W3C_CAPABILITY_NAMES or ':' in k:
  126. always_match[k] = v
  127. if profile:
  128. moz_opts = always_match.get('moz:firefoxOptions', {})
  129. # If it's already present, assume the caller did that intentionally.
  130. if 'profile' not in moz_opts:
  131. # Don't mutate the original capabilities.
  132. new_opts = copy.deepcopy(moz_opts)
  133. new_opts['profile'] = profile
  134. always_match['moz:firefoxOptions'] = new_opts
  135. return {"firstMatch": [{}], "alwaysMatch": always_match}
  136.  
  137. _OSS_W3C_CONVERSION = {
  138. 'acceptSslCerts': 'acceptInsecureCerts',
  139. 'version': 'browserVersion',
  140. 'platform': 'platformName'
  141. }
  142. #通过self.command_executor.execute发送cmd命令到远程服务器达到控制浏览器的目标。
  143.  
  144. def execute(self, driver_command, params=None):
  145. """
  146. 通过command.CommandExecutor执行driver_command命令
  147. 返回一个字典对象 里面装着JSON response
  148. """
  149. if self.session_id is not None:
  150. if not params:
  151. params = {'sessionId': self.session_id}
  152. elif 'sessionId' not in params:
  153. params['sessionId'] = self.session_id
  154.  
  155. #数据封包
  156. params = self._wrap_value(params)
  157. #核心代码 执行command_executor实例对象的execute方法
  158. response = self.command_executor.execute(driver_command, params)
  159. if response:
  160. self.error_handler.check_response(response)
  161. #数据解包
  162. response['value'] = self._unwrap_value(
  163. response.get('value', None))
  164. return response
  165. # If the server doesn't send a response, assume the command was
  166. # a success
  167. return {'success': 0, 'value': None, 'sessionId': self.session_id}

driver.get('https://www.baidu.com')调用的是webdriver/remote/webdriver.py下的get方法。
get方法调用了同一文件中WebDriver类的execute方法,而该execute方法中的self.command_executor.execute实际调用的是remote_connection.py中RemoteConnection类的execute方法。
这实际上是发送一个HTTP request给监听端口上的Web Service。在我们的HTTP request的body中,会以WebDriver Wire协议规定的JSON格式的字符串来告诉Selenium我们希望浏览器打开'https://www.baidu.com'页面

  1. #selenium/webdriver/remote/webdriver.py
  2.  
  3. def get(self, url):
  4. """
  5. Loads a web page in the current browser session.
  6. """
  7. #Command.GET: ('POST', '/session/$sessionId/url'),
  8. self.execute(Command.GET, {'url': url})

总结一下:
首先是webdriver实例化Service 类调用start()方法用subprocess启动chromedriver(带--port参数)驱动。chromedriver启动之后都会在绑定的端口启动Web Service。

接着实例化RemoteConnection获得 command_executor实例化对象 传入给RemoteWebDriver构造方法。

RemoteWebDriver构造方法调用start_session()方法启动session并获得唯一的session_id(通过这个session_id来确定找到对方,且在多线程并行的时候彼此之间不会有冲突和干扰)。

接下来调用WebDriver的任何API,比如get(),都需要借助一个CommandExecutor(RemoteConnection类的实例对象)调用execute()发送一个命令(这个命令来自CommandExecutor实例化时生成的command字典)。

  1. #部分
  2. self._commands = {
  3. Command.STATUS: ('GET', '/status'),
  4. Command.NEW_SESSION: ('POST', '/session'),
  5. Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
  6. Command.QUIT: ('DELETE', '/session/$sessionId'),
  7. Command.GET_CURRENT_WINDOW_HANDLE:
  8. ('GET', '/session/$sessionId/window_handle'),
  9. Command.W3C_GET_CURRENT_WINDOW_HANDLE:
  10. ('GET', '/session/$sessionId/window'),
  11. Command.GET_WINDOW_HANDLES:
  12. ('GET', '/session/$sessionId/window_handles'),
  13.  
  14. #.................省略.....................
  15. }

CommandExecutor中的execute()方法最后返回_request()方法的调用结果,实际上是发送一个HTTP request给监听端口上的Web Service。

在HTTP request的body中,以Wire协议规定的JSON格式字典来告诉chromedriver接下来做什么事。(通过之前绑定的端口)
实际的执行者是chromedriver驱动,而selenium就相当于一个代理。所以selenium并不是直接操控浏览器而是运行webdriver, 通过webdriver间接操控浏览器。

在现实生活中这类似打出租车,我们告诉司机目的地是哪?走哪条路到达?webdriver就相当于出租车司机。

初探selenium3原理的更多相关文章

  1. CSS Spritec下载,精灵图,雪碧图,初探之原理、使用

    CSS Spritec下载,精灵图,雪碧图,初探之原理.使用 关于CSS Sprite CSSSprites在国内很多人叫css精灵雪碧图,是一种网页图片应用处理方式.它允许你将一个页面涉及到的所有零 ...

  2. CSS Sprite初探之原理、使用

    CSS Sprite简介: 利用CSS Sprites能很好地减少了网页的http请求次数,从而大大的提高了页面的性能,节省时间和带宽.CSS Sprites在国内很多人叫css精灵, 是一种网页图片 ...

  3. Kubernetes初探:原理及实践应用

    总体概览 如下图所示是我初步阅读文档和源代码之后整理的总体概览,基本上可以从如下三个维度来认识Kubernetes. 操作对象 Kubernetes以RESTFul形式开放接口,用户可操作的REST对 ...

  4. lucene 初探

    前言: window文件管理右上角, 有个搜索功能, 可以根据文件名进行搜索. 那如果从文件名上判断不出内容, 我岂不是要一个一个的打开文件, 查看文件的内容, 去判断是否是我要的文件? 几个, 十几 ...

  5. 初探Windows用户态调试机制

    我们在感叹Onlydbg强大与便利的同时,是否考虑过它实现的原理呢? 作为一个技术人员知其然必知其所以然,这才是我们追求的本心. 最近在学习张银奎老师的<软件调试>,获益良多.熟悉Wind ...

  6. 【云计算】Docker云平台—Docker进阶

    Docker云平台系列共三讲,此为第二讲:Docker进阶 参考资料: 五个Docker监控工具的对比:http://www.open-open.com/lib/view/open1433897177 ...

  7. Flynn初步:基于Docker的PaaS台

    Flynn它是一个开源PaaS台,无论要部署的应用程序,你可以建立自己的主动性Docker容器集群的实现,能特性与组件设计大量參考了传统的PaaS平台Heroku.本文旨在从使用动机.基本对象.层次架 ...

  8. Python源代码剖析笔记3-Python运行原理初探

    Python源代码剖析笔记3-Python执行原理初探 本文简书地址:http://www.jianshu.com/p/03af86845c95 之前写了几篇源代码剖析笔记,然而慢慢觉得没有从一个宏观 ...

  9. JVM初探- 内存分配、GC原理与垃圾收集器

    JVM初探- 内存分配.GC原理与垃圾收集器 标签 : JVM JVM内存的分配与回收大致可分为如下4个步骤: 何时分配 -> 怎样分配 -> 何时回收 -> 怎样回收. 除了在概念 ...

随机推荐

  1. Qt获取当前屏幕大小

    1.头文件 #include<QScreen> 2.代码 QScreen *screen = QGuiApplication::primaryScreen (); QRect screen ...

  2. 后台实战——用户登录之JWT

    https://blog.csdn.net/jackcheng_ht/article/details/52670211

  3. SSM开发健康信息管理系统

    Spring+Spring MVC+MyBatis基于MVC架构的个人健康信息管理系统 采用ssm框架,包含 健康档案.健康预警(用户输入数据,系统根据范围自动判断给出不同颜色箭头显示). 健康分析. ...

  4. linux安装Nginx 以及 keepalived 管理Nginx

    linux安装Nginx 1.1将Nginx素材内容上传到/usr/local目录(pcre,zlib,openssl,nginx)(注意:必须登录用对这个文件具有操作权限的) 1.2安装pcre库 ...

  5. 用idea打包maven项目

    利用idea工具打包项目 1.点击图中标记①,idea会自动生成打包命令,这个打包命令会包含单元测试内容,如果单元测试的内容报错,是打包不成功的,这个时候我们需要在打包命令中用 -Dmaven.tes ...

  6. 【pattern】设计模式(1) - 单例模式

    前言 好久没写博客,强迫自己写一篇.只是总结一下自己学习的单例模式. 说明 单例模式的定义,摘自baike: 单例模式最初的定义出现于<设计模式>(艾迪生维斯理, 1994):“保证一个类 ...

  7. openlayers地图显示点

    <!DOCTYPE html> <html> <head> <meta charset="utf-8" /> <title&g ...

  8. stun/turn服务器部署

    目录: 一.简介 二.安装 三.配置与运行 四.运行检测 五.答疑环节 一.简介 本文通过在服务器上安装coturn这个软件,实现搭建STUN服务器和TURN服务器. coturn 简介:是一个免费的 ...

  9. H5_0013:CSS特色样式集

    按比例变化,同时又限制最大宽高 ".start-wrap {", " width:40%;", " top: 83.21%;", " ...

  10. ECMAScript基本语法——⑤运算符 void

    void阻止返回值的运算符,没有返回值