from selenium import webdriver:导入webdriver模块
当导入webdriver模块时,会执行\selenium\webdriver目录下的__init__.py文件,导入支持的浏览器模块并起别名(as)
 
from .firefox.webdriver import WebDriver as Firefox  # noqa
from .firefox.firefox_profile import FirefoxProfile # noqa
from .firefox.options import Options as FirefoxOptions # noqa
from .chrome.webdriver import WebDriver as Chrome # noqa
from .chrome.options import Options as ChromeOptions # noqa
from .ie.webdriver import WebDriver as Ie # noqa
from .ie.options import Options as IeOptions # noqa
from .edge.webdriver import WebDriver as Edge # noqa
from .opera.webdriver import WebDriver as Opera # noqa
from .safari.webdriver import WebDriver as Safari # noqa
from .blackberry.webdriver import WebDriver as BlackBerry # noqa
from .phantomjs.webdriver import WebDriver as PhantomJS # noqa
from .android.webdriver import WebDriver as Android # noqa
from .webkitgtk.webdriver import WebDriver as WebKitGTK # noqa
from .webkitgtk.options import Options as WebKitGTKOptions # noqa
from .remote.webdriver import WebDriver as Remote # noqa
from .common.desired_capabilities import DesiredCapabilities # noqa
from .common.action_chains import ActionChains # noqa
from .common.touch_actions import TouchActions # noqa
from .common.proxy import Proxy # noqa
driver = webdriver.Chrome():启动Chrome浏览器程序
此命令为将webdriver.Chrome()实例化,类的实例化会自动调用__init__()方法
路径: webdriver.chrome.webdriver
源码:
import warnings
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
from .remote_connection import ChromeRemoteConnection
from .service import Service
from .options import Options


class WebDriver(RemoteWebDriver):  # extends selenium.webdriver.remote.webdriver.WebDriver
    """
    Controls the ChromeDriver and allows you to drive the browser.

    You will need to download the ChromeDriver executable from
    http://chromedriver.storage.googleapis.com/index.html
    """

    def __init__(self, executable_path="chromedriver", port=0,
                 options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None,
                 chrome_options=None, keep_alive=True):
        """
        Creates a new instance of the chrome driver.

        Starts the service and then creates new instance of chrome driver.

        :Args:
         - executable_path - path to the executable. If the default is used
           it assumes the executable is in the $PATH
         - port - port you would like the service to run, if left as 0,
           a free port will be found.
         - options - this takes an instance of ChromeOptions
         - service_args - List of args to pass to the driver service
         - desired_capabilities - Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref".
         - service_log_path - Where to log information from the driver.
         - chrome_options - Deprecated argument for options
         - keep_alive - Whether to configure ChromeRemoteConnection to use
           HTTP keep-alive.
        """
        if chrome_options:
            # Deprecated spelling: warn, then treat it exactly like `options`.
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning, stacklevel=2)
            options = chrome_options

        if desired_capabilities is None:
            # No capabilities supplied: derive them from the given options,
            # or from a default options object when there are none.
            chosen = self.create_options() if options is None else options
            desired_capabilities = chosen.to_capabilities()
        elif options is not None:
            # Both supplied: the option-derived entries are merged into the
            # caller's capabilities dict.
            desired_capabilities.update(options.to_capabilities())
        # Otherwise desired_capabilities stays as passed in.

        # Create the driver service and launch it.
        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()

        try:
            # The connection is built inside the try block so that a failure
            # there also goes through the self.quit() cleanup below.
            connection = ChromeRemoteConnection(
                remote_server_addr=self.service.service_url,
                keep_alive=keep_alive)
            RemoteWebDriver.__init__(
                self,
                command_executor=connection,
                desired_capabilities=desired_capabilities)
        except Exception:
            self.quit()
            raise
        self._is_remote = False
  • 首先创建一个浏览器的字典对象,然后调用Options类中的to_capabilities()方法,该方法返回caps字典对象。由以下代码可看出,chrome浏览器返回的caps字典对象默认为:
    {
    "browserName":"chrome",
    "version":"",
    "platform":"ANY",
    "goog:chromeOptions":  {'extensions': [], 'args': []}
    }
class Options(object):
    # Capability key under which the Chrome-specific options are stored.
    KEY = "goog:chromeOptions"

    def __init__(self):
        """Set up empty option state plus a copy of the default Chrome capabilities."""
        self._binary_location = ''
        self._arguments = []
        self._extension_files = []
        self._extensions = []
        self._experimental_options = {}  # starts out as an empty dict
        self._debugger_address = None
        # .copy() is shallow: top-level keys added later do not show up in
        # DesiredCapabilities.CHROME itself.
        self._caps = DesiredCapabilities.CHROME.copy()

    # ----------- intermediate code omitted -----------
    # NOTE(review): experimental_options, extensions, binary_location,
    # arguments and debugger_address used below are presumably properties
    # defined in the omitted part -- confirm against the full file.

    def to_capabilities(self):
        """
        Creates a capabilities with all the options that have been set and returns a dictionary with everything
        """
        caps = self._caps  # alias, not a copy: the KEY entry set below is stored on self._caps
        chrome_options = self.experimental_options.copy()  # start from a copy of the experimental options
        chrome_options["extensions"] = self.extensions  # add the "extensions" entry
        if self.binary_location:
            chrome_options["binary"] = self.binary_location
        chrome_options["args"] = self.arguments  # add the "args" entry
        if self.debugger_address:
            chrome_options["debuggerAddress"] = self.debugger_address

        caps[self.KEY] = chrome_options  # store the Chrome options under KEY

        return caps  # the capabilities dict
 
aaarticlea/png;base64,iVBORw0KGgoAAAANSUhEUgAAARsAAABeCAYAAAATz7BKAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAq/SURBVHhe7Z1dkuogEIVnXS7IdcwSfHMVvvp8y0XMi0twEbl9Gkga8ocaWxPPV0VVQggQQp80jDA/DSGEOECxIYS4QLEhhLhAsXkD5/2x+fn5bfbnGEHIF1AtNtfDvtn9/IiRIOzEUK7NYbdrDldcPTf7dG130PSHXUq7l6tIso/nedgNWNz1fGj27f1Ic2i0GCHluwsFt1wPu5Ae5Y+UpSHW721cL9KOFBryfVSJDYSmsO3mLHEQnTYeBl5Y0Hn/kxvV9dDsrLFfIViFcKhQSL4iZoGrRImQyH0hJgrbbt8KENIEEYrCBsqywFCcN+eT1JNiQ76PCrER4x4x0M6zER4RG5DddxUhMXkaurwgNvtmb/OWPHZ7iRsRG3g9Kmh3is15/6vCMBr2fzHlHVBsyJcyLzYw0BrLUI/EDFdiyG4tjf0KIcOQLJ1PiEErSkFsziYtPJ8D8irFxtSjHHa9i+sBYnMcFFRCtsyyYlOkG/RsjADkwyXhHrGRI/WsDumeAbGJebWezZ0s7dlcD2Fi+Gd3MUNAQr6DumGUNeIxasXGiMkZIiDnneHVD6PCYfCmevFgSrjeyFWHUfRsyPdRNUEMo85FAUYDYzbC8IDYAP0rkk2kAmI9nqEJ4iHxqxObbJ7pHXDOhnwpdWIjBHExcyA7MWwrNGloFA28+9N3FBybJsUp/b8k9f/0fQ5CAwExeaShUftnbw0YWuVl5YFiQ8g7qBYbshD8nQ35Uig2b4C/ICbfCMWGEOICxYYQ4gLFhhDiAsWGEOJCldj8+/cvHhFCyGPQsyGEuEDPhhDiwko9m7/m9Pvb/P4em8vtZo7j1RPOf5vTAztAkFcw/b6W5fmybpej9p/jayr4Qjzb+X5W6tmkhjzJUdNcjuHYagsE5xvFJgmtbY/QPjEcLzHWk/n3tRzLlAXBWZ/YeLbz/ax2zkYbMhqOGpgc267xrWIDWsGxDfB3ys+dmXtfS7JEWesUG992vpfVztloQ0bjUbe3MCRcP/3dmsspuMQIx1Pe8MldVvW/iQuqXwK8IJzHRMLf5dQc0zVxS49SVrqs9Uh5xDiJjV8YCW29bpK2qwvyOcl53qFNHVKaosPX1Dk8O+pgXOhSbG4XudfUR+6/dBm0cSF/qQfyi2Xh+Tvm6wzm3lci7fmzO/TzqKWmrJs847GtM54J5907xH3Hy1/0DlKaPJ+n+88b2/kdrNazmSOIgBibeeu3v4u89FLp5SXqC8/TJiAQEC2LdlTzEi+4F0li5wmXkG9wZxVcQ9ltRBAfKzYX6fBlFVBWv1PN1TnUQTtbel6Un3W8m6mLAPGxginnR+38kkiPg3iX6errXMcSYjOHCom0hS1B+0YhNsH4YyppLAhP1oTKk/3nTe38Dlbr2cyRDK6kHx9EYSCpINdG3NBWYARr3EeJV6NGx8kKyr2s8IUzeceO1l03QeqQM1Vn+4zBQFTQSrFBZ86+lAgmT9SnLdeWZ47vqvPnYN/dGPoui0TDferJ/rPhdi7ZtGfT7xiILzvak50F4EslJyEu5HcZ6KyWm3wpdViVKhk73fgdlqk6ozrm2bWjSr3Ele/EBvdLXPapLPKsNYLqOn8OqxSbFbZzyaY9m94wSsfO5Uub6izIZ34YpXlI50mdpi3b3IY4zAGY2shYPu/QOqySfG1pKOtxzyYQhgRSpxSpAiT3p4KS8Nk8a4wAZ9V1rsNtGFX0Awyj8A7T61hGbEL7zA6j3tDOyH8v7ey5H/YmPZs0qYeXPD5BHIYY6VoX+h1HJ/ja6/2XDtrhCkAnkHxsGu2oOgnZlTWUT17W/XUOQhdDa1DxT6LGUlR42/thEJgjCMfqBaVrmgc6Ps5RTjruDHO6zv
fhITYgf36ps5nUbcUZIbaZbdfwnhfoP9pPYrxzO4cdI333wt6sZ0MIGUf/c8gj//fsCTY7Z0MIGUOGUDv///BBz4YQ4gI9G0KIC/RsCCEu0LMhhLhAz4YQ4gI9G0KIC/RsvIk/+PP9hQMh72elno39dWXaMKj7peVH4yI2nu1TU9aK3xdZjJV6NqnDfuaOZO/Hs31qyuL7Iiues9EOGxeh6bqVdh0QAZ7tU1MW3xdZ7ZyNdti4SE4XzplFhrqSVr+kIYRd6+x5TAf3Xr+yKRx7mxF1i/LkSzy6G1taNW3ywSpdm1dWp/Gvui62a+tULPpMC/ek7HyBaT+3yfYxeO2MV1sfsl1W69nMI647BCGe4bzcx2SpnfFUBPClbi8F8Sm3KAggr2Gxmd2OAKhoGVGUQuE1PGq7XqusCVmtZ1MDvqB224e+0QbPoBeiu98xLhABMfjCsxlf/j+WF7ym4Xsykcz2PwnAa6CjQD6dDXs2IBg27LS3O1s02mFBKJkTm5zeLnwZFBvynWzaswHq3WDTqgFrrN/9bFpsYOxzu/B1jOdVPYxaUGw+bxjlv4Mc8WHjno0Qh0tjxvjsznhAjX1mF77hfCSUwjG2qxuAEJr7EI+yU9ywuE3zcWLzhh3kiA+b92zIunjHDnLEh+17NmRFvGcHOeIDPRtCiAv0bAghLtCzIYS4QM+GEOICPRtCiAv0bAghLtCzIYS4sFLPBj/5D7+wfWrnt/jr4vAL3Pq1T8+BNVCpTAnlmi03FmrDKjzLquHT6vMdrNSzSR1kqZ3f0PmeE5tshfkEaR3V+1m6DaeYL6tczlEuL+mWZXT3Zfdg2YdZzmFX+/fiXJ+dJFY7Z6MdJK4r0o4Y1wo9hq/YPLpocmmWbcNpasvSFe6X/mJT0AqObUCIiT0fWM0fFrjGk4jns5PAaudstIPEHlTu/Ba+VDBqEZF4rF+2u/eYEaTznsQAUh66S55dmZ0NxWwo3HK7iNKEUqAmd+oT9Fn1mtS33DlQ7k3HIR5G1qXJVo8LU21o8drNT9syCkBvSxABeYRdF821UmyAxKVnHfsI1D47WY7VejZzaGeCAcqYvGXgqxeYEBtJbbNAHkNpxzp1STCYeFJQtcWEgvri+cToyp0DVfxiPlEI0/Gj3pvXyvCsbQZEJF1XcUjvcUhshLB9SBBr8hms1rOZY8yo0Qn7mjAhNjBS4x2F8AqxgQcy7Hn1v/LT9e2GIDbdxD0fQfn82N4jr2/XdmHrD23vEbH5/Of9Pjbt2QyLzVD8WMdEfOk9DKel2DyHeiuZoIdg2zRrO/XapF0wdKTYrIJNezZPD6O0QyOPdJ7+i0I/rYpNZwm6U1/6a4dlXGxw7Z5h1IghvUBsXj+MuonQDtVN6mzeV9l2rUC9VGy4c+BSbNyzyTciLyeI00RyL7TGig6d75x3+sPcRzjueRsmPy3LXB/7cpd2MrlTnxwN1zkaFYYUKU6NFAaXrqfjst7zvFRsVNBjnSW07WGfBe1gn7sVoPgn7KwR03PacP8zt3DnwMX4ujkbQu6BOwcux9fN2RBSD3cOXJJNejb5UOMJF5oQshib9WwIIZ/FZudsCCGfBT0bQogLFBtCiAsUG0KICxQbQogL9WKjv6T8oP8JTQhZFfd5NvzpNiHkQSg2hBAXKDaEEBceEpv9mfM2hJD7uE9ssDANe3tI2J9jFCGEVPCgZxPPCSGkEs7ZEEJcoNgQQlyg2BBCXKgXGxUa/oKYEPIY93k2hBDyIBQbQogLFBtCiAsUG0KICxQbQogLFBtCiAsUG0KICxQbQogDTfMfB1Fg7SHy/PMAAAAASUVORK5CYII=" alt="" />
  • 再看在webdriver.chrome.remote_connection下的ChromeRemoteConnection类,继承了在webdriver.remote.remote_connection下的RemoteConnection,调用父类构造方法往self._commands里添加几个command键值对。
子类ChromeRemoteConnection源码:
 
from selenium.webdriver.remote.remote_connection import RemoteConnection

class ChromeRemoteConnection(RemoteConnection):
    """RemoteConnection that additionally registers Chrome-specific commands."""

    def __init__(self, remote_server_addr, keep_alive=True):
        """
        Run the parent constructor, then extend its command table.

        :Args:
         - remote_server_addr - passed through to RemoteConnection.
         - keep_alive - passed through to RemoteConnection.
        """
        RemoteConnection.__init__(self, remote_server_addr, keep_alive)
        # Each entry maps a command name to (HTTP method, URL template).
        self._commands.update({
            "launchApp": ('POST', '/session/$sessionId/chromium/launch_app'),
            "setNetworkConditions": ('POST', '/session/$sessionId/chromium/network_conditions'),
            "getNetworkConditions": ('GET', '/session/$sessionId/chromium/network_conditions'),
            'executeCdpCommand': ('POST', '/session/$sessionId/goog/cdp/execute'),
        })
父类构造方法中主要是把localhost域名换成127.0.0.1:先通过urllib.parse.urlparse把url解析成6部分,urlparse返回的是一个命名元组(named tuple)对象,包含scheme、netloc、path、params、query、fragment,其中netloc包括hostname和port;再调用common_utils.find_connectable_ip()方法获取hostname对应的ip地址;最后用urllib.parse.urlunparse()重新组成url并赋值给self._url。
构造方法中还初始化了self._commands字典:key为命令名称,value为由HTTP方法和URL路径模板组成的元组。
RemoteConnection类的实例方法execute调用_request方法,最终实现发送命令到远程服务器。双方通过wire protocol(有线协议)进行通信,这种协议以点对点方式通信:首先客户端将操作(例如一次点击)转换成json格式的字符串,然后通过wire protocol协议传递给服务器。
父类RemoteConnection源码:
import base64
import logging
import platform
import socket
import string
import urllib3 try:
from urllib import parse
except ImportError: # above is available in py3+, below is py2.7
import urlparse as parse from selenium.webdriver.common import utils as common_utils
from selenium import __version__
from .command import Command
from .errorhandler import ErrorCode
from . import utils LOGGER = logging.getLogger(__name__) class RemoteConnection(object):
"""
A connection with the Remote WebDriver server. #与远程Web驱动程序服务器的连接
Communicates with the server using the WebDriver wire protocol: #使用WebDriver线路协议与服务器通信
https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol
"""
_timeout = socket._GLOBAL_DEFAULT_TIMEOUT #-----------中间代码省略-----------
#父类构造方法
def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
    """
    Store the server URL (optionally with its host resolved to an IP) and
    build the command table.

    :Args:
     - remote_server_addr - URL of the remote WebDriver server.
     - keep_alive - when true, a urllib3.PoolManager is created and kept
       on self._conn.
     - resolve_ip - when true (and the URL has a hostname), try to replace
       the hostname with a connectable IP address.
    """
    # Attempt to resolve the hostname and get an IP address.
    self.keep_alive = keep_alive
    parsed_url = parse.urlparse(remote_server_addr)
    if parsed_url.hostname and resolve_ip:
        port = parsed_url.port or None
        if parsed_url.scheme == "https":
            # https URLs keep their hostname; no IP lookup is done.
            ip = parsed_url.hostname
        elif port and not common_utils.is_connectable(port, parsed_url.hostname):
            ip = None
            LOGGER.info('Could not connect to port {} on host '
                        '{}'.format(port, parsed_url.hostname))
        else:
            ip = common_utils.find_connectable_ip(parsed_url.hostname,
                                                  port=port)
        if ip:
            # Rebuild netloc as [user[:password]@]ip[:port] and reassemble
            # the URL from the six parsed components.
            netloc = ip
            if parsed_url.port:
                netloc = common_utils.join_host_port(netloc,
                                                     parsed_url.port)
            if parsed_url.username:
                auth = parsed_url.username
                if parsed_url.password:
                    auth += ':%s' % parsed_url.password
                netloc = '%s@%s' % (auth, netloc)
            remote_server_addr = parse.urlunparse(
                (parsed_url.scheme, netloc, parsed_url.path,
                 parsed_url.params, parsed_url.query, parsed_url.fragment))
        else:
            # The address is left exactly as it was passed in.
            LOGGER.info('Could not get IP address for host: %s' %
                        parsed_url.hostname)

    self._url = remote_server_addr
    if keep_alive:
        self._conn = urllib3.PoolManager(timeout=self._timeout)

    # Command table: command -> (HTTP method, URL template). The
    # '$sessionId' placeholder is filled in by execute().
    self._commands = {
        Command.STATUS: ('GET', '/status'),
        Command.NEW_SESSION: ('POST', '/session'),
        Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
        Command.QUIT: ('DELETE', '/session/$sessionId'),
        Command.GET_CURRENT_WINDOW_HANDLE:
            ('GET', '/session/$sessionId/window_handle'),
        Command.W3C_GET_CURRENT_WINDOW_HANDLE:
            ('GET', '/session/$sessionId/window'),
        Command.GET_WINDOW_HANDLES:
            ('GET', '/session/$sessionId/window_handles'),
        Command.W3C_GET_WINDOW_HANDLES:
            ('GET', '/session/$sessionId/window/handles'),
        # ----------- intermediate code omitted -----------
    }
#最终发送命令到远程服务器的方法
def execute(self, command, params):
    """
    Send a command to the remote server.

    Any path substitutions required for the URL mapped to the command
    should be included in the command parameters.

    :Args:
     - command - A string specifying the command to execute.
     - params - A dictionary of named parameters to send with the command
       as its JSON payload.

    :Returns:
      Whatever self._request returns for the resulting HTTP call.
    """
    entry = self._commands[command]
    assert entry is not None, 'Unrecognised command %s' % command

    # entry is (HTTP method, URL template); placeholders such as
    # $sessionId in the template are filled from params.
    path = string.Template(entry[1]).substitute(params)

    # When the connection is flagged as W3C, sessionId is removed from the
    # payload after it has been substituted into the path.
    speaks_w3c = getattr(self, 'w3c', False)
    if speaks_w3c and isinstance(params, dict) and 'sessionId' in params:
        del params['sessionId']

    payload = utils.dump_json(params)
    return self._request(entry[0], '%s%s' % (self._url, path), body=payload)
def _request(self, method, url, body=None):
    """
    Send an HTTP request to the remote server.

    :Args:
     - method - A string for the HTTP method to send the request with.
     - url - A string for the URL to send the request to.
     - body - A string for request body. Ignored unless method is POST or PUT.

    :Returns:
      A dictionary with the server's parsed JSON response.
    """
    LOGGER.debug('%s %s %s' % (method, url, body))

    parsed_url = parse.urlparse(url)
    headers = self.get_remote_connection_headers(parsed_url, self.keep_alive)
    resp = None
    # A body is only sent with POST and PUT.
    if body and method != 'POST' and method != 'PUT':
        body = None

    if self.keep_alive:
        # Reuse the pool manager created in the constructor.
        resp = self._conn.request(method, url, body=body, headers=headers)

        statuscode = resp.status
    else:
        # No keep-alive: a new pool manager is created for this request.
        http = urllib3.PoolManager(timeout=self._timeout)
        resp = http.request(method, url, body=body, headers=headers)

        statuscode = resp.status
        # Give the response a getheader() shim when it lacks one, backed by
        # whichever accessor its headers object offers.
        if not hasattr(resp, 'getheader'):
            if hasattr(resp.headers, 'getheader'):
                resp.getheader = lambda x: resp.headers.getheader(x)
            elif hasattr(resp.headers, 'get'):
                resp.getheader = lambda x: resp.headers.get(x)

    data = resp.data.decode('UTF-8')
    try:
        # 300-303: re-issue the request as a GET to the 'location' header.
        if 300 <= statuscode < 304:
            return self._request('GET', resp.getheader('location'))
        # 400-500: hand back the status code with the raw response text.
        if 399 < statuscode <= 500:
            return {'status': statuscode, 'value': data}
        content_type = []
        if resp.getheader('Content-Type') is not None:
            content_type = resp.getheader('Content-Type').split(';')
        if not any([x.startswith('image/png') for x in content_type]):

            try:
                data = utils.load_json(data.strip())
            except ValueError:
                # Body is not JSON: derive the status from the HTTP code and
                # return the stripped text as the value.
                if 199 < statuscode < 300:
                    status = ErrorCode.SUCCESS
                else:
                    status = ErrorCode.UNKNOWN_ERROR
                return {'status': status, 'value': data.strip()}

            # Some of the drivers incorrectly return a response
            # with no 'value' field when they should return null.
            if 'value' not in data:
                data['value'] = None
            return data
        else:
            # image/png content is returned as-is under 'value'.
            data = {'status': 0, 'value': data}
            return data
    finally:
        # Runs on every return path above.
        LOGGER.debug("Finished Request")
        resp.close()
urlparse()方法源码:
def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(url, scheme, allow_fragments)
    # ';params' is only split off the path for schemes listed in uses_params.
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)
    return _coerce_result(
        ParseResult(scheme, netloc, path, params, query, fragment))
  • Webdriver类构造方法中,实例化webdriver/chrome/service.py中的Service类,自动调用Service类中的构造方法__init__(),此Service类为继承webdriver/common/service.py中Service类的子类
from selenium.webdriver.common import service

class Service(service.Service):  # subclass of selenium.webdriver.common.service.Service
    """
    Object that manages the starting and stopping of the ChromeDriver
    """

    def __init__(self, executable_path, port=0, service_args=None,
                 log_path=None, env=None):
        """
        Creates a new instance of the Service

        :Args:
         - executable_path : Path to the ChromeDriver
         - port : Port the service is running on
         - service_args : List of args to pass to the chromedriver service
         - log_path : Path for the chromedriver service to log to
         - env : Passed through unchanged to the base Service constructor
        """
        # Copy the list: the '--log-path' argument appended below must not
        # end up in a list the caller still owns (and may reuse).
        self.service_args = list(service_args) if service_args else []
        if log_path:
            self.service_args.append('--log-path=%s' % log_path)

        # Delegate the rest of the setup to the base class.
        service.Service.__init__(self, executable_path, port=port, env=env,
                                 start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")

    def command_line_args(self):
        """Return the chromedriver arguments: the port flag followed by service_args."""
        return ["--port=%d" % self.port] + self.service_args
Service父类源码:
import errno
import os
import platform
import subprocess
from subprocess import PIPE
import time
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common import utils

try:
    from subprocess import DEVNULL
    _HAS_NATIVE_DEVNULL = True
except ImportError:
    # subprocess has no DEVNULL here: use a sentinel value and open
    # os.devnull by hand in Service.__init__.
    DEVNULL = -3
    _HAS_NATIVE_DEVNULL = False


class Service(object):
    """Launches a driver executable as a child process and waits until it is connectable."""

    def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""):
        """
        :Args:
         - executable : path of the program to launch.
         - port : port for the service; 0 means a free port is picked.
         - log_file : target for the child's stdout and stderr.
         - env : environment for the child; falls back to os.environ.
         - start_error_message : hint appended to start-up error messages.
        """
        self.path = executable

        # Port 0 means "pick a free port automatically".
        self.port = port
        if self.port == 0:
            self.port = utils.free_port()

        if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL:
            log_file = open(os.devnull, 'wb')

        self.start_error_message = start_error_message
        self.log_file = log_file
        # Use env when it is truthy, otherwise the current process environment.
        self.env = env or os.environ

    @property
    def service_url(self):
        """
        Gets the url of the Service
        """
        return "http://%s" % utils.join_host_port('localhost', self.port)

    def command_line_args(self):
        """Return the argument list for the executable; subclasses must override."""
        # NotImplemented is a comparison constant, not an exception class:
        # calling it fails with an unrelated TypeError.
        raise NotImplementedError("This method needs to be implemented in a sub class")

    def start(self):
        """
        Starts the Service.

        :Exceptions:
         - WebDriverException : Raised either when it can't start the service
           or when it can't connect to the service
        """
        # Launch the executable with subprocess.Popen.
        try:
            cmd = [self.path]
            cmd.extend(self.command_line_args())
            self.process = subprocess.Popen(cmd, env=self.env,
                                            close_fds=platform.system() != 'Windows',
                                            stdout=self.log_file,
                                            stderr=self.log_file,
                                            stdin=PIPE)
        except (TypeError, NotImplementedError):
            # Programming errors (bad arguments, command_line_args not
            # overridden) propagate unchanged instead of being reported
            # as a missing executable below.
            raise
        except OSError as err:
            if err.errno == errno.ENOENT:
                raise WebDriverException(
                    "'%s' executable needs to be in PATH. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            elif err.errno == errno.EACCES:
                raise WebDriverException(
                    "'%s' executable may have wrong permissions. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            else:
                raise
        except Exception as e:
            raise WebDriverException(
                "The executable %s needs to be available in the path. %s\n%s" %
                (os.path.basename(self.path), self.start_error_message, str(e)))
        count = 0
        # Poll once per second: fail if the child has exited, stop when the
        # port is connectable, give up after 30 attempts.
        while True:
            self.assert_process_still_running()
            if self.is_connectable():
                break
            count += 1
            time.sleep(1)
            if count == 30:
                raise WebDriverException("Can not connect to the Service %s" % self.path)

    def assert_process_still_running(self):
        """Raise WebDriverException if the child process has already exited."""
        return_code = self.process.poll()
        if return_code is not None:
            raise WebDriverException(
                'Service %s unexpectedly exited. Status code was: %s'
                % (self.path, return_code)
            )

    def is_connectable(self):
        """Return the result of utils.is_connectable for this service's port."""
        return utils.is_connectable(self.port)
  • 由上代码可知Service实例化后,会自动获取一个随机端口,然后调用start()方法,该方法用subprocess启动chromedriver程序,并检测是否可以连接(subprocess源码路径在Lib下)。再看chrome的WebDriver类,它继承了selenium.webdriver.remote.webdriver中的WebDriver类。
  • RemoteWebDriver类的构造方法,更新capabilities字典,主要调用start_session传入capabilities字典。start_session方法,根据capabilities字典创建一个新的会话并获取session_id。另外还实例化了错误处理handle,文件查找file_detector(默认实例化是LocalFileDetector)。一个页面切换的SwitchTo对象。
源码:
class WebDriver(object):
    """
    Controls a browser by sending commands to a remote server.
    This server is expected to be running the WebDriver wire protocol
    as defined at
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol

    :Attributes:
     - session_id - String ID of the browser session started and controlled by this WebDriver.
     - capabilities - Dictionary of effective capabilities of this browser session as returned
         by the remote server. See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
     - command_executor - remote_connection.RemoteConnection object used to execute commands.
     - error_handler - errorhandler.ErrorHandler object used to handle errors.
    """

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=False, file_detector=None, options=None):
        """
        Create a new driver that will issue commands using the wire protocol.

        :Args:
         - command_executor - Either a string representing URL of the remote server or a custom
             remote_connection.RemoteConnection object. Defaults to 'http://127.0.0.1:4444/wd/hub'.
         - desired_capabilities - A dictionary of capabilities to request when
             starting the browser session. Required parameter.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
             Only used if Firefox is requested. Optional.
         - proxy - A selenium.webdriver.common.proxy.Proxy object. The browser session will
             be started with given proxy settings, if possible. Optional.
         - keep_alive - Whether to configure remote_connection.RemoteConnection to use
             HTTP keep-alive. Defaults to False.
         - file_detector - Pass custom file detector object during instantiation. If None,
             then default LocalFileDetector() will be used.
         - options - instance of a driver options.Options class
        """
        capabilities = {}
        if options is not None:
            capabilities = options.to_capabilities()
        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            else:
                # Merge in the capabilities passed by the caller (a subclass
                # such as the Chrome WebDriver passes them here).
                capabilities.update(desired_capabilities)
        if proxy is not None:
            warnings.warn("Please use FirefoxOptions to set proxy",
                          DeprecationWarning, stacklevel=2)
            proxy.add_to_capabilities(capabilities)
        self.command_executor = command_executor
        # A URL given as bytes/str is wrapped in a RemoteConnection; any
        # other object is used as the executor directly.
        if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)
        self._is_remote = True
        # ID of the browser session; filled in by start_session().
        self.session_id = None
        self.capabilities = {}
        self.error_handler = ErrorHandler()
        self.start_client()
        if browser_profile is not None:
            warnings.warn("Please use FirefoxOptions to set browser profile",
                          DeprecationWarning, stacklevel=2)
        # Core step: open the session.
        self.start_session(capabilities, browser_profile)
        # Helper object for switching between pages/contexts.
        self._switch_to = SwitchTo(self)
        self._mobile = Mobile(self)
        # Falls back to a LocalFileDetector when none is supplied.
        self.file_detector = file_detector or LocalFileDetector()

    def start_session(self, capabilities, browser_profile=None):
        """
        Creates a new session with the desired capabilities.

        :Args:
         - capabilities - A dictionary of capabilities to request; anything
           else raises InvalidArgumentException.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object. Only used if Firefox is requested.
        """
        if not isinstance(capabilities, dict):
            raise InvalidArgumentException("Capabilities must be a dictionary")
        if browser_profile:
            if "moz:firefoxOptions" in capabilities:
                capabilities["moz:firefoxOptions"]["profile"] = browser_profile.encoded
            else:
                capabilities.update({'firefox_profile': browser_profile.encoded})
        # The bare string below is an illustrative note (it has no effect):
        # the dict _make_w3c_caps returns for the default Chrome capabilities.
        """
_make_w3c_caps return dict
{
"firstMatch": [{}],
"alwaysMatch": {
'browserName': 'chrome',
'version': '',
'platformName': 'any',
'goog:chromeOptions': {'extensions': [], 'args': []}
}
}
"""
        w3c_caps = _make_w3c_caps(capabilities)
        # Both capability formats are sent in the same request.
        parameters = {"capabilities": w3c_caps,
                      "desiredCapabilities": capabilities}
        response = self.execute(Command.NEW_SESSION, parameters)
        if 'sessionId' not in response:
            response = response['value']
        # Remember the session id returned by the server.
        self.session_id = response['sessionId']
        self.capabilities = response.get('value')

        # if capabilities is none we are probably speaking to
        # a W3C endpoint
        if self.capabilities is None:
            self.capabilities = response.get('capabilities')

        # Double check to see if we have a W3C Compliant browser
        self.w3c = response.get('status') is None
        self.command_executor.w3c = self.w3c


# NOTE(review): takes no self, so this helper and the mapping after it are
# presumably module-level in the original file -- confirm against the source.
def _make_w3c_caps(caps):
    """Makes a W3C alwaysMatch capabilities object.

    Filters out capability names that are not in the W3C spec. Spec-compliant
    drivers will reject requests containing unknown capability names.

    Moves the Firefox profile, if present, from the old location to the new Firefox
    options object.

    :Args:
     - caps - A dictionary of capabilities requested by the caller.
    """
    # Deep copy, so the caller's dict is not modified.
    caps = copy.deepcopy(caps)
    # None when there is no 'firefox_profile' entry (e.g. the Chrome
    # capabilities shown earlier).
    profile = caps.get('firefox_profile')
    always_match = {}
    if caps.get('proxy') and caps['proxy'].get('proxyType'):
        caps['proxy']['proxyType'] = caps['proxy']['proxyType'].lower()
    for k, v in caps.items():
        # Legacy name with a non-empty value: store it under its W3C name
        # (the 'platform' value is lower-cased).
        if v and k in _OSS_W3C_CONVERSION:
            always_match[_OSS_W3C_CONVERSION[k]] = v.lower() if k == 'platform' else v
        # W3C names and names containing ':' are copied unchanged.
        if k in _W3C_CAPABILITY_NAMES or ':' in k:
            always_match[k] = v
    if profile:
        moz_opts = always_match.get('moz:firefoxOptions', {})
        # If it's already present, assume the caller did that intentionally.
        if 'profile' not in moz_opts:
            # Don't mutate the original capabilities.
            new_opts = copy.deepcopy(moz_opts)
            new_opts['profile'] = profile
            always_match['moz:firefoxOptions'] = new_opts
    return {"firstMatch": [{}], "alwaysMatch": always_match}


# Legacy capability name -> W3C capability name.
_OSS_W3C_CONVERSION = {
    'acceptSslCerts': 'acceptInsecureCerts',
    'version': 'browserVersion',
    'platform': 'platformName'
}
#通过self.command_executor.execute发送cmd命令到远程服务器达到控制浏览器的目标
def execute(self, driver_command, params=None):
    """
    Sends a command to be executed by a command.CommandExecutor.

    :Args:
     - driver_command: The name of the command to execute as a string.
     - params: A dictionary of named parameters to send with the command.

    :Returns:
      The command's JSON response loaded into a dictionary object.
    """
    # Once a session exists, every command carries its id.
    if self.session_id is not None:
        if not params:
            params = {'sessionId': self.session_id}
        elif 'sessionId' not in params:
            params['sessionId'] = self.session_id

    # Wrap the outgoing parameters, then hand the command to the executor.
    wrapped = self._wrap_value(params)
    reply = self.command_executor.execute(driver_command, wrapped)

    if not reply:
        # If the server doesn't send a response, assume the command was
        # a success
        return {'success': 0, 'value': None, 'sessionId': self.session_id}

    self.error_handler.check_response(reply)
    # Unwrap the returned value before giving it back to the caller.
    reply['value'] = self._unwrap_value(reply.get('value', None))
    return reply
driver.get('https://www.baidu.com')调用的是webdriver/remote/webdriver.py下的get方法
get方法调用了webdriver.py中WebDriver类的execute方法,该execute方法中的self.command_executor.execute实际调用的是remote_connection.py中RemoteConnection类的execute方法。
实际上是一个HTTP request给监听端口上的Web Service, 在我们的HTTP request的body中,会以WebDriver Wire协议规定的JSON格式的字符串来告诉Selenium我们希望浏览器打开https://www.baidu.com页面
 
#selenium/webdriver/remote/webdriver.py

    def get(self, url):
        """
        Loads a web page in the current browser session.

        :Args:
         - url - sent as the 'url' parameter of the Command.GET command.
        """
        # Command.GET: ('POST', '/session/$sessionId/url'),
        self.execute(Command.GET, {'url': url})
总结一下:
首先是webdriver实例化Service 类调用start()方法用subprocess启动chromedriver(带--port参数)驱动。chromedriver启动之后都会在绑定的端口启动Web Service。
接着实例化RemoteConnection获得 command_executor实例化对象 传入给RemoteWebDriver构造方法。
RemoteWebDriver构造方法调用start_session()方法启动session并获得唯一的session_id(通过这个session_id来确定找到对方,且在多线程并行的时候彼此之间不会有冲突和干扰)。
接下来调用WebDriver的任何API,比如get(),都需要借助一个CommandExecutor(RemoteConnection类的实例对象)调用execute()发送一个命令(这个命令取自CommandExecutor实例化时生成的_commands字典)。
self._commands = {
Command.STATUS: ('GET', '/status'),
Command.NEW_SESSION: ('POST', '/session'),
Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
Command.QUIT: ('DELETE', '/session/$sessionId'),
Command.GET_CURRENT_WINDOW_HANDLE:
('GET', '/session/$sessionId/window_handle'),
Command.W3C_GET_CURRENT_WINDOW_HANDLE:
('GET', '/session/$sessionId/window'),
Command.GET_WINDOW_HANDLES:
('GET', '/session/$sessionId/window_handles'), #.................省略.....................
}
CommandExecutor中的execute()方法最后返回_request()方法的调用结果,实际上是发送一个HTTP request给监听端口上的Web Service。
在HTTP request的body中,Wire JSON格式字典来告诉chromedriver接下来做什么事。(通过之前绑定的端口)
实际的执行者是chromedriver驱动,而selenium就相当于一个代理。所以selenium并不是直接操控浏览器而是运行webdriver, 通过webdriver间接操控浏览器。
 
 

Selenium原理的更多相关文章

  1. Selenium原理初步--Android自动化测试学习历程

    章节:自动化基础篇——Selenium原理初步(第五讲) 注:其实所有的东西都是应该先去用,但是工具基本都一样,底层都是用的最基础的内容实现的,测试应该做的是: (1)熟练使用工具,了解各个工具的利弊 ...

  2. selenium + python自动化测试unittest框架学习(一)selenium原理及应用

    unittest框架的学习得益于虫师的<selenium+python自动化实践>这一书,该书讲得很详细,大家可以去看下,我也只学到一点点用于工作中,闲暇时记录下自己所学才能更加印象深刻. ...

  3. 【Selenium01篇】python+selenium实现Web自动化:搭建环境,Selenium原理,定位元素以及浏览器常规操作!

    一.前言 最近问我自动化的人确实有点多,个人突发奇想:想从0开始讲解python+selenium实现Web自动化测试,请关注博客持续更新! 二.话不多说,直接开干,开始搭建自动化测试环境 这里以前在 ...

  4. web自动化:selenium原理和元素定位(一)

    一. Selenium2 WebDriver 当Selenium2.x提出了WebDriver的概念后,它提供了完全另外的一种方式与浏览器交互 那就是利用浏览器原生的API,封装成一套更加面向对象的S ...

  5. selenium原理应用 - 利用requests模拟selenium驱动浏览器

    前言 selenium是一个web自动化测试的开源框架,它支持多语言:python/java/c#… 前面也有一篇文章说明了,selenium+浏览器的环境搭建. selenium支持多语言,是因为s ...

  6. selenium原理解析

    相信很多测试小伙伴儿都听过或者使用过web自动化selenium,那您有没有研究过selenium的原理呢?为什么要使用webdriver.exe,webdriver.exe是干啥用的?seleniu ...

  7. selenium原理学习笔记

    一,selenium工作原理(参考文档:https://blog.csdn.net/dawei_yang000000/article/details/87639928) 自动化测试代码发送请求给到浏览 ...

  8. 【java+selenium3】自动化基础小结+selenium原理揭秘 (十七)

    一.自动化实现原理 1.创建驱动对象   (1) 首先加载浏览器安装目录下的exe文件 (2) 其次是加载可执行驱动的exe文件,监听等待客户端发送的web service请求. 底层原理如下: 1. ...

  9. selenium原理和尝试

    引用文章:https://www.cnblogs.com/Albert-Lee/p/6238866.html Selenium是一个自动化测试框架.因为它能够模拟人工操作,比如能在浏览器中点击按钮.在 ...

  10. selenium原理(以百度搜索为例)

    1.首先导入 Selenium(webdriver)相关模块2.调用 Selenium 的浏览器驱动,获取浏览器句柄(driver)并启动浏览器.3.通过句柄访问百度 URL.4.通过句柄操作页面元素 ...

随机推荐

  1. Django csrf校验

    引入: 通常,钓鱼网站本质是本质搭建一个跟正常网站一模一样的页面,用户在该页面上完成转账功能 转账的请求确实是朝着正常网站的服务端提交,唯一不同的在于收款账户人不同. 如果想模拟一个钓鱼网站,就可是给 ...

  2. thingsboard入坑记(一)本机编译运行

    开发环境: windows10 x64 专业版 工具准备: git 2.16.2 windows命令行版 java jdk 1.8:https://www.cnblogs.com/harmful-ch ...

  3. layui导出表格设置常用函数

    1.设置导出单元格为数字格式 字段名: function (value, line, data) { return { v: value, t: 'n' } }

  4. 前端调用本地摄像头实现拍照(vue)

    由于调用摄像头有使用权限,只能在本地运行,线上需用https域名才可以使用. <template> <div class="camera_outer"> & ...

  5. SpringBoot 教程之 banner 定制

    目录   简介  变量  配置  编程  示例  参考资料 简介 Spring Boot 启动时默认会显示以下 LOGO: . ____ _ __ _ _ /\\ / ___'_ __ _ _(_)_ ...

  6. ajax-属性、原理、实现html5进度条上传文件

    一.远古ajax实现方式如下: 1.前端请求后台,后台设置返回 http状态码204 2.运用img图片(或css/javascript等)的加载机制,请求后台 3.post提交数据,运用iframe ...

  7. webserver代理生成本地类的两种方式

    方式1,把webservers地址请求出来的xml拷贝出来放到文本里面后缀改成wsdl文件在VS里面使用下列命令 C:\Program Files (x86)\Microsoft SDKs\Windo ...

  8. Python集合详解

    集合介绍: 集合(set)是一个无序的不重复元素序列.可以使用大括号 { } 或者 set() 函数创建集合,注意:创建一个空集合必须用 set() 而不是 { },因为 { } 是用来创建一个空字典 ...

  9. burpsuite抓取小程序的数据包(指定DNS)

    目标:burpsuite抓取微信小程序的数据包,而且该系统需指定DNS!否则无法访问! 大家都知道小程序是https传输的,所以手机端是需要安装burp证书的. 已忽略安装证书的步骤,可自己百度搜索, ...

  10. C# WPF 表单更改提示

    微信公众号:Dotnet9,网站:Dotnet9,问题或建议,请网站留言: 如果您觉得Dotnet9对您有帮助,欢迎赞赏 C# WPF 表单更改提示 内容目录 实现效果 业务场景 编码实现 本文参考 ...