Selenium原理

from selenium import webdriver：导入webdriver模块

当导入webdriver模块时，会执行\selenium\webdriver目录下的__init__.py文件，导入支持的浏览器模块并起别名（as）

from .firefox.webdriver import WebDriver as Firefox  # noqa

from .firefox.firefox_profile import FirefoxProfile  # noqa

from .firefox.options import Options as FirefoxOptions  # noqa

from .chrome.webdriver import WebDriver as Chrome  # noqa

from .chrome.options import Options as ChromeOptions  # noqa

from .ie.webdriver import WebDriver as Ie  # noqa

from .ie.options import Options as IeOptions  # noqa

from .edge.webdriver import WebDriver as Edge  # noqa

from .opera.webdriver import WebDriver as Opera  # noqa

from .safari.webdriver import WebDriver as Safari  # noqa

from .blackberry.webdriver import WebDriver as BlackBerry  # noqa

from .phantomjs.webdriver import WebDriver as PhantomJS  # noqa

from .android.webdriver import WebDriver as Android  # noqa

from .webkitgtk.webdriver import WebDriver as WebKitGTK # noqa

from .webkitgtk.options import Options as WebKitGTKOptions # noqa

from .remote.webdriver import WebDriver as Remote  # noqa

from .common.desired_capabilities import DesiredCapabilities  # noqa

from .common.action_chains import ActionChains  # noqa

from .common.touch_actions import TouchActions  # noqa

from .common.proxy import Proxy  # noqa

driver = webdriver.Chrome()：启动Chrome浏览器程序

此命令为将webdriver.Chrome()实例化，类的实例化会自动调用__init__()方法

路径： webdriver.chrome.webdriver

源码：

import warnings

from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver

from .remote_connection import ChromeRemoteConnection

from .service import Service

from .options import Options

class WebDriver(RemoteWebDriver):
    """
    Controls the ChromeDriver and allows you to drive the browser.

    You will need to download the ChromeDriver executable from
    http://chromedriver.storage.googleapis.com/index.html
    """

    def __init__(self, executable_path="chromedriver", port=0,
                 options=None, service_args=None,
                 desired_capabilities=None, service_log_path=None,
                 chrome_options=None, keep_alive=True):
        """
        Creates a new instance of the chrome driver.

        Starts the service and then creates new instance of chrome driver.

        :Args:
         - executable_path - path to the executable. If the default is used
           it assumes the executable is in the $PATH
         - port - port you would like the service to run, if left as 0,
           a free port will be found.
         - options - this takes an instance of ChromeOptions
         - service_args - List of args to pass to the driver service
         - desired_capabilities - Dictionary object with non-browser specific
           capabilities only, such as "proxy" or "loggingPref". The dictionary
           passed in is never modified.
         - service_log_path - Where to log information from the driver.
         - chrome_options - Deprecated argument for options
         - keep_alive - Whether to configure ChromeRemoteConnection to use
           HTTP keep-alive.
        """
        if chrome_options:
            warnings.warn('use options instead of chrome_options',
                          DeprecationWarning, stacklevel=2)
            options = chrome_options

        if options is None:
            # desired_capabilities stays as passed in
            if desired_capabilities is None:
                # No options and no capabilities: fall back to the defaults
                # produced by create_options() (not shown in this excerpt).
                desired_capabilities = self.create_options().to_capabilities()
        else:
            if desired_capabilities is None:
                desired_capabilities = options.to_capabilities()
            else:
                # Merge into a copy: callers frequently pass the shared
                # DesiredCapabilities.CHROME dict, and updating it in place
                # would leak these options into every later session.
                desired_capabilities = dict(desired_capabilities)
                desired_capabilities.update(options.to_capabilities())

        # Launch the chromedriver executable before opening a session on it.
        self.service = Service(
            executable_path,
            port=port,
            service_args=service_args,
            log_path=service_log_path)
        self.service.start()

        try:
            # Initialise the remote driver against the local service URL.
            RemoteWebDriver.__init__(
                self,
                command_executor=ChromeRemoteConnection(
                    remote_server_addr=self.service.service_url,
                    keep_alive=keep_alive),
                desired_capabilities=desired_capabilities)
        except Exception:
            # Clean up on any failure, then let the error propagate.
            self.quit()
            raise
        self._is_remote = False

首先创建一个浏览器的字典对象：调用Options类中的to_capabilities()方法，该方法返回caps字典对象。由以下代码可以看出，chrome浏览器返回的caps字典对象默认为：

{

"browserName":"chrome",

"version":"",

"platform":"ANY",

"goog:chromeOptions": {'extensions': [], 'args': []}

}

class Options(object):
    # Capability name under which the Chrome-specific options are stored.
    KEY = "goog:chromeOptions"

    def __init__(self):
        # Start from a private copy of the stock Chrome capabilities so the
        # shared DesiredCapabilities.CHROME dict is left untouched.
        self._caps = DesiredCapabilities.CHROME.copy()

        # Browser binary and remote-debugger endpoint; both unset initially.
        self._binary_location = ''
        self._debugger_address = None

        # Collections filled in later through the option setters.
        self._arguments = []
        self._extension_files = []
        self._extensions = []
        self._experimental_options = {}

#-----------中间代码省略-----------

def to_capabilities(self):
    """
    Build the capabilities dictionary for every option that has been set.

    The Chrome-specific options are assembled from a copy of
    ``experimental_options`` and stored under ``self.KEY`` in ``self._caps``.
    The returned dictionary is ``self._caps`` itself, not a copy.
    """
    # Copy first so the stored experimental options are not modified.
    chrome_options = self.experimental_options.copy()
    chrome_options["extensions"] = self.extensions

    binary = self.binary_location
    if binary:
        chrome_options["binary"] = binary

    chrome_options["args"] = self.arguments

    debugger = self.debugger_address
    if debugger:
        chrome_options["debuggerAddress"] = debugger

    capabilities = self._caps
    capabilities[self.KEY] = chrome_options
    return capabilities

aaarticlea/png;base64,iVBORw0KGgoAAAANSUhEUgAAARsAAABeCAYAAAATz7BKAAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAq/SURBVHhe7Z1dkuogEIVnXS7IdcwSfHMVvvp8y0XMi0twEbl9Gkga8ocaWxPPV0VVQggQQp80jDA/DSGEOECxIYS4QLEhhLhAsXkD5/2x+fn5bfbnGEHIF1AtNtfDvtn9/IiRIOzEUK7NYbdrDldcPTf7dG130PSHXUq7l6tIso/nedgNWNz1fGj27f1Ic2i0GCHluwsFt1wPu5Ae5Y+UpSHW721cL9KOFBryfVSJDYSmsO3mLHEQnTYeBl5Y0Hn/kxvV9dDsrLFfIViFcKhQSL4iZoGrRImQyH0hJgrbbt8KENIEEYrCBsqywFCcN+eT1JNiQ76PCrER4x4x0M6zER4RG5DddxUhMXkaurwgNvtmb/OWPHZ7iRsRG3g9Kmh3is15/6vCMBr2fzHlHVBsyJcyLzYw0BrLUI/EDFdiyG4tjf0KIcOQLJ1PiEErSkFsziYtPJ8D8irFxtSjHHa9i+sBYnMcFFRCtsyyYlOkG/RsjADkwyXhHrGRI/WsDumeAbGJebWezZ0s7dlcD2Fi+Gd3MUNAQr6DumGUNeIxasXGiMkZIiDnneHVD6PCYfCmevFgSrjeyFWHUfRsyPdRNUEMo85FAUYDYzbC8IDYAP0rkk2kAmI9nqEJ4iHxqxObbJ7pHXDOhnwpdWIjBHExcyA7MWwrNGloFA28+9N3FBybJsUp/b8k9f/0fQ5CAwExeaShUftnbw0YWuVl5YFiQ8g7qBYbshD8nQ35Uig2b4C/ICbfCMWGEOICxYYQ4gLFhhDiAsWGEOJCldj8+/cvHhFCyGPQsyGEuEDPhhDiwko9m7/m9Pvb/P4em8vtZo7j1RPOf5vTAztAkFcw/b6W5fmybpej9p/jayr4Qjzb+X5W6tmkhjzJUdNcjuHYagsE5xvFJgmtbY/QPjEcLzHWk/n3tRzLlAXBWZ/YeLbz/ax2zkYbMhqOGpgc267xrWIDWsGxDfB3ys+dmXtfS7JEWesUG992vpfVztloQ0bjUbe3MCRcP/3dmsspuMQIx1Pe8MldVvW/iQuqXwK8IJzHRMLf5dQc0zVxS49SVrqs9Uh5xDiJjV8YCW29bpK2qwvyOcl53qFNHVKaosPX1Dk8O+pgXOhSbG4XudfUR+6/dBm0cSF/qQfyi2Xh+Tvm6wzm3lci7fmzO/TzqKWmrJs847GtM54J5907xH3Hy1/0DlKaPJ+n+88b2/kdrNazmSOIgBibeeu3v4u89FLp5SXqC8/TJiAQEC2LdlTzEi+4F0li5wmXkG9wZxVcQ9ltRBAfKzYX6fBlFVBWv1PN1TnUQTtbel6Un3W8m6mLAPGxginnR+38kkiPg3iX6errXMcSYjOHCom0hS1B+0YhNsH4YyppLAhP1oTKk/3nTe38Dlbr2cyRDK6kHx9EYSCpINdG3NBWYARr3EeJV6NGx8kKyr2s8IUzeceO1l03QeqQM1Vn+4zBQFTQSrFBZ86+lAgmT9SnLdeWZ47vqvPnYN/dGPoui0TDferJ/rPhdi7ZtGfT7xiILzvak50F4EslJyEu5HcZ6KyWm3wpdViVKhk73fgdlqk6ozrm2bWjSr3Ele/EBvdLXPapLPKsNYLqOn8OqxSbFbZzyaY9m94wSsfO5Uub6izIZ34YpXlI50mdpi3b3IY4zAGY2shYPu/QOqySfG1pKOtxzyYQhgRSpxSpAiT3p4KS8Nk8a4wAZ9V1rsNtGFX0Awyj8A7T61hGbEL7zA6j3tDOyH8v7ey5H/YmPZs0qYeXPD5BHIYY6VoX+h1HJ/ja6/2XDtrhCkAnkHxsGu2oOgnZlTWUT17W/XUOQhdDa1DxT6LGUlR42/thEJgjCMfqBaVrmgc6Ps5RTjruDHO6zv
fhITYgf36ps5nUbcUZIbaZbdfwnhfoP9pPYrxzO4cdI333wt6sZ0MIGUf/c8gj//fsCTY7Z0MIGUOGUDv///BBz4YQ4gI9G0KIC/RsCCEu0LMhhLhAz4YQ4gI9G0KIC/RsvIk/+PP9hQMh72elno39dWXaMKj7peVH4yI2nu1TU9aK3xdZjJV6NqnDfuaOZO/Hs31qyuL7Iiues9EOGxeh6bqVdh0QAZ7tU1MW3xdZ7ZyNdti4SE4XzplFhrqSVr+kIYRd6+x5TAf3Xr+yKRx7mxF1i/LkSzy6G1taNW3ywSpdm1dWp/Gvui62a+tULPpMC/ek7HyBaT+3yfYxeO2MV1sfsl1W69nMI647BCGe4bzcx2SpnfFUBPClbi8F8Sm3KAggr2Gxmd2OAKhoGVGUQuE1PGq7XqusCVmtZ1MDvqB224e+0QbPoBeiu98xLhABMfjCsxlf/j+WF7ym4Xsykcz2PwnAa6CjQD6dDXs2IBg27LS3O1s02mFBKJkTm5zeLnwZFBvynWzaswHq3WDTqgFrrN/9bFpsYOxzu/B1jOdVPYxaUGw+bxjlv4Mc8WHjno0Qh0tjxvjsznhAjX1mF77hfCSUwjG2qxuAEJr7EI+yU9ywuE3zcWLzhh3kiA+b92zIunjHDnLEh+17NmRFvGcHOeIDPRtCiAv0bAghLtCzIYS4QM+GEOICPRtCiAv0bAghLtCzIYS4sFLPBj/5D7+wfWrnt/jr4vAL3Pq1T8+BNVCpTAnlmi03FmrDKjzLquHT6vMdrNSzSR1kqZ3f0PmeE5tshfkEaR3V+1m6DaeYL6tczlEuL+mWZXT3Zfdg2YdZzmFX+/fiXJ+dJFY7Z6MdJK4r0o4Y1wo9hq/YPLpocmmWbcNpasvSFe6X/mJT0AqObUCIiT0fWM0fFrjGk4jns5PAaudstIPEHlTu/Ba+VDBqEZF4rF+2u/eYEaTznsQAUh66S55dmZ0NxWwo3HK7iNKEUqAmd+oT9Fn1mtS33DlQ7k3HIR5G1qXJVo8LU21o8drNT9syCkBvSxABeYRdF821UmyAxKVnHfsI1D47WY7VejZzaGeCAcqYvGXgqxeYEBtJbbNAHkNpxzp1STCYeFJQtcWEgvri+cToyp0DVfxiPlEI0/Gj3pvXyvCsbQZEJF1XcUjvcUhshLB9SBBr8hms1rOZY8yo0Qn7mjAhNjBS4x2F8AqxgQcy7Hn1v/LT9e2GIDbdxD0fQfn82N4jr2/XdmHrD23vEbH5/Of9Pjbt2QyLzVD8WMdEfOk9DKel2DyHeiuZoIdg2zRrO/XapF0wdKTYrIJNezZPD6O0QyOPdJ7+i0I/rYpNZwm6U1/6a4dlXGxw7Z5h1IghvUBsXj+MuonQDtVN6mzeV9l2rUC9VGy4c+BSbNyzyTciLyeI00RyL7TGig6d75x3+sPcRzjueRsmPy3LXB/7cpd2MrlTnxwN1zkaFYYUKU6NFAaXrqfjst7zvFRsVNBjnSW07WGfBe1gn7sVoPgn7KwR03PacP8zt3DnwMX4ujkbQu6BOwcux9fN2RBSD3cOXJJNejb5UOMJF5oQshib9WwIIZ/FZudsCCGfBT0bQogLFBtCiAsUG0KICxQbQogL9WKjv6T8oP8JTQhZFfd5NvzpNiHkQSg2hBAXKDaEEBceEpv9mfM2hJD7uE9ssDANe3tI2J9jFCGEVPCgZxPPCSGkEs7ZEEJcoNgQQlyg2BBCXKgXGxUa/oKYEPIY93k2hBDyIBQbQogLFBtCiAsUG0KICxQbQogLFBtCiAsUG0KICxQbQogDTfMfB1Fg7SHy/PMAAAAASUVORK5CYII=" alt="" />

再看在webdriver.chrome.remote_connection下的ChromeRemoteConnection类，继承了在webdriver.remote.remote_connection下的RemoteConnection，调用父类构造方法往self._commands里添加几个command键值对。

子类ChromeRemoteConnection源码：

from selenium.webdriver.remote.remote_connection import RemoteConnection

class ChromeRemoteConnection(RemoteConnection):
    """RemoteConnection extended with the Chrome-specific commands."""

    def __init__(self, remote_server_addr, keep_alive=True):
        # Let the base class set up the connection and command table first.
        RemoteConnection.__init__(self, remote_server_addr, keep_alive)

        # (command name, HTTP method, URL path template) for each extra command.
        chrome_commands = (
            ("launchApp", 'POST', '/session/$sessionId/chromium/launch_app'),
            ("setNetworkConditions", 'POST', '/session/$sessionId/chromium/network_conditions'),
            ("getNetworkConditions", 'GET', '/session/$sessionId/chromium/network_conditions'),
            ('executeCdpCommand', 'POST', '/session/$sessionId/goog/cdp/execute'),
        )
        for name, http_method, route in chrome_commands:
            self._commands[name] = (http_method, route)

父类构造方法中主要是把localhost域名换成127.0.0.1：先通过urllib.parse.urlparse把url解析成6部分，urlparse返回的是一个具名元组（named tuple）对象，包含 scheme, netloc, path, params, query, fragment，其中netloc包括hostname和port；然后调用common_utils.find_connectable_ip()方法获取hostname对应的ip地址；最后用urllib.parse.urlunparse()重新组成url并赋值给self._url

构造方法里还初始化了self._commands字典：key为命令名，value为（HTTP方法, URL路径模板）组成的元组，即该命令具体要发送的请求。

RemoteConnection类的实例方法execute调用_request方法，最终实现把命令发送到远程服务器。客户端与服务器之间通过WebDriver wire protocol（JSON Wire Protocol）通信，这是一种基于HTTP的点对点协议：客户端先把操作（例如一次点击）及其参数转换成json格式的字符串，再通过wire protocol以HTTP请求的形式传递给服务器。

父类RemoteConnection源码：

import base64

import logging

import platform

import socket

import string

import urllib3

try:

    from urllib import parse

except ImportError:  # above is available in py3+, below is py2.7

    import urlparse as parse

from selenium.webdriver.common import utils as common_utils

from selenium import __version__

from .command import Command

from .errorhandler import ErrorCode

from . import utils

LOGGER = logging.getLogger(__name__)

class RemoteConnection(object):

    """

    A connection with the Remote WebDriver server.            #与远程Web驱动程序服务器的连接

    Communicates with the server using the WebDriver wire protocol:        #使用WebDriver线路协议与服务器通信

    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol

    """

    _timeout = socket._GLOBAL_DEFAULT_TIMEOUT

    #-----------中间代码省略-----------

    #父类构造方法

    def __init__(self, remote_server_addr, keep_alive=False, resolve_ip=True):
        """
        Store the server URL and build the command table.

        :Args:
         - remote_server_addr - URL of the remote WebDriver server.
         - keep_alive - When true, a urllib3 PoolManager is created and kept
           on the instance as ``self._conn``.
         - resolve_ip - When true and the URL has a hostname, try to replace
           the hostname with a connectable IP address. For ``https`` URLs the
           hostname is kept as it is.
        """
        self.keep_alive = keep_alive

        target = parse.urlparse(remote_server_addr)
        host = target.hostname
        if host and resolve_ip:
            port = target.port or None

            if target.scheme == "https":
                ip = host
            elif port and not common_utils.is_connectable(port, host):
                ip = None
                LOGGER.info('Could not connect to port {} on host '
                            '{}'.format(port, host))
            else:
                ip = common_utils.find_connectable_ip(host, port=port)

            if not ip:
                LOGGER.info('Could not get IP address for host: %s' % host)
            else:
                # Rebuild the netloc as [user[:password]@]ip[:port].
                netloc = ip
                if port:
                    netloc = common_utils.join_host_port(netloc, port)

                user = target.username
                if user:
                    credentials = user
                    if target.password:
                        credentials += ':%s' % target.password
                    netloc = '%s@%s' % (credentials, netloc)

                remote_server_addr = parse.urlunparse(
                    (target.scheme, netloc, target.path,
                     target.params, target.query, target.fragment))

        self._url = remote_server_addr

        if keep_alive:
            self._conn = urllib3.PoolManager(timeout=self._timeout)

        # Maps each command to its (HTTP method, URL path template) pair.
        self._commands = {
            Command.STATUS: ('GET', '/status'),
            Command.NEW_SESSION: ('POST', '/session'),
            Command.GET_ALL_SESSIONS: ('GET', '/sessions'),
            Command.QUIT: ('DELETE', '/session/$sessionId'),
            Command.GET_CURRENT_WINDOW_HANDLE:
                ('GET', '/session/$sessionId/window_handle'),
            Command.W3C_GET_CURRENT_WINDOW_HANDLE:
                ('GET', '/session/$sessionId/window'),
            Command.GET_WINDOW_HANDLES:
                ('GET', '/session/$sessionId/window_handles'),
            Command.W3C_GET_WINDOW_HANDLES:
                ('GET', '/session/$sessionId/window/handles'),
            # ----------- remaining entries omitted in this excerpt -----------
        }

    #最终发送命令道远程服务器的方法

    def execute(self, command, params):
        """
        Send a command to the remote server.

        Any path substitutions required for the URL mapped to the command
        should be included in the command parameters.

        :Args:
         - command - A string specifying the command to execute.
         - params - A dictionary of named parameters to send with the command
           as its JSON payload. When the connection is flagged as W3C, the
           'sessionId' entry is removed from this dictionary in place.

        :Returns:
          Whatever ``self._request`` returns for the resulting HTTP call.
        """
        verb_and_route = self._commands[command]
        assert verb_and_route is not None, 'Unrecognised command %s' % command

        # Fill placeholders such as $sessionId in the path template.
        route = string.Template(verb_and_route[1]).substitute(params)

        # The session id already travels in the URL for W3C endpoints.
        speaks_w3c = getattr(self, 'w3c', False)
        if speaks_w3c and isinstance(params, dict) and 'sessionId' in params:
            del params['sessionId']

        payload = utils.dump_json(params)
        endpoint = '%s%s' % (self._url, route)
        return self._request(verb_and_route[0], endpoint, body=payload)

    #返回带JSON解析的字典

    def _request(self, method, url, body=None):

        """

        Send an HTTP request to the remote server.

        :Args:

         - method - A string for the HTTP method to send the request with.

         - url - A string for the URL to send the request to.

         - body - A string for request body. Ignored unless method is POST or PUT.

        :Returns:

          A dictionary with the server's parsed JSON response. Redirects
          (300-303) are followed with a GET on the Location header. For
          status codes 400-500 the undecoded body is returned as 'value'
          with the HTTP status code as 'status'. image/png responses are
          returned as {'status': 0, 'value': <body text>}.

        """

        LOGGER.debug('%s %s %s' % (method, url, body))

        parsed_url = parse.urlparse(url)

        headers = self.get_remote_connection_headers(parsed_url, self.keep_alive)

        resp = None

        # Only POST and PUT carry a body; drop it for every other method.
        if body and method != 'POST' and method != 'PUT':

            body = None

        if self.keep_alive:

            # Reuse the pool stored on the instance.
            resp = self._conn.request(method, url, body=body, headers=headers)

            statuscode = resp.status

        else:

            # No keep-alive: use a fresh pool for this single request.
            http = urllib3.PoolManager(timeout=self._timeout)

            resp = http.request(method, url, body=body, headers=headers)

            statuscode = resp.status

            # Give the response a getheader() accessor when it lacks one,
            # delegating to whichever lookup resp.headers provides.
            if not hasattr(resp, 'getheader'):

                if hasattr(resp.headers, 'getheader'):

                    resp.getheader = lambda x: resp.headers.getheader(x)

                elif hasattr(resp.headers, 'get'):

                    resp.getheader = lambda x: resp.headers.get(x)

        data = resp.data.decode('UTF-8')

        try:

            # Redirect: re-issue the request as a GET to the new location.
            if 300 <= statuscode < 304:

                return self._request('GET', resp.getheader('location'))

            # Client/server error up to 500: hand back the raw body.
            # NOTE(review): status codes above 500 are not caught here and
            # fall through to the JSON handling below.
            if 399 < statuscode <= 500:

                return {'status': statuscode, 'value': data}

            content_type = []

            if resp.getheader('Content-Type') is not None:

                content_type = resp.getheader('Content-Type').split(';')

            if not any([x.startswith('image/png') for x in content_type]):

                try:

                    data = utils.load_json(data.strip())

                except ValueError:

                    # Body is not JSON: report success for 2xx, otherwise
                    # an unknown error, with the stripped text as value.
                    if 199 < statuscode < 300:

                        status = ErrorCode.SUCCESS

                    else:

                        status = ErrorCode.UNKNOWN_ERROR

                    return {'status': status, 'value': data.strip()}

                # Some of the drivers incorrectly return a response

                # with no 'value' field when they should return null.

                if 'value' not in data:

                    data['value'] = None

                return data

            else:

                # PNG payload: skip JSON parsing and wrap the body text.
                data = {'status': 0, 'value': data}

                return data

        finally:

            # Runs on every exit path above, including the early returns.
            LOGGER.debug("Finished Request")

            resp.close()

urlparse()方法源码：

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>

    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    The components are not broken up into smaller bits (e.g. netloc is a
    single string) and % escapes are not expanded."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    scheme, netloc, path, query, fragment = urlsplit(url, scheme, allow_fragments)

    # Only schemes listed in uses_params carry ';params' on the path.
    params = ''
    if scheme in uses_params and ';' in path:
        path, params = _splitparams(path)

    return _coerce_result(
        ParseResult(scheme, netloc, path, params, query, fragment))

Webdriver类构造方法中，实例化webdriver/chrome/service.py中的Service类，自动调用Service类中的构造方法__init__()，此Service类为继承webdriver/common/service.py中Service类的子类

from selenium.webdriver.common import service

class Service(service.Service):
    """
    Object that manages the starting and stopping of the ChromeDriver
    """

    def __init__(self, executable_path, port=0, service_args=None,
                 log_path=None, env=None):
        """
        Creates a new instance of the Service

        :Args:
         - executable_path : Path to the ChromeDriver
         - port : Port the service is running on
         - service_args : List of args to pass to the chromedriver service.
           The sequence is copied, so the caller's object is never modified.
         - log_path : Path for the chromedriver service to log to
         - env : Passed through unchanged to the base Service constructor
        """
        # Copy before appending: otherwise a list reused by the caller would
        # collect one more '--log-path' entry for every Service created.
        self.service_args = list(service_args or [])
        if log_path:
            self.service_args.append('--log-path=%s' % log_path)

        service.Service.__init__(self, executable_path, port=port, env=env,
                                 start_error_message="Please see https://sites.google.com/a/chromium.org/chromedriver/home")

    def command_line_args(self):
        """Return the command line arguments: the port flag followed by service_args."""
        return ["--port=%d" % self.port] + self.service_args

Service父类源码：

import errno

import os

import platform

import subprocess

from subprocess import PIPE

import time

from selenium.common.exceptions import WebDriverException

from selenium.webdriver.common import utils

try:

    from subprocess import DEVNULL

    _HAS_NATIVE_DEVNULL = True

except ImportError:

    DEVNULL = -3

    _HAS_NATIVE_DEVNULL = False

class Service(object):
    """Launches a driver executable as a child process and waits until it accepts connections."""

    def __init__(self, executable, port=0, log_file=DEVNULL, env=None, start_error_message=""):
        """
        :Args:
         - executable : Path or name of the driver executable to launch.
         - port : Port for the service; 0 means a free port is picked.
         - log_file : Target for the child process's stdout and stderr.
         - env : Environment for the child process; a falsy value means
           os.environ is used.
         - start_error_message : Extra hint appended to start-up errors.
        """
        self.path = executable

        # Port 0 means "choose a free port for me".
        self.port = port
        if self.port == 0:
            self.port = utils.free_port()

        # Without a native subprocess.DEVNULL, open os.devnull by hand.
        if not _HAS_NATIVE_DEVNULL and log_file == DEVNULL:
            log_file = open(os.devnull, 'wb')

        self.start_error_message = start_error_message
        self.log_file = log_file
        self.env = env or os.environ

    @property
    def service_url(self):
        """
        Gets the url of the Service
        """
        return "http://%s" % utils.join_host_port('localhost', self.port)

    def command_line_args(self):
        """Return the argument list for the executable; subclasses must override this."""
        # NotImplemented is a comparison sentinel, not an exception class:
        # calling it raised a confusing TypeError instead of this error.
        raise NotImplementedError("This method needs to be implemented in a sub class")

    def start(self):
        """
        Starts the Service.

        :Exceptions:
         - WebDriverException : Raised either when it can't start the service
           or when it can't connect to the service
        """
        try:
            cmd = [self.path]
            cmd.extend(self.command_line_args())
            self.process = subprocess.Popen(cmd, env=self.env,
                                            close_fds=platform.system() != 'Windows',
                                            stdout=self.log_file,
                                            stderr=self.log_file,
                                            stdin=PIPE)
        except TypeError:
            raise
        except OSError as err:
            if err.errno == errno.ENOENT:
                raise WebDriverException(
                    "'%s' executable needs to be in PATH. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            elif err.errno == errno.EACCES:
                raise WebDriverException(
                    "'%s' executable may have wrong permissions. %s" % (
                        os.path.basename(self.path), self.start_error_message)
                )
            else:
                raise
        except Exception as e:
            raise WebDriverException(
                "The executable %s needs to be available in the path. %s\n%s" %
                (os.path.basename(self.path), self.start_error_message, str(e)))

        # Poll once per second: fail at once if the process has exited, and
        # give up after 30 unsuccessful connection attempts.
        count = 0
        while True:
            self.assert_process_still_running()
            if self.is_connectable():
                break
            count += 1
            time.sleep(1)
            if count == 30:
                raise WebDriverException("Can not connect to the Service %s" % self.path)

    def assert_process_still_running(self):
        """Raise WebDriverException if the child process has already exited."""
        return_code = self.process.poll()
        if return_code is not None:
            raise WebDriverException(
                'Service %s unexpectedly exited. Status code was: %s'
                % (self.path, return_code)
            )

    def is_connectable(self):
        """Return the result of utils.is_connectable for the service port."""
        return utils.is_connectable(self.port)

由上代码可知Service实例化后，会自动获取一个随机端口，然后调用start()方法，该方法用subprocess启动chromedriver程序，并检测服务是否可以连接（subprocess源码路径在Lib下）。再看chrome的WebDriver类，它继承了selenium.webdriver.remote.webdriver中的WebDriver类。
RemoteWebDriver类的构造方法会更新capabilities字典，然后主要调用start_session并传入capabilities字典。start_session方法根据capabilities字典创建一个新的会话并获取session_id。另外构造方法还实例化了错误处理对象error_handler（ErrorHandler）、文件查找对象file_detector（默认实例化的是LocalFileDetector），以及一个用于页面切换的SwitchTo对象。

源码：

class WebDriver(object):
    """
    Controls a browser by sending commands to a remote server.
    This server is expected to be running the WebDriver wire protocol
    as defined at
    https://github.com/SeleniumHQ/selenium/wiki/JsonWireProtocol

    :Attributes:
     - session_id - String ID of the browser session started and controlled by this WebDriver.
     - capabilities - Dictionary of effective capabilities of this browser session as returned
         by the remote server. See https://github.com/SeleniumHQ/selenium/wiki/DesiredCapabilities
     - command_executor - remote_connection.RemoteConnection object used to execute commands.
     - error_handler - errorhandler.ErrorHandler object used to handle errors.
    """

    _web_element_cls = WebElement

    def __init__(self, command_executor='http://127.0.0.1:4444/wd/hub',
                 desired_capabilities=None, browser_profile=None, proxy=None,
                 keep_alive=False, file_detector=None, options=None):
        """
        Create a new driver that will issue commands using the wire protocol.

        :Args:
         - command_executor - Either a string representing URL of the remote server or a custom
             remote_connection.RemoteConnection object. Defaults to 'http://127.0.0.1:4444/wd/hub'.
         - desired_capabilities - A dictionary of capabilities to request when
             starting the browser session. Merged over the capabilities
             derived from ``options``.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
             Only used if Firefox is requested. Optional.
         - proxy - A selenium.webdriver.common.proxy.Proxy object. The browser session will
             be started with given proxy settings, if possible. Optional.
         - keep_alive - Whether to configure remote_connection.RemoteConnection to use
             HTTP keep-alive. Defaults to False.
         - file_detector - Pass custom file detector object during instantiation. If None,
             then default LocalFileDetector() will be used.
         - options - instance of a driver options.Options class
        """
        # Capabilities from options form the base; explicit ones win.
        capabilities = {} if options is None else options.to_capabilities()

        if desired_capabilities is not None:
            if not isinstance(desired_capabilities, dict):
                raise WebDriverException("Desired Capabilities must be a dictionary")
            capabilities.update(desired_capabilities)

        if proxy is not None:
            warnings.warn("Please use FirefoxOptions to set proxy",
                          DeprecationWarning, stacklevel=2)
            proxy.add_to_capabilities(capabilities)

        # A URL given as text is wrapped in a RemoteConnection; any other
        # object is used as the executor directly.
        self.command_executor = command_executor
        if type(self.command_executor) is bytes or isinstance(self.command_executor, str):
            self.command_executor = RemoteConnection(command_executor, keep_alive=keep_alive)

        self._is_remote = True
        self.session_id = None
        self.capabilities = {}
        self.error_handler = ErrorHandler()
        self.start_client()

        if browser_profile is not None:
            warnings.warn("Please use FirefoxOptions to set browser profile",
                          DeprecationWarning, stacklevel=2)

        # Core step: open the browser session on the server.
        self.start_session(capabilities, browser_profile)

        self._switch_to = SwitchTo(self)
        self._mobile = Mobile(self)
        self.file_detector = file_detector or LocalFileDetector()

    def start_session(self, capabilities, browser_profile=None):
        """
        Creates a new session with the desired capabilities.

        Sets ``session_id``, ``capabilities`` and ``w3c`` on this driver, and
        ``w3c`` on its command executor, from the server's response.

        :Args:
         - capabilities - Dictionary of capabilities to request. When a
           browser profile is given, its encoded form is added to this
           dictionary in place.
         - browser_profile - A selenium.webdriver.firefox.firefox_profile.FirefoxProfile object.
           Only used if Firefox is requested.
        """
        if not isinstance(capabilities, dict):
            raise InvalidArgumentException("Capabilities must be a dictionary")

        if browser_profile:
            encoded_profile = browser_profile.encoded
            if "moz:firefoxOptions" in capabilities:
                capabilities["moz:firefoxOptions"]["profile"] = encoded_profile
            else:
                capabilities.update({'firefox_profile': encoded_profile})

        # For the default Chrome capabilities _make_w3c_caps returns:
        #   {
        #      "firstMatch": [{}],
        #      "alwaysMatch": {
        #          'browserName': 'chrome',
        #          'version': '',
        #          'platformName': 'any',
        #          'goog:chromeOptions': {'extensions': [], 'args': []}
        #      }
        #   }
        # The request carries both the W3C form and the legacy form.
        parameters = {"capabilities": _make_w3c_caps(capabilities),
                      "desiredCapabilities": capabilities}
        response = self.execute(Command.NEW_SESSION, parameters)

        # Some servers nest the session data under 'value'.
        if 'sessionId' not in response:
            response = response['value']
        self.session_id = response['sessionId']

        # If 'value' is missing we are probably speaking to a W3C endpoint,
        # which reports the capabilities under 'capabilities'.
        session_caps = response.get('value')
        if session_caps is None:
            session_caps = response.get('capabilities')
        self.capabilities = session_caps

        # Double check to see if we have a W3C Compliant browser
        self.w3c = response.get('status') is None
        self.command_executor.w3c = self.w3c

def _make_w3c_caps(caps):
    """Build the W3C new-session capabilities payload from ``caps``.

    Capability names that are not part of the W3C spec are filtered out,
    because spec-compliant drivers reject requests containing unknown names.
    Legacy names listed in ``_OSS_W3C_CONVERSION`` are emitted under their
    mapped name when their value is truthy; names found in
    ``_W3C_CAPABILITY_NAMES`` or containing a ``:`` are copied unchanged.
    A ``firefox_profile`` entry, if present, is moved into the
    ``moz:firefoxOptions`` object unless that object already has a profile.

    :Args:
     - caps - A dictionary of capabilities requested by the caller. It is
       deep-copied up front, so the caller's dictionary is never modified.

    :Returns:
      A dictionary of the form ``{"firstMatch": [{}], "alwaysMatch": {...}}``.
    """
    # Work on a private deep copy of the requested capabilities.
    requested = copy.deepcopy(caps)
    # Stays None when no Firefox profile was requested (e.g. for Chrome).
    firefox_profile = requested.get('firefox_profile')

    # Lower-case the proxy type (inside the copy) when one is given.
    proxy = requested.get('proxy')
    if proxy and proxy.get('proxyType'):
        proxy['proxyType'] = proxy['proxyType'].lower()

    matched = {}
    for name, value in requested.items():
        # Legacy name with a non-empty value: store it under the mapped name.
        if value and name in _OSS_W3C_CONVERSION:
            if name == 'platform':
                converted = value.lower()
            else:
                converted = value
            matched[_OSS_W3C_CONVERSION[name]] = converted
        # Spec names and vendor-prefixed names ("prefix:name") pass through.
        if name in _W3C_CAPABILITY_NAMES or ':' in name:
            matched[name] = value

    payload = {"firstMatch": [{}], "alwaysMatch": matched}
    if not firefox_profile:
        return payload

    existing_opts = matched.get('moz:firefoxOptions', {})
    # If a profile is already present, assume the caller did that intentionally.
    if 'profile' not in existing_opts:
        # Copy before editing so the options object taken from caps is not mutated.
        merged_opts = copy.deepcopy(existing_opts)
        merged_opts['profile'] = firefox_profile
        matched['moz:firefoxOptions'] = merged_opts
    return payload

# Capability names that _make_w3c_caps re-emits under a different name:
# each key is the name as supplied by the caller, each value is the name
# written into the "alwaysMatch" object.
_OSS_W3C_CONVERSION = dict(
    acceptSslCerts='acceptInsecureCerts',
    version='browserVersion',
    platform='platformName',
)

    #通过self.command_executor.execute发送cmd命令到远程服务器达到控制浏览器的目标

    def execute(self, driver_command, params=None):
        """
        Sends a command to be executed by a command.CommandExecutor.

        When a session is active (``self.session_id`` is not None), the
        session id is sent along under the ``sessionId`` key unless the
        caller already supplied one. The caller's ``params`` dictionary is
        never modified: the id is added to a copy.

        :Args:
         - driver_command: The name of the command to execute as a string.
         - params: A dictionary of named parameters to send with the command.

        :Returns:
          The command's JSON response loaded into a dictionary object, with
          its ``'value'`` entry passed through ``self._unwrap_value``. If the
          executor returns no response, the placeholder
          ``{'success': 0, 'value': None, 'sessionId': self.session_id}``
          is returned instead.

        :Raises:
          Any exception raised by ``self.error_handler.check_response``
          for the received response.
        """
        if self.session_id is not None:
            if not params:
                params = {'sessionId': self.session_id}
            elif 'sessionId' not in params:
                # Add the id to a shallow copy so the caller's dict stays untouched.
                params = dict(params)
                params['sessionId'] = self.session_id

        # Wrap the outgoing parameters.
        params = self._wrap_value(params)
        # Core step: delegate to the command executor's execute method.
        response = self.command_executor.execute(driver_command, params)
        if response:
            self.error_handler.check_response(response)
            # Unwrap the returned value.
            response['value'] = self._unwrap_value(
                response.get('value', None))
            return response
        # If the server doesn't send a response, assume the command was
        # a success
        return {'success': 0, 'value': None, 'sessionId': self.session_id}

driver.get('https://www.baidu.com')调用的是webdriver/remote/webdriver.py下的get方法

get方法调用了webdriver.py中的execute方法，而该execute方法中的self.command_executor.execute实际调用的是remote_connection.py中RemoteConnection类的execute方法。

这实际上是向监听端口上的Web Service发送一个HTTP request：在HTTP request的body中，会以WebDriver Wire协议规定的JSON格式字符串来告诉驱动程序（chromedriver）我们希望浏览器打开https://www.baidu.com页面。

#selenium/webdriver/remote/webdriver.py

    def get(self, url):
        """
        Loads a web page in the current browser session.

        :Args:
         - url: The address of the page to load.
        """
        # Command.GET is registered as ('POST', '/session/$sessionId/url').
        payload = {'url': url}
        self.execute(Command.GET, payload)

总结一下：

首先，webdriver实例化Service类，并调用其start()方法，通过subprocess启动chromedriver驱动（带--port参数）。chromedriver启动之后会在绑定的端口上启动Web Service。

接着实例化RemoteConnection，得到command_executor实例对象，并将其传入RemoteWebDriver的构造方法。

RemoteWebDriver的构造方法调用start_session()方法启动session并获得唯一的session_id（通过这个session_id来确定通信的对方，并且在多线程并行的时候彼此之间不会有冲突和干扰）。

接下来调用WebDriver的任何API，比如get()，都需要借助一个CommandExecutor（RemoteConnection类的实例对象）调用execute()发送一个命令（这个命令取自CommandExecutor实例化时生成的command字典）。

# Maps each Command constant to an (HTTP method, URL path) pair.
# Session-scoped paths contain a literal "$sessionId" placeholder.
self._commands = {

        Command.STATUS: ('GET', '/status'),

        Command.NEW_SESSION: ('POST', '/session'),

        Command.GET_ALL_SESSIONS: ('GET', '/sessions'),

        Command.QUIT: ('DELETE', '/session/$sessionId'),

        Command.GET_CURRENT_WINDOW_HANDLE:

            ('GET', '/session/$sessionId/window_handle'),

        Command.W3C_GET_CURRENT_WINDOW_HANDLE:

            ('GET', '/session/$sessionId/window'),

        Command.GET_WINDOW_HANDLES:

            ('GET', '/session/$sessionId/window_handles'),

  # ................. remaining entries omitted .....................

}

CommandExecutor中的execute()方法最后调用并返回_request()方法的结果，实际上是向监听端口上的Web Service发送一个HTTP request。

在HTTP request的body中，以WebDriver Wire协议规定的JSON格式字典来告诉chromedriver接下来做什么事（通过之前绑定的端口）。

实际的执行者是chromedriver驱动，而selenium就相当于一个代理。所以selenium并不是直接操控浏览器而是运行webdriver，通过webdriver间接操控浏览器。

参考原文地址： https://www.cnblogs.com/jiang-cheng/p/9914803.html

Selenium原理的更多相关文章

Selenium原理初步--Android自动化测试学习历程
章节:自动化基础篇——Selenium原理初步(第五讲) 注:其实所有的东西都是应该先去用,但是工具基本都一样,底层都是用的最基础的内容实现的,测试应该做的是: (1)熟练使用工具,了解各个工具的利弊 ...
selenium + python自动化测试unittest框架学习（一）selenium原理及应用
unittest框架的学习得益于虫师的<selenium+python自动化实践>这一书,该书讲得很详细,大家可以去看下,我也只学到一点点用于工作中,闲暇时记录下自己所学才能更加印象深刻. ...
【Selenium01篇】python+selenium实现Web自动化：搭建环境，Selenium原理，定位元素以及浏览器常规操作！
一.前言最近问我自动化的人确实有点多,个人突发奇想:想从0开始讲解python+selenium实现Web自动化测试,请关注博客持续更新! 二.话不多说,直接开干,开始搭建自动化测试环境这里以前在 ...
web自动化：selenium原理和元素定位(一)
一. Selenium2 WebDriver 当Selenium2.x提出了WebDriver的概念后,它提供了完全另外的一种方式与浏览器交互那就是利用浏览器原生的API,封装成一套更加面向对象的S ...
selenium原理应用 - 利用requests模拟selenium驱动浏览器
前言 selenium是一个web自动化测试的开源框架,它支持多语言:python/java/c#… 前面也有一篇文章说明了,selenium+浏览器的环境搭建. selenium支持多语言,是因为s ...
selenium原理解析
相信很多测试小伙伴儿都听过或者使用过web自动化selenium,那您有没有研究过selenium的原理呢?为什么要使用webdriver.exe,webdriver.exe是干啥用的?seleniu ...
selenium原理学习笔记
一,selenium工作原理(参考文档:https://blog.csdn.net/dawei_yang000000/article/details/87639928) 自动化测试代码发送请求给到浏览 ...
【java+selenium3】自动化基础小结+selenium原理揭秘 (十七)
一.自动化实现原理 1.创建驱动对象 (1) 首先加载浏览器安装目录下的exe文件 (2) 其次是加载可执行驱动的exe文件,监听等待客户端发送的web service请求. 底层原理如下: 1. ...
selenium原理和尝试
引用文章:https://www.cnblogs.com/Albert-Lee/p/6238866.html Selenium是一个自动化测试框架.因为它能够模拟人工操作,比如能在浏览器中点击按钮.在 ...
selenium原理（以百度搜索为例）
1.首先导入 Selenium(webdriver)相关模块2.调用 Selenium 的浏览器驱动,获取浏览器句柄(driver)并启动浏览器.3.通过句柄访问百度 URL.4.通过句柄操作页面元素 ...

随机推荐

字节码操作、javassist使用
一.功能 1.动态生成新的类 2.动态改变某个类的结构(添加.删除.修改新的属性.方法) 二.优势 1.比反射开销小,性能高 2.JAVAasist性能高于反射,低于ASM 使用javassis ...
就不能换DB吗？抽象工厂模式
15.1 就不能换DB吗? 15.2 最基本的数据访问程序 namespace 抽象工厂模式 { class Program { static void Main(string[] args) { U ...
[菜b]Isaunoya 的一些学习笔记…[保持咕咕咕]
fread/fwrite标记永久化分块树链剖分莫比乌斯反演斜率优化/单调队列 kruskal重构树回滚莫队可持久化线段树/trie树 Link-Cut-Tree dsu on tree F ...
Your idea evaluation has expired. Your session will be limited to 30 minutes
今天打开idea,出现了上面的话,试了网上的很多办法,获取注册码的那个方法是最常见的,那个网站现在不提供注册码了. ----两种方法-----**1)把提示框的x点掉,会自动打开idea**按最开始安 ...
剑指offer-面试题32-从上到下打印二叉树-二叉树遍历
/* 题目: 按层自上向下打印二叉树. */ /* 思路: 使用队列,将节点压入队列中,再弹出来,压入其左右子节点,循环,直到栈为空. */ #include<iostream> #inc ...
一文看懂AI深度学习丨曼孚科技
深度学习(Deep Learning)是机器学习的一种,而机器学习是实现人工智能的必经途径. 目前大部分表现优异的AI应用都使用了深度学习技术,引领了第三次人工智能的浪潮. 一. 深度学习的概念深度 ...
[大数据技术]Kettle从CSV文件读取清洗后到MySQL中文乱码问题
首先要知道CSV文件的编码格式然后在文件输入编码选择编码格式, 第二步,在每个转换或者作业的DB连接中选择选项,并添加如下内容: 中文乱码问题得到解决
Wannafly Winter Camp 2020 Day 6G 单调栈 - 贪心
对于排列 \(p\),它的单调栈 \(f\) 定义为,\(f_i\) 是以 \(p_i\) 结尾的最长上升子序列的长度先给定 \(f\) 中一些位置的值,求字典序最小的 \(p\) 使得它满足这些值 ...
CentOS8中进行IP和主机名的网络配置的过程图解
摘要: 很多人不知道如何在字符界面下配置主机名和ip,所以写了这个文章,本人也是新手,希望指出错误与不足.(本文只是在字符界面下教程) 一.输入你的账号密码登录 1)ifconfig 查看你目前的主 ...
C#效率优化（4）-- 编译器对数组遍历的优化
在平时开发过程中,数组是我们使用频率最高的类型之一,在使用定长列表时,数组可以说是最佳方案,这也是我们最熟悉的数据结构之一. 在C#中使用数组,可以获取在内存上连续的相同类型的一组变量,在连续访问时可 ...

Selenium原理

Selenium原理的更多相关文章

随机推荐

热门专题