第十章：Python高级编程-多线程、多进程和线程池编程

第十章：Python高级编程-多线程、多进程和线程池编程

10.1 Python中的GIL

"""

gil global interpreter lock (cpython)

Python中一个线程对应于C语言中的一个线程

gil使得同一时刻只有一个线程在一个cpu上执行字节码

"""

# GIL会根据执行的字节码行数以及时间片释放， GIL遇到IO操作的时候会主动释放

import dis

def add(a):
    """Return ``a + 1``.

    The increment is written as a separate assignment on purpose so that the
    disassembly below shows the load / add / store steps individually.
    """
    a = a + 1
    return a


# dis.dis() writes the disassembly to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the listing.
dis.dis(add)

# ================ demo start =====================

# Shared counter that both worker functions modify without any locking.
total = 0


def add():
    """Increment the global ``total`` one million times."""
    global total
    for _ in range(1000000):
        # "+=" on a global is a read-modify-write, not a single atomic step.
        total += 1


def desc():
    """Decrement the global ``total`` one million times.

    Uses the same iteration count as ``add`` so that a race-free run of both
    functions leaves ``total`` at its starting value.
    """
    global total
    for _ in range(1000000):
        total -= 1

import threading

# Run the two counter loops in separate threads at the same time.
thread1 = threading.Thread(target=add)

thread2 = threading.Thread(target=desc)

thread1.start()

thread2.start()

# Wait for both workers before reading the shared counter.
thread1.join()

thread2.join()

print(total)  # the GIL does get released while the loops are running

10.2 多线程编程-threading

# 对于IO操作来说，多线程和多进程性能差别不大

# 1.通过Thread类实例化

import time

import threading

def get_detail_html(url):
    """Simulate downloading a detail page; ``url`` is accepted but not used."""
    started, finished = "get detail html started", "get detail html end"
    print(started)
    time.sleep(2)  # stands in for the time the request would take
    print(finished)

def get_detail_url(url):
    """Simulate downloading a list page; ``url`` is accepted but not used."""
    started, finished = "get detail url started", "get detail url end"
    print(started)
    time.sleep(4)  # stands in for the time the request would take
    print(finished)

#2. 通过继承Thread来实现多线程

class GetDetailHtml(threading.Thread):
    """Thread subclass whose ``run`` simulates fetching a detail page."""

    def __init__(self, name):
        # Only the thread name is forwarded to Thread; no target is used
        # because the work lives in run().
        super().__init__(name=name)

    def run(self):
        """Print a start marker, sleep two seconds, then print an end marker."""
        steps = (("get detail html started", 2), ("get detail html end", 0))
        for marker, pause in steps:
            print(marker)
            if pause:
                time.sleep(pause)

class GetDetailUrl(threading.Thread):
    """Thread subclass whose ``run`` simulates fetching a list page."""

    def __init__(self, name):
        # Only the thread name is forwarded to Thread; no target is used
        # because the work lives in run().
        super().__init__(name=name)

    def run(self):
        """Print a start marker, sleep four seconds, then print an end marker."""
        steps = (("get detail url started", 4), ("get detail url end", 0))
        for marker, pause in steps:
            print(marker)
            if pause:
                time.sleep(pause)

# Demo entry point: run both Thread subclasses concurrently and time them.
if  __name__ == "__main__":

    thread1 = GetDetailHtml("get_detail_html")

    thread2 = GetDetailUrl("get_detail_url")

    start_time = time.time()

    thread1.start()

    thread2.start()

    # thread1.setDaemon(True)  # mark as a daemon thread: it ends as soon as the main thread ends

    # thread2.setDaemon(True)
    # NOTE(review): if these are re-enabled they must run before start().

    # Block until both workers have finished.
    thread1.join()

    thread2.join()

    # When the main thread exits, daemon child threads are killed with it.

    print ("last time: {}".format(time.time()-start_time))

10.3 线程间通信-共享变量和Queue

"""

1. 线程通信方式-共享变量

"""

import threading

# Detail-page URLs found by the list-page crawler; shared between threads.
DETAIL_URL_LIST = []


def get_detail_html():
    """Take one URL from ``DETAIL_URL_LIST`` and simulate crawling that page.

    Returns early, without sleeping, when the shared list is empty.
    """
    print("get detail html started")
    try:
        # Pop inside try/except rather than checking the length first, so
        # another thread cannot empty the list between the check and the pop.
        url = DETAIL_URL_LIST.pop()  # a real crawler would request this URL
    except IndexError:
        print("no detail url available")
        return
    time.sleep(2)
    print("get detail html end")

def get_detail_url():
    """Simulate crawling the list page, then publish 20 detail-page URLs.

    The URLs are appended to the module-level ``DETAIL_URL_LIST``.
    """
    print("get detail url started")
    time.sleep(4)  # pretend the list page takes a while to download
    page_id = 0
    while page_id < 20:
        DETAIL_URL_LIST.append("http://projectsedu.com/{id}".format(id=page_id))
        page_id += 1
    print("get detail url end")

if __name__ == "__main__":
    # The producer used to be created but never started, so the shared list
    # stayed empty for every consumer.  It is run to completion first because
    # get_detail_html() pops exactly once and does not wait for data.
    thread_detail_url = threading.Thread(target=get_detail_url)
    thread_detail_url.start()
    thread_detail_url.join()

    html_threads = []
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html)
        html_thread.start()
        html_threads.append(html_thread)

    # Wait for all consumers so the script ends only after they are done.
    for html_thread in html_threads:
        html_thread.join()

# =====================================================================

# 通过queue的方式进行线程间同步

from queue import Queue

import time

import threading

def get_detail_html(queue):
    """Consume URLs from ``queue`` in a loop, simulating a detail-page crawl.

    Every item taken is acknowledged with ``queue.task_done()`` so that a
    ``queue.join()`` in another thread can account for finished work.
    Putting ``None`` on the queue asks this worker to stop.
    """
    while True:
        url = queue.get()  # thread-safe; blocks while the queue is empty
        try:
            if url is None:
                return
            print("get detail html started")
            time.sleep(2)
            print("get detail html end")
        finally:
            # Acknowledge the item even if processing failed, otherwise
            # queue.join() would wait for it forever.
            queue.task_done()

def get_detail_url(queue):
    """Endlessly simulate list-page crawls, queueing 20 detail URLs per pass."""
    while True:
        print("get detail url started")
        time.sleep(4)  # pretend the list page takes a while to download
        detail_urls = ("http://projectsedu.com/{id}".format(id=i) for i in range(20))
        for detail_url in detail_urls:
            queue.put(detail_url)
        print("get detail url end")

# 2. 线程通信方式- Queue

if __name__ == "__main__":
    detail_url_queue = Queue(maxsize=1000)

    thread_detail_url = threading.Thread(target=get_detail_url, args=(detail_url_queue,))
    for i in range(10):
        html_thread = threading.Thread(target=get_detail_html, args=(detail_url_queue,))
        html_thread.start()

    start_time = time.time()

    # The producer has to run; otherwise every consumer blocks in get() forever.
    thread_detail_url.start()

    # task_done() belongs in the consumers, once per item they take.  Calling
    # it here, with nothing fetched, raises ValueError.
    detail_url_queue.join()  # blocks while queued items are still unacknowledged

    # None of the worker threads is a daemon, so they keep running after this.
    print("last time: {}".format(time.time() - start_time))

10.4 线程同步-Lock、Rlock

from threading import Lock, RLock, Condition  # 可重入的锁

# 在同一个线程里面，可以连续调用多次acquire， 一定要注意acquire的次数要和release的次数相等

total = 0

lock = RLock()

def add():
    """Add one to the global ``total`` a million times under a nested lock.

    The lock is taken twice per iteration and released twice, which relies on
    the module-level ``lock`` being re-entrant.
    """
    # 1. do something 1
    # 2. I/O operation
    # 3. do something 3
    global total
    for _ in range(1000000):
        with lock:
            with lock:  # second acquisition by the same thread
                total += 1

def desc():
    """Subtract one from the global ``total`` a million times, holding ``lock``."""
    global total
    for _ in range(1000000):
        with lock:
            total -= 1

import threading

# Run the locked increment and decrement loops in two concurrent threads.
thread1 = threading.Thread(target=add)

thread2 = threading.Thread(target=desc)

thread1.start()

thread2.start()

# Wait for both workers before reading the shared counter.
thread1.join()

thread2.join()

print(total)

# 1. 用锁会影响性能

# 2. 锁会引起死锁

# 死锁的情况 A（a，b）

"""

A(a、b)

acquire (a)

acquire (b)

B(a、b)

acquire (b)

acquire (a)

"""

10.5 线程同步-condition使用以及源码分析

import threading

#条件变量， 用于复杂的线程间同步

# class XiaoAi(threading.Thread):

#     def __init__(self, lock):

#         super().__init__(name="小爱")

#         self.lock = lock

#

#     def run(self):

#         self.lock.acquire()

#         print("{} : 在 ".format(self.name))

#         self.lock.release()

#

#         self.lock.acquire()

#         print("{} : 好啊 ".format(self.name))

#         self.lock.release()

#

# class TianMao(threading.Thread):

#     def __init__(self, lock):

#         super().__init__(name="天猫精灵")

#         self.lock = lock

#

#     def run(self):

#

#         self.lock.acquire()

#         print("{} : 小爱同学 ".format(self.name))

#         self.lock.release()

#

#         self.lock.acquire()

#         print("{} : 我们来对古诗吧 ".format(self.name))

#         self.lock.release()

#通过condition完成协同读诗

class XiaoAi(threading.Thread):
    """Replying half of the dialogue: wait for a cue, speak, then cue back."""

    def __init__(self, cond):
        super().__init__(name="小爱")
        self.cond = cond

    def run(self):
        """Speak six lines, each one only after being notified on ``cond``."""
        replies = (
            "{} : 在 ",
            "{} : 好啊 ",
            "{} : 君住长江尾 ",
            "{} : 共饮长江水 ",
            "{} : 此恨何时已 ",
            "{} : 定不负相思意 ",
        )
        with self.cond:
            for reply in replies:
                self.cond.wait()  # releases the condition's lock until notified
                print(reply.format(self.name))
                self.cond.notify()

class TianMao(threading.Thread):
    """Opening half of the dialogue: speak, cue the partner, wait for a reply."""

    def __init__(self, cond):
        super().__init__(name="天猫精灵")
        self.cond = cond

    def run(self):
        """Speak six lines, notifying and then waiting on ``cond`` after each."""
        lines = (
            "{} : 小爱同学 ",
            "{} : 我们来对古诗吧 ",
            "{} : 我住长江头 ",
            "{} : 日日思君不见君 ",
            "{} : 此水几时休 ",
            "{} : 只愿君心似我心 ",
        )
        with self.cond:
            for line in lines:
                print(line.format(self.name))
                self.cond.notify()
                self.cond.wait()  # releases the condition's lock until notified

# Demo entry point: two threads recite a poem in turns via one Condition.
if __name__ == "__main__":

    from concurrent import futures  # NOTE(review): not used in this block

    cond = threading.Condition()

    xiaoai = XiaoAi(cond)

    tianmao = TianMao(cond)

    # The start order matters: XiaoAi.run begins with wait(), TianMao.run with notify().

    # wait() and notify() may only be called after entering "with cond".

    # A Condition has two levels of locks: the underlying lock is released when a thread calls wait(); on top of that, each wait() call allocates a new lock, puts it on the condition's waiter queue and waits for notify() to wake it.

    xiaoai.start()

    tianmao.start()

10.6 线程同步-Semaphore使用及源码分析

# Semaphore 是用于控制进入数量的锁

# 文件， 读、写， 写一般只是用于一个线程写，读可以允许有多个

# 做爬虫

import threading

import time

class HtmlSpider(threading.Thread):
    """Worker thread that simulates one download and then frees a permit."""

    def __init__(self, url, sem):
        super().__init__()
        self.url, self.sem = url, sem

    def run(self):
        """Sleep two seconds, report success, then release the semaphore."""
        permit = self.sem
        time.sleep(2)
        print("got html text success")
        permit.release()  # lets the producer start another spider

class UrlProducer(threading.Thread):
    """Thread that launches 20 spiders, taking one permit per launch."""

    def __init__(self, sem):
        super().__init__()
        self.sem = sem

    def run(self):
        """Acquire a permit, then start one ``HtmlSpider``; repeat 20 times."""
        page = 0
        while page < 20:
            # Blocks here while no permit is available.
            self.sem.acquire()
            HtmlSpider("https://baidu.com/{}".format(page), self.sem).start()
            page += 1

# Demo entry point: cap the number of concurrently running spiders at three.
if __name__ == "__main__":

    # Three permits: the producer acquires one per spider, each spider releases it when done.
    sem = threading.Semaphore(3)

    url_producer = UrlProducer(sem)

    url_producer.start()

10.7 ThreadPoolExecutor线程池



from concurrent.futures import ThreadPoolExecutor, as_completed, wait, FIRST_COMPLETED

from concurrent.futures import Future

from multiprocessing import Pool

#未来对象，task的返回容器

#线程池， 为什么要线程池

#主线程中可以获取某一个线程的状态或者某一个任务的状态，以及返回值

#当一个线程完成的时候我们主线程能立即知道

#futures可以让多线程和多进程编码接口一致

import time

def get_html(times):
    """Sleep for ``times`` seconds, report it, and return ``times`` unchanged."""
    time.sleep(times)
    message = "get page {} success".format(times)
    print(message)
    return times

# A pool of two worker threads shared by every submitted task.
executor = ThreadPoolExecutor(max_workers=2)

# submit() hands a callable to the pool and returns immediately

# task1 = executor.submit(get_html, (3))

# task2 = executor.submit(get_html, (2))

# collect the results of the tasks that have already finished

urls = [3,2,4]

all_task = [executor.submit(get_html, (url)) for url in urls]

# Block only until the first of the submitted futures has completed.
wait(all_task, return_when=FIRST_COMPLETED)

print("main")

# for future in as_completed(all_task):

#     data = future.result()

#     print("get {} page".format(data))

#通过executor的map获取已经完成的task的值

# for data in executor.map(get_html, urls):

#     print("get {} page".format(data))

# #done方法用于判定某个任务是否完成

# print(task1.done())

# print(task2.cancel())

# time.sleep(3)

# print(task1.done())

#

# #result方法可以获取task的执行结果

# print(task1.result())

10.8 多进程和多线程对比

import time

from concurrent.futures import ThreadPoolExecutor, as_completed

from concurrent.futures import ProcessPoolExecutor

#多进程编程

#耗cpu的操作，用多进程编程， 对于io操作来说， 使用多线程编程，进程切换代价要高于线程

#1. 对于耗费cpu的操作，多进程优于多线程

# def fib(n):

#     if n<=2:

#         return 1

#     return fib(n-1)+fib(n-2)

#

# if __name__ == "__main__":

#     with ThreadPoolExecutor(3) as executor:

#         all_task = [executor.submit(fib, (num)) for num in range(25,40)]

#         start_time = time.time()

#         for future in as_completed(all_task):

#             data = future.result()

#             print("exe result: {}".format(data))

#

#         print("last time is: {}".format(time.time()-start_time))

#2. 对于io操作来说，多线程优于多进程

def random_sleep(n):
    """Block for ``n`` seconds and hand ``n`` back to the caller."""
    delay = n
    time.sleep(delay)
    return delay

# Demo entry point: time 30 two-second sleeps on a pool of three processes.
if __name__ == "__main__":

    with ProcessPoolExecutor(3) as executor:

        all_task = [executor.submit(random_sleep, (num)) for num in [2]*30]

        # The clock starts after submission, so queueing the tasks is not measured.
        start_time = time.time()

        # as_completed yields each future as soon as it has finished.
        for future in as_completed(all_task):

            data = future.result()

            print("exe result: {}".format(data))

        print("last time is: {}".format(time.time()-start_time))

10.9 multiprocessing多进程编程

# import os

# #fork只能用于linux/unix中

# pid = os.fork()

# print("bobby")

# if pid == 0:

#   print('子进程 {} ，父进程是： {}.' .format(os.getpid(), os.getppid()))

# else:

#   print('我是父进程：{}.'.format(pid))

import multiprocessing

#多进程编程

import time

def get_html(n):
    """Sleep ``n`` seconds in the worker, report success, and return ``n``."""
    time.sleep(n)
    status = "sub_progress success"
    print(status)
    return n

if __name__ == "__main__":
    # Single-process variant:
    # progress = multiprocessing.Process(target=get_html, args=(2,))
    # print(progress.pid)
    # progress.start()
    # print(progress.pid)
    # progress.join()
    # print("main progress end")

    # Use a process pool.  The with-block tears the worker processes down once
    # the results have been consumed instead of leaving the pool open.
    with multiprocessing.Pool(multiprocessing.cpu_count()) as pool:
        # result = pool.apply_async(get_html, args=(3,))
        #
        # # wait for all tasks to finish
        # pool.close()
        # pool.join()
        #
        # print(result.get())

        # imap: results come back in input order
        # for result in pool.imap(get_html, [1,5,3]):
        #     print("{} sleep success".format(result))

        # imap_unordered: results come back as soon as each one is ready
        for result in pool.imap_unordered(get_html, [1, 5, 3]):
            print("{} sleep success".format(result))

10.10 进程间通信-Queue、Pipe、Manager

import time

from multiprocessing import Process, Queue, Pool, Manager, Pipe

# def producer(queue):

#     queue.put("a")

#     time.sleep(2)

#

# def consumer(queue):

#     time.sleep(2)

#     data = queue.get()

#     print(data)

#

# if __name__ == "__main__":

#     queue = Queue(10)

#     my_producer = Process(target=producer, args=(queue,))

#     my_consumer = Process(target=consumer, args=(queue,))

#     my_producer.start()

#     my_consumer.start()

#     my_producer.join()

#     my_consumer.join()

#共享全局变量通信

#共享全局变量不能适用于多进程编程，可以适用于多线程

# def producer(a):

#     a += 100

#     time.sleep(2)

#

# def consumer(a):

#     time.sleep(2)

#     print(a)

#

# if __name__ == "__main__":

#     a = 1

#     my_producer = Process(target=producer, args=(a,))

#     my_consumer = Process(target=consumer, args=(a,))

#     my_producer.start()

#     my_consumer.start()

#     my_producer.join()

#     my_consumer.join()

#multiprocessing中的queue不能用于pool进程池

#pool中的进程间通信需要使用manager中的queue

# def producer(queue):

#     queue.put("a")

#     time.sleep(2)

#

# def consumer(queue):

#     time.sleep(2)

#     data = queue.get()

#     print(data)

#

# if __name__ == "__main__":

#     queue = Manager().Queue(10)

#     pool = Pool(2)

#

#     pool.apply_async(producer, args=(queue,))

#     pool.apply_async(consumer, args=(queue,))

#

#     pool.close()

#     pool.join()

#通过pipe实现进程间通信

#pipe的性能高于queue

# def producer(pipe):

#     pipe.send("bobby")

#

# def consumer(pipe):

#     print(pipe.recv())

#

# if __name__ == "__main__":

#     recevie_pipe, send_pipe = Pipe()

#     #pipe只能适用于两个进程

#     my_producer= Process(target=producer, args=(send_pipe, ))

#     my_consumer = Process(target=consumer, args=(recevie_pipe,))

#

#     my_producer.start()

#     my_consumer.start()

#     my_producer.join()

#     my_consumer.join()

def add_data(p_dict, key, value):
    """Store ``value`` under ``key`` in ``p_dict``, modifying it in place."""

    p_dict[key] = value

if __name__ == "__main__":
    # The manager runs a separate server process that owns the shared dict.
    # Keeping it in a with-block shuts that process down when the demo ends.
    with Manager() as manager:
        progress_dict = manager.dict()

        first_progress = Process(target=add_data, args=(progress_dict, "bobby1", 22))
        second_progress = Process(target=add_data, args=(progress_dict, "bobby2", 23))

        first_progress.start()
        second_progress.start()
        first_progress.join()
        second_progress.join()

        # Read the proxy while the manager is still alive.
        print(progress_dict)

第十章：Python高级编程-多线程、多进程和线程池编程的更多相关文章

gj11 多线程、多进程和线程池编程
11.1 python中的GIL # coding=utf-8 # gil global interpreter lock (cpython) # python中一个线程对应于c语言中的一个线程 # ...
Python进阶：多线程、多进程和线程池编程/协程和异步io/asyncio并发编程
gil: gil使得同一个时刻只有一个线程在一个CPU上执行字节码,无法将多个线程映射到多个CPU上执行 gil会根据执行的字节码行数以及时间片释放gil,gil在遇到io的操作时候主动释放 thre ...
python高级之多线程
python高级之多线程本节内容线程与进程定义及区别 python全局解释器锁线程的定义及使用互斥锁线程死锁和递归锁条件变量同步(Condition) 同步条件(Event) 信号量队列 ...
linux C 多线程/线程池编程同步实例
在多线程.线程池编程中经常会遇到同步的问题. 1.创建线程函数原型:int pthread_create(pthread_t *thread, const pthread_attr_t *attr, ...
[Java并发编程（一）] 线程池 FixedThreadPool vs CachedThreadPool ...
[Java并发编程(一)] 线程池 FixedThreadPool vs CachedThreadPool ... 摘要介绍 Java 并发包里的几个主要 ExecutorService . 正文 ...
Java多线程系列--“JUC线程池”06之 Callable和Future
概要本章介绍线程池中的Callable和Future.Callable 和 Future 简介示例和源码分析(基于JDK1.7.0_40) 转载请注明出处:http://www.cnblogs.co ...
Java多线程系列--“JUC线程池”02之线程池原理(一)
概要在上一章"Java多线程系列--“JUC线程池”01之线程池架构"中,我们了解了线程池的架构.线程池的实现类是ThreadPoolExecutor类.本章,我们通过分析Th ...
Java多线程系列--“JUC线程池”03之线程池原理(二)
概要在前面一章"Java多线程系列--“JUC线程池”02之线程池原理(一)"中介绍了线程池的数据结构,本章会通过分析线程池的源码,对线程池进行说明.内容包括:线程池示例参考代 ...
Java多线程系列--“JUC线程池”04之线程池原理(三)
转载请注明出处:http://www.cnblogs.com/skywang12345/p/3509960.html 本章介绍线程池的生命周期.在"Java多线程系列--“基础篇”01之基 ...

随机推荐

Spring Data REST不完全指南（一）
简介 Spring Data REST是Spring Data项目的一部分,可轻松在Spring Data存储库上构建超媒体驱动的REST Web服务. Spring Data REST 构建在 Sp ...
Android | 教你如何在安卓上实现通用卡证识别，一键各种卡绑定
目录前言通用卡证识别的应用场景如何使用通用卡证识别服务集成通用卡证识别服务的关键流程开发实战 1 开发准备 1.1 在项目级gradle里添加华为maven仓 1.2 在应用级的build. ...
ppt和pptx转图片完整代码,解决2003版和2007版中文乱码问题
引入所需依赖,注意poi版本,新版本不支持,最好使用和我一样的版本. <!-- https://mvnrepository.com/artifact/org.apache.poi/poi --& ...
PHP函数：debug_backtrace
debug_backtrace() - 产生一条 PHP 的回溯跟踪(backtrace). 说明: debug_backtrace ([ int $options = DEBUG_BACKTRAC ...
Linux安装PHP的Redis扩展（已安装Redis）
1.下载需要的php操作redis的扩展包下载地址 http://pecl.php.net/package/redis 下载对应php版本,我的php版本为7.3,下载的是最新的版本5.0.2 ...
百度智能云虚拟主机 Typecho 分类功能失效 | 开启伪静态地址
出现的问题 $this->is() 方法失效,无法正确判断 archive.category.tags 页面类型. 点击分类页面.归档页面时,虽然 URL 是正确的,但网页内容却是 index. ...
神奇的Kivy，让Python快速开发移动app
随着移动互联网的不断发展,手机.Pad等移动终端已经被普遍使用,充斥在人们的工作.学习和生活中,越来越多的程序都转向移动终端,各类app应用相拥而至. Kivy作为Python的Android和IOS ...
好用的反向代理工具NATAPP
这里推荐一个好用的反向代理工具NATAPP NATAPP1分钟快速新手图文教程有免费的和付费的个人建议付费的,免费还需要身份证验证,付费版最低9元/月,看个人需求! 这里给个邀请码贴在这需要的话可以 ...
python学习06循环
'''while''''''while 布尔表达式:冒号不能省略''''''1+2+3+...+10'''i=1sum1=0while i<=10: sum1+=i i+=1print(sum1 ...
openssl查看证书细节 [转载]
openssl x509部分命令打印出证书的内容: openssl x509 -in cert.pem -noout -text 打印出证书的系列号 openssl x509 -in cert.pe ...

第十章：Python高级编程-多线程、多进程和线程池编程

第十章：Python高级编程-多线程、多进程和线程池编程

10.1 Python中的GIL

10.2 多线程编程-threading

10.3 线程间通信-共享变量和Queue

10.4 线程同步-Lock、Rlock

10.5 线程同步-condition使用以及源码分析

10.6 线程同步-Semaphore使用及源码分析

10.7 ThreadPoolExecutor线程池

10.8 多进程和多线程对比

10.9 multiprocessing多进程编程

10.10 进程间通信-Queue、Pipe、Manager

第十章：Python高级编程-多线程、多进程和线程池编程的更多相关文章

随机推荐

热门专题