多任务并发处理

多线程、多进程、协程

多线程

from threading import Thread  
from multiprocessing import Process  
from concurrent.futures import ThreadPoolExecutor  
  
def func(name):
    """Print ten numbered lines, each prefixed with ``name``.

    Args:
        name: Label placed at the start of every printed line.
    """
    step = 0
    while step < 10:
        print(f"{name} func {step}")
        step += 1
  
# Thread subclass: the work executed by the thread lives in run().
class MyThread(Thread):
    """Thread whose body prints ten lines tagged with the thread's name."""

    def run(self):
        """Print ``"<thread name> myThread <n>"`` for n from 0 through 9."""
        for message in (f"{self.name} myThread {n}" for n in range(10)):
            print(message)
  
  
  
if __name__ == '__main__':
    # --- Plain threads ---
    # Way 1 (kept disabled): pass a function to Thread as its target.
    # Thread(target=func,args=("子线程1",)).start()
    # Way 2: start an instance of the Thread subclass.
    worker = MyThread(name="子线程2")
    worker.start()

    # --- Thread pool ---
    # Ten calls of func are handed to a pool created with 20 workers; leaving
    # the with-block shuts the pool down after the submitted calls finish.
    with ThreadPoolExecutor(20) as pool:
        for task_no in range(10):
            pool.submit(func, name=f"线程池线程：{task_no}/20")

    # --- Main thread ---
    main_step = 0
    while main_step < 10:
        print(f"main {main_step}")
        main_step += 1

协程

import asyncio  
  
import notebook_asyncio  
import time  
  
  
# Coroutine function definitions
async def func1():
    """Print a start message, sleep for one second, then print a done message."""
    pause_seconds = 1
    print('func1 running...')
    # Awaiting the sleep suspends this coroutine so the event loop can run others.
    await asyncio.sleep(pause_seconds)
    print('func1 done')
  
  
async def func2():
    """Print a start message, sleep for two seconds, then print a done message."""
    pause_seconds = 2
    print('func2 running...')
    # Awaiting the sleep suspends this coroutine so the event loop can run others.
    await asyncio.sleep(pause_seconds)
    print('func2 done')
  
  
async def func3():
    """Print a start message, sleep for three seconds, then print a done message."""
    pause_seconds = 3
    print('func3 running...')
    # Awaiting the sleep suspends this coroutine so the event loop can run others.
    await asyncio.sleep(pause_seconds)
    print('func3 done')
  
  
# Main coroutine: schedules all the tasks
async def main():
    """Run func1, func2 and func3 concurrently and wait until all have finished."""
    pending = (func1(), func2(), func3())
    # gather() runs the coroutines concurrently and resumes once every one is done.
    await asyncio.gather(*pending)
  
  
if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock meant for measuring
    # elapsed time; time.time() follows the wall clock and can jump if the
    # system time is adjusted while the tasks are running.
    t1 = time.perf_counter()

    # asyncio.run() starts an event loop and runs the main coroutine to completion.
    asyncio.run(main())

    t2 = time.perf_counter()
    print(f"all tasks done in {t2 - t1} seconds")

示例

使用协程下载小说章节内容。

import asyncio  
import aiofiles  
import aiohttp  
import requests  
from lxml import etree  
  
# Base URL of the site; chapter links taken from the catalogue page are appended to it.
domain = "https://dce42ddbb835.bi54.cc"
# HTTP headers sent with both the catalogue request and the chapter requests.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    "Referer": domain
}
  
  
def _fetch_catalogue(bid):
    """Download and parse the catalogue page of book ``bid`` (blocking call).

    Returns:
        A list of ``(href, title)`` pairs in the order they appear on the page.
    """
    # Build the URL from ``domain`` so the host is defined in one place only.
    catalogue_url = f"{domain}/html/{bid}/list.html"

    # The timeout keeps a stalled server from hanging the program forever.
    catalogue_resp = requests.get(catalogue_url, headers=header, timeout=30)
    try:
        catalogue_resp.encoding = 'utf-8'
        html = etree.HTML(catalogue_resp.text)
    finally:
        # Release the connection even if decoding or parsing fails.
        catalogue_resp.close()

    chapter_hrefs = html.xpath("/html/body//div[@class='book_last']//dd/a/@href")
    chapter_titles = html.xpath("/html/body//div[@class='book_last']//dd/a/text()")
    # zip() stops at the shorter list, like the original min(len, len) loop.
    return list(zip(chapter_hrefs, chapter_titles))


async def get_book(bid, max_chapters=1):
    """Download chapters of book ``bid`` concurrently.

    Only catalogue entries whose title starts with a digit are downloaded.

    Args:
        bid: Book id inserted into the catalogue URL.
        max_chapters: Maximum number of chapters to download. The default of 1
            keeps the original behaviour (first matching chapter only); pass
            ``None`` to download every matching chapter.
    """
    # requests is blocking, so run it in a worker thread instead of stalling
    # the event loop while the catalogue page is fetched.
    loop = asyncio.get_running_loop()
    chapters = await loop.run_in_executor(None, _fetch_catalogue, bid)

    # Build one download coroutine per selected chapter.
    tasks = []
    for href, title in chapters:
        # title[:1] is safe on an empty title, where title[0] would raise IndexError.
        if not title[:1].isdigit():
            continue
        if max_chapters is not None and len(tasks) >= max_chapters:
            break
        print("start downloading", title, href)
        tasks.append(get_chapter_content(domain + href, title))

    # Run all downloads concurrently and wait for them to finish.
    await asyncio.gather(*tasks)
  
  
async def get_chapter_content(url, title):
    """Download one chapter page and save its text to ``./note/<title>.txt``.

    Args:
        url: Absolute URL of the chapter page.
        title: Chapter title; also used as the output file name.
    """
    import os  # local import: only needed here to create the output directory

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=header) as resp:
            # Setting ``resp.encoding`` has no effect on an aiohttp response;
            # the charset must be passed to text() to force UTF-8 decoding.
            resp_text = await resp.text(encoding='utf-8')
            # print(resp_text)

    # The HTTP connection is released before parsing and writing to disk.
    html = etree.HTML(resp_text)
    chapter_content = html.xpath("/html/body//div[@id='chaptercontent']/text()")
    print("content captured: ", chapter_content)

    note_dir = "./note/"
    # Create the output directory on first use instead of failing on open().
    os.makedirs(note_dir, exist_ok=True)
    async with aiofiles.open(note_dir + title + ".txt", "w", encoding="utf-8") as f:
        # One batched write instead of one awaited write per line.
        await f.write("".join(line + "\n" for line in chapter_content))
    print("chapter content downloaded: " + title + ".txt")
  
  
if __name__ == '__main__':
    # Script entry point: download the book with this id.
    book_id = "42905"
    asyncio.run(get_book(book_id))

多线程

协程

示例

多线程

协程

示例