跳到主要内容

多任务并发处理

多线程、多进程、协程

多线程

from threading import Thread  
from multiprocessing import Process
from concurrent.futures import ThreadPoolExecutor

def func(name):
    """Print ten numbered progress lines, each prefixed with ``name``."""
    step = 0
    while step < 10:
        print(f"{name} func {step}")
        step += 1

# Thread subclass: the work to perform lives in an overridden run()
class MyThread(Thread):
    """Thread that prints ten numbered lines tagged with its own name."""

    def run(self):
        """Print ``"<thread name> myThread <n>"`` for n = 0..9."""
        for step in range(10):
            print("{} myThread {}".format(self.name, step))



if __name__ == '__main__':
    # --- Plain threads ---
    # Approach 1 (disabled): hand a plain function to Thread as its target
    # Thread(target=func, args=("子线程1",)).start()
    # Approach 2: start an instance of the Thread subclass
    worker = MyThread(name="子线程2")
    worker.start()

    # --- Thread pool ---
    # Leaving the with-block waits for every submitted task to finish.
    with ThreadPoolExecutor(max_workers=20) as pool:
        for idx in range(10):
            pool.submit(func, name=f"线程池线程:{idx}/20")

    # --- Main thread ---
    for n in range(10):
        print(f"main {n}")

协程

import asyncio  

import notebook_asyncio
import time


# 定义协程函数
async def func1():
    """Announce start, suspend for one second, then announce completion."""
    delay_seconds = 1
    print("func1 running...")
    await asyncio.sleep(delay_seconds)
    print("func1 done")


async def func2():
    """Announce start, suspend for two seconds, then announce completion."""
    delay_seconds = 2
    print("func2 running...")
    await asyncio.sleep(delay_seconds)
    print("func2 done")


async def func3():
    """Announce start, suspend for three seconds, then announce completion."""
    delay_seconds = 3
    print("func3 running...")
    await asyncio.sleep(delay_seconds)
    print("func3 done")


# Top-level coroutine: schedules the three workers together
async def main():
    """Run func1, func2 and func3 concurrently and wait for all of them."""
    pending = (func1(), func2(), func3())
    await asyncio.gather(*pending)


if __name__ == '__main__':
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the wall clock and can jump
    # if the system time is adjusted while the tasks are running.
    started = time.perf_counter()

    # asyncio.run() starts an event loop and runs the main coroutine on it
    asyncio.run(main())

    elapsed = time.perf_counter() - started
    print(f"all tasks done in {elapsed} seconds")

示例

使用协程下载小说章节内容。

import asyncio  
import aiofiles
import aiohttp
import requests
from lxml import etree

# Site root; chapter links from the catalogue are appended to this.
domain = "https://dce42ddbb835.bi54.cc"

# Request headers sent with every HTTP call in this script.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/131.0.0.0 Safari/537.36"
    ),
    "Referer": domain,
}


async def get_book(bid, limit=1):
    """Fetch a book's chapter catalogue and download chapters concurrently.

    Args:
        bid: Book id substituted into the catalogue URL.
        limit: Maximum number of chapters to download. The default of 1
            keeps the previous behaviour (stop after the first chapter whose
            title starts with a digit). Pass ``None`` to download every
            such chapter; note that all of them are then requested at once.
    """
    # Build the URL from the shared ``domain`` constant instead of repeating
    # the host name, so the two cannot drift apart.
    catalogue_url = f"{domain}/html/{bid}/list.html"

    # requests is a blocking client; run it in the default executor so the
    # event loop is not stalled if this coroutine runs alongside others.
    loop = asyncio.get_running_loop()
    catalogue_resp = await loop.run_in_executor(
        None, lambda: requests.get(catalogue_url, headers=header)
    )
    # The context manager closes the response even if parsing raises.
    with catalogue_resp:
        catalogue_resp.encoding = 'utf-8'
        html = etree.HTML(catalogue_resp.text)

    links_xpath = "/html/body//div[@class='book_last']//dd/a"
    chapter_hrefs = html.xpath(links_xpath + "/@href")
    chapter_titles = html.xpath(links_xpath + "/text()")

    # Assemble one download coroutine per selected chapter.
    tasks = []
    for href, title in zip(chapter_hrefs, chapter_titles):
        if limit is not None and len(tasks) >= limit:
            break
        # Entries whose title does not start with a digit are skipped;
        # slicing (not indexing) keeps an empty title from raising.
        if not title[:1].isdigit():
            continue
        print("start downloading", title, href)
        tasks.append(get_chapter_content(domain + href, title))

    # Run all collected downloads concurrently.
    await asyncio.gather(*tasks)


async def get_chapter_content(url, title):
    """Download one chapter page and save its text under ``./note/``.

    Args:
        url: Absolute URL of the chapter page.
        title: Chapter title; used as the output file name after characters
            that are unsafe in file names are replaced with ``_``.
    """
    # Local imports: the visible top-of-file import block does not include
    # these stdlib modules.
    import os
    import re

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=header) as resp:
            # aiohttp decodes with the charset passed to text(); assigning
            # ``resp.encoding`` is the requests idiom and is not part of
            # aiohttp's response API.
            resp_text = await resp.text(encoding='utf-8')

    # Parsing and file writing happen after the HTTP connection is released.
    html = etree.HTML(resp_text)
    chapter_content = html.xpath("/html/body//div[@id='chaptercontent']/text()")
    print("content captured: ", chapter_content)

    out_dir = "./note"
    # Create the target directory on first use instead of failing on open().
    os.makedirs(out_dir, exist_ok=True)
    # Path separators and characters rejected by common file systems would
    # otherwise break the open() call or redirect the write elsewhere.
    file_name = re.sub(r'[\\/:*?"<>|]', "_", title) + ".txt"

    async with aiofiles.open(os.path.join(out_dir, file_name), "w", encoding="utf-8") as f:
        # One buffered write instead of one awaited write per line.
        await f.write("".join(line + "\n" for line in chapter_content))
    print("chapter content downloaded: " + file_name)


if __name__ == '__main__':
    # Entry point: run the download coroutine for one book id
    book_id = "42905"
    asyncio.run(get_book(book_id))