Commit c022d21e c022d21ee0405b8f298fd98d7fcaa035384aa2b8 by 文鑫

添加tqdm

1 parent 828ad47e
......@@ -6,6 +6,7 @@ from playwright.async_api import Page, async_playwright
from superstream import Stream
from app.schemas.spider_schema import GuoDuoSpiderResult, SpiderParams, TypeEnum
import logging
from tqdm.asyncio import tqdm_asyncio
logger = logging.getLogger(__name__)
......@@ -17,10 +18,10 @@ async def startBrowser(spiderParam: SpiderParams) -> List[GuoDuoSpiderResult]:
async with async_playwright() as p:
browser = await p.firefox.launch(headless=True)
# 创建指定个数的浏览器页面
pages = await asyncio.gather(*(browser.new_page()
pages = await tqdm_asyncio.gather(*(browser.new_page()
for _ in range(len(time_range))))
# 同时循环time_range与pages 去调用hand_one_data异步方法
results = await asyncio.gather(*(hand_one_data(time_range[i], pages[i],
results = await tqdm_asyncio.gather(*(hand_one_data(time_range[i], pages[i],
scrawl_types, url)
for i in range(len(time_range))))
return (Stream(results)
......@@ -94,11 +95,11 @@ async def get_data(targetPage: Page, scrawl_type: int, target_time: str) -> Opti
if __name__ == '__main__':
    # Manual run of the spider: crawl a fixed date range for three content
    # types, then print the elapsed time, the result count and every result.

    # Record the wall-clock start so the total run time can be reported.
    start_time = time.time()
    param = SpiderParams(startDate='2024-01-02',
                         endDate='2024-01-15',
                         target_type=[TypeEnum.ANIME, TypeEnum.EPISODES, TypeEnum.MOVIE],
                         url='http://guduodata.com/')
    res = asyncio.run(startBrowser(param))
    # time.time() returns seconds, so the difference is already the elapsed
    # time in seconds; dividing it by 1000 would under-report the duration
    # by three orders of magnitude.
    print(f'程序执行耗时时间:{time.time() - start_time}长度为:{len(res)}')
    for it in res:
        print(it)
......
aiosqlite==0.20.0
annotated-types==0.7.0
anyio==4.7.0
APScheduler==3.11.0
asyncmy==0.2.10
click==8.1.7
exceptiongroup==1.2.2
fastapi==0.115.6
greenlet==3.1.1
h11==0.14.0
idna==3.10
iso8601==2.1.0
playwright==1.49.1
pydantic==2.10.4
pydantic_core==2.27.2
pyee==12.0.0
pypika-tortoise==0.3.2
pytz==2024.2
PyYAML==6.0.2
sniffio==1.3.1
starlette==0.41.3
SuperStream==0.2.6
tortoise-orm==0.22.2
typing_extensions==4.12.2
tzlocal==5.2
uvicorn==0.34.0