Commit c022d21e c022d21ee0405b8f298fd98d7fcaa035384aa2b8 by 文鑫

添加tqdm

1 parent 828ad47e
...@@ -6,6 +6,7 @@ from playwright.async_api import Page, async_playwright ...@@ -6,6 +6,7 @@ from playwright.async_api import Page, async_playwright
6 from superstream import Stream 6 from superstream import Stream
7 from app.schemas.spider_schema import GuoDuoSpiderResult, SpiderParams, TypeEnum 7 from app.schemas.spider_schema import GuoDuoSpiderResult, SpiderParams, TypeEnum
8 import logging 8 import logging
9 from tqdm.asyncio import tqdm_asyncio
9 10
10 logger = logging.getLogger(__name__) 11 logger = logging.getLogger(__name__)
11 12
...@@ -17,10 +18,10 @@ async def startBrowser(spiderParam: SpiderParams) -> List[GuoDuoSpiderResult]: ...@@ -17,10 +18,10 @@ async def startBrowser(spiderParam: SpiderParams) -> List[GuoDuoSpiderResult]:
17 async with async_playwright() as p: 18 async with async_playwright() as p:
18 browser = await p.firefox.launch(headless=True) 19 browser = await p.firefox.launch(headless=True)
19 # 创建指定个数的浏览器页面 20 # 创建指定个数的浏览器页面
20 pages = await asyncio.gather(*(browser.new_page() 21 pages = await tqdm_asyncio.gather(*(browser.new_page()
21 for _ in range(len(time_range)))) 22 for _ in range(len(time_range))))
22 # 同时循环time_range与pages 去调用hand_one_data异步方法 23 # 同时循环time_range与pages 去调用hand_one_data异步方法
23 results = await asyncio.gather(*(hand_one_data(time_range[i], pages[i], 24 results = await tqdm_asyncio.gather(*(hand_one_data(time_range[i], pages[i],
24 scrawl_types, url) 25 scrawl_types, url)
25 for i in range(len(time_range)))) 26 for i in range(len(time_range))))
26 return (Stream(results) 27 return (Stream(results)
...@@ -94,11 +95,11 @@ async def get_data(targetPage: Page, scrawl_type: int, target_time: str) -> Opti ...@@ -94,11 +95,11 @@ async def get_data(targetPage: Page, scrawl_type: int, target_time: str) -> Opti
if __name__ == '__main__':
    # Manual smoke run: crawl a fixed date range for three content types,
    # then print the elapsed time, the number of results and each result.

    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments while the crawl is running.
    start_time = time.perf_counter()
    param = SpiderParams(
        startDate='2024-01-02',
        endDate='2024-01-15',
        target_type=[TypeEnum.ANIME, TypeEnum.EPISODES, TypeEnum.MOVIE],
        url='http://guduodata.com/',
    )
    res = asyncio.run(startBrowser(param))
    # The difference is already in seconds; dividing it by 1000 (as the
    # previous revision did) under-reported the duration by a factor of 1000.
    elapsed_seconds = time.perf_counter() - start_time
    print(f'程序执行耗时时间:{elapsed_seconds:.2f}秒 长度为:{len(res)}')
    for it in res:
        print(it)
......
1 aiosqlite==0.20.0
2 annotated-types==0.7.0
3 anyio==4.7.0
4 APScheduler==3.11.0
5 asyncmy==0.2.10
6 click==8.1.7
7 exceptiongroup==1.2.2
8 fastapi==0.115.6
9 greenlet==3.1.1
10 h11==0.14.0
11 idna==3.10
12 iso8601==2.1.0
13 playwright==1.49.1
14 pydantic==2.10.4
15 pydantic_core==2.27.2
16 pyee==12.0.0
17 pypika-tortoise==0.3.2
18 pytz==2024.2
19 PyYAML==6.0.2
20 sniffio==1.3.1
21 starlette==0.41.3
22 SuperStream==0.2.6
23 tortoise-orm==0.22.2
24 typing_extensions==4.12.2
25 tzlocal==5.2
26 uvicorn==0.34.0