添加tqdm

文鑫
Commit c022d21e ... c022d21ee0405b8f298fd98d7fcaa035384aa2b8 authored 2024-12-22 00:30:57 +0800 by 文鑫
Showing 2 changed files with 6 additions and 31 deletions
app/spider/guduo_spider.py
requirements.txt
--- a/app/spider/guduo_spider.py
View file @c022d21
+++ b/app/spider/guduo_spider.py
View file @c022d21
@@ -6,6 +6,7 @@ from playwright.async_api import Page, async_playwright
 from superstream import Stream
 from app.schemas.spider_schema import GuoDuoSpiderResult, SpiderParams, TypeEnum
 import logging
+from tqdm.asyncio import tqdm_asyncio

 logger = logging.getLogger(__name__)

@@ -17,10 +18,10 @@ async def startBrowser(spiderParam: SpiderParams) -> List[GuoDuoSpiderResult]:
    async with async_playwright() as p:
        browser = await p.firefox.launch(headless=True)
        # 创建指定个数的浏览器页面
-        pages = await asyncio.gather(*(browser.new_page()
+        pages = await tqdm_asyncio.gather(*(browser.new_page()
                                       for _ in range(len(time_range))))
        # 同时循环time_range与pages 去调用hand_one_data异步方法
-        results = await asyncio.gather(*(hand_one_data(time_range[i], pages[i],
+        results = await tqdm_asyncio.gather(*(hand_one_data(time_range[i], pages[i],
                                                       scrawl_types, url)
                                         for i in range(len(time_range))))
        return (Stream(results)
@@ -94,11 +95,11 @@ async def get_data(targetPage: Page, scrawl_type: int, target_time: str) -> Opti
 if __name__ == '__main__':
    # 获取程序开始执行的时间
    start_time = time.time()
-    param = SpiderParams(startDate='1991-01-02',
-                         endDate='1991-01-05',
+    param = SpiderParams(startDate='2024-01-02',
+                         endDate='2024-01-15',
                         target_type=[TypeEnum.ANIME, TypeEnum.EPISODES, TypeEnum.MOVIE]
                         , url='http://guduodata.com/')
    res = asyncio.run(startBrowser(param))
-    print(f'程序执行耗时时间：{time.time() - start_time} 长度为：{len(res)}')
+    print(f'程序执行耗时时间：{(time.time() - start_time)/1000}长度为：{len(res)}')
    for it in res:
        print(it)
--- a/requirements.txt deleted 100644 → 0
View file @828ad47
+++ b/requirements.txt deleted 100644 → 0
View file @828ad47
-aiosqlite==0.20.0
-annotated-types==0.7.0
-anyio==4.7.0
-APScheduler==3.11.0
-asyncmy==0.2.10
-click==8.1.7
-exceptiongroup==1.2.2
-fastapi==0.115.6
-greenlet==3.1.1
-h11==0.14.0
-idna==3.10
-iso8601==2.1.0
-playwright==1.49.1
-pydantic==2.10.4
-pydantic_core==2.27.2
-pyee==12.0.0
-pypika-tortoise==0.3.2
-pytz==2024.2
-PyYAML==6.0.2
-sniffio==1.3.1
-starlette==0.41.3
-SuperStream==0.2.6
-tortoise-orm==0.22.2
-typing_extensions==4.12.2
-tzlocal==5.2
-uvicorn==0.34.0