Commit 2752b396 2752b396f901e220f94b3edd2d23cbfbf696a9e4 by 文鑫

update

1 parent 601ef000
1 <?xml version="1.0" encoding="UTF-8"?> 1 <?xml version="1.0" encoding="UTF-8"?>
2 <project version="4"> 2 <project version="4">
3 <component name="Black"> 3 <component name="Black">
4 <option name="sdkName" value="Spider" /> 4 <option name="enabledOnReformat" value="true" />
5 <option name="enabledOnSave" value="true" />
6 <option name="sdkName" value="spider" />
5 </component> 7 </component>
6 <component name="ProjectRootManager" version="2" project-jdk-name="Spider" project-jdk-type="Python SDK" /> 8 <component name="ProjectRootManager" version="2" project-jdk-name="Spider" project-jdk-type="Python SDK" />
7 </project> 9 </project>
...\ No newline at end of file ...\ No newline at end of file
......
1 import logging
1 from contextlib import asynccontextmanager 2 from contextlib import asynccontextmanager
2
3 from tortoise import Tortoise
4 from app.job.job import scheduler
5 from fastapi import FastAPI 3 from fastapi import FastAPI
4 from tortoise import Tortoise
6 from app.config.tortoise_config import getTortoiseConfig 5 from app.config.tortoise_config import getTortoiseConfig
7 import logging 6 from app.job.job import scheduler
8 7
9 logger = logging.getLogger(__name__) 8 logger = logging.getLogger(__name__)
10 9
...@@ -22,7 +21,6 @@ async def close(): ...@@ -22,7 +21,6 @@ async def close():
22 await Tortoise.close_connections() 21 await Tortoise.close_connections()
23 22
24 23
25 # 使用asynccontextmanager装饰器定义一个异步上下文管理器函数lifespan
26 @asynccontextmanager 24 @asynccontextmanager
27 async def lifespan(app: FastAPI): 25 async def lifespan(app: FastAPI):
28 # 开始apscheduler 26 # 开始apscheduler
...@@ -30,22 +28,6 @@ async def lifespan(app: FastAPI): ...@@ -30,22 +28,6 @@ async def lifespan(app: FastAPI):
30 logging.info("apscheduler启动完成") 28 logging.info("apscheduler启动完成")
31 await init() 29 await init()
32 logging.info("初始化数据库完成") 30 logging.info("初始化数据库完成")
33 # 获取程序开始执行的时间
34 start_time = time.time()
35 param = SpiderParams(
36 startDate="2024-12-20",
37 endDate="2024-12-21",
38 target_type=[
39 TypeEnum.ANIME,
40 TypeEnum.EPISODES,
41 TypeEnum.MOVIE,
42 TypeEnum.VARIETY,
43 ],
44 url="http://guduodata.com",
45 )
46 print(param.get_time_range())
47 asyncio.run(scrawl_and_save(param))
48 print(f"程序执行耗时时间:{(time.time() - start_time) / 1000}")
49 yield 31 yield
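50 # On exiting the lifespan context, shut down the apscheduler scheduler and release resources 32 # On exiting the lifespan context, shut down the apscheduler scheduler and release resources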
51 scheduler.shutdown() 33 scheduler.shutdown()
......
...@@ -2,18 +2,13 @@ import hashlib ...@@ -2,18 +2,13 @@ import hashlib
2 import logging 2 import logging
3 from datetime import datetime, timedelta 3 from datetime import datetime, timedelta
4 from typing import Dict, List, Tuple 4 from typing import Dict, List, Tuple
5
6 from apscheduler.schedulers.asyncio import AsyncIOScheduler 5 from apscheduler.schedulers.asyncio import AsyncIOScheduler
7 from superstream import Stream 6 from superstream import Stream
8
9 from app.config.app_config import getAppConfig 7 from app.config.app_config import getAppConfig
10 from app.model.mysql_model import XWebCrawler 8 from app.model.mysql_model import XWebCrawler
11 from app.schemas.spider_schema import ApschedulerJob, SpiderParams, TaskInfo, TypeEnum 9 from app.schemas.spider_schema import ApschedulerJob, SpiderParams, TaskInfo, TypeEnum
12 10
13 logger = logging.getLogger(__name__) 11 logger = logging.getLogger(__name__)
14
15 # 导入asynccontextmanager用于创建异步上下文管理器
16
17 # 创建一个scheduler实例 12 # 创建一个scheduler实例
18 scheduler = AsyncIOScheduler() 13 scheduler = AsyncIOScheduler()
19 14
......
This diff could not be displayed because it is too large.
1 import logging 1 import logging
2 import os 2 import os
3 import platform
3 import sys 4 import sys
4 5
5 # 把当前文件所在文件夹的父文件夹路径加入到PYTHONPATH 解决ModuleNotFoundError: No module named 'app'
6 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 6 sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7 import uvicorn 7 import uvicorn
8 from fastapi import FastAPI 8 from fastapi import FastAPI
...@@ -11,9 +11,14 @@ from app.config.app_config import getAppConfig ...@@ -11,9 +11,14 @@ from app.config.app_config import getAppConfig
11 from app.event.fastapi_event import lifespan 11 from app.event.fastapi_event import lifespan
12 from app.config.log_config import getLogConfig 12 from app.config.log_config import getLogConfig
13 13
14
14 app = FastAPI(lifespan=lifespan) 15 app = FastAPI(lifespan=lifespan)
15 app.include_router(spider_router.spiderRouter) 16 app.include_router(spider_router.spiderRouter)
16 if __name__ == "__main__": 17 if __name__ == "__main__":
18 if platform.system() == "Windows":
19 import asyncio
20
21 asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
17 appConf = getAppConfig() 22 appConf = getAppConfig()
18 uvicorn.run( 23 uvicorn.run(
19 "main:app", host="0.0.0.0", port=appConf.server.port, log_config=getLogConfig() 24 "main:app", host="0.0.0.0", port=appConf.server.port, log_config=getLogConfig()
......
...@@ -20,8 +20,8 @@ async def read_users(param: SpiderParams): ...@@ -20,8 +20,8 @@ async def read_users(param: SpiderParams):
20 20
21 @spiderRouter.post("/add_guduo_job", summary="添加一个骨朵爬虫定时任务") 21 @spiderRouter.post("/add_guduo_job", summary="添加一个骨朵爬虫定时任务")
22 async def add_task_to_job(param: Dict[str, Any]): 22 async def add_task_to_job(param: Dict[str, Any]):
23 jobParam = SpiderParams(**(param.get('taskParam'))) 23 jobParam = SpiderParams(**(param.get('taskParam'))) # type: ignore
24 taskInfo = TaskInfo(**(param.get('taskInfo'))) 24 taskInfo = TaskInfo(**(param.get('taskInfo'))) # type: ignore
25 add_guduo_job(jobParam, taskInfo) 25 add_guduo_job(jobParam, taskInfo)
26 return '添加骨朵爬虫任务成功' 26 return '添加骨朵爬虫任务成功'
27 27
......