update
Showing
6 changed files
with
14 additions
and
30 deletions
1 | <?xml version="1.0" encoding="UTF-8"?> | 1 | <?xml version="1.0" encoding="UTF-8"?> |
2 | <project version="4"> | 2 | <project version="4"> |
3 | <component name="Black"> | 3 | <component name="Black"> |
4 | <option name="sdkName" value="Spider" /> | 4 | <option name="enabledOnReformat" value="true" /> |
5 | <option name="enabledOnSave" value="true" /> | ||
6 | <option name="sdkName" value="spider" /> | ||
5 | </component> | 7 | </component> |
6 | <component name="ProjectRootManager" version="2" project-jdk-name="Spider" project-jdk-type="Python SDK" /> | 8 | <component name="ProjectRootManager" version="2" project-jdk-name="Spider" project-jdk-type="Python SDK" /> |
7 | </project> | 9 | </project> |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
1 | import logging | ||
1 | from contextlib import asynccontextmanager | 2 | from contextlib import asynccontextmanager |
2 | |||
3 | from tortoise import Tortoise | ||
4 | from app.job.job import scheduler | ||
5 | from fastapi import FastAPI | 3 | from fastapi import FastAPI |
4 | from tortoise import Tortoise | ||
6 | from app.config.tortoise_config import getTortoiseConfig | 5 | from app.config.tortoise_config import getTortoiseConfig |
7 | import logging | 6 | from app.job.job import scheduler |
8 | 7 | ||
9 | logger = logging.getLogger(__name__) | 8 | logger = logging.getLogger(__name__) |
10 | 9 | ||
... | @@ -22,7 +21,6 @@ async def close(): | ... | @@ -22,7 +21,6 @@ async def close(): |
22 | await Tortoise.close_connections() | 21 | await Tortoise.close_connections() |
23 | 22 | ||
24 | 23 | ||
25 | # 使用asynccontextmanager装饰器定义一个异步上下文管理器函数lifespan | ||
26 | @asynccontextmanager | 24 | @asynccontextmanager |
27 | async def lifespan(app: FastAPI): | 25 | async def lifespan(app: FastAPI): |
28 | # 开始apscheduler | 26 | # 开始apscheduler |
... | @@ -30,22 +28,6 @@ async def lifespan(app: FastAPI): | ... | @@ -30,22 +28,6 @@ async def lifespan(app: FastAPI): |
30 | logging.info("apscheduler启动完成") | 28 | logging.info("apscheduler启动完成") |
31 | await init() | 29 | await init() |
32 | logging.info("初始化数据库完成") | 30 | logging.info("初始化数据库完成") |
33 | # 获取程序开始执行的时间 | ||
34 | start_time = time.time() | ||
35 | param = SpiderParams( | ||
36 | startDate="2024-12-20", | ||
37 | endDate="2024-12-21", | ||
38 | target_type=[ | ||
39 | TypeEnum.ANIME, | ||
40 | TypeEnum.EPISODES, | ||
41 | TypeEnum.MOVIE, | ||
42 | TypeEnum.VARIETY, | ||
43 | ], | ||
44 | url="http://guduodata.com", | ||
45 | ) | ||
46 | print(param.get_time_range()) | ||
47 | asyncio.run(scrawl_and_save(param)) | ||
48 | print(f"程序执行耗时时间:{(time.time() - start_time) / 1000}") | ||
49 | yield | 31 | yield |
50 | # 在异步上下文管理器中,"退出上下文"时清理机器学习模型,释放资源 | 32 | # 在异步上下文管理器中,"退出上下文"时清理机器学习模型,释放资源 |
51 | scheduler.shutdown() | 33 | scheduler.shutdown() | ... | ... |
... | @@ -2,18 +2,13 @@ import hashlib | ... | @@ -2,18 +2,13 @@ import hashlib |
2 | import logging | 2 | import logging |
3 | from datetime import datetime, timedelta | 3 | from datetime import datetime, timedelta |
4 | from typing import Dict, List, Tuple | 4 | from typing import Dict, List, Tuple |
5 | |||
6 | from apscheduler.schedulers.asyncio import AsyncIOScheduler | 5 | from apscheduler.schedulers.asyncio import AsyncIOScheduler |
7 | from superstream import Stream | 6 | from superstream import Stream |
8 | |||
9 | from app.config.app_config import getAppConfig | 7 | from app.config.app_config import getAppConfig |
10 | from app.model.mysql_model import XWebCrawler | 8 | from app.model.mysql_model import XWebCrawler |
11 | from app.schemas.spider_schema import ApschedulerJob, SpiderParams, TaskInfo, TypeEnum | 9 | from app.schemas.spider_schema import ApschedulerJob, SpiderParams, TaskInfo, TypeEnum |
12 | 10 | ||
13 | logger = logging.getLogger(__name__) | 11 | logger = logging.getLogger(__name__) |
14 | |||
15 | # 导入asynccontextmanager用于创建异步上下文管理器 | ||
16 | |||
17 | # 创建一个scheduler实例 | 12 | # 创建一个scheduler实例 |
18 | scheduler = AsyncIOScheduler() | 13 | scheduler = AsyncIOScheduler() |
19 | 14 | ... | ... |
This diff could not be displayed because it is too large.
1 | import logging | 1 | import logging |
2 | import os | 2 | import os |
3 | import platform | ||
3 | import sys | 4 | import sys |
4 | 5 | ||
5 | # 把当前文件所在文件夹的父文件夹路径加入到PYTHONPATH 解决ModuleNotFoundError: No module named 'app' | ||
6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | 6 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
7 | import uvicorn | 7 | import uvicorn |
8 | from fastapi import FastAPI | 8 | from fastapi import FastAPI |
... | @@ -11,9 +11,14 @@ from app.config.app_config import getAppConfig | ... | @@ -11,9 +11,14 @@ from app.config.app_config import getAppConfig |
11 | from app.event.fastapi_event import lifespan | 11 | from app.event.fastapi_event import lifespan |
12 | from app.config.log_config import getLogConfig | 12 | from app.config.log_config import getLogConfig |
13 | 13 | ||
14 | |||
14 | app = FastAPI(lifespan=lifespan) | 15 | app = FastAPI(lifespan=lifespan) |
15 | app.include_router(spider_router.spiderRouter) | 16 | app.include_router(spider_router.spiderRouter) |
16 | if __name__ == "__main__": | 17 | if __name__ == "__main__": |
18 | if platform.system() == "Windows": | ||
19 | import asyncio | ||
20 | |||
21 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) | ||
17 | appConf = getAppConfig() | 22 | appConf = getAppConfig() |
18 | uvicorn.run( | 23 | uvicorn.run( |
19 | "main:app", host="0.0.0.0", port=appConf.server.port, log_config=getLogConfig() | 24 | "main:app", host="0.0.0.0", port=appConf.server.port, log_config=getLogConfig() | ... | ... |
... | @@ -20,8 +20,8 @@ async def read_users(param: SpiderParams): | ... | @@ -20,8 +20,8 @@ async def read_users(param: SpiderParams): |
20 | 20 | ||
21 | @spiderRouter.post("/add_guduo_job", summary="添加一个骨朵爬虫定时任务") | 21 | @spiderRouter.post("/add_guduo_job", summary="添加一个骨朵爬虫定时任务") |
22 | async def add_task_to_job(param: Dict[str, Any]): | 22 | async def add_task_to_job(param: Dict[str, Any]): |
23 | jobParam = SpiderParams(**(param.get('taskParam'))) | 23 | jobParam = SpiderParams(**(param.get('taskParam'))) # type: ignore |
24 | taskInfo = TaskInfo(**(param.get('taskInfo'))) | 24 | taskInfo = TaskInfo(**(param.get('taskInfo'))) # type: ignore |
25 | add_guduo_job(jobParam, taskInfo) | 25 | add_guduo_job(jobParam, taskInfo) |
26 | return '添加骨朵爬虫任务成功' | 26 | return '添加骨朵爬虫任务成功' |
27 | 27 | ... | ... |
-
Please register or sign in to post a comment