Commit 2752b396 2752b396f901e220f94b3edd2d23cbfbf696a9e4 by 文鑫

update

1 parent 601ef000
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Black">
<option name="sdkName" value="Spider" />
<option name="enabledOnReformat" value="true" />
<option name="enabledOnSave" value="true" />
<option name="sdkName" value="spider" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Spider" project-jdk-type="Python SDK" />
</project>
\ No newline at end of file
......
import logging
from contextlib import asynccontextmanager
from tortoise import Tortoise
from app.job.job import scheduler
from fastapi import FastAPI
from tortoise import Tortoise
from app.config.tortoise_config import getTortoiseConfig
import logging
from app.job.job import scheduler
logger = logging.getLogger(__name__)
......@@ -22,7 +21,6 @@ async def close():
await Tortoise.close_connections()
# 使用asynccontextmanager装饰器定义一个异步上下文管理器函数lifespan
@asynccontextmanager
async def lifespan(app: FastAPI):
# 开始apscheduler
......@@ -30,22 +28,6 @@ async def lifespan(app: FastAPI):
logging.info("apscheduler启动完成")
await init()
logging.info("初始化数据库完成")
# 获取程序开始执行的时间
start_time = time.time()
param = SpiderParams(
startDate="2024-12-20",
endDate="2024-12-21",
target_type=[
TypeEnum.ANIME,
TypeEnum.EPISODES,
TypeEnum.MOVIE,
TypeEnum.VARIETY,
],
url="http://guduodata.com",
)
print(param.get_time_range())
asyncio.run(scrawl_and_save(param))
print(f"程序执行耗时时间:{(time.time() - start_time) / 1000}")
yield
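# After the yield, i.e. when the lifespan context exits, shut down the APScheduler scheduler. (The original comment spoke of cleaning up machine-learning models, which does not match the code below.)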
scheduler.shutdown()
......
......@@ -2,18 +2,13 @@ import hashlib
import logging
from datetime import datetime, timedelta
from typing import Dict, List, Tuple
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from superstream import Stream
from app.config.app_config import getAppConfig
from app.model.mysql_model import XWebCrawler
from app.schemas.spider_schema import ApschedulerJob, SpiderParams, TaskInfo, TypeEnum
logger = logging.getLogger(__name__)
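# NOTE(review): the original comment here announced an asynccontextmanager import (for building async context managers), but no such import is visible in this module — likely stale; confirm and remove.
# Create the scheduler instance.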
scheduler = AsyncIOScheduler()
......
This diff could not be displayed because it is too large.
import logging
import os
import platform
import sys
# Append the parent of this file's directory to sys.path (the import search path) to fix "ModuleNotFoundError: No module named 'app'".
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import uvicorn
from fastapi import FastAPI
......@@ -11,9 +11,14 @@ from app.config.app_config import getAppConfig
from app.event.fastapi_event import lifespan
from app.config.log_config import getLogConfig
app = FastAPI(lifespan=lifespan)
app.include_router(spider_router.spiderRouter)
if __name__ == "__main__":
if platform.system() == "Windows":
import asyncio
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
appConf = getAppConfig()
uvicorn.run(
"main:app", host="0.0.0.0", port=appConf.server.port, log_config=getLogConfig()
......
......@@ -20,8 +20,8 @@ async def read_users(param: SpiderParams):
# POST /add_guduo_job endpoint: builds a SpiderParams and a TaskInfo from the
# incoming dict and hands both to add_guduo_job.
#
# param: mapping that is read for two keys, 'taskParam' (unpacked as keyword
#        arguments into SpiderParams) and 'taskInfo' (unpacked into TaskInfo);
#        presumably the JSON request body — confirm against the router setup.
# Returns a fixed success message string.
#
# NOTE(review): dict.get returns None when a key is absent, and unpacking None
# with ** raises TypeError; the `# type: ignore` markers only silence the type
# checker and do not guard against that at runtime.
# NOTE(review): the two pairs of assignments below are identical apart from the
# `# type: ignore` marker; this looks like the removed and added sides of a diff
# hunk shown together — confirm which pair belongs to the current revision.
@spiderRouter.post("/add_guduo_job", summary="添加一个骨朵爬虫定时任务")
async def add_task_to_job(param: Dict[str, Any]):
jobParam = SpiderParams(**(param.get('taskParam')))
taskInfo = TaskInfo(**(param.get('taskInfo')))
jobParam = SpiderParams(**(param.get('taskParam'))) # type: ignore
taskInfo = TaskInfo(**(param.get('taskInfo'))) # type: ignore
# Delegate to add_guduo_job with the two constructed objects.
add_guduo_job(jobParam, taskInfo)
return '添加骨朵爬虫任务成功'
......