Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
文鑫
/
guduo_spider
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Commit
2752b396
...
2752b396f901e220f94b3edd2d23cbfbf696a9e4
authored
2024-12-25 21:29:48 +0800
by
文鑫
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
update
1 parent
601ef000
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
14 additions
and
30 deletions
.idea/misc.xml
app/event/fastapi_event.py
app/job/job.py
app/logs/app.log
app/main.py
app/router/spider_router.py
.idea/misc.xml
View file @
2752b39
<?xml version="1.0" encoding="UTF-8"?>
<project
version=
"4"
>
<component
name=
"Black"
>
<option
name=
"sdkName"
value=
"Spider"
/>
<option
name=
"enabledOnReformat"
value=
"true"
/>
<option
name=
"enabledOnSave"
value=
"true"
/>
<option
name=
"sdkName"
value=
"spider"
/>
</component>
<component
name=
"ProjectRootManager"
version=
"2"
project-jdk-name=
"Spider"
project-jdk-type=
"Python SDK"
/>
</project>
\ No newline at end of file
...
...
app/event/fastapi_event.py
View file @
2752b39
import
logging
from
contextlib
import
asynccontextmanager
from
tortoise
import
Tortoise
from
app.job.job
import
scheduler
from
fastapi
import
FastAPI
from
tortoise
import
Tortoise
from
app.config.tortoise_config
import
getTortoiseConfig
import
logging
from
app.job.job
import
scheduler
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -22,7 +21,6 @@ async def close():
await
Tortoise
.
close_connections
()
# 使用asynccontextmanager装饰器定义一个异步上下文管理器函数lifespan
@asynccontextmanager
async
def
lifespan
(
app
:
FastAPI
):
# 开始apscheduler
...
...
@@ -30,22 +28,6 @@ async def lifespan(app: FastAPI):
logging
.
info
(
"apscheduler启动完成"
)
await
init
()
logging
.
info
(
"初始化数据库完成"
)
# 获取程序开始执行的时间
start_time
=
time
.
time
()
param
=
SpiderParams
(
startDate
=
"2024-12-20"
,
endDate
=
"2024-12-21"
,
target_type
=
[
TypeEnum
.
ANIME
,
TypeEnum
.
EPISODES
,
TypeEnum
.
MOVIE
,
TypeEnum
.
VARIETY
,
],
url
=
"http://guduodata.com"
,
)
print
(
param
.
get_time_range
())
asyncio
.
run
(
scrawl_and_save
(
param
))
print
(
f
"程序执行耗时时间:{(time.time() - start_time) / 1000}"
)
yield
# 在异步上下文管理器中,"退出上下文"时清理机器学习模型,释放资源
scheduler
.
shutdown
()
...
...
app/job/job.py
View file @
2752b39
...
...
@@ -2,18 +2,13 @@ import hashlib
import
logging
from
datetime
import
datetime
,
timedelta
from
typing
import
Dict
,
List
,
Tuple
from
apscheduler.schedulers.asyncio
import
AsyncIOScheduler
from
superstream
import
Stream
from
app.config.app_config
import
getAppConfig
from
app.model.mysql_model
import
XWebCrawler
from
app.schemas.spider_schema
import
ApschedulerJob
,
SpiderParams
,
TaskInfo
,
TypeEnum
logger
=
logging
.
getLogger
(
__name__
)
# 导入asynccontextmanager用于创建异步上下文管理器
# 创建一个scheduler实例
scheduler
=
AsyncIOScheduler
()
...
...
app/logs/app.log
View file @
2752b39
This diff could not be displayed because it is too large.
app/main.py
View file @
2752b39
import
logging
import
os
import
platform
import
sys
# 把当前文件所在文件夹的父文件夹路径加入到PYTHONPATH 解决ModuleNotFoundError: No module named 'app'
sys
.
path
.
append
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
import
uvicorn
from
fastapi
import
FastAPI
...
...
@@ -11,9 +11,14 @@ from app.config.app_config import getAppConfig
from
app.event.fastapi_event
import
lifespan
from
app.config.log_config
import
getLogConfig
app
=
FastAPI
(
lifespan
=
lifespan
)
app
.
include_router
(
spider_router
.
spiderRouter
)
if
__name__
==
"__main__"
:
if
platform
.
system
()
==
"Windows"
:
import
asyncio
asyncio
.
set_event_loop_policy
(
asyncio
.
WindowsSelectorEventLoopPolicy
())
appConf
=
getAppConfig
()
uvicorn
.
run
(
"main:app"
,
host
=
"0.0.0.0"
,
port
=
appConf
.
server
.
port
,
log_config
=
getLogConfig
()
...
...
app/router/spider_router.py
View file @
2752b39
...
...
@@ -20,8 +20,8 @@ async def read_users(param: SpiderParams):
@spiderRouter.post
(
"/add_guduo_job"
,
summary
=
"添加一个骨朵爬虫定时任务"
)
async
def
add_task_to_job
(
param
:
Dict
[
str
,
Any
]):
jobParam
=
SpiderParams
(
**
(
param
.
get
(
'taskParam'
)))
taskInfo
=
TaskInfo
(
**
(
param
.
get
(
'taskInfo'
)))
jobParam
=
SpiderParams
(
**
(
param
.
get
(
'taskParam'
)))
# type: ignore
taskInfo
=
TaskInfo
(
**
(
param
.
get
(
'taskInfo'
)))
# type: ignore
add_guduo_job
(
jobParam
,
taskInfo
)
return
'添加骨朵爬虫任务成功'
...
...
Please
register
or
sign in
to post a comment