Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
文鑫
/
guduo_spider
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Commit
c2536ff1
...
c2536ff1be0d0bb9c05643326a6712c129642b59
authored
2024-12-26 10:23:05 +0800
by
wenxin
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
update
1 parent
6c117fc4
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
18 additions
and
34 deletions
app/config/tortoise_config.py
app/event/fastapi_event.py
app/model/mysql_model.py
app/schemas/spider_schema.py
app/service/spider_job_service.py
app/spider/http_spider.py
app/config/tortoise_config.py
View file @
c2536ff
...
...
@@ -6,8 +6,9 @@ import logging
logger
=
logging
.
getLogger
(
__name__
)
@functools.lru_cache
()
def
getTortoiseConfig
()
->
TortoiseConfig
:
def
getTortoiseConfig
()
->
TortoiseConfig
:
logger
.
info
(
'开始加载TortoiseConfig'
)
# 获取当前文件的绝对路径
current_file_path
=
os
.
path
.
abspath
(
__file__
)
...
...
app/event/fastapi_event.py
View file @
c2536ff
import
logging
from
contextlib
import
asynccontextmanager
from
fastapi
import
FastAPI
# noinspection PyPackageRequirements
from
tortoise
import
Tortoise
from
app.config.tortoise_config
import
getTortoiseConfig
from
app.job.job
import
scheduler
from
fastapi
import
FastAPI
from
app.config.tortoise_config
import
getTortoiseConfig
import
logging
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -21,13 +23,14 @@ async def close():
await
Tortoise
.
close_connections
()
# 使用asynccontextmanager装饰器定义一个异步上下文管理器函数lifespan
# noinspection PyUnusedLocal
@asynccontextmanager
async
def
lifespan
(
app
:
FastAPI
):
# 开始apscheduler
scheduler
.
start
()
logging
.
info
(
"apscheduler启动完成"
)
await
init
()
logging
.
info
(
"初始化数据库完成"
)
yield
# 在异步上下文管理器中,"退出上下文"时清理机器学习模型,释放资源
scheduler
.
shutdown
()
...
...
app/model/mysql_model.py
View file @
c2536ff
from
datetime
import
datetime
# noinspection PyPackageRequirements
from
tortoise
import
Model
,
fields
from
app.schemas.spider_schema
import
TypeEnum
...
...
app/schemas/spider_schema.py
View file @
c2536ff
...
...
@@ -132,24 +132,4 @@ class ResponseModel(BaseModel):
extra
:
Optional
[
dict
]
=
None
if
__name__
==
"__main__"
:
from
datetime
import
datetime
# 时间戳(以毫秒为单位)
release_date_timestamp
=
1573488000000
offline_date_timestamp
=
1576080000000
# 将时间戳转换为datetime对象,并转换为'yyyy-MM-dd'格式
def
timestamp_to_date_string
(
timestamp_ms
):
# Convert from milliseconds to seconds by dividing by 1000, then use fromtimestamp
dt_object
=
datetime
.
fromtimestamp
(
timestamp_ms
/
1000
)
return
dt_object
.
strftime
(
'
%
Y-
%
m-
%
d'
)
# 调用函数并打印结果
formatted_release_date
=
timestamp_to_date_string
(
release_date_timestamp
)
formatted_offline_date
=
timestamp_to_date_string
(
offline_date_timestamp
)
print
(
f
"Release Date: {formatted_release_date}"
)
print
(
f
"Offline Date: {formatted_offline_date}"
)
...
...
app/service/spider_job_service.py
View file @
c2536ff
import
asyncio
import
logging
import
time
from
datetime
import
datetime
from
typing
import
List
,
Any
from
app.job.job
import
scheduler
from
app.model.mysql_model
import
SpiderModel
from
app.schemas.safe_contrainer
import
SafeDict
...
...
@@ -10,8 +10,7 @@ from app.schemas.spider_schema import (
ApschedulerJob
,
TaskInfo
,
SpiderParams
,
GuoDuoSpiderResult
,
TypeEnum
,
)
GuoDuoSpiderResult
,
)
from
app.spider.http_spider
import
get_score_data
logger
=
logging
.
getLogger
(
__name__
)
...
...
app/spider/http_spider.py
View file @
c2536ff
import
asyncio
import
datetime
import
time
import
logging
from
typing
import
List
,
Dict
,
Tuple
import
aiohttp
from
superstream
import
Stream
from
tenacity
import
retry
,
stop_after_attempt
,
before_sleep_log
,
wait_exponential
,
after_log
import
logging
from
tqdm.asyncio
import
tqdm_asyncio
from
app.config.app_config
import
getAppConfig
from
app.schemas.config_schema
import
UrlTemplateInfo
from
app.schemas.spider_schema
import
SpiderParams
,
TypeEnum
,
GuoDuoSpiderResult
,
ResponseModel
from
app.config.app_config
import
getAppConfig
from
superstream
import
Stream
from
tqdm.asyncio
import
tqdm_asyncio
logger
=
logging
.
getLogger
(
__name__
)
...
...
Please
register
or
sign in
to post a comment