Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
文鑫
/
guduo_spider
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
Commit
0c600de3
...
0c600de3ecd57a33c86696558cc14c9cbbf5664d
authored
2024-12-21 19:55:06 +0800
by
wenxin
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
bug修复
1 parent
a43dadc3
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
3 deletions
app/spider/guduo_spider.py
app/spider/guduo_spider.py
View file @
0c600de
...
...
@@ -15,7 +15,7 @@ async def startBrowser(spiderParam: SpiderParams) -> List[GuoDuoSpiderResult]:
url
=
spiderParam
.
url
scrawl_types
=
spiderParam
.
target_type
async
with
async_playwright
()
as
p
:
browser
=
await
p
.
firefox
.
launch
(
headless
=
Fals
e
)
browser
=
await
p
.
firefox
.
launch
(
headless
=
Tru
e
)
# 创建指定个数的浏览器页面
pages
=
await
asyncio
.
gather
(
*
(
browser
.
new_page
()
for
_
in
range
(
len
(
time_range
))))
...
...
@@ -46,6 +46,9 @@ async def hand_one_data(targetDate: str,
f
'[data-pika-month="{month}"]'
f
'[data-pika-day="{day}"]'
)
doc
=
page
.
locator
(
data_css_selector
)
# 判断指定元素是否存在如果不存在就返回空的[]
if
not
await
doc
.
is_visible
():
return
[]
# 点击指定日期
await
doc
.
click
()
# 最后一步修正定位
...
...
@@ -91,8 +94,8 @@ async def get_data(targetPage: Page, scrawl_type: int, target_time: str) -> Opti
if
__name__
==
'__main__'
:
# 获取程序开始执行的时间
start_time
=
time
.
time
()
param
=
SpiderParams
(
startDate
=
'
2024
-01-02'
,
endDate
=
'
2024
-01-05'
,
param
=
SpiderParams
(
startDate
=
'
1991
-01-02'
,
endDate
=
'
1991
-01-05'
,
target_type
=
[
TypeEnum
.
ANIME
,
TypeEnum
.
EPISODES
,
TypeEnum
.
MOVIE
]
,
url
=
'http://guduodata.com/'
)
res
=
asyncio
.
run
(
startBrowser
(
param
))
...
...
Please
register
or
sign in
to post a comment