fix bugs
This commit is contained in:
parent
1235bedf3f
commit
3ff94096db
4
app.py
4
app.py
|
|
@ -118,8 +118,8 @@ async def close_vnc_server(display: int) -> str:
|
||||||
@mcp.resource("data://spider_code")
|
@mcp.resource("data://spider_code")
|
||||||
def get_spider_code_list() -> str:
|
def get_spider_code_list() -> str:
|
||||||
spider_list = [
|
spider_list = [
|
||||||
{"code": "01","name": "阳光采购爬虫"},
|
{"code": "01","name": "阳光采购爬虫","auth_name":"mail"},
|
||||||
{"code": "02","name": "QQ邮箱爬虫"},
|
{"code": "02","name": "QQ邮箱爬虫","auth_name":"mail"},
|
||||||
]
|
]
|
||||||
return json.dumps(spider_list, ensure_ascii=False)
|
return json.dumps(spider_list, ensure_ascii=False)
|
||||||
# 运行服务器
|
# 运行服务器
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import argparse
|
import argparse
|
||||||
|
import time
|
||||||
|
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
|
@ -8,11 +9,11 @@ import logging
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
# 日志会输出到 stderr,不会污染 stdout
|
# 日志会输出到 stderr,不会污染 stdout
|
||||||
logging.info("服务器启动")
|
logging.info("服务器启动")
|
||||||
def save_login_state(auth_file, keyword):
|
def save_login_state(auth_file, keyword, display=0):
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
os.makedirs(os.path.dirname(auth_file), exist_ok=True)
|
os.makedirs(os.path.dirname(auth_file), exist_ok=True)
|
||||||
# headless=False 表示显示浏览器窗口,方便用户扫码登录
|
# headless=False 表示显示浏览器窗口,方便用户扫码登录
|
||||||
browser = p.chromium.launch(headless=False)
|
browser = p.chromium.launch(headless=False,env={"DISPLAY": display})
|
||||||
context = browser.new_context()
|
context = browser.new_context()
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
login_success = False
|
login_success = False
|
||||||
|
|
@ -88,7 +89,8 @@ def extract_cookies_from_auth(auth_file):
|
||||||
def crawl_with_saved_state(auth_file,display=0):
|
def crawl_with_saved_state(auth_file,display=0):
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
# 加载之前保存的登录状态
|
# 加载之前保存的登录状态
|
||||||
browser = p.chromium.launch(headless=False,env={'DISPLAY': display}) # 可以无头模式了
|
logging.info(f"加载认证文件: {auth_file}")
|
||||||
|
browser = p.chromium.launch(headless=False,env={"DISPLAY": display}) # 可以无头模式了
|
||||||
context = browser.new_context(storage_state=auth_file)
|
context = browser.new_context(storage_state=auth_file)
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
page.goto('https://mail.qq.com')
|
page.goto('https://mail.qq.com')
|
||||||
|
|
@ -100,7 +102,7 @@ def crawl_with_saved_state(auth_file,display=0):
|
||||||
# soup = BeautifulSoup(content, 'html.parser')
|
# soup = BeautifulSoup(content, 'html.parser')
|
||||||
items = page.locator('.mail-subject').all()
|
items = page.locator('.mail-subject').all()
|
||||||
for item in items:
|
for item in items:
|
||||||
logging.info(item.text_content())
|
logging.info(f"邮件标题: {item.text_content()}")
|
||||||
|
|
||||||
# page.get_by_title("mail-subject mail-unread").click()
|
# page.get_by_title("mail-subject mail-unread").click()
|
||||||
|
|
||||||
|
|
@ -116,8 +118,19 @@ def start(account, display=0):
|
||||||
exit(1)
|
exit(1)
|
||||||
auth_file_path = "./auth/mail"
|
auth_file_path = "./auth/mail"
|
||||||
file_path = f"{auth_file_path}/{account}.json"
|
file_path = f"{auth_file_path}/{account}.json"
|
||||||
|
logging.info(f"认证文件路径: {file_path}")
|
||||||
if os.path.exists(file_path):
|
if os.path.exists(file_path):
|
||||||
crawl_with_saved_state(file_path, display)
|
# 检查认证文件是否超过48小时
|
||||||
|
file_age_hours = (time.time() - os.path.getmtime(file_path)) / 3600
|
||||||
|
if file_age_hours > 48:
|
||||||
|
logging.warning("认证文件已过期,请重新登录")
|
||||||
|
save_login_state(file_path, "收件箱", display)
|
||||||
else:
|
else:
|
||||||
save_login_state(file_path, "收件箱")
|
logging.info("检测到认证文件,尝试使用保存的登录状态进行爬取")
|
||||||
|
crawl_with_saved_state(file_path,display)
|
||||||
|
else:
|
||||||
|
logging.info("未检测到认证文件,启动登录流程")
|
||||||
|
save_login_state(file_path, "收件箱",display)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
start("123", display=0)
|
||||||
Loading…
Reference in New Issue