From 3ff94096db5be12fa666c2f51a0bf60af8099d95 Mon Sep 17 00:00:00 2001 From: mshe <666666666@666666666.666666666> Date: Mon, 8 Jun 2026 17:22:14 +0800 Subject: [PATCH] fix bugs --- app.py | 4 ++-- spider/mail_qq.py | 25 +++++++++++++++++++------ 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/app.py b/app.py index 221f6c9..0afcfb3 100755 --- a/app.py +++ b/app.py @@ -118,8 +118,8 @@ async def close_vnc_server(display: int) -> str: @mcp.resource("data://spider_code") def get_spider_code_list() -> str: spider_list = [ - {"code": "01","name": "阳光采购爬虫"}, - {"code": "02","name": "QQ邮箱爬虫"}, + {"code": "01","name": "阳光采购爬虫","auth_name":"mail"}, + {"code": "02","name": "QQ邮箱爬虫","auth_name":"mail"}, ] return json.dumps(spider_list, ensure_ascii=False) # 运行服务器 diff --git a/spider/mail_qq.py b/spider/mail_qq.py index 4343729..cb1d330 100644 --- a/spider/mail_qq.py +++ b/spider/mail_qq.py @@ -1,5 +1,6 @@ import os import argparse +import time from playwright.sync_api import sync_playwright @@ -8,11 +9,11 @@ import logging logging.basicConfig(level=logging.INFO) # 日志会输出到 stderr,不会污染 stdout logging.info("服务器启动") -def save_login_state(auth_file, keyword): +def save_login_state(auth_file, keyword, display=0): with sync_playwright() as p: os.makedirs(os.path.dirname(auth_file), exist_ok=True) # headless=False 表示显示浏览器窗口,方便用户扫码登录 - browser = p.chromium.launch(headless=False) + browser = p.chromium.launch(headless=False,env={"DISPLAY": display}) context = browser.new_context() page = context.new_page() login_success = False @@ -88,7 +89,8 @@ def extract_cookies_from_auth(auth_file): def crawl_with_saved_state(auth_file,display=0): with sync_playwright() as p: # 加载之前保存的登录状态 - browser = p.chromium.launch(headless=False,env={'DISPLAY': display}) # 可以无头模式了 + logging.info(f"加载认证文件: {auth_file}") + browser = p.chromium.launch(headless=False,env={"DISPLAY": display}) # 可以无头模式了 context = browser.new_context(storage_state=auth_file) page = context.new_page() page.goto('https://mail.qq.com') @@ -100,7 +102,7 @@ def crawl_with_saved_state(auth_file,display=0): # soup = BeautifulSoup(content, 'html.parser') items = page.locator('.mail-subject').all() for item in items: - logging.info(item.text_content()) + logging.info(f"邮件标题: {item.text_content()}") # page.get_by_title("mail-subject mail-unread").click() @@ -116,8 +118,19 @@ def start(account, display=0): exit(1) auth_file_path = "./auth/mail" file_path = f"{auth_file_path}/{account}.json" + logging.info(f"认证文件路径: {file_path}") if os.path.exists(file_path): - crawl_with_saved_state(file_path, display) + # 检查认证文件是否超过48小时 + file_age_hours = (time.time() - os.path.getmtime(file_path)) / 3600 + if file_age_hours > 48: + logging.warning("认证文件已过期,请重新登录") + save_login_state(file_path, "收件箱", display) + else: + logging.info("检测到认证文件,尝试使用保存的登录状态进行爬取") + crawl_with_saved_state(file_path,display) else: - save_login_state(file_path, "收件箱") + logging.info("未检测到认证文件,启动登录流程") + save_login_state(file_path, "收件箱",display) +if __name__ == '__main__': + start("123", display=0) \ No newline at end of file