From 1235bedf3fbd082ffeb47431cdb111878a367b87 Mon Sep 17 00:00:00 2001 From: mshe <666666666@666666666.666666666> Date: Mon, 8 Jun 2026 17:06:44 +0800 Subject: [PATCH] update start.sh --- app.py | 8 ++++---- spider/mail_qq.py | 38 +++++++++++++++++++++----------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/app.py b/app.py index 7c4b082..221f6c9 100755 --- a/app.py +++ b/app.py @@ -1,14 +1,14 @@ import os from fastmcp import FastMCP -from spider.mail_qq import start as a1 +from spider.mail_qq import start as start_mail_qq_spider import asyncio -import logging import json +import logging -mcp = FastMCP("spider-server") logging.basicConfig(level=logging.INFO) # 日志会输出到 stderr,不会污染 stdout logging.info("服务器启动") +mcp = FastMCP("spider-server") VNC_SERVER_HOST = "https://10.10.40.19:6080" SHELL_DIR = "/home/dgs/vnc-server" @@ -24,7 +24,7 @@ async def mail_qq_spider(account: str,display: int) -> str: data={"display": display} try: os.environ['DISPLAY'] = f':{display}' - result = await asyncio.to_thread(a1, account, display) + result = await asyncio.to_thread(start_mail_qq_spider, account, display) data["status"] = 200 data["result"] = result return json.dumps(data, ensure_ascii=False) diff --git a/spider/mail_qq.py b/spider/mail_qq.py index e4d67cc..4343729 100644 --- a/spider/mail_qq.py +++ b/spider/mail_qq.py @@ -3,7 +3,11 @@ import argparse from playwright.sync_api import sync_playwright +import logging +logging.basicConfig(level=logging.INFO) +# 日志会输出到 stderr,不会污染 stdout +logging.info("服务器启动") def save_login_state(auth_file, keyword): with sync_playwright() as p: os.makedirs(os.path.dirname(auth_file), exist_ok=True) @@ -17,17 +21,17 @@ def save_login_state(auth_file, keyword): nonlocal login_success if '/login_jump' in response.url: if response.status == 200: - print(f"[网络监听] 检测到登录API响应成功") - print(f" - 请求地址: {response.url}") - print(f" - 状态码: {response.status}") + logging.info(f"[网络监听] 检测到登录API响应成功") + logging.info(f" - 请求地址: {response.url}") + logging.info(f" - 状态码: {response.status}") login_success = True def on_url_change(frame): nonlocal login_success current_url = page.url if 'home/index?sid' in current_url: - print(f"[URL监听] 检测到页面已跳转") - print(f" - 当前URL: {current_url}") + logging.info(f"[URL监听] 检测到页面已跳转") + logging.info(f" - 当前URL: {current_url}") login_success = True # 其他可用的事件:'request'(请求发送时)、'requestfailed'(请求失败时)、'requestfinished'(请求完成时) @@ -38,23 +42,23 @@ def save_login_state(auth_file, keyword): try: # 毫秒单位,300000ms = 300秒 = 5分钟 page.wait_for_function(f'document.body.innerText.includes("{keyword}")', timeout=300000) - print(f"[文本监听] 检测到'{keyword}'文字,登录确认成功!") + logging.info(f"[文本监听] 检测到'{keyword}'文字,登录确认成功!") except Exception as e: - print(f"[警告] 未检测到'{keyword}'文字,可能页面结构有变化或登录超时") - print(f" 错误信息: {e}") + logging.warning(f"[警告] 未检测到'{keyword}'文字,可能页面结构有变化或登录超时") + logging.error(f" 错误信息: {e}") if login_success: - print("已确认登录成功!") + logging.info("已确认登录成功!") else: - print("\n⚠️ 未明确检测到登录成功标志,但仍将保存当前状态") - print(" 如果登录成功,状态应该是有效的") + logging.warning("\n⚠️ 未明确检测到登录成功标志,但仍将保存当前状态") + logging.info(" 如果登录成功,状态应该是有效的") page.wait_for_timeout(5000) # 2000毫秒 = 2秒 context.storage_state(path=auth_file) - print(f"✅ 登录状态已保存到: {auth_file}") + logging.info(f"✅ 登录状态已保存到: {auth_file}") browser.close() - print("浏览器已关闭") + logging.info("浏览器已关闭") # 1. 从 Playwright 保存的 auth.json 中提取 cookies @@ -88,7 +92,7 @@ def crawl_with_saved_state(auth_file,display=0): context = browser.new_context(storage_state=auth_file) page = context.new_page() page.goto('https://mail.qq.com') - print("当前URL:", page.url) + logging.info(f"当前URL: {page.url}") page.wait_for_load_state('networkidle') page.wait_for_timeout(2000) # 等待2秒 # 可以使用 BeautifulSoup 解析 @@ -96,7 +100,7 @@ def crawl_with_saved_state(auth_file,display=0): # soup = BeautifulSoup(content, 'html.parser') items = page.locator('.mail-subject').all() for item in items: - print(item.text_content()) + logging.info(item.text_content()) # page.get_by_title("mail-subject mail-unread").click() @@ -106,9 +110,9 @@ def crawl_with_saved_state(auth_file,display=0): browser.close() def start(account, display=0): - print(f"用户名{account}") + logging.info(f"用户名{account}") if not account: - print("请输入用户名") + logging.error("请输入用户名") exit(1) auth_file_path = "./auth/mail" file_path = f"{auth_file_path}/{account}.json"