update start.sh
This commit is contained in:
parent
a6e2aed482
commit
1235bedf3f
8
app.py
8
app.py
|
|
@ -1,14 +1,14 @@
|
||||||
import os
|
import os
|
||||||
from fastmcp import FastMCP
|
from fastmcp import FastMCP
|
||||||
from spider.mail_qq import start as a1
|
from spider.mail_qq import start as start_mail_qq_spider
|
||||||
import asyncio
|
import asyncio
|
||||||
import logging
|
|
||||||
import json
|
import json
|
||||||
|
import logging
|
||||||
|
|
||||||
mcp = FastMCP("spider-server")
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
# 日志会输出到 stderr,不会污染 stdout
|
# 日志会输出到 stderr,不会污染 stdout
|
||||||
logging.info("服务器启动")
|
logging.info("服务器启动")
|
||||||
|
mcp = FastMCP("spider-server")
|
||||||
|
|
||||||
VNC_SERVER_HOST = "https://10.10.40.19:6080"
|
VNC_SERVER_HOST = "https://10.10.40.19:6080"
|
||||||
SHELL_DIR = "/home/dgs/vnc-server"
|
SHELL_DIR = "/home/dgs/vnc-server"
|
||||||
|
|
@ -24,7 +24,7 @@ async def mail_qq_spider(account: str,display: int) -> str:
|
||||||
data={"display": display}
|
data={"display": display}
|
||||||
try:
|
try:
|
||||||
os.environ['DISPLAY'] = f':{display}'
|
os.environ['DISPLAY'] = f':{display}'
|
||||||
result = await asyncio.to_thread(a1, account, display)
|
result = await asyncio.to_thread(start_mail_qq_spider, account, display)
|
||||||
data["status"] = 200
|
data["status"] = 200
|
||||||
data["result"] = result
|
data["result"] = result
|
||||||
return json.dumps(data, ensure_ascii=False)
|
return json.dumps(data, ensure_ascii=False)
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,11 @@ import argparse
|
||||||
|
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
# 日志会输出到 stderr,不会污染 stdout
|
||||||
|
logging.info("服务器启动")
|
||||||
def save_login_state(auth_file, keyword):
|
def save_login_state(auth_file, keyword):
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
os.makedirs(os.path.dirname(auth_file), exist_ok=True)
|
os.makedirs(os.path.dirname(auth_file), exist_ok=True)
|
||||||
|
|
@ -17,17 +21,17 @@ def save_login_state(auth_file, keyword):
|
||||||
nonlocal login_success
|
nonlocal login_success
|
||||||
if '/login_jump' in response.url:
|
if '/login_jump' in response.url:
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
print(f"[网络监听] 检测到登录API响应成功")
|
logging.info(f"[网络监听] 检测到登录API响应成功")
|
||||||
print(f" - 请求地址: {response.url}")
|
logging.info(f" - 请求地址: {response.url}")
|
||||||
print(f" - 状态码: {response.status}")
|
logging.info(f" - 状态码: {response.status}")
|
||||||
login_success = True
|
login_success = True
|
||||||
|
|
||||||
def on_url_change(frame):
|
def on_url_change(frame):
|
||||||
nonlocal login_success
|
nonlocal login_success
|
||||||
current_url = page.url
|
current_url = page.url
|
||||||
if 'home/index?sid' in current_url:
|
if 'home/index?sid' in current_url:
|
||||||
print(f"[URL监听] 检测到页面已跳转")
|
logging.info(f"[URL监听] 检测到页面已跳转")
|
||||||
print(f" - 当前URL: {current_url}")
|
logging.info(f" - 当前URL: {current_url}")
|
||||||
login_success = True
|
login_success = True
|
||||||
|
|
||||||
# 其他可用的事件:'request'(请求发送时)、'requestfailed'(请求失败时)、'requestfinished'(请求完成时)
|
# 其他可用的事件:'request'(请求发送时)、'requestfailed'(请求失败时)、'requestfinished'(请求完成时)
|
||||||
|
|
@ -38,23 +42,23 @@ def save_login_state(auth_file, keyword):
|
||||||
try:
|
try:
|
||||||
# 毫秒单位,300000ms = 300秒 = 5分钟
|
# 毫秒单位,300000ms = 300秒 = 5分钟
|
||||||
page.wait_for_function(f'document.body.innerText.includes("{keyword}")', timeout=300000)
|
page.wait_for_function(f'document.body.innerText.includes("{keyword}")', timeout=300000)
|
||||||
print(f"[文本监听] 检测到'{keyword}'文字,登录确认成功!")
|
logging.info(f"[文本监听] 检测到'{keyword}'文字,登录确认成功!")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"[警告] 未检测到'{keyword}'文字,可能页面结构有变化或登录超时")
|
logging.warning(f"[警告] 未检测到'{keyword}'文字,可能页面结构有变化或登录超时")
|
||||||
print(f" 错误信息: {e}")
|
logging.error(f" 错误信息: {e}")
|
||||||
|
|
||||||
if login_success:
|
if login_success:
|
||||||
print("已确认登录成功!")
|
logging.info("已确认登录成功!")
|
||||||
else:
|
else:
|
||||||
print("\n⚠️ 未明确检测到登录成功标志,但仍将保存当前状态")
|
logging.warning("\n⚠️ 未明确检测到登录成功标志,但仍将保存当前状态")
|
||||||
print(" 如果登录成功,状态应该是有效的")
|
logging.info(" 如果登录成功,状态应该是有效的")
|
||||||
|
|
||||||
page.wait_for_timeout(5000) # 2000毫秒 = 2秒
|
page.wait_for_timeout(5000) # 2000毫秒 = 2秒
|
||||||
context.storage_state(path=auth_file)
|
context.storage_state(path=auth_file)
|
||||||
print(f"✅ 登录状态已保存到: {auth_file}")
|
logging.info(f"✅ 登录状态已保存到: {auth_file}")
|
||||||
browser.close()
|
browser.close()
|
||||||
print("浏览器已关闭")
|
logging.info("浏览器已关闭")
|
||||||
|
|
||||||
|
|
||||||
# 1. 从 Playwright 保存的 auth.json 中提取 cookies
|
# 1. 从 Playwright 保存的 auth.json 中提取 cookies
|
||||||
|
|
@ -88,7 +92,7 @@ def crawl_with_saved_state(auth_file,display=0):
|
||||||
context = browser.new_context(storage_state=auth_file)
|
context = browser.new_context(storage_state=auth_file)
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
page.goto('https://mail.qq.com')
|
page.goto('https://mail.qq.com')
|
||||||
print("当前URL:", page.url)
|
logging.info(f"当前URL: {page.url}")
|
||||||
page.wait_for_load_state('networkidle')
|
page.wait_for_load_state('networkidle')
|
||||||
page.wait_for_timeout(2000) # 等待2秒
|
page.wait_for_timeout(2000) # 等待2秒
|
||||||
# 可以使用 BeautifulSoup 解析
|
# 可以使用 BeautifulSoup 解析
|
||||||
|
|
@ -96,7 +100,7 @@ def crawl_with_saved_state(auth_file,display=0):
|
||||||
# soup = BeautifulSoup(content, 'html.parser')
|
# soup = BeautifulSoup(content, 'html.parser')
|
||||||
items = page.locator('.mail-subject').all()
|
items = page.locator('.mail-subject').all()
|
||||||
for item in items:
|
for item in items:
|
||||||
print(item.text_content())
|
logging.info(item.text_content())
|
||||||
|
|
||||||
# page.get_by_title("mail-subject mail-unread").click()
|
# page.get_by_title("mail-subject mail-unread").click()
|
||||||
|
|
||||||
|
|
@ -106,9 +110,9 @@ def crawl_with_saved_state(auth_file,display=0):
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|
||||||
def start(account, display=0):
|
def start(account, display=0):
|
||||||
print(f"用户名{account}")
|
logging.info(f"用户名{account}")
|
||||||
if not account:
|
if not account:
|
||||||
print("请输入用户名")
|
logging.error("请输入用户名")
|
||||||
exit(1)
|
exit(1)
|
||||||
auth_file_path = "./auth/mail"
|
auth_file_path = "./auth/mail"
|
||||||
file_path = f"{auth_file_path}/{account}.json"
|
file_path = f"{auth_file_path}/{account}.json"
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue