learn-spider/app.py

173 lines
5.4 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
from fastmcp import FastMCP
from spider.mail_qq import start as start_mail_qq_spider
import asyncio
import json
from typing import Optional
import subprocess
import logging
# Configure the root logger at INFO level.
logging.basicConfig(level=logging.INFO)
# With no explicit stream/handler, basicConfig logs to stderr, so log output
# does not pollute stdout.
logging.info("服务器启动")
# MCP server instance; the @mcp.tool() / @mcp.resource() decorators in this
# file register their functions on it.
mcp = FastMCP("spider-server")
# NOTE(review): not referenced in the visible part of this file; presumably
# the base URL of the websockify/noVNC front end -- confirm before relying on it.
VNC_SERVER_HOST = "https://10.10.40.19:6080"
# Directory containing the helper shell scripts (start_spider.sh is executed
# from here). Previous location, kept for reference:
# SHELL_DIR = "/home/dgs/vnc-server"
SHELL_DIR = "./bin"
@mcp.tool()
async def mail_qq_spider(account: str, display: int) -> str:
    """Run the QQ Mail spider and report the outcome as a JSON string.

    ``start_mail_qq_spider`` is invoked through ``asyncio.to_thread`` so that
    it runs in a worker thread rather than on the event loop.

    Args:
        account: QQ Mail user name.
        display: Number of the display (desktop) handed to the spider.

    Returns:
        A JSON object string that always contains ``display`` and ``status``
        (200 on success, 500 on failure). On success it also contains
        ``result`` (the spider's return value); on failure it contains
        ``message`` with the exception text.
    """
    data = {"display": display}
    try:
        data["result"] = await asyncio.to_thread(
            start_mail_qq_spider, account, display
        )
        data["status"] = 200
        # Serialize inside the try so an unserializable result is reported
        # as a failure instead of escaping as an unhandled exception.
        return json.dumps(data, ensure_ascii=False)
    except Exception as e:
        logging.exception("mail_qq_spider failed (display=%s)", display)
        # Drop the result: it may be the very value that failed to serialize,
        # and leaving it in place would make the error response fail too.
        data.pop("result", None)
        data["status"] = 500
        data["message"] = str(e)
        return json.dumps(data, ensure_ascii=False)
@mcp.tool()
async def start_vnc_server(instance: int = 1, password: str = "123456") -> str:
    """Start a dedicated VNC + Chrome container with ``docker run``.

    The container is named ``vnc-browser-<instance>`` and is started detached
    with ``--rm``. Host ports are derived from the instance number: instance 1
    maps 5900 (VNC), 6080 (websockify) and 9223 (CDP); each further instance
    shifts all three host ports up by one.

    Args:
        instance: Instance number used for the container name and port offset.
        password: Value passed to the container as ``VNC_PASSWORD``.

    Returns:
        A JSON object string with ``instance``, ``status`` (200 or 500),
        ``action`` and ``message``. On success it additionally carries
        ``vnc_port``, ``websockify_port``, ``cdp_port`` and ``password``.
    """
    data = {"instance": instance, "status": 200, "action": "start"}
    try:
        offset = instance - 1
        vnc_port = 5900 + offset
        websockify_port = 6080 + offset
        cdp_port = 9223 + offset

        command = [
            "docker", "run", "-d",
            "--rm",
            "--name", f"vnc-browser-{instance}",
            "-p", f"{cdp_port}:9223",
            "-p", f"{vnc_port}:5900",
            "-p", f"{websockify_port}:6080",
            "-e", f"VNC_PASSWORD={password}",
            "--shm-size=2gb",
            "vnc-server:latest",
        ]
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await process.communicate()

        if process.returncode != 0:
            # Prefer docker's stderr; fall back to stdout, then a fixed text.
            data["status"] = 500
            data["message"] = stderr.decode() or stdout.decode() or "Unknown error"
        else:
            data.update(
                {
                    "message": f"Container vnc-browser-{instance} started",
                    "vnc_port": vnc_port,
                    "websockify_port": websockify_port,
                    "cdp_port": cdp_port,
                    "password": password,
                }
            )
        return json.dumps(data, ensure_ascii=False)
    except Exception as exc:
        data["status"] = 500
        data["message"] = str(exc)
        return json.dumps(data, ensure_ascii=False)
@mcp.tool()
async def stop_vnc_server(instance: int = 1, keep_data: bool = False) -> str:
    """Stop the VNC container that belongs to the given instance.

    Runs ``docker stop vnc-browser-<instance>``. Unless ``keep_data`` is set,
    it then runs ``docker rm`` on the same container as a best-effort
    clean-up: a failing ``docker rm`` is logged but does not change the
    reported status.

    Args:
        instance: Instance number (documented range 1-10; not enforced here).
        keep_data: Keep the stopped container instead of removing it.
            Defaults to False, i.e. the container is removed.

    Returns:
        A JSON object string with ``instance``, ``status`` (200 or 500),
        ``action``, ``keep_data`` and ``message``.
    """
    data = {
        "instance": instance,
        "status": 200,
        "action": "stop",
        "keep_data": keep_data,
    }
    try:
        container_name = f"vnc-browser-{instance}"
        # Stop the container.
        process = await asyncio.create_subprocess_exec(
            "docker", "stop", container_name,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await process.communicate()
        if process.returncode == 0:
            if not keep_data:
                # Remove the container. The process must be awaited so its
                # pipes are drained and it is reaped; previously it was
                # spawned and then abandoned.
                rm_process = await asyncio.create_subprocess_exec(
                    "docker", "rm", container_name,
                    stdout=asyncio.subprocess.PIPE,
                    stderr=asyncio.subprocess.PIPE,
                )
                _, rm_stderr = await rm_process.communicate()
                if rm_process.returncode != 0:
                    # Expected for containers started with --rm, which docker
                    # removes on its own once they stop.
                    logging.info(
                        "docker rm %s exited with %s: %s",
                        container_name,
                        rm_process.returncode,
                        rm_stderr.decode(errors="replace").strip(),
                    )
            data["message"] = f"Container {container_name} stopped"
        else:
            data["status"] = 500
            data["message"] = stderr.decode() or stdout.decode() or "Unknown error"
        return json.dumps(data, ensure_ascii=False)
    except Exception as e:
        data["status"] = 500
        data["message"] = str(e)
        return json.dumps(data, ensure_ascii=False)
@mcp.tool()
async def start_spider(spider_code: str, display: str) -> str:
    """Start a spider by running the external ``start_spider.sh`` script.

    The script is looked up in ``SHELL_DIR`` and receives ``spider_code`` and
    ``display`` as its two positional arguments.

    Args:
        spider_code: Code identifying the spider to start.
        display: Number of the display (desktop) to run on.

    Returns:
        A JSON object string with ``display``, ``message`` and ``status``.
        ``status`` is 200 when the script exits with code 0 and 500 otherwise.
        ``message`` is the script's stdout on success; on failure it is the
        script's stderr (falling back to stdout), or the exception text when
        the script could not be run at all.
    """
    data = {"display": display}
    try:
        process = await asyncio.create_subprocess_exec(
            f"{SHELL_DIR}/start_spider.sh",
            spider_code,
            # Program arguments must be strings; tolerate an integer display.
            str(display),
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        stdout, stderr = await process.communicate()
        if process.returncode == 0:
            data["message"] = stdout.decode()
            data["status"] = 200
        else:
            # Surface the script's diagnostics instead of discarding stderr.
            data["message"] = stderr.decode() or stdout.decode() or "Unknown error"
            data["status"] = 500
        return json.dumps(data, ensure_ascii=False)
    except Exception as e:
        data["status"] = 500
        data["message"] = str(e)
        return json.dumps(data, ensure_ascii=False)
@mcp.resource("data://spider_code")
def get_spider_code_list() -> str:
    """Return the list of known spiders as a JSON array string.

    Each entry is an object with the keys ``code``, ``name`` and
    ``auth_name``. Non-ASCII characters are emitted unescaped.
    """
    # (code, name, auth_name) for every registered spider.
    catalogue = (
        ("01", "阳光采购爬虫", "mail"),
        ("02", "QQ邮箱爬虫", "mail"),
    )
    entries = [
        {"code": code, "name": name, "auth_name": auth_name}
        for code, name, auth_name in catalogue
    ]
    return json.dumps(entries, ensure_ascii=False)
# Script entry point: run the MCP server over the SSE transport.
# To serve over stdio instead, use: mcp.run(transport="stdio")
if __name__ == "__main__":
    mcp.run(transport="sse")