120 lines
4.1 KiB
Python
120 lines
4.1 KiB
Python
import argparse
import json
import os

from playwright.sync_api import sync_playwright
|
||
|
||
|
||
def save_login_state(auth_file, keyword):
    """Open a visible browser, wait for a manual login, and save the session.

    The function navigates to https://mail.qq.com, then waits up to five
    minutes for ``keyword`` to appear in the page's body text. While waiting
    it also watches network responses and frame navigations for signs of a
    successful login. The browser storage state is written to ``auth_file``
    whether or not a login was positively detected.

    Args:
        auth_file: Path of the JSON file the storage state is written to.
            Its parent directory is created if it does not exist.
        keyword: Text whose presence in the page body is treated as
            confirmation that the user is logged in.
    """
    # os.path.dirname() returns "" for a bare file name and os.makedirs("")
    # raises FileNotFoundError, so only create a directory when there is one.
    auth_dir = os.path.dirname(auth_file)
    if auth_dir:
        os.makedirs(auth_dir, exist_ok=True)

    with sync_playwright() as p:
        # headless=False shows the browser window so the user can scan the
        # QR code to log in.
        browser = p.chromium.launch(headless=False)
        try:
            context = browser.new_context()
            page = context.new_page()
            login_success = False

            def on_response(response):
                """Mark the login as successful on a 200 from the login API."""
                nonlocal login_success
                if '/login_jump' in response.url and response.status == 200:
                    print("[网络监听] 检测到登录API响应成功")
                    print(f" - 请求地址: {response.url}")
                    print(f" - 状态码: {response.status}")
                    login_success = True

            def on_url_change(frame):
                """Mark the login as successful once the page URL is the home page."""
                nonlocal login_success
                current_url = page.url
                if 'home/index?sid' in current_url:
                    print("[URL监听] 检测到页面已跳转")
                    print(f" - 当前URL: {current_url}")
                    login_success = True

            # Other available events: 'request' (when a request is sent),
            # 'requestfailed' and 'requestfinished'.
            page.on('response', on_response)
            page.on('framenavigated', on_url_change)

            page.goto('https://mail.qq.com')
            try:
                # The keyword is passed as an argument instead of being
                # spliced into the JavaScript source, so quotes or backslashes
                # in it cannot break the expression.
                # Timeout is in milliseconds: 300000 ms = 300 s = 5 minutes.
                page.wait_for_function(
                    'kw => !!document.body && document.body.innerText.includes(kw)',
                    arg=keyword,
                    timeout=300000,
                )
                print(f"[文本监听] 检测到'{keyword}'文字,登录确认成功!")
                # Seeing the keyword is itself a login confirmation.
                login_success = True
            except Exception as e:
                # Deliberately best-effort: a timeout or page error must not
                # prevent the current state from being saved below.
                print(f"[警告] 未检测到'{keyword}'文字,可能页面结构有变化或登录超时")
                print(f" 错误信息: {e}")

            if login_success:
                print("已确认登录成功!")
            else:
                print("\n⚠️ 未明确检测到登录成功标志,但仍将保存当前状态")
                print(" 如果登录成功,状态应该是有效的")

            # 5000 ms = 5 s pause before snapshotting the storage state.
            page.wait_for_timeout(5000)
            context.storage_state(path=auth_file)
            print(f"✅ 登录状态已保存到: {auth_file}")
        finally:
            # Close the browser even if navigation or saving failed.
            browser.close()
            print("浏览器已关闭")
|
||
|
||
|
||
# Extract cookies from the auth.json file saved by Playwright.
def extract_cookies_from_auth(auth_file):
    """Return the cookies stored in a Playwright storage-state file.

    Args:
        auth_file: Path to the JSON file written by
            ``context.storage_state(path=...)``.

    Returns:
        dict: Mapping of cookie name to cookie value, in the shape accepted
        by ``requests``. If the same cookie name occurs more than once, the
        last occurrence in the file wins. An empty dict is returned when the
        file has no ``cookies`` entry.
    """
    with open(auth_file, 'r', encoding='utf-8') as f:
        auth_data = json.load(f)

    # Each cookie entry saved by Playwright carries a 'name' and a 'value'.
    return {
        cookie['name']: cookie['value']
        for cookie in auth_data.get('cookies', [])
    }
|
||
|
||
|
||
def crawl_with_saved_state(auth_file):
    """Open mail.qq.com with a saved login state and print mail subjects.

    A browser context is created from the storage state in ``auth_file``,
    https://mail.qq.com is loaded, and the text content of every element
    matching ``.mail-subject`` is printed. The window then stays open for
    20 seconds before the browser is closed.

    Args:
        auth_file: Path to a storage-state JSON file previously written by
            ``context.storage_state(path=...)``.
    """
    with sync_playwright() as p:
        # The window is still shown here (headless=False). With a valid saved
        # state, headless mode should also work. TODO confirm before switching.
        browser = p.chromium.launch(headless=False)
        try:
            # Load the previously saved login state.
            context = browser.new_context(storage_state=auth_file)
            page = context.new_page()
            page.goto('https://mail.qq.com')
            print("当前URL:", page.url)
            page.wait_for_load_state('networkidle')
            page.wait_for_timeout(2000)  # wait 2 seconds

            # The rendered HTML could also be parsed with BeautifulSoup.
            for item in page.locator('.mail-subject').all():
                print(item.text_content())

            # Keep the window open for 20 seconds before closing.
            page.wait_for_timeout(20000)
        finally:
            # Close the browser even if navigation or waiting raised.
            browser.close()
|
||
|
||
def start(account):
    """Crawl with a saved login state, or create one if none exists.

    The state file is ``./auth/mail/<account>.json``. If it exists,
    ``crawl_with_saved_state`` is run with it. Otherwise
    ``save_login_state`` is run so the user can log in manually, using
    "收件箱" as the text that confirms a successful login.

    Args:
        account: Account name used to build the state file name.

    Raises:
        SystemExit: With exit code 1 when ``account`` is empty or None.
    """
    # Validate before echoing, so an empty account is not printed as a name.
    if not account:
        print("请输入用户名")
        # The exit() helper is injected by the site module and is not always
        # available; raising SystemExit is equivalent and always works.
        raise SystemExit(1)
    print(f"用户名{account}")

    auth_file_path = "./auth/mail"
    file_path = f"{auth_file_path}/{account}.json"
    if os.path.exists(file_path):
        crawl_with_saved_state(file_path)
    else:
        save_login_state(file_path, "收件箱")
|
||
|