learn-spider/spider/baidu.py

33 lines
1.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from playwright.sync_api import sync_playwright
from bs4 import BeautifulSoup
def crawl_vue_app(
    url: str = 'http://localhost:8080/',
    *,
    headless: bool = False,
    screenshot_path: str = 'vue_app.png',
    settle_ms: int = 2000,
) -> None:
    """Render a JavaScript-driven page in Chromium and print a summary of it.

    Loads ``url`` in a Playwright-controlled Chromium browser, waits until the
    network is idle, prints the page title and up to the first 500 characters
    of the text found inside ``<div id="app">`` (if that element exists), and
    finally saves a screenshot of the page.

    Args:
        url: Address of the page to load.
        headless: When False (the default) the browser window is visible.
        screenshot_path: File the screenshot is written to.
        settle_ms: Extra delay, in milliseconds, applied after the network
            goes idle to give slow asynchronous data loads time to finish.
            Values of 0 or less skip the delay.

    Returns:
        None. All results are printed to standard output.
    """
    with sync_playwright() as p:
        # Launch the browser (headless=False makes the window visible).
        browser = p.chromium.launch(headless=headless)
        try:
            page = browser.new_page()

            # Navigate to the page and wait for it to finish loading.
            page.goto(url)
            page.wait_for_load_state('networkidle')

            # Additional grace period in case some async data loads slowly.
            if settle_ms > 0:
                page.wait_for_timeout(settle_ms)

            # Grab the rendered DOM (after client-side scripts have run).
            html = page.content()
            title = page.title()
            print(f"标题: {title}")

            # Parse the rendered markup and look up the element with id="app",
            # which the original author treats as the Vue application root.
            soup = BeautifulSoup(html, 'html.parser')
            app_div = soup.find('div', id='app')
            if app_div:
                # Concatenate the text under the root element, stripping
                # whitespace around each fragment; print only the first 500
                # characters to keep the output short.
                text = app_div.get_text(strip=True)
                print(f"应用内容: {text[:500]}")

            # Save a screenshot of the page.
            page.screenshot(path=screenshot_path)
            print("截图已保存")
        finally:
            # Always release the browser, even if a step above raised.
            browser.close()
# Script entry point: run the crawl only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    crawl_vue_app()