33 lines
1.2 KiB
Python
33 lines
1.2 KiB
Python
from playwright.sync_api import sync_playwright
|
||
from bs4 import BeautifulSoup
|
||
|
||
def crawl_vue_app(
    url: str = 'http://localhost:8080/',
    *,
    screenshot_path: str = 'vue_app.png',
    headless: bool = False,
    settle_ms: int = 2000,
) -> None:
    """Render a Vue app in Chromium, print its title and text, and screenshot it.

    The page is opened in a real browser so that client-side rendering has run
    before the HTML is read. The rendered HTML is then parsed with
    BeautifulSoup and the text inside ``<div id="app">`` is printed
    (truncated to 500 characters).

    Args:
        url: Address of the page to load.
        screenshot_path: File path the screenshot is written to.
        headless: Passed to ``chromium.launch``; ``False`` shows the browser
            window.
        settle_ms: Extra wait, in milliseconds, applied after the
            ``networkidle`` load state in case some async data is slow.

    Returns:
        None. Results are printed to stdout and the screenshot is written to
        ``screenshot_path``.
    """
    with sync_playwright() as p:
        # Launch the browser (headless=False makes the window visible).
        browser = p.chromium.launch(headless=headless)
        try:
            page = browser.new_page()

            # Open the page.
            page.goto(url)

            # Wait for the page to finish loading.
            page.wait_for_load_state('networkidle')

            # Extra grace period in case some async data loads slowly.
            page.wait_for_timeout(settle_ms)

            # Grab the rendered page content.
            html = page.content()
            title = page.title()
            print(f"标题: {title}")

            # Parse the rendered HTML.
            soup = BeautifulSoup(html, 'html.parser')

            # Text inside the Vue application's root element.
            app_div = soup.find('div', id='app')
            if app_div:
                # strip=True trims whitespace around each text fragment.
                text = app_div.get_text(strip=True)
                print(f"应用内容: {text[:500]}")

            # Take a screenshot.
            page.screenshot(path=screenshot_path)
            print("截图已保存")
        finally:
            # Close the browser even when navigation, waiting or the
            # screenshot raises, so no browser process is left behind.
            browser.close()
|
||
# Script entry point: run the crawl only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    crawl_vue_app()