48 lines
1.7 KiB
Python
48 lines
1.7 KiB
Python
from playwright.sync_api import sync_playwright
|
||
from bs4 import BeautifulSoup
|
||
|
||
def crawl_vue_app(
    url: str = "https://caigou.chinatelecom.com.cn/",
    *,
    headless: bool = False,
    settle_ms: int = 2000,
    row_timeout_ms: int = 10000,
) -> list:
    """Open *url* in Chromium, navigate to the tender list and scrape its rows.

    The function loads the page, clicks the first element whose text is
    "更多" ("More"), then the element whose text is "招标公告" ("Tender
    announcements"), and finally reads the text of every element matching
    ``.el-table__row`` (presumably the rows of an Element UI table).

    Each row's text is printed to stdout and the collected texts are also
    returned, so the function is usable both as a script and as a library
    call.

    Args:
        url: Page to open. Defaults to the address the script originally
            hard-coded.
        headless: Run the browser without a window. Defaults to ``False``
            so the browser stays visible.
        settle_ms: Extra pause, in milliseconds, after each navigation
            step to let slow asynchronous data finish rendering.
        row_timeout_ms: Maximum time, in milliseconds, to wait for the
            first table row to be attached before giving up.

    Returns:
        A list with the ``text_content()`` of each table row, in document
        order. Empty when no row appeared within ``row_timeout_ms``.
    """
    # Imported locally so the block does not depend on a file-level import
    # that may not exist; playwright is already a dependency of this file.
    from playwright.sync_api import TimeoutError as PlaywrightTimeoutError

    row_selector = ".el-table__row"

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        try:
            page = browser.new_page()

            # Load the landing page and let its network activity settle.
            page.goto(url)
            page.wait_for_load_state("networkidle")
            page.wait_for_timeout(settle_ms)

            # Follow the first "More" link.
            page.get_by_text("更多").nth(0).click()
            page.wait_for_load_state("networkidle")
            page.wait_for_timeout(settle_ms)

            # Switch to the "Tender announcements" view.
            page.get_by_text("招标公告").click()
            page.wait_for_timeout(settle_ms)

            # Locator.all() returns whatever is present right now without
            # waiting, so explicitly wait for the first row to exist.
            rows = page.locator(row_selector)
            try:
                rows.first.wait_for(state="attached", timeout=row_timeout_ms)
            except PlaywrightTimeoutError:
                # No rows rendered in time: fall through and report an
                # empty result instead of failing the whole crawl.
                pass

            row_texts = [row.text_content() for row in rows.all()]
            for text in row_texts:
                print(text)

            if not headless:
                # Keep the window open for 5 seconds so the result can be
                # inspected visually before the browser goes away.
                page.wait_for_timeout(5000)

            return row_texts
        finally:
            # Always release the browser, even when a click or wait fails.
            browser.close()
|
||
|
||
# Script entry point: run the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    crawl_vue_app()