"""Print the tender-announcement rows from the China Telecom procurement site.

The site is a client-side rendered application, so a real browser (driven by
Playwright) is used to load it, click through to the announcement list and
read the rendered table rows.
"""
from bs4 import BeautifulSoup  # noqa: F401  (not used below; import kept on purpose)
from playwright.sync_api import TimeoutError as PlaywrightTimeoutError
from playwright.sync_api import sync_playwright

PORTAL_URL = 'https://caigou.chinatelecom.com.cn/'
ROW_SELECTOR = '.el-table__row'

# Fixed pauses, in milliseconds, carried over from the original script.
INITIAL_SETTLE_MS = 10000  # extra time for slow asynchronous data after first load
STEP_SETTLE_MS = 2000      # pause after each navigation click
FINAL_PAUSE_MS = 5000      # keeps the window open briefly before closing
ROW_WAIT_TIMEOUT_MS = 10000  # upper bound on waiting for the first table row


def _collect_row_texts(page, row_selector=ROW_SELECTOR,
                       timeout_ms=ROW_WAIT_TIMEOUT_MS):
    """Return the text content of every element matching *row_selector*.

    ``Locator.all()`` takes an immediate snapshot and does not wait for
    elements to appear, so the first row is awaited explicitly beforehand.
    If no row shows up within *timeout_ms* an empty list is returned, which
    matches the original script's behaviour of printing nothing.
    """
    rows = page.locator(row_selector)
    try:
        # "attached" rather than the default "visible": text_content() only
        # needs the element to exist in the DOM.
        rows.first.wait_for(state='attached', timeout=timeout_ms)
    except PlaywrightTimeoutError:
        return []
    return [row.text_content() for row in rows.all()]


def crawl_vue_app(url=PORTAL_URL, *, headless=False):
    """Open the portal, navigate to the tender list and print each table row.

    Args:
        url: Page to open first. Defaults to the procurement portal home page.
        headless: Run Chromium without a window when true. Defaults to
            ``False`` so the browser window can be watched.

    Returns:
        The list of row texts that were printed (empty if no row appeared).

    Raises:
        playwright.sync_api.Error: If navigation fails or one of the links
            that is clicked cannot be found or acted on.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        # try/finally guarantees the browser is closed even when a navigation
        # step or click raises.
        try:
            page = browser.new_page()

            page.goto(url)
            page.wait_for_load_state('networkidle')
            page.wait_for_timeout(INITIAL_SETTLE_MS)

            # Several elements may carry this text; the first one is clicked.
            page.get_by_text("更多").nth(0).click()
            page.wait_for_load_state('networkidle')
            page.wait_for_timeout(STEP_SETTLE_MS)

            page.get_by_text("招标公告").click()
            # The pause is kept in addition to the explicit row wait below.
            # NOTE(review): presumably the table can still show rows from the
            # previous view right after the click - confirm against the site.
            page.wait_for_timeout(STEP_SETTLE_MS)

            row_texts = _collect_row_texts(page)
            for text in row_texts:
                print(text)

            page.wait_for_timeout(FINAL_PAUSE_MS)
            return row_texts
        finally:
            browser.close()


if __name__ == "__main__":
    crawl_vue_app()