first commit
This commit is contained in:
commit
a607545324
|
|
@ -0,0 +1,14 @@
|
||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<profile version="1.0">
|
||||||
|
<option name="myName" value="Project Default" />
|
||||||
|
<inspection_tool class="PyPackageRequirementsInspection" enabled="true" level="WARNING" enabled_by_default="true">
|
||||||
|
<option name="ignoredPackages">
|
||||||
|
<value>
|
||||||
|
<list size="1">
|
||||||
|
<item index="0" class="java.lang.String" itemvalue="flask" />
|
||||||
|
</list>
|
||||||
|
</value>
|
||||||
|
</option>
|
||||||
|
</inspection_tool>
|
||||||
|
</profile>
|
||||||
|
</component>
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
<component name="InspectionProjectProfileManager">
|
||||||
|
<settings>
|
||||||
|
<option name="USE_PROJECT_PROFILE" value="false" />
|
||||||
|
<version value="1.0" />
|
||||||
|
</settings>
|
||||||
|
</component>
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<module type="PYTHON_MODULE" version="4">
|
||||||
|
<component name="Flask">
|
||||||
|
<option name="enabled" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="NewModuleRootManager">
|
||||||
|
<content url="file://$MODULE_DIR$">
|
||||||
|
<excludeFolder url="file://$MODULE_DIR$/.venv" />
|
||||||
|
</content>
|
||||||
|
<orderEntry type="jdk" jdkName="Python 3.10" jdkType="Python SDK" />
|
||||||
|
<orderEntry type="sourceFolder" forTests="false" />
|
||||||
|
</component>
|
||||||
|
<component name="TemplatesService">
|
||||||
|
<option name="TEMPLATE_CONFIGURATION" value="Jinja2" />
|
||||||
|
<option name="TEMPLATE_FOLDERS">
|
||||||
|
<list>
|
||||||
|
<option value="$MODULE_DIR$/templates" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
</module>
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="Black">
|
||||||
|
<option name="sdkName" value="Python 3.9 (learn-spider)" />
|
||||||
|
</component>
|
||||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
|
||||||
|
</project>
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="ProjectModuleManager">
|
||||||
|
<modules>
|
||||||
|
<module fileurl="file://$PROJECT_DIR$/.idea/learn-spider.iml" filepath="$PROJECT_DIR$/.idea/learn-spider.iml" />
|
||||||
|
</modules>
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
|
|
@ -0,0 +1,6 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="VcsDirectoryMappings">
|
||||||
|
<mapping directory="$PROJECT_DIR$" vcs="Git" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
|
|
@ -0,0 +1,136 @@
|
||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project version="4">
|
||||||
|
<component name="AutoImportSettings">
|
||||||
|
<option name="autoReloadType" value="SELECTIVE" />
|
||||||
|
</component>
|
||||||
|
<component name="ChangeListManager">
|
||||||
|
<list default="true" id="29e8f12f-1d00-4e65-8b7a-d266f481ff29" name="更改" comment="" />
|
||||||
|
<option name="SHOW_DIALOG" value="false" />
|
||||||
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||||
|
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
|
||||||
|
<option name="LAST_RESOLUTION" value="IGNORE" />
|
||||||
|
</component>
|
||||||
|
<component name="FileTemplateManagerImpl">
|
||||||
|
<option name="RECENT_TEMPLATES">
|
||||||
|
<list>
|
||||||
|
<option value="Flask Main" />
|
||||||
|
<option value="Python Script" />
|
||||||
|
</list>
|
||||||
|
</option>
|
||||||
|
</component>
|
||||||
|
<component name="ProjectColorInfo">{
|
||||||
|
"associatedIndex": 5
|
||||||
|
}</component>
|
||||||
|
<component name="ProjectId" id="3DjvhMu4Ipdz8hnE7N4fLfhY0AN" />
|
||||||
|
<component name="ProjectViewState">
|
||||||
|
<option name="showLibraryContents" value="true" />
|
||||||
|
</component>
|
||||||
|
<component name="PropertiesComponent"><![CDATA[{
|
||||||
|
"keyToString": {
|
||||||
|
"Flask 服务器.learn-spider.executor": "Run",
|
||||||
|
"Python.china_net.executor": "Run",
|
||||||
|
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||||
|
"last_opened_file_path": "/Users/mshe/developer/code/python-workspace/learn-spider/spider",
|
||||||
|
"node.js.detected.package.eslint": "true",
|
||||||
|
"node.js.detected.package.tslint": "true",
|
||||||
|
"node.js.selected.package.eslint": "(autodetect)",
|
||||||
|
"node.js.selected.package.tslint": "(autodetect)",
|
||||||
|
"nodejs_package_manager_path": "npm",
|
||||||
|
"settings.editor.selected.configurable": "settings.sync",
|
||||||
|
"vue.rearranger.settings.migration": "true"
|
||||||
|
}
|
||||||
|
}]]></component>
|
||||||
|
<component name="RecentsManager">
|
||||||
|
<key name="CopyFile.RECENT_KEYS">
|
||||||
|
<recent name="$PROJECT_DIR$/spider" />
|
||||||
|
</key>
|
||||||
|
</component>
|
||||||
|
<component name="RunManager" selected="Python.china_net">
|
||||||
|
<configuration name="china_net" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||||
|
<module name="learn-spider" />
|
||||||
|
<option name="ENV_FILES" value="" />
|
||||||
|
<option name="INTERPRETER_OPTIONS" value="" />
|
||||||
|
<option name="PARENT_ENVS" value="true" />
|
||||||
|
<envs>
|
||||||
|
<env name="PYTHONUNBUFFERED" value="1" />
|
||||||
|
</envs>
|
||||||
|
<option name="SDK_HOME" value="" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/spider" />
|
||||||
|
<option name="IS_MODULE_SDK" value="true" />
|
||||||
|
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||||
|
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||||
|
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||||
|
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/spider/china_net.py" />
|
||||||
|
<option name="PARAMETERS" value="" />
|
||||||
|
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||||
|
<option name="EMULATE_TERMINAL" value="false" />
|
||||||
|
<option name="MODULE_MODE" value="false" />
|
||||||
|
<option name="REDIRECT_INPUT" value="false" />
|
||||||
|
<option name="INPUT_FILE" value="" />
|
||||||
|
<method v="2" />
|
||||||
|
</configuration>
|
||||||
|
<configuration name="learn-spider" type="Python.FlaskServer">
|
||||||
|
<module name="learn-spider" />
|
||||||
|
<option name="target" value="$PROJECT_DIR$/app.py" />
|
||||||
|
<option name="targetType" value="PATH" />
|
||||||
|
<option name="ENV_FILES" value="" />
|
||||||
|
<option name="INTERPRETER_OPTIONS" value="" />
|
||||||
|
<option name="PARENT_ENVS" value="true" />
|
||||||
|
<option name="SDK_HOME" value="" />
|
||||||
|
<option name="WORKING_DIRECTORY" value="" />
|
||||||
|
<option name="IS_MODULE_SDK" value="false" />
|
||||||
|
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||||
|
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||||
|
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||||
|
<option name="launchJavascriptDebuger" value="false" />
|
||||||
|
<method v="2" />
|
||||||
|
</configuration>
|
||||||
|
<recent_temporary>
|
||||||
|
<list>
|
||||||
|
<item itemvalue="Python.china_net" />
|
||||||
|
</list>
|
||||||
|
</recent_temporary>
|
||||||
|
</component>
|
||||||
|
<component name="SharedIndexes">
|
||||||
|
<attachedChunks>
|
||||||
|
<set>
|
||||||
|
<option value="bundled-js-predefined-d6986cc7102b-5c90d61e3bab-JavaScript-PY-242.23339.19" />
|
||||||
|
<option value="bundled-python-sdk-0029f7779945-399fe30bd8c1-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-242.23339.19" />
|
||||||
|
</set>
|
||||||
|
</attachedChunks>
|
||||||
|
</component>
|
||||||
|
<component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
|
||||||
|
<component name="TaskManager">
|
||||||
|
<task active="true" id="Default" summary="默认任务">
|
||||||
|
<changelist id="29e8f12f-1d00-4e65-8b7a-d266f481ff29" name="更改" comment="" />
|
||||||
|
<created>1778808412351</created>
|
||||||
|
<option name="number" value="Default" />
|
||||||
|
<option name="presentableId" value="Default" />
|
||||||
|
<updated>1778808412351</updated>
|
||||||
|
<workItem from="1778808425415" duration="1833000" />
|
||||||
|
<workItem from="1779096652209" duration="2278000" />
|
||||||
|
<workItem from="1779170746673" duration="525000" />
|
||||||
|
<workItem from="1779673152455" duration="4561000" />
|
||||||
|
<workItem from="1779779192025" duration="4366000" />
|
||||||
|
</task>
|
||||||
|
<servers />
|
||||||
|
</component>
|
||||||
|
<component name="TypeScriptGeneratedFilesManager">
|
||||||
|
<option name="version" value="3" />
|
||||||
|
</component>
|
||||||
|
<component name="XDebuggerManager">
|
||||||
|
<breakpoint-manager>
|
||||||
|
<default-breakpoints>
|
||||||
|
<breakpoint type="python-exception">
|
||||||
|
<properties notifyOnTerminate="true" exception="BaseException">
|
||||||
|
<option name="notifyOnTerminate" value="true" />
|
||||||
|
</properties>
|
||||||
|
</breakpoint>
|
||||||
|
</default-breakpoints>
|
||||||
|
</breakpoint-manager>
|
||||||
|
</component>
|
||||||
|
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||||
|
<SUITE FILE_PATH="coverage/learn_spider$china_net.coverage" NAME="china_net 覆盖结果" MODIFIED="1779780853999" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/spider" />
|
||||||
|
<SUITE FILE_PATH="coverage/learn_spider$learn_spider.coverage" NAME="learn-spider 覆盖结果" MODIFIED="1778808456145" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="" />
|
||||||
|
</component>
|
||||||
|
</project>
|
||||||
Binary file not shown.
|
|
@ -0,0 +1,12 @@
|
||||||
|
from flask import Flask
|
||||||
|
|
||||||
|
# Application instance that the route decorator below registers against.
app = Flask(__name__)


@app.route('/')
def hello_world():
    """Serve the site root with a fixed plain-text greeting."""
    greeting = 'Hello World!'
    return greeting


if __name__ == '__main__':
    # Only start the server when this file is executed directly.
    app.run()
|
||||||
|
|
@ -0,0 +1,32 @@
|
||||||
|
# HTTP请求
|
||||||
|
requests==2.31.0
|
||||||
|
httpx==0.27.0 # 同时提供同步/异步API;HTTP/2 需额外安装 httpx[http2]
|
||||||
|
|
||||||
|
# HTML解析
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
lxml==5.1.0 # 更快的解析器,比html.parser快很多
|
||||||
|
parsel==1.9.0 # Scrapy的解析库
|
||||||
|
|
||||||
|
# 异步爬虫
|
||||||
|
aiohttp==3.9.5 # 异步HTTP客户端
|
||||||
|
aiofiles==23.2.1 # 异步文件操作
|
||||||
|
|
||||||
|
# 模拟浏览器(对付Vue/React等SPA)
|
||||||
|
playwright==1.42.0 # 推荐,现代浏览器自动化
|
||||||
|
selenium==4.18.1 # 经典方案
|
||||||
|
|
||||||
|
# 代理和反爬
|
||||||
|
fake-useragent==1.5.1 # 随机User-Agent
|
||||||
|
requests-html==0.10.0 # 支持JS渲染(基于pyppeteer)
|
||||||
|
|
||||||
|
# 数据存储
|
||||||
|
pymongo==4.6.1 # MongoDB
|
||||||
|
redis==5.0.1 # Redis
|
||||||
|
pymysql==1.1.0 # MySQL
|
||||||
|
|
||||||
|
# 数据处理
|
||||||
|
pandas==2.2.1 # 数据分析
|
||||||
|
numpy==1.26.4 # 科学计算
|
||||||
|
|
||||||
|
# 爬虫框架
|
||||||
|
scrapy==2.11.1 # 重量级爬虫框架
|
||||||
|
|
@ -0,0 +1,33 @@
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def crawl_vue_app(url='http://localhost:8080/', headless=False,
                  screenshot_path='vue_app.png'):
    """Render a client-side (Vue) page in Chromium and dump what it shows.

    Prints the page title and the first 500 characters of the text inside
    ``<div id="app">`` (if that element exists), then saves a screenshot.

    Args:
        url: Page to open. Defaults to the local dev server.
        headless: Forwarded to ``chromium.launch``; ``False`` shows the
            browser window.
        screenshot_path: File the screenshot is written to.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        # try/finally so the browser is closed even when a step below raises.
        try:
            page = browser.new_page()
            page.goto(url)

            # Wait for network activity to settle, then give slow async data
            # an extra two seconds to render.
            page.wait_for_load_state('networkidle')
            page.wait_for_timeout(2000)

            html = page.content()
            title = page.title()
            print(f"标题: {title}")

            # Parse the rendered DOM and read the text of the Vue root element.
            soup = BeautifulSoup(html, 'html.parser')
            app_div = soup.find('div', id='app')
            if app_div:
                text = app_div.get_text(strip=True)
                print(f"应用内容: {text[:500]}")

            page.screenshot(path=screenshot_path)
            print("截图已保存")
        finally:
            browser.close()


if __name__ == "__main__":
    crawl_vue_app()
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def crawl_vue_app(url='https://caigou.chinatelecom.com.cn/', headless=True):
    """Click through to the tender-announcement list and print each table row.

    Opens ``url`` in Chromium, clicks the first "更多" link, then the
    "招标公告" entry, and prints the text content of every element matching
    ``.el-table__row``.

    Args:
        url: Portal landing page to start from.
        headless: Forwarded to ``chromium.launch``; pass ``False`` to watch
            the browser window.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=headless)
        # try/finally so a failed click or navigation does not leak the browser.
        try:
            page = browser.new_page()
            page.goto(url)

            # Wait for the page to settle, plus 2 s for slow async data.
            page.wait_for_load_state('networkidle')
            page.wait_for_timeout(2000)

            # First "more" link on the landing page.
            page.get_by_text("更多").nth(0).click()
            page.wait_for_load_state('networkidle')
            page.wait_for_timeout(2000)

            # Switch to the tender-announcement tab and let the table render.
            page.get_by_text("招标公告").click()
            page.wait_for_timeout(2000)

            items = page.locator('.el-table__row').all()
            for item in items:
                print(item.text_content())

            # Alternative kept for reference: parse the rendered HTML instead.
            # html = page.content()
            # title = page.title()
            # print(f"标题: {title}")
            # soup = BeautifulSoup(html, 'html.parser')
            # app_div = soup.find('div', id='app')
            # if app_div:
            #     text = app_div.get_text(strip=True)
            #     print(f"应用内容: {text}")
            #     # print(f"应用内容: {text[:500]}")
            # page.screenshot(path='vue_app.png')
            # print("截图已保存")

            page.wait_for_timeout(5000)  # linger 5 seconds before closing
        finally:
            browser.close()


if __name__ == "__main__":
    crawl_vue_app()
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
import requests
|
||||||
|
import time
|
||||||
|
import re
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
BASE_URL = 'https://caigou.chinatelecom.com.cn'
|
||||||
|
|
||||||
|
def fetch_all_pages(page_size=10, sample_divisor=100, timeout=30):
    """Page through the announcement list API and collect the returned items.

    Posts to ``/portal/base/announcementJoin/queryListNew`` starting at page 1
    and stops when a page comes back empty or when the number of collected
    items reaches ``total / sample_divisor``.

    Args:
        page_size: Items requested per page.
        sample_divisor: Fraction of the reported total to collect, expressed
            as a divisor. The default of 100 keeps the original behaviour of
            stopping at roughly 1% of the total; pass 1 to fetch everything.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A list with the raw item dicts from every fetched page.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    list_url = f'{BASE_URL}/portal/base/announcementJoin/queryListNew'
    headers = {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Referer': 'https://caigou.chinatelecom.com.cn/',
        'Cookie': 'Secure; sag_agent_cookie='
    }

    all_data = []
    page = 1

    while True:
        print(f"正在获取第 {page} 页...")
        params = {
            "pageNum": page,
            "pageSize": page_size,
            "type": "e2no",
            "provinceCode": "",
            "noticeSummary": ""
        }
        # A timeout keeps a stalled connection from hanging the crawl forever.
        response = requests.post(list_url, json=params, headers=headers,
                                 timeout=timeout)
        response.raise_for_status()
        result = response.json()

        # `or` fallbacks also cover keys that are present but null.
        data = result.get('data') or {}
        page_info = data.get('pageInfo') or {}
        items = page_info.get('list') or []
        total = page_info.get('total') or 0

        if not items:
            break
        all_data.extend(items)
        print(f" 获取 {len(items)} 条,累计 {len(all_data)} 条, 总共 {total} 条")

        if len(all_data) >= total / sample_divisor:
            break
        page += 1
        time.sleep(0.5)  # small pause between pages

    print(f"\n总共获取 {len(all_data)} 条数据")
    return all_data
|
||||||
|
|
||||||
|
# 详情链接
|
||||||
|
#/DeclareDetails?id=177118231561666&type=1&docTypeCode=TenderAnnouncement&securityViewCode=2f06d88f0032ae9e828be0f7767674c8
|
||||||
|
# https://caigou.chinatelecom.com.cn/portal/base/tenderannouncement/view
|
||||||
|
def get_detail(item, timeout=30):
    """Fetch one tender announcement and return its body as plain text.

    Args:
        item: A list entry providing ``docId``, ``securityViewCode`` and
            ``docTitle``.
        timeout: Seconds to wait on the HTTP request before giving up.

    Returns:
        The announcement's ``context`` field with HTML removed and whitespace
        collapsed, or ``""`` when the response carries no such field.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    detail_url = f"{BASE_URL}/portal/base/tenderannouncement/view"
    headers = {
        'Content-Type': 'application/json',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': '*/*',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Referer': 'https://caigou.chinatelecom.com.cn/',
    }
    params = {
        "type": "TenderAnnouncement",
        "id": item.get('docId'),
        "securityViewCode": item.get('securityViewCode'),
        "title": item.get('docTitle')
    }
    response = requests.post(detail_url, headers=headers, json=params,
                             timeout=timeout)
    response.raise_for_status()
    # Tolerate a missing or null "data" object instead of raising AttributeError.
    payload = response.json().get('data') or {}
    return clean_html_tag(payload.get('context'))
|
||||||
|
|
||||||
|
def clean_html_tag(html_text):
    """Strip HTML markup and squeeze all whitespace down to single spaces.

    Returns an empty string for empty or ``None`` input.
    """
    if not html_text:
        return ""
    plain = BeautifulSoup(html_text, 'html.parser').get_text()
    # \s+ matches newlines, carriage returns and tabs as well, so one
    # substitution followed by a strip leaves nothing further to replace.
    return re.sub(r'\s+', ' ', plain).strip()
|
||||||
|
|
||||||
|
# Script entry: collect the list entries, then print each announcement body,
# pausing briefly between detail requests.
data = fetch_all_pages()
for item in data:
    detail_text = get_detail(item)
    print(detail_text)
    time.sleep(0.5)
|
||||||
|
|
||||||
|
|
@ -0,0 +1,25 @@
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
def crawl_vue_app(login_url='https://caigou.chinatelecom.com.cn/',
                  state_path='auth.json'):
    """Open a visible browser for a manual login and save the session state.

    Navigates to ``login_url`` in a fresh browser context, blocks on console
    input until the user confirms the login is done, then writes that
    context's storage state to ``state_path``.

    Args:
        login_url: Page on which the user logs in.
        state_path: File the storage state is written to.
    """
    with sync_playwright() as p:
        # The window must be visible so the user can log in by hand.
        browser = p.chromium.launch(headless=False)
        # try/finally so Ctrl-C or EOF at the prompt still closes the browser.
        try:
            page = browser.new_page()
            # NOTE(review): this page is not created from `context` below, and
            # only `context` is saved - looks like a leftover; confirm before
            # removing.
            page.goto('https://mail.qq.com')

            context = browser.new_context()
            page = context.new_page()
            page.goto(login_url)

            input("请在浏览器中完成登录,然后按 Enter 继续...")
            context.storage_state(path=state_path)
            print(f"登录状态已保存到 {state_path}")
        finally:
            browser.close()


if __name__ == "__main__":
    crawl_vue_app()
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 585 KiB |
Loading…
Reference in New Issue