fix:优化爬虫配置,单独使用docker容器运行

This commit is contained in:
Daniel
2026-03-05 20:19:24 +08:00
parent bbb9a5e1e1
commit 07454b73c2
9 changed files with 180 additions and 17 deletions

View File

@@ -39,11 +39,16 @@
## 依赖
- **Python 3.11+**(推荐 3.11 或 3.12)
```bash
pip install -r requirements.txt
```
新增 `deep-translator`GDELT 与 RSS 新闻入库前自动翻译为中文
或使用 pyproject:`pip install -e crawler/`(在项目根目录)
- `deep-translator`:GDELT 与 RSS 新闻入库前自动翻译为中文。
- `dashscope`:可选,配置 `DASHSCOPE_API_KEY` 后启用通义提取/清洗。
## 运行(需同时启动 3 个服务)

20
crawler/pyproject.toml Normal file
View File

@@ -0,0 +1,20 @@
# Declare the build backend explicitly so that `pip install -e crawler/`
# does not depend on pip's legacy setuptools fallback. setuptools 61.0 is
# the release that added support for the [project] table used below.
[build-system]
requires = ["setuptools>=61.0"]
build-backend = "setuptools.build_meta"

[project]
name = "usa-crawler"
version = "1.0.0"
description = "GDELT + RSS 爬虫与实时冲突服务"
readme = "README.md"
requires-python = ">=3.11"
# Version floors mirror crawler/requirements.txt; update both files together.
dependencies = [
    "requests>=2.32.0",
    "feedparser>=6.0.10",
    "beautifulsoup4>=4.12.0",
    # NOTE(review): pytest is normally a test-only tool; it is kept here so
    # existing install commands keep pulling it in. Consider moving it to
    # [project.optional-dependencies] once callers are confirmed.
    "pytest>=8.0.0",
    "fastapi>=0.115.0",
    "uvicorn[standard]>=0.32.0",
    "deep-translator>=1.11.0",
    "dashscope>=1.20.0",
]

[tool.pytest.ini_options]
# Collect only files named test_*.py under the tests/ directory.
testpaths = ["tests"]
python_files = ["test_*.py"]

View File

@@ -1,10 +0,0 @@
# Python 3.6 / legacy pip index compatibility (use this file when pip on the production machine has no 2.31+ available)
# Install: pip3 install --user -r crawler/requirements-py36.txt
requests>=2.24.0,<2.29
feedparser>=6.0.0
beautifulsoup4>=4.9.0
pytest>=6.0.0
fastapi>=0.68.0,<0.100
uvicorn>=0.15.0,<0.23
deep-translator>=1.5.0
dashscope>=1.14.0

View File

@@ -1,8 +1,10 @@
requests>=2.31.0
feedparser>=6.0.0
# Python 3.11+ crawler dependencies (using the current latest compatible versions)
# Install: pip install -r crawler/requirements.txt
requests>=2.32.0
feedparser>=6.0.10
beautifulsoup4>=4.12.0
pytest>=7.0.0
fastapi>=0.109.0
uvicorn>=0.27.0
pytest>=8.0.0
fastapi>=0.115.0
uvicorn[standard]>=0.32.0
deep-translator>=1.11.0
dashscope>=1.20.0