From 0027074b8b73d2b2a1fd1bc3d6a30b32ad0c588e Mon Sep 17 00:00:00 2001 From: Daniel Date: Mon, 2 Mar 2026 17:20:31 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=88=AC=E8=99=AB?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .env.example | 6 +- Dockerfile.crawler | 1 - crawler/__pycache__/config.cpython-39.pyc | Bin 2344 -> 2386 bytes .../extractor_dashscope.cpython-39.pyc | Bin 0 -> 4426 bytes .../__pycache__/news_storage.cpython-39.pyc | Bin 0 -> 4759 bytes .../realtime_conflict_service.cpython-39.pyc | Bin 15012 -> 15244 bytes crawler/config.py | 3 + crawler/extractor_dashscope.py | 121 +++++++++++++++ crawler/news_storage.py | 141 ++++++++++++++++++ crawler/realtime_conflict_service.py | 31 ++-- crawler/requirements.txt | 1 + docker-compose.yml | 2 +- docs/CRAWLER_PIPELINE.md | 65 ++++++++ package.json | 4 +- scripts/verify-pipeline.sh | 124 +++++++++++++++ server/data.db-shm | Bin 32768 -> 32768 bytes server/data.db-wal | Bin 1854032 -> 2542072 bytes server/db.js | 15 ++ server/openapi.js | 11 +- server/routes.js | 13 ++ src/config.ts | 1 + 21 files changed, 523 insertions(+), 16 deletions(-) create mode 100644 crawler/__pycache__/extractor_dashscope.cpython-39.pyc create mode 100644 crawler/__pycache__/news_storage.cpython-39.pyc create mode 100644 crawler/extractor_dashscope.py create mode 100644 crawler/news_storage.py create mode 100644 docs/CRAWLER_PIPELINE.md create mode 100755 scripts/verify-pipeline.sh diff --git a/.env.example b/.env.example index 1c1ca53..c6f33ba 100644 --- a/.env.example +++ b/.env.example @@ -1,4 +1,8 @@ # Mapbox 地图令牌 (波斯湾区域展示) # 免费申请: https://account.mapbox.com/access-tokens/ -# 复制本文件为 .env 并填入你的 token VITE_MAPBOX_ACCESS_TOKEN=your_mapbox_public_token_here + +# 阿里云 DashScope API Key(爬虫 AI 提取用,不设则用规则或 Ollama) +# 在 crawler 目录或系统环境变量中设置,例如: +# export DASHSCOPE_API_KEY=sk-xxx +DASHSCOPE_API_KEY= diff --git a/Dockerfile.crawler 
b/Dockerfile.crawler index c1907fc..936f3cf 100644 --- a/Dockerfile.crawler +++ b/Dockerfile.crawler @@ -10,7 +10,6 @@ COPY crawler ./ ENV DB_PATH=/data/data.db ENV API_BASE=http://api:3001 -ENV CLEANER_AI_DISABLED=1 ENV GDELT_DISABLED=1 ENV RSS_INTERVAL_SEC=60 diff --git a/crawler/__pycache__/config.cpython-39.pyc b/crawler/__pycache__/config.cpython-39.pyc index cad6fbddc097cf3f3af930ee654e2bbeff1991b3..d85e50a6fd36a0aac1538c5dbd1eccc19334b6e1 100644 GIT binary patch delta 296 zcmZ1>bV-Oek(ZZ?0SFp`mS(=6$Scd(GEqCklrf8A0cQ$l3fDrWDBe`QR0y9tg=ZmC zGh-BgIztNYBE~2IAk7D)1yh)U88rDPF4JI)nt0wwM9{@C*dy54KfpEKF~Bq4+cokQ z!zRY(OpH;RUob~8>qM~?mlS1Yr$(_QmXstWXGd}5X69s;BoA`!7CYxgh5J$Ctu^R Z6B7b5Sr|DOd6)$lrI`7EP>7jB3IN-rQHuZo delta 254 zcmca4v_gnCk(ZZ?0SKZ6mu3b`ERLL^12!Vk<5w%FIr^#gCvn%quBk1F<-Rf`jATTwPs)Z*h3L rMuz(bxdg9dD3S%r70FKi$6+TT0A#W-axn5R3ouGC^DzrCb4URI-IGgp diff --git a/crawler/__pycache__/extractor_dashscope.cpython-39.pyc b/crawler/__pycache__/extractor_dashscope.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5036d663b3ed940845964495afb2aecce4d81f0e GIT binary patch literal 4426 zcmaJ^>u(g-6`z@%eT{eh!XcO>&4h+Fn*ugT)kGF*ib~Y9Q32J7qD+%+hP~JJ;MtkY zomq^@Yy}wNK=31i01YPbrXfw!;83Y0)aLOIRQ&_yW3vyhT8XM3`k~5){_f1$EMRq4 zbMD-8&pr3v^SHk=dOEEL`1SnalJSpqg77u(wEp7g{0NV89)t**Ktv*$q9z8dq)9;= z(IRLgrks~GIUm)cBA1iRSU#@B^9e1HPio147d4f9o7R?3X{mf#OM{oD?X-g?X##D0 zNa_`O1rnc&kOWB{i)x*u9sDG1?-lMy5C!9HB!%%VA`J?ej^bajJ*9E}R%7y1W$`_A zm+p)V=IjE!ef^!r;SVZ*IZ>UOXq;QPef>mjX0|$WwsG;}+QnP-rPH+!k5x~dt-XJx zHnpHsmZocGuQbjr)y_`V=Dw(0yBtc_-Z@{J`snM!M`|Z8RX>@kotnf1bgLgNR_CtN zPhPDZK2pDY;`a4Zm75n-vM-ybY?!Lcvp1@Xrf<9_F=lvB*-bvEAUz+#v# zyY!&z$N4&T(e-1DxI-^n2Go($)ey%UL>0&@o)~2N#ZzS;etSt7bB&p z7um1uZzBq6^P(g*Ck=}vP1;Gvlw68=vHdAe-k%xZAkfG2cnLiR8u0ky704n(xM&_=+$n$b!3W{GX!0|E~sm`vaO; zSM0;Vm#<9J&wRE<$f;q>+!I@V*L}YBg$7T)-An|{h`#c|OE2vmbal7r6#o+u1Vm$H z=~Cs|yY-vXmFp+_5RR2=bJe#mR2JV2!*n%3t21BL&QI6N3$^LbYG*IkKRAW}tscIP 
zh+MvTx;nY2{%mm1?ynCYt(}{%+`Ncbhe-AEx!S^?>vKoy?_-sz>f678fTApadZhNp zqqnbrplnr}0WzR&-}+Q_22z)i8Cbr$@L}z}^N_>>e?Rfbv;EKo@s+3>Pzf!V_IRFJ zuBV%nxt?LU)bxk}c3yXloM*cul<_5+VL70h_L!Ha#3<&y5o377%OP5goNhuWOh6=a zW$AeJ#_48~h9~Cve3Bm*$2oVkTK)qLf;=BkRJG`23zRvwWl=LbYM3S^TU3MT)*9@) zWXJ5HMb;2r;fK0&#sR}LpzSWz_i>?lmA9fG+{JzDQ13q7p-z@i$7S~T>Mri%LT{6& z=k?)x<(j-uqi(Pq(}&#V>F;!*V@zkxh*5BwV|Th3(^=kTE0e-^a1YQCBWKc;87p1* z2{#6AAo$m|vJRY&o5XSl)c&p8`}_Ir*y_EjC*U{4b#3lkc&+wTXQrx?OSOsP^`+0@ zO6VKYO*0EKQ0iuJYZdAL(}gryK3eXpUOQTyTjaVH&xBe=>3G((5uAKm#R9(|odNYu z90d_nJ^oeY#_8}zZydS2{PAT>#-0&b^`&F#FW=f^>3N!kt}&YtXVbtYJ&{kJ7hDYsHWuggIU)e zrIw?`b0fBqqYgujAROxc)f;cT@!HsvuQ^Y8c+hz~)8k9FioD~Ft}Yc1CvlJ(faPkl*!FsU*v-1U4OW?c( z+j5|Z>0`ea*mofE-)H(>hWVU6qFV+v`%L7xHQe{|;Lbi|&=~HrK7b_CbFR&@tH;?^ z8229p=ptHIdD*61C$u`-Km@5O`$ zT9Q2(o{In|<@p%kP-Ms4xSQ}~0?6f1O=J$fM4ph`B_v#OL%?cLJTcO?OL+ZIDNa&e{DN>1FdVj$P)d}N9&{fNnC}9>y+p6zrrq`- zX+i?0gS5NpkUJqxNM6*7?-0CXAU_+(CtKuaSLMMCxxD>@sz*sREQ83Oj! z0WILMr=Sr%oU#*~VBCu}IbAyh(#=>z7D{a)2bP~Lr7)gyyMi&rOTm*H%Od2$_lL?Nc^K#9{kHjTcr`^f zatoq;2s+Wnj7P|~xR((ZkMg+S8o^b|;;KC)Htnt>kHHI@u+tP#LoZB7mPEcy9-ouT zqKH-A#43D`UK^~^3%er*Gnr`6ea!V+F}aw9aa2h*2t zovB{;_ z=KR*pwxyo1-xu=xS(s$JnT4JR>&=M=v zfEEo(0WGlt_F4pmfEEpksi1(>)~#V-Un)Aj%!RZET1z`Vq?>V|;=iDY`lM++{=jWWk-z9^~Rl zPBwDFKe0K0jZwzW(hp9q(~dY_4rHQC~@?~gtT7Vi2g=#1E4GcSmV%^ zk`%me5xZC0jgU==Pk<^IUoZB5lZwSKo9%`EjL7)+QAXUI>GtEDH$nmXo=7XZ~v=A0-eF%T_P(0TPAUK4?_11q;UfCFdf8NOUU<(#^(gYt#|f2 zGvE2{?Dz5ce#hx(G$g_AZ%;dVkulCWe+7-2b!EF_+?C5xwG zDdMSGDm;~}meVXPr&~JW{AxCkGb|$)w1T;i1%hBq%Z76iD}p4Vkto*ZWgNi9an))g zQRoMuAHreixAVRji_EJyirZkUgQ$n@z%p(Jt`2+$jCAr5;97?}=M}sTcfnW}UJq9{ z-hemGtIc^|7Q7LzAc=00@TU17-i+@&u3PKzUBn>WneoMBIP>npJ7?;Dyj*|r+7~xoZd`n?@$w7xb0_Ps zd{964m!(r@Lkp+hU3~4G#+jGuSKe&==KaRm>*n82UN9H0*A}k7YGw*~M)E8*lXhnO z@iSNJAAZ0E&R?saer@r?KP`TC>WdqveQ96Zc!@7=TzcOZwRry1`X|3O^W=z=YKjUi z!5*bVO!f?%W`x-};u*Xx7xE;gco@Y0Kcgq!J%X1qM8DV2$X&Z%j=(BxcSl1`pcIZmrgb2-(9$P?dB)v7B2p_ar(S( zT$4M2CzX|FoqN7QX2L!ITPTO0d3JViaL2Q=_f9<*(>$3HPjO0<&@k$0(AY&!%NC9h 
z8k1=|yvIXKD9dl6d}LiJU!b{k)-IFORDlX_>xK!3>sFFgq@rzzZUj1}dK$A?mUscD zl*^^*92kUVeeH1jEdNv>G`NL!9tVPfevSb`Y~U0+22W1Aa&ZfmtEeVzkSd_Ds^Ti( zAmA!BRMl$Ivd%zDdT9@cX;eRk5NT9TExmR5^P?{`esy{AcP{`jF*a)6qAZBGg)1N3 zym7-1#>L;Cy!p{55Onpo&whnm%I#g&N^EM!!-*vmIx$^*BwY`*7vSp7+LNB{%%tyo z;6YC(`Ah*5FE~SHu{}*37E|daV5OURb0=S3Zv`%gD@T&=M)U{p3;@3ebFApyBfG%5j_OIpv zDd3-gz7l9yO{D(=?+lsE-|n&aP%>^Nhjxv|&5rof(2tY1X==RcHo)cDg|4T6w+A3r^*veqx+UHM-s#F18Zz= zjGoWhjNET^ z>?;zQ_ET4!(gOALOd*%E86=9kA%fP%W;8|ePKlBfOHXDA=Wx29kx-~jMs0BEX!%Kd zVLYY?F&>)n!<8q2Y^!B6A)LCT^UQIX72mRcziUzJ89l)J~6g$ zZ;SHGr}xCi;;qU5QS&i#bY$-cEC3+y0r8eGMObD=q;3D>VtiDggv`Y&!pLAnDZdchTw z(SH7h0p7G^(EyC_T9T)3p|lTx@bd%$JadsNyUH=8V(?7r8pmY1jptV^GsqFHaYbTU zRfp^vs7Y6(H{_}TSr%1;kTsy*)qz@u{>$Uuih zFCcnK@%XAawRl1F3^5mSbvG~-tjUO*Ho|e!4FYZkOE1F4MI<#|xlliG)vo{{`z)Th z+G@pbuib=*#m_E6DY#P&F39dG&bul9>Q=n~Sw;3vV|QnO^~ll?hA9XHrYN zY$;zJNsPzGk`TbjeXYLLYvaC_P4$@qV*AV{koK7Xm(UYd?=x4-2p|Uwn@|Sx%RaCI zAprhQ4UJC3$Iafy`pnz^#e}CV{S)%c(qF*=mN4u#78SuI(xLB;JWbNOc*7&i(xE~F zyt0B&r9pv`OEYTn$?x-qf3FA#x*e!k_#0i44sy<~osowNlrJ{6Wag|zhOnK7Qj!Uz z6VA!YAF17d!Y~B&P(L!DmF408s+xi5gD?lhvj!lPbG!haOcm4YHWbADlx z?{>pjFTu`fWc93g103FJawQ_@PP`82D7(W|+yDe_&<(j^H{wQd*IA>|=C;vYu7=k? 
zB!MQ>YNsu^`c;Ix-8LIt6=l-%yIj@PrsSt3C@a_TfE{0<1|_UR@NOvG0DA}@_RH-0 z`E$2BHi^=PTX(1V%rCZyh}qV+tr;>vLCiqA5-dP3hYDwF#tJ7zSZ#Z((r5Pf_dnMX z^1Vw(Ux(wIzYhp|IL9s3KCYjB6#~A`tY3d~>9rFuR)6cw#v3&_;rTh6f*VOo{Ky_d zR)C+13MJ+jro6&U(EFhEYt;KW#X(&_H2pEBcu;uAD(m+Q4(=Qr^yA|mJ|hsj52#pR zO`Yp6Zw;p3oJzo`e0!k517-ZYAoy042q2IzznBQ|WAph!hW~2N_S{OSlq5EP8snSR{GuX_;)n zA+dJ)5TD}5H2NrSc&fL8u-TL!vRbB4$}Liw4D z@?)z7g;9RWo)%{E6)u33!<+zOE416U5&R#(HFdvGYu9YL!NW zZiHHlM{)&R%95XQ9Cx_>Rn>sc;a3Ohk|Roo-eGj|PJyTEyDZug^g zSd&fBbQ3S}4GE@;xS^V8%DrR2DFx0m0Hnj-R80I?VGu)NF>sR21xQJO3joJkmu7MA zA0!A_r3=uewWeS?4TXe;gw=xOvAm{d7}KDMP0t|K1cEJ}QEWN~G2BcURPe%B4kOQ+ zZ}=f=0ApkXVA^4n3?hh2Z48nMBa|u2qD(od%;FJ5swN9TG{y7+R+~P+siW>esIU@g zogy+ShVj)E8#gwvqje2UebZEdQ3M`G`?P?`(OiI#(^t{c5-Ov@F|B3Ku&(Sb@_@|NBfF}i|B05dfEE=*Pke+ zpYX{j2fK=0;6HoW-TE2TQ8%=UbsK6@k4jpM_l`O>xEW1Nuz%|7Ks)fmBKESzhAGp6 zI4Jmq670n;L9#CiOSl>PfuTp(TMgBU0{IUWf0}*JP&Gh^K#CG#(qZwJE*WT}Gvgv6 zab$&zJBvWDf+^5W$dVE=*q{h(;Iu@oxsVAG&I&~C5vXD3cndim$no2Z@6j%pdlO2s z*m6J>pCy@SfM8SHE>t4$(pBy$Xs+<)y{CwS6qh5L@!QO0;wVR!3nc2AJQ@k~Ae!5h zv2#kY;5!hvY`Q_YFZFjA0n-io6=3E!luW7V&ICy*31$Vz^l0Eh%VQ3BHijVKDbtfF zGaVbt`4YqxBm|KR5)OidD?!4GFc<-&RY(&eh}+wgN!gG9?()(Mc*|=Pk@6vdW=Mr4 z!I31Wy2YsC#(^r$g(_oU1Rofo%9uhbxmgBA!QMOt#16*X<^!?Yqb+s}Vy{X!!CPxx zTs!gpyZL>?`CY>kyY@^<^VgBH6Who0ADze#je%bCXSe51e+-JgKm66idA5|oCk=rE z97&a~SEGTp*5!+qFI?8y9&7Df99z=9iq@cNJ0qZOF=)KYh^?ckq%AaB2YcB_xF-0Z z5+d-L%jiz3{V7fDpdR*nxWz#a@RZ2RS-6_rn;jg;xxi_5EBXbj$mO?Xwd2d~BtMSzF`$HHTmuyl12@g#ZbD zuojpLm)R&`ZY@dh+A|SsH3KdI%P^N<^n(3F?1E8&KM6*@ms?PVJbGyS$3`E}7l2&q>DNIy)43QoIBqd|pEA zmq-AA$o`77;v3_0pKHSSbJp|xUVM-_np*KO_F_{bzRC2aOQFmBcI^MIMY8=Y#J}n7 z@g>dYG5&!`bHa{WQ1Uu!p3{JDvDI@DORn&4-*EVr!x0W!0dk%$s>Tg9X2kU#dX3YU z0m7JG<#G5S%;eD>fP1WTZu*705QQCfZcZei(VWy3*SgdMwL5+HaNlDUa delta 1865 zcmZWqe{2&~9Dnb6ZLfE2*KO^7fzSZk>=<~n3?B5_j!YHHa#w4=RluQ!n8 z&MA`x69|hhK?V+67#W}o;1nDfgCYtlhCno=i7_S=;Sco>jfufTzwfqy32pB4zVGwC zKi>Pk&*gS*8SnQ-Jsugs?~nGwv40;w>0P@p;D#yZTH#3@zzB`tft74vVdJ19A(>*r zW!f(N#0;a7{cJ_1=4PjK9WDn`BtF5XAK9vVPL~Cgf)R+ 
zQ`X(whK&|LgbZULjE)gZW!=M9Fw(s+E7J3Z5ZFEo^NC+CNEc>M+D`%*ydROGnLPVr zh!h(#5G7+y9=sN5Yeb|6M)20X7?}!21IzRxL$OLiZ}j_9l^V)^G$zhOuY)d{%BzYd z&y2OG7MM;HDq|hxi`cgEKBiPu7W?5If;Y!9zkjmx=AIL|g9mck59iJ>rdCwoVz#ql zaW#DkTqwUUx`sm=hXWkm1<1-BEuCGFm2F+Et7s#;Q}N7PH+_e%j&ZQU{ujaeDi_(Z zSYszE%bm->17VTY-bOW1)W!GRs{)L0CiU?mUEW$_x%AhZD#L47*v!D&Ia zWpK(*a3@#LDk2bDTuj(a-geaENIQv0>=}VLHVHHgtIoIt+7d9*+}2Ebp^(WFKj`7Cd?P?APy ze_Tz`_c`mn$3|mRc;%NxeQuN7L63WI?J9~R;7#s%0bxM389GI&Q8!6RT9Rvdg}`>N;R@u}%*55^Ph zt0gL?Y)Q==d}*q^MiK%?cwo7<`(Y<{g?yq0i9;8b{lV> z1va;-5?^KErnv7sul5Cp3mhicxu&A>OWgb%AcW~fZintd6}P?zxX$i0^#^|dD`dBP zvmycAWF6g6ty_&#EAf|Xq`Aa?9h`n0RmwF diff --git a/crawler/config.py b/crawler/config.py index 79ebb29..f5bc435 100644 --- a/crawler/config.py +++ b/crawler/config.py @@ -10,6 +10,9 @@ DB_PATH = os.environ.get("DB_PATH", str(PROJECT_ROOT / "server" / "data.db")) # Node API 地址(用于通知推送) API_BASE = os.environ.get("API_BASE", "http://localhost:3001") +# 阿里云 DashScope API Key(用于 AI 提取面板数据,不设则回退到规则/Ollama) +DASHSCOPE_API_KEY = os.environ.get("DASHSCOPE_API_KEY", "") + # 抓取间隔(秒) CRAWL_INTERVAL = int(os.environ.get("CRAWL_INTERVAL", "300")) diff --git a/crawler/extractor_dashscope.py b/crawler/extractor_dashscope.py new file mode 100644 index 0000000..8d92bbf --- /dev/null +++ b/crawler/extractor_dashscope.py @@ -0,0 +1,121 @@ +# -*- coding: utf-8 -*- +""" +阿里云 DashScope(通义千问)提取面板结构化数据 +从新闻文本中提取战损、报复指数、基地状态等,供 db_merge 落库 +API Key 通过环境变量 DASHSCOPE_API_KEY 配置 +""" +import json +import os +import re +from datetime import datetime, timezone +from typing import Any, Dict, Optional + +from panel_schema import validate_category, validate_severity, validate_summary + + +def _call_dashscope_extract(text: str, timeout: int = 15) -> Optional[Dict[str, Any]]: + """调用阿里云 DashScope 提取结构化数据""" + api_key = os.environ.get("DASHSCOPE_API_KEY", "").strip() + if not api_key or not text or len(str(text).strip()) < 10: + return None + try: + import dashscope + from http import HTTPStatus + + dashscope.api_key = 
api_key + + prompt = f"""从以下美伊/中东军事新闻中提取可明确推断的数值,输出 JSON。无依据的字段省略不写。 + +要求: +- summary: 1-2句中文事实摘要,≤80字 +- category: deployment|alert|intel|diplomatic|other +- severity: low|medium|high|critical +- 战损(仅当新闻明确提及数字时填写): + us_personnel_killed, iran_personnel_killed, us_personnel_wounded, iran_personnel_wounded, + us_civilian_killed, iran_civilian_killed, us_civilian_wounded, iran_civilian_wounded, + us_bases_destroyed, iran_bases_destroyed, us_bases_damaged, iran_bases_damaged, + us_aircraft, iran_aircraft, us_warships, iran_warships, us_armor, iran_armor, us_vehicles, iran_vehicles +- retaliation_sentiment: 0-100,仅当新闻涉及伊朗报复/反击情绪时 +- wall_street_value: 0-100,仅当新闻涉及美股/市场反应时 +- key_location_updates: 当新闻提及具体基地遭袭时,数组 [{{"name_keywords":"阿萨德|asad|assad","side":"us","status":"attacked","damage_level":1-3}}] + +原文: +{str(text)[:800]} + +直接输出 JSON,不要其他解释:""" + + response = dashscope.Generation.call( + model="qwen-turbo", + messages=[{"role": "user", "content": prompt}], + result_format="message", + max_tokens=512, + ) + + if response.status_code != HTTPStatus.OK: + return None + raw = (response.output.get("choices", [{}])[0].get("message", {}).get("content", "") or "").strip() + raw = re.sub(r"^```\w*\s*|\s*```$", "", raw) + return json.loads(raw) + except Exception: + return None + + +def extract_from_news(text: str, timestamp: Optional[str] = None) -> Dict[str, Any]: + """ + 从新闻文本提取结构化数据,符合面板 schema + 返回: { situation_update?, combat_losses_delta?, retaliation?, wall_street?, key_location_updates? 
} + """ + ts = timestamp or datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.000Z") + out: Dict[str, Any] = {} + parsed = _call_dashscope_extract(text) + if not parsed: + return out + + if parsed.get("summary"): + out["situation_update"] = { + "summary": validate_summary(str(parsed["summary"])[:120], 120), + "category": validate_category(str(parsed.get("category", "other")).lower()), + "severity": validate_severity(str(parsed.get("severity", "medium")).lower()), + "timestamp": ts, + } + + loss_us = {} + loss_ir = {} + for k in ["personnel_killed", "personnel_wounded", "civilian_killed", "civilian_wounded", + "bases_destroyed", "bases_damaged", "aircraft", "warships", "armor", "vehicles"]: + uk, ik = f"us_{k}", f"iran_{k}" + if uk in parsed and isinstance(parsed[uk], (int, float)): + loss_us[k] = max(0, int(parsed[uk])) + if ik in parsed and isinstance(parsed[ik], (int, float)): + loss_ir[k] = max(0, int(parsed[ik])) + if loss_us or loss_ir: + out["combat_losses_delta"] = {} + if loss_us: + out["combat_losses_delta"]["us"] = loss_us + if loss_ir: + out["combat_losses_delta"]["iran"] = loss_ir + + if "retaliation_sentiment" in parsed: + v = parsed["retaliation_sentiment"] + if isinstance(v, (int, float)) and 0 <= v <= 100: + out["retaliation"] = {"value": int(v), "time": ts} + + if "wall_street_value" in parsed: + v = parsed["wall_street_value"] + if isinstance(v, (int, float)) and 0 <= v <= 100: + out["wall_street"] = {"time": ts, "value": int(v)} + + if "key_location_updates" in parsed and isinstance(parsed["key_location_updates"], list): + valid = [] + for u in parsed["key_location_updates"]: + if isinstance(u, dict) and u.get("name_keywords") and u.get("side") in ("us", "iran"): + valid.append({ + "name_keywords": str(u["name_keywords"]), + "side": u["side"], + "status": str(u.get("status", "attacked"))[:20], + "damage_level": min(3, max(1, int(u["damage_level"]))) if isinstance(u.get("damage_level"), (int, float)) else 2, + }) + if valid: + 
out["key_location_updates"] = valid + + return out diff --git a/crawler/news_storage.py b/crawler/news_storage.py new file mode 100644 index 0000000..9a5e101 --- /dev/null +++ b/crawler/news_storage.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +""" +资讯内容独立存储,支持历史去重 +爬虫拉回数据 → 计算 content_hash → 若已存在则跳过(去重)→ 新数据落库 news_content +""" +import hashlib +import os +import re +import sqlite3 +from datetime import datetime, timezone +from typing import List, Optional, Tuple + +from config import DB_PATH + + +def _to_utc_iso(dt: datetime) -> str: + if dt.tzinfo: + dt = dt.astimezone(timezone.utc) + return dt.strftime("%Y-%m-%dT%H:%M:%S.000Z") + + +def _normalize_for_hash(text: str) -> str: + """归一化文本用于生成去重 hash""" + if not text: + return "" + t = re.sub(r"\s+", " ", str(text).strip().lower())[:600] + return re.sub(r"[\x00-\x1f]", "", t) + + +def content_hash(title: str, summary: str, url: str) -> str: + """根据标题、摘要、URL 生成去重 hash,相似内容视为重复""" + raw = _normalize_for_hash(title) + "|" + _normalize_for_hash(summary) + "|" + (url or "").strip() + return hashlib.sha256(raw.encode("utf-8")).hexdigest()[:32] + + +def _ensure_table(conn: sqlite3.Connection) -> None: + conn.execute(""" + CREATE TABLE IF NOT EXISTS news_content ( + id TEXT PRIMARY KEY, + content_hash TEXT NOT NULL UNIQUE, + title TEXT NOT NULL, + summary TEXT NOT NULL, + url TEXT NOT NULL DEFAULT '', + source TEXT NOT NULL DEFAULT '', + published_at TEXT NOT NULL, + category TEXT NOT NULL DEFAULT 'other', + severity TEXT NOT NULL DEFAULT 'medium', + created_at TEXT NOT NULL DEFAULT (datetime('now')) + ) + """) + try: + conn.execute("CREATE UNIQUE INDEX IF NOT EXISTS idx_news_content_hash ON news_content(content_hash)") + except sqlite3.OperationalError: + pass + try: + conn.execute("CREATE INDEX IF NOT EXISTS idx_news_content_pub ON news_content(published_at DESC)") + except sqlite3.OperationalError: + pass + conn.commit() + + +def exists_by_hash(conn: sqlite3.Connection, h: str) -> bool: + row = conn.execute("SELECT 1 
FROM news_content WHERE content_hash = ? LIMIT 1", (h,)).fetchone() + return row is not None + + +def insert_news( + conn: sqlite3.Connection, + *, + title: str, + summary: str, + url: str = "", + source: str = "", + published: datetime, + category: str = "other", + severity: str = "medium", +) -> Optional[str]: + """ + 插入资讯,若 content_hash 已存在则跳过(去重) + 返回: 新插入的 id,或 None 表示重复跳过 + """ + _ensure_table(conn) + h = content_hash(title, summary, url) + if exists_by_hash(conn, h): + return None + uid = "nc_" + hashlib.sha256(f"{h}{datetime.utcnow().isoformat()}".encode()).hexdigest()[:14] + ts = _to_utc_iso(published) + conn.execute( + """INSERT INTO news_content (id, content_hash, title, summary, url, source, published_at, category, severity) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", + (uid, h, (title or "")[:500], (summary or "")[:2000], (url or "")[:500], (source or "")[:100], ts, category, severity), + ) + conn.commit() + return uid + + +def save_and_dedup(items: List[dict], db_path: Optional[str] = None) -> Tuple[List[dict], int]: + """ + 去重后落库 news_content + items: [{"title","summary","url","published","category","severity","source"?}, ...] 
+ 返回: (通过去重的新项列表, 实际新增条数) + """ + path = db_path or DB_PATH + if not os.path.exists(path): + return [], 0 + conn = sqlite3.connect(path, timeout=10) + try: + _ensure_table(conn) + new_items: List[dict] = [] + count = 0 + for u in items: + title = (u.get("title") or "")[:500] + summary = (u.get("summary") or u.get("title") or "")[:2000] + url = (u.get("url") or "")[:500] + source = (u.get("source") or "")[:100] + pub = u.get("published") + if isinstance(pub, str): + try: + pub = datetime.fromisoformat(pub.replace("Z", "+00:00")) + except ValueError: + pub = datetime.now(timezone.utc) + elif pub is None: + pub = datetime.now(timezone.utc) + cat = u.get("category", "other") + sev = u.get("severity", "medium") + uid = insert_news( + conn, + title=title, + summary=summary, + url=url, + source=source, + published=pub, + category=cat, + severity=sev, + ) + if uid: + count += 1 + new_items.append({**u, "news_id": uid}) + return new_items, count + finally: + conn.close() diff --git a/crawler/realtime_conflict_service.py b/crawler/realtime_conflict_service.py index 76b87c4..5e4e626 100644 --- a/crawler/realtime_conflict_service.py +++ b/crawler/realtime_conflict_service.py @@ -283,7 +283,7 @@ def _rss_to_gdelt_fallback() -> None: # ========================== -# RSS 新闻抓取(补充 situation_update + AI 提取面板数据) +# RSS 新闻抓取:资讯落库(去重) → AI 提取 → 面板数据落库 → 通知前端 # ========================== LAST_FETCH = {"items": 0, "inserted": 0, "error": None} @@ -292,6 +292,7 @@ def fetch_news() -> None: try: from scrapers.rss_scraper import fetch_all from db_writer import write_updates + from news_storage import save_and_dedup from translate_utils import translate_to_chinese from cleaner_ai import clean_news_for_panel from cleaner_ai import ensure_category, ensure_severity @@ -304,36 +305,44 @@ def fetch_news() -> None: it["summary"] = clean_news_for_panel(raw_summary or raw_title, max_len=120) it["category"] = ensure_category(it.get("category", "other")) it["severity"] = 
ensure_severity(it.get("severity", "medium")) - n = write_updates(items) if items else 0 + it["source"] = it.get("source") or "rss" + # 1. 历史去重:资讯内容落库 news_content(独立表,便于后续消费) + new_items, n_news = save_and_dedup(items, db_path=DB_PATH) + # 2. 面板展示:新增资讯写入 situation_update(供前端 recentUpdates) + n_panel = write_updates(new_items) if new_items else 0 LAST_FETCH["items"] = len(items) - LAST_FETCH["inserted"] = n - if items: - _extract_and_merge_panel_data(items) + LAST_FETCH["inserted"] = n_news + # 3. AI 提取 + 合并到 combat_losses / key_location 等 + if new_items: + _extract_and_merge_panel_data(new_items) # GDELT 禁用时用 RSS 填充 gdelt_events,使地图有冲突点 if GDELT_DISABLED: _rss_to_gdelt_fallback() - # 每次抓取完成都通知 Node 更新时间戳,便于「实时更新」显示 _notify_node() - print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {len(items)} 条,新增入库 {n} 条") + print(f"[{datetime.now().strftime('%H:%M:%S')}] RSS 抓取 {len(items)} 条,去重后新增 {n_news} 条资讯,面板 {n_panel} 条") except Exception as e: LAST_FETCH["error"] = str(e) print(f"[{datetime.now().strftime('%H:%M:%S')}] 新闻抓取失败: {e}") def _extract_and_merge_panel_data(items: list) -> None: - """对新闻做 AI/规则 提取,合并到 combat_losses / retaliation / wall_street_trend 等表""" + """AI 分析提取面板相关数据,清洗后落库""" if not items or not os.path.exists(DB_PATH): return try: from db_merge import merge - if os.environ.get("CLEANER_AI_DISABLED", "0") == "1": + use_dashscope = bool(os.environ.get("DASHSCOPE_API_KEY", "").strip()) + if use_dashscope: + from extractor_dashscope import extract_from_news + limit = 10 + elif os.environ.get("CLEANER_AI_DISABLED", "0") == "1": from extractor_rules import extract_from_news + limit = 25 else: from extractor_ai import extract_from_news + limit = 10 from datetime import timezone merged_any = False - # 规则模式可多处理几条(无 Ollama);AI 模式限制 5 条避免调用过多 - limit = 25 if os.environ.get("CLEANER_AI_DISABLED", "0") == "1" else 10 for it in items[:limit]: text = (it.get("title", "") or "") + " " + (it.get("summary", "") or "") if len(text.strip()) < 20: diff --git 
a/crawler/requirements.txt b/crawler/requirements.txt index e1a5d6e..5facd77 100644 --- a/crawler/requirements.txt +++ b/crawler/requirements.txt @@ -3,3 +3,4 @@ feedparser>=6.0.0 fastapi>=0.109.0 uvicorn>=0.27.0 deep-translator>=1.11.0 +dashscope>=1.20.0 diff --git a/docker-compose.yml b/docker-compose.yml index 9d0dbc9..10bdb59 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -22,9 +22,9 @@ services: environment: - DB_PATH=/data/data.db - API_BASE=http://api:3001 - - CLEANER_AI_DISABLED=1 - GDELT_DISABLED=1 - RSS_INTERVAL_SEC=60 + - DASHSCOPE_API_KEY=${DASHSCOPE_API_KEY:-} volumes: - app-data:/data depends_on: diff --git a/docs/CRAWLER_PIPELINE.md b/docs/CRAWLER_PIPELINE.md new file mode 100644 index 0000000..30064c0 --- /dev/null +++ b/docs/CRAWLER_PIPELINE.md @@ -0,0 +1,65 @@ +# 爬虫数据流水线 + +## 数据流 + +``` +RSS 抓取 + ↓ 翻译、清洗 + ↓ news_storage.save_and_dedup() → 历史去重 + ↓ +news_content(资讯独立表,供后续消费) + ↓ + ↓ 去重后的新数据 + ↓ +situation_update(面板展示用) + ↓ + ↓ AI 提取(阿里云 DashScope) + ↓ +combat_losses / retaliation / key_location / wall_street_trend + ↓ + ↓ notify Node + ↓ +前端 WebSocket + 轮询 +``` + +## 阿里云 DashScope API Key + +设置环境变量 `DASHSCOPE_API_KEY` 后,爬虫使用阿里云通义千问进行 AI 提取。不设置时:若 `CLEANER_AI_DISABLED=1` 则使用规则提取(`extractor_rules`),否则使用 `extractor_ai`(Ollama)。 + +```bash +# 本地 +export DASHSCOPE_API_KEY=sk-xxx + +# Docker +DASHSCOPE_API_KEY=sk-xxx docker compose up -d +# 或在 .env 中写入 DASHSCOPE_API_KEY=sk-xxx +``` + +## 表说明 + +| 表 | 用途 | +|----|------| +| `news_content` | 资讯原文,独立存储,支持去重(content_hash),供后续消费 | +| `situation_update` | 面板「近期更新」展示 | +| `combat_losses` | 战损数据(AI/规则提取) | +| `key_location` | 基地状态 | +| `gdelt_events` | 地图冲突点 | + +## 去重逻辑 + +根据 `content_hash = sha256(normalize(title) + "|" + normalize(summary) + "|" + url)` 的前 32 位十六进制判断;归一化(去首尾空白、合并空白、转小写、截取前 600 字符)后标题、摘要、URL 完全一致的内容视为重复,不入库。 + +## 消费资讯 + +- HTTP: `GET /api/news?limit=50` +- 调试: `/db` 面板查看 `news_content` 表 + +## 链路验证 + +运行脚本一键检查全链路: + +```bash +./scripts/verify-pipeline.sh +``` + +支持环境变量覆盖:`API_URL`、`CRAWLER_URL` diff --git a/package.json b/package.json index bf8d36a..6548247 100644 ---
a/package.json +++ b/package.json @@ -14,7 +14,9 @@ "build": "vite build", "typecheck": "tsc --noEmit", "lint": "eslint .", - "preview": "vite preview" + "preview": "vite preview", + "verify": "./scripts/verify-pipeline.sh", + "verify:full": "./scripts/verify-pipeline.sh --start-crawler" }, "dependencies": { "better-sqlite3": "^11.6.0", diff --git a/scripts/verify-pipeline.sh b/scripts/verify-pipeline.sh new file mode 100755 index 0000000..5098f89 --- /dev/null +++ b/scripts/verify-pipeline.sh @@ -0,0 +1,124 @@ +#!/usr/bin/env bash +# 验证爬虫 → 数据库 → API → 前端 全链路 +# 用法: ./scripts/verify-pipeline.sh [--start-crawler] +set -e + +API_URL="${API_URL:-http://localhost:3001}" +CRAWLER_URL="${CRAWLER_URL:-http://localhost:8000}" +START_CRAWLER=false +[[ "${1:-}" = "--start-crawler" ]] && START_CRAWLER=true + +PROJECT_ROOT="$(cd "$(dirname "$0")/.." && pwd)" + +echo "==========================================" +echo "US-Iran 态势面板 链路验证" +echo "API: $API_URL | Crawler: $CRAWLER_URL" +echo "==========================================" +echo "" + +# 可选:启动爬虫 +if $START_CRAWLER; then + echo "[0/6] 启动爬虫..." + if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then + echo " ✓ 爬虫已在运行" + else + cd "$PROJECT_ROOT/crawler" + python3 -c "import uvicorn" 2>/dev/null || { echo " 需安装: pip install uvicorn"; exit 1; } + uvicorn realtime_conflict_service:app --host 127.0.0.1 --port 8000 & + echo " 等待爬虫就绪..." + for i in $(seq 1 15); do + sleep 2 + if curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then + echo " ✓ 爬虫已启动" + echo " 等待首次 RSS 抓取(约 70 秒)..." + sleep 70 + break + fi + done + if ! curl -sf "$CRAWLER_URL/crawler/status" >/dev/null 2>&1; then + echo " ✗ 爬虫启动超时" + exit 1 + fi + fi + echo "" +fi + +# 1. API 健康检查 +echo "[1/6] API 健康检查..." +if curl -sf "$API_URL/api/health" > /dev/null; then + echo " ✓ API 正常" +else + echo " ✗ API 无响应,请先运行: npm run api" + exit 1 +fi + +# 2. 态势数据 +echo "[2/6] 态势数据..." 
+SIT=$(curl -sf "$API_URL/api/situation" 2>/dev/null || echo "{}") +if echo "$SIT" | grep -q "lastUpdated"; then + echo " ✓ 态势数据可读" + LAST=$(echo "$SIT" | grep -o '"lastUpdated":"[^"]*"' | head -1) + echo " $LAST" +else + echo " ✗ 态势数据异常" + exit 1 +fi + +# 3. 爬虫状态 +echo "[3/6] 爬虫状态..." +CRAWLER=$(curl -sf "$CRAWLER_URL/crawler/status" 2>/dev/null || echo "{}") +if echo "$CRAWLER" | grep -q "db_path\|db_exists"; then + echo " ✓ 爬虫服务可访问" + if command -v jq &>/dev/null; then + CNT=$(echo "$CRAWLER" | jq -r '.situation_update_count // "?"') + echo " situation_update 条数: $CNT" + fi +else + echo " ⚠ 爬虫未启动或不可达(可选,需单独运行爬虫)" +fi + +# 4. 资讯表 +echo "[4/6] 资讯表 news_content..." +NEWS=$(curl -sf "$API_URL/api/news?limit=3" 2>/dev/null || echo '{"items":[]}') +if echo "$NEWS" | grep -q '"items"'; then + if command -v jq &>/dev/null; then + N=$(echo "$NEWS" | jq '.items | length') + echo " ✓ 最近 $N 条资讯" + else + echo " ✓ 资讯接口可读" + fi +else + echo " ⚠ news_content 可能为空(爬虫未跑或刚启动)" +fi + +# 5. 战损数据 +echo "[5/6] 战损数据 combat_losses..." +if echo "$SIT" | grep -q "personnelCasualties"; then + echo " ✓ 战损字段存在" + if command -v jq &>/dev/null; then + US_K=$(echo "$SIT" | jq -r '.usForces.combatLosses.personnelCasualties.killed // "?"') + IR_K=$(echo "$SIT" | jq -r '.iranForces.combatLosses.personnelCasualties.killed // "?"') + echo " 美军阵亡: $US_K | 伊朗阵亡: $IR_K" + fi +else + echo " ✗ 战损结构异常" +fi + +# 6. 通知接口(仅验证可调用) +echo "[6/6] 通知接口 POST /api/crawler/notify..." 
+NOTIFY=$(curl -sf -X POST "$API_URL/api/crawler/notify" 2>/dev/null || echo "{}") +if echo "$NOTIFY" | grep -q '"ok"'; then + echo " ✓ 通知接口正常" +else + echo " ⚠ 通知接口可能异常" +fi + +echo "" +echo "==========================================" +echo "验证完成。" +echo "" +echo "建议:" +echo " - 访问 $API_URL/db 查看各表数据" +echo " - 爬虫未启动时: ./scripts/verify-pipeline.sh --start-crawler" +echo " - 或手动启动: cd crawler && uvicorn realtime_conflict_service:app --port 8000" +echo "==========================================" diff --git a/server/data.db-shm b/server/data.db-shm index 4829ca3eea47cfbe18e46f80d42e885e40e819ed..54018eefac726e3ac8b95f1995e2de7f92cc155b 100644 GIT binary patch delta 724 zcmb7?=~IqT7{;ILSG_N?lr=;}=_T1h*|VhGzGMl>QpQKq3}dP>rti(`4=~1u3X#Ve zB4l4evWx8d7DCDM;6pyTW`5_~_kHfU=00;yZZ5gGVR>t6GIx4K++AJn_}_lp+m`Ly)J%V>hKDhLk_}rNC6)fT13f)MqOF5? ziv){A3u`7L+9Ii0oSa`C>Sg&Evcxu$?r(5Zpn|Z1Z($HbmGpm&{FRRbu$xh7tBY1G6+^t9MR;wy9Vp+N1p{ z)e#-j37yheUC?D+)pgy}9o^SMJ=Qb5)Em9mCwYPGt*G}!$ULDXOm8o1Obz0|i zQI)!;YTeRZ)u>ia^jxp>Rv%QSullY=8F=8JC9P>kdphAqcX|>;FroA#oKcKrJd=qh ziK)zBHuG4>VwSR;jDH3(H?FPr$SKke?UJc|I;g`ss^hBA8J*W9T~U>8=(g_Zfgb6p zUg)*n>7zcYUO)6x!V@R0XoC+O=uB7q37{7txCmnaqZ!8pVo4yGY0P8}^GRh1>8!y1 WNWbNFIUC*8*~N}Q3?s^Z%&cFSe7+_C delta 238 zcmZo@U}|V!s+V}A%K!o#K+MR%Ag~-rcmUajJN1gCuN)HD_s`Q^yRGDYSy~>Vw^UwZ2C`}x NCts^uH`$1*1^{#{Q{(^u diff --git a/server/data.db-wal b/server/data.db-wal index 628ee381ae2d9e952c5c1689917bee097b3028d1..1fc0997850c7b777efdd8b43f614caeff910e0d6 100644 GIT binary patch delta 9238 zcmeI%eLPh89tZFlV~m7hFo-fLp~4u(pj1L7Ph~yd70N`C#E_LjPD({-JXB(9MVp5$ zOIvn(x?An`ps4M+m1?WkD%)n)N_6X<-=5*l@BEJQ>d*Un-Rt$5SIzvsb3X6i!<^rl zGaV&s2ILjY9E^#nU@S}(V`FL<2UEw!U>ew1OcT?>v@tHGgXv;COb^q?3@|=sh#6tV z*f`9j#dOg^BL;&p6MryHzxSR$Enn8PA&{vk9X(}w?jE;O^2^VG%a=^a?(a} z#N?ctGpwvb*YYbQD1KOdSykArH2A4c6Aad3Y=Qtq!F*)_kh4b0{Vj^cPIcMO&vt_eG z^)wYZpdHHL`Fr(5>^0d+yYqm7{@k@MMbj)N0k2(1Ij(X>|Yqy0Wa&S&JD0{xrie|5zUDsn(Ol=CLz_Wo1S zEA~WACXwUhDnxSPwW}TKm*;5_Iho{~Ek9o~=GFF=C~`nMlp{$$vT*5yj)bB=UwXG}F58l_ 
zgviMv=XA8hrBzOz^_e0Ev_m=7VL3^^SDq{+a@G&$p#7-}yJsD_cd(ntSx?UC4ZB$W z=b`yW6*-_C%IOcT5V9{FjUjS25IN2i{h6D%h?ULrSWo0^Am{AaZ)IKnOO~f12ed;u zy8U*0!){nP5jojJjtfQ36K9cIsOX1zL{2t2XU0FK{a{P zc*EenIo{N#(#db=Q#)2`HLtloz_n&GSqzoGt011I zb<`KG;bjphk4ee1r{L8`+|L!RG)Sl8<&t@sjUJq;=8a1r9;kwNM`JBBY6l3AE-yo{KjtF4(k5+EL^f_O<5_G)*QSU8|O7A4Ptf>#tMTXl8X zV-_7RkIc)r?Vl0drt&+)162^u-nzzd!H1hyQJyL#58rl?)8Lh)=kk{_Pca=2BlCPK zyIUE0JMKX|PzCW8{}%G1J#w)Q%41XV@LdPtJ!R#)Ok27#mX5cH%nSX%I}#akCKlp> zDu~w>-n-Ldo|+ZPQ={ZjZ6Xid-p&uZm6=G#%O~^ZYBsP#JFuT29;kwNiGLm`5+1NA zMtK}c9@U=UGsDzkUQb{v9d9$4$ItGsjww%xgm|C|;>E9Kv`y=i-a&cz8H?h)M|Fzl z*It})MduRL@&~==9{I|DaawlD*<834JgsYvd*mNCEhj^e0 z;%$x3?K$Fb`yI;D{AgaC^rqZsThePf-gYvt$?6jG!7`IY5D!#AJc;M8gG(+?w?%nc zqwpME@MA!um-C@2{J|Nwy>z@nGA}&-N!KldXFP}psvw?454*#tKi(SUX;bp-UF{Lx z<&O3)kGhb@bi5)m@5%{L+SwHoN+BMof_RsU#JBI5{IMJ5aYx~yqh}RdeC)-{>IORA z4l=K2RolAscT0L89;kwNh30P)hw?>TG< z=0H491@U;hj0bHJG#{Zn-BEbx=)F2;hj^e0;u&9Jsr9GGj-xz1N*>h( z)xCAuSErTU4W;AB$-I((x0M@v?3u9o1jGYX5HJ6+ z=>(p;bRWtyq~tkKd^B8kKXcoKN$1e<%E>%+wcc|F@9`Czni=s$5>zqq$H;xW72>B2 zc1|I3xlXZ(u0G!216mb-`tq5@4ov;#`TZql3V>_>5hziDym7&`2Hx!nO1HFiE{ z4ad2exn>D_PM)0+$I!xpnQtzOS(+$KlCDjSTDCe#CQXtl|KHmq(91o@OAzEf*Vjv+ z)FO`=Yi_Q1LsWcBYP=xGD=bLhAAmoCeSHPN{@#m&z2qHPW^yx$iG0p_b@}Z&p0=gs zbPWOim%2JFWtmidz0Qz3{8Dn-(!>?1@p5LCp8U*LYVxA5`0}p0F&xX)vUq8VCH|Tz zc&=`_N*cQ&ZIz{bXCq&pbJ&7qEg2`@p=vr_NhgaEDL?a(7=sEk<=3L@m7gud$56fb z_Lsrx2Hs#ehwq1GHGd!M>eoNeyhw+o!v|F(O@&S09xET}eyGvBSVu*N4`#w=_-!wb zyss(SYw(U}Gl1ss)TVRG1w4SQ8t0YPSY8O2LtQo>%%M6lhipDR zi#@8@NhTaai3KxPYeiD5G-E|Fw|76(*o87XYz(@fEOAHG91&$Iu;L0p`>zK=GlH#wyQ_AN?{Ic@MG{Mp~j` zWCBmGPdu#uzXAn<|B@3IAFLz4;T2fFPVKt@>A)S0X7?tp4$stCRW>=~{9SZkGoswr zB(4sQ_~F2}p^-z%=Z&x6+*mo2ZeQDtzX9AJ9eGpZ?ptGa{Myh4ZM&cf+IAa_VmB6? 
zc07Rcj7Q^1@MAzcaZjVSuw!O79d8er7Z+|SNsmlugm|C|;zbly>+ox*&O>?QKAKlq zWo@Xdv4HkI#$GZn{8YPz-TXyc37+Dr092{qkC8E!l)dP*ZB83^JUVx5A9u2gt37_j zF1B@c5=W3rzfUd@ANIUIg($VfOC4QBF1C(hXHW`tI@DD=Nf3DNG1_Sps!rQ0{aiI7 z+w~-l?HQosO*bbMcrdhNPIuPlCA5odpg93m5YLW3t6|c2l@u3}rXS6#xpGaflzn%A zj<=7@GkSiqoB0K@}T5O2|=fWdLPD~Hh08$TLv__piF=;baAxjy@+OdmR4C7E~S z^@4zX6#){62dW_6S5x@!0zi4^%F7c{PzCYMdbFs1-clNl z@&uGT{H8U+GieC(C@!yPq8q*aWL{eP0q=WpXW0Z#aXtW5(D^{#cF4ugt>)-|KIAyS z#a7}hLO$f;?e7CETE#(VPs~T@2|Ca<-)@PSAMmv^T~7{>dve3i)^OpFr#dvypb8r3 zlDFX}&Od+KiSi~=^6=a5$UwjP;KR99{o6A--WOzEWpZ_7bf393!~<0jFaPrL_d{K~ zRZ!lfQF!R+{c|w5v2RbxLpokHnOEkUe(n&fNDbnFDu|cUmx-Ni7wMzC$&@_&*8ybo z@GnwhUnJOA(eVzFdG%GV?k#UCv4VJ@3gY$4KmEG1_PQO)v!LWD{W>smicAf>P;k;* z6hOzTA@k0KI&ThjS>yxpKo!J`_dc5(Tc-UN%9}#P6Dj>VFv1(Un`E}^diQubUM-n- T!E7M-!Lm9g!Q&9O0&4#a5XFhS delta 66 zcmV~$$q_>U006*2PH`rYI8u?9R&?O89q`kJLX?xI-T%A>`!Y@fp~%VE#nsJS>>-iL T { 'retaliation_current', 'retaliation_history', 'situation_update', + 'news_content', 'gdelt_events', 'conflict_stats', ] @@ -27,6 +28,7 @@ router.get('/db/dashboard', (req, res) => { feedback: 'created_at DESC', situation: 'updated_at DESC', situation_update: 'timestamp DESC', + news_content: 'published_at DESC', gdelt_events: 'event_time DESC', wall_street_trend: 'time DESC', retaliation_history: 'time DESC', @@ -55,6 +57,17 @@ router.get('/db/dashboard', (req, res) => { } }) +// 资讯内容(独立表,供后续消费) +router.get('/news', (req, res) => { + try { + const limit = Math.min(parseInt(req.query.limit, 10) || 50, 200) + const rows = db.prepare('SELECT id, title, summary, url, source, published_at, category, severity, created_at FROM news_content ORDER BY published_at DESC LIMIT ?').all(limit) + res.json({ items: rows }) + } catch (err) { + res.status(500).json({ error: err.message }) + } +}) + router.get('/situation', (req, res) => { try { res.json(getSituation()) diff --git a/src/config.ts b/src/config.ts index 7f2ba45..3221625 100644 --- a/src/config.ts +++ b/src/config.ts @@ -8,3 +8,4 @@ export const 
config = { /** 是否显示滚动情报 */ showNewsTicker: false, } +