feat: new project

2026-03-07 19:13:49 +08:00
commit ea760bb71c
27 changed files with 5866 additions and 0 deletions
--- a/fetch_720yun.py
+++ b/fetch_720yun.py
@@ -0,0 +1,255 @@
+#!/usr/bin/env python3
+"""
+从 720yun 页面抓取全景资源并本地化。
+用法: python3 fetch_720yun.py "https://www.720yun.com/vr/c8525usOunr"
+若页面由 JS 动态加载，请使用「手动获取」方式（见 README）。
+"""
+import re
+import sys
+import json
+import urllib.request
+import urllib.error
+from pathlib import Path
+
+def fetch_html(url):
+    """Download *url* and return the response body as text.
+
+    The request is sent with a desktop-browser User-Agent and a 15 second
+    timeout.  The body is decoded with the charset declared in the
+    response's Content-Type header; when no charset is declared, or the
+    declared name is not a codec Python knows, UTF-8 is used.  Undecodable
+    bytes are replaced rather than raising.
+
+    Any exception raised by ``urllib.request.urlopen`` propagates to the
+    caller.
+    """
+    req = urllib.request.Request(url, headers={
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36'
+    })
+    with urllib.request.urlopen(req, timeout=15) as r:
+        # Response headers are an email.message.Message; this returns the
+        # lower-cased charset parameter of Content-Type, or None.
+        charset = r.headers.get_content_charset() or 'utf-8'
+        body = r.read()
+    try:
+        return body.decode(charset, errors='replace')
+    except LookupError:
+        # The server announced a codec name Python does not recognise.
+        return body.decode('utf-8', errors='replace')
+
+def _scan_balanced(html, start, primary_open, primary_close,
+                   secondary_open, secondary_close):
+    """Return the index just past the bracket closing ``html[start]``, or -1.
+
+    ``html[start]`` is expected to be ``primary_open``.  Both bracket pairs
+    share one depth counter, but only a ``primary_close`` that brings the
+    depth back to zero ends the scan.  Brackets inside single- or
+    double-quoted strings are ignored, and a backslash inside a string
+    escapes the following character.  -1 means the text ended before the
+    value was closed.
+    """
+    depth = 0
+    quote = None  # the quote character of the string we are inside, if any
+    j = start
+    end = len(html)
+    while j < end:
+        c = html[j]
+        if quote:
+            if c == '\\':
+                j += 2  # skip the backslash and the character it escapes
+                continue
+            if c == quote:
+                quote = None
+        elif c in '"\'':
+            quote = c
+        elif c == primary_open:
+            depth += 1
+        elif c == primary_close:
+            depth -= 1
+            if depth == 0:
+                return j + 1
+        elif c == secondary_open:
+            depth += 1
+        elif c == secondary_close:
+            depth -= 1
+        j += 1
+    return -1
+
+
+def _extract_balanced_json(html, start_marker, open_char='{', close_char='}'):
+    """Return every bracket-balanced value that follows *start_marker* in *html*.
+
+    For each occurrence of *start_marker*, whitespace and ``=`` characters
+    after it are skipped.  If the next character is *open_char* or the
+    opening character of the other pair (``[``/``]`` when *open_char* is
+    ``{``, otherwise ``{``/``}``), the text up to the matching closing
+    bracket is collected.  Occurrences followed by anything else, or whose
+    value is never closed, contribute nothing.
+
+    Returns a list of raw substrings in order of appearance; the values are
+    not parsed or validated as JSON here.
+    """
+    results = []
+    if open_char == '{':
+        other_open, other_close = '[', ']'
+    else:
+        other_open, other_close = '{', '}'
+    i = 0
+    while True:
+        pos = html.find(start_marker, i)
+        if pos < 0:
+            break
+        # Resume just past this hit so overlapping markers are still seen.
+        i = pos + 1
+        start = pos + len(start_marker)
+        # Skip whitespace (including CR, for CRLF pages) and the '=' sign.
+        while start < len(html) and html[start] in ' \t\r\n=':
+            start += 1
+        if start >= len(html):
+            continue
+        first = html[start]
+        if first == open_char:
+            stop = _scan_balanced(html, start, open_char, close_char,
+                                  other_open, other_close)
+        elif first == other_open:
+            stop = _scan_balanced(html, start, other_open, other_close,
+                                  open_char, close_char)
+        else:
+            continue
+        if stop > 0:
+            results.append(html[start:stop])
+    return results
+
+
+def find_json_assignments(html):
+    """Collect raw JSON blobs and panorama URLs embedded in *html*.
+
+    Two kinds of strings are returned in one list:
+
+    * the bracket-balanced value following each known state marker
+      (``__INITIAL_STATE__``, ``window.__DATA__``, ``window.__NUXT_DATA__``),
+      as extracted by ``_extract_balanced_json``;
+    * the value of every ``"panorama":"..."`` pair found by a simple regex.
+
+    Duplicates are removed while keeping first-seen order.  This matters
+    because the bare ``__INITIAL_STATE__`` marker also matches inside
+    ``window.__INITIAL_STATE__``, so the same blob is otherwise reported
+    twice.
+    """
+    markers = [
+        'window.__INITIAL_STATE__',
+        '__INITIAL_STATE__',
+        'window.__DATA__',
+        'window.__NUXT_DATA__',
+    ]
+    results = []
+    for marker in markers:
+        results.extend(_extract_balanced_json(html, marker, '{', '}'))
+    # Also pick up the simple "panorama":"url" form directly.
+    for m in re.finditer(r'"panorama"\s*:\s*"([^"]+)"', html):
+        results.append(m.group(1))
+    # dict.fromkeys de-duplicates while preserving insertion order.
+    return list(dict.fromkeys(results))
+
+def find_image_urls(html):
+    """Return candidate panorama image URLs found in *html*.
+
+    Two case-insensitive patterns are applied:
+
+    1. image URLs whose text contains ``.720yun.com``, ``.qpic.cn`` or
+       ``.gtimg.com``;
+    2. image URLs with a path segment starting with ``panorama``,
+       ``scene``, ``photo``, ``pano`` or ``vr``.
+
+    The result is de-duplicated and ordered deterministically: all matches
+    of pattern 1 in document order, then new matches of pattern 2 in
+    document order.  A stable order matters because the caller downloads
+    the first entry.
+    """
+    # ``jpe?g`` rather than ``jpg|jpeg``: with the latter, ``.jpeg`` URLs
+    # were cut off at ``.jp`` + ``g`` because ``jpg`` is tried first.
+    url_pattern = re.compile(
+        r'https?://[^\s"\'<>]+?\.(?:720yun\.com|qpic\.cn|gtimg\.com)[^\s"\'<>]*\.(?:jpe?g|png|webp)',
+        re.I
+    )
+    alt_pattern = re.compile(
+        r'https?://[^\s"\'<>]+?/(?:panorama|scene|photo|pano|vr)[^\s"\'<>]*\.(?:jpe?g|png|webp)',
+        re.I
+    )
+    # A dict keeps insertion order and gives O(1) duplicate checks.
+    seen = dict.fromkeys(url_pattern.findall(html))
+    seen.update(dict.fromkeys(alt_pattern.findall(html)))
+    return list(seen)
+
+# Per the original author's note: the 720yun CDN checks the Referer header,
+# so scripted requests must look like the browser's to get the right data.
+def _browser_headers(url=''):
+    """Build browser-like request headers for fetching *url*.
+
+    Always includes a Chrome-style User-Agent and an image-oriented Accept
+    header.  A 720yun Referer is added when *url* contains
+    ``720static.com`` or ``720yun.com`` anywhere in its text.
+    """
+    headers = {
+        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
+        'Accept': 'image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.9',
+    }
+    referer_domains = ('720static.com', '720yun.com')
+    if any(domain in url for domain in referer_domains):
+        headers['Referer'] = 'https://www.720yun.com/'
+    return headers
+
+
+def download_file(url, dest_path):
+    """Fetch *url* and write the response body to *dest_path*.
+
+    The request uses the headers from ``_browser_headers(url)`` and a
+    30 second timeout.  *dest_path* must provide ``write_bytes`` (e.g. a
+    ``pathlib.Path``).  The whole body is read into memory before being
+    written.  Network and file errors propagate to the caller.
+    """
+    request = urllib.request.Request(url, headers=_browser_headers(url))
+    with urllib.request.urlopen(request, timeout=30) as response:
+        payload = response.read()
+        dest_path.write_bytes(payload)
+
+def _collect_urls(obj, out):
+    """Append to *out* each http(s) string stored under a URL-like key of *obj*.
+
+    Walks nested dicts and lists.  A value is taken when its key is one of
+    ``url``, ``panorama``, ``image``, ``src`` or ``pic`` and it is a string
+    starting with ``http``; any other value is searched recursively.
+    """
+    if isinstance(obj, dict):
+        for key, value in obj.items():
+            if (key in ('url', 'panorama', 'image', 'src', 'pic')
+                    and isinstance(value, str) and value.startswith('http')):
+                out.append(value)
+            else:
+                _collect_urls(value, out)
+    elif isinstance(obj, list):
+        for item in obj:
+            _collect_urls(item, out)
+
+
+def _urls_from_candidates(json_candidates):
+    """Turn the strings from ``find_json_assignments`` into a list of URLs.
+
+    A candidate starting with ``http`` is taken as a URL itself.  One
+    starting with ``{`` is parsed as JSON and searched with
+    ``_collect_urls``.  One starting with ``[`` is parsed, and if its first
+    element is a dict with a ``url`` key, the non-empty string ``url`` of
+    every dict element is taken.  Anything unparseable is skipped.
+    """
+    urls = []
+    for raw in json_candidates:
+        if raw.startswith('http'):
+            urls.append(raw)
+            continue
+        if not raw.startswith(('{', '[')):
+            continue
+        try:
+            data = json.loads(raw)
+        except json.JSONDecodeError:
+            # Embedded state is often a JS literal rather than strict JSON.
+            continue
+        if isinstance(data, dict):
+            _collect_urls(data, urls)
+        elif (isinstance(data, list) and data
+                and isinstance(data[0], dict) and 'url' in data[0]):
+            # Guard each element: mixed lists and non-string url values
+            # previously raised AttributeError and aborted the script.
+            for scene in data:
+                if isinstance(scene, dict):
+                    scene_url = scene.get('url')
+                    if scene_url and isinstance(scene_url, str):
+                        urls.append(scene_url)
+    return urls
+
+
+def main():
+    """Command-line entry point.
+
+    Takes the 720yun page URL from ``sys.argv[1]``, fetches the page,
+    gathers candidate image URLs from the HTML and any embedded JSON,
+    downloads the first candidate to ``panorama/panorama.jpg`` next to this
+    script, and sets ``panorama`` in ``config.json`` (creating the file if
+    needed, and filling ``type`` / ``title`` only when absent).
+
+    Exits with status 1 on a missing argument, a failed page request, a
+    failed download, or an unreadable ``config.json``; exits with status 0
+    after printing manual instructions when no candidate URL is found.
+    """
+    if len(sys.argv) < 2:
+        print('用法: python3 fetch_720yun.py <720yun页面URL>')
+        print('例:   python3 fetch_720yun.py "https://www.720yun.com/vr/c8525usOunr"')
+        sys.exit(1)
+    url = sys.argv[1].strip()
+    base = Path(__file__).resolve().parent
+    panorama_dir = base / 'panorama'
+    panorama_dir.mkdir(exist_ok=True)
+    config_path = base / 'config.json'
+
+    print('正在请求页面...')
+    try:
+        html = fetch_html(url)
+    except Exception as e:
+        # Top-level boundary: any failure here means "fall back to manual".
+        print('请求失败:', e)
+        print('请使用 README 中的「手动获取」方式在浏览器中抓取资源。')
+        sys.exit(1)
+
+    image_urls = find_image_urls(html)
+    image_urls.extend(_urls_from_candidates(find_json_assignments(html)))
+    # De-duplicate while keeping discovery order; the first entry is used.
+    image_urls = list(dict.fromkeys(
+        u for u in image_urls if u and u.startswith('http')
+    ))
+
+    if not image_urls:
+        print('未在页面 HTML 中发现全景图 URL（页面可能由 JavaScript 动态加载）。')
+        print('请按 README 使用浏览器开发者工具手动获取：')
+        print('  1. 打开该 720yun 链接')
+        print('  2. F12 -> Network -> 刷新 -> 筛选 Img 或 XHR')
+        print('  3. 找到全景图或 scene 接口返回的图片 URL，下载到 panorama/ 并命名为 panorama.jpg')
+        print('  4. 确保 config.json 中 panorama 为 "panorama/panorama.jpg"')
+        sys.exit(0)
+
+    print('发现可能的全景图 URL:', len(image_urls))
+    # List the candidates so the failure hint below ("any URL listed
+    # above") actually has something to refer to.
+    for index, candidate in enumerate(image_urls, 1):
+        print(f'  {index}. {candidate}')
+    # NOTE(review): the file is always named .jpg even if the URL is
+    # .png/.webp -- confirm the viewer does not depend on the extension.
+    local_path = panorama_dir / 'panorama.jpg'
+    try:
+        first = image_urls[0]
+        print('正在下载:', first[:80], '...')
+        download_file(first, local_path)
+        print('已保存到:', local_path)
+    except Exception as e:
+        print('下载失败:', e)
+        print('请手动将上面列出的任一 URL 在浏览器中打开并另存为 panorama/panorama.jpg')
+        sys.exit(1)
+
+    # Make sure the config points at the local copy.
+    config = {}
+    if config_path.exists():
+        try:
+            with open(config_path, 'r', encoding='utf-8') as f:
+                config = json.load(f)
+        except (json.JSONDecodeError, UnicodeDecodeError) as e:
+            # Do not overwrite a file we could not read; let the user decide.
+            print('config.json 解析失败:', e)
+            print('全景图已下载；请修复或删除 config.json 后重新运行。')
+            sys.exit(1)
+        if not isinstance(config, dict):
+            print('config.json 顶层不是 JSON 对象；请修复或删除 config.json 后重新运行。')
+            sys.exit(1)
+    config['panorama'] = 'panorama/panorama.jpg'
+    config.setdefault('type', 'equirectangular')
+    config.setdefault('title', '本地全景')
+    with open(config_path, 'w', encoding='utf-8') as f:
+        json.dump(config, f, ensure_ascii=False, indent=2)
+    print('已更新 config.json。运行本地服务器后打开 index.html 即可离线查看。')
+
+# Run the CLI only when this file is executed as a script, not on import.
+if __name__ == '__main__':
+    main()