music(chord): yopu UI 升级修 selector + 加 PVC override 与调试 dump
deploy music / build-and-deploy (push) Successful in 1m59s

- yopu 现在搜索结果默认全是和弦谱(不再标「和弦谱」字样),改成直接取第一个 a.post-main
- chord_server 启动时把 /data/chord-overrides/ 加到 sys.path 优先级最高,方便后续不 rebuild image 直接 cp yopu.py 热修
- 失败路径 dump HTML + 截图到 /data/chord-debug,view 页 selector 失败也能事后看
This commit is contained in:
Fam Zheng
2026-05-09 23:02:34 +01:00
parent e111398157
commit 05df371435
2 changed files with 69 additions and 24 deletions
+10 -1
View File
@@ -8,6 +8,7 @@ worker 单线程串行(chromium 一次跑一个,省资源),文件落 /da
import json import json
import logging import logging
import queue import queue
import sys
import threading import threading
import os import os
from pathlib import Path from pathlib import Path
@@ -16,7 +17,15 @@ from typing import Optional
from fastapi import FastAPI, HTTPException from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse from fastapi.responses import FileResponse
# Debug hot-fix hook: /data is a PVC mount, so its contents survive container
# restarts. If a yopu.py is dropped into /data/chord-overrides/, that directory
# is put at the front of sys.path so the override wins over the bundled module.
# This allows hot-fixing selectors without rebuilding the image.
_OVERRIDE_DIR = Path('/data/chord-overrides')
try:
    _OVERRIDE_DIR.mkdir(parents=True, exist_ok=True)
    _has_yopu_override = (_OVERRIDE_DIR / 'yopu.py').exists()
except OSError as exc:
    # The override is a convenience only: a missing or read-only /data must
    # not prevent the server from starting with the bundled yopu module.
    print(f"[chord-server] override dir {_OVERRIDE_DIR} unavailable: {exc}")
    _has_yopu_override = False
if _has_yopu_override:
    sys.path.insert(0, str(_OVERRIDE_DIR))
    print(f"[chord-server] using yopu.py override from {_OVERRIDE_DIR}")
import yopu  # noqa: E402
logging.basicConfig(level=logging.INFO, logging.basicConfig(level=logging.INFO,
format='%(asctime)s %(levelname)s %(name)s: %(message)s') format='%(asctime)s %(levelname)s %(name)s: %(message)s')
+55 -19
View File
@@ -55,40 +55,53 @@ def setup_driver(window="1920,5000"):
def find_first_chord_chart(driver, search_url):
    """Return the first usable result on a yopu search page, or None.

    The older yopu UI put the text 「和弦谱」 inside ``.one-line-info`` so
    results could be filtered by it. The rewritten UI no longer carries that
    tag (only the key, e.g. G调/C调/F调), so every ``a.post-main`` anchor is
    collected and the first one that has an href is taken.

    Args:
        driver: Selenium-style WebDriver; only ``get`` and ``execute_script``
            are used here.
        search_url: Search-results URL to load. Also used as the base when
            resolving a relative href.

    Returns:
        ``None`` when no anchor with an href is found, otherwise a dict with
        keys ``url``, ``title``, ``subtitle`` and ``text`` (``text`` holds the
        ``.one-line-info`` contents).
    """
    logger.info("loading search: %s", search_url)
    driver.get(search_url)
    # Fixed delay so the client-side rendered result list has time to appear.
    time.sleep(3)

    hits = driver.execute_script("""
        var out = [];
        var posts = document.querySelectorAll('a.post-main');
        for (var i = 0; i < posts.length; i++) {
            var p = posts[i];
            var titleEl = p.querySelector('.title-line .title, .title');
            var subEl = p.querySelector('.title-line .subtitle, .subtitle');
            var info = p.querySelector('.one-line-info');
            out.push({
                href: p.href,
                title: titleEl ? (titleEl.textContent || '').trim() : '',
                subtitle: subEl ? (subEl.textContent || '').trim() : '',
                info: info ? (info.textContent || '').trim() : '',
            });
        }
        return out;
    """)
    if not hits:
        logger.warning("no a.post-main found in search results — yopu DOM changed?")
        return None

    # MVP: take the first hit. The leading results are usually the same song
    # in different keys (G/C/F), with the first one normally the default key.
    # Anchors without an href are skipped because they cannot be navigated to.
    first = next((hit for hit in hits if hit.get('href')), None)
    if first is None:
        logger.warning("no a.post-main found in search results — yopu DOM changed?")
        return None

    # urljoin leaves absolute URLs untouched and resolves root-relative,
    # relative and protocol-relative hrefs against the search URL.
    href = urljoin(search_url, first['href'])
    logger.info("matched %d/%d hits, picking #1: %s%s [%s] -> %s",
                1, len(hits), first.get('title'), first.get('subtitle'),
                first.get('info'), href)
    return {
        'url': href,
        'title': first.get('title') or '',
        'subtitle': first.get('subtitle') or '',
        'text': first.get('info') or '',
    }
def select_option_in_row(driver, row_label, button_text, timeout=10): def select_option_in_row(driver, row_label, button_text, timeout=10):
@@ -217,6 +230,21 @@ def crop_white(path, pad_top=20, pad_bottom=50, pad_left=20, pad_right=20, white
logger.info("cropped to %s", box) logger.info("cropped to %s", box)
DEBUG_DIR = Path('/data/chord-debug')


def _save_debug(driver, tag: str) -> None:
    """Best-effort dump of the current page (HTML + screenshot) for debugging.

    Files are written under ``DEBUG_DIR`` as ``<tag>-<unix seconds>.html`` and
    ``<tag>-<unix seconds>.png``. This helper never raises: any failure is
    logged as a warning so the debugging aid cannot mask the error that
    triggered it.

    Args:
        driver: WebDriver whose ``page_source`` and ``save_screenshot`` are used.
        tag: Short label identifying the failure path; becomes the file prefix.
    """
    try:
        DEBUG_DIR.mkdir(parents=True, exist_ok=True)
        ts = int(time.time())
        (DEBUG_DIR / f'{tag}-{ts}.html').write_text(driver.page_source, encoding='utf-8')
        # Selenium's save_screenshot reports an I/O failure by returning
        # False instead of raising, so the result has to be checked.
        if driver.save_screenshot(str(DEBUG_DIR / f'{tag}-{ts}.png')) is False:
            logger.warning("debug snapshot failed: %s",
                           f"screenshot not written for {tag}-{ts}")
            return
        logger.info("debug snapshot saved: %s/%s-%d.{html,png}", DEBUG_DIR, tag, ts)
    except Exception as e:
        logger.warning("debug snapshot failed: %s", e)
def fetch_chord_chart(query: str, output_path: str, *, def fetch_chord_chart(query: str, output_path: str, *,
sheet_style: str = '功能谱', sheet_style: str = '功能谱',
chord_style: str = '级数名', chord_style: str = '级数名',
@@ -236,6 +264,7 @@ def fetch_chord_chart(query: str, output_path: str, *,
driver = setup_driver() driver = setup_driver()
result = find_first_chord_chart(driver, search_url) result = find_first_chord_chart(driver, search_url)
if not result: if not result:
_save_debug(driver, 'no-search-hit')
return False, '未找到和弦谱' return False, '未找到和弦谱'
view_url = result['url'] view_url = result['url']
@@ -244,16 +273,23 @@ def fetch_chord_chart(query: str, output_path: str, *,
time.sleep(3) time.sleep(3)
# 选样式(写死的 MVP 组合) # 选样式(写死的 MVP 组合)
select_option_in_row(driver, '谱面样式', sheet_style) sheet_ok = select_option_in_row(driver, '谱面样式', sheet_style)
select_option_in_row(driver, '和弦样式', chord_style) chord_ok = select_option_in_row(driver, '和弦样式', chord_style)
if not (sheet_ok and chord_ok):
# 选不上 = UI 改了,dump 给 debug
_save_debug(driver, 'row-not-found')
# 等内容刷新 # 等内容刷新
time.sleep(1.5) time.sleep(1.5)
wait = WebDriverWait(driver, 15) wait = WebDriverWait(driver, 15)
try:
sheet = wait.until(EC.presence_of_element_located( sheet = wait.until(EC.presence_of_element_located(
(By.CSS_SELECTOR, "div.sheet-container") (By.CSS_SELECTOR, "div.sheet-container")
)) ))
except TimeoutException:
_save_debug(driver, 'no-sheet-container')
raise
driver.execute_script("arguments[0].scrollIntoView(true);", sheet) driver.execute_script("arguments[0].scrollIntoView(true);", sheet)
time.sleep(0.5) time.sleep(0.5)