music(chord): yopu UI 升级修 selector + 加 PVC override 与调试 dump
deploy music / build-and-deploy (push) Successful in 1m59s

- yopu 现在搜索结果默认全是和弦谱(不再标「和弦谱」字样),改成直接取第一个 a.post-main
- chord_server 启动时把 /data/chord-overrides/ 加到 sys.path 优先级最高,方便后续不 rebuild image 直接 cp yopu.py 热修
- 失败路径 dump HTML + 截图到 /data/chord-debug,view 页 selector 失败也能事后看
This commit is contained in:
Fam Zheng
2026-05-09 23:02:34 +01:00
parent e111398157
commit 05df371435
2 changed files with 69 additions and 24 deletions
+10 -1
View File
@@ -8,6 +8,7 @@ worker 单线程串行(chromium 一次跑一个,省资源),文件落 /da
import json
import logging
import queue
import sys
import threading
import os
from pathlib import Path
@@ -16,7 +17,15 @@ from typing import Optional
from fastapi import FastAPI, HTTPException
from fastapi.responses import FileResponse
import yopu
# Debug hot-patching: /data is a PVC mount, so its contents survive container
# restarts. If a yopu.py is placed in /data/chord-overrides/, that directory is
# put at the front of sys.path at startup so the override wins the import
# below. This allows hot-fixing selectors without rebuilding the image.
_OVERRIDE_DIR = Path('/data/chord-overrides')
_OVERRIDE_DIR.mkdir(parents=True, exist_ok=True)
if _OVERRIDE_DIR.joinpath('yopu.py').exists():
    # Highest import priority: prepend rather than append.
    sys.path[:0] = [str(_OVERRIDE_DIR)]
    print(f"[chord-server] using yopu.py override from {_OVERRIDE_DIR}")

# Imported only after sys.path has been adjusted, hence the E402 suppression.
import yopu  # noqa: E402

logging.basicConfig(
    format='%(asctime)s %(levelname)s %(name)s: %(message)s',
    level=logging.INFO,
)
+59 -23
View File
@@ -55,40 +55,53 @@ def setup_driver(window="1920,5000"):
def find_first_chord_chart(driver, search_url):
    """Return the first usable result on a yopu search page, or None.

    Older yopu pages marked chord charts with the text 「和弦谱」 inside
    ``.one-line-info``, which used to be the filter. After the site's Svelte
    rewrite that label is gone (only the key, e.g. G调/C调/F调, is shown) and
    results are chord charts by default, so the first ``a.post-main`` link is
    taken as-is.

    Args:
        driver: Selenium-style driver; only ``get`` and ``execute_script``
            are used here.
        search_url: URL of the search results page. Also used as the base
            when a result's href is not already absolute.

    Returns:
        A dict with keys ``url``, ``title``, ``subtitle`` and ``text`` (the
        ``.one-line-info`` content) describing the chosen result, or ``None``
        when the page yields no ``a.post-main`` link with a non-empty href.
    """
    logger.info("loading search: %s", search_url)
    driver.get(search_url)
    # Fixed wait for the client-side rendered result list to appear.
    time.sleep(3)

    hits = driver.execute_script("""
        var out = [];
        var posts = document.querySelectorAll('a.post-main');
        for (var i = 0; i < posts.length; i++) {
            var p = posts[i];
            var titleEl = p.querySelector('.title-line .title, .title');
            var subEl = p.querySelector('.title-line .subtitle, .subtitle');
            var info = p.querySelector('.one-line-info');
            out.push({
                href: p.href,
                title: titleEl ? (titleEl.textContent || '').trim() : '',
                subtitle: subEl ? (subEl.textContent || '').trim() : '',
                info: info ? (info.textContent || '').trim() : '',
            });
        }
        return out;
    """) or []

    # An anchor without an href would resolve to the search page itself and be
    # returned as the "view" URL; ignore such entries.
    usable = [h for h in hits if h.get('href')]
    if not usable:
        logger.warning("no a.post-main found in search results — yopu DOM changed?")
        return None

    # MVP: take the first one. The leading results are usually the same song
    # in different keys (G/C/F), and the first is normally the default key.
    first = usable[0]
    # urljoin leaves absolute URLs alone and resolves root-relative,
    # protocol-relative and path-relative hrefs against the search URL.
    href = urljoin(search_url, first['href'])

    logger.info("matched %d/%d hits, picking #1: %s / %s [%s]",
                1, len(hits), first.get('title'), first.get('subtitle'), first.get('info'))
    return {
        'url': href,
        'title': first.get('title') or '',
        'subtitle': first.get('subtitle') or '',
        'text': first.get('info') or '',
    }
def select_option_in_row(driver, row_label, button_text, timeout=10):
@@ -217,6 +230,21 @@ def crop_white(path, pad_top=20, pad_bottom=50, pad_left=20, pad_right=20, white
logger.info("cropped to %s", box)
DEBUG_DIR = Path('/data/chord-debug')


def _save_debug(driver, tag: str) -> None:
    """Best-effort dump of the current page HTML and a screenshot.

    Files are written to ``DEBUG_DIR`` as ``<tag>-<unix_ts>.html`` and
    ``<tag>-<unix_ts>.png`` so a failed run can be inspected afterwards.
    This function never raises: it is called from failure paths and must not
    mask the original error.

    Args:
        driver: Selenium-style driver; ``page_source`` and
            ``save_screenshot`` are used.
        tag: Short label for the failure site, used as the file name prefix.
    """
    try:
        DEBUG_DIR.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.warning("debug snapshot failed: %s", e)
        return

    ts = int(time.time())
    html_path = DEBUG_DIR / f'{tag}-{ts}.html'
    png_path = DEBUG_DIR / f'{tag}-{ts}.png'

    # The two artifacts are captured independently so that losing one (e.g.
    # page_source raising) does not also lose the other.
    html_ok = False
    try:
        html_path.write_text(driver.page_source, encoding='utf-8')
        html_ok = True
    except Exception as e:  # best-effort: driver or filesystem may both fail
        logger.warning("debug snapshot failed: %s", e)

    png_ok = False
    try:
        # save_screenshot reports an I/O failure by returning False rather
        # than raising, so the return value has to be checked.
        png_ok = bool(driver.save_screenshot(str(png_path)))
        if not png_ok:
            logger.warning("debug snapshot failed: could not write %s", png_path)
    except Exception as e:  # best-effort, see above
        logger.warning("debug snapshot failed: %s", e)

    if html_ok and png_ok:
        logger.info("debug snapshot saved: %s/%s-%d.{html,png}", DEBUG_DIR, tag, ts)
    elif html_ok:
        logger.info("debug snapshot saved: %s", html_path)
    elif png_ok:
        logger.info("debug snapshot saved: %s", png_path)
def fetch_chord_chart(query: str, output_path: str, *,
sheet_style: str = '功能谱',
chord_style: str = '级数名',
@@ -236,6 +264,7 @@ def fetch_chord_chart(query: str, output_path: str, *,
driver = setup_driver()
result = find_first_chord_chart(driver, search_url)
if not result:
_save_debug(driver, 'no-search-hit')
return False, '未找到和弦谱'
view_url = result['url']
@@ -244,16 +273,23 @@ def fetch_chord_chart(query: str, output_path: str, *,
time.sleep(3)
# 选样式(写死的 MVP 组合)
select_option_in_row(driver, '谱面样式', sheet_style)
select_option_in_row(driver, '和弦样式', chord_style)
sheet_ok = select_option_in_row(driver, '谱面样式', sheet_style)
chord_ok = select_option_in_row(driver, '和弦样式', chord_style)
if not (sheet_ok and chord_ok):
# 选不上 = UI 改了,dump 给 debug
_save_debug(driver, 'row-not-found')
# 等内容刷新
time.sleep(1.5)
wait = WebDriverWait(driver, 15)
sheet = wait.until(EC.presence_of_element_located(
(By.CSS_SELECTOR, "div.sheet-container")
))
try:
sheet = wait.until(EC.presence_of_element_located(
(By.CSS_SELECTOR, "div.sheet-container")
))
except TimeoutException:
_save_debug(driver, 'no-sheet-container')
raise
driver.execute_script("arguments[0].scrollIntoView(true);", sheet)
time.sleep(0.5)