summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Keuin <[email protected]> 2023-06-04 00:36:41 +0800
committer: Keuin <[email protected]> 2023-06-04 00:36:46 +0800
commit: 5ca5f36c92da8afbc264bb1e62d2d5cb27866b5f (patch)
tree: 15316729c6f55864cc4449be414a2b5a5e9635e6
parent: 18af87c1e3048e96ca66d64c475e832c858316b4 (diff)
Smart PDF file name
-rw-r--r-- htmlcache.py 4
-rw-r--r-- main.py 23
-rw-r--r-- web.py 18
3 files changed, 35 insertions, 10 deletions
diff --git a/htmlcache.py b/htmlcache.py
index 922ab09..8264468 100644
--- a/htmlcache.py
+++ b/htmlcache.py
@@ -15,7 +15,7 @@ class HtmlCache:
def __init__(self, cache_path: str):
self._cache_path = os.path.abspath(cache_path)
- async def get_utaten_tex_source(self, item_id: str) -> str:
+ async def get_utaten_tex_source(self, item_id: str) -> main.LyricInfo:
cache_file_path = os.path.join(self._cache_path, f'{item_id}.html')
if os.path.isfile(cache_file_path):
async with async_open(cache_file_path, 'r', encoding='utf-8') as f:
@@ -31,4 +31,4 @@ class HtmlCache:
await f.write(html)
except IOError as e:
print(f'Failed to update cache for song `{item_id}`: {e}')
- return main.html_to_tex(html)
+ return main.html_extract_lyric_info(html)
diff --git a/main.py b/main.py
index 2e4810d..2213069 100644
--- a/main.py
+++ b/main.py
@@ -171,8 +171,20 @@ class LatexGenerator:
return doc
-def html_to_tex(html) -> str:
+class LyricInfo:
+ utaten_id: str
+ tex_source: str
+ artist: typing.Optional[str]
+ title: typing.Optional[str]
+
+
+def html_extract_lyric_info(html) -> LyricInfo:
p = BeautifulSoup(html, "html5lib")
+ meta_url_info = p.select_one('meta[property="og:url"]')
+ if not meta_url_info:
+ raise RuntimeError('Cannot parse meta URL info from given HTML')
+ utaten_id = re.findall(r'/lyric/([a-z0-9]+)', str(meta_url_info['content']))[0]
lyric = p.select_one('.hiragana')
if not lyric:
raise RuntimeError('Cannot find lyric element `.hiragana`')
@@ -195,7 +207,12 @@ def html_to_tex(html) -> str:
gen.artist, gen.title = artist, title
# FIXME hardcoded CJK font
gen.cjk_font_main = 'Noto Serif CJK JP'
- return gen.generate_lyric(tokens)
+ return LyricInfo(
+ utaten_id=utaten_id,
+ tex_source=gen.generate_lyric(tokens),
+ artist=artist,
+ title=title,
+ )
def main():
@@ -213,7 +230,7 @@ def main():
else:
# read html from STDIN
html = sys.stdin.read()
- print(html_to_tex(html))
+ print(html_extract_lyric_info(html).tex_source)
if __name__ == '__main__':
diff --git a/web.py b/web.py
index 395a186..c602dd4 100644
--- a/web.py
+++ b/web.py
@@ -21,9 +21,17 @@ html_cache = htmlcache.HtmlCache('html_cache')
@app.get("/utaten/{item_id}.pdf")
async def get_utaten_lyric_pdf(item_id: str):
try:
- tex = await html_cache.get_utaten_tex_source(item_id)
- pdf_path = await tex_generator.xelatex(tex)
- return FileResponse(pdf_path, media_type='application/pdf')
+ lyric_info = await html_cache.get_utaten_tex_source(item_id)
+ pdf_path = await tex_generator.xelatex(lyric_info.tex_source)
+ if lyric_info.title and lyric_info.artist:
+ filename = f'{lyric_info.title} - {lyric_info.artist}.pdf'
+ elif not lyric_info.title and not lyric_info.artist:
+ filename = f'{lyric_info.utaten_id}.pdf'
+ elif lyric_info.title:
+ filename = f'{lyric_info.title} - {lyric_info.utaten_id}.pdf'
+ else:
+ filename = f'{lyric_info.artist} - {lyric_info.utaten_id}.pdf'
+ return FileResponse(pdf_path, media_type='application/pdf', filename=filename)
except texgen.TexGenerationError as e:
return Response(content=f'Failed to generate tex file: {e}', status_code=502)
@@ -31,8 +39,8 @@ async def get_utaten_lyric_pdf(item_id: str):
@app.get("/utaten/{item_id}.tex")
async def get_utaten_lyric_tex(item_id: str):
try:
- tex = await html_cache.get_utaten_tex_source(item_id)
- return Response(content=tex, media_type='application/x-tex')
+ lyric_info = await html_cache.get_utaten_tex_source(item_id)
+ return Response(content=lyric_info.tex_source, media_type='application/x-tex')
except htmlcache.TexSourceGenerationError as e:
return Response(content=str(e), status_code=503)