Smart PDF file name

author: Keuin <[email protected]> 2023-06-04 00:36:41 +0800
committer: Keuin <[email protected]> 2023-06-04 00:36:46 +0800
commit: 5ca5f36c92da8afbc264bb1e62d2d5cb27866b5f (patch)
tree: 15316729c6f55864cc4449be414a2b5a5e9635e6
parent: 18af87c1e3048e96ca66d64c475e832c858316b4 (diff)
3 files changed, 35 insertions, 10 deletions
diff --git a/htmlcache.py b/htmlcache.py
index 922ab09..8264468 100644
--- a/htmlcache.py
+++ b/htmlcache.py
@@ -15,7 +15,7 @@ class HtmlCache:
     def __init__(self, cache_path: str):
         self._cache_path = os.path.abspath(cache_path)
 
-    async def get_utaten_tex_source(self, item_id: str) -> str:
+    async def get_utaten_tex_source(self, item_id: str) -> main.LyricInfo:
         cache_file_path = os.path.join(self._cache_path, f'{item_id}.html')
         if os.path.isfile(cache_file_path):
             async with async_open(cache_file_path, 'r', encoding='utf-8') as f:
@@ -31,4 +31,4 @@ class HtmlCache:
                             await f.write(html)
                     except IOError as e:
                         print(f'Failed to update cache for song `{item_id}`: {e}')
-        return main.html_to_tex(html)
+        return main.html_extract_lyric_info(html)
diff --git a/main.py b/main.py
index 2e4810d..2213069 100644
--- a/main.py
+++ b/main.py
@@ -171,8 +171,20 @@ class LatexGenerator:
         return doc
 
 
-def html_to_tex(html) -> str:
+@dataclasses.dataclass
+class LyricInfo:
+    utaten_id: str
+    tex_source: str
+    artist: typing.Optional[str]
+    title: typing.Optional[str]
+
+
+def html_extract_lyric_info(html) -> LyricInfo:
     p = BeautifulSoup(html, "html5lib")
+    meta_url_info = p.select_one('meta[property="og:url"]')
+    if not meta_url_info:
+        raise RuntimeError('Cannot parse meta URL info from given HTML')
+    utaten_id = re.findall(r'/lyric/([a-z0-9]+)', str(meta_url_info['content']))[0]
     lyric = p.select_one('.hiragana')
     if not lyric:
         raise RuntimeError('Cannot find lyric element `.hiragana`')
@@ -195,7 +207,12 @@ def html_to_tex(html) -> str:
     gen.artist, gen.title = artist, title
     # FIXME hardcoded CJK font
     gen.cjk_font_main = 'Noto Serif CJK JP'
-    return gen.generate_lyric(tokens)
+    return LyricInfo(
+        utaten_id=utaten_id,
+        tex_source=gen.generate_lyric(tokens),
+        artist=artist,
+        title=title,
+    )
 
 
 def main():
@@ -213,7 +230,7 @@ def main():
     else:
         # read html from STDIN
         html = sys.stdin.read()
-    print(html_to_tex(html))
+    print(html_extract_lyric_info(html).tex_source)
 
 
 if __name__ == '__main__':
diff --git a/web.py b/web.py
index 395a186..c602dd4 100644
--- a/web.py
+++ b/web.py
@@ -21,9 +21,17 @@ html_cache = htmlcache.HtmlCache('html_cache')
 @app.get("/utaten/{item_id}.pdf")
 async def get_utaten_lyric_pdf(item_id: str):
     try:
-        tex = await html_cache.get_utaten_tex_source(item_id)
-        pdf_path = await tex_generator.xelatex(tex)
-        return FileResponse(pdf_path, media_type='application/pdf')
+        lyric_info = await html_cache.get_utaten_tex_source(item_id)
+        pdf_path = await tex_generator.xelatex(lyric_info.tex_source)
+        if lyric_info.title and lyric_info.artist:
+            filename = f'{lyric_info.title} - {lyric_info.artist}.pdf'
+        elif not lyric_info.title and not lyric_info.artist:
+            filename = f'{lyric_info.utaten_id}.pdf'
+        elif lyric_info.title:
+            filename = f'{lyric_info.title} - {lyric_info.utaten_id}.pdf'
+        else:
+            filename = f'{lyric_info.artist} - {lyric_info.utaten_id}.pdf'
+        return FileResponse(pdf_path, media_type='application/pdf', filename=filename)
     except texgen.TexGenerationError as e:
         return Response(content=f'Failed to generate tex file: {e}', status_code=502)
 
@@ -31,8 +39,8 @@ async def get_utaten_lyric_pdf(item_id: str):
 @app.get("/utaten/{item_id}.tex")
 async def get_utaten_lyric_tex(item_id: str):
     try:
-        tex = await html_cache.get_utaten_tex_source(item_id)
-        return Response(content=tex, media_type='application/x-tex')
+        lyric_info = await html_cache.get_utaten_tex_source(item_id)
+        return Response(content=lyric_info.tex_source, media_type='application/x-tex')
     except htmlcache.TexSourceGenerationError as e:
         return Response(content=str(e), status_code=503)
author	Keuin <[email protected]>	2023-06-04 00:36:41 +0800
committer	Keuin <[email protected]>	2023-06-04 00:36:46 +0800
commit	5ca5f36c92da8afbc264bb1e62d2d5cb27866b5f (patch)
tree	15316729c6f55864cc4449be414a2b5a5e9635e6
parent	18af87c1e3048e96ca66d64c475e832c858316b4 (diff)