diff options
-rw-r--r-- | htmlcache.py | 4 | ||||
-rw-r--r-- | main.py | 23 | ||||
-rw-r--r-- | web.py | 18 |
3 files changed, 35 insertions, 10 deletions
diff --git a/htmlcache.py b/htmlcache.py index 922ab09..8264468 100644 --- a/htmlcache.py +++ b/htmlcache.py @@ -15,7 +15,7 @@ class HtmlCache: def __init__(self, cache_path: str): self._cache_path = os.path.abspath(cache_path) - async def get_utaten_tex_source(self, item_id: str) -> str: + async def get_utaten_tex_source(self, item_id: str) -> main.LyricInfo: cache_file_path = os.path.join(self._cache_path, f'{item_id}.html') if os.path.isfile(cache_file_path): async with async_open(cache_file_path, 'r', encoding='utf-8') as f: @@ -31,4 +31,4 @@ class HtmlCache: await f.write(html) except IOError as e: print(f'Failed to update cache for song `{item_id}`: {e}') - return main.html_to_tex(html) + return main.html_extract_lyric_info(html) @@ -171,8 +171,20 @@ class LatexGenerator: return doc -def html_to_tex(html) -> str: +class LyricInfo: + utaten_id: str + tex_source: str + artist: typing.Optional[str] + title: typing.Optional[str] + + +def html_extract_lyric_info(html) -> LyricInfo: p = BeautifulSoup(html, "html5lib") + meta_url_info = p.select_one('meta[property="og:url"]') + if not meta_url_info: + raise RuntimeError('Cannot parse meta URL info from given HTML') + utaten_id = re.findall(r'/lyric/([a-z0-9]+)', str(meta_url_info['content']))[0] lyric = p.select_one('.hiragana') if not lyric: raise RuntimeError('Cannot find lyric element `.hiragana`') @@ -195,7 +207,12 @@ def html_to_tex(html) -> str: gen.artist, gen.title = artist, title # FIXME hardcoded CJK font gen.cjk_font_main = 'Noto Serif CJK JP' - return gen.generate_lyric(tokens) + return LyricInfo( + utaten_id=utaten_id, + tex_source=gen.generate_lyric(tokens), + artist=artist, + title=title, + ) def main(): @@ -213,7 +230,7 @@ def main(): else: # read html from STDIN html = sys.stdin.read() - print(html_to_tex(html)) + print(html_extract_lyric_info(html).tex_source) if __name__ == '__main__': @@ -21,9 +21,17 @@ html_cache = htmlcache.HtmlCache('html_cache') @app.get("/utaten/{item_id}.pdf") async def get_utaten_lyric_pdf(item_id: str): try: - tex = await html_cache.get_utaten_tex_source(item_id) - pdf_path = await tex_generator.xelatex(tex) - return FileResponse(pdf_path, media_type='application/pdf') + lyric_info = await html_cache.get_utaten_tex_source(item_id) + pdf_path = await tex_generator.xelatex(lyric_info.tex_source) + if lyric_info.title and lyric_info.artist: + filename = f'{lyric_info.title} - {lyric_info.artist}.pdf' + elif not lyric_info.title and not lyric_info.artist: + filename = f'{lyric_info.utaten_id}.pdf' + elif lyric_info.title: + filename = f'{lyric_info.title} - {lyric_info.utaten_id}.pdf' + else: + filename = f'{lyric_info.artist} - {lyric_info.utaten_id}.pdf' + return FileResponse(pdf_path, media_type='application/pdf', filename=filename) except texgen.TexGenerationError as e: return Response(content=f'Failed to generate tex file: {e}', status_code=502) @@ -31,8 +39,8 @@ async def get_utaten_lyric_pdf(item_id: str): @app.get("/utaten/{item_id}.tex") async def get_utaten_lyric_tex(item_id: str): try: - tex = await html_cache.get_utaten_tex_source(item_id) - return Response(content=tex, media_type='application/x-tex') + lyric_info = await html_cache.get_utaten_tex_source(item_id) + return Response(content=lyric_info.tex_source, media_type='application/x-tex') except htmlcache.TexSourceGenerationError as e: return Response(content=str(e), status_code=503) |