diff options
author | Keuin <[email protected]> | 2023-06-04 00:36:41 +0800 |
---|---|---|
committer | Keuin <[email protected]> | 2023-06-04 00:36:46 +0800 |
commit | 5ca5f36c92da8afbc264bb1e62d2d5cb27866b5f (patch) | |
tree | 15316729c6f55864cc4449be414a2b5a5e9635e6 /main.py | |
parent | 18af87c1e3048e96ca66d64c475e832c858316b4 (diff) |
Smart PDF file name
Diffstat (limited to 'main.py')
-rw-r--r-- | main.py | 23 |
1 files changed, 20 insertions, 3 deletions
```diff
@@ -171,8 +171,20 @@ class LatexGenerator:
         return doc
 
 
-def html_to_tex(html) -> str:
+class LyricInfo:
+    utaten_id: str
+    tex_source: str
+    artist: typing.Optional[str]
+    title: typing.Optional[str]
+
+
+def html_extract_lyric_info(html) -> LyricInfo:
     p = BeautifulSoup(html, "html5lib")
+    meta_url_info = p.select_one('meta[property="og:url"]')
+    if not meta_url_info:
+        raise RuntimeError('Cannot parse meta URL info from given HTML')
+    utaten_id = re.findall(r'/lyric/([a-z0-9]+)', str(meta_url_info['content']))[0]
     lyric = p.select_one('.hiragana')
     if not lyric:
         raise RuntimeError('Cannot find lyric element `.hiragana`')
@@ -195,7 +207,12 @@ def html_to_tex(html) -> str:
     gen.artist, gen.title = artist, title
     # FIXME hardcoded CJK font
     gen.cjk_font_main = 'Noto Serif CJK JP'
-    return gen.generate_lyric(tokens)
+    return LyricInfo(
+        utaten_id=utaten_id,
+        tex_source=gen.generate_lyric(tokens),
+        artist=artist,
+        title=title,
+    )
 
 
 def main():
@@ -213,7 +230,7 @@ def main():
     else:
         # read html from STDIN
         html = sys.stdin.read()
-    print(html_to_tex(html))
+    print(html_extract_lyric_info(html).tex_source)
 
 
 if __name__ == '__main__':
```