summary | refs | log | tree | commit | diff
path: root/main.py
diff options
context:
space:
mode:
author: Keuin <[email protected]> 2023-06-04 00:36:41 +0800
committer: Keuin <[email protected]> 2023-06-04 00:36:46 +0800
commit: 5ca5f36c92da8afbc264bb1e62d2d5cb27866b5f (patch)
tree: 15316729c6f55864cc4449be414a2b5a5e9635e6 /main.py
parent: 18af87c1e3048e96ca66d64c475e832c858316b4 (diff)
Smart PDF file name
Diffstat (limited to 'main.py')
-rw-r--r-- main.py 23
1 files changed, 20 insertions, 3 deletions
diff --git a/main.py b/main.py
index 2e4810d..2213069 100644
--- a/main.py
+++ b/main.py
@@ -171,8 +171,20 @@ class LatexGenerator:
return doc
-def html_to_tex(html) -> str:
+class LyricInfo:
+ utaten_id: str
+ tex_source: str
+ artist: typing.Optional[str]
+ title: typing.Optional[str]
+
+
+def html_extract_lyric_info(html) -> LyricInfo:
p = BeautifulSoup(html, "html5lib")
+ meta_url_info = p.select_one('meta[property="og:url"]')
+ if not meta_url_info:
+ raise RuntimeError('Cannot parse meta URL info from given HTML')
+ utaten_id = re.findall(r'/lyric/([a-z0-9]+)', str(meta_url_info['content']))[0]
lyric = p.select_one('.hiragana')
if not lyric:
raise RuntimeError('Cannot find lyric element `.hiragana`')
@@ -195,7 +207,12 @@ def html_to_tex(html) -> str:
gen.artist, gen.title = artist, title
# FIXME hardcoded CJK font
gen.cjk_font_main = 'Noto Serif CJK JP'
- return gen.generate_lyric(tokens)
+ return LyricInfo(
+ utaten_id=utaten_id,
+ tex_source=gen.generate_lyric(tokens),
+ artist=artist,
+ title=title,
+ )
def main():
@@ -213,7 +230,7 @@ def main():
else:
# read html from STDIN
html = sys.stdin.read()
- print(html_to_tex(html))
+ print(html_extract_lyric_info(html).tex_source)
if __name__ == '__main__':