From c10ca0b747a19c1e432346c61abdbf9956905af7 Mon Sep 17 00:00:00 2001
From: Keuin
Date: Wed, 31 May 2023 19:32:02 +0800
Subject: Enforce mypy static checking

---
 main.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'main.py')

diff --git a/main.py b/main.py
index 3ac4aae..b8d6184 100644
--- a/main.py
+++ b/main.py
@@ -5,6 +5,7 @@ import dataclasses
 import enum
 import typing
 
+import bs4
 from bs4 import BeautifulSoup
 from bs4.element import Tag
 
@@ -95,19 +96,22 @@ def process_notated(ele: Tag):
 def tokenize(lyric: Tag) -> typing.Iterator[Token]:
     newline = NewLineToken()
     for i, ele in enumerate(lyric):
-        if ele.name == 'span':
-            yield from process_notated(ele)
-        elif ele.name is None:
+        if isinstance(ele, bs4.NavigableString):
             # text
             t = ele.text.strip()
             if not t:
                 continue
             yield TextToken(t)
-        elif ele.name == 'br':
-            # newline
-            yield newline
+        elif isinstance(ele, bs4.Tag):
+            if ele.name == 'span':
+                yield from process_notated(ele)
+            elif ele.name == 'br':
+                # newline
+                yield newline
+            else:
+                print(f'** Ignore HTML element ', file=sys.stderr)
         else:
-            print(f'')
+            print(f'** Ignore {type(ele)}', file=sys.stderr)
 
 
 class LatexGenerator:
@@ -184,6 +188,8 @@ def main():
     html = sys.stdin.read()
     p = BeautifulSoup(html, "html5lib")
     lyric = p.select_one('.hiragana')
+    if not lyric:
+        raise RuntimeError('Cannot find lyric element `.hiragana`')
     tokens = tokenize(lyric)
     tokens = optimize_typography(tokens)
     gen = LatexGenerator()
-- 
cgit v1.2.3