From e9f4b34c3f36377bbd68d71ede6dbbb293c626e5 Mon Sep 17 00:00:00 2001
From: Keuin
Date: Wed, 31 May 2023 02:50:38 +0800
Subject: bugfix: paragraph breaks are missing

---
 main.py | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

(limited to 'main.py')

diff --git a/main.py b/main.py
index 11ea89e..91fc044 100644
--- a/main.py
+++ b/main.py
@@ -50,6 +50,42 @@ class NewLineToken(Token):
         return '\n\n'
 
 
+class SectionBreaker(Token):
+    """Token rendered as an empty, unnumbered LaTeX section heading."""
+
+    def __init__(self):
+        pass
+
+    def to_latex(self) -> str:
+        return r'\section*{}'
+
+
+def optimize_typography(tokens: typing.Iterable[Token]) -> typing.Iterator[Token]:
+    """Emit a SectionBreaker wherever two NewLineTokens are adjacent.
+
+    Each adjacent NewLineToken pair yields a fresh NewLineToken followed by
+    a SectionBreaker in place of the first token of the pair; every other
+    token is passed through unchanged and in order. An empty input yields
+    nothing.
+    """
+    it = iter(tokens)
+    try:
+        prev = next(it)
+    except StopIteration:
+        # A bare next() on an empty iterator would escape the generator as
+        # RuntimeError (PEP 479), so end the stream explicitly instead.
+        return
+    for t in it:
+        if isinstance(prev, NewLineToken) and isinstance(t, NewLineToken):
+            yield NewLineToken()
+            yield SectionBreaker()
+        else:
+            yield prev
+        prev = t
+    # prev always holds the final, not-yet-emitted token at this point.
+    yield prev
+
+
 @dataclasses.dataclass
 class LatexDocInjectionInfo:
     packages: list[str]
@@ -99,6 +135,10 @@ class LatexGenerator:
         injectors = []
         injectors.append(LatexDocInjectionInfo([], [r'\usepackage{pxrubrica}'], []))
         injectors.append(LatexDocInjectionInfo([], [r'\usepackage{setspace}', r'\doublespacing'], []))
+        injectors.append(LatexDocInjectionInfo([], [
+            r'\usepackage{geometry}',
+            r'\geometry{a4paper,left=20mm,right=20mm,top=10mm,bottom=20mm}',
+        ], []))
         injectors.append(LatexDocInjectionInfo([], [
             r'\setCJKmainfont{Noto Serif CJK TC}',
             r'\setCJKsansfont{Noto Sans CJK TC}',
@@ -149,6 +189,7 @@ def main():
     p = BeautifulSoup(html, "html5lib")
     lyric = p.select_one('.hiragana')
     tokens = tokenize(lyric)
+    tokens = optimize_typography(tokens)
     gen = LatexGenerator()
     gen.centering = True
     gen.cjk = CJKProvider.xeCJK
-- 
cgit v1.2.3