"""On-disk cache for UtaTen lyric page HTML.

Originally introduced as ``htmlcache.py`` in commit
ed540d68256f3f05e86b7880858249a1e8a17c42 ("Cache HTML source on disk",
Keuin, 2 Jun 2023).
"""
import os

import aiohttp

import main


class TexSourceGenerationError(Exception):
    """Raised when the TeX source for a lyric page cannot be produced."""


class HtmlCache:
    """Fetch UtaTen lyric pages, keeping a copy of each page's HTML on disk.

    Every page is stored as ``<cache_path>/<item_id>.html``.  A cached copy,
    when present, is used instead of issuing an HTTP request.
    """

    def __init__(self, cache_path: str):
        """Remember the (absolute) directory that holds the cached pages.

        The directory is not touched here; it is created lazily the first
        time a page has to be written.
        """
        self._cache_path = os.path.abspath(cache_path)

    async def get_utaten_tex_source(self, item_id: str) -> str:
        """Return the TeX source generated from the lyric page ``item_id``.

        The page HTML is read from the cache when available; otherwise it is
        downloaded from ``https://utaten.com/lyric/<item_id>/`` and the cache
        is updated on a best-effort basis (a failed cache write is reported
        on stdout but does not fail the call).

        Raises:
            TexSourceGenerationError: if ``item_id`` is not a plain file
                name component, or if the HTTP response status is not OK.
        """
        cache_file_path = self._cache_file_for(item_id)
        if os.path.isfile(cache_file_path):
            with open(cache_file_path, 'r', encoding='utf-8') as f:
                html = f.read()
        else:
            html = await self._download_html(item_id)
            try:
                self._store_in_cache(cache_file_path, html)
            except IOError as e:
                # Caching is an optimisation only: report and carry on.
                print(f'Failed to update cache for song `{item_id}`: {e}')
        return main.html_to_tex(html)

    def _cache_file_for(self, item_id: str) -> str:
        """Map ``item_id`` to its cache file path, rejecting unsafe ids."""
        # The id becomes part of a file name, so it must be a single path
        # component; otherwise values such as '../x' or '/etc/x' would make
        # us read or write files outside of the cache directory.
        separators = {'/', '\\', '\0', os.sep}
        if os.altsep:
            separators.add(os.altsep)
        if (
            not item_id
            or item_id in (os.curdir, os.pardir)
            or any(sep in item_id for sep in separators)
        ):
            raise TexSourceGenerationError(f'Invalid item id: {item_id!r}')
        return os.path.join(self._cache_path, f'{item_id}.html')

    async def _download_html(self, item_id: str) -> str:
        """Download the HTML of the lyric page ``item_id`` from UtaTen."""
        async with aiohttp.ClientSession() as ses:
            async with ses.get(f'https://utaten.com/lyric/{item_id}/') as r:
                if not r.ok:
                    raise TexSourceGenerationError('HTTP request failed when reading page source')
                return await r.text()

    def _store_in_cache(self, cache_file_path: str, html: str) -> None:
        """Write ``html`` to ``cache_file_path`` without exposing partial data."""
        # Without this the very first write fails whenever the cache
        # directory has not been created by hand.
        os.makedirs(self._cache_path, exist_ok=True)
        # Write to a sibling temporary file and rename it into place, so an
        # interrupted write can never leave a truncated page that later
        # calls would treat as a valid cache hit.
        tmp_path = f'{cache_file_path}.{os.getpid()}.tmp'
        try:
            with open(tmp_path, 'w', encoding='utf-8') as f:
                f.write(html)
            os.replace(tmp_path, cache_file_path)
        finally:
            # After a successful replace the temporary file is gone; after a
            # failure it may linger.  Cleanup is best-effort and must not
            # mask the original error.
            try:
                os.remove(tmp_path)
            except OSError:
                pass