import json
import re
from html import unescape
from typing import Any


def save_json(data: Any, path: str) -> None:
    """Serialize *data* as JSON and write it to *path*.

    The file is opened in write mode (replacing any existing content),
    encoded as UTF-8, indented by two spaces, and non-ASCII characters are
    written literally instead of being escaped.

    Args:
        data: Object to serialize; must be something ``json.dump`` accepts.
        path: Destination file path.

    Raises:
        TypeError: If *data* contains a value ``json`` cannot serialize.
        OSError: If *path* cannot be opened for writing.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2, ensure_ascii=False)


def clean_html(html_content: str) -> str:
    """Strip markup from *html_content* and return whitespace-normalized text.

    BeautifulSoup (with the stdlib ``html.parser`` backend) is used when the
    ``bs4`` package is importable. Otherwise a regex-based tag stripper is
    used, so the function works without the optional dependency.

    Args:
        html_content: HTML source text.

    Returns:
        The text content with HTML entities decoded, every run of whitespace
        collapsed to a single space, and leading/trailing whitespace removed.
    """
    try:
        # Imported lazily so bs4 stays an optional dependency; an unguarded
        # import would raise before the regex fallback could ever run.
        from bs4 import BeautifulSoup
    except ImportError:
        BeautifulSoup = None

    if BeautifulSoup is not None:
        text = BeautifulSoup(html_content, "html.parser").get_text()
    else:
        # DOTALL lets ``.`` cross newlines so tags split over several lines
        # (e.g. long attribute lists) are removed as well.
        text = re.sub(r"<.*?>", "", html_content, flags=re.DOTALL)

    # Entity decoding is applied on both paths, as before.
    text = unescape(text)
    return re.sub(r"\s+", " ", text).strip()