2021-11-25 13:35:56 +01:00
|
|
|
import re
|
2022-06-19 16:03:59 +02:00
|
|
|
from html.parser import HTMLParser
|
2022-11-25 19:43:58 +01:00
|
|
|
from typing import Tuple, Set
|
2021-11-25 13:35:56 +01:00
|
|
|
|
2021-08-22 20:10:29 +02:00
|
|
|
import bleach
|
|
|
|
import markdown
|
|
|
|
from bleach_allowlist import markdown_tags, markdown_attrs
|
|
|
|
|
2021-12-09 15:07:22 +01:00
|
|
|
# Extra HTML tags that md_to_html lets through bleach.clean, in addition to
# the markdown_tags list imported from bleach_allowlist.
custom_allowed_tags = ["del", "ins"]
|
|
|
|
|
2021-08-22 20:10:29 +02:00
|
|
|
|
2023-04-25 22:16:28 +02:00
|
|
|
def md_to_html(md: str, replacements=None) -> tuple[str, set[str]]:
    """Convert Markdown text into sanitized HTML.

    The text is first passed through ``autolink`` (forwarding
    ``replacements`` unchanged), then rendered with ``markdown.markdown``
    using the ``nl2br`` extension, and finally cleaned with ``bleach.clean``
    so that only ``markdown_tags`` plus ``custom_allowed_tags`` and the
    attributes in ``markdown_attrs`` are allowed.

    Returns:
        A ``(html, linked_objects)`` tuple, where ``linked_objects`` is the
        set returned by ``autolink``.
    """
    linked_md, linked_objects = autolink(md, replacements=replacements)

    rendered = markdown.markdown(
        linked_md,
        output_format="html",
        extensions=["nl2br"],
    )

    allowed_tags = markdown_tags + custom_allowed_tags
    sanitized = bleach.clean(
        rendered,
        tags=allowed_tags,
        attributes=markdown_attrs,
    )
    return sanitized, linked_objects
|
2021-09-26 19:02:25 +02:00
|
|
|
|
|
|
|
|
2023-04-25 22:16:28 +02:00
|
|
|
def autolink(md: str, replacements=None) -> tuple[str, set[str]]:
    """Turn whole-word occurrences of known names into Markdown links.

    Args:
        md: Markdown source text.
        replacements: Mapping of ``name -> (url, obj)``. Each ``obj`` must
            expose a ``graphkey`` attribute. When ``None``, the mapping is
            obtained from ``utils.urls.name2url()``.

    Returns:
        A ``(text, linked_objects)`` tuple: the text with every matched name
        replaced by ``[name](url)``, and the set of ``graphkey`` values of
        the objects whose name matched at least once.
    """
    if replacements is None:
        # Function-scope import, only needed when no mapping is supplied
        # (presumably also avoids an import cycle -- confirm before moving).
        from utils.urls import name2url

        replacements = name2url()

    links = {}
    linked_objects = set()

    # Phase 1: swap each name for an opaque placeholder, so that link text
    # and URLs inserted later are never re-scanned for other names.
    for i, (name, (url, obj)) in enumerate(replacements.items()):
        # Escape the name: it is literal text, not a regular expression.
        # Unescaped, "A.B" would also match "AxB" and "a(b" would raise.
        pattern = rf"\b{re.escape(name)}\b"
        placeholder = f"SOME{i}LINK"
        md, n_replacements = re.subn(pattern, placeholder, md)
        if n_replacements > 0:
            linked_objects.add(obj.graphkey)
            links[placeholder] = f"[{name}]({url})"

    # Phase 2: expand the placeholders into the actual Markdown links.
    for placeholder, value in links.items():
        md = md.replace(placeholder, value)

    return md, linked_objects
|
2022-06-19 16:03:59 +02:00
|
|
|
|
|
|
|
|
|
|
|
class HTMLFilter(HTMLParser):
    """HTML parser that collects only the character data it is fed.

    Every run of text reported to ``handle_data`` is appended to ``text``;
    tags are not handled and therefore contribute nothing.
    """

    # Class-level default; the first handle_data call rebinds it on the
    # instance, so instances do not share accumulated text.
    text: str = ""

    def handle_data(self, data):
        """Append one run of character data to the accumulated text."""
        self.text = self.text + data
|
|
|
|
|
|
|
|
|
|
|
|
def html_to_text(html: str) -> str:
    """Return the text content of *html* with the markup removed.

    The input is fed through an ``HTMLFilter`` and the text it accumulated
    is returned.
    """
    parser = HTMLFilter()
    parser.feed(html)
    # feed() may hold back trailing text that contains an unterminated "&"
    # (it could be the start of a character reference continued in a later
    # chunk). close() forces that buffered remainder to be processed, so
    # input such as "Tom&Jerry" is not silently dropped.
    parser.close()
    return parser.text
|