# mautrix-telegram - A Matrix-Telegram puppeting bridge
# Copyright (C) 2021 Tulir Asokan
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see
"
f"{content.formatted_body}
")
await _convert_custom_emoji(source, entities, client=client)
text = add_surrogate(text)
html = await _telegram_entities_to_matrix_catch(text, entities)
html = del_surrogate(html)
return html
async def telegram_to_matrix(
evt: Message | SponsoredMessage,
source: au.AbstractUser,
client: MautrixTelegramClient | None = None,
override_text: str = None,
override_entities: list[TypeMessageEntity] = None,
require_html: bool = False,
) -> TextMessageEventContent:
if not client:
client = source.client
content = TextMessageEventContent(
msgtype=MessageType.TEXT,
body=override_text or evt.message,
)
entities = override_entities or evt.entities
if entities:
content.format = Format.HTML
content.formatted_body = await telegram_text_to_matrix_html(
source, content.body, entities, client=client
)
if require_html:
content.ensure_has_html()
if getattr(evt, "fwd_from", None):
await _add_forward_header(client, content, evt)
if isinstance(evt, Message) and evt.post and evt.post_author:
content.ensure_has_html()
content.body += f"\n- {evt.post_author}"
content.formatted_body += f"
- {evt.post_author}"
return content
async def _telegram_entities_to_matrix_catch(text: str, entities: list[TypeMessageEntity]) -> str:
try:
return await _telegram_entities_to_matrix(text, entities)
except Exception:
log.exception(
"Failed to convert Telegram format:\nmessage=%s\nentities=%s", text, entities
)
return "[failed conversion in _telegram_entities_to_matrix]"
def within_surrogate(text, index):
"""
`True` if ``index`` is within a surrogate (before and after it, not at!).
"""
return (
1 < index < len(text) # in bounds
and "\ud800" <= text[index - 1] <= "\udbff" # current is low surrogate
and "\udc00" <= text[index] <= "\udfff" # previous is high surrogate
)
async def _telegram_entities_to_matrix(
text: str,
entities: list[TypeMessageEntity | ReuploadedCustomEmoji],
offset: int = 0,
length: int = None,
in_codeblock: bool = False,
) -> str:
def text_to_html(
val: str, _in_codeblock: bool = in_codeblock, escape_html: bool = True
) -> str:
if escape_html:
val = escape(val)
if not _in_codeblock:
val = val.replace("\n", "
")
return val
if not entities:
return text_to_html(text)
if length is None:
length = len(text)
html = []
last_offset = 0
for i, entity in enumerate(entities):
if entity.offset >= offset + length:
break
relative_offset = entity.offset - offset
if relative_offset > last_offset:
html.append(text_to_html(text[last_offset:relative_offset]))
elif relative_offset < last_offset:
continue
while within_surrogate(text, relative_offset):
relative_offset += 1
while within_surrogate(text, relative_offset + entity.length):
entity.length += 1
skip_entity = False
is_code_entity = isinstance(entity, (MessageEntityCode, MessageEntityPre))
entity_text = await _telegram_entities_to_matrix(
text=text[relative_offset : relative_offset + entity.length],
entities=entities[i + 1 :],
offset=entity.offset,
length=entity.length,
in_codeblock=is_code_entity,
)
entity_text = text_to_html(entity_text, is_code_entity, escape_html=False)
entity_type = type(entity)
if entity_type == MessageEntityBold:
html.append(f"{entity_text}")
elif entity_type == MessageEntityItalic:
html.append(f"{entity_text}")
elif entity_type == MessageEntityUnderline:
html.append(f"{entity_text}")
elif entity_type == MessageEntityStrike:
html.append(f"{entity_text}")
elif entity_type == MessageEntityBlockquote:
html.append(f"
{entity_text}") elif entity_type == MessageEntityCode: html.append( f"
{entity_text}"
if "\n" in entity_text
else f"{entity_text}"
)
elif entity_type == MessageEntityPre:
skip_entity = _parse_pre(html, entity_text, entity.language)
elif entity_type == MessageEntityMention:
skip_entity = await _parse_mention(html, entity_text)
elif entity_type == MessageEntityMentionName:
skip_entity = await _parse_name_mention(html, entity_text, TelegramID(entity.user_id))
elif entity_type == MessageEntityEmail:
html.append(f"{entity_text}")
elif entity_type in (MessageEntityTextUrl, MessageEntityUrl):
await _parse_url(
html, entity_text, entity.url if entity_type == MessageEntityTextUrl else None
)
elif entity_type == MessageEntityCustomEmoji:
html.append(entity_text)
elif entity_type == ReuploadedCustomEmoji:
if isinstance(entity.file, UnicodeCustomEmoji):
html.append(entity.file.emoji)
else:
html.append(
f"{entity_text}")
else:
html.append(f"{entity_text}")
return False
async def _parse_mention(html: list[str], entity_text: str) -> bool:
username = entity_text[1:]
mxid = None
portal = None
# This is a bit complicated because public channels have both Puppet and Portal instances.
# Basically the currently intended output is:
# User/bot mention (bridge user) -> real user mention
# User/bot mention (normal Telegram user) -> ghost user mention
# Public channel with existing portal -> room mention
# Public channel without portal -> ghost user mention
# Other chat -> room mention
user = await u.User.find_by_username(username) or await pu.Puppet.find_by_username(username)
if user:
if isinstance(user, pu.Puppet) and user.is_channel:
portal = await po.Portal.get_by_tgid(user.tgid)
mxid = user.mxid
else:
portal = await po.Portal.find_by_username(username)
if portal and (portal.mxid or not user):
mxid = portal.alias or portal.mxid
if mxid:
html.append(f"{entity_text}")
else:
return True
return False
async def _parse_name_mention(html: list[str], entity_text: str, user_id: TelegramID) -> bool:
user = await u.User.get_by_tgid(user_id)
if user:
mxid = user.mxid
else:
puppet = await pu.Puppet.get_by_tgid(user_id, create=False)
mxid = puppet.mxid if puppet else None
if mxid:
html.append(f"{entity_text}")
else:
return True
return False
message_link_regex = re.compile(
r"https?://t(?:elegram)?\.(?:me|dog)"
# /username or /c/id
r"/([A-Za-z][A-Za-z0-9_]{3,31}[A-Za-z0-9]|[Cc]/[0-9]{1,20})"
# /messageid
r"/([0-9]{1,20})"
)
async def _parse_url(html: list[str], entity_text: str, url: str) -> None:
url = escape(url) if url else entity_text
if not url.startswith(("https://", "http://", "ftp://", "magnet://")):
url = "http://" + url
message_link_match = message_link_regex.match(url)
if message_link_match:
group, msgid_str = message_link_match.groups()
msgid = int(msgid_str)
if group.lower().startswith("c/"):
portal = await po.Portal.get_by_tgid(TelegramID(int(group[2:])))
else:
portal = await po.Portal.find_by_username(group)
if portal:
message = await DBMessage.get_one_by_tgid(TelegramID(msgid), portal.tgid)
if message:
url = f"https://matrix.to/#/{portal.mxid}/{message.mxid}"
html.append(f"{entity_text}")