4 Commits

Author SHA1 Message Date
luk3yx
75605dcc59 Fix bad idea 2024-10-05 23:11:19 +13:00
luk3yx
5c8f1fd25a Add optional media proxy (disabled by default) 2024-10-05 16:09:51 +13:00
luk3yx
9535a18d87 Send mentions in new format 2024-10-05 14:27:28 +13:00
luk3yx
c822f4f4ac Convert <s> and <strike> to correct IRC code 2024-03-23 11:36:02 +13:00
4 changed files with 184 additions and 22 deletions

View File

@@ -48,6 +48,36 @@ and `PART` commands should work as expected.
Note that events sent before the client connects to Matrix are ignored. Your
system must have an accurate clock for this to work properly.
## Downloading media
Matrix has recently started to require authentication for media endpoints. By
default, miniirc_matrix now translates media files into MXC URLs. It does,
however, have a built-in HTTP proxy (disabled by default, see below).
### Proxying requests (experimental)
**Warning: I don't know how secure this is; it uses Python's `http.server`.**
If you want to convert media to a normal URL, for example for use with relay
bots or code that expects normal links, you can provide a `media_proxy_port`
argument to miniirc_matrix.Matrix.
```py
miniirc_matrix.Matrix('example.com', token='my_token',
media_proxy_port=8080)
```
This will start an HTTP server on `http://127.0.0.1:8080` to listen for requests.
The server only listens on localhost.
To expose this to the public, you must use a reverse proxy, and should set up
caching and some kind of rate limiting to prevent abuse. You can set the
`media_proxy_url` keyword argument to the public proxy URL.
An HMAC is created based on a random key and URL to prevent using the proxy to
fetch arbitrary attachment URLs. To make this value consistent across restarts,
pass a bytes value to the `media_proxy_key` keyword argument.
## Installation
You can install `miniirc_matrix` with `pip install miniirc_matrix`.

View File

@@ -6,13 +6,14 @@
from __future__ import annotations
from collections.abc import Callable
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
from typing import Any, Optional, TypeVar, overload
from urllib.parse import quote as _url_quote, urlparse as _urlparse
import functools, html.parser, itertools, json, math, re, time, uuid
import miniirc, requests, traceback # type: ignore
import functools, hmac, html.parser, itertools, json, math, os, re, time, uuid
import miniirc, requests, threading, traceback # type: ignore
ver = (0, 0, 10)
ver = (0, 0, 13)
__version__ = '.'.join(map(str, ver))
@@ -45,13 +46,26 @@ def _register_event(event_name: str):
return _register
_formatting_re = re.compile(
_invisible_formatting_re = re.compile(
r'\x02|\x1d|\x1f|\x1e|\x11|\x16|\x0f'
r'|\x03([0-9]{1,2})?(?:,([0-9]{1,2}))?'
# Hex colours
r'|\x04([0-9a-fA-F]{6})?(?:,([0-9a-fA-F]{6}))?'
)
_full_formatting_re = re.compile(
_invisible_formatting_re.pattern +
# Matrix mentions
# These currently get inserted without any escaping, if HTML characters get
# added to the regex then make sure escaping gets added
r'|\B@[a-z0-9\._=\-/+]+:[a-zA-Z0-9_\-\.]+\.[a-zA-Z]{2,}(?!\.\w)\b'
)
_html_tags = {'\x02': 'strong', '\x1d': 'em', '\x1f': 'u', '\x1e': 'del',
'\x11': 'code'}
_media_url_re = re.compile(
r'^mxc://([A-Za-z0-9_\-\.]+/[A-Za-z0-9_\-\.]+)(?:/(.*))?$'
)
class _TagManager:
@@ -86,9 +100,15 @@ class _TagManager:
self.write_tags()
self.text.append(s)
@staticmethod
def _encode_attribute(param: str) -> str:
    """
    Returns the HTML attribute name to emit for a keyword passed to open().

    "href" is used as-is. Any other name is prefixed with "data-mx-" and has
    its underscores replaced with hyphens.
    """
    is_plain_attribute = param == 'href'
    if is_plain_attribute:
        return param

    hyphenated = param.replace('_', '-')
    return f'data-mx-{hyphenated}'
def open(self, tag: str, **kwargs: Optional[str]) -> None:
self.tags[tag] = ''.join(
f' data-mx-{param.replace("_", "-")}="{value}"'
f' {self._encode_attribute(param)}="{value}"'
for param, value in kwargs.items() if value is not None
)
@@ -167,20 +187,21 @@ def _irc_colour_to_hex(code: Optional[str]) -> Optional[str]:
return ''
def _irc_to_html(irc_msg: str) -> Optional[str]:
def _irc_to_html(irc_msg: str) -> tuple[Optional[str], set[str]]:
"""
Converts IRC formatting to Matrix HTML. Returns None if the message
contains no formatting.
"""
mentions: set[str] = set()
# Escaping quotes seems to make matrix-appservice-discord do strange things
irc_msg = html.escape(irc_msg, quote=False)
# If there is no formatting return immediately
it = _formatting_re.finditer(irc_msg)
it = _full_formatting_re.finditer(irc_msg)
first_match = next(it, None)
if first_match is None:
return None
return None, mentions
tags = _TagManager()
prev_end = start = 0
@@ -212,12 +233,20 @@ def _irc_to_html(irc_msg: str) -> Optional[str]:
elif char == '\x0f':
tags.fg = tags.bg = None
tags.tags.clear()
elif char == '@':
# Matrix mention
mention = match.group(0)
tags.open('a', href=f'https://matrix.to/#/{mention}')
tags.write(mention)
tags.close('a')
mentions.add(mention)
prev_end = match.end()
tags.write(irc_msg[prev_end:])
tags.tags.clear()
tags.write_tags()
return ''.join(tags.text).replace('\n', '<br>')
return ''.join(tags.text).replace('\n', '<br>'), mentions
# This simple space collapsing regex "collapses" newlines as well
@@ -234,6 +263,8 @@ class _MatrixHTMLParser(html.parser.HTMLParser):
irc_codes['b'] = irc_codes['strong']
irc_codes['i'] = irc_codes['em']
irc_codes['br'] = '\n'
irc_codes['s'] = irc_codes['del']
irc_codes['strike'] = irc_codes['del']
def __init__(self) -> None:
super().__init__()
@@ -283,6 +314,48 @@ def _matrix_html_to_irc(content: _Event) -> tuple[str, bool]:
return content.body[str], False
class _MediaProxyHandler(BaseHTTPRequestHandler):
    """
    HTTP request handler for the optional media proxy.

    The request path (including its "?key=..." suffix) is turned into an
    mxc:// URL and handed to irc._download_media(), which validates it and
    starts the download. The body is then streamed back to the client with
    restrictive security headers.
    """

    # The Matrix instance that performs the downloads. This is expected to be
    # provided by a subclass that sets "irc" as a class attribute.
    irc: 'Matrix'

    def do_GET(self) -> None:
        # Only the validation step can legitimately answer with a 400, so keep
        # the try block small. This way an error response can never be
        # written after the real response headers have been sent.
        try:
            # self.path starts with "/", which completes the "mxc://" prefix
            resp = self.irc._download_media('mxc:/' + self.path)
        except ValueError as exc:
            self.send_error(400, explain=str(exc))
            return

        with resp:
            if resp.status_code != 200:
                self.send_error(resp.status_code)
                return

            self.send_response(200)
            self.send_header('X-Content-Type-Options', 'nosniff')
            self.send_header('Content-Security-Policy',
                             "default-src 'none'")

            # iter_content() decodes gzip/deflate bodies, in which case the
            # upstream Content-Length describes the encoded size and would
            # be wrong for the bytes written below. Only forward it when the
            # body is sent unencoded.
            encoding = resp.headers.get('Content-Encoding', '')
            if ('Content-Length' in resp.headers and
                    encoding.strip().lower() in ('', 'identity')):
                self.send_header('Content-Length',
                                 resp.headers['Content-Length'])

            # Only allow probably safe content types. Parameters such as
            # "; charset=utf-8" are ignored when deciding.
            content_type = resp.headers.get('Content-Type', '')
            media_type = content_type.partition(';')[0].strip().lower()
            if (media_type.startswith(('image/', 'audio/', 'video/')) or
                    media_type == 'text/plain'):
                self.send_header('Content-Type', content_type)
            else:
                self.send_header('Content-Type',
                                 'application/octet-stream')
            self.end_headers()

            # Copy content
            for chunk in resp.iter_content(8192):
                self.wfile.write(chunk)

    def log_message(self, format: str, *args) -> None:
        # irc.debug is a method, so testing its truthiness would always
        # succeed and log every request. Hand the message to it instead;
        # presumably it is a no-op unless debugging is enabled.
        self.irc.debug(
            f'Media proxy: {self.address_string()} - {format % args}'
        )
class _InvalidEventError(Exception):
pass
@@ -361,9 +434,15 @@ class Matrix(miniirc.IRC):
connected: Optional[bool]
msglen = 4096
def __init__(self, ip: str, port: int = 0, nick: str = '', *args,
auto_connect: bool = True,
token: Optional[str] = None, **kwargs):
def __init__(
self, ip: str, port: int = 0, nick: str = '', *args,
auto_connect: bool = True,
token: Optional[str] = None,
media_proxy_port: Optional[int] = None,
media_proxy_url: Optional[str] = None,
media_proxy_key: Optional[bytes] = None,
**kwargs
) -> None:
# Cache _get_room_url
# This is done here so that each class instance gets its own cache and
# the cache doesn't store class instances.
@@ -383,6 +462,14 @@ class Matrix(miniirc.IRC):
if token:
self.token = token
self._media_proxy: Optional[ThreadingHTTPServer] = None
self._media_proxy_port = media_proxy_port
if media_proxy_port and not media_proxy_url:
media_proxy_url = f'http://127.0.0.1:{media_proxy_port}'
self._media_proxy_url = media_proxy_url and media_proxy_url.rstrip('/')
if media_proxy_port is not None:
self._media_proxy_key = media_proxy_key or os.urandom(32)
# Stop miniirc from trying to access the (non-existent) socket
kwargs['ping_interval'] = kwargs['ping_timeout'] = None
super().__init__(ip, port, nick, *args, auto_connect=False, **kwargs)
@@ -427,7 +514,7 @@ class Matrix(miniirc.IRC):
raise ValueError(f'Status code {res.status_code} returned')
self._baseurl = f'{baseurl}/_matrix/client/{api_version}'
self._media_baseurl = f'{baseurl}/_matrix/media/{api_version}'
self._media_baseurl = f'{baseurl}/_matrix/client/v1/media'
def __get(self, endpoint: str, timeout: int = 5, /,
**params: Optional[str | int]) -> Any:
@@ -456,6 +543,23 @@ class Matrix(miniirc.IRC):
return f'rooms/{_url_quote(room_id)}'
def __make_url_digest(self, path: str) -> str:
    """
    Returns the hex-encoded HMAC-SHA256 of a media path, keyed with the media
    proxy key. The path must be ASCII.
    """
    message = path.encode('ascii')
    mac = hmac.new(self._media_proxy_key, message, 'sha256')
    return mac.hexdigest()
def _download_media(self, url: str) -> requests.Response:
    """
    Validates a proxied media URL and starts downloading the media.

    `url` must look like "mxc://<server>/<media id>?key=<hex digest>", where
    the digest is the value __make_url_digest() returns for
    "<server>/<media id>".

    Raises ValueError if the URL is malformed or the key does not match.
    Otherwise returns the response of a streaming GET request to the media
    download endpoint. The body has not been read yet, so the caller should
    close the response when done (for example with a "with" block).
    """
    url_base, _, key = url.partition('?key=')
    match = _media_url_re.match(url_base)
    if not match:
        raise ValueError('Invalid media URL')

    path = match.group(1)
    expected = self.__make_url_digest(path)

    # The key comes straight from the request. hmac.compare_digest() raises
    # TypeError when given a str with non-ASCII characters, so compare bytes
    # to make sure a bad key is always reported as a ValueError. A replaced
    # character becomes "?", which can never match a hex digest.
    if not hmac.compare_digest(expected.encode('ascii'),
                               key.encode('utf-8', 'replace')):
        raise ValueError('Invalid key parameter')

    return self.__session.get(f'{self._media_baseurl}/download/{path}',
                              timeout=15, stream=True)
@functools.cached_property
def current_nick(self) -> str:
return self.__get('account/whoami')['user_id']
@@ -464,6 +568,7 @@ class Matrix(miniirc.IRC):
if self.connected is not None:
return
with self._send_lock:
self.connected = False
self._update_baseurl()
self.active_caps = self.ircv3_caps & {
'account-tag', 'echo-message', 'message-tags',
@@ -472,8 +577,26 @@ class Matrix(miniirc.IRC):
self.debug('Starting main loop (Matrix)')
self._start_main_loop()
if self._media_proxy_port:
self.debug('Starting media proxy')
class _handler(_MediaProxyHandler):
irc = self
self._media_proxy = ThreadingHTTPServer(
('127.0.0.1', self._media_proxy_port),
_handler,
)
th = threading.Thread(target=self._media_proxy.serve_forever)
th.daemon = True
th.start()
def disconnect(self) -> None:
self.connected = False
with self._send_lock:
self.connected = False
if self._media_proxy is not None:
self._media_proxy.shutdown()
self._media_proxy = None
def _main(self) -> None:
try:
@@ -568,13 +691,16 @@ class Matrix(miniirc.IRC):
msgtype = 'm.text'
params: dict[str, Any]
if html_msg := _irc_to_html(msg):
html_msg, mentions = _irc_to_html(msg)
if html_msg:
params = {
'msgtype': msgtype,
'body': _formatting_re.sub('', msg),
'body': _invisible_formatting_re.sub('', msg),
'format': 'org.matrix.custom.html',
'formatted_body': html_msg,
}
if mentions:
params['m.mentions'] = {'user_ids': list(mentions)}
else:
# No formatting
params = {'msgtype': msgtype, 'body': msg}
@@ -660,8 +786,10 @@ class Matrix(miniirc.IRC):
msg: str
if 'url' in content:
msg = content.url[str]
if msg.startswith('mxc://'):
msg = f'{self._media_baseurl}/download/{msg[6:]}'
if self._media_proxy_url and (match := _media_url_re.match(msg)):
path = match.group(1)
key = self.__make_url_digest(path)
msg = f'{self._media_proxy_url}/{path}?key={key}'
else:
msg, html_parsed_ok = _matrix_html_to_irc(content)

View File

@@ -5,7 +5,7 @@ from setuptools import setup
setup(
name='miniirc_matrix',
version='0.0.10',
version='0.0.13',
py_modules=['miniirc_matrix'],
author='luk3yx',
description='A Matrix wrapper for miniirc.',

View File

@@ -2,10 +2,14 @@ from miniirc_matrix import _Event, _irc_to_html, _matrix_html_to_irc
def test_irc_to_html():
assert _irc_to_html('Hello world!') is None
assert _irc_to_html('\x02Bold text') == '<strong>Bold text</strong>'
assert _irc_to_html('Hello world!') == (None, set())
assert _irc_to_html('\x02Bold') == ('<strong>Bold</strong>', set())
assert (_irc_to_html('\x021 \x1d2\x02 3') ==
'<strong>1 <em>2</em></strong><em> 3</em>')
('<strong>1 <em>2</em></strong><em> 3</em>', set()))
assert (_irc_to_html('@test:example.com: \x1dHello') ==
('<a href="https://matrix.to/#/@test:example.com">'
'@test:example.com</a>: <em>Hello</em>', {'@test:example.com'}))
def html_to_irc(html):