mirror of
https://github.com/luk3yx/miniirc_matrix.git
synced 2026-05-17 05:25:44 +03:00
Compare commits
8 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
75605dcc59 | ||
|
|
5c8f1fd25a | ||
|
|
9535a18d87 | ||
|
|
c822f4f4ac | ||
|
|
a1b45952eb | ||
|
|
f00a14e316 | ||
|
|
1baa8ef02f | ||
|
|
1a0d644b60 |
24
.github/workflows/pythonapp.yml
vendored
Normal file
24
.github/workflows/pythonapp.yml
vendored
Normal file
@@ -0,0 +1,24 @@
|
||||
# From https://docs.github.com/en/free-pro-team@latest/actions/guides/building-and-testing-python

name: Test with pytest

on: [push]

jobs:
  run-tests:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        # Quoted so that YAML keeps the versions as strings (an unquoted
        # 3.10 would be read as the number 3.1).
        python_version: ['3.8', '3']

    steps:
      # The v2 releases of these actions run on a Node.js runtime that is no
      # longer provided by GitHub-hosted runners.
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python_version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python_version }}
      - name: Install dependencies
        run: python -m pip install miniirc pytest 'requests>=2.22.0,<3'
      - name: Run pytest
        run: pytest
|
||||
30
README.md
30
README.md
@@ -48,6 +48,36 @@ and `PART` commands should work as expected.
|
||||
Note that events sent before the client connects to Matrix are ignored. Your
|
||||
system must have an accurate clock for this to work properly.
|
||||
|
||||
## Downloading media
|
||||
|
||||
Matrix has recently started to require authentication for media endpoints. By
|
||||
default, miniirc_matrix now translates media files into MXC URLs. It does,
|
||||
however, have a built-in HTTP proxy (disabled by default, see below).
|
||||
|
||||
### Proxying requests (experimental)
|
||||
|
||||
**Warning: I don't know how secure this is, since it uses Python's `http.server`.**
|
||||
|
||||
If you want to convert media to a normal URL, for example for use with relay
|
||||
bots or code that expects normal links, you can provide a `media_proxy_port`
|
||||
argument to miniirc_matrix.Matrix.
|
||||
|
||||
```py
|
||||
miniirc_matrix.Matrix('example.com', token='my_token',
|
||||
media_proxy_port=8080)
|
||||
```
|
||||
|
||||
This will start an HTTP server on `http://127.0.0.1:8080` to listen for requests.
|
||||
The server only listens on localhost.
|
||||
|
||||
To expose this to the public, you must use a reverse proxy, and should set up
|
||||
caching and some kind of rate limiting to prevent abuse. You can set the
|
||||
`media_proxy_url` keyword argument to the public proxy URL.
|
||||
|
||||
An HMAC is created based on a random key and URL to prevent using the proxy to
|
||||
fetch arbitrary attachment URLs. To make this value consistent across restarts,
|
||||
pass a bytes value to the `media_proxy_key` keyword argument.
|
||||
|
||||
## Installation
|
||||
|
||||
You can install `miniirc_matrix` with `pip install miniirc_matrix`.
|
||||
|
||||
@@ -6,13 +6,14 @@
|
||||
|
||||
from __future__ import annotations
|
||||
from collections.abc import Callable
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from typing import Any, Optional, TypeVar, overload
|
||||
from urllib.parse import quote as _url_quote, urlparse as _urlparse
|
||||
import functools, html.parser, itertools, json, math, re, threading, time, uuid
|
||||
import miniirc, requests, traceback # type: ignore
|
||||
import functools, hmac, html.parser, itertools, json, math, os, re, time, uuid
|
||||
import miniirc, requests, threading, traceback # type: ignore
|
||||
|
||||
|
||||
ver = (0, 0, 7)
|
||||
ver = (0, 0, 13)
|
||||
__version__ = '.'.join(map(str, ver))
|
||||
|
||||
|
||||
@@ -45,13 +46,26 @@ def _register_event(event_name: str):
|
||||
return _register
|
||||
|
||||
|
||||
_formatting_re = re.compile(
|
||||
_invisible_formatting_re = re.compile(
|
||||
r'\x02|\x1d|\x1f|\x1e|\x11|\x16|\x0f'
|
||||
r'|\x03([0-9]{1,2})?(?:,([0-9]{1,2}))?'
|
||||
|
||||
# Hex colours
|
||||
r'|\x04([0-9a-fA-F]{6})?(?:,([0-9a-fA-F]{6}))?'
|
||||
)
|
||||
_full_formatting_re = re.compile(
|
||||
_invisible_formatting_re.pattern +
|
||||
|
||||
# Matrix mentions
|
||||
# These currently get inserted without any escaping, if HTML characters get
|
||||
# added to the regex then make sure escaping gets added
|
||||
r'|\B@[a-z0-9\._=\-/+]+:[a-zA-Z0-9_\-\.]+\.[a-zA-Z]{2,}(?!\.\w)\b'
|
||||
)
|
||||
_html_tags = {'\x02': 'strong', '\x1d': 'em', '\x1f': 'u', '\x1e': 'del',
|
||||
'\x11': 'code'}
|
||||
_media_url_re = re.compile(
|
||||
r'^mxc://([A-Za-z0-9_\-\.]+/[A-Za-z0-9_\-\.]+)(?:/(.*))?$'
|
||||
)
|
||||
|
||||
|
||||
class _TagManager:
|
||||
@@ -86,9 +100,15 @@ class _TagManager:
|
||||
self.write_tags()
|
||||
self.text.append(s)
|
||||
|
||||
@staticmethod
def _encode_attribute(param: str) -> str:
    """
    Maps a keyword argument name onto the HTML attribute name to emit.

    'href' is passed through unchanged; every other name becomes a
    'data-mx-' attribute with underscores replaced by hyphens.
    """
    if param != 'href':
        return 'data-mx-' + param.replace('_', '-')
    return param
|
||||
|
||||
def open(self, tag: str, **kwargs: Optional[str]) -> None:
|
||||
self.tags[tag] = ''.join(
|
||||
f' data-mx-{param.replace("_", "-")}="{value}"'
|
||||
f' {self._encode_attribute(param)}="{value}"'
|
||||
for param, value in kwargs.items() if value is not None
|
||||
)
|
||||
|
||||
@@ -167,20 +187,21 @@ def _irc_colour_to_hex(code: Optional[str]) -> Optional[str]:
|
||||
return ''
|
||||
|
||||
|
||||
def _irc_to_html(irc_msg: str) -> Optional[str]:
|
||||
def _irc_to_html(irc_msg: str) -> tuple[Optional[str], set[str]]:
|
||||
"""
|
||||
Converts IRC formatting to Matrix HTML. Returns (html, mentions), where html
|
||||
is None if the message contains no formatting or mentions.
|
||||
"""
|
||||
mentions: set[str] = set()
|
||||
|
||||
# Escaping quotes seems to make matrix-appservice-discord do strange things
|
||||
irc_msg = html.escape(irc_msg, quote=False)
|
||||
|
||||
# If there is no formatting return immediately
|
||||
it = _formatting_re.finditer(irc_msg)
|
||||
it = _full_formatting_re.finditer(irc_msg)
|
||||
first_match = next(it, None)
|
||||
if first_match is None:
|
||||
return None
|
||||
return None, mentions
|
||||
|
||||
tags = _TagManager()
|
||||
prev_end = start = 0
|
||||
@@ -212,12 +233,20 @@ def _irc_to_html(irc_msg: str) -> Optional[str]:
|
||||
elif char == '\x0f':
|
||||
tags.fg = tags.bg = None
|
||||
tags.tags.clear()
|
||||
elif char == '@':
|
||||
# Matrix mention
|
||||
mention = match.group(0)
|
||||
tags.open('a', href=f'https://matrix.to/#/{mention}')
|
||||
tags.write(mention)
|
||||
tags.close('a')
|
||||
mentions.add(mention)
|
||||
|
||||
prev_end = match.end()
|
||||
|
||||
tags.write(irc_msg[prev_end:])
|
||||
tags.tags.clear()
|
||||
tags.write_tags()
|
||||
return ''.join(tags.text).replace('\n', '<br>')
|
||||
return ''.join(tags.text).replace('\n', '<br>'), mentions
|
||||
|
||||
|
||||
# This simple space collapsing regex "collapses" newlines as well
|
||||
@@ -234,6 +263,8 @@ class _MatrixHTMLParser(html.parser.HTMLParser):
|
||||
irc_codes['b'] = irc_codes['strong']
|
||||
irc_codes['i'] = irc_codes['em']
|
||||
irc_codes['br'] = '\n'
|
||||
irc_codes['s'] = irc_codes['del']
|
||||
irc_codes['strike'] = irc_codes['del']
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__()
|
||||
@@ -257,7 +288,9 @@ class _MatrixHTMLParser(html.parser.HTMLParser):
|
||||
if tag in ('mx-reply', 'script'):
|
||||
self.in_reply -= 1
|
||||
return
|
||||
if tag in self.irc_codes:
|
||||
if tag == 'br':
|
||||
return
|
||||
elif tag in self.irc_codes:
|
||||
self.text.append(self.irc_codes[tag])
|
||||
elif tag != 'font':
|
||||
raise _UnknownTagError(tag)
|
||||
@@ -281,6 +314,48 @@ def _matrix_html_to_irc(content: _Event) -> tuple[str, bool]:
|
||||
return content.body[str], False
|
||||
|
||||
|
||||
class _MediaProxyHandler(BaseHTTPRequestHandler):
    """
    HTTP request handler that serves Matrix media through the local proxy.

    The `irc` attribute is filled in by a subclass created when the proxy is
    started. It must provide `_download_media(url)`, which raises ValueError
    for rejected URLs, and the `debug_file` attribute.
    """

    irc: 'Matrix'

    def do_GET(self) -> None:
        """
        Streams the media identified by the request path to the client.

        Non-200 upstream responses are relayed as an error with the same
        status code. URLs rejected by `_download_media` result in a 400.
        """
        try:
            # self.path starts with "/", so this rebuilds
            # "mxc://<server>/<media id>?key=<digest>"
            with self.irc._download_media('mxc:/' + self.path) as resp:
                if resp.status_code != 200:
                    self.send_error(resp.status_code)
                    return

                self.send_response(200)
                self.send_header('X-Content-Type-Options', 'nosniff')
                self.send_header('Content-Security-Policy',
                                 "default-src 'none'")

                # iter_content() transparently decodes gzip/deflate bodies,
                # so the upstream Content-Length only matches what gets
                # written below when no Content-Encoding was applied.
                if ('Content-Length' in resp.headers and
                        not resp.headers.get('Content-Encoding')):
                    self.send_header('Content-Length',
                                     resp.headers['Content-Length'])

                # Only allow probably safe content types. The check is done
                # on the bare media type so that parameters such as
                # "; charset=utf-8" do not defeat the text/plain comparison.
                content_type = resp.headers.get('Content-Type', '')
                media_type = content_type.partition(';')[0].strip().lower()
                if (media_type.startswith(('image/', 'audio/', 'video/')) or
                        media_type == 'text/plain'):
                    self.send_header('Content-Type', content_type)
                else:
                    self.send_header('Content-Type',
                                     'application/octet-stream')

                self.end_headers()

                # Copy content
                for chunk in resp.iter_content(8192):
                    self.wfile.write(chunk)
        except ValueError as exc:
            self.send_error(400, explain=str(exc))
            return

    def log_message(self, format: str, *args) -> None:
        """Logs requests only when debugging output is enabled."""
        # irc.debug is a method (always truthy); debug_file is the setting
        # that says whether debugging output is wanted.
        if self.irc.debug_file:
            super().log_message(format, *args)
|
||||
|
||||
|
||||
class _InvalidEventError(Exception):
|
||||
pass
|
||||
|
||||
@@ -359,9 +434,15 @@ class Matrix(miniirc.IRC):
|
||||
connected: Optional[bool]
|
||||
msglen = 4096
|
||||
|
||||
def __init__(self, ip: str, port: int = 0, nick: str = '', *args,
|
||||
auto_connect: bool = True,
|
||||
token: Optional[str] = None, **kwargs):
|
||||
def __init__(
|
||||
self, ip: str, port: int = 0, nick: str = '', *args,
|
||||
auto_connect: bool = True,
|
||||
token: Optional[str] = None,
|
||||
media_proxy_port: Optional[int] = None,
|
||||
media_proxy_url: Optional[str] = None,
|
||||
media_proxy_key: Optional[bytes] = None,
|
||||
**kwargs
|
||||
) -> None:
|
||||
# Cache _get_room_url
|
||||
# This is done here so that each class instance gets its own cache and
|
||||
# the cache doesn't store class instances.
|
||||
@@ -381,6 +462,14 @@ class Matrix(miniirc.IRC):
|
||||
if token:
|
||||
self.token = token
|
||||
|
||||
self._media_proxy: Optional[ThreadingHTTPServer] = None
|
||||
self._media_proxy_port = media_proxy_port
|
||||
if media_proxy_port and not media_proxy_url:
|
||||
media_proxy_url = f'http://127.0.0.1:{media_proxy_port}'
|
||||
self._media_proxy_url = media_proxy_url and media_proxy_url.rstrip('/')
|
||||
if media_proxy_port is not None:
|
||||
self._media_proxy_key = media_proxy_key or os.urandom(32)
|
||||
|
||||
# Stop miniirc from trying to access the (non-existent) socket
|
||||
kwargs['ping_interval'] = kwargs['ping_timeout'] = None
|
||||
super().__init__(ip, port, nick, *args, auto_connect=False, **kwargs)
|
||||
@@ -425,7 +514,7 @@ class Matrix(miniirc.IRC):
|
||||
raise ValueError(f'Status code {res.status_code} returned')
|
||||
|
||||
self._baseurl = f'{baseurl}/_matrix/client/{api_version}'
|
||||
self._media_baseurl = f'{baseurl}/_matrix/media/{api_version}'
|
||||
self._media_baseurl = f'{baseurl}/_matrix/client/v1/media'
|
||||
|
||||
def __get(self, endpoint: str, timeout: int = 5, /,
|
||||
**params: Optional[str | int]) -> Any:
|
||||
@@ -454,6 +543,23 @@ class Matrix(miniirc.IRC):
|
||||
|
||||
return f'rooms/{_url_quote(room_id)}'
|
||||
|
||||
def __make_url_digest(self, path: str) -> str:
    """
    Returns the hex-encoded HMAC-SHA256 of a media path, keyed with the
    media proxy key.
    """
    mac = hmac.new(self._media_proxy_key, path.encode('ascii'), 'sha256')
    return mac.hexdigest()
|
||||
|
||||
def _download_media(self, url: str) -> requests.Response:
    """
    Starts a streaming download of the media behind a signed MXC URL.

    `url` has the form "mxc://<server>/<media id>?key=<hex digest>", where
    the key must equal the digest that __make_url_digest produces for the
    "<server>/<media id>" path.

    Raises ValueError if the URL is malformed or the key does not match.
    """
    url_base, _, key = url.partition('?key=')
    match = _media_url_re.match(url_base)
    if not match:
        raise ValueError('Invalid media URL')

    path = match.group(1)

    # Compare as bytes: hmac.compare_digest() raises TypeError when given a
    # str containing non-ASCII characters, and the key is taken verbatim
    # from the requested URL. (A key that cannot be encoded raises
    # UnicodeEncodeError, which is itself a ValueError.)
    expected = self.__make_url_digest(path).encode('ascii')
    if not hmac.compare_digest(expected, key.encode('utf-8')):
        raise ValueError('Invalid key parameter')

    return self.__session.get(f'{self._media_baseurl}/download/{path}',
                              timeout=15, stream=True)
|
||||
|
||||
@functools.cached_property
|
||||
def current_nick(self) -> str:
|
||||
return self.__get('account/whoami')['user_id']
|
||||
@@ -462,6 +568,7 @@ class Matrix(miniirc.IRC):
|
||||
if self.connected is not None:
|
||||
return
|
||||
with self._send_lock:
|
||||
self.connected = False
|
||||
self._update_baseurl()
|
||||
self.active_caps = self.ircv3_caps & {
|
||||
'account-tag', 'echo-message', 'message-tags',
|
||||
@@ -470,16 +577,34 @@ class Matrix(miniirc.IRC):
|
||||
self.debug('Starting main loop (Matrix)')
|
||||
self._start_main_loop()
|
||||
|
||||
if self._media_proxy_port:
|
||||
self.debug('Starting media proxy')
|
||||
|
||||
class _handler(_MediaProxyHandler):
|
||||
irc = self
|
||||
|
||||
self._media_proxy = ThreadingHTTPServer(
|
||||
('127.0.0.1', self._media_proxy_port),
|
||||
_handler,
|
||||
)
|
||||
th = threading.Thread(target=self._media_proxy.serve_forever)
|
||||
th.daemon = True
|
||||
th.start()
|
||||
|
||||
def disconnect(self) -> None:
|
||||
self.connected = False
|
||||
with self._send_lock:
|
||||
self.connected = False
|
||||
if self._media_proxy is not None:
|
||||
self._media_proxy.shutdown()
|
||||
self._media_proxy = None
|
||||
|
||||
def _main(self) -> None:
|
||||
try:
|
||||
self.__numeric('001', f'Welcome to Matrix {self.current_nick}')
|
||||
|
||||
next_batch: Optional[str] = None
|
||||
self.connected = True
|
||||
while self.connected:
|
||||
req_time = time.monotonic()
|
||||
try:
|
||||
res = self.__get('sync', 35, timeout='30000',
|
||||
since=next_batch)
|
||||
@@ -495,6 +620,14 @@ class Matrix(miniirc.IRC):
|
||||
if self.debug_file:
|
||||
self.debug(json.dumps(res, indent=4))
|
||||
if 'error' in res:
|
||||
# TODO: Use self.debug or something
|
||||
print(f'[miniirc_matrix] Error returned when trying to '
|
||||
f'fetch /sync: {res["error"]!r}')
|
||||
|
||||
if self.persist:
|
||||
self.debug('Trying again in 15 seconds...')
|
||||
time.sleep(15)
|
||||
continue
|
||||
break
|
||||
next_batch = res['next_batch']
|
||||
if 'rooms' in res:
|
||||
@@ -558,13 +691,16 @@ class Matrix(miniirc.IRC):
|
||||
msgtype = 'm.text'
|
||||
|
||||
params: dict[str, Any]
|
||||
if html_msg := _irc_to_html(msg):
|
||||
html_msg, mentions = _irc_to_html(msg)
|
||||
if html_msg:
|
||||
params = {
|
||||
'msgtype': msgtype,
|
||||
'body': _formatting_re.sub('', msg),
|
||||
'body': _invisible_formatting_re.sub('', msg),
|
||||
'format': 'org.matrix.custom.html',
|
||||
'formatted_body': html_msg,
|
||||
}
|
||||
if mentions:
|
||||
params['m.mentions'] = {'user_ids': list(mentions)}
|
||||
else:
|
||||
# No formatting
|
||||
params = {'msgtype': msgtype, 'body': msg}
|
||||
@@ -650,8 +786,10 @@ class Matrix(miniirc.IRC):
|
||||
msg: str
|
||||
if 'url' in content:
|
||||
msg = content.url[str]
|
||||
if msg.startswith('mxc://'):
|
||||
msg = f'{self._media_baseurl}/download/{msg[6:]}'
|
||||
if self._media_proxy_url and (match := _media_url_re.match(msg)):
|
||||
path = match.group(1)
|
||||
key = self.__make_url_digest(path)
|
||||
msg = f'{self._media_proxy_url}/{path}?key={key}'
|
||||
else:
|
||||
msg, html_parsed_ok = _matrix_html_to_irc(content)
|
||||
|
||||
|
||||
2
setup.py
2
setup.py
@@ -5,7 +5,7 @@ from setuptools import setup
|
||||
|
||||
setup(
|
||||
name='miniirc_matrix',
|
||||
version='0.0.7',
|
||||
version='0.0.13',
|
||||
py_modules=['miniirc_matrix'],
|
||||
author='luk3yx',
|
||||
description='A Matrix wrapper for miniirc.',
|
||||
|
||||
28
test_formatting.py
Normal file
28
test_formatting.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from miniirc_matrix import _Event, _irc_to_html, _matrix_html_to_irc
|
||||
|
||||
|
||||
def test_irc_to_html():
    """Checks the (html, mentions) pair returned for each sample message."""
    expectations = {
        # No formatting at all
        'Hello world!': (None, set()),
        # Tags left open get closed at the end of the message
        '\x02Bold': ('<strong>Bold</strong>', set()),
        # Overlapping bold/italic ranges
        '\x021 \x1d2\x02 3': (
            '<strong>1 <em>2</em></strong><em> 3</em>',
            set(),
        ),
        # Matrix mentions become matrix.to links and are reported separately
        '@test:example.com: \x1dHello': (
            '<a href="https://matrix.to/#/@test:example.com">'
            '@test:example.com</a>: <em>Hello</em>',
            {'@test:example.com'},
        ),
    }
    for irc_msg, expected in expectations.items():
        assert _irc_to_html(irc_msg) == expected
|
||||
|
||||
|
||||
def html_to_irc(html):
    """
    Converts a Matrix HTML body to IRC formatting, asserting that the HTML
    was parsed successfully.
    """
    event = _Event({
        'format': 'org.matrix.custom.html',
        'formatted_body': html,
    })
    converted, parsed_ok = _matrix_html_to_irc(event)
    assert parsed_ok
    return converted
|
||||
|
||||
|
||||
def test_html_to_irc():
    """Checks the IRC text produced for bold text and line break variants."""
    samples = (
        ('Hello <b>world</b>!', 'Hello \x02world\x02!'),
        ('Hello\nworld!', 'Hello\nworld!'),
        ('Hello<br>world!', 'Hello\nworld!'),
        ('Hello<br/>world!', 'Hello\nworld!'),
    )
    for markup, irc_text in samples:
        assert html_to_irc(markup) == irc_text
|
||||
Reference in New Issue
Block a user