media: support voice messages

Signed-off-by: Sumner Evans <sumner.evans@automattic.com>
This commit is contained in:
Sumner Evans
2024-06-20 10:30:39 -06:00
parent 16706d8338
commit b568ef8d8c
7 changed files with 173 additions and 16 deletions

5
go.mod
View File

@@ -5,15 +5,17 @@ go 1.21
require (
github.com/gotd/td v0.102.0
github.com/rs/zerolog v1.33.0
github.com/stretchr/testify v1.9.0
go.mau.fi/util v0.5.0
go.mau.fi/zerozap v0.1.1
go.uber.org/zap v1.27.0
maunium.net/go/mautrix v0.19.0-beta.1.0.20240619154325-69e2b42d857a
maunium.net/go/mautrix v0.19.0-beta.1.0.20240620160042-8e1fdfda2c1e
)
require (
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-faster/errors v0.7.1 // indirect
github.com/go-faster/jx v1.1.0 // indirect
github.com/go-faster/xor v1.0.0 // indirect
@@ -26,6 +28,7 @@ require (
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.19 // indirect
github.com/mattn/go-sqlite3 v1.14.22 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rs/xid v1.5.0 // indirect
github.com/segmentio/asm v1.2.0 // indirect
github.com/skip2/go-qrcode v0.0.0-20200617195104-da1b6568686e // indirect

4
go.sum
View File

@@ -108,8 +108,8 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
maunium.net/go/mauflag v1.0.0 h1:YiaRc0tEI3toYtJMRIfjP+jklH45uDHtT80nUamyD4M=
maunium.net/go/mauflag v1.0.0/go.mod h1:nLivPOpTpHnpzEh8jEdSL9UqO9+/KBJFmNRlwKfkPeA=
maunium.net/go/mautrix v0.19.0-beta.1.0.20240619154325-69e2b42d857a h1:g2X/TEW9MR9lfn4RUHUGcpta9FmFes62/4OEEVEKFJg=
maunium.net/go/mautrix v0.19.0-beta.1.0.20240619154325-69e2b42d857a/go.mod h1:cxv1w6+syudmEpOewHYIQT9yO7TM5UOWmf6xEBVI4H4=
maunium.net/go/mautrix v0.19.0-beta.1.0.20240620160042-8e1fdfda2c1e h1:f5Y1xtNziKs7heoTNL5q86+dDG7kyJVNY3/C+WrefQg=
maunium.net/go/mautrix v0.19.0-beta.1.0.20240620160042-8e1fdfda2c1e/go.mod h1:cxv1w6+syudmEpOewHYIQT9yO7TM5UOWmf6xEBVI4H4=
nhooyr.io/websocket v1.8.11 h1:f/qXNc2/3DpoSZkHt1DQu6rj4zGC8JmkkLkWss0MgN0=
nhooyr.io/websocket v1.8.11/go.mod h1:rN9OFWIUwuxg4fR5tELlYC04bXYowCP9GX47ivo2l+c=
rsc.io/qr v0.2.0 h1:6vBLea5/NRMVTz8V66gipeLycZMl/+UlFmk8DvqQ6WY=

View File

@@ -16,6 +16,7 @@ import (
"maunium.net/go/mautrix/event"
"go.mau.fi/mautrix-telegram/pkg/connector/ids"
"go.mau.fi/mautrix-telegram/pkg/connector/waveform"
)
func getMediaFilenameAndCaption(content *event.MessageEventContent) (filename, caption string) {
@@ -48,7 +49,6 @@ func (t *TelegramClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2.
case event.MsgImage, event.MsgFile, event.MsgAudio, event.MsgVideo:
filename, caption := getMediaFilenameAndCaption(msg.Content)
// TODO stream this download straight into the uploader
var fileData []byte
fileData, err = t.main.Bridge.Bot.DownloadMedia(ctx, msg.Content.URL, msg.Content.File)
if err != nil {
@@ -65,13 +65,35 @@ func (t *TelegramClient) HandleMatrixMessage(ctx context.Context, msg *bridgev2.
// TODO resolver?
styling = append(styling, html.String(nil, caption))
}
if msg.Content.MsgType == event.MsgImage {
updates, err = builder.Media(ctx, message.UploadedPhoto(upload, styling...))
break
} else {
document := message.UploadedDocument(upload, styling...).
Filename(filename).
MIME(msg.Content.Info.MimeType)
updates, err = builder.Media(ctx, document)
document := message.UploadedDocument(upload, styling...).Filename(filename)
if msg.Content.Info != nil {
document.MIME(msg.Content.Info.MimeType)
}
var media message.MediaOption
switch msg.Content.MsgType {
case event.MsgAudio:
audioBuilder := document.Audio()
if msg.Content.MSC1767Audio != nil {
audioBuilder.Duration(time.Duration(msg.Content.MSC1767Audio.Duration) * time.Millisecond)
if len(msg.Content.MSC1767Audio.Waveform) > 0 {
audioBuilder.Waveform(waveform.Encode(msg.Content.MSC1767Audio.Waveform))
}
}
if msg.Content.MSC3245Voice != nil {
audioBuilder.Voice()
}
media = audioBuilder
default:
media = document
}
updates, err = builder.Media(ctx, media)
}
default:
return nil, fmt.Errorf("unsupported message type %s", msg.Content.MsgType)

View File

@@ -56,6 +56,8 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P
var partID networkid.PartID
var msgType event.MessageType
var filename string
var audio *event.MSC1767Audio
var voice *event.MSC3245Voice
// Determine the filename and some other information
switch media := media.(type) {
@@ -72,9 +74,22 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P
}
for _, attr := range document.GetAttributes() {
if f, ok := attr.(*tg.DocumentAttributeFilename); ok {
filename = f.GetFileName()
break
switch a := attr.(type) {
case *tg.DocumentAttributeFilename:
filename = a.GetFileName()
case *tg.DocumentAttributeAudio:
msgType = event.MsgAudio
audio = &event.MSC1767Audio{
Duration: a.Duration * 1000,
}
if waveform, ok := a.GetWaveform(); ok {
for _, v := range waveform {
audio.Waveform = append(audio.Waveform, int(v)<<5)
}
}
if a.Voice {
voice = &event.MSC3245Voice{}
}
}
}
@@ -170,12 +185,13 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P
extra := map[string]any{}
// Handle spolilers
// Handle spoilers
// See: https://github.com/matrix-org/matrix-spec-proposals/pull/3725
if s, ok := media.(spoilable); ok && s.GetSpoiler() {
extra["town.robin.msc3725.content_warning"] = map[string]any{
"type": "town.robin.msc3725.spoiler",
}
extra["fi.mau.telegram.spoiler"] = true
}
// Handle disappearing messages
@@ -193,10 +209,12 @@ func (mc *MessageConverter) convertMedia(ctx context.Context, portal *bridgev2.P
ID: partID,
Type: event.EventMessage,
Content: &event.MessageEventContent{
MsgType: msgType,
Body: filename,
URL: mxcURI,
File: encryptedFileInfo,
MsgType: msgType,
Body: filename,
URL: mxcURI,
File: encryptedFileInfo,
MSC1767Audio: audio,
MSC3245Voice: voice,
},
Extra: extra,
}, disappearingSetting, nil

View File

@@ -0,0 +1,2 @@
go test fuzz v1
[]byte("00")

View File

@@ -0,0 +1,66 @@
// Package waveform implements encoding and decoding of a Telegram waveform.
//
// Telegram waveforms consist of packed 5-bit values. The values are packed
// into a byte stream, meaning that the actual values cross the byte boundary.
//
// The following diagram explains the format:
//
// [210|43210][0|43210|43][3210|4321][10|43210|4]...
// [111|00000][3|22222|11][4444|3333][66|55555|4]...
//
// Explanation of diagram:
// - The []'s enclose byte boundaries.
// - The |s represent separation between waveform values.
// - The numbers in the first row indicate the binary power.
// - The numbers in the second row indicate the corresponding waveform index.
package waveform
import "math"
// NormalizeWaveform converts a waveform into 5-bit samples, i.e. values in the
// range [0, 31], which is required for the encoding to work.
//
// A waveform whose peak already fits in 5 bits is passed through unchanged.
// A louder waveform is scaled linearly so that its peak maps to 31. Negative
// samples are clamped to 0.
func NormalizeWaveform(waveform []int) (normalized []byte) {
	// Largest value that fits in one packed 5-bit sample.
	const maxSample = 31

	normalized = make([]byte, len(waveform))
	var waveformMax int
	for _, v := range waveform {
		if v > waveformMax {
			waveformMax = v
		}
	}
	for i, v := range waveform {
		switch {
		case v <= 0:
			// Nothing to do: make already zeroed the slot.
		case waveformMax <= maxSample:
			// Already within range; scaling would only distort the data.
			normalized[i] = byte(v)
		default:
			// v <= waveformMax, so the scaled value never exceeds maxSample.
			normalized[i] = byte(math.Round(float64(v) * maxSample / float64(waveformMax)))
		}
	}
	return normalized
}
// Encode packs a Matrix waveform into the Telegram waveform byte format. The
// samples are passed through NormalizeWaveform before being packed.
func Encode(waveform []int) []byte {
	// Total bit count rounded up to whole bytes.
	packedLen := (5*len(waveform) + 7) / 8
	// One spare trailing byte lets the loop write the spill-over byte
	// unconditionally; it is sliced off before returning.
	packed := make([]byte, packedLen+1)
	for idx, sample := range NormalizeWaveform(waveform) {
		bitPos := idx * 5
		byteIdx, offset := bitPos/8, bitPos%8
		// Low part of the sample goes into the current byte...
		packed[byteIdx] |= sample << offset
		// ...and whatever did not fit spills into the next one.
		packed[byteIdx+1] |= sample >> (8 - offset)
	}
	return packed[:packedLen]
}
// Decode unpacks a Telegram waveform into one int per 5-bit sample so that it
// can be used on the Matrix side.
//
// It returns len(waveform)*8/5 values, so trailing padding bits that happen to
// form a complete 5-bit group are reported as an extra sample.
func Decode(waveform []byte) []int {
	const sampleMask = 0b00011111

	samples := make([]int, len(waveform)*8/5)
	for idx := range samples {
		bitPos := idx * 5
		byteIdx, offset := bitPos/8, bitPos%8
		// Bits of the sample that live in the current byte.
		bits := waveform[byteIdx] >> offset
		// Remaining bits, if the sample straddles a byte boundary.
		if next := byteIdx + 1; next < len(waveform) {
			bits |= waveform[next] << (8 - offset)
		}
		samples[idx] = int(bits) & sampleMask
	}
	return samples
}

View File

@@ -0,0 +1,46 @@
package waveform_test
import (
"testing"
"github.com/stretchr/testify/assert"
"go.mau.fi/mautrix-telegram/pkg/connector/waveform"
)
// TestEncode checks Encode against hand-computed packed byte sequences.
func TestEncode(t *testing.T) {
	cases := []struct {
		input    []int
		expected []byte
	}{
		{[]int{1}, []byte{0x01}},
		{[]int{31, 31}, []byte{0xff, 0x03}},
		{[]int{1, 2, 3, 4, 5, 6, 7, 8}, []byte{0x41, 0x0c, 0x52, 0xcc, 0x41}},
		{[]int{31, 31, 31, 31, 31, 31, 31, 31}, []byte{0xff, 0xff, 0xff, 0xff, 0xff}},
	}
	for _, tc := range cases {
		assert.Equal(t, tc.expected, waveform.Encode(tc.input))
	}
}
func TestDecode(t *testing.T) {
// assert.Equal(t, []int{0x01}, waveform.Decode([]byte{1}))
// assert.Equal(t, []int{0x01, 0x10, 0x00}, waveform.Decode([]byte{1, 2}))
// assert.Equal(t, []int{0x01, 0x10, 0x00, 0x06, 0x00, 0x02, 0x14, 0x00}, waveform.Decode([]byte{1, 2, 3, 4, 5}))
}
// FuzzRoundtrip checks that encoding a waveform and decoding the result
// yields the original samples, allowing for one trailing zero sample.
func FuzzRoundtrip(f *testing.F) {
	f.Add([]byte{0x01})
	f.Fuzz(func(t *testing.T, w []byte) {
		// Build the waveform from the fuzz input itself. Each byte is masked to
		// 5 bits so that every sample is exactly representable in the packed
		// format and the round trip is expected to be lossless.
		wf := make([]int, len(w))
		for i, v := range w {
			wf[i] = int(v & 0b00011111)
		}

		encoded := waveform.Encode(wf)
		decoded := waveform.Decode(encoded)

		// Sometimes the decoded waveform has an extra value: when the real
		// samples occupy at most the 3 least-significant bits of the last
		// byte, the remaining padding bits form one more 5-bit group. In that
		// case, it's unclear whether the waveform contains a 0b00000 as the
		// last value or if there shouldn't have been anything there.
		if len(wf) != len(decoded) {
			assert.Len(t, decoded, len(wf)+1)
			wf = append(wf, 0x00)
		}
		assert.Equal(t, wf, decoded)
	})
}