Compare commits

...

75 Commits

Author SHA1 Message Date
Alexey
2df6b8704d BSD Support + Active IP in API + Timeouts tuning + Apple/XNU Connectivity fixes + Admission-timeouts + Global Each TCP Connections: merge pull request #611 from telemt/flow
BSD Support + Active IP in API + Timeouts tuning + Apple/XNU Connectivity fixes + Admission-timeouts + Global Each TCP Connections
2026-03-31 13:10:31 +03:00
Alexey
5f5a046710 Update Cargo.toml + Cargo.lock 2026-03-31 13:04:24 +03:00
Alexey
2dc81ad0e0 API Consistency fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-31 13:03:05 +03:00
Alexey
d8d8534cf8 Update masking_ab_envelope_blur_integration_security_tests.rs 2026-03-31 12:30:43 +03:00
Alexey
6c850e4150 Update Cargo.toml 2026-03-31 11:15:31 +03:00
Alexey
b8cf596e7d Admission-timeouts + Global Each TCP Connections
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-31 11:14:55 +03:00
Alexey
5bf56b6dd8 Update Cargo.toml 2026-03-30 23:36:45 +03:00
Alexey
65da1f91ec Drafting fixes for Apple/XNU Darwin Connectivity issues
Co-Authored-By: Aleksandr Kalashnikov <33665156+sleep3r@users.noreply.github.com>
2026-03-30 23:35:41 +03:00
Alexey
f3e9d00132 Merge pull request #605 from telemt/readme
Readme
2026-03-29 11:52:44 +03:00
Alexey
dee6e13fef Update CONTRIBUTING.md 2026-03-29 01:51:51 +03:00
Alexey
07d774a82a Merge pull request #595 from xaosproxy/fix/apply-tg-connect-timeout-upstream
Apply [timeouts] tg_connect to upstream DC TCP connect attempts
2026-03-28 21:14:51 +03:00
Roman Martynov
618bc7e0b6 Merge branch 'flow' into fix/apply-tg-connect-timeout-upstream 2026-03-28 14:27:47 +03:00
sintanial
d06ac222d6 fix: move tg_connect to general, rustfmt upstream, fix UpstreamManager::new tests
- Relocate tg_connect from [timeouts] to [general] with validation and docs updates.
- Apply rustfmt to per-attempt upstream connect timeout expression in upstream.rs.
- Pass tg_connect_timeout_secs in all UpstreamManager::new test call sites.
- Wire hot reload and runtime snapshot to general.tg_connect.
2026-03-28 14:25:18 +03:00
Alexey
567453e0f8 Merge pull request #596 from xaosproxy/fix/listen_backlog
feat(server): configurable TCP listen_backlog
2026-03-28 12:28:19 +03:00
Alexey
cba837745b Merge pull request #599 from Dimasssss/main
Update FAQ
2026-03-28 12:28:04 +03:00
Dimasssss
876c8f1612 Update FAQ.en.md 2026-03-27 22:26:21 +03:00
Dimasssss
ac8ad864be Update FAQ.ru.md 2026-03-27 22:26:07 +03:00
Alexey
fe56dc7c1a Update README.md 2026-03-27 14:13:08 +03:00
sintanial
96ae01078c feat(server): configurable TCP listen_backlog
Add [server].listen_backlog (default 1024) for client-facing listen(2)
queue size; use the same value for metrics HTTP listeners. Hot reload
logs restart-required when this field changes.
2026-03-27 12:49:53 +03:00
sintanial
3b9919fa4d Apply [timeouts] tg_connect to upstream DC TCP connect attempts
Wire config.timeouts.tg_connect into UpstreamManager; per-attempt timeout uses
the same .max(1) pattern as connect_budget_ms.

Reject timeouts.tg_connect = 0 at config load (consistent with
general.upstream_connect_budget_ms and related checks). Default when the key
is omitted remains default_connect_timeout() via serde.

Fixes telemt/telemt#439
2026-03-27 12:45:19 +03:00
Alexey
6c4a3b59f9 Merge pull request #515 from vkrivopalov/daemonize
Support running TeleMT as a background system service
2026-03-27 11:36:02 +03:00
Alexey
01c3d0a707 Merge branch 'flow' into daemonize 2026-03-27 11:35:52 +03:00
Alexey
fbee4631d6 Merge pull request #588 from amirotin/feat/active-ips-endpoint
feat(api): add GET /v1/stats/users/active-ips endpoint
2026-03-26 11:12:43 +03:00
Mirotin Artem
d0b52ea299 Merge branch 'main' into feat/active-ips-endpoint 2026-03-26 10:00:47 +03:00
Mirotin Artem
677195e587 feat(api): add GET /v1/stats/users/active-ips endpoint
Lightweight endpoint that returns only users with active TCP connections
and their IP addresses. Calls only get_active_ips_for_users() without
collecting recent IPs or building full UserInfo, significantly reducing
CPU and memory overhead compared to /v1/stats/users.
2026-03-26 10:00:29 +03:00
Alexey
a383efcb21 Bounded Hybrid Loop + Watch + Family ArcSwap Snapshots + Health in Parallel + ArcSwap Writers + Registry Split + Endpoint on ArcSwap + New Backpressure Model + ME Decomposition: merge pull request #586 from telemt/flow
Bounded Hybrid Loop + Watch + Family ArcSwap Snapshots + Health in Parallel + ArcSwap Writers + Registry Split + Endpoint on ArcSwap + New Backpressure Model + ME Decomposition
2026-03-26 02:31:18 +03:00
Alexey
cb5753f77c Update admission.rs
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-26 02:19:35 +03:00
Alexey
7a075b2ffe Middle Relay fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-26 02:18:39 +03:00
Alexey
7de822dd15 RPC Proxy-req fixes
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 22:51:00 +03:00
Alexey
1bbf4584a6 Merge branch 'main' into flow 2026-03-25 22:25:58 +03:00
Alexey
70479c4094 Unexpected-only Quarantine
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 22:25:39 +03:00
Alexey
b94746a6e0 Dashmap-driven Routing + Health Parallel + Family Runtime State
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 21:26:20 +03:00
Alexey
ceae1564af Floor Runtime + Writer Selection Policy + Reconnect/Warmup + TransportPolicy + NAT Runtime Cores
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:55:20 +03:00
Alexey
7ce5fc66db ME Reinit Core advancing + Binding Policy Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:35:57 +03:00
Alexey
41493462a1 Drain + Single-Endpoint Runtime Cores
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:29:22 +03:00
Alexey
6ee4d4648c ME Health Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 20:01:44 +03:00
Alexey
97f6649584 ME Route Runtime Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 19:56:25 +03:00
Alexey
dc6b6d3f9d ME Writer Lifecycle Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 19:47:41 +03:00
Alexey
1c3e0d4e46 ME Reinit Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 19:43:02 +03:00
Alexey
0b78583cf5 ME Routing Core
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 18:18:06 +03:00
Alexey
28d318d724 ME Writer Task Consolidation
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 17:59:54 +03:00
Alexey
70c2f0f045 RoutingTable + BindingState
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 17:50:44 +03:00
Alexey
b9b1271f14 Merge pull request #584 from Dimasssss/patch-3
Update CONFIG_PARAMS, QUICK_START_GUIDE and FAQ
2026-03-25 17:44:59 +03:00
Dimasssss
3c734bd811 Update FAQ.en.md 2026-03-25 17:42:16 +03:00
Dimasssss
6391df0583 Update FAQ.ru.md 2026-03-25 17:42:07 +03:00
Dimasssss
6a781c8bc3 Update QUICK_START_GUIDE.en.md 2026-03-25 17:40:45 +03:00
Dimasssss
138652af8e Update QUICK_START_GUIDE.ru.md 2026-03-25 17:40:16 +03:00
Dimasssss
59157d31a6 Update CONFIG_PARAMS.en.md 2026-03-25 17:37:01 +03:00
Alexey
8bab3f70e1 WritersState on ArcSwap + Preferred Endpoint on ArcSwap + Two-map Rotation for Desync Dedup
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 17:25:35 +03:00
Alexey
41d786cc11 Safety Gates Invariants + HybridAsyncPersistent + Watch + Runtime Snapshots + ME Writer Ping Tracker + Parallel Recovery + Backpressure Guardrails
Co-Authored-By: brekotis <93345790+brekotis@users.noreply.github.com>
2026-03-25 16:29:35 +03:00
Alexey
c43de1bd2a Update release.yml 2026-03-24 22:36:25 +03:00
Alexey
101efe45b7 Update Dockerfile 2026-03-24 22:36:20 +03:00
Alexey
11df61c6ac Update release.yml 2026-03-24 22:18:34 +03:00
Alexey
08684bcbd2 Update Cargo.toml 2026-03-24 22:03:12 +03:00
Alexey
744fb4425f TLS Validator: Unknown SNI as WARN in Log: merge pull request #579 from telemt/flow
TLS Validator: Unknown SNI as WARN in Log
2026-03-24 22:01:09 +03:00
Alexey
80cb1bc221 Merge branch 'main' into flow 2026-03-24 22:00:51 +03:00
Alexey
8461556b02 Update release.yml 2026-03-24 22:00:32 +03:00
Alexey
cfd516edf3 Update Cargo.toml 2026-03-24 21:41:14 +03:00
Alexey
803c2c0492 Update release.yml 2026-03-24 21:40:53 +03:00
Alexey
b762bd029f Merge branch 'main' into flow 2026-03-24 21:18:54 +03:00
Alexey
761679d306 Update test.yml 2026-03-24 21:18:13 +03:00
Alexey
41668b153d Update test.yml 2026-03-24 21:14:12 +03:00
Alexey
1d2f88ad29 Merge branch 'main' into flow 2026-03-24 21:11:11 +03:00
Alexey
80917f5abc Update test.yml 2026-03-24 21:10:56 +03:00
Alexey
dc61d300ab Bump 2026-03-24 21:02:43 +03:00
Alexey
ae16080de5 TLS Validator: Unknown SNI as WARN in Log 2026-03-24 21:01:41 +03:00
Alexey
b8ca1fc166 Update Dockerfile 2026-03-24 20:55:32 +03:00
Alexey
f9986944df Update release.yml 2026-03-24 20:53:56 +03:00
Alexey
cb877c2bc3 Update release profile settings for better optimization: merge pull request #574 from vladon/main
Update release profile settings for better optimization
2026-03-24 14:10:04 +03:00
Vladislav Yaroslavlev
4426082c17 Update release profile settings for better optimization 2026-03-24 14:01:49 +03:00
Vladimir Krivopalov
95685adba7 Add multi-destination logging: syslog and file support
Implement logging infrastructure for non-systemd platforms:

- Add src/logging.rs with syslog and file logging support
- New CLI flags: --syslog, --log-file, --log-file-daily
- Syslog uses libc directly with LOG_DAEMON facility
- File logging via tracing-appender with optional daily rotation

Update service scripts:
- OpenRC and FreeBSD rc.d now use --syslog by default
- Ensures logs are captured on platforms without journald

Default (stderr) behavior unchanged for systemd compatibility.
Log destination is selected at startup based on CLI flags.

Signed-off-by: Vladimir Krivopalov <argenet@yandex.ru>
2026-03-21 21:09:29 +02:00
Vladimir Krivopalov
909714af31 Add multi-platform service manager integration
Implement automatic init system detection and service file generation
for systemd, OpenRC (Alpine/Gentoo), and FreeBSD rc.d:

- Add src/service module with init system detection and generators
- Auto-detect init system via filesystem probes
- Generate platform-appropriate service files during --init

systemd enhancements:
- ExecReload for SIGHUP config reload
- PIDFile directive
- Comprehensive security hardening (ProtectKernelTunables,
  RestrictAddressFamilies, MemoryDenyWriteExecute, etc.)
- CAP_NET_BIND_SERVICE for privileged ports

OpenRC support:
- Standard openrc-run script with depend/reload functions
- Directory setup in start_pre

FreeBSD rc.d support:
- rc.subr integration with rc.conf variables
- reload extra command

The --init command now detects the init system and runs the
appropriate enable/start commands (systemctl, rc-update, sysrc).

Signed-off-by: Vladimir Krivopalov <argenet@yandex.ru>
2026-03-21 21:09:29 +02:00
Vladimir Krivopalov
dc2b4395bd Add daemon lifecycle subcommands: start, stop, reload, status
Implement CLI subcommands for managing telemt as a daemon:

- `start [config.toml]` - Start as background daemon (implies --daemon)
- `stop` - Stop running daemon by sending SIGTERM
- `reload` - Reload configuration by sending SIGHUP
- `status` - Check if daemon is running via PID file

Subcommands use the PID file (default /var/run/telemt.pid) to locate
the running daemon. Stop command waits up to 10 seconds for graceful
shutdown. Status cleans up stale PID files automatically.

Updated help text with subcommand documentation and usage examples.
Exit codes follow Unix convention: 0 for success, 1 for not running
or error.

Signed-off-by: Vladimir Krivopalov <argenet@yandex.ru>
2026-03-21 21:09:29 +02:00
Vladimir Krivopalov
39875afbff Add comprehensive Unix signal handling for daemon mode
Enhance signal handling to support proper daemon operation:

- SIGTERM: Graceful shutdown (same behavior as SIGINT)
- SIGQUIT: Graceful shutdown with full statistics dump
- SIGUSR1: Log rotation acknowledgment for external tools
- SIGUSR2: Dump runtime status to log without stopping

Statistics dump includes connection counts, ME keepalive metrics,
and relay adaptive tuning counters. SIGHUP config reload unchanged
(handled in hot_reload.rs).

Signals are handled via tokio::signal::unix with async select!
to avoid blocking the runtime. Non-shutdown signals (USR1/USR2)
run in a background task spawned at startup.

Signed-off-by: Vladimir Krivopalov <argenet@yandex.ru>
2026-03-21 21:09:29 +02:00
Vladimir Krivopalov
2ea7813ed4 Add Unix daemon mode with PID file and privilege dropping
Implement core daemon infrastructure for running telemt as a background
  service on Unix platforms (Linux, FreeBSD, etc.):

  - Add src/daemon module with classic double-fork daemonization
  - Implement flock-based PID file management to prevent duplicate instances
  - Add privilege dropping (setuid/setgid) after socket binding
  - New CLI flags: --daemon, --foreground, --pid-file, --run-as-user,
    --run-as-group, --working-dir

  Daemonization occurs before tokio runtime starts to ensure clean fork.
  PID file uses exclusive locking to detect already-running instances.
  Privilege dropping happens after bind_listeners() to allow binding
  to privileged ports (< 1024) before switching to unprivileged user.

Signed-off-by: Vladimir Krivopalov <argenet@yandex.ru>
2026-03-21 21:09:29 +02:00
89 changed files with 5957 additions and 1722 deletions

View File

@@ -5,27 +5,69 @@ on:
tags:
- '[0-9]+.[0-9]+.[0-9]+'
workflow_dispatch:
inputs:
tag:
description: 'Release tag (example: 3.3.15)'
required: true
type: string
concurrency:
group: release-${{ github.ref }}
group: release-${{ github.ref_name }}-${{ github.event.inputs.tag || 'auto' }}
cancel-in-progress: true
permissions:
contents: read
packages: write
env:
CARGO_TERM_COLOR: always
BINARY_NAME: telemt
jobs:
# ==========================
# GNU / glibc
# ==========================
build-gnu:
name: GNU ${{ matrix.target }}
prepare:
name: Prepare
runs-on: ubuntu-latest
outputs:
version: ${{ steps.vars.outputs.version }}
prerelease: ${{ steps.vars.outputs.prerelease }}
steps:
- name: Resolve version
id: vars
shell: bash
run: |
set -euo pipefail
if [ "${GITHUB_EVENT_NAME}" = "workflow_dispatch" ]; then
VERSION="${{ github.event.inputs.tag }}"
else
VERSION="${GITHUB_REF#refs/tags/}"
fi
VERSION="${VERSION#refs/tags/}"
if [ -z "${VERSION}" ]; then
echo "Release version is empty" >&2
exit 1
fi
if [[ "${VERSION}" == *-* ]]; then
PRERELEASE=true
else
PRERELEASE=false
fi
echo "version=${VERSION}" >> "${GITHUB_OUTPUT}"
echo "prerelease=${PRERELEASE}" >> "${GITHUB_OUTPUT}"
# ==========================
# GNU / glibc
# ==========================
build-gnu:
name: GNU ${{ matrix.asset }}
runs-on: ubuntu-latest
needs: prepare
container:
image: rust:slim-bookworm
@@ -35,8 +77,15 @@ jobs:
include:
- target: x86_64-unknown-linux-gnu
asset: telemt-x86_64-linux-gnu
cpu: baseline
- target: x86_64-unknown-linux-gnu
asset: telemt-x86_64-v3-linux-gnu
cpu: v3
- target: aarch64-unknown-linux-gnu
asset: telemt-aarch64-linux-gnu
cpu: generic
steps:
- uses: actions/checkout@v4
@@ -62,64 +111,88 @@ jobs:
- uses: actions/cache@v4
with:
path: |
~/.cargo/registry
~/.cargo/git
/usr/local/cargo/registry
/usr/local/cargo/git
target
key: gnu-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
key: gnu-${{ matrix.asset }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
gnu-${{ matrix.asset }}-
gnu-
- name: Build
shell: bash
run: |
set -euo pipefail
if [ "${{ matrix.target }}" = "aarch64-unknown-linux-gnu" ]; then
export CC=aarch64-linux-gnu-gcc
export CXX=aarch64-linux-gnu-g++
export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc"
export RUSTFLAGS="-C linker=aarch64-linux-gnu-gcc -C lto=fat -C panic=abort"
else
export CC=clang
export CXX=clang++
export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld"
if [ "${{ matrix.cpu }}" = "v3" ]; then
CPU_FLAGS="-C target-cpu=x86-64-v3"
else
CPU_FLAGS="-C target-cpu=x86-64"
fi
export RUSTFLAGS="-C linker=clang -C link-arg=-fuse-ld=lld -C lto=fat -C panic=abort ${CPU_FLAGS}"
fi
cargo build --release --target ${{ matrix.target }}
cargo build --release --target ${{ matrix.target }} -j "$(nproc)"
- name: Package
shell: bash
run: |
set -euo pipefail
mkdir -p dist
cp target/${{ matrix.target }}/release/${{ env.BINARY_NAME }} dist/telemt
cp "target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}" dist/telemt
cd dist
tar -czf ${{ matrix.asset }}.tar.gz \
tar -czf "${{ matrix.asset }}.tar.gz" \
--owner=0 --group=0 --numeric-owner \
telemt
sha256sum ${{ matrix.asset }}.tar.gz > ${{ matrix.asset }}.sha256
sha256sum "${{ matrix.asset }}.tar.gz" > "${{ matrix.asset }}.tar.gz.sha256"
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.asset }}
path: dist/*
# ==========================
# MUSL
# ==========================
# ==========================
# MUSL
# ==========================
build-musl:
name: MUSL ${{ matrix.target }}
name: MUSL ${{ matrix.asset }}
runs-on: ubuntu-latest
needs: prepare
container:
image: rust:slim-bookworm
strategy:
fail-fast: false
matrix:
include:
- target: x86_64-unknown-linux-musl
asset: telemt-x86_64-linux-musl
cpu: baseline
- target: x86_64-unknown-linux-musl
asset: telemt-x86_64-v3-linux-musl
cpu: v3
- target: aarch64-unknown-linux-musl
asset: telemt-aarch64-linux-musl
cpu: generic
steps:
- uses: actions/checkout@v4
- name: Install deps
run: |
apt-get update
@@ -127,89 +200,104 @@ jobs:
musl-tools \
pkg-config \
curl
- uses: actions/cache@v4
if: matrix.target == 'aarch64-unknown-linux-musl'
with:
path: ~/.musl-aarch64
key: musl-toolchain-aarch64-v1
- name: Install aarch64 musl toolchain
if: matrix.target == 'aarch64-unknown-linux-musl'
shell: bash
run: |
set -e
set -euo pipefail
TOOLCHAIN_DIR="$HOME/.musl-aarch64"
ARCHIVE="aarch64-linux-musl-cross.tgz"
URL="https://github.com/telemt/telemt/releases/download/toolchains/$ARCHIVE"
if [ -x "$TOOLCHAIN_DIR/bin/aarch64-linux-musl-gcc" ]; then
echo "MUSL toolchain cached"
URL="https://github.com/telemt/telemt/releases/download/toolchains/${ARCHIVE}"
if [ -x "${TOOLCHAIN_DIR}/bin/aarch64-linux-musl-gcc" ]; then
echo "MUSL toolchain cached"
else
echo "⬇️ Downloading MUSL toolchain..."
curl -fL \
--retry 5 \
--retry-delay 3 \
--connect-timeout 10 \
--max-time 120 \
-o "$ARCHIVE" "$URL"
mkdir -p "$TOOLCHAIN_DIR"
tar -xzf "$ARCHIVE" --strip-components=1 -C "$TOOLCHAIN_DIR"
-o "${ARCHIVE}" "${URL}"
mkdir -p "${TOOLCHAIN_DIR}"
tar -xzf "${ARCHIVE}" --strip-components=1 -C "${TOOLCHAIN_DIR}"
fi
echo "$TOOLCHAIN_DIR/bin" >> $GITHUB_PATH
echo "${TOOLCHAIN_DIR}/bin" >> "${GITHUB_PATH}"
- name: Add rust target
run: rustup target add ${{ matrix.target }}
- uses: actions/cache@v4
with:
path: |
/usr/local/cargo/registry
/usr/local/cargo/git
target
key: musl-${{ matrix.target }}-${{ hashFiles('**/Cargo.lock') }}
key: musl-${{ matrix.asset }}-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
musl-${{ matrix.asset }}-
musl-
- name: Build
shell: bash
run: |
set -euo pipefail
if [ "${{ matrix.target }}" = "aarch64-unknown-linux-musl" ]; then
export CC=aarch64-linux-musl-gcc
export CC_aarch64_unknown_linux_musl=aarch64-linux-musl-gcc
export RUSTFLAGS="-C target-feature=+crt-static -C linker=aarch64-linux-musl-gcc"
export RUSTFLAGS="-C target-feature=+crt-static -C linker=aarch64-linux-musl-gcc -C lto=fat -C panic=abort"
else
export CC=musl-gcc
export CC_x86_64_unknown_linux_musl=musl-gcc
export RUSTFLAGS="-C target-feature=+crt-static"
if [ "${{ matrix.cpu }}" = "v3" ]; then
CPU_FLAGS="-C target-cpu=x86-64-v3"
else
CPU_FLAGS="-C target-cpu=x86-64"
fi
export RUSTFLAGS="-C target-feature=+crt-static -C lto=fat -C panic=abort ${CPU_FLAGS}"
fi
cargo build --release --target ${{ matrix.target }}
cargo build --release --target ${{ matrix.target }} -j "$(nproc)"
- name: Package
shell: bash
run: |
set -euo pipefail
mkdir -p dist
cp target/${{ matrix.target }}/release/${{ env.BINARY_NAME }} dist/telemt
cp "target/${{ matrix.target }}/release/${{ env.BINARY_NAME }}" dist/telemt
cd dist
tar -czf ${{ matrix.asset }}.tar.gz \
tar -czf "${{ matrix.asset }}.tar.gz" \
--owner=0 --group=0 --numeric-owner \
telemt
sha256sum ${{ matrix.asset }}.tar.gz > ${{ matrix.asset }}.sha256
sha256sum "${{ matrix.asset }}.tar.gz" > "${{ matrix.asset }}.tar.gz.sha256"
- uses: actions/upload-artifact@v4
with:
name: ${{ matrix.asset }}
path: dist/*
# ==========================
# Release
# ==========================
# ==========================
# Release
# ==========================
release:
name: Release
runs-on: ubuntu-latest
needs: [build-gnu, build-musl]
needs: [prepare, build-gnu, build-musl]
permissions:
contents: write
@@ -219,73 +307,99 @@ jobs:
with:
path: artifacts
- name: Flatten
- name: Flatten artifacts
shell: bash
run: |
mkdir dist
set -euo pipefail
mkdir -p dist
find artifacts -type f -exec cp {} dist/ \;
- name: Create Release
- name: Create GitHub Release
uses: softprops/action-gh-release@v2
with:
tag_name: ${{ needs.prepare.outputs.version }}
target_commitish: ${{ github.sha }}
files: dist/*
generate_release_notes: true
prerelease: ${{ contains(github.ref, '-') }}
prerelease: ${{ needs.prepare.outputs.prerelease == 'true' }}
overwrite_files: true
# ==========================
# Docker
# ==========================
# ==========================
# Docker
# ==========================
docker:
name: Docker (${{ matrix.platform }})
name: Docker
runs-on: ubuntu-latest
needs: [build-gnu, build-musl]
strategy:
fail-fast: false
matrix:
include:
- platform: linux/amd64
artifact: telemt-x86_64-linux-musl
- platform: linux/arm64
artifact: telemt-aarch64-linux-musl
needs: [prepare, release]
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v4
- uses: actions/download-artifact@v4
with:
name: ${{ matrix.artifact }}
path: dist
- name: Extract binary
run: |
tar -xzf dist/${{ matrix.artifact }}.tar.gz -C dist
chmod +x dist/telemt
- uses: docker/setup-qemu-action@v3
- uses: docker/setup-buildx-action@v3
- uses: docker/login-action@v3
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Extract version
id: vars
run: echo "VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_OUTPUT
- name: Build & Push (per arch)
- name: Probe release assets
shell: bash
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: |
set -euo pipefail
for asset in \
telemt-x86_64-linux-musl.tar.gz \
telemt-x86_64-linux-musl.tar.gz.sha256 \
telemt-aarch64-linux-musl.tar.gz \
telemt-aarch64-linux-musl.tar.gz.sha256
do
curl -fsIL \
--retry 10 \
--retry-delay 3 \
"https://github.com/${GITHUB_REPOSITORY}/releases/download/${VERSION}/${asset}" \
> /dev/null
done
- name: Compute image tags
id: meta
shell: bash
env:
VERSION: ${{ needs.prepare.outputs.version }}
run: |
set -euo pipefail
IMAGE="$(echo "ghcr.io/${GITHUB_REPOSITORY}" | tr '[:upper:]' '[:lower:]')"
TAGS="${IMAGE}:${VERSION}"
if [[ "${VERSION}" != *-* ]]; then
TAGS="${TAGS}"$'\n'"${IMAGE}:latest"
fi
{
echo "tags<<EOF"
printf '%s\n' "${TAGS}"
echo "EOF"
} >> "${GITHUB_OUTPUT}"
- name: Build & Push
uses: docker/build-push-action@v6
with:
context: .
push: true
platforms: ${{ matrix.platform }}
tags: |
ghcr.io/${{ github.repository }}:${{ steps.vars.outputs.VERSION }}
ghcr.io/${{ github.repository }}:latest
pull: true
platforms: linux/amd64,linux/arm64
tags: ${{ steps.meta.outputs.tags }}
build-args: |
BINARY=dist/telemt
TELEMT_REPOSITORY=${{ github.repository }}
TELEMT_VERSION=${{ needs.prepare.outputs.version }}
cache-from: type=gha
cache-to: type=gha,mode=max

View File

@@ -54,14 +54,20 @@ jobs:
uses: actions/cache@v4
with:
path: |
~/.cargo/bin
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
key: ${{ runner.os }}-cargo-nextest-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-nextest-
${{ runner.os }}-cargo-
- run: cargo test --verbose
- name: Install cargo-nextest
run: cargo install --locked cargo-nextest || true
- name: Run tests with nextest
run: cargo nextest run -j "$(nproc)"
# ==========================
# Clippy
@@ -88,11 +94,13 @@ jobs:
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
key: ${{ runner.os }}-cargo-clippy-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-clippy-
${{ runner.os }}-cargo-
- run: cargo clippy -- --cap-lints warn
- name: Run clippy
run: cargo clippy -j "$(nproc)" -- --cap-lints warn
# ==========================
# Udeps
@@ -108,20 +116,24 @@ jobs:
- uses: actions/checkout@v4
- uses: dtolnay/rust-toolchain@stable
with:
components: rust-src
- name: Cache cargo
uses: actions/cache@v4
with:
path: |
~/.cargo/bin
~/.cargo/registry
~/.cargo/git
target
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
key: ${{ runner.os }}-cargo-udeps-${{ hashFiles('**/Cargo.lock') }}
restore-keys: |
${{ runner.os }}-cargo-udeps-
${{ runner.os }}-cargo-
- name: Install cargo-udeps
run: cargo install cargo-udeps || true
run: cargo install --locked cargo-udeps || true
# тоже не валит билд
- run: cargo udeps || true
- name: Run udeps
run: cargo udeps -j "$(nproc)" || true

View File

@@ -1,19 +1,82 @@
# Issues - Rules
# Issues
## Warnung
Before opening an Issue: if it is more of a question than a problem or bug, ask about it [in our chat](https://t.me/telemtrs)
## What it is not
- NOT Question and Answer
- NOT Helpdesk
# Pull Requests - Rules
***Every Issue you open triggers attempts to reproduce and analyze the problem, and this work is done manually by people***
---
# Pull Requests
## General
- ONLY signed and verified commits
- ONLY from your name
- DO NOT commit with `codex` or `claude` as author/commiter
- DO NOT commit with `codex`, `claude`, or other AI tools as author/committer
- PREFER `flow` branch for development, not `main`
## AI
We are not against modern tools, like AI, where you act as a principal or architect, but we consider it important:
---
- you really understand what you're doing
- you understand the relationships and dependencies of the components being modified
- you understand the architecture of Telegram MTProto, MTProxy, Middle-End KDF at least generically
- you DO NOT commit for the sake of commits, but to help the community, core-developers and ordinary users
## Definition of Ready (MANDATORY)
A Pull Request WILL be ignored or closed if:
- it does NOT build
- it does NOT pass tests
- it does NOT follow formatting rules
- it contains unrelated or excessive changes
- the author cannot clearly explain the change
---
## Blessed Principles
- PR must build
- PR must pass tests
- PR must be understood by author
---
## AI Usage Policy
AI tools (Claude, ChatGPT, Codex, DeepSeek, etc.) are allowed as **assistants**, NOT as decision-makers.
By submitting a PR, you confirm that:
- you fully understand the code you submit
- you verified correctness manually
- you reviewed architecture and dependencies
- you take full responsibility for the change
AI-generated code is treated as **draft** and must be validated like any other external contribution.
PRs that look like unverified AI dumps WILL be closed
---
## Maintainer Policy
Maintainers reserve the right to:
- close PRs that do not meet basic quality requirements
- request explanations before review
- ignore low-effort contributions
Respect the reviewers' time
---
## Enforcement
Pull Requests that violate project standards may be closed without review.
This includes (but is not limited to):
- non-building code
- failing tests
- unverified or low-effort changes
- inability to explain the change
These actions follow the Code of Conduct and are intended to preserve signal, quality, and Telemt's integrity

15
Cargo.lock generated
View File

@@ -2793,7 +2793,7 @@ checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417"
[[package]]
name = "telemt"
version = "3.3.30"
version = "3.3.35"
dependencies = [
"aes",
"anyhow",
@@ -2844,6 +2844,7 @@ dependencies = [
"tokio-util",
"toml",
"tracing",
"tracing-appender",
"tracing-subscriber",
"url",
"webpki-roots",
@@ -3170,6 +3171,18 @@ dependencies = [
"tracing-core",
]
[[package]]
name = "tracing-appender"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "786d480bce6247ab75f005b14ae1624ad978d3029d9113f0a22fa1ac773faeaf"
dependencies = [
"crossbeam-channel",
"thiserror 2.0.18",
"time",
"tracing-subscriber",
]
[[package]]
name = "tracing-attributes"
version = "0.1.31"

View File

@@ -1,6 +1,6 @@
[package]
name = "telemt"
version = "3.3.30"
version = "3.3.35"
edition = "2024"
[features]
@@ -30,7 +30,13 @@ static_assertions = "1.1"
# Network
socket2 = { version = "0.6", features = ["all"] }
nix = { version = "0.31", default-features = false, features = ["net", "fs"] }
nix = { version = "0.31", default-features = false, features = [
"net",
"user",
"process",
"fs",
"signal",
] }
shadowsocks = { version = "1.24", features = ["aead-cipher-2022"] }
# Serialization
@@ -44,6 +50,7 @@ bytes = "1.9"
thiserror = "2.0"
tracing = "0.1"
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
tracing-appender = "0.2"
parking_lot = "0.12"
dashmap = "6.1"
arc-swap = "1.7"
@@ -68,8 +75,14 @@ hyper = { version = "1", features = ["server", "http1"] }
hyper-util = { version = "0.1", features = ["tokio", "server-auto"] }
http-body-util = "0.1"
httpdate = "1.0"
tokio-rustls = { version = "0.26", default-features = false, features = ["tls12"] }
rustls = { version = "0.23", default-features = false, features = ["std", "tls12", "ring"] }
tokio-rustls = { version = "0.26", default-features = false, features = [
"tls12",
] }
rustls = { version = "0.23", default-features = false, features = [
"std",
"tls12",
"ring",
] }
webpki-roots = "1.0"
[dev-dependencies]
@@ -83,4 +96,6 @@ name = "crypto_bench"
harness = false
[profile.release]
lto = "thin"
lto = "fat"
codegen-units = 1

View File

@@ -1,52 +1,62 @@
# syntax=docker/dockerfile:1
ARG BINARY
ARG TARGETARCH
ARG TELEMT_REPOSITORY=telemt/telemt
ARG TELEMT_VERSION=latest
# ==========================
# Stage: minimal
# Minimal Image
# ==========================
FROM debian:12-slim AS minimal
ARG TARGETARCH
ARG BINARY
ARG TELEMT_REPOSITORY
ARG TELEMT_VERSION
RUN set -eux; \
apt-get update; \
apt-get install -y --no-install-recommends \
binutils \
ca-certificates \
curl \
xz-utils \
ca-certificates; \
rm -rf /var/lib/apt/lists/*; \
\
tar; \
rm -rf /var/lib/apt/lists/*
RUN set -eux; \
case "${TARGETARCH}" in \
amd64) UPX_ARCH="amd64" ;; \
arm64) UPX_ARCH="arm64" ;; \
amd64) ASSET="telemt-x86_64-linux-musl.tar.gz" ;; \
arm64) ASSET="telemt-aarch64-linux-musl.tar.gz" ;; \
*) echo "Unsupported TARGETARCH: ${TARGETARCH}" >&2; exit 1 ;; \
esac; \
\
VERSION="${TELEMT_VERSION#refs/tags/}"; \
if [ -z "${VERSION}" ] || [ "${VERSION}" = "latest" ]; then \
BASE_URL="https://github.com/${TELEMT_REPOSITORY}/releases/latest/download"; \
else \
BASE_URL="https://github.com/${TELEMT_REPOSITORY}/releases/download/${VERSION}"; \
fi; \
curl -fL \
--retry 5 \
--retry-delay 3 \
--connect-timeout 10 \
--max-time 120 \
-o /tmp/upx.tar.xz \
"https://github.com/telemt/telemt/releases/download/toolchains/upx-${UPX_ARCH}_linux.tar.xz"; \
\
tar -xf /tmp/upx.tar.xz -C /tmp; \
install -m 0755 /tmp/upx*/upx /usr/local/bin/upx; \
rm -rf /tmp/upx*
COPY ${BINARY} /telemt
RUN set -eux; \
test -f /telemt; \
-o "/tmp/${ASSET}" \
"${BASE_URL}/${ASSET}"; \
curl -fL \
--retry 5 \
--retry-delay 3 \
--connect-timeout 10 \
--max-time 120 \
-o "/tmp/${ASSET}.sha256" \
"${BASE_URL}/${ASSET}.sha256"; \
cd /tmp; \
sha256sum -c "${ASSET}.sha256"; \
tar -xzf "${ASSET}" -C /tmp; \
test -f /tmp/telemt; \
install -m 0755 /tmp/telemt /telemt; \
strip --strip-unneeded /telemt || true; \
upx --best --lzma /telemt || true
rm -f "/tmp/${ASSET}" "/tmp/${ASSET}.sha256" /tmp/telemt
# ==========================
# Debug image
# Debug Image
# ==========================
FROM debian:12-slim AS debug
@@ -71,7 +81,7 @@ ENTRYPOINT ["/app/telemt"]
CMD ["config.toml"]
# ==========================
# Production (distroless, for static MUSL binary)
# Production Distroless on MUSL
# ==========================
FROM gcr.io/distroless/static-debian12 AS prod

View File

@@ -2,6 +2,8 @@
***Löst Probleme, bevor andere überhaupt wissen, dass sie existieren*** / ***It solves problems before others even realize they exist***
[**Telemt Chat in Telegram**](https://t.me/telemtrs)
**Telemt** is a fast, secure, and feature-rich server written in Rust: it fully implements the official Telegram proxy algo and adds many production-ready improvements such as:
- [ME Pool + Reader/Writer + Registry + Refill + Adaptive Floor + Trio-State + Generation Lifecycle](https://github.com/telemt/telemt/blob/main/docs/model/MODEL.en.md)
- [Full-covered API w/ management](https://github.com/telemt/telemt/blob/main/docs/API.md)
@@ -9,60 +11,6 @@
- Prometheus-format Metrics
- TLS-Fronting and TCP-Splicing for masking from "prying" eyes
[**Telemt Chat in Telegram**](https://t.me/telemtrs)
## NEWS and EMERGENCY
### ✈️ Telemt 3 is released!
<table>
<tr>
<td width="50%" valign="top">
### 🇷🇺 RU
#### О релизах
[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) даёт баланс стабильности и передового функционала, а так же последние исправления по безопасности и багам
Будем рады вашему фидбеку и предложениям по улучшению — особенно в части **API**, **статистики**, **UX**
---
Если у вас есть компетенции в:
- Асинхронных сетевых приложениях
- Анализе трафика
- Реверс-инжиниринге
- Сетевых расследованиях
Мы открыты к архитектурным предложениям, идеям и pull requests
</td>
<td width="50%" valign="top">
### 🇬🇧 EN
#### About releases
[3.3.27](https://github.com/telemt/telemt/releases/tag/3.3.27) provides a balance of stability and advanced functionality, as well as the latest security and bug fixes
We are looking forward to your feedback and improvement proposals — especially regarding **API**, **statistics**, **UX**
---
If you have expertise in:
- Asynchronous network applications
- Traffic analysis
- Reverse engineering
- Network forensics
We welcome ideas, architectural feedback, and pull requests.
</td>
</tr>
</table>
# Features
💥 The configuration structure has changed since version 1.1.0.0. Change it in your environment!
⚓ Our implementation of **TLS-fronting** is one of the most deeply debugged, focused, advanced and *almost* **"behaviorally consistent to real"**: we are confident we have it right - [see evidence on our validation and traces](#recognizability-for-dpi-and-crawler)
⚓ Our ***Middle-End Pool*** is the fastest by design in standard scenarios, compared to other implementations of connecting to the Middle-End Proxy: not dramatically, but usually

View File

@@ -50,6 +50,8 @@ This document lists all configuration keys accepted by `config.toml`.
| me_d2c_flush_batch_max_bytes | `usize` | `131072` | `4096..=2_097_152`. | Max ME->client payload bytes coalesced before flush. |
| me_d2c_flush_batch_max_delay_us | `u64` | `500` | `0..=5000`. | Max microsecond wait for coalescing more ME->client frames (`0` disables timed coalescing). |
| me_d2c_ack_flush_immediate | `bool` | `true` | — | Flushes client writer immediately after quick-ack write. |
| me_quota_soft_overshoot_bytes | `u64` | `65536` | `0..=16_777_216`. | Extra per-route quota allowance (bytes) tolerated before writer-side quota enforcement drops route data. |
| me_d2c_frame_buf_shrink_threshold_bytes | `usize` | `262144` | `4096..=16_777_216`. | Threshold for shrinking oversized ME->client frame-aggregation buffers after flush. |
| direct_relay_copy_buf_c2s_bytes | `usize` | `65536` | `4096..=1_048_576`. | Copy buffer size for client->DC direction in direct relay. |
| direct_relay_copy_buf_s2c_bytes | `usize` | `262144` | `8192..=2_097_152`. | Copy buffer size for DC->client direction in direct relay. |
| crypto_pending_buffer | `usize` | `262144` | — | Max pending ciphertext buffer per client writer (bytes). |
@@ -89,6 +91,7 @@ This document lists all configuration keys accepted by `config.toml`.
| upstream_connect_retry_attempts | `u32` | `2` | Must be `> 0`. | Connect attempts for selected upstream before error/fallback. |
| upstream_connect_retry_backoff_ms | `u64` | `100` | — | Delay between upstream connect attempts (ms). |
| upstream_connect_budget_ms | `u64` | `3000` | Must be `> 0`. | Total wall-clock budget for one upstream connect request (ms). |
| tg_connect | `u64` | `10` | Must be `> 0`. | Per-attempt upstream TCP connect timeout to Telegram DC (seconds). |
| upstream_unhealthy_fail_threshold | `u32` | `5` | Must be `> 0`. | Consecutive failed requests before upstream is marked unhealthy. |
| upstream_connect_failfast_hard_errors | `bool` | `false` | — | Skips additional retries for hard non-transient connect errors. |
| stun_iface_mismatch_ignore | `bool` | `false` | none | Reserved compatibility flag in current runtime revision. |
@@ -243,7 +246,10 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| client_handshake | `u64` | `30` | — | Client handshake timeout. |
| tg_connect | `u64` | `10` | — | Upstream Telegram connect timeout. |
| relay_idle_policy_v2_enabled | `bool` | `true` | — | Enables soft/hard middle-relay client idle policy. |
| relay_client_idle_soft_secs | `u64` | `120` | Must be `> 0`; must be `<= relay_client_idle_hard_secs`. | Soft idle threshold for middle-relay client uplink inactivity (seconds). |
| relay_client_idle_hard_secs | `u64` | `360` | Must be `> 0`; must be `>= relay_client_idle_soft_secs`. | Hard idle threshold for middle-relay client uplink inactivity (seconds). |
| relay_idle_grace_after_downstream_activity_secs | `u64` | `30` | Must be `<= relay_client_idle_hard_secs`. | Extra hard-idle grace after recent downstream activity (seconds). |
| client_keepalive | `u64` | `15` | — | Client keepalive timeout. |
| client_ack | `u64` | `90` | — | Client ACK timeout. |
| me_one_retry | `u8` | `12` | none | Fast reconnect attempts budget for single-endpoint DC scenarios. |
@@ -255,6 +261,9 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
|---|---|---|---|---|
| tls_domain | `String` | `"petrovich.ru"` | — | Primary TLS domain used in fake TLS handshake profile. |
| tls_domains | `String[]` | `[]` | — | Additional TLS domains for generating multiple links. |
| unknown_sni_action | `"drop" \| "mask"` | `"drop"` | — | Action for TLS ClientHello with unknown/non-configured SNI. |
| tls_fetch_scope | `String` | `""` | Value is trimmed during load; empty keeps default upstream routing behavior. | Upstream scope tag used for TLS-front metadata fetches. |
| tls_fetch | `Table` | built-in defaults | See `[censorship.tls_fetch]` section below. | TLS-front metadata fetch strategy settings. |
| mask | `bool` | `true` | — | Enables masking/fronting relay mode. |
| mask_host | `String \| null` | `null` | — | Upstream mask host for TLS fronting relay. |
| mask_port | `u16` | `443` | — | Upstream mask port for TLS fronting relay. |
@@ -280,6 +289,18 @@ Note: When `server.proxy_protocol` is enabled, incoming PROXY protocol headers a
| mask_timing_normalization_floor_ms | `u64` | `0` | Must be `> 0` when timing normalization is enabled; must be `<= ceiling`. | Lower bound (ms) for masking outcome normalization target. |
| mask_timing_normalization_ceiling_ms | `u64` | `0` | Must be `>= floor`; must be `<= 60000`. | Upper bound (ms) for masking outcome normalization target. |
## [censorship.tls_fetch]
| Parameter | Type | Default | Constraints / validation | Description |
|---|---|---|---|---|
| profiles | `("modern_chrome_like" \| "modern_firefox_like" \| "compat_tls12" \| "legacy_minimal")[]` | `["modern_chrome_like", "modern_firefox_like", "compat_tls12", "legacy_minimal"]` | Empty list falls back to defaults; values are deduplicated preserving order. | Ordered ClientHello profile fallback chain for TLS-front metadata fetch. |
| strict_route | `bool` | `true` | — | Fails closed on upstream-route connect errors instead of falling back to direct TCP when route is configured. |
| attempt_timeout_ms | `u64` | `5000` | Must be `> 0`. | Timeout budget per one TLS-fetch profile attempt (ms). |
| total_budget_ms | `u64` | `15000` | Must be `> 0`. | Total wall-clock budget across all TLS-fetch attempts (ms). |
| grease_enabled | `bool` | `false` | — | Enables GREASE-style random values in selected ClientHello extensions for fetch traffic. |
| deterministic | `bool` | `false` | — | Enables deterministic ClientHello randomness for debugging/tests. |
| profile_cache_ttl_secs | `u64` | `600` | `0` disables cache. | TTL for winner-profile cache entries used by TLS fetch path. |
### Shape-channel hardening notes (`[censorship]`)
These parameters are designed to reduce one specific fingerprint source during masking: the exact number of bytes sent from proxy to `mask_host` for invalid or probing traffic.

View File

@@ -1,110 +1,122 @@
## How to set up "proxy sponsor" channel and statistics via @MTProxybot bot
## How to set up a "proxy sponsor" channel and statistics via the @MTProxybot
1. Go to @MTProxybot bot.
2. Enter the command `/newproxy`
3. Send the server IP and port. For example: 1.2.3.4:443
4. Open the config `nano /etc/telemt/telemt.toml`.
5. Copy and send the user secret from the [access.users] section to the bot.
6. Copy the tag received from the bot. For example 1234567890abcdef1234567890abcdef.
1. Go to the @MTProxybot.
2. Enter the `/newproxy` command.
3. Send your server's IP address and port. For example: `1.2.3.4:443`.
4. Open the configuration file: `nano /etc/telemt/telemt.toml`.
5. Copy and send the user secret from the `[access.users]` section to the bot.
6. Copy the tag provided by the bot. For example: `1234567890abcdef1234567890abcdef`.
> [!WARNING]
> The link provided by the bot will not work. Do not copy or use it!
7. Uncomment the ad_tag parameter and enter the tag received from the bot.
8. Uncomment/add the parameter `use_middle_proxy = true`.
7. Uncomment the `ad_tag` parameter and enter the tag received from the bot.
8. Uncomment or add the `use_middle_proxy = true` parameter.
Config example:
Configuration example:
```toml
[general]
ad_tag = "1234567890abcdef1234567890abcdef"
use_middle_proxy = true
```
9. Save the config. Ctrl+S -> Ctrl+X.
10. Restart telemt `systemctl restart telemt`.
11. In the bot, send the command /myproxies and select the added server.
9. Save the changes (in nano: Ctrl+S -> Ctrl+X).
10. Restart the telemt service: `systemctl restart telemt`.
11. Send the `/myproxies` command to the bot and select the added server.
12. Click the "Set promotion" button.
13. Send a **public link** to the channel. Private channels cannot be added!
14. Wait approximately 1 hour for the information to update on Telegram servers.
14. Wait for about 1 hour for the information to update on Telegram servers.
> [!WARNING]
> You will not see the "proxy sponsor" if you are already subscribed to the channel.
> The sponsored channel will not be displayed to you if you are already subscribed to it.
**You can also set up different channels for different users.**
**You can also configure different sponsored channels for different users:**
```toml
[access.user_ad_tags]
hello = "ad_tag"
hello2 = "ad_tag2"
```
## Why is middle proxy (ME) needed
## Why do you need a middle proxy (ME)
https://github.com/telemt/telemt/discussions/167
## How many people can use 1 link
By default, 1 link can be used by any number of people.
You can limit the number of IPs using the proxy.
## How many people can use one link
By default, an unlimited number of people can use a single link.
However, you can limit the number of unique IP addresses for each user:
```toml
[access.user_max_unique_ips]
hello = 1
```
This parameter limits how many unique IPs can use 1 link simultaneously. If one user disconnects, a second user can connect. Also, multiple users can sit behind the same IP.
This parameter sets the maximum number of unique IP addresses from which a single link can be used simultaneously. If the first user disconnects, a second one can connect. At the same time, multiple users can connect from a single IP address simultaneously (for example, devices on the same Wi-Fi network).
## How to create multiple different links
1. Generate the required number of secrets `openssl rand -hex 16`
2. Open the config `nano /etc/telemt.toml`
3. Add new users.
1. Generate the required number of secrets using the command: `openssl rand -hex 16`.
2. Open the configuration file: `nano /etc/telemt/telemt.toml`.
3. Add new users to the `[access.users]` section:
```toml
[access.users]
user1 = "00000000000000000000000000000001"
user2 = "00000000000000000000000000000002"
user3 = "00000000000000000000000000000003"
```
4. Save the config. Ctrl+S -> Ctrl+X. You don't need to restart telemt.
5. Get the links via
4. Save the configuration (Ctrl+S -> Ctrl+X). There is no need to restart the telemt service.
5. Get the ready-to-use links using the command:
```bash
curl -s http://127.0.0.1:9091/v1/users | jq
```
## "Unknown TLS SNI" error
Usually, this error occurs if you have changed the `tls_domain` parameter, but users continue to connect using old links with the previous domain.
If you need to allow connections with any domains (ignoring SNI mismatches), add the following parameters:
```toml
[censorship]
unknown_sni_action = "mask"
```
## How to view metrics
1. Open the config `nano /etc/telemt.toml`
2. Add the following parameters
1. Open the configuration file: `nano /etc/telemt/telemt.toml`.
2. Add the following parameters:
```toml
[server]
metrics_port = 9090
metrics_whitelist = ["127.0.0.1/32", "::1/128", "0.0.0.0/0"]
```
3. Save the config. Ctrl+S -> Ctrl+X.
4. Metrics are available at SERVER_IP:9090/metrics.
3. Save the changes (Ctrl+S -> Ctrl+X).
4. After that, metrics will be available at: `SERVER_IP:9090/metrics`.
> [!WARNING]
> "0.0.0.0/0" in metrics_whitelist opens access from any IP. Replace with your own IP. For example "1.2.3.4"
> The value `"0.0.0.0/0"` in `metrics_whitelist` opens access to metrics from any IP address. It is recommended to replace it with your personal IP, for example: `"1.2.3.4/32"`.
## Additional parameters
### Domain in link instead of IP
To specify a domain in the links, add to the `[general.links]` section of the config file.
### Domain in the link instead of IP
To display a domain instead of an IP address in the connection links, add the following lines to the configuration file:
```toml
[general.links]
public_host = "proxy.example.com"
```
### Server connection limit
Limits the total number of open connections to the server:
### Total server connection limit
This parameter limits the total number of active connections to the server:
```toml
[server]
max_connections = 10000 # 0 - unlimited, 10000 - default
```
### Upstream Manager
To specify an upstream, add to the `[[upstreams]]` section of the config.toml file:
#### Binding to IP
To configure outbound connections (upstreams), add the corresponding parameters to the `[[upstreams]]` section of the configuration file:
#### Binding to an outbound IP address
```toml
[[upstreams]]
type = "direct"
weight = 1
enabled = true
interface = "192.168.1.100" # Change to your outgoing IP
interface = "192.168.1.100" # Replace with your outbound IP
```
#### SOCKS4/5 as Upstream
- Without authentication:
#### Using SOCKS4/5 as an Upstream
- Without authorization:
```toml
[[upstreams]]
type = "socks5" # Specify SOCKS4 or SOCKS5
@@ -113,7 +125,7 @@ weight = 1 # Set Weight for Scenarios
enabled = true
```
- With authentication:
- With authorization:
```toml
[[upstreams]]
type = "socks5" # Specify SOCKS4 or SOCKS5
@@ -124,8 +136,8 @@ weight = 1 # Set Weight for Scenarios
enabled = true
```
#### Shadowsocks as Upstream
Requires `use_middle_proxy = false`.
#### Using Shadowsocks as an Upstream
For this method to work, the `use_middle_proxy = false` parameter must be set.
```toml
[general]

View File

@@ -1,32 +1,32 @@
## Как настроить канал "спонсор прокси" и статистику через бота @MTProxybot
1. Зайти в бота @MTProxybot.
2. Ввести команду `/newproxy`
3. Отправить IP и порт сервера. Например: 1.2.3.4:443
4. Открыть конфиг `nano /etc/telemt/telemt.toml`.
5. Скопировать и отправить боту секрет пользователя из раздела [access.users].
6. Скопировать полученный tag у бота. Например 1234567890abcdef1234567890abcdef.
1. Зайдите в бота @MTProxybot.
2. Введите команду `/newproxy`.
3. Отправьте IP-адрес и порт сервера. Например: `1.2.3.4:443`.
4. Откройте файл конфигурации: `nano /etc/telemt/telemt.toml`.
5. Скопируйте и отправьте боту секрет пользователя из раздела `[access.users]`.
6. Скопируйте тег (tag), который выдаст бот. Например: `1234567890abcdef1234567890abcdef`.
> [!WARNING]
> Ссылка, которую выдает бот, не будет работать. Не копируйте и не используйте её!
7. Раскомментировать параметр ad_tag и вписать tag, полученный у бота.
8. Раскомментировать/добавить параметр use_middle_proxy = true.
> Ссылка, которую выдает бот, работать не будет. Не копируйте и не используйте её!
7. Раскомментируйте параметр `ad_tag` и впишите тег, полученный от бота.
8. Раскомментируйте или добавьте параметр `use_middle_proxy = true`.
Пример конфига:
Пример конфигурации:
```toml
[general]
ad_tag = "1234567890abcdef1234567890abcdef"
use_middle_proxy = true
```
9. Сохранить конфиг. Ctrl+S -> Ctrl+X.
10. Перезапустить telemt `systemctl restart telemt`.
11. В боте отправить команду /myproxies и выбрать добавленный сервер.
12. Нажать кнопку "Set promotion".
13. Отправить **публичную ссылку** на канал. Приватный канал добавить нельзя!
14. Подождать примерно 1 час, пока информация обновится на серверах Telegram.
9. Сохраните изменения (в nano: Ctrl+S -> Ctrl+X).
10. Перезапустите службу telemt: `systemctl restart telemt`.
11. В боте отправьте команду `/myproxies` и выберите добавленный сервер.
12. Нажмите кнопку «Set promotion».
13. Отправьте **публичную ссылку** на канал. Приватные каналы добавлять нельзя!
14. Подождите примерно 1 час, пока информация обновится на серверах Telegram.
> [!WARNING]
> У вас не будет отображаться "спонсор прокси" если вы уже подписаны на канал.
> Спонсорский канал не будет у вас отображаться, если вы уже на него подписаны.
**Также вы можете настроить разные каналы для разных пользователей.**
**Вы также можете настроить разные спонсорские каналы для разных пользователей:**
```toml
[access.user_ad_tags]
hello = "ad_tag"
@@ -37,74 +37,85 @@ hello2 = "ad_tag2"
https://github.com/telemt/telemt/discussions/167
## Сколько человек может пользоваться 1 ссылкой
## Сколько человек может пользоваться одной ссылкой
По умолчанию 1 ссылкой может пользоваться сколько угодно человек.
Вы можете ограничить число IP, использующих прокси.
По умолчанию одной ссылкой может пользоваться неограниченное число людей.
Однако вы можете ограничить количество уникальных IP-адресов для каждого пользователя:
```toml
[access.user_max_unique_ips]
hello = 1
```
Этот параметр ограничивает, сколько уникальных IP может использовать 1 ссылку одновременно. Если один пользователь отключится, второй сможет подключиться. Также с одного IP может сидеть несколько пользователей.
Этот параметр задает максимальное количество уникальных IP-адресов, с которых можно одновременно использовать одну ссылку. Если первый пользователь отключится, второй сможет подключиться. При этом с одного IP-адреса могут подключаться несколько пользователей одновременно (например, устройства в одной Wi-Fi сети).
## Как сделать несколько разных ссылок
## Как создать несколько разных ссылок
1. Сгенерируйте нужное число секретов `openssl rand -hex 16`
2. Открыть конфиг `nano /etc/telemt.toml`
3. Добавить новых пользователей.
1. Сгенерируйте необходимое количество секретов с помощью команды: `openssl rand -hex 16`.
2. Откройте файл конфигурации: `nano /etc/telemt/telemt.toml`.
3. Добавьте новых пользователей в секцию `[access.users]`:
```toml
[access.users]
user1 = "00000000000000000000000000000001"
user2 = "00000000000000000000000000000002"
user3 = "00000000000000000000000000000003"
```
4. Сохранить конфиг. Ctrl+S -> Ctrl+X. Перезапускать telemt не нужно.
5. Получить ссылки через
4. Сохраните конфигурацию (Ctrl+S -> Ctrl+X). Перезапускать службу telemt не нужно.
5. Получите готовые ссылки с помощью команды:
```bash
curl -s http://127.0.0.1:9091/v1/users | jq
```
## Ошибка "Unknown TLS SNI"
Обычно эта ошибка возникает, если вы изменили параметр `tls_domain`, но пользователи продолжают подключаться по старым ссылкам с прежним доменом.
Если необходимо разрешить подключение с любыми доменами (игнорируя несовпадения SNI), добавьте следующие параметры:
```toml
[censorship]
unknown_sni_action = "mask"
```
## Как посмотреть метрики
1. Открыть конфиг `nano /etc/telemt.toml`
2. Добавить следующие параметры
1. Откройте файл конфигурации: `nano /etc/telemt/telemt.toml`.
2. Добавьте следующие параметры:
```toml
[server]
metrics_port = 9090
metrics_whitelist = ["127.0.0.1/32", "::1/128", "0.0.0.0/0"]
```
3. Сохранить конфиг. Ctrl+S -> Ctrl+X.
4. Метрики доступны по адресу SERVER_IP:9090/metrics.
3. Сохраните изменения (Ctrl+S -> Ctrl+X).
4. После этого метрики будут доступны по адресу: `SERVER_IP:9090/metrics`.
> [!WARNING]
> "0.0.0.0/0" в metrics_whitelist открывает доступ с любого IP. Замените на свой ip. Например "1.2.3.4"
> Значение `"0.0.0.0/0"` в `metrics_whitelist` открывает доступ к метрикам с любого IP-адреса. Рекомендуется заменить его на ваш личный IP, например: `"1.2.3.4/32"`.
## Дополнительные параметры
### Домен в ссылке вместо IP
Чтобы указать домен в ссылках, добавьте в секцию `[general.links]` файла config.
Чтобы в ссылках для подключения отображался домен вместо IP-адреса, добавьте следующие строки в файл конфигурации:
```toml
[general.links]
public_host = "proxy.example.com"
```
### Общий лимит подключений к серверу
Ограничивает общее число открытых подключений к серверу:
Этот параметр ограничивает общее количество активных подключений к серверу:
```toml
[server]
max_connections = 10000 # 0 - unlimited, 10000 - default
max_connections = 10000 # 0 - без ограничений, 10000 - по умолчанию
```
### Upstream Manager
Чтобы указать апстрим, добавьте в секцию `[[upstreams]]` файла config.toml:
#### Привязка к IP
Для настройки исходящих подключений (апстримов) добавьте соответствующие параметры в секцию `[[upstreams]]` файла конфигурации:
#### Привязка к исходящему IP-адресу
```toml
[[upstreams]]
type = "direct"
weight = 1
enabled = true
interface = "192.168.1.100" # Change to your outgoing IP
interface = "192.168.1.100" # Замените на ваш исходящий IP
```
#### SOCKS4/5 как Upstream
#### Использование SOCKS4/5 в качестве Upstream
- Без авторизации:
```toml
[[upstreams]]
@@ -125,8 +136,8 @@ weight = 1 # Set Weight for Scenarios
enabled = true
```
#### Shadowsocks как Upstream
Требует `use_middle_proxy = false`.
#### Использование Shadowsocks в качестве Upstream
Для работы этого метода требуется установить параметр `use_middle_proxy = false`.
```toml
[general]

View File

@@ -27,12 +27,12 @@ chmod +x /bin/telemt
**0. Check port and generate secrets**
The port you have selected for use should be MISSING from the list, when:
The port you have selected for use should not be in the list:
```bash
netstat -lnp
```
Generate 16 bytes/32 characters HEX with OpenSSL or another way:
Generate 16 bytes/32 characters in HEX format with OpenSSL or another way:
```bash
openssl rand -hex 16
```
@@ -50,7 +50,7 @@ Save the obtained result somewhere. You will need it later!
**1. Place your config to /etc/telemt/telemt.toml**
Create config directory:
Create the config directory:
```bash
mkdir /etc/telemt
```
@@ -59,7 +59,7 @@ Open nano
```bash
nano /etc/telemt/telemt.toml
```
paste your config
Insert your configuration:
```toml
# === General Settings ===
@@ -93,8 +93,9 @@ hello = "00000000000000000000000000000000"
then Ctrl+S -> Ctrl+X to save
> [!WARNING]
> Replace the value of the hello parameter with the value you obtained in step 0.
> Replace the value of the tls_domain parameter with another website.
> Replace the value of the hello parameter with the value you obtained in step 0.
> Additionally, change the value of the tls_domain parameter to a different website.
> Changing the tls_domain parameter will break all links that use the old domain!
---
@@ -105,14 +106,14 @@ useradd -d /opt/telemt -m -r -U telemt
chown -R telemt:telemt /etc/telemt
```
**3. Create service on /etc/systemd/system/telemt.service**
**3. Create service in /etc/systemd/system/telemt.service**
Open nano
```bash
nano /etc/systemd/system/telemt.service
```
paste this Systemd Module
Insert this Systemd module:
```bash
[Unit]
Description=Telemt
@@ -147,13 +148,16 @@ systemctl daemon-reload
**6.** For automatic startup at system boot, enter `systemctl enable telemt`
**7.** To get the link(s), enter
**7.** To get the link(s), enter:
```bash
curl -s http://127.0.0.1:9091/v1/users | jq
```
> Any number of people can use one link.
> [!WARNING]
> Only the command from step 7 can provide a working link. Do not try to create it yourself or copy it from anywhere if you are not sure what you are doing!
---
# Telemt via Docker Compose

View File

@@ -95,6 +95,7 @@ hello = "00000000000000000000000000000000"
> [!WARNING]
> Замените значение параметра hello на значение, которое вы получили в пункте 0.
> Также замените значение параметра tls_domain на другой сайт.
> Изменение параметра tls_domain сделает нерабочими все ссылки, использующие старый домен!
---

View File

@@ -37,11 +37,12 @@ mod runtime_watch;
mod runtime_zero;
mod users;
use config_store::{current_revision, parse_if_match};
use config_store::{current_revision, load_config_from_disk, parse_if_match};
use events::ApiEventStore;
use http_utils::{error_response, read_json, read_optional_json, success_response};
use model::{
ApiFailure, CreateUserRequest, HealthData, PatchUserRequest, RotateSecretRequest, SummaryData,
ApiFailure, CreateUserRequest, DeleteUserResponse, HealthData, PatchUserRequest,
RotateSecretRequest, SummaryData, UserActiveIps,
};
use runtime_edge::{
EdgeConnectionsCacheEntry, build_runtime_connections_summary_data,
@@ -362,15 +363,33 @@ async fn handle(
);
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/users/active-ips") => {
let revision = current_revision(&shared.config_path).await?;
let usernames: Vec<_> = cfg.access.users.keys().cloned().collect();
let active_ips_map = shared.ip_tracker.get_active_ips_for_users(&usernames).await;
let mut data: Vec<UserActiveIps> = active_ips_map
.into_iter()
.filter(|(_, ips)| !ips.is_empty())
.map(|(username, active_ips)| UserActiveIps {
username,
active_ips,
})
.collect();
data.sort_by(|a, b| a.username.cmp(&b.username));
Ok(success_response(StatusCode::OK, data, revision))
}
("GET", "/v1/stats/users") | ("GET", "/v1/users") => {
let revision = current_revision(&shared.config_path).await?;
let disk_cfg = load_config_from_disk(&shared.config_path).await?;
let runtime_cfg = config_rx.borrow().clone();
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
let users = users_from_config(
&cfg,
&disk_cfg,
&shared.stats,
&shared.ip_tracker,
detected_ip_v4,
detected_ip_v6,
Some(runtime_cfg.as_ref()),
)
.await;
Ok(success_response(StatusCode::OK, users, revision))
@@ -389,7 +408,7 @@ async fn handle(
let expected_revision = parse_if_match(req.headers());
let body = read_json::<CreateUserRequest>(req.into_body(), body_limit).await?;
let result = create_user(body, expected_revision, &shared).await;
let (data, revision) = match result {
let (mut data, revision) = match result {
Ok(ok) => ok,
Err(error) => {
shared
@@ -398,11 +417,18 @@ async fn handle(
return Err(error);
}
};
let runtime_cfg = config_rx.borrow().clone();
data.user.in_runtime = runtime_cfg.access.users.contains_key(&data.user.username);
shared.runtime_events.record(
"api.user.create.ok",
format!("username={}", data.user.username),
);
Ok(success_response(StatusCode::CREATED, data, revision))
let status = if data.user.in_runtime {
StatusCode::CREATED
} else {
StatusCode::ACCEPTED
};
Ok(success_response(status, data, revision))
}
_ => {
if let Some(user) = path.strip_prefix("/v1/users/")
@@ -411,13 +437,16 @@ async fn handle(
{
if method == Method::GET {
let revision = current_revision(&shared.config_path).await?;
let disk_cfg = load_config_from_disk(&shared.config_path).await?;
let runtime_cfg = config_rx.borrow().clone();
let (detected_ip_v4, detected_ip_v6) = shared.detected_link_ips();
let users = users_from_config(
&cfg,
&disk_cfg,
&shared.stats,
&shared.ip_tracker,
detected_ip_v4,
detected_ip_v6,
Some(runtime_cfg.as_ref()),
)
.await;
if let Some(user_info) =
@@ -445,7 +474,7 @@ async fn handle(
let body =
read_json::<PatchUserRequest>(req.into_body(), body_limit).await?;
let result = patch_user(user, body, expected_revision, &shared).await;
let (data, revision) = match result {
let (mut data, revision) = match result {
Ok(ok) => ok,
Err(error) => {
shared.runtime_events.record(
@@ -455,10 +484,17 @@ async fn handle(
return Err(error);
}
};
let runtime_cfg = config_rx.borrow().clone();
data.in_runtime = runtime_cfg.access.users.contains_key(&data.username);
shared
.runtime_events
.record("api.user.patch.ok", format!("username={}", data.username));
return Ok(success_response(StatusCode::OK, data, revision));
let status = if data.in_runtime {
StatusCode::OK
} else {
StatusCode::ACCEPTED
};
return Ok(success_response(status, data, revision));
}
if method == Method::DELETE {
if api_cfg.read_only {
@@ -486,7 +522,18 @@ async fn handle(
shared
.runtime_events
.record("api.user.delete.ok", format!("username={}", deleted_user));
return Ok(success_response(StatusCode::OK, deleted_user, revision));
let runtime_cfg = config_rx.borrow().clone();
let in_runtime = runtime_cfg.access.users.contains_key(&deleted_user);
let response = DeleteUserResponse {
username: deleted_user,
in_runtime,
};
let status = if response.in_runtime {
StatusCode::ACCEPTED
} else {
StatusCode::OK
};
return Ok(success_response(status, response, revision));
}
if method == Method::POST
&& let Some(base_user) = user.strip_suffix("/rotate-secret")
@@ -514,7 +561,7 @@ async fn handle(
&shared,
)
.await;
let (data, revision) = match result {
let (mut data, revision) = match result {
Ok(ok) => ok,
Err(error) => {
shared.runtime_events.record(
@@ -524,11 +571,19 @@ async fn handle(
return Err(error);
}
};
let runtime_cfg = config_rx.borrow().clone();
data.user.in_runtime =
runtime_cfg.access.users.contains_key(&data.user.username);
shared.runtime_events.record(
"api.user.rotate_secret.ok",
format!("username={}", base_user),
);
return Ok(success_response(StatusCode::OK, data, revision));
let status = if data.user.in_runtime {
StatusCode::OK
} else {
StatusCode::ACCEPTED
};
return Ok(success_response(status, data, revision));
}
if method == Method::POST {
return Ok(error_response(

View File

@@ -428,6 +428,7 @@ pub(super) struct UserLinks {
#[derive(Serialize)]
pub(super) struct UserInfo {
pub(super) username: String,
pub(super) in_runtime: bool,
pub(super) user_ad_tag: Option<String>,
pub(super) max_tcp_conns: Option<usize>,
pub(super) expiration_rfc3339: Option<String>,
@@ -442,12 +443,24 @@ pub(super) struct UserInfo {
pub(super) links: UserLinks,
}
/// One entry of the `GET /v1/stats/users/active-ips` response: a user and the
/// IP addresses the IP tracker currently reports as active for that user.
///
/// The handler drops users whose address list is empty and sorts the
/// remaining entries by `username` before serializing them.
#[derive(Serialize)]
pub(super) struct UserActiveIps {
/// Username as it appears in the `access.users` config map.
pub(super) username: String,
/// Addresses returned by the IP tracker for this user.
pub(super) active_ips: Vec<IpAddr>,
}
/// Response payload that pairs a user's [`UserInfo`] with the user's secret.
///
/// NOTE(review): presumably returned by the user-create and rotate-secret
/// handlers, which both access `data.user` — confirm against their signatures.
#[derive(Serialize)]
pub(super) struct CreateUserResponse {
/// Full user description, including the `in_runtime` flag.
pub(super) user: UserInfo,
/// The user's secret, returned alongside the user description.
pub(super) secret: String,
}
/// Response payload for `DELETE /v1/users/{username}`.
///
/// The handler answers `202 Accepted` while `in_runtime` is still `true` and
/// `200 OK` once the user is no longer present in the runtime config.
#[derive(Serialize)]
pub(super) struct DeleteUserResponse {
/// Name of the user that was deleted.
pub(super) username: String,
/// `true` if the runtime config snapshot taken after the delete still
/// contains this user.
pub(super) in_runtime: bool,
}
#[derive(Deserialize)]
pub(super) struct CreateUserRequest {
pub(super) username: String,

View File

@@ -35,11 +35,14 @@ pub(super) struct RuntimeGatesData {
pub(super) conditional_cast_enabled: bool,
pub(super) me_runtime_ready: bool,
pub(super) me2dc_fallback_enabled: bool,
pub(super) me2dc_fast_enabled: bool,
pub(super) use_middle_proxy: bool,
pub(super) route_mode: &'static str,
pub(super) reroute_active: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reroute_to_direct_at_epoch_secs: Option<u64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub(super) reroute_reason: Option<&'static str>,
pub(super) startup_status: &'static str,
pub(super) startup_stage: String,
pub(super) startup_progress_pct: f64,
@@ -47,6 +50,7 @@ pub(super) struct RuntimeGatesData {
#[derive(Serialize)]
pub(super) struct EffectiveTimeoutLimits {
pub(super) client_first_byte_idle_secs: u64,
pub(super) client_handshake_secs: u64,
pub(super) tg_connect_secs: u64,
pub(super) client_keepalive_secs: u64,
@@ -86,6 +90,7 @@ pub(super) struct EffectiveMiddleProxyLimits {
pub(super) writer_pick_mode: &'static str,
pub(super) writer_pick_sample_size: u8,
pub(super) me2dc_fallback: bool,
pub(super) me2dc_fast: bool,
}
#[derive(Serialize)]
@@ -95,6 +100,11 @@ pub(super) struct EffectiveUserIpPolicyLimits {
pub(super) window_secs: u64,
}
/// Effective per-user TCP connection policy, reported as part of `EffectiveLimitsData`.
#[derive(Serialize)]
pub(super) struct EffectiveUserTcpPolicyLimits {
/// Value of `access.user_max_tcp_conns_global_each`: the TCP connection limit applied to
/// every user that has no positive per-user override. The users API reports no effective
/// limit when this is `0`.
pub(super) global_each: usize,
}
#[derive(Serialize)]
pub(super) struct EffectiveLimitsData {
pub(super) update_every_secs: u64,
@@ -104,6 +114,7 @@ pub(super) struct EffectiveLimitsData {
pub(super) upstream: EffectiveUpstreamLimits,
pub(super) middle_proxy: EffectiveMiddleProxyLimits,
pub(super) user_ip_policy: EffectiveUserIpPolicyLimits,
pub(super) user_tcp_policy: EffectiveUserTcpPolicyLimits,
}
#[derive(Serialize)]
@@ -169,6 +180,8 @@ pub(super) async fn build_runtime_gates_data(
let startup_summary = build_runtime_startup_summary(shared).await;
let route_state = shared.route_runtime.snapshot();
let route_mode = route_state.mode.as_str();
let fast_fallback_enabled =
cfg.general.use_middle_proxy && cfg.general.me2dc_fallback && cfg.general.me2dc_fast;
let reroute_active = cfg.general.use_middle_proxy
&& cfg.general.me2dc_fallback
&& matches!(route_state.mode, RelayRouteMode::Direct);
@@ -177,6 +190,15 @@ pub(super) async fn build_runtime_gates_data(
} else {
None
};
let reroute_reason = if reroute_active {
if fast_fallback_enabled {
Some("fast_not_ready_fallback")
} else {
Some("strict_grace_fallback")
}
} else {
None
};
let me_runtime_ready = if !cfg.general.use_middle_proxy {
true
} else {
@@ -194,10 +216,12 @@ pub(super) async fn build_runtime_gates_data(
conditional_cast_enabled: cfg.general.use_middle_proxy,
me_runtime_ready,
me2dc_fallback_enabled: cfg.general.me2dc_fallback,
me2dc_fast_enabled: fast_fallback_enabled,
use_middle_proxy: cfg.general.use_middle_proxy,
route_mode,
reroute_active,
reroute_to_direct_at_epoch_secs,
reroute_reason,
startup_status: startup_summary.status,
startup_stage: startup_summary.stage,
startup_progress_pct: startup_summary.progress_pct,
@@ -210,8 +234,9 @@ pub(super) fn build_limits_effective_data(cfg: &ProxyConfig) -> EffectiveLimitsD
me_reinit_every_secs: cfg.general.effective_me_reinit_every_secs(),
me_pool_force_close_secs: cfg.general.effective_me_pool_force_close_secs(),
timeouts: EffectiveTimeoutLimits {
client_first_byte_idle_secs: cfg.timeouts.client_first_byte_idle_secs,
client_handshake_secs: cfg.timeouts.client_handshake,
tg_connect_secs: cfg.timeouts.tg_connect,
tg_connect_secs: cfg.general.tg_connect,
client_keepalive_secs: cfg.timeouts.client_keepalive,
client_ack_secs: cfg.timeouts.client_ack,
me_one_retry: cfg.timeouts.me_one_retry,
@@ -263,12 +288,16 @@ pub(super) fn build_limits_effective_data(cfg: &ProxyConfig) -> EffectiveLimitsD
writer_pick_mode: me_writer_pick_mode_label(cfg.general.me_writer_pick_mode),
writer_pick_sample_size: cfg.general.me_writer_pick_sample_size,
me2dc_fallback: cfg.general.me2dc_fallback,
me2dc_fast: cfg.general.me2dc_fast,
},
user_ip_policy: EffectiveUserIpPolicyLimits {
global_each: cfg.access.user_max_unique_ips_global_each,
mode: user_max_unique_ips_mode_label(cfg.access.user_max_unique_ips_mode),
window_secs: cfg.access.user_max_unique_ips_window_secs,
},
user_tcp_policy: EffectiveUserTcpPolicyLimits {
global_each: cfg.access.user_max_tcp_conns_global_each,
},
}
}

View File

@@ -136,6 +136,7 @@ pub(super) async fn create_user(
&shared.ip_tracker,
detected_ip_v4,
detected_ip_v6,
None,
)
.await;
let user = users
@@ -143,8 +144,16 @@ pub(super) async fn create_user(
.find(|entry| entry.username == body.username)
.unwrap_or(UserInfo {
username: body.username.clone(),
in_runtime: false,
user_ad_tag: None,
max_tcp_conns: None,
max_tcp_conns: cfg
.access
.user_max_tcp_conns
.get(&body.username)
.copied()
.filter(|limit| *limit > 0)
.or((cfg.access.user_max_tcp_conns_global_each > 0)
.then_some(cfg.access.user_max_tcp_conns_global_each)),
expiration_rfc3339: None,
data_quota_bytes: None,
max_unique_ips: updated_limit,
@@ -236,6 +245,7 @@ pub(super) async fn patch_user(
&shared.ip_tracker,
detected_ip_v4,
detected_ip_v6,
None,
)
.await;
let user_info = users
@@ -293,6 +303,7 @@ pub(super) async fn rotate_secret(
&shared.ip_tracker,
detected_ip_v4,
detected_ip_v6,
None,
)
.await;
let user_info = users
@@ -365,6 +376,7 @@ pub(super) async fn users_from_config(
ip_tracker: &UserIpTracker,
startup_detected_ip_v4: Option<IpAddr>,
startup_detected_ip_v6: Option<IpAddr>,
runtime_cfg: Option<&ProxyConfig>,
) -> Vec<UserInfo> {
let mut names = cfg.access.users.keys().cloned().collect::<Vec<_>>();
names.sort();
@@ -394,8 +406,18 @@ pub(super) async fn users_from_config(
tls: Vec::new(),
});
users.push(UserInfo {
in_runtime: runtime_cfg
.map(|runtime| runtime.access.users.contains_key(&username))
.unwrap_or(false),
user_ad_tag: cfg.access.user_ad_tags.get(&username).cloned(),
max_tcp_conns: cfg.access.user_max_tcp_conns.get(&username).copied(),
max_tcp_conns: cfg
.access
.user_max_tcp_conns
.get(&username)
.copied()
.filter(|limit| *limit > 0)
.or((cfg.access.user_max_tcp_conns_global_each > 0)
.then_some(cfg.access.user_max_tcp_conns_global_each)),
expiration_rfc3339: cfg
.access
.user_expirations
@@ -572,3 +594,94 @@ fn resolve_tls_domains(cfg: &ProxyConfig) -> Vec<&str> {
}
domains
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ip_tracker::UserIpTracker;
    use crate::stats::Stats;

    /// Returns the entry for `name`, panicking with "<name> must be present" when it is missing.
    fn expect_user<'a>(users: &'a [UserInfo], name: &str) -> &'a UserInfo {
        users
            .iter()
            .find(|entry| entry.username == name)
            .unwrap_or_else(|| panic!("{name} must be present"))
    }

    /// The reported TCP limit is the positive per-user override when present, otherwise the
    /// positive global limit, otherwise `None`. Without a runtime snapshot nobody is `in_runtime`.
    #[tokio::test]
    async fn users_from_config_reports_effective_tcp_limit_with_global_fallback() {
        let mut cfg = ProxyConfig::default();
        cfg.access.users.insert(
            String::from("alice"),
            String::from("0123456789abcdef0123456789abcdef"),
        );
        cfg.access.user_max_tcp_conns_global_each = 7;

        let stats = Stats::new();
        let tracker = UserIpTracker::new();

        // No per-user override: the global limit applies.
        let listed = users_from_config(&cfg, &stats, &tracker, None, None, None).await;
        let entry = expect_user(&listed, "alice");
        assert!(!entry.in_runtime);
        assert_eq!(entry.max_tcp_conns, Some(7));

        // A positive per-user override wins over the global limit.
        cfg.access.user_max_tcp_conns.insert(String::from("alice"), 5);
        let listed = users_from_config(&cfg, &stats, &tracker, None, None, None).await;
        let entry = expect_user(&listed, "alice");
        assert!(!entry.in_runtime);
        assert_eq!(entry.max_tcp_conns, Some(5));

        // A zero per-user override is ignored and the global limit applies again.
        cfg.access.user_max_tcp_conns.insert(String::from("alice"), 0);
        let listed = users_from_config(&cfg, &stats, &tracker, None, None, None).await;
        let entry = expect_user(&listed, "alice");
        assert!(!entry.in_runtime);
        assert_eq!(entry.max_tcp_conns, Some(7));

        // With the global limit disabled as well, no limit is reported.
        cfg.access.user_max_tcp_conns_global_each = 0;
        let listed = users_from_config(&cfg, &stats, &tracker, None, None, None).await;
        let entry = expect_user(&listed, "alice");
        assert!(!entry.in_runtime);
        assert_eq!(entry.max_tcp_conns, None);
    }

    /// `in_runtime` reflects membership in the runtime snapshot, not in the listed config.
    #[tokio::test]
    async fn users_from_config_marks_runtime_membership_when_snapshot_is_provided() {
        let mut disk_cfg = ProxyConfig::default();
        for (name, secret) in [
            ("alice", "0123456789abcdef0123456789abcdef"),
            ("bob", "fedcba9876543210fedcba9876543210"),
        ] {
            disk_cfg
                .access
                .users
                .insert(name.to_string(), secret.to_string());
        }

        // Only alice is known to the runtime snapshot.
        let mut runtime_cfg = ProxyConfig::default();
        runtime_cfg.access.users.insert(
            String::from("alice"),
            String::from("0123456789abcdef0123456789abcdef"),
        );

        let stats = Stats::new();
        let tracker = UserIpTracker::new();
        let listed =
            users_from_config(&disk_cfg, &stats, &tracker, None, None, Some(&runtime_cfg)).await;

        assert!(expect_user(&listed, "alice").in_runtime);
        assert!(!expect_user(&listed, "bob").in_runtime);
    }
}

View File

@@ -1,11 +1,270 @@
//! CLI commands: --init (fire-and-forget setup)
//! CLI commands: --init (fire-and-forget setup), daemon options, subcommands
//!
//! Subcommands:
//! - `start [OPTIONS] [config.toml]` - Start the daemon
//! - `stop [--pid-file PATH]` - Stop a running daemon
//! - `reload [--pid-file PATH]` - Reload configuration (SIGHUP)
//! - `status [--pid-file PATH]` - Check daemon status
//! - `run [OPTIONS] [config.toml]` - Run in foreground (default behavior)
use rand::RngExt;
use std::fs;
use std::path::{Path, PathBuf};
use std::process::Command;
#[cfg(unix)]
use crate::daemon::{self, DEFAULT_PID_FILE, DaemonOptions};
/// CLI subcommand to execute.
///
/// `Run` and `Start` require the server to be started; the remaining variants are handled
/// entirely by `execute_subcommand` and produce a process exit code.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Subcommand {
/// Run the proxy in the foreground (default when no subcommand is given, or explicit `run`).
Run,
/// Start as daemon (`start` subcommand); daemonization is forced by `parse_command`.
Start,
/// Stop a running daemon (`stop` subcommand) by sending it SIGTERM.
Stop,
/// Reload configuration (`reload` subcommand) by sending SIGHUP to the daemon.
Reload,
/// Check daemon status (`status` subcommand) using the PID file.
Status,
/// Fire-and-forget setup (`--init`, legacy flag form).
Init,
}
/// Parsed subcommand with its options, as produced by `parse_command`.
#[derive(Debug)]
pub struct ParsedCommand {
/// Which subcommand was requested (defaults to `Subcommand::Run`).
pub subcommand: Subcommand,
/// PID file path taken from `--pid-file`, or the platform default when the option is absent.
pub pid_file: PathBuf,
/// Configuration file path: the last positional (non-flag) argument, `config.toml` by default.
pub config_path: String,
/// Daemon options parsed from the command line (Unix only).
#[cfg(unix)]
pub daemon_opts: DaemonOptions,
/// Options for the `--init` setup; populated only when `--init` is present.
pub init_opts: Option<InitOptions>,
}
impl Default for ParsedCommand {
    /// Foreground `run` of `config.toml` with the platform's default PID file and no init options.
    fn default() -> Self {
        // Unix builds take the default from the daemon module; other platforms use a fixed path.
        #[cfg(unix)]
        let pid_file = PathBuf::from(DEFAULT_PID_FILE);
        #[cfg(not(unix))]
        let pid_file = PathBuf::from("/var/run/telemt.pid");

        Self {
            subcommand: Subcommand::Run,
            pid_file,
            config_path: String::from("config.toml"),
            #[cfg(unix)]
            daemon_opts: DaemonOptions::default(),
            init_opts: None,
        }
    }
}
/// Parse CLI arguments into a command structure.
///
/// `args` is the argument list without the program name.
///
/// * If `--init` appears anywhere, the result is `Subcommand::Init` with options from
///   `parse_init_args`; nothing else is parsed.
/// * Otherwise the first argument selects the subcommand (`start`, `stop`, `reload`, `status`,
///   `run`); anything else means `Subcommand::Run`. Daemon options are parsed for `run`, `start`
///   and the implicit run; `start` additionally forces daemonization.
/// * `--pid-file PATH` / `--pid-file=PATH` set the PID file (mirrored into the daemon options on
///   Unix).
/// * The last positional (non-flag) argument becomes the config path. Values that belong to
///   `--run-as-user`, `--run-as-group` and `--working-dir` are skipped so they are not mistaken
///   for the config path.
///
/// Subcommand names are ignored wherever they occur, so a config file cannot be literally
/// named `start`, `stop`, `reload`, `status` or `run`.
pub fn parse_command(args: &[String]) -> ParsedCommand {
    let mut cmd = ParsedCommand::default();

    // Check for --init first (legacy form)
    if args.iter().any(|a| a == "--init") {
        cmd.subcommand = Subcommand::Init;
        cmd.init_opts = parse_init_args(args);
        return cmd;
    }

    // Check for subcommand as first argument
    if let Some(first) = args.first() {
        cmd.subcommand = match first.as_str() {
            "start" => Subcommand::Start,
            "stop" => Subcommand::Stop,
            "reload" => Subcommand::Reload,
            "status" => Subcommand::Status,
            // Explicit `run`, or no subcommand at all.
            _ => Subcommand::Run,
        };

        #[cfg(unix)]
        {
            // Only commands that start the server care about daemon options.
            if matches!(cmd.subcommand, Subcommand::Run | Subcommand::Start) {
                cmd.daemon_opts = parse_daemon_args(args);
            }
            // Force daemonize for start command
            if cmd.subcommand == Subcommand::Start {
                cmd.daemon_opts.daemonize = true;
            }
        }
    }

    // Parse remaining options
    let mut rest = args.iter();
    while let Some(arg) = rest.next() {
        match arg.as_str() {
            // Skip subcommand names
            "start" | "stop" | "reload" | "status" | "run" => {}
            // PID file option (for stop/reload/status)
            "--pid-file" => {
                if let Some(path) = rest.next() {
                    set_pid_file(&mut cmd, path);
                }
            }
            // Daemon options with a separate value: consume the value so it is not
            // treated as the positional config path below.
            "--run-as-user" | "--run-as-group" | "--working-dir" => {
                let _ = rest.next();
            }
            other => {
                if let Some(path) = other.strip_prefix("--pid-file=") {
                    set_pid_file(&mut cmd, path);
                } else if !other.starts_with('-') {
                    // Config path (positional, non-flag argument)
                    cmd.config_path = other.to_string();
                }
            }
        }
    }

    cmd
}

/// Stores `path` as the PID file and, on Unix, mirrors it into the daemon options.
fn set_pid_file(cmd: &mut ParsedCommand, path: &str) {
    cmd.pid_file = PathBuf::from(path);
    #[cfg(unix)]
    {
        cmd.daemon_opts.pid_file = Some(cmd.pid_file.clone());
    }
}
/// Execute a subcommand that doesn't require starting the server.
/// Returns `Some(exit_code)` if the command was handled, `None` if server should start.
#[cfg(unix)]
pub fn execute_subcommand(cmd: &ParsedCommand) -> Option<i32> {
    match cmd.subcommand {
        Subcommand::Stop => Some(cmd_stop(&cmd.pid_file)),
        Subcommand::Reload => Some(cmd_reload(&cmd.pid_file)),
        Subcommand::Status => Some(cmd_status(&cmd.pid_file)),
        Subcommand::Init => Some(run_init_subcommand(cmd)),
        // Run and Start need the server
        Subcommand::Run | Subcommand::Start => None,
    }
}

/// Execute a subcommand that doesn't require starting the server.
/// Returns `Some(exit_code)` if the command was handled, `None` if server should start.
///
/// Daemon control (`stop`, `reload`, `status`) is not available on non-Unix platforms and
/// fails with exit code 1.
#[cfg(not(unix))]
pub fn execute_subcommand(cmd: &ParsedCommand) -> Option<i32> {
    match cmd.subcommand {
        Subcommand::Stop | Subcommand::Reload | Subcommand::Status => {
            eprintln!("[telemt] Subcommand not supported on this platform");
            Some(1)
        }
        Subcommand::Init => Some(run_init_subcommand(cmd)),
        Subcommand::Run | Subcommand::Start => None,
    }
}

/// Runs the `--init` setup shared by all platforms and maps the outcome to an exit code:
/// 0 on success, 1 when setup fails or no init options were parsed.
fn run_init_subcommand(cmd: &ParsedCommand) -> i32 {
    let Some(opts) = cmd.init_opts.clone() else {
        return 1;
    };
    match run_init(opts) {
        Ok(()) => 0,
        Err(e) => {
            eprintln!("[telemt] Init failed: {}", e);
            1
        }
    }
}
/// Stop command: send SIGTERM to the running daemon.
///
/// After the signal is delivered the PID file is polled every 500 ms for up to 10 seconds.
/// The daemon counts as stopped when the status is `NotRunning`, or `Stale` (the process is
/// gone but its PID file was left behind); a stale PID file is removed, as `cmd_status` does.
///
/// Returns 0 when the signal was sent (even if the daemon has not exited within the wait
/// window) and 1 when the signal could not be delivered.
#[cfg(unix)]
fn cmd_stop(pid_file: &Path) -> i32 {
    use nix::sys::signal::Signal;

    const POLL_INTERVAL: std::time::Duration = std::time::Duration::from_millis(500);
    // 20 polls * 500 ms = up to 10 seconds.
    const POLL_ATTEMPTS: u32 = 20;

    println!("Stopping telemt daemon...");

    if let Err(e) = daemon::signal_pid_file(pid_file, Signal::SIGTERM) {
        eprintln!("Failed to stop daemon: {}", e);
        return 1;
    }
    println!("Stop signal sent successfully");

    // Wait for process to exit (up to 10 seconds)
    for _ in 0..POLL_ATTEMPTS {
        std::thread::sleep(POLL_INTERVAL);
        match daemon::check_status(pid_file) {
            daemon::DaemonStatus::NotRunning => {
                println!("Daemon stopped");
                return 0;
            }
            daemon::DaemonStatus::Stale(_) => {
                // Process exited without cleaning up; best-effort removal of the leftover file.
                let _ = std::fs::remove_file(pid_file);
                println!("Daemon stopped");
                return 0;
            }
            daemon::DaemonStatus::Running(_) => {}
        }
    }

    println!("Daemon may still be shutting down");
    0
}
/// Reload command: ask the daemon to re-read its configuration by sending SIGHUP.
///
/// Returns 0 when the signal was delivered and 1 otherwise.
#[cfg(unix)]
fn cmd_reload(pid_file: &Path) -> i32 {
    use nix::sys::signal::Signal;

    println!("Reloading telemt configuration...");

    if let Err(err) = daemon::signal_pid_file(pid_file, Signal::SIGHUP) {
        eprintln!("Failed to reload daemon: {}", err);
        return 1;
    }

    println!("Reload signal sent successfully");
    0
}
/// Status command: report whether the daemon referenced by `pid_file` is running.
///
/// Returns 0 when it is running and 1 otherwise. A stale PID file is removed (best effort).
#[cfg(unix)]
fn cmd_status(pid_file: &Path) -> i32 {
    match daemon::check_status(pid_file) {
        daemon::DaemonStatus::Running(pid) => {
            println!("telemt is running (pid {})", pid);
            return 0;
        }
        daemon::DaemonStatus::Stale(pid) => {
            println!("telemt is not running (stale pid file, was pid {})", pid);
            // The process is gone; drop the leftover PID file and ignore removal errors.
            let _ = std::fs::remove_file(pid_file);
        }
        daemon::DaemonStatus::NotRunning => println!("telemt is not running"),
    }

    // Every non-running outcome shares the same exit code.
    1
}
/// Options for the init command
#[derive(Debug, Clone)]
pub struct InitOptions {
pub port: u16,
pub domain: String,
@@ -15,6 +274,64 @@ pub struct InitOptions {
pub no_start: bool,
}
/// Parse daemon-related options from CLI args.
///
/// Recognized options:
/// * `--daemon` / `-d` sets `daemonize`
/// * `--foreground` / `-f` sets `foreground`
/// * `--pid-file`, `--run-as-user`, `--run-as-group`, `--working-dir`, each accepted as
///   `--opt VALUE` or `--opt=VALUE`
///
/// Unknown arguments are ignored. A value-taking option that appears last with no value is
/// ignored as well. For the `--opt=VALUE` form only the single leading `--opt=` is removed,
/// so the value is preserved verbatim even if it begins with the same text.
#[cfg(unix)]
pub fn parse_daemon_args(args: &[String]) -> DaemonOptions {
    let mut opts = DaemonOptions::default();
    let mut rest = args.iter();

    while let Some(arg) = rest.next() {
        match arg.as_str() {
            "--daemon" | "-d" => opts.daemonize = true,
            "--foreground" | "-f" => opts.foreground = true,
            // `--opt VALUE` forms: the next argument is consumed as the value.
            "--pid-file" => {
                if let Some(value) = rest.next() {
                    opts.pid_file = Some(PathBuf::from(value));
                }
            }
            "--run-as-user" => {
                if let Some(value) = rest.next() {
                    opts.user = Some(value.clone());
                }
            }
            "--run-as-group" => {
                if let Some(value) = rest.next() {
                    opts.group = Some(value.clone());
                }
            }
            "--working-dir" => {
                if let Some(value) = rest.next() {
                    opts.working_dir = Some(PathBuf::from(value));
                }
            }
            // `--opt=VALUE` forms.
            other => {
                if let Some(value) = other.strip_prefix("--pid-file=") {
                    opts.pid_file = Some(PathBuf::from(value));
                } else if let Some(value) = other.strip_prefix("--run-as-user=") {
                    opts.user = Some(value.to_string());
                } else if let Some(value) = other.strip_prefix("--run-as-group=") {
                    opts.group = Some(value.to_string());
                } else if let Some(value) = other.strip_prefix("--working-dir=") {
                    opts.working_dir = Some(PathBuf::from(value));
                }
            }
        }
    }

    opts
}
impl Default for InitOptions {
fn default() -> Self {
Self {
@@ -84,10 +401,16 @@ pub fn parse_init_args(args: &[String]) -> Option<InitOptions> {
/// Run the fire-and-forget setup.
pub fn run_init(opts: InitOptions) -> Result<(), Box<dyn std::error::Error>> {
use crate::service::{self, InitSystem, ServiceOptions};
eprintln!("[telemt] Fire-and-forget setup");
eprintln!();
// 1. Generate or validate secret
// 1. Detect init system
let init_system = service::detect_init_system();
eprintln!("[+] Detected init system: {}", init_system);
// 2. Generate or validate secret
let secret = match opts.secret {
Some(s) => {
if s.len() != 32 || !s.chars().all(|c| c.is_ascii_hexdigit()) {
@@ -104,72 +427,126 @@ pub fn run_init(opts: InitOptions) -> Result<(), Box<dyn std::error::Error>> {
eprintln!("[+] Port: {}", opts.port);
eprintln!("[+] Domain: {}", opts.domain);
// 2. Create config directory
// 3. Create config directory
fs::create_dir_all(&opts.config_dir)?;
let config_path = opts.config_dir.join("config.toml");
// 3. Write config
// 4. Write config
let config_content = generate_config(&opts.username, &secret, opts.port, &opts.domain);
fs::write(&config_path, &config_content)?;
eprintln!("[+] Config written to {}", config_path.display());
// 4. Write systemd unit
// 5. Generate and write service file
let exe_path =
std::env::current_exe().unwrap_or_else(|_| PathBuf::from("/usr/local/bin/telemt"));
let unit_path = Path::new("/etc/systemd/system/telemt.service");
let unit_content = generate_systemd_unit(&exe_path, &config_path);
let service_opts = ServiceOptions {
exe_path: &exe_path,
config_path: &config_path,
user: None, // Let systemd/init handle user
group: None,
pid_file: "/var/run/telemt.pid",
working_dir: Some("/var/lib/telemt"),
description: "Telemt MTProxy - Telegram MTProto Proxy",
};
match fs::write(unit_path, &unit_content) {
let service_path = service::service_file_path(init_system);
let service_content = service::generate_service_file(init_system, &service_opts);
// Ensure parent directory exists
if let Some(parent) = Path::new(service_path).parent() {
let _ = fs::create_dir_all(parent);
}
match fs::write(service_path, &service_content) {
Ok(()) => {
eprintln!("[+] Systemd unit written to {}", unit_path.display());
eprintln!("[+] Service file written to {}", service_path);
// Make script executable for OpenRC/FreeBSD
#[cfg(unix)]
if init_system == InitSystem::OpenRC || init_system == InitSystem::FreeBSDRc {
use std::os::unix::fs::PermissionsExt;
let mut perms = fs::metadata(service_path)?.permissions();
perms.set_mode(0o755);
fs::set_permissions(service_path, perms)?;
}
}
Err(e) => {
eprintln!("[!] Cannot write systemd unit (run as root?): {}", e);
eprintln!("[!] Manual unit file content:");
eprintln!("{}", unit_content);
eprintln!("[!] Cannot write service file (run as root?): {}", e);
eprintln!("[!] Manual service file content:");
eprintln!("{}", service_content);
// Still print links and config
// Still print links and installation instructions
eprintln!();
eprintln!("{}", service::installation_instructions(init_system));
print_links(&opts.username, &secret, opts.port, &opts.domain);
return Ok(());
}
}
// 5. Reload systemd
run_cmd("systemctl", &["daemon-reload"]);
// 6. Install and enable service based on init system
match init_system {
InitSystem::Systemd => {
run_cmd("systemctl", &["daemon-reload"]);
run_cmd("systemctl", &["enable", "telemt.service"]);
eprintln!("[+] Service enabled");
// 6. Enable service
run_cmd("systemctl", &["enable", "telemt.service"]);
eprintln!("[+] Service enabled");
if !opts.no_start {
run_cmd("systemctl", &["start", "telemt.service"]);
eprintln!("[+] Service started");
// 7. Start service (unless --no-start)
if !opts.no_start {
run_cmd("systemctl", &["start", "telemt.service"]);
eprintln!("[+] Service started");
std::thread::sleep(std::time::Duration::from_secs(1));
let status = Command::new("systemctl")
.args(["is-active", "telemt.service"])
.output();
// Brief delay then check status
std::thread::sleep(std::time::Duration::from_secs(1));
let status = Command::new("systemctl")
.args(["is-active", "telemt.service"])
.output();
match status {
Ok(out) if out.status.success() => {
eprintln!("[+] Service is running");
}
_ => {
eprintln!("[!] Service may not have started correctly");
eprintln!("[!] Check: journalctl -u telemt.service -n 20");
match status {
Ok(out) if out.status.success() => {
eprintln!("[+] Service is running");
}
_ => {
eprintln!("[!] Service may not have started correctly");
eprintln!("[!] Check: journalctl -u telemt.service -n 20");
}
}
} else {
eprintln!("[+] Service not started (--no-start)");
eprintln!("[+] Start manually: systemctl start telemt.service");
}
}
} else {
eprintln!("[+] Service not started (--no-start)");
eprintln!("[+] Start manually: systemctl start telemt.service");
InitSystem::OpenRC => {
run_cmd("rc-update", &["add", "telemt", "default"]);
eprintln!("[+] Service enabled");
if !opts.no_start {
run_cmd("rc-service", &["telemt", "start"]);
eprintln!("[+] Service started");
} else {
eprintln!("[+] Service not started (--no-start)");
eprintln!("[+] Start manually: rc-service telemt start");
}
}
InitSystem::FreeBSDRc => {
run_cmd("sysrc", &["telemt_enable=YES"]);
eprintln!("[+] Service enabled");
if !opts.no_start {
run_cmd("service", &["telemt", "start"]);
eprintln!("[+] Service started");
} else {
eprintln!("[+] Service not started (--no-start)");
eprintln!("[+] Start manually: service telemt start");
}
}
InitSystem::Unknown => {
eprintln!("[!] Unknown init system - service file written but not installed");
eprintln!("[!] You may need to install it manually");
}
}
eprintln!();
// 8. Print links
// 7. Print links
print_links(&opts.username, &secret, opts.port, &opts.domain);
Ok(())
@@ -207,6 +584,7 @@ me_pool_drain_soft_evict_cooldown_ms = 1000
me_bind_stale_mode = "never"
me_pool_min_fresh_ratio = 0.8
me_reinit_drain_timeout_secs = 90
tg_connect = 10
[network]
ipv4 = true
@@ -232,8 +610,8 @@ ip = "0.0.0.0"
ip = "::"
[timeouts]
client_handshake = 15
tg_connect = 10
client_first_byte_idle_secs = 300
client_handshake = 60
client_keepalive = 60
client_ack = 300
@@ -245,6 +623,7 @@ fake_cert_len = 2048
tls_full_cert_ttl_secs = 90
[access]
user_max_tcp_conns_global_each = 0
replay_check_len = 65536
replay_window_secs = 120
ignore_time_skew = false
@@ -264,35 +643,6 @@ weight = 10
)
}
fn generate_systemd_unit(exe_path: &Path, config_path: &Path) -> String {
format!(
r#"[Unit]
Description=Telemt MTProxy
Documentation=https://github.com/telemt/telemt
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart={exe} {config}
Restart=always
RestartSec=5
LimitNOFILE=65535
# Security hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
ReadWritePaths=/etc/telemt
PrivateTmp=true
[Install]
WantedBy=multi-user.target
"#,
exe = exe_path.display(),
config = config_path.display(),
)
}
fn run_cmd(cmd: &str, args: &[&str]) {
match Command::new(cmd).args(args).output() {
Ok(output) => {

View File

@@ -110,7 +110,11 @@ pub(crate) fn default_replay_window_secs() -> u64 {
}
pub(crate) fn default_handshake_timeout() -> u64 {
30
60
}
/// Default idle wait, in seconds, for the first client byte (5 minutes).
pub(crate) fn default_client_first_byte_idle_secs() -> u64 {
    const FIRST_BYTE_IDLE_SECS: u64 = 300;
    FIRST_BYTE_IDLE_SECS
}
pub(crate) fn default_relay_idle_policy_v2_enabled() -> bool {
@@ -209,6 +213,10 @@ pub(crate) fn default_server_max_connections() -> u32 {
10_000
}
/// Default `listen(2)` backlog used for `server.listen_backlog`.
pub(crate) fn default_listen_backlog() -> u32 {
    const LISTEN_BACKLOG: u32 = 1024;
    LISTEN_BACKLOG
}
pub(crate) fn default_accept_permit_timeout_ms() -> u64 {
DEFAULT_ACCEPT_PERMIT_TIMEOUT_MS
}
@@ -273,6 +281,10 @@ pub(crate) fn default_me2dc_fallback() -> bool {
true
}
/// Default for `general.me2dc_fast`: fast ME->Direct fallback is off unless configured.
pub(crate) fn default_me2dc_fast() -> bool {
    const ME2DC_FAST: bool = false;
    ME2DC_FAST
}
pub(crate) fn default_keepalive_interval() -> u64 {
8
}
@@ -799,6 +811,10 @@ pub(crate) fn default_user_max_unique_ips_window_secs() -> u64 {
DEFAULT_USER_MAX_UNIQUE_IPS_WINDOW_SECS
}
/// Default for `access.user_max_tcp_conns_global_each`.
/// Zero means no global per-user TCP connection limit is reported.
pub(crate) fn default_user_max_tcp_conns_global_each() -> usize {
    const GLOBAL_EACH: usize = 0;
    GLOBAL_EACH
}
pub(crate) fn default_user_max_unique_ips_global_each() -> usize {
0
}

View File

@@ -117,6 +117,7 @@ pub struct HotFields {
pub users: std::collections::HashMap<String, String>,
pub user_ad_tags: std::collections::HashMap<String, String>,
pub user_max_tcp_conns: std::collections::HashMap<String, usize>,
pub user_max_tcp_conns_global_each: usize,
pub user_expirations: std::collections::HashMap<String, chrono::DateTime<chrono::Utc>>,
pub user_data_quota: std::collections::HashMap<String, u64>,
pub user_max_unique_ips: std::collections::HashMap<String, usize>,
@@ -240,6 +241,7 @@ impl HotFields {
users: cfg.access.users.clone(),
user_ad_tags: cfg.access.user_ad_tags.clone(),
user_max_tcp_conns: cfg.access.user_max_tcp_conns.clone(),
user_max_tcp_conns_global_each: cfg.access.user_max_tcp_conns_global_each,
user_expirations: cfg.access.user_expirations.clone(),
user_data_quota: cfg.access.user_data_quota.clone(),
user_max_unique_ips: cfg.access.user_max_unique_ips.clone(),
@@ -530,6 +532,7 @@ fn overlay_hot_fields(old: &ProxyConfig, new: &ProxyConfig) -> ProxyConfig {
cfg.access.users = new.access.users.clone();
cfg.access.user_ad_tags = new.access.user_ad_tags.clone();
cfg.access.user_max_tcp_conns = new.access.user_max_tcp_conns.clone();
cfg.access.user_max_tcp_conns_global_each = new.access.user_max_tcp_conns_global_each;
cfg.access.user_expirations = new.access.user_expirations.clone();
cfg.access.user_data_quota = new.access.user_data_quota.clone();
cfg.access.user_max_unique_ips = new.access.user_max_unique_ips.clone();
@@ -570,6 +573,7 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
}
if old.server.proxy_protocol != new.server.proxy_protocol
|| !listeners_equal(&old.server.listeners, &new.server.listeners)
|| old.server.listen_backlog != new.server.listen_backlog
|| old.server.listen_addr_ipv4 != new.server.listen_addr_ipv4
|| old.server.listen_addr_ipv6 != new.server.listen_addr_ipv6
|| old.server.listen_tcp != new.server.listen_tcp
@@ -651,6 +655,9 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
}
if old.general.me_route_no_writer_mode != new.general.me_route_no_writer_mode
|| old.general.me_route_no_writer_wait_ms != new.general.me_route_no_writer_wait_ms
|| old.general.me_route_hybrid_max_wait_ms != new.general.me_route_hybrid_max_wait_ms
|| old.general.me_route_blocking_send_timeout_ms
!= new.general.me_route_blocking_send_timeout_ms
|| old.general.me_route_inline_recovery_attempts
!= new.general.me_route_inline_recovery_attempts
|| old.general.me_route_inline_recovery_wait_ms
@@ -669,9 +676,11 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
warned = true;
warn!("config reload: general.me_init_retry_attempts changed; restart required");
}
if old.general.me2dc_fallback != new.general.me2dc_fallback {
if old.general.me2dc_fallback != new.general.me2dc_fallback
|| old.general.me2dc_fast != new.general.me2dc_fast
{
warned = true;
warn!("config reload: general.me2dc_fallback changed; restart required");
warn!("config reload: general.me2dc_fallback/me2dc_fast changed; restart required");
}
if old.general.proxy_config_v4_cache_path != new.general.proxy_config_v4_cache_path
|| old.general.proxy_config_v6_cache_path != new.general.proxy_config_v6_cache_path
@@ -690,6 +699,7 @@ fn warn_non_hot_changes(old: &ProxyConfig, new: &ProxyConfig, non_hot_changed: b
if old.general.upstream_connect_retry_attempts != new.general.upstream_connect_retry_attempts
|| old.general.upstream_connect_retry_backoff_ms
!= new.general.upstream_connect_retry_backoff_ms
|| old.general.tg_connect != new.general.tg_connect
|| old.general.upstream_unhealthy_fail_threshold
!= new.general.upstream_unhealthy_fail_threshold
|| old.general.upstream_connect_failfast_hard_errors
@@ -1138,6 +1148,12 @@ fn log_changes(
new_hot.user_max_tcp_conns.len()
);
}
if old_hot.user_max_tcp_conns_global_each != new_hot.user_max_tcp_conns_global_each {
info!(
"config reload: user_max_tcp_conns policy global_each={}",
new_hot.user_max_tcp_conns_global_each
);
}
if old_hot.user_expirations != new_hot.user_expirations {
info!(
"config reload: user_expirations updated ({} entries)",

View File

@@ -346,6 +346,12 @@ impl ProxyConfig {
));
}
if config.general.tg_connect == 0 {
return Err(ProxyError::Config(
"general.tg_connect must be > 0".to_string(),
));
}
if config.general.upstream_unhealthy_fail_threshold == 0 {
return Err(ProxyError::Config(
"general.upstream_unhealthy_fail_threshold must be > 0".to_string(),
@@ -1217,6 +1223,7 @@ mod tests {
default_me_init_retry_attempts()
);
assert_eq!(cfg.general.me2dc_fallback, default_me2dc_fallback());
assert_eq!(cfg.general.me2dc_fast, default_me2dc_fast());
assert_eq!(
cfg.general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
@@ -1321,6 +1328,10 @@ mod tests {
default_api_runtime_edge_events_capacity()
);
assert_eq!(cfg.access.users, default_access_users());
assert_eq!(
cfg.access.user_max_tcp_conns_global_each,
default_user_max_tcp_conns_global_each()
);
assert_eq!(
cfg.access.user_max_unique_ips_mode,
UserMaxUniqueIpsMode::default()
@@ -1356,6 +1367,7 @@ mod tests {
default_me_init_retry_attempts()
);
assert_eq!(general.me2dc_fallback, default_me2dc_fallback());
assert_eq!(general.me2dc_fast, default_me2dc_fast());
assert_eq!(
general.proxy_config_v4_cache_path,
default_proxy_config_v4_cache_path()
@@ -1463,6 +1475,10 @@ mod tests {
let access = AccessConfig::default();
assert_eq!(access.users, default_access_users());
assert_eq!(
access.user_max_tcp_conns_global_each,
default_user_max_tcp_conns_global_each()
);
}
#[test]
@@ -1905,6 +1921,26 @@ mod tests {
let _ = std::fs::remove_file(path);
}
/// `general.tg_connect = 0` must be rejected by config validation with a clear message.
#[test]
fn tg_connect_zero_is_rejected() {
    let toml = r#"
[general]
tg_connect = 0
[censorship]
tls_domain = "example.com"
[access.users]
user = "00000000000000000000000000000000"
"#;
    let dir = std::env::temp_dir();
    let path = dir.join("telemt_tg_connect_zero_test.toml");
    std::fs::write(&path, toml).unwrap();

    // Load first, then clean up before asserting so a failing test does not leak the temp file.
    let result = ProxyConfig::load(&path);
    let _ = std::fs::remove_file(&path);

    let err = result.unwrap_err().to_string();
    assert!(err.contains("general.tg_connect must be > 0"));
}
#[test]
fn rpc_proxy_req_every_out_of_range_is_rejected() {
let toml = r#"

View File

@@ -17,6 +17,28 @@ fn remove_temp_config(path: &PathBuf) {
let _ = fs::remove_file(path);
}
/// The built-in defaults use a 300 s first-byte idle window and a 60 s handshake timeout.
#[test]
fn default_timeouts_enable_apple_compatible_handshake_profile() {
    let defaults = ProxyConfig::default();
    let timeouts = &defaults.timeouts;

    assert_eq!(timeouts.client_first_byte_idle_secs, 300);
    assert_eq!(timeouts.client_handshake, 60);
}
/// A zero `client_first_byte_idle_secs` is a valid setting and must load unchanged.
#[test]
fn load_accepts_zero_first_byte_idle_timeout_as_legacy_opt_out() {
    let path = write_temp_config(
        r#"
[timeouts]
client_first_byte_idle_secs = 0
"#,
    );

    // Remove the temp file before unwrapping so a load failure does not leak it.
    let loaded = ProxyConfig::load(&path);
    remove_temp_config(&path);

    let cfg = loaded.expect("config with zero first-byte idle timeout must load");
    assert_eq!(cfg.timeouts.client_first_byte_idle_secs, 0);
}
#[test]
fn load_rejects_relay_hard_idle_smaller_than_soft_idle_with_clear_error() {
let path = write_temp_config(

View File

@@ -429,6 +429,11 @@ pub struct GeneralConfig {
#[serde(default = "default_me2dc_fallback")]
pub me2dc_fallback: bool,
/// Fast ME->Direct fallback mode for new sessions.
/// Active only when both `use_middle_proxy=true` and `me2dc_fallback=true`.
#[serde(default = "default_me2dc_fast")]
pub me2dc_fast: bool,
/// Enable ME keepalive padding frames.
#[serde(default = "default_true")]
pub me_keepalive_enabled: bool,
@@ -658,6 +663,10 @@ pub struct GeneralConfig {
#[serde(default = "default_upstream_connect_budget_ms")]
pub upstream_connect_budget_ms: u64,
/// Per-attempt TCP connect timeout to Telegram DC (seconds).
#[serde(default = "default_connect_timeout")]
pub tg_connect: u64,
/// Consecutive failed requests before upstream is marked unhealthy.
#[serde(default = "default_upstream_unhealthy_fail_threshold")]
pub upstream_unhealthy_fail_threshold: u32,
@@ -939,6 +948,7 @@ impl Default for GeneralConfig {
middle_proxy_warm_standby: default_middle_proxy_warm_standby(),
me_init_retry_attempts: default_me_init_retry_attempts(),
me2dc_fallback: default_me2dc_fallback(),
me2dc_fast: default_me2dc_fast(),
me_keepalive_enabled: default_true(),
me_keepalive_interval_secs: default_keepalive_interval(),
me_keepalive_jitter_secs: default_keepalive_jitter(),
@@ -1001,6 +1011,7 @@ impl Default for GeneralConfig {
upstream_connect_retry_attempts: default_upstream_connect_retry_attempts(),
upstream_connect_retry_backoff_ms: default_upstream_connect_retry_backoff_ms(),
upstream_connect_budget_ms: default_upstream_connect_budget_ms(),
tg_connect: default_connect_timeout(),
upstream_unhealthy_fail_threshold: default_upstream_unhealthy_fail_threshold(),
upstream_connect_failfast_hard_errors: default_upstream_connect_failfast_hard_errors(),
stun_iface_mismatch_ignore: false,
@@ -1266,6 +1277,11 @@ pub struct ServerConfig {
#[serde(default)]
pub listeners: Vec<ListenerConfig>,
/// TCP `listen(2)` backlog for client-facing sockets (also used for the metrics HTTP listener).
/// The effective queue is capped by the kernel (for example `somaxconn` on Linux).
#[serde(default = "default_listen_backlog")]
pub listen_backlog: u32,
/// Maximum number of concurrent client connections.
/// 0 means unlimited.
#[serde(default = "default_server_max_connections")]
@@ -1294,6 +1310,7 @@ impl Default for ServerConfig {
metrics_whitelist: default_metrics_whitelist(),
api: ApiConfig::default(),
listeners: Vec::new(),
listen_backlog: default_listen_backlog(),
max_connections: default_server_max_connections(),
accept_permit_timeout_ms: default_accept_permit_timeout_ms(),
}
@@ -1302,6 +1319,12 @@ impl Default for ServerConfig {
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TimeoutsConfig {
/// Maximum idle wait in seconds for the first client byte before handshake parsing starts.
/// `0` disables the separate idle phase and keeps legacy timeout behavior.
#[serde(default = "default_client_first_byte_idle_secs")]
pub client_first_byte_idle_secs: u64,
/// Maximum active handshake duration in seconds after the first client byte is received.
#[serde(default = "default_handshake_timeout")]
pub client_handshake: u64,
@@ -1323,9 +1346,6 @@ pub struct TimeoutsConfig {
#[serde(default = "default_relay_idle_grace_after_downstream_activity_secs")]
pub relay_idle_grace_after_downstream_activity_secs: u64,
#[serde(default = "default_connect_timeout")]
pub tg_connect: u64,
#[serde(default = "default_keepalive")]
pub client_keepalive: u64,
@@ -1344,13 +1364,13 @@ pub struct TimeoutsConfig {
impl Default for TimeoutsConfig {
fn default() -> Self {
Self {
client_first_byte_idle_secs: default_client_first_byte_idle_secs(),
client_handshake: default_handshake_timeout(),
relay_idle_policy_v2_enabled: default_relay_idle_policy_v2_enabled(),
relay_client_idle_soft_secs: default_relay_client_idle_soft_secs(),
relay_client_idle_hard_secs: default_relay_client_idle_hard_secs(),
relay_idle_grace_after_downstream_activity_secs:
default_relay_idle_grace_after_downstream_activity_secs(),
tg_connect: default_connect_timeout(),
client_keepalive: default_keepalive(),
client_ack: default_ack_timeout(),
me_one_retry: default_me_one_retry(),
@@ -1613,6 +1633,12 @@ pub struct AccessConfig {
#[serde(default)]
pub user_max_tcp_conns: HashMap<String, usize>,
/// Global per-user TCP connection limit applied when a user has no
/// positive individual override.
/// `0` disables the inherited limit.
#[serde(default = "default_user_max_tcp_conns_global_each")]
pub user_max_tcp_conns_global_each: usize,
#[serde(default)]
pub user_expirations: HashMap<String, DateTime<Utc>>,
@@ -1649,6 +1675,7 @@ impl Default for AccessConfig {
users: default_access_users(),
user_ad_tags: HashMap::new(),
user_max_tcp_conns: HashMap::new(),
user_max_tcp_conns_global_each: default_user_max_tcp_conns_global_each(),
user_expirations: HashMap::new(),
user_data_quota: HashMap::new(),
user_max_unique_ips: HashMap::new(),

541
src/daemon/mod.rs Normal file
View File

@@ -0,0 +1,541 @@
//! Unix daemon support for telemt.
//!
//! Provides classic Unix daemonization (double-fork), PID file management,
//! and privilege dropping for running telemt as a background service.
use std::fs::{self, File, OpenOptions};
use std::io::{self, Read, Write};
use std::os::unix::fs::OpenOptionsExt;
use std::path::{Path, PathBuf};
use nix::fcntl::{Flock, FlockArg};
use nix::unistd::{self, ForkResult, Gid, Pid, Uid, chdir, close, fork, getpid, setsid};
use tracing::{debug, info, warn};
/// PID file path used when the caller does not supply one.
pub const DEFAULT_PID_FILE: &str = "/var/run/telemt.pid";

/// Daemon-related settings collected from the command line.
#[derive(Debug, Clone, Default)]
pub struct DaemonOptions {
    /// Run as daemon (fork to background).
    pub daemonize: bool,
    /// Path to PID file; `None` selects [`DEFAULT_PID_FILE`].
    pub pid_file: Option<PathBuf>,
    /// User to run as after binding sockets.
    pub user: Option<String>,
    /// Group to run as after binding sockets.
    pub group: Option<String>,
    /// Working directory for the daemon.
    pub working_dir: Option<PathBuf>,
    /// Explicit foreground mode (for systemd Type=simple).
    pub foreground: bool,
}

impl DaemonOptions {
    /// Resolves the PID file location: the configured path when present,
    /// otherwise [`DEFAULT_PID_FILE`].
    pub fn pid_file_path(&self) -> &Path {
        match &self.pid_file {
            Some(custom) => custom.as_path(),
            None => Path::new(DEFAULT_PID_FILE),
        }
    }

    /// Reports whether the process should fork into the background.
    ///
    /// An explicit `foreground` request always wins over `daemonize`.
    pub fn should_daemonize(&self) -> bool {
        !self.foreground && self.daemonize
    }
}
/// Error types for daemon operations.
///
/// Each variant corresponds to one failing step of daemonization, PID file
/// handling, or privilege dropping in this module.
#[derive(Debug, thiserror::Error)]
pub enum DaemonError {
    /// `fork()` returned an error during daemonization.
    #[error("fork failed: {0}")]
    ForkFailed(#[source] nix::Error),
    /// `setsid()` failed while creating the new session.
    #[error("setsid failed: {0}")]
    SetsidFailed(#[source] nix::Error),
    /// Changing into the daemon working directory failed.
    #[error("chdir failed: {0}")]
    ChdirFailed(#[source] nix::Error),
    /// `/dev/null` could not be opened for stdio redirection.
    #[error("failed to open /dev/null: {0}")]
    DevNullFailed(#[source] io::Error),
    /// Duplicating `/dev/null` onto stdin/stdout/stderr failed.
    #[error("failed to redirect stdio: {0}")]
    RedirectFailed(#[source] nix::Error),
    /// Any PID file problem (read, parse, create, lock, write, remove, signal);
    /// the payload is a human-readable description.
    #[error("PID file error: {0}")]
    PidFile(String),
    /// The PID file names a process that is still alive; the payload is its PID.
    #[error("another instance is already running (pid {0})")]
    AlreadyRunning(i32),
    /// The requested user name could not be resolved.
    #[error("user '{0}' not found")]
    UserNotFound(String),
    /// The requested group name could not be resolved.
    #[error("group '{0}' not found")]
    GroupNotFound(String),
    /// `setgid`, `setgroups` or `setuid` failed while dropping privileges.
    #[error("failed to set uid/gid: {0}")]
    PrivilegeDrop(#[source] nix::Error),
    /// Generic I/O failure, converted automatically from `io::Error`.
    #[error("io error: {0}")]
    Io(#[from] io::Error),
}
/// Result of a successful daemonize() call.
///
/// Tells the caller which side of the first fork it is running on.
#[derive(Debug)]
pub enum DaemonizeResult {
    /// We are the parent process and should exit.
    /// Returned in the original process right after the first fork.
    Parent,
    /// We are the daemon child process and should continue.
    /// Returned only after the second fork, the working-directory change and
    /// the stdio redirection have all succeeded.
    Child,
}
/// Performs classic Unix double-fork daemonization.
///
/// This detaches the process from the controlling terminal:
/// 1. First fork - parent exits, child continues
/// 2. setsid() - become session leader
/// 3. Second fork - ensure we can never acquire a controlling terminal
/// 4. chdir("/") - don't hold any directory open
/// 5. Redirect stdin/stdout/stderr to /dev/null
///
/// Returns `DaemonizeResult::Parent` in the original parent (which should exit),
/// or `DaemonizeResult::Child` in the final daemon child.
pub fn daemonize(working_dir: Option<&Path>) -> Result<DaemonizeResult, DaemonError> {
// First fork
match unsafe { fork() } {
Ok(ForkResult::Parent { .. }) => {
// Parent exits
return Ok(DaemonizeResult::Parent);
}
Ok(ForkResult::Child) => {
// Child continues
}
Err(e) => return Err(DaemonError::ForkFailed(e)),
}
// Create new session, become session leader
setsid().map_err(DaemonError::SetsidFailed)?;
// Second fork to ensure we can never acquire a controlling terminal
match unsafe { fork() } {
Ok(ForkResult::Parent { .. }) => {
// Intermediate parent exits
std::process::exit(0);
}
Ok(ForkResult::Child) => {
// Final daemon child continues
}
Err(e) => return Err(DaemonError::ForkFailed(e)),
}
// Change working directory
let target_dir = working_dir.unwrap_or(Path::new("/"));
chdir(target_dir).map_err(DaemonError::ChdirFailed)?;
// Redirect stdin, stdout, stderr to /dev/null
redirect_stdio_to_devnull()?;
Ok(DaemonizeResult::Child)
}
/// Redirects stdin, stdout, and stderr to /dev/null.
///
/// # Errors
///
/// Returns [`DaemonError::DevNullFailed`] when `/dev/null` cannot be opened and
/// [`DaemonError::RedirectFailed`] when one of the `dup2` calls fails.
fn redirect_stdio_to_devnull() -> Result<(), DaemonError> {
    let devnull = File::options()
        .read(true)
        .write(true)
        .open("/dev/null")
        .map_err(DaemonError::DevNullFailed)?;

    // Take over the raw descriptor so it is closed exactly once, below.
    // Letting the `File` drop as well would close it a second time, and would
    // close a freshly redirected stdio descriptor if the open landed on fd 0-2.
    let devnull_fd = std::os::unix::io::IntoRawFd::into_raw_fd(devnull);

    // Use libc::dup2 directly for redirecting standard file descriptors
    // nix 0.31's dup2 requires OwnedFd which doesn't work well with stdio fds
    let mut outcome = Ok(());
    for target_fd in 0..=2 {
        // SAFETY: `devnull_fd` is an open descriptor owned by this function and
        // `target_fd` is a plain integer; dup2 touches no Rust-managed memory.
        if unsafe { libc::dup2(devnull_fd, target_fd) } < 0 {
            // Capture errno before `close` below can overwrite it.
            outcome = Err(DaemonError::RedirectFailed(nix::errno::Errno::last()));
            break;
        }
    }

    // Close original devnull fd if it's not one of the standard fds.
    if devnull_fd > 2 {
        let _ = close(devnull_fd);
    }

    outcome
}
/// PID file manager with flock-based locking.
pub struct PidFile {
path: PathBuf,
file: Option<File>,
locked: bool,
}
impl PidFile {
/// Creates a new PID file manager for the given path.
pub fn new<P: AsRef<Path>>(path: P) -> Self {
Self {
path: path.as_ref().to_path_buf(),
file: None,
locked: false,
}
}
/// Checks if another instance is already running.
///
/// Returns the PID of the running instance if one exists.
pub fn check_running(&self) -> Result<Option<i32>, DaemonError> {
if !self.path.exists() {
return Ok(None);
}
// Try to read existing PID
let mut contents = String::new();
File::open(&self.path)
.and_then(|mut f| f.read_to_string(&mut contents))
.map_err(|e| {
DaemonError::PidFile(format!("cannot read {}: {}", self.path.display(), e))
})?;
let pid: i32 = contents
.trim()
.parse()
.map_err(|_| DaemonError::PidFile(format!("invalid PID in {}", self.path.display())))?;
// Check if process is still running
if is_process_running(pid) {
Ok(Some(pid))
} else {
// Stale PID file
debug!(pid, path = %self.path.display(), "Removing stale PID file");
let _ = fs::remove_file(&self.path);
Ok(None)
}
}
/// Acquires the PID file lock and writes the current PID.
///
/// Fails if another instance is already running.
pub fn acquire(&mut self) -> Result<(), DaemonError> {
// Check for running instance first
if let Some(pid) = self.check_running()? {
return Err(DaemonError::AlreadyRunning(pid));
}
// Ensure parent directory exists
if let Some(parent) = self.path.parent() {
if !parent.exists() {
fs::create_dir_all(parent).map_err(|e| {
DaemonError::PidFile(format!(
"cannot create directory {}: {}",
parent.display(),
e
))
})?;
}
}
// Open/create PID file with exclusive lock
let file = OpenOptions::new()
.write(true)
.create(true)
.truncate(true)
.mode(0o644)
.open(&self.path)
.map_err(|e| {
DaemonError::PidFile(format!("cannot open {}: {}", self.path.display(), e))
})?;
// Try to acquire exclusive lock (non-blocking)
let flock = Flock::lock(file, FlockArg::LockExclusiveNonblock).map_err(|(_, errno)| {
// Check if another instance grabbed the lock
if let Some(pid) = self.check_running().ok().flatten() {
DaemonError::AlreadyRunning(pid)
} else {
DaemonError::PidFile(format!("cannot lock {}: {}", self.path.display(), errno))
}
})?;
// Write our PID
let pid = getpid();
let mut file = flock
.unlock()
.map_err(|(_, errno)| DaemonError::PidFile(format!("unlock failed: {}", errno)))?;
writeln!(file, "{}", pid).map_err(|e| {
DaemonError::PidFile(format!(
"cannot write PID to {}: {}",
self.path.display(),
e
))
})?;
// Re-acquire lock and keep it
let flock = Flock::lock(file, FlockArg::LockExclusiveNonblock).map_err(|(_, errno)| {
DaemonError::PidFile(format!("cannot re-lock {}: {}", self.path.display(), errno))
})?;
self.file = Some(flock.unlock().map_err(|(_, errno)| {
DaemonError::PidFile(format!("unlock for storage failed: {}", errno))
})?);
self.locked = true;
info!(pid = pid.as_raw(), path = %self.path.display(), "PID file created");
Ok(())
}
/// Releases the PID file lock and removes the file.
pub fn release(&mut self) -> Result<(), DaemonError> {
if let Some(file) = self.file.take() {
drop(file);
}
self.locked = false;
if self.path.exists() {
fs::remove_file(&self.path).map_err(|e| {
DaemonError::PidFile(format!("cannot remove {}: {}", self.path.display(), e))
})?;
debug!(path = %self.path.display(), "PID file removed");
}
Ok(())
}
/// Returns the path to this PID file.
#[allow(dead_code)]
pub fn path(&self) -> &Path {
&self.path
}
}
impl Drop for PidFile {
    /// Best-effort cleanup: a PID file that is still marked as locked is
    /// released, and a failure is only logged because `drop` cannot return it.
    fn drop(&mut self) {
        if !self.locked {
            return;
        }
        match self.release() {
            Ok(()) => {}
            Err(e) => {
                warn!(error = %e, "Failed to clean up PID file on drop");
            }
        }
    }
}
/// Checks if a process with the given PID is running.
///
/// Non-positive PIDs are never reported as running: passed to `kill`, `0`
/// and negative values address process groups or every process rather than
/// a single process.
fn is_process_running(pid: i32) -> bool {
    if pid <= 0 {
        return false;
    }
    // kill(pid, 0) checks if process exists without sending a signal.
    // EPERM means the process exists but belongs to another user, so it
    // still counts as running.
    matches!(
        nix::sys::signal::kill(Pid::from_raw(pid), None),
        Ok(()) | Err(nix::errno::Errno::EPERM)
    )
}
/// Drops privileges to the specified user and group.
///
/// This should be called after binding privileged ports but before
/// entering the main event loop.
pub fn drop_privileges(user: Option<&str>, group: Option<&str>) -> Result<(), DaemonError> {
// Look up group first (need to do this while still root)
let target_gid = if let Some(group_name) = group {
Some(lookup_group(group_name)?)
} else if let Some(user_name) = user {
// If no group specified but user is, use user's primary group
Some(lookup_user_primary_gid(user_name)?)
} else {
None
};
// Look up user
let target_uid = if let Some(user_name) = user {
Some(lookup_user(user_name)?)
} else {
None
};
// Drop privileges: set GID first, then UID
// (Setting UID first would prevent us from setting GID)
if let Some(gid) = target_gid {
unistd::setgid(gid).map_err(DaemonError::PrivilegeDrop)?;
// Also set supplementary groups to just this one
unistd::setgroups(&[gid]).map_err(DaemonError::PrivilegeDrop)?;
info!(gid = gid.as_raw(), "Dropped group privileges");
}
if let Some(uid) = target_uid {
unistd::setuid(uid).map_err(DaemonError::PrivilegeDrop)?;
info!(uid = uid.as_raw(), "Dropped user privileges");
}
Ok(())
}
/// Looks up a user by name and returns their UID.
fn lookup_user(name: &str) -> Result<Uid, DaemonError> {
// Use libc getpwnam
let c_name =
std::ffi::CString::new(name).map_err(|_| DaemonError::UserNotFound(name.to_string()))?;
unsafe {
let pwd = libc::getpwnam(c_name.as_ptr());
if pwd.is_null() {
Err(DaemonError::UserNotFound(name.to_string()))
} else {
Ok(Uid::from_raw((*pwd).pw_uid))
}
}
}
/// Looks up a user's primary GID by username.
fn lookup_user_primary_gid(name: &str) -> Result<Gid, DaemonError> {
let c_name =
std::ffi::CString::new(name).map_err(|_| DaemonError::UserNotFound(name.to_string()))?;
unsafe {
let pwd = libc::getpwnam(c_name.as_ptr());
if pwd.is_null() {
Err(DaemonError::UserNotFound(name.to_string()))
} else {
Ok(Gid::from_raw((*pwd).pw_gid))
}
}
}
/// Looks up a group by name and returns its GID.
fn lookup_group(name: &str) -> Result<Gid, DaemonError> {
let c_name =
std::ffi::CString::new(name).map_err(|_| DaemonError::GroupNotFound(name.to_string()))?;
unsafe {
let grp = libc::getgrnam(c_name.as_ptr());
if grp.is_null() {
Err(DaemonError::GroupNotFound(name.to_string()))
} else {
Ok(Gid::from_raw((*grp).gr_gid))
}
}
}
/// Reads PID from a PID file.
#[allow(dead_code)]
pub fn read_pid_file<P: AsRef<Path>>(path: P) -> Result<i32, DaemonError> {
let path = path.as_ref();
let mut contents = String::new();
File::open(path)
.and_then(|mut f| f.read_to_string(&mut contents))
.map_err(|e| DaemonError::PidFile(format!("cannot read {}: {}", path.display(), e)))?;
contents
.trim()
.parse()
.map_err(|_| DaemonError::PidFile(format!("invalid PID in {}", path.display())))
}
/// Sends a signal to the process specified in a PID file.
///
/// # Errors
///
/// Returns [`DaemonError::PidFile`] when the PID file cannot be read or parsed,
/// when it holds a non-positive PID, when the process is not running, or
/// when delivering the signal fails.
#[allow(dead_code)]
pub fn signal_pid_file<P: AsRef<Path>>(
    path: P,
    signal: nix::sys::signal::Signal,
) -> Result<(), DaemonError> {
    let path = path.as_ref();
    let pid = read_pid_file(path)?;

    // Passed to `kill`, 0 and negative values address a process group or
    // every process; a corrupted PID file must never fan a signal out.
    if pid <= 0 {
        return Err(DaemonError::PidFile(format!(
            "invalid PID {} in {}",
            pid,
            path.display()
        )));
    }

    if !is_process_running(pid) {
        return Err(DaemonError::PidFile(format!(
            "process {} from {} is not running",
            pid,
            path.display()
        )));
    }

    nix::sys::signal::kill(Pid::from_raw(pid), signal)
        .map_err(|e| DaemonError::PidFile(format!("cannot signal process {}: {}", pid, e)))?;
    Ok(())
}
/// Status of the daemon as derived from its PID file.
///
/// Produced by `check_status`.
#[allow(dead_code)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DaemonStatus {
    /// Daemon is running with the given PID.
    Running(i32),
    /// PID file exists but process is not running.
    /// The payload is the PID read from the file.
    Stale(i32),
    /// No PID file exists.
    /// `check_status` also reports this when the file cannot be read or parsed.
    NotRunning,
}
/// Derives the daemon status from the PID file at `path`.
///
/// * missing, unreadable or unparsable file: [`DaemonStatus::NotRunning`]
/// * PID of a live process: [`DaemonStatus::Running`]
/// * PID of a process that is gone: [`DaemonStatus::Stale`]
#[allow(dead_code)]
pub fn check_status<P: AsRef<Path>>(path: P) -> DaemonStatus {
    let pid_path = path.as_ref();

    if !pid_path.exists() {
        return DaemonStatus::NotRunning;
    }

    match read_pid_file(pid_path) {
        Ok(pid) if is_process_running(pid) => DaemonStatus::Running(pid),
        Ok(pid) => DaemonStatus::Stale(pid),
        Err(_) => DaemonStatus::NotRunning,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a scratch PID file path that is unique to this test process.
    ///
    /// Fixed names under `/tmp` collide when several test runs (or several
    /// users) share a machine; the process id keeps each run isolated.
    fn scratch_pid_path(tag: &str) -> PathBuf {
        std::env::temp_dir().join(format!("telemt_test_{}_{}.pid", tag, std::process::id()))
    }

    #[test]
    fn test_daemon_options_default() {
        let opts = DaemonOptions::default();
        assert!(!opts.daemonize);
        assert!(!opts.should_daemonize());
        assert_eq!(opts.pid_file_path(), Path::new(DEFAULT_PID_FILE));
    }

    #[test]
    fn test_daemon_options_foreground_overrides() {
        let opts = DaemonOptions {
            daemonize: true,
            foreground: true,
            ..Default::default()
        };
        assert!(!opts.should_daemonize());
    }

    #[test]
    fn test_check_status_not_running() {
        let path = scratch_pid_path("nonexistent");
        // Make sure a leftover from an aborted run cannot skew the result.
        let _ = fs::remove_file(&path);
        assert_eq!(check_status(&path), DaemonStatus::NotRunning);
    }

    #[test]
    fn test_pid_file_basic() {
        let path = scratch_pid_path("pidfile");
        let _ = fs::remove_file(&path);

        let mut pf = PidFile::new(&path);
        assert!(pf.check_running().unwrap().is_none());

        pf.acquire().unwrap();
        assert!(path.exists());

        // Read it back
        let pid = read_pid_file(&path).unwrap();
        assert_eq!(pid, std::process::id() as i32);

        pf.release().unwrap();
        assert!(!path.exists());
    }
}

305
src/logging.rs Normal file
View File

@@ -0,0 +1,305 @@
//! Logging configuration for telemt.
//!
//! Supports multiple log destinations:
//! - stderr (default, works with systemd journald)
//! - syslog (Unix only, for traditional init systems)
//! - file (with optional rotation)
#![allow(dead_code)] // Infrastructure module - used via CLI flags
use std::path::Path;
use tracing_subscriber::layer::SubscriberExt;
use tracing_subscriber::util::SubscriberInitExt;
use tracing_subscriber::{EnvFilter, fmt, reload};
/// Log destination configuration.
///
/// Selected from CLI flags by `parse_log_destination` and consumed by
/// `init_logging`.
#[derive(Debug, Clone, Default)]
pub enum LogDestination {
    /// Log to stderr (default, captured by systemd journald).
    #[default]
    Stderr,
    /// Log to syslog (Unix only).
    #[cfg(unix)]
    Syslog,
    /// Log to a file with optional rotation.
    File {
        /// Target path. With daily rotation its parent directory and file name
        /// are used as the directory and file-name prefix of the rolling
        /// appender; otherwise the file itself is opened in append mode.
        path: String,
        /// Rotate daily if true.
        rotate_daily: bool,
    },
}
/// Logging options parsed from CLI/config.
#[derive(Debug, Clone, Default)]
pub struct LoggingOptions {
    /// Where to send logs.
    pub destination: LogDestination,
    /// Disable ANSI colors.
    /// Only consulted for the stderr destination; syslog and file output are
    /// always written without ANSI escapes.
    pub disable_colors: bool,
}
/// Keeps file logging alive for as long as it is held.
///
/// Wraps the optional worker guard of the non-blocking file writer; dropping
/// this value drops that guard, which flushes and closes the log file.
pub struct LoggingGuard {
    _guard: Option<tracing_appender::non_blocking::WorkerGuard>,
}

impl LoggingGuard {
    /// Wraps an optional worker guard.
    fn new(guard: Option<tracing_appender::non_blocking::WorkerGuard>) -> Self {
        LoggingGuard { _guard: guard }
    }

    /// Guard for destinations that have no background writer (stderr, syslog).
    pub fn noop() -> Self {
        Self::new(None)
    }
}
/// Initialize the tracing subscriber with the specified options.
///
/// Installs a registry made of a reloadable `EnvFilter` (seeded from
/// `initial_filter`) plus one formatting layer whose writer depends on
/// `opts.destination`.
///
/// Returns a reload handle for dynamic log level changes and a guard
/// that must be kept alive for file logging.
///
/// # Panics
///
/// Panics with "Failed to open log file" when the non-rotating log file cannot
/// be opened.
/// NOTE(review): `.init()` presumably also panics if a global subscriber is
/// already installed — confirm against the tracing-subscriber docs.
pub fn init_logging(
    opts: &LoggingOptions,
    initial_filter: &str,
) -> (
    reload::Handle<EnvFilter, impl tracing::Subscriber + Send + Sync>,
    LoggingGuard,
) {
    // The filter sits in a reload layer so the level can be changed at runtime
    // through the returned handle.
    let (filter_layer, filter_handle) = reload::Layer::new(EnvFilter::new(initial_filter));

    match &opts.destination {
        LogDestination::Stderr => {
            // Colors are only ever enabled for this destination.
            let fmt_layer = fmt::Layer::default()
                .with_ansi(!opts.disable_colors)
                .with_target(true);
            tracing_subscriber::registry()
                .with(filter_layer)
                .with(fmt_layer)
                .init();
            (filter_handle, LoggingGuard::noop())
        }
        #[cfg(unix)]
        LogDestination::Syslog => {
            // Use a custom fmt layer that writes to syslog
            let fmt_layer = fmt::Layer::default()
                .with_ansi(false)
                .with_target(true)
                .with_writer(SyslogWriter::new);
            tracing_subscriber::registry()
                .with(filter_layer)
                .with(fmt_layer)
                .init();
            (filter_handle, LoggingGuard::noop())
        }
        LogDestination::File { path, rotate_daily } => {
            let (non_blocking, guard) = if *rotate_daily {
                // Extract directory and filename prefix
                let path = Path::new(path);
                // Paths without a parent fall back to /var/log, paths without a
                // usable file name fall back to the "telemt" prefix.
                let dir = path.parent().unwrap_or(Path::new("/var/log"));
                let prefix = path
                    .file_name()
                    .and_then(|s| s.to_str())
                    .unwrap_or("telemt");
                let file_appender = tracing_appender::rolling::daily(dir, prefix);
                tracing_appender::non_blocking(file_appender)
            } else {
                // Single file, created on demand and always appended to.
                let file = std::fs::OpenOptions::new()
                    .create(true)
                    .append(true)
                    .open(path)
                    .expect("Failed to open log file");
                tracing_appender::non_blocking(file)
            };
            let fmt_layer = fmt::Layer::default()
                .with_ansi(false)
                .with_target(true)
                .with_writer(non_blocking);
            tracing_subscriber::registry()
                .with(filter_layer)
                .with(fmt_layer)
                .init();
            // The worker guard travels back to the caller inside the guard.
            (filter_handle, LoggingGuard::new(Some(guard)))
        }
    }
}
/// Writer that forwards formatted tracing output to syslog.
#[cfg(unix)]
struct SyslogWriter {
    _private: (),
}

#[cfg(unix)]
impl SyslogWriter {
    /// Creates a writer, opening the process-wide syslog connection the first
    /// time it is called.
    fn new() -> Self {
        // NUL-terminated identifier handed to `openlog`.
        static IDENT: &[u8] = b"telemt\0";
        // Ensures `openlog` runs once per process.
        static SYSLOG_OPENED: std::sync::Once = std::sync::Once::new();

        SYSLOG_OPENED.call_once(|| {
            // SAFETY: `IDENT` is a `'static`, NUL-terminated byte string, so the
            // pointer passed to `openlog` stays valid for the whole process.
            unsafe {
                // ident "telemt", options LOG_PID | LOG_NDELAY, facility LOG_DAEMON
                libc::openlog(
                    IDENT.as_ptr() as *const libc::c_char,
                    libc::LOG_PID | libc::LOG_NDELAY,
                    libc::LOG_DAEMON,
                );
            }
        });

        SyslogWriter { _private: () }
    }
}
#[cfg(unix)]
impl std::io::Write for SyslogWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
// Convert to C string, stripping newlines
let msg = String::from_utf8_lossy(buf);
let msg = msg.trim_end();
if msg.is_empty() {
return Ok(buf.len());
}
// Determine priority based on log level in the message
let priority = if msg.contains(" ERROR ") || msg.contains(" error ") {
libc::LOG_ERR
} else if msg.contains(" WARN ") || msg.contains(" warn ") {
libc::LOG_WARNING
} else if msg.contains(" INFO ") || msg.contains(" info ") {
libc::LOG_INFO
} else if msg.contains(" DEBUG ") || msg.contains(" debug ") {
libc::LOG_DEBUG
} else {
libc::LOG_INFO
};
// Write to syslog
let c_msg = std::ffi::CString::new(msg.as_bytes())
.unwrap_or_else(|_| std::ffi::CString::new("(invalid utf8)").unwrap());
unsafe {
libc::syslog(
priority,
b"%s\0".as_ptr() as *const libc::c_char,
c_msg.as_ptr(),
);
}
Ok(buf.len())
}
fn flush(&mut self) -> std::io::Result<()> {
Ok(())
}
}
#[cfg(unix)]
impl<'a> tracing_subscriber::fmt::MakeWriter<'a> for SyslogWriter {
    type Writer = SyslogWriter;

    /// Returns a fresh `SyslogWriter` on every call.
    fn make_writer(&'a self) -> Self::Writer {
        Self::new()
    }
}
/// Parse log destination from CLI arguments.
///
/// The first matching flag wins:
///
/// * `--syslog` (Unix only) selects syslog
/// * `--log-file <PATH>` / `--log-file=<PATH>` selects a plain log file
/// * `--log-file-daily <PATH>` / `--log-file-daily=<PATH>` selects a log file
///   with daily rotation
///
/// A flag that is missing its value is ignored. Without any matching flag
/// the destination is stderr.
pub fn parse_log_destination(args: &[String]) -> LogDestination {
    let mut remaining = args.iter();

    while let Some(arg) = remaining.next() {
        let arg = arg.as_str();

        #[cfg(unix)]
        {
            if arg == "--syslog" {
                return LogDestination::Syslog;
            }
        }

        // `strip_prefix` removes the flag exactly once; `trim_start_matches`
        // would also eat a value that happens to start with the flag text.
        if let Some(path) = arg.strip_prefix("--log-file=") {
            return LogDestination::File {
                path: path.to_string(),
                rotate_daily: false,
            };
        }
        if let Some(path) = arg.strip_prefix("--log-file-daily=") {
            return LogDestination::File {
                path: path.to_string(),
                rotate_daily: true,
            };
        }

        // Space-separated form: the value is the next argument, if any.
        if arg == "--log-file" || arg == "--log-file-daily" {
            if let Some(path) = remaining.next() {
                return LogDestination::File {
                    path: path.clone(),
                    rotate_daily: arg == "--log-file-daily",
                };
            }
        }
    }

    LogDestination::Stderr
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Turns string literals into the owned argument list the parser expects.
    fn to_args(raw: &[&str]) -> Vec<String> {
        raw.iter().map(|item| item.to_string()).collect()
    }

    #[test]
    fn test_parse_log_destination_default() {
        let args = to_args(&[]);
        let dest = parse_log_destination(&args);
        assert!(matches!(dest, LogDestination::Stderr));
    }

    #[test]
    fn test_parse_log_destination_file() {
        let args = to_args(&["--log-file", "/var/log/telemt.log"]);
        if let LogDestination::File { path, rotate_daily } = parse_log_destination(&args) {
            assert_eq!(path, "/var/log/telemt.log");
            assert!(!rotate_daily);
        } else {
            panic!("Expected File destination");
        }
    }

    #[test]
    fn test_parse_log_destination_file_daily() {
        let args = to_args(&["--log-file-daily=/var/log/telemt"]);
        if let LogDestination::File { path, rotate_daily } = parse_log_destination(&args) {
            assert_eq!(path, "/var/log/telemt");
            assert!(rotate_daily);
        } else {
            panic!("Expected File destination");
        }
    }

    #[cfg(unix)]
    #[test]
    fn test_parse_log_destination_syslog() {
        let args = to_args(&["--syslog"]);
        let dest = parse_log_destination(&args);
        assert!(matches!(dest, LogDestination::Syslog));
    }
}

View File

@@ -21,10 +21,29 @@ pub(crate) async fn configure_admission_gate(
if config.general.use_middle_proxy {
if let Some(pool) = me_pool.as_ref() {
let initial_ready = pool.admission_ready_conditional_cast().await;
admission_tx.send_replace(initial_ready);
let _ = route_runtime.set_mode(RelayRouteMode::Middle);
let mut fallback_enabled = config.general.me2dc_fallback;
let mut fast_fallback_enabled = fallback_enabled && config.general.me2dc_fast;
let (initial_gate_open, initial_route_mode, initial_fallback_reason) = if initial_ready
{
(true, RelayRouteMode::Middle, None)
} else if fast_fallback_enabled {
(
true,
RelayRouteMode::Direct,
Some("fast_not_ready_fallback"),
)
} else {
(false, RelayRouteMode::Middle, None)
};
admission_tx.send_replace(initial_gate_open);
let _ = route_runtime.set_mode(initial_route_mode);
if initial_ready {
info!("Conditional-admission gate: open / ME pool READY");
} else if let Some(reason) = initial_fallback_reason {
warn!(
fallback_reason = reason,
"Conditional-admission gate opened in ME fast fallback mode"
);
} else {
warn!("Conditional-admission gate: closed / ME pool is NOT ready)");
}
@@ -34,10 +53,9 @@ pub(crate) async fn configure_admission_gate(
let route_runtime_gate = route_runtime.clone();
let mut config_rx_gate = config_rx.clone();
let mut admission_poll_ms = config.general.me_admission_poll_ms.max(1);
let mut fallback_enabled = config.general.me2dc_fallback;
tokio::spawn(async move {
let mut gate_open = initial_ready;
let mut route_mode = RelayRouteMode::Middle;
let mut gate_open = initial_gate_open;
let mut route_mode = initial_route_mode;
let mut ready_observed = initial_ready;
let mut not_ready_since = if initial_ready {
None
@@ -53,16 +71,23 @@ pub(crate) async fn configure_admission_gate(
let cfg = config_rx_gate.borrow_and_update().clone();
admission_poll_ms = cfg.general.me_admission_poll_ms.max(1);
fallback_enabled = cfg.general.me2dc_fallback;
fast_fallback_enabled = cfg.general.me2dc_fallback && cfg.general.me2dc_fast;
continue;
}
_ = tokio::time::sleep(Duration::from_millis(admission_poll_ms)) => {}
}
let ready = pool_for_gate.admission_ready_conditional_cast().await;
let now = Instant::now();
let (next_gate_open, next_route_mode, next_fallback_active) = if ready {
let (next_gate_open, next_route_mode, next_fallback_reason) = if ready {
ready_observed = true;
not_ready_since = None;
(true, RelayRouteMode::Middle, false)
(true, RelayRouteMode::Middle, None)
} else if fast_fallback_enabled {
(
true,
RelayRouteMode::Direct,
Some("fast_not_ready_fallback"),
)
} else {
let not_ready_started_at = *not_ready_since.get_or_insert(now);
let not_ready_for = now.saturating_duration_since(not_ready_started_at);
@@ -72,11 +97,12 @@ pub(crate) async fn configure_admission_gate(
STARTUP_FALLBACK_AFTER
};
if fallback_enabled && not_ready_for > fallback_after {
(true, RelayRouteMode::Direct, true)
(true, RelayRouteMode::Direct, Some("strict_grace_fallback"))
} else {
(false, RelayRouteMode::Middle, false)
(false, RelayRouteMode::Middle, None)
}
};
let next_fallback_active = next_fallback_reason.is_some();
if next_route_mode != route_mode {
route_mode = next_route_mode;
@@ -88,17 +114,28 @@ pub(crate) async fn configure_admission_gate(
"Middle-End routing restored for new sessions"
);
} else {
let fallback_after = if ready_observed {
RUNTIME_FALLBACK_AFTER
let fallback_reason = next_fallback_reason.unwrap_or("unknown");
if fallback_reason == "strict_grace_fallback" {
let fallback_after = if ready_observed {
RUNTIME_FALLBACK_AFTER
} else {
STARTUP_FALLBACK_AFTER
};
warn!(
target_mode = route_mode.as_str(),
cutover_generation = snapshot.generation,
grace_secs = fallback_after.as_secs(),
fallback_reason,
"ME pool stayed not-ready beyond grace; routing new sessions via Direct-DC"
);
} else {
STARTUP_FALLBACK_AFTER
};
warn!(
target_mode = route_mode.as_str(),
cutover_generation = snapshot.generation,
grace_secs = fallback_after.as_secs(),
"ME pool stayed not-ready beyond grace; routing new sessions via Direct-DC"
);
warn!(
target_mode = route_mode.as_str(),
cutover_generation = snapshot.generation,
fallback_reason,
"ME pool not-ready; routing new sessions via Direct-DC (fast mode)"
);
}
}
}
}
@@ -108,7 +145,10 @@ pub(crate) async fn configure_admission_gate(
admission_tx_gate.send_replace(gate_open);
if gate_open {
if next_fallback_active {
warn!("Conditional-admission gate opened in ME fallback mode");
warn!(
fallback_reason = next_fallback_reason.unwrap_or("unknown"),
"Conditional-admission gate opened in ME fallback mode"
);
} else {
info!("Conditional-admission gate opened / ME pool READY");
}

View File

@@ -8,6 +8,7 @@ use tracing::{debug, error, info, warn};
use crate::cli;
use crate::config::ProxyConfig;
use crate::logging::LogDestination;
use crate::transport::UpstreamManager;
use crate::transport::middle_proxy::{
ProxyConfigData, fetch_proxy_config_with_raw_via_upstream, load_proxy_config_cache,
@@ -27,7 +28,16 @@ pub(crate) fn resolve_runtime_config_path(
absolute.canonicalize().unwrap_or(absolute)
}
pub(crate) fn parse_cli() -> (String, Option<PathBuf>, bool, Option<String>) {
/// Parsed CLI arguments.
///
/// Returned by `parse_cli`.
pub(crate) struct CliArgs {
    /// Config file path; `"config.toml"` unless a positional argument overrides it.
    pub config_path: String,
    /// Data directory from `--data-path`, if given.
    pub data_path: Option<PathBuf>,
    /// Set by `--silent` / `-s` (suppress info logs).
    pub silent: bool,
    /// Value of `--log-level`, if given.
    pub log_level: Option<String>,
    /// Log destination selected by the logging flags
    /// (see `crate::logging::parse_log_destination`).
    pub log_destination: LogDestination,
}
pub(crate) fn parse_cli() -> CliArgs {
let mut config_path = "config.toml".to_string();
let mut data_path: Option<PathBuf> = None;
let mut silent = false;
@@ -35,6 +45,9 @@ pub(crate) fn parse_cli() -> (String, Option<PathBuf>, bool, Option<String>) {
let args: Vec<String> = std::env::args().skip(1).collect();
// Parse log destination
let log_destination = crate::logging::parse_log_destination(&args);
// Check for --init first (handled before tokio)
if let Some(init_opts) = cli::parse_init_args(&args) {
if let Err(e) = cli::run_init(init_opts) {
@@ -74,36 +87,35 @@ pub(crate) fn parse_cli() -> (String, Option<PathBuf>, bool, Option<String>) {
log_level = Some(s.trim_start_matches("--log-level=").to_string());
}
"--help" | "-h" => {
eprintln!("Usage: telemt [config.toml] [OPTIONS]");
eprintln!();
eprintln!("Options:");
eprintln!(
" --data-path <DIR> Set data directory (absolute path; overrides config value)"
);
eprintln!(" --silent, -s Suppress info logs");
eprintln!(" --log-level <LEVEL> debug|verbose|normal|silent");
eprintln!(" --help, -h Show this help");
eprintln!();
eprintln!("Setup (fire-and-forget):");
eprintln!(
" --init Generate config, install systemd service, start"
);
eprintln!(" --port <PORT> Listen port (default: 443)");
eprintln!(
" --domain <DOMAIN> TLS domain for masking (default: www.google.com)"
);
eprintln!(
" --secret <HEX> 32-char hex secret (auto-generated if omitted)"
);
eprintln!(" --user <NAME> Username (default: user)");
eprintln!(" --config-dir <DIR> Config directory (default: /etc/telemt)");
eprintln!(" --no-start Don't start the service after install");
print_help();
std::process::exit(0);
}
"--version" | "-V" => {
println!("telemt {}", env!("CARGO_PKG_VERSION"));
std::process::exit(0);
}
// Skip daemon-related flags (already parsed)
"--daemon" | "-d" | "--foreground" | "-f" => {}
s if s.starts_with("--pid-file") => {
if !s.contains('=') {
i += 1; // skip value
}
}
s if s.starts_with("--run-as-user") => {
if !s.contains('=') {
i += 1;
}
}
s if s.starts_with("--run-as-group") => {
if !s.contains('=') {
i += 1;
}
}
s if s.starts_with("--working-dir") => {
if !s.contains('=') {
i += 1;
}
}
s if !s.starts_with('-') => {
config_path = s.to_string();
}
@@ -114,7 +126,73 @@ pub(crate) fn parse_cli() -> (String, Option<PathBuf>, bool, Option<String>) {
i += 1;
}
(config_path, data_path, silent, log_level)
CliArgs {
config_path,
data_path,
silent,
log_level,
log_destination,
}
}
/// Prints the command-line usage summary to stderr.
///
/// The text is grouped into sections; sections that only apply to Unix
/// builds (daemon commands, syslog, daemon options, examples) are compiled
/// in only under `cfg(unix)`.
fn print_help() {
    // Writes every entry of a section on its own stderr line.
    fn emit(section: &[&str]) {
        for line in section {
            eprintln!("{}", line);
        }
    }

    emit(&[
        "Usage: telemt [COMMAND] [OPTIONS] [config.toml]",
        "",
        "Commands:",
        " run Run in foreground (default if no command given)",
    ]);
    #[cfg(unix)]
    emit(&[
        " start Start as background daemon",
        " stop Stop a running daemon",
        " reload Reload configuration (send SIGHUP)",
        " status Check if daemon is running",
    ]);

    emit(&[
        "",
        "Options:",
        " --data-path <DIR> Set data directory (absolute path; overrides config value)",
        " --silent, -s Suppress info logs",
        " --log-level <LEVEL> debug|verbose|normal|silent",
        " --help, -h Show this help",
        " --version, -V Show version",
        "",
        "Logging options:",
        " --log-file <PATH> Log to file (default: stderr)",
        " --log-file-daily <PATH> Log to file with daily rotation",
    ]);
    #[cfg(unix)]
    emit(&[" --syslog Log to syslog (Unix only)"]);
    emit(&[""]);

    #[cfg(unix)]
    emit(&[
        "Daemon options (Unix only):",
        " --daemon, -d Fork to background (daemonize)",
        " --foreground, -f Explicit foreground mode (for systemd)",
        " --pid-file <PATH> PID file path (default: /var/run/telemt.pid)",
        " --run-as-user <USER> Drop privileges to this user after binding",
        " --run-as-group <GROUP> Drop privileges to this group after binding",
        " --working-dir <DIR> Working directory for daemon mode",
        "",
    ]);

    emit(&[
        "Setup (fire-and-forget):",
        " --init Generate config, install systemd service, start",
        " --port <PORT> Listen port (default: 443)",
        " --domain <DOMAIN> TLS domain for masking (default: www.google.com)",
        " --secret <HEX> 32-char hex secret (auto-generated if omitted)",
        " --user <NAME> Username (default: user)",
        " --config-dir <DIR> Config directory (default: /etc/telemt)",
        " --no-start Don't start the service after install",
    ]);

    #[cfg(unix)]
    emit(&[
        "",
        "Examples:",
        " telemt config.toml Run in foreground",
        " telemt start config.toml Start as daemon",
        " telemt start --pid-file /tmp/t.pid Start with custom PID file",
        " telemt stop Stop daemon",
        " telemt reload Reload configuration",
        " telemt status Check daemon status",
    ]);
}
#[cfg(test)]

View File

@@ -72,6 +72,7 @@ pub(crate) async fn bind_listeners(
let options = ListenOptions {
reuse_port: listener_conf.reuse_allow,
ipv6_only: listener_conf.ip.is_ipv6(),
backlog: config.server.listen_backlog,
..Default::default()
};

View File

@@ -277,6 +277,8 @@ pub(crate) async fn initialize_me_pool(
config.general.me_warn_rate_limit_ms,
config.general.me_route_no_writer_mode,
config.general.me_route_no_writer_wait_ms,
config.general.me_route_hybrid_max_wait_ms,
config.general.me_route_blocking_send_timeout_ms,
config.general.me_route_inline_recovery_attempts,
config.general.me_route_inline_recovery_wait_ms,
);

View File

@@ -47,8 +47,55 @@ use crate::transport::UpstreamManager;
use crate::transport::middle_proxy::MePool;
use helpers::{parse_cli, resolve_runtime_config_path};
#[cfg(unix)]
use crate::daemon::{DaemonOptions, PidFile, drop_privileges};
/// Runs the full telemt runtime startup pipeline and blocks until shutdown.
///
/// Unix-only entry point: it forwards `daemon_opts` unchanged to `run_inner`
/// and returns whatever `run_inner` returns.
///
/// On Unix, daemon options should be handled before calling this function
/// (daemonization must happen before tokio runtime starts).
#[cfg(unix)]
pub async fn run_with_daemon(
daemon_opts: DaemonOptions,
) -> std::result::Result<(), Box<dyn std::error::Error>> {
run_inner(daemon_opts).await
}
/// Runs the full telemt runtime startup pipeline and blocks until shutdown.
///
/// Entry point for non-daemon mode or for use as a library. On Unix the
/// daemon options are parsed from the process arguments and handed to
/// `run_inner`; on other platforms `run_inner` is invoked without arguments.
#[allow(dead_code)]
pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
    #[cfg(unix)]
    let outcome = {
        // Even the simple run() path honours daemon-related CLI flags.
        let cli_args = std::env::args().skip(1).collect::<Vec<String>>();
        run_inner(crate::cli::parse_daemon_args(&cli_args)).await
    };
    #[cfg(not(unix))]
    let outcome = run_inner().await;

    outcome
}
#[cfg(unix)]
async fn run_inner(
daemon_opts: DaemonOptions,
) -> std::result::Result<(), Box<dyn std::error::Error>> {
// Acquire PID file if daemonizing or if explicitly requested
// Keep it alive until shutdown (underscore prefix = intentionally kept for RAII cleanup)
let _pid_file = if daemon_opts.daemonize || daemon_opts.pid_file.is_some() {
let mut pf = PidFile::new(daemon_opts.pid_file_path());
if let Err(e) = pf.acquire() {
eprintln!("[telemt] {}", e);
std::process::exit(1);
}
Some(pf)
} else {
None
};
let process_started_at = Instant::now();
let process_started_at_epoch_secs = SystemTime::now()
.duration_since(UNIX_EPOCH)
@@ -61,7 +108,12 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
Some("load and validate config".to_string()),
)
.await;
let (config_path_cli, data_path, cli_silent, cli_log_level) = parse_cli();
let cli_args = parse_cli();
let config_path_cli = cli_args.config_path;
let data_path = cli_args.data_path;
let cli_silent = cli_args.silent;
let cli_log_level = cli_args.log_level;
let log_destination = cli_args.log_destination;
let startup_cwd = match std::env::current_dir() {
Ok(cwd) => cwd,
Err(e) => {
@@ -115,15 +167,13 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
);
std::process::exit(1);
}
} else {
if let Err(e) = std::fs::create_dir_all(data_path) {
eprintln!(
"[telemt] Can't create data_path {}: {}",
data_path.display(),
e
);
std::process::exit(1);
}
} else if let Err(e) = std::fs::create_dir_all(data_path) {
eprintln!(
"[telemt] Can't create data_path {}: {}",
data_path.display(),
e
);
std::process::exit(1);
}
if let Err(e) = std::env::set_current_dir(data_path) {
@@ -161,17 +211,43 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
)
.await;
// Configure color output based on config
let fmt_layer = if config.general.disable_colors {
fmt::Layer::default().with_ansi(false)
} else {
fmt::Layer::default().with_ansi(true)
};
// Initialize logging based on destination
let _logging_guard: Option<crate::logging::LoggingGuard>;
match log_destination {
crate::logging::LogDestination::Stderr => {
// Default: log to stderr (works with systemd journald)
let fmt_layer = if config.general.disable_colors {
fmt::Layer::default().with_ansi(false)
} else {
fmt::Layer::default().with_ansi(true)
};
tracing_subscriber::registry()
.with(filter_layer)
.with(fmt_layer)
.init();
_logging_guard = None;
}
#[cfg(unix)]
crate::logging::LogDestination::Syslog => {
// Syslog: for OpenRC/FreeBSD
let logging_opts = crate::logging::LoggingOptions {
destination: log_destination,
disable_colors: true,
};
let (_, guard) = crate::logging::init_logging(&logging_opts, "info");
_logging_guard = Some(guard);
}
crate::logging::LogDestination::File { .. } => {
// File logging with optional rotation
let logging_opts = crate::logging::LoggingOptions {
destination: log_destination,
disable_colors: true,
};
let (_, guard) = crate::logging::init_logging(&logging_opts, "info");
_logging_guard = Some(guard);
}
}
tracing_subscriber::registry()
.with(filter_layer)
.with(fmt_layer)
.init();
startup_tracker
.complete_component(
COMPONENT_TRACING_INIT,
@@ -225,6 +301,7 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
config.general.upstream_connect_retry_attempts,
config.general.upstream_connect_retry_backoff_ms,
config.general.upstream_connect_budget_ms,
config.general.tg_connect,
config.general.upstream_unhealthy_fail_threshold,
config.general.upstream_connect_failfast_hard_errors,
stats.clone(),
@@ -585,6 +662,14 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
std::process::exit(1);
}
// Drop privileges after binding sockets (which may require root for port < 1024)
if daemon_opts.user.is_some() || daemon_opts.group.is_some() {
if let Err(e) = drop_privileges(daemon_opts.user.as_deref(), daemon_opts.group.as_deref()) {
error!(error = %e, "Failed to drop privileges");
std::process::exit(1);
}
}
runtime_tasks::apply_runtime_log_filter(
has_rust_log,
&effective_log_level,
@@ -605,6 +690,9 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
runtime_tasks::mark_runtime_ready(&startup_tracker).await;
// Spawn signal handlers for SIGUSR1/SIGUSR2 (non-shutdown signals)
shutdown::spawn_signal_handlers(stats.clone(), process_started_at);
listeners::spawn_tcp_accept_loops(
listeners,
config_rx.clone(),
@@ -622,7 +710,7 @@ pub async fn run() -> std::result::Result<(), Box<dyn std::error::Error>> {
max_connections.clone(),
);
shutdown::wait_for_shutdown(process_started_at, me_pool).await;
shutdown::wait_for_shutdown(process_started_at, me_pool, stats).await;
Ok(())
}

View File

@@ -323,10 +323,12 @@ pub(crate) async fn spawn_metrics_if_configured(
let config_rx_metrics = config_rx.clone();
let ip_tracker_metrics = ip_tracker.clone();
let whitelist = config.server.metrics_whitelist.clone();
let listen_backlog = config.server.listen_backlog;
tokio::spawn(async move {
metrics::serve(
port,
listen,
listen_backlog,
stats,
beobachten,
ip_tracker_metrics,

View File

@@ -1,45 +1,206 @@
//! Shutdown and signal handling for telemt.
//!
//! Handles graceful shutdown on various signals:
//! - SIGINT (Ctrl+C) / SIGTERM: Graceful shutdown
//! - SIGQUIT: Graceful shutdown with stats dump
//! - SIGUSR1: Reserved for log rotation (logs acknowledgment)
//! - SIGUSR2: Dump runtime status to log
//!
//! SIGHUP is handled separately in config/hot_reload.rs for config reload.
use std::sync::Arc;
use std::time::{Duration, Instant};
#[cfg(not(unix))]
use tokio::signal;
use tracing::{error, info, warn};
#[cfg(unix)]
use tokio::signal::unix::{SignalKind, signal};
use tracing::{info, warn};
use crate::stats::Stats;
use crate::transport::middle_proxy::MePool;
use super::helpers::{format_uptime, unit_label};
pub(crate) async fn wait_for_shutdown(process_started_at: Instant, me_pool: Option<Arc<MePool>>) {
match signal::ctrl_c().await {
Ok(()) => {
let shutdown_started_at = Instant::now();
info!("Shutting down...");
let uptime_secs = process_started_at.elapsed().as_secs();
info!("Uptime: {}", format_uptime(uptime_secs));
if let Some(pool) = &me_pool {
match tokio::time::timeout(
Duration::from_secs(2),
pool.shutdown_send_close_conn_all(),
)
.await
{
Ok(total) => {
info!(
close_conn_sent = total,
"ME shutdown: RPC_CLOSE_CONN broadcast completed"
);
}
Err(_) => {
warn!("ME shutdown: RPC_CLOSE_CONN broadcast timed out");
}
}
}
let shutdown_secs = shutdown_started_at.elapsed().as_secs();
info!(
"Shutdown completed successfully in {} {}.",
shutdown_secs,
unit_label(shutdown_secs, "second", "seconds")
);
/// Signal that triggered shutdown.
///
/// Returned by `wait_for_shutdown_signal` and passed to `perform_shutdown`,
/// which uses it to decide whether a statistics dump precedes the shutdown.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ShutdownSignal {
/// SIGINT (Ctrl+C)
Interrupt,
/// SIGTERM
Terminate,
/// SIGQUIT (with stats dump)
Quit,
}
impl std::fmt::Display for ShutdownSignal {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
ShutdownSignal::Interrupt => write!(f, "SIGINT"),
ShutdownSignal::Terminate => write!(f, "SIGTERM"),
ShutdownSignal::Quit => write!(f, "SIGQUIT"),
}
Err(e) => error!("Signal error: {}", e),
}
}
/// Blocks until a shutdown signal arrives, then runs the graceful shutdown
/// sequence for it.
///
/// `process_started_at`, `me_pool` and `stats` are forwarded to
/// `perform_shutdown` together with the received signal.
pub(crate) async fn wait_for_shutdown(
    process_started_at: Instant,
    me_pool: Option<Arc<MePool>>,
    stats: Arc<Stats>,
) {
    let received = wait_for_shutdown_signal().await;
    let stats_ref: &Stats = stats.as_ref();
    perform_shutdown(received, process_started_at, me_pool, stats_ref).await;
}
/// Resolves with the first of SIGINT, SIGTERM or SIGQUIT to be delivered.
///
/// # Panics
///
/// Panics if any of the three signal handlers cannot be registered.
#[cfg(unix)]
async fn wait_for_shutdown_signal() -> ShutdownSignal {
    let mut interrupt_stream =
        signal(SignalKind::interrupt()).expect("Failed to register SIGINT handler");
    let mut terminate_stream =
        signal(SignalKind::terminate()).expect("Failed to register SIGTERM handler");
    let mut quit_stream = signal(SignalKind::quit()).expect("Failed to register SIGQUIT handler");

    // Whichever stream yields first decides the reported signal.
    let received = tokio::select! {
        _ = interrupt_stream.recv() => ShutdownSignal::Interrupt,
        _ = terminate_stream.recv() => ShutdownSignal::Terminate,
        _ = quit_stream.recv() => ShutdownSignal::Quit,
    };
    received
}
/// Non-Unix variant: waits for Ctrl+C and always reports it as
/// `ShutdownSignal::Interrupt`.
///
/// # Panics
///
/// Panics if listening for Ctrl+C fails.
#[cfg(not(unix))]
async fn wait_for_shutdown_signal() -> ShutdownSignal {
signal::ctrl_c().await.expect("Failed to listen for Ctrl+C");
ShutdownSignal::Interrupt
}
/// Runs the graceful shutdown sequence for the given signal.
///
/// Steps, in order:
/// 1. log the received signal;
/// 2. for `ShutdownSignal::Quit`, dump runtime statistics first;
/// 3. log the process uptime;
/// 4. if an ME pool is present, broadcast close-connection to it, waiting at
///    most two seconds;
/// 5. log how long the shutdown itself took.
async fn perform_shutdown(
    signal: ShutdownSignal,
    process_started_at: Instant,
    me_pool: Option<Arc<MePool>>,
    stats: &Stats,
) {
    let began_at = Instant::now();
    info!(signal = %signal, "Received shutdown signal");

    // SIGQUIT additionally asks for a statistics dump before shutting down.
    if matches!(signal, ShutdownSignal::Quit) {
        dump_stats(stats, process_started_at);
    }

    info!("Shutting down...");
    let uptime_secs = process_started_at.elapsed().as_secs();
    info!("Uptime: {}", format_uptime(uptime_secs));

    // The broadcast is bounded so a stuck pool cannot stall process exit.
    if let Some(pool) = me_pool.as_ref() {
        let bounded_broadcast =
            tokio::time::timeout(Duration::from_secs(2), pool.shutdown_send_close_conn_all());
        if let Ok(total) = bounded_broadcast.await {
            info!(
                close_conn_sent = total,
                "ME shutdown: RPC_CLOSE_CONN broadcast completed"
            );
        } else {
            warn!("ME shutdown: RPC_CLOSE_CONN broadcast timed out");
        }
    }

    let elapsed_secs = began_at.elapsed().as_secs();
    info!(
        "Shutdown completed successfully in {} {}.",
        elapsed_secs,
        unit_label(elapsed_secs, "second", "seconds")
    );
}
/// Dumps runtime statistics to the log.
///
/// Emits a fixed sequence of `info!` lines between a start and an end marker:
/// process uptime, connection counters, ME keepalive counters and relay idle
/// counters, all read from `stats` at the time of the call.
///
/// * `stats` - counter source queried through its `get_*` accessors.
/// * `process_started_at` - start instant used to compute the uptime.
fn dump_stats(stats: &Stats, process_started_at: Instant) {
let uptime_secs = process_started_at.elapsed().as_secs();
info!("=== Runtime Statistics Dump ===");
info!("Uptime: {}", format_uptime(uptime_secs));
// Connection stats
info!(
"Connections: total={}, current={} (direct={}, me={}), bad={}",
stats.get_connects_all(),
stats.get_current_connections_total(),
stats.get_current_connections_direct(),
stats.get_current_connections_me(),
stats.get_connects_bad(),
);
// ME pool stats
info!(
"ME keepalive: sent={}, pong={}, failed={}, timeout={}",
stats.get_me_keepalive_sent(),
stats.get_me_keepalive_pong(),
stats.get_me_keepalive_failed(),
stats.get_me_keepalive_timeout(),
);
// Relay stats
info!(
"Relay idle: soft_mark={}, hard_close={}, pressure_evict={}",
stats.get_relay_idle_soft_mark_total(),
stats.get_relay_idle_hard_close_total(),
stats.get_relay_pressure_evict_total(),
);
info!("=== End Statistics Dump ===");
}
/// Starts a background task that reacts to the operational signals SIGUSR1
/// and SIGUSR2.
///
/// Neither signal triggers shutdown:
/// - SIGUSR1: log rotation acknowledgment (for external log rotation tools)
/// - SIGUSR2: dump runtime status to log
///
/// The spawned task loops forever; its join handle is not kept.
///
/// # Panics
///
/// The spawned task panics if either signal handler cannot be registered.
#[cfg(unix)]
pub(crate) fn spawn_signal_handlers(stats: Arc<Stats>, process_started_at: Instant) {
    tokio::spawn(async move {
        let mut usr1_stream =
            signal(SignalKind::user_defined1()).expect("Failed to register SIGUSR1 handler");
        let mut usr2_stream =
            signal(SignalKind::user_defined2()).expect("Failed to register SIGUSR2 handler");

        loop {
            tokio::select! {
                _ = usr1_stream.recv() => handle_sigusr1(),
                _ = usr2_stream.recv() => handle_sigusr2(&stats, process_started_at),
            }
        }
    });
}
/// No-op on non-Unix platforms.
///
/// Accepts the same arguments as the Unix variant so call sites need no
/// platform-specific code; both arguments are ignored.
#[cfg(not(unix))]
pub(crate) fn spawn_signal_handlers(_stats: Arc<Stats>, _process_started_at: Instant) {
// No SIGUSR1/SIGUSR2 on non-Unix
}
/// Handles SIGUSR1 - log rotation signal.
///
/// Intended for logrotate or similar tools that signal the process after
/// rotating log files. The handler only emits an acknowledgment log line;
/// it does not reopen or rotate any log file handles.
///
/// NOTE(review): the CLI offers file log destinations (`--log-file`,
/// `--log-file-daily`), so externally rotated files may need their handles
/// reopened here - confirm against the logging module.
#[cfg(unix)]
fn handle_sigusr1() {
info!("SIGUSR1 received - log rotation acknowledged");
// TODO: reopen file handles here if file-based logging requires it
}
/// Handles SIGUSR2 - dump runtime status.
///
/// Logs that the signal was received and then writes the same statistics
/// dump as `dump_stats(stats, process_started_at)`.
#[cfg(unix)]
fn handle_sigusr2(stats: &Stats, process_started_at: Instant) {
info!("SIGUSR2 received - dumping runtime status");
dump_stats(stats, process_started_at);
}

View File

@@ -4,6 +4,8 @@ mod api;
mod cli;
mod config;
mod crypto;
#[cfg(unix)]
mod daemon;
mod error;
mod ip_tracker;
#[cfg(test)]
@@ -15,11 +17,13 @@ mod ip_tracker_hotpath_adversarial_tests;
#[cfg(test)]
#[path = "tests/ip_tracker_regression_tests.rs"]
mod ip_tracker_regression_tests;
mod logging;
mod maestro;
mod metrics;
mod network;
mod protocol;
mod proxy;
mod service;
mod startup;
mod stats;
mod stream;
@@ -27,8 +31,49 @@ mod tls_front;
mod transport;
mod util;
#[tokio::main]
async fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
fn main() -> std::result::Result<(), Box<dyn std::error::Error>> {
// Install rustls crypto provider early
let _ = rustls::crypto::ring::default_provider().install_default();
maestro::run().await
let args: Vec<String> = std::env::args().skip(1).collect();
let cmd = cli::parse_command(&args);
// Handle subcommands that don't need the server (stop, reload, status, init)
if let Some(exit_code) = cli::execute_subcommand(&cmd) {
std::process::exit(exit_code);
}
#[cfg(unix)]
{
let daemon_opts = cmd.daemon_opts;
// Daemonize BEFORE runtime
if daemon_opts.should_daemonize() {
match daemon::daemonize(daemon_opts.working_dir.as_deref()) {
Ok(daemon::DaemonizeResult::Parent) => {
std::process::exit(0);
}
Ok(daemon::DaemonizeResult::Child) => {
// continue
}
Err(e) => {
eprintln!("[telemt] Daemonization failed: {}", e);
std::process::exit(1);
}
}
}
tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?
.block_on(maestro::run_with_daemon(daemon_opts))
}
#[cfg(not(unix))]
{
tokio::runtime::Builder::new_multi_thread()
.enable_all()
.build()?
.block_on(maestro::run())
}
}

View File

@@ -22,6 +22,7 @@ use crate::transport::{ListenOptions, create_listener};
pub async fn serve(
port: u16,
listen: Option<String>,
listen_backlog: u32,
stats: Arc<Stats>,
beobachten: Arc<BeobachtenStore>,
ip_tracker: Arc<UserIpTracker>,
@@ -40,7 +41,7 @@ pub async fn serve(
}
};
let is_ipv6 = addr.is_ipv6();
match bind_metrics_listener(addr, is_ipv6) {
match bind_metrics_listener(addr, is_ipv6, listen_backlog) {
Ok(listener) => {
info!("Metrics endpoint: http://{}/metrics and /beobachten", addr);
serve_listener(
@@ -60,7 +61,7 @@ pub async fn serve(
let mut listener_v6 = None;
let addr_v4 = SocketAddr::from(([0, 0, 0, 0], port));
match bind_metrics_listener(addr_v4, false) {
match bind_metrics_listener(addr_v4, false, listen_backlog) {
Ok(listener) => {
info!(
"Metrics endpoint: http://{}/metrics and /beobachten",
@@ -74,7 +75,7 @@ pub async fn serve(
}
let addr_v6 = SocketAddr::from(([0, 0, 0, 0, 0, 0, 0, 0], port));
match bind_metrics_listener(addr_v6, true) {
match bind_metrics_listener(addr_v6, true, listen_backlog) {
Ok(listener) => {
info!(
"Metrics endpoint: http://[::]:{}/metrics and /beobachten",
@@ -122,10 +123,15 @@ pub async fn serve(
}
}
fn bind_metrics_listener(addr: SocketAddr, ipv6_only: bool) -> std::io::Result<TcpListener> {
fn bind_metrics_listener(
addr: SocketAddr,
ipv6_only: bool,
listen_backlog: u32,
) -> std::io::Result<TcpListener> {
let options = ListenOptions {
reuse_port: false,
ipv6_only,
backlog: listen_backlog,
..Default::default()
};
let socket = create_listener(addr, &options)?;
@@ -1558,6 +1564,40 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_unexpected_total ME endpoint quarantines caused by unexpected writer removals"
);
let _ = writeln!(
out,
"# TYPE telemt_me_endpoint_quarantine_unexpected_total counter"
);
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_unexpected_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_unexpected_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_endpoint_quarantine_draining_suppressed_total Draining writer removals that skipped endpoint quarantine"
);
let _ = writeln!(
out,
"# TYPE telemt_me_endpoint_quarantine_draining_suppressed_total counter"
);
let _ = writeln!(
out,
"telemt_me_endpoint_quarantine_draining_suppressed_total {}",
if me_allows_normal {
stats.get_me_endpoint_quarantine_draining_suppressed_total()
} else {
0
}
);
let _ = writeln!(
out,
@@ -2318,6 +2358,20 @@ async fn render_metrics(stats: &Stats, config: &ProxyConfig, ip_tracker: &UserIp
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_hybrid_timeout_total ME hybrid route timeouts after bounded retry window"
);
let _ = writeln!(out, "# TYPE telemt_me_hybrid_timeout_total counter");
let _ = writeln!(
out,
"telemt_me_hybrid_timeout_total {}",
if me_allows_normal {
stats.get_me_hybrid_timeout_total()
} else {
0
}
);
let _ = writeln!(
out,
"# HELP telemt_me_async_recovery_trigger_total Async ME recovery trigger attempts from route path"
@@ -2608,6 +2662,9 @@ mod tests {
stats.increment_me_d2c_write_mode(crate::stats::MeD2cWriteMode::Coalesced);
stats.increment_me_d2c_quota_reject_total(crate::stats::MeD2cQuotaRejectStage::PostWrite);
stats.observe_me_d2c_frame_buf_shrink(4096);
stats.increment_me_endpoint_quarantine_total();
stats.increment_me_endpoint_quarantine_unexpected_total();
stats.increment_me_endpoint_quarantine_draining_suppressed_total();
stats.increment_user_connects("alice");
stats.increment_user_curr_connects("alice");
stats.add_user_octets_from("alice", 1024);
@@ -2658,6 +2715,9 @@ mod tests {
assert!(output.contains("telemt_me_d2c_quota_reject_total{stage=\"post_write\"} 1"));
assert!(output.contains("telemt_me_d2c_frame_buf_shrink_total 1"));
assert!(output.contains("telemt_me_d2c_frame_buf_shrink_bytes_total 4096"));
assert!(output.contains("telemt_me_endpoint_quarantine_total 1"));
assert!(output.contains("telemt_me_endpoint_quarantine_unexpected_total 1"));
assert!(output.contains("telemt_me_endpoint_quarantine_draining_suppressed_total 1"));
assert!(output.contains("telemt_user_connections_total{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_connections_current{user=\"alice\"} 1"));
assert!(output.contains("telemt_user_octets_from_client{user=\"alice\"} 1024"));
@@ -2724,6 +2784,12 @@ mod tests {
assert!(output.contains("# TYPE telemt_me_d2c_write_mode_total counter"));
assert!(output.contains("# TYPE telemt_me_d2c_batch_frames_bucket_total counter"));
assert!(output.contains("# TYPE telemt_me_d2c_flush_duration_us_bucket_total counter"));
assert!(output.contains("# TYPE telemt_me_endpoint_quarantine_total counter"));
assert!(output.contains("# TYPE telemt_me_endpoint_quarantine_unexpected_total counter"));
assert!(
output
.contains("# TYPE telemt_me_endpoint_quarantine_draining_suppressed_total counter")
);
assert!(output.contains("# TYPE telemt_me_writer_removed_total counter"));
assert!(
output

View File

@@ -416,16 +416,68 @@ where
debug!(peer = %real_peer, "New connection (generic stream)");
let first_byte = if config.timeouts.client_first_byte_idle_secs == 0 {
None
} else {
let idle_timeout = Duration::from_secs(config.timeouts.client_first_byte_idle_secs);
let mut first_byte = [0u8; 1];
match timeout(idle_timeout, stream.read(&mut first_byte)).await {
Ok(Ok(0)) => {
debug!(peer = %real_peer, "Connection closed before first client byte");
return Ok(());
}
Ok(Ok(_)) => Some(first_byte[0]),
Ok(Err(e))
if matches!(
e.kind(),
std::io::ErrorKind::UnexpectedEof
| std::io::ErrorKind::ConnectionReset
| std::io::ErrorKind::ConnectionAborted
| std::io::ErrorKind::BrokenPipe
| std::io::ErrorKind::NotConnected
) =>
{
debug!(
peer = %real_peer,
error = %e,
"Connection closed before first client byte"
);
return Ok(());
}
Ok(Err(e)) => {
debug!(
peer = %real_peer,
error = %e,
"Failed while waiting for first client byte"
);
return Err(ProxyError::Io(e));
}
Err(_) => {
debug!(
peer = %real_peer,
idle_secs = config.timeouts.client_first_byte_idle_secs,
"Closing idle pooled connection before first client byte"
);
return Ok(());
}
}
};
let handshake_timeout = handshake_timeout_with_mask_grace(&config);
let stats_for_timeout = stats.clone();
let config_for_timeout = config.clone();
let beobachten_for_timeout = beobachten.clone();
let peer_for_timeout = real_peer.ip();
// Phase 1: handshake (with timeout)
// Phase 2: active handshake (with timeout after the first client byte)
let outcome = match timeout(handshake_timeout, async {
let mut first_bytes = [0u8; 5];
stream.read_exact(&mut first_bytes).await?;
if let Some(first_byte) = first_byte {
first_bytes[0] = first_byte;
stream.read_exact(&mut first_bytes[1..]).await?;
} else {
stream.read_exact(&mut first_bytes).await?;
}
let is_tls = tls::is_tls_handshake(&first_bytes[..3]);
debug!(peer = %real_peer, is_tls = is_tls, "Handshake type detected");
@@ -736,36 +788,9 @@ impl RunningClientHandler {
debug!(peer = %peer, error = %e, "Failed to configure client socket");
}
let handshake_timeout = handshake_timeout_with_mask_grace(&self.config);
let stats = self.stats.clone();
let config_for_timeout = self.config.clone();
let beobachten_for_timeout = self.beobachten.clone();
let peer_for_timeout = peer.ip();
// Phase 1: handshake (with timeout)
let outcome = match timeout(handshake_timeout, self.do_handshake()).await {
Ok(Ok(outcome)) => outcome,
Ok(Err(e)) => {
debug!(peer = %peer, error = %e, "Handshake failed");
record_handshake_failure_class(
&beobachten_for_timeout,
&config_for_timeout,
peer_for_timeout,
&e,
);
return Err(e);
}
Err(_) => {
stats.increment_handshake_timeouts();
debug!(peer = %peer, "Handshake timeout");
record_beobachten_class(
&beobachten_for_timeout,
&config_for_timeout,
peer_for_timeout,
"other",
);
return Err(ProxyError::TgHandshakeTimeout);
}
let outcome = match self.do_handshake().await? {
Some(outcome) => outcome,
None => return Ok(()),
};
// Phase 2: relay (WITHOUT handshake timeout — relay has its own activity timeouts)
@@ -774,7 +799,7 @@ impl RunningClientHandler {
}
}
async fn do_handshake(mut self) -> Result<HandshakeOutcome> {
async fn do_handshake(mut self) -> Result<Option<HandshakeOutcome>> {
let mut local_addr = self.stream.local_addr().map_err(ProxyError::Io)?;
if self.proxy_protocol_enabled {
@@ -849,19 +874,108 @@ impl RunningClientHandler {
}
}
let mut first_bytes = [0u8; 5];
self.stream.read_exact(&mut first_bytes).await?;
let is_tls = tls::is_tls_handshake(&first_bytes[..3]);
let peer = self.peer;
debug!(peer = %peer, is_tls = is_tls, "Handshake type detected");
if is_tls {
self.handle_tls_client(first_bytes, local_addr).await
let first_byte = if self.config.timeouts.client_first_byte_idle_secs == 0 {
None
} else {
self.handle_direct_client(first_bytes, local_addr).await
}
let idle_timeout =
Duration::from_secs(self.config.timeouts.client_first_byte_idle_secs);
let mut first_byte = [0u8; 1];
match timeout(idle_timeout, self.stream.read(&mut first_byte)).await {
Ok(Ok(0)) => {
debug!(peer = %self.peer, "Connection closed before first client byte");
return Ok(None);
}
Ok(Ok(_)) => Some(first_byte[0]),
Ok(Err(e))
if matches!(
e.kind(),
std::io::ErrorKind::UnexpectedEof
| std::io::ErrorKind::ConnectionReset
| std::io::ErrorKind::ConnectionAborted
| std::io::ErrorKind::BrokenPipe
| std::io::ErrorKind::NotConnected
) =>
{
debug!(
peer = %self.peer,
error = %e,
"Connection closed before first client byte"
);
return Ok(None);
}
Ok(Err(e)) => {
debug!(
peer = %self.peer,
error = %e,
"Failed while waiting for first client byte"
);
return Err(ProxyError::Io(e));
}
Err(_) => {
debug!(
peer = %self.peer,
idle_secs = self.config.timeouts.client_first_byte_idle_secs,
"Closing idle pooled connection before first client byte"
);
return Ok(None);
}
}
};
let handshake_timeout = handshake_timeout_with_mask_grace(&self.config);
let stats = self.stats.clone();
let config_for_timeout = self.config.clone();
let beobachten_for_timeout = self.beobachten.clone();
let peer_for_timeout = self.peer.ip();
let peer_for_log = self.peer;
let outcome = match timeout(handshake_timeout, async {
let mut first_bytes = [0u8; 5];
if let Some(first_byte) = first_byte {
first_bytes[0] = first_byte;
self.stream.read_exact(&mut first_bytes[1..]).await?;
} else {
self.stream.read_exact(&mut first_bytes).await?;
}
let is_tls = tls::is_tls_handshake(&first_bytes[..3]);
let peer = self.peer;
debug!(peer = %peer, is_tls = is_tls, "Handshake type detected");
if is_tls {
self.handle_tls_client(first_bytes, local_addr).await
} else {
self.handle_direct_client(first_bytes, local_addr).await
}
})
.await
{
Ok(Ok(outcome)) => outcome,
Ok(Err(e)) => {
debug!(peer = %peer_for_log, error = %e, "Handshake failed");
record_handshake_failure_class(
&beobachten_for_timeout,
&config_for_timeout,
peer_for_timeout,
&e,
);
return Err(e);
}
Err(_) => {
stats.increment_handshake_timeouts();
debug!(peer = %peer_for_log, "Handshake timeout");
record_beobachten_class(
&beobachten_for_timeout,
&config_for_timeout,
peer_for_timeout,
"other",
);
return Err(ProxyError::TgHandshakeTimeout);
}
};
Ok(Some(outcome))
}
async fn handle_tls_client(
@@ -1252,7 +1366,11 @@ impl RunningClientHandler {
.access
.user_max_tcp_conns
.get(user)
.map(|v| *v as u64);
.copied()
.filter(|limit| *limit > 0)
.or((config.access.user_max_tcp_conns_global_each > 0)
.then_some(config.access.user_max_tcp_conns_global_each))
.map(|v| v as u64);
if !stats.try_acquire_user_curr_connects(user, limit) {
return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(),
@@ -1311,7 +1429,11 @@ impl RunningClientHandler {
.access
.user_max_tcp_conns
.get(user)
.map(|v| *v as u64);
.copied()
.filter(|limit| *limit > 0)
.or((config.access.user_max_tcp_conns_global_each > 0)
.then_some(config.access.user_max_tcp_conns_global_each))
.map(|v| v as u64);
if !stats.try_acquire_user_curr_connects(user, limit) {
return Err(ProxyError::ConnectionLimitExceeded {
user: user.to_string(),

View File

@@ -13,7 +13,7 @@ use std::sync::Arc;
use std::sync::{Mutex, OnceLock};
use std::time::{Duration, Instant};
use tokio::io::{AsyncRead, AsyncWrite, AsyncWriteExt};
use tracing::{debug, trace, warn};
use tracing::{debug, info, trace, warn};
use zeroize::{Zeroize, Zeroizing};
use crate::config::{ProxyConfig, UnknownSniAction};
@@ -28,6 +28,8 @@ use rand::RngExt;
const ACCESS_SECRET_BYTES: usize = 16;
static INVALID_SECRET_WARNED: OnceLock<Mutex<HashSet<(String, String)>>> = OnceLock::new();
const UNKNOWN_SNI_WARN_COOLDOWN_SECS: u64 = 5;
static UNKNOWN_SNI_WARN_NEXT_ALLOWED: OnceLock<Mutex<Option<Instant>>> = OnceLock::new();
#[cfg(test)]
const WARNED_SECRET_MAX_ENTRIES: usize = 64;
#[cfg(not(test))]
@@ -86,6 +88,24 @@ fn auth_probe_saturation_state_lock()
.unwrap_or_else(|poisoned| poisoned.into_inner())
}
/// Locks and returns the process-wide "next allowed warn" slot for unknown
/// SNI logging, initialising it to `None` on first use.
///
/// A poisoned mutex is recovered by taking its inner guard instead of
/// panicking.
fn unknown_sni_warn_state_lock() -> std::sync::MutexGuard<'static, Option<Instant>> {
    let slot = UNKNOWN_SNI_WARN_NEXT_ALLOWED.get_or_init(|| Mutex::new(None));
    match slot.lock() {
        Ok(guard) => guard,
        Err(poisoned) => poisoned.into_inner(),
    }
}
fn should_emit_unknown_sni_warn(now: Instant) -> bool {
let mut guard = unknown_sni_warn_state_lock();
if let Some(next_allowed) = *guard
&& now < next_allowed
{
return false;
}
*guard = Some(now + Duration::from_secs(UNKNOWN_SNI_WARN_COOLDOWN_SECS));
true
}
fn normalize_auth_probe_ip(peer_ip: IpAddr) -> IpAddr {
match peer_ip {
IpAddr::V4(ip) => IpAddr::V4(ip),
@@ -412,6 +432,25 @@ fn auth_probe_test_lock() -> &'static Mutex<()> {
TEST_LOCK.get_or_init(|| Mutex::new(()))
}
/// Test-only: returns a process-wide mutex, created on first use.
///
/// NOTE(review): presumably held by tests that touch the shared unknown-SNI
/// warn state so they do not interleave - confirm against the test callers.
#[cfg(test)]
fn unknown_sni_warn_test_lock() -> &'static Mutex<()> {
static TEST_LOCK: OnceLock<Mutex<()>> = OnceLock::new();
TEST_LOCK.get_or_init(|| Mutex::new(()))
}
/// Test-only: resets the unknown-SNI warn cooldown so the next check may warn
/// again.
///
/// Does nothing when the shared state has never been initialised, which
/// avoids creating it as a side effect.
#[cfg(test)]
fn clear_unknown_sni_warn_state_for_testing() {
    if UNKNOWN_SNI_WARN_NEXT_ALLOWED.get().is_none() {
        return;
    }
    *unknown_sni_warn_state_lock() = None;
}
/// Test-only wrapper that forwards `now` to `should_emit_unknown_sni_warn`
/// and returns its result unchanged.
#[cfg(test)]
fn should_emit_unknown_sni_warn_for_testing(now: Instant) -> bool {
should_emit_unknown_sni_warn(now)
}
#[cfg(test)]
fn clear_warned_secrets_for_testing() {
if let Some(warned) = INVALID_SECRET_WARNED.get()
@@ -658,12 +697,25 @@ where
if client_sni.is_some() && matched_tls_domain.is_none() && preferred_user_hint.is_none() {
auth_probe_record_failure(peer.ip(), Instant::now());
maybe_apply_server_hello_delay(config).await;
debug!(
peer = %peer,
sni = ?client_sni,
action = ?config.censorship.unknown_sni_action,
"TLS handshake rejected by unknown SNI policy"
);
let sni = client_sni.as_deref().unwrap_or_default();
let log_now = Instant::now();
if should_emit_unknown_sni_warn(log_now) {
warn!(
peer = %peer,
sni = %sni,
unknown_sni = true,
unknown_sni_action = ?config.censorship.unknown_sni_action,
"TLS handshake rejected by unknown SNI policy"
);
} else {
info!(
peer = %peer,
sni = %sni,
unknown_sni = true,
unknown_sni_action = ?config.censorship.unknown_sni_action,
"TLS handshake rejected by unknown SNI policy"
);
}
return match config.censorship.unknown_sni_action {
UnknownSniAction::Drop => HandshakeResult::Error(ProxyError::UnknownTlsSni),
UnknownSniAction::Mask => HandshakeResult::BadClient { reader, writer },

View File

@@ -4,7 +4,7 @@ use std::collections::{BTreeSet, HashMap};
use std::future::Future;
use std::hash::{BuildHasher, Hash};
use std::net::{IpAddr, SocketAddr};
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::{Arc, Mutex, OnceLock};
use std::time::{Duration, Instant};
@@ -36,7 +36,6 @@ enum C2MeCommand {
const DESYNC_DEDUP_WINDOW: Duration = Duration::from_secs(60);
const DESYNC_DEDUP_MAX_ENTRIES: usize = 65_536;
const DESYNC_DEDUP_PRUNE_SCAN_LIMIT: usize = 1024;
const DESYNC_FULL_CACHE_EMIT_MIN_INTERVAL: Duration = Duration::from_millis(1000);
const DESYNC_ERROR_CLASS: &str = "frame_too_large_crypto_desync";
const C2ME_CHANNEL_CAPACITY_FALLBACK: usize = 128;
@@ -46,10 +45,6 @@ const RELAY_IDLE_IO_POLL_MAX: Duration = Duration::from_secs(1);
const TINY_FRAME_DEBT_PER_TINY: u32 = 8;
const TINY_FRAME_DEBT_LIMIT: u32 = 512;
#[cfg(test)]
const C2ME_SEND_TIMEOUT: Duration = Duration::from_millis(50);
#[cfg(not(test))]
const C2ME_SEND_TIMEOUT: Duration = Duration::from_secs(5);
#[cfg(test)]
const RELAY_TEST_STEP_TIMEOUT: Duration = Duration::from_secs(1);
const ME_D2C_FLUSH_BATCH_MAX_FRAMES_MIN: usize = 1;
const ME_D2C_FLUSH_BATCH_MAX_BYTES_MIN: usize = 4096;
@@ -57,12 +52,21 @@ const ME_D2C_FRAME_BUF_SHRINK_HYSTERESIS_FACTOR: usize = 2;
const ME_D2C_SINGLE_WRITE_COALESCE_MAX_BYTES: usize = 128 * 1024;
const QUOTA_RESERVE_SPIN_RETRIES: usize = 32;
static DESYNC_DEDUP: OnceLock<DashMap<u64, Instant>> = OnceLock::new();
static DESYNC_DEDUP_PREVIOUS: OnceLock<DashMap<u64, Instant>> = OnceLock::new();
static DESYNC_HASHER: OnceLock<RandomState> = OnceLock::new();
static DESYNC_FULL_CACHE_LAST_EMIT_AT: OnceLock<Mutex<Option<Instant>>> = OnceLock::new();
static DESYNC_DEDUP_EVER_SATURATED: OnceLock<AtomicBool> = OnceLock::new();
static DESYNC_DEDUP_ROTATION_STATE: OnceLock<Mutex<DesyncDedupRotationState>> = OnceLock::new();
// Invariant for async callers:
// this std::sync::Mutex is allowed only because critical sections are short,
// synchronous, and MUST never cross an `.await`.
static RELAY_IDLE_CANDIDATE_REGISTRY: OnceLock<Mutex<RelayIdleCandidateRegistry>> = OnceLock::new();
static RELAY_IDLE_MARK_SEQ: AtomicU64 = AtomicU64::new(0);
/// Rotation bookkeeping for the current/previous desync-dedup buckets,
/// stored behind `DESYNC_DEDUP_ROTATION_STATE`.
#[derive(Default)]
struct DesyncDedupRotationState {
/// Instant at which the current bucket was started. `None` (the default)
/// means no bucket has been started yet, which makes the next dedup check
/// rotate immediately.
current_started_at: Option<Instant>,
}
struct RelayForensicsState {
trace_id: u64,
conn_id: u64,
@@ -95,6 +99,7 @@ fn relay_idle_candidate_registry() -> &'static Mutex<RelayIdleCandidateRegistry>
fn relay_idle_candidate_registry_lock() -> std::sync::MutexGuard<'static, RelayIdleCandidateRegistry>
{
// Keep lock scope narrow and synchronous: callers must drop guard before any `.await`.
let registry = relay_idle_candidate_registry();
match registry.lock() {
Ok(guard) => guard,
@@ -312,64 +317,76 @@ fn should_emit_full_desync(key: u64, all_full: bool, now: Instant) -> bool {
return true;
}
let dedup = DESYNC_DEDUP.get_or_init(DashMap::new);
let saturated_before = dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES;
let ever_saturated = DESYNC_DEDUP_EVER_SATURATED.get_or_init(|| AtomicBool::new(false));
if saturated_before {
ever_saturated.store(true, Ordering::Relaxed);
}
let dedup_current = DESYNC_DEDUP.get_or_init(DashMap::new);
let dedup_previous = DESYNC_DEDUP_PREVIOUS.get_or_init(DashMap::new);
let rotation_state =
DESYNC_DEDUP_ROTATION_STATE.get_or_init(|| Mutex::new(DesyncDedupRotationState::default()));
if let Some(mut seen_at) = dedup.get_mut(&key) {
if now.duration_since(*seen_at) >= DESYNC_DEDUP_WINDOW {
*seen_at = now;
return true;
let mut state = match rotation_state.lock() {
Ok(guard) => guard,
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = DesyncDedupRotationState::default();
rotation_state.clear_poison();
guard
}
return false;
}
if dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES {
let mut stale_keys = Vec::new();
let mut oldest_candidate: Option<(u64, Instant)> = None;
for entry in dedup.iter().take(DESYNC_DEDUP_PRUNE_SCAN_LIMIT) {
let key = *entry.key();
let seen_at = *entry.value();
match oldest_candidate {
Some((_, oldest_seen)) if seen_at >= oldest_seen => {}
_ => oldest_candidate = Some((key, seen_at)),
}
if now.duration_since(seen_at) >= DESYNC_DEDUP_WINDOW {
stale_keys.push(*entry.key());
}
}
for stale_key in stale_keys {
dedup.remove(&stale_key);
}
if dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES {
let Some((evict_key, _)) = oldest_candidate else {
return false;
};
dedup.remove(&evict_key);
dedup.insert(key, now);
return should_emit_full_desync_full_cache(now);
}
}
dedup.insert(key, now);
let saturated_after = dedup.len() >= DESYNC_DEDUP_MAX_ENTRIES;
// Preserve the first sequential insert that reaches capacity as a normal
// emit, while still gating concurrent newcomer churn after the cache has
// ever been observed at saturation.
let was_ever_saturated = if saturated_after {
ever_saturated.swap(true, Ordering::Relaxed)
} else {
ever_saturated.load(Ordering::Relaxed)
};
if saturated_before || (saturated_after && was_ever_saturated) {
let rotate_now = match state.current_started_at {
Some(current_started_at) => match now.checked_duration_since(current_started_at) {
Some(elapsed) => elapsed >= DESYNC_DEDUP_WINDOW,
None => true,
},
None => true,
};
if rotate_now {
dedup_previous.clear();
for entry in dedup_current.iter() {
dedup_previous.insert(*entry.key(), *entry.value());
}
dedup_current.clear();
state.current_started_at = Some(now);
}
if let Some(seen_at) = dedup_current.get(&key).map(|entry| *entry.value()) {
let within_window = match now.checked_duration_since(seen_at) {
Some(elapsed) => elapsed < DESYNC_DEDUP_WINDOW,
None => true,
};
if within_window {
return false;
}
dedup_current.insert(key, now);
return true;
}
if let Some(seen_at) = dedup_previous.get(&key).map(|entry| *entry.value()) {
let within_window = match now.checked_duration_since(seen_at) {
Some(elapsed) => elapsed < DESYNC_DEDUP_WINDOW,
None => true,
};
if within_window {
// Keep the original timestamp when promoting from previous bucket,
// so dedup expiry remains tied to first-seen time.
dedup_current.insert(key, seen_at);
return false;
}
dedup_previous.remove(&key);
}
if dedup_current.len() >= DESYNC_DEDUP_MAX_ENTRIES {
// Bounded eviction path: rotate buckets instead of scanning/evicting
// arbitrary entries from a saturated single map.
dedup_previous.clear();
for entry in dedup_current.iter() {
dedup_previous.insert(*entry.key(), *entry.value());
}
dedup_current.clear();
state.current_started_at = Some(now);
dedup_current.insert(key, now);
should_emit_full_desync_full_cache(now)
} else {
dedup_current.insert(key, now);
true
}
}
@@ -405,8 +422,20 @@ fn clear_desync_dedup_for_testing() {
if let Some(dedup) = DESYNC_DEDUP.get() {
dedup.clear();
}
if let Some(ever_saturated) = DESYNC_DEDUP_EVER_SATURATED.get() {
ever_saturated.store(false, Ordering::Relaxed);
if let Some(dedup_previous) = DESYNC_DEDUP_PREVIOUS.get() {
dedup_previous.clear();
}
if let Some(rotation_state) = DESYNC_DEDUP_ROTATION_STATE.get() {
match rotation_state.lock() {
Ok(mut guard) => {
*guard = DesyncDedupRotationState::default();
}
Err(poisoned) => {
let mut guard = poisoned.into_inner();
*guard = DesyncDedupRotationState::default();
rotation_state.clear_poison();
}
}
}
if let Some(last_emit_at) = DESYNC_FULL_CACHE_LAST_EMIT_AT.get() {
match last_emit_at.lock() {
@@ -615,6 +644,7 @@ pub(crate) fn relay_idle_pressure_test_scope() -> std::sync::MutexGuard<'static,
async fn enqueue_c2me_command(
tx: &mpsc::Sender<C2MeCommand>,
cmd: C2MeCommand,
send_timeout: Option<Duration>,
) -> std::result::Result<(), mpsc::error::SendError<C2MeCommand>> {
match tx.try_send(cmd) {
Ok(()) => Ok(()),
@@ -625,12 +655,18 @@ async fn enqueue_c2me_command(
if tx.capacity() <= C2ME_SOFT_PRESSURE_MIN_FREE_SLOTS {
tokio::task::yield_now().await;
}
match timeout(C2ME_SEND_TIMEOUT, tx.reserve()).await {
Ok(Ok(permit)) => {
let reserve_result = match send_timeout {
Some(send_timeout) => match timeout(send_timeout, tx.reserve()).await {
Ok(result) => result,
Err(_) => return Err(mpsc::error::SendError(cmd)),
},
None => tx.reserve().await,
};
match reserve_result {
Ok(permit) => {
permit.send(cmd);
Ok(())
}
Ok(Err(_)) => Err(mpsc::error::SendError(cmd)),
Err(_) => Err(mpsc::error::SendError(cmd)),
}
}
@@ -756,6 +792,10 @@ where
.general
.me_c2me_channel_capacity
.max(C2ME_CHANNEL_CAPACITY_FALLBACK);
let c2me_send_timeout = match config.general.me_c2me_send_timeout_ms {
0 => None,
timeout_ms => Some(Duration::from_millis(timeout_ms)),
};
let (c2me_tx, mut c2me_rx) = mpsc::channel::<C2MeCommand>(c2me_channel_capacity);
let me_pool_c2me = me_pool.clone();
let c2me_sender = tokio::spawn(async move {
@@ -1132,7 +1172,7 @@ where
user = %user,
"Middle-relay pressure eviction for idle-candidate session"
);
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close, c2me_send_timeout).await;
main_result = Err(ProxyError::Proxy(
"middle-relay session evicted under pressure (idle-candidate)".to_string(),
));
@@ -1151,7 +1191,7 @@ where
"Cutover affected middle session, closing client connection"
);
tokio::time::sleep(delay).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close, c2me_send_timeout).await;
main_result = Err(ProxyError::Proxy(ROUTE_SWITCH_ERROR_MSG.to_string()));
break;
}
@@ -1209,8 +1249,12 @@ where
flags |= RPC_FLAG_NOT_ENCRYPTED;
}
// Keep client read loop lightweight: route heavy ME send path via a dedicated task.
if enqueue_c2me_command(&c2me_tx, C2MeCommand::Data { payload, flags })
.await
if enqueue_c2me_command(
&c2me_tx,
C2MeCommand::Data { payload, flags },
c2me_send_timeout,
)
.await
.is_err()
{
main_result = Err(ProxyError::Proxy("ME sender channel closed".into()));
@@ -1220,7 +1264,9 @@ where
Ok(None) => {
debug!(conn_id, "Client EOF");
client_closed = true;
let _ = enqueue_c2me_command(&c2me_tx, C2MeCommand::Close).await;
let _ =
enqueue_c2me_command(&c2me_tx, C2MeCommand::Close, c2me_send_timeout)
.await;
break;
}
Err(e) => {

View File

@@ -94,6 +94,7 @@ async fn adversarial_tls_handshake_timeout_during_masking_delay() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -141,6 +142,7 @@ async fn blackhat_proxy_protocol_slowloris_timeout() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -193,6 +195,7 @@ async fn negative_proxy_protocol_enabled_but_client_sends_tls_hello() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -239,6 +242,7 @@ async fn edge_client_stream_exactly_4_bytes_eof() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -282,6 +286,7 @@ async fn edge_client_stream_tls_header_valid_but_body_1_byte_short_eof() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -328,6 +333,7 @@ async fn integration_non_tls_modes_disabled_immediately_masks() {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -47,6 +47,7 @@ async fn invariant_tls_clienthello_truncation_exact_boundary_triggers_masking()
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -177,6 +178,7 @@ async fn invariant_direct_mode_partial_header_eof_is_error_not_bad_connect() {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -40,6 +40,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -36,6 +36,7 @@ fn build_harness(config: ProxyConfig) -> PipelineHarness {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -34,6 +34,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -47,6 +47,7 @@ fn build_harness(secret_hex: &str, mask_port: u16) -> PipelineHarness {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -25,6 +25,7 @@ fn make_test_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -48,6 +48,7 @@ fn build_harness(secret_hex: &str, mask_port: u16) -> RedTeamHarness {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -237,6 +238,7 @@ async fn redteam_03_masking_duration_must_be_less_than_1ms_when_backend_down() {
1,
1,
1,
10,
1,
false,
Arc::new(Stats::new()),
@@ -477,6 +479,7 @@ async fn measure_invalid_probe_duration_ms(delay_ms: u64, tls_len: u16, body_sen
1,
1,
1,
10,
1,
false,
Arc::new(Stats::new()),
@@ -550,6 +553,7 @@ async fn capture_forwarded_probe_len(tls_len: u16, body_sent: usize) -> usize {
1,
1,
1,
10,
1,
false,
Arc::new(Stats::new()),

View File

@@ -22,6 +22,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -20,6 +20,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -34,6 +34,7 @@ fn new_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -100,6 +100,7 @@ async fn blackhat_proxy_protocol_massive_garbage_rejected_quickly() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -146,6 +147,7 @@ async fn edge_tls_body_immediate_eof_triggers_masking_and_bad_connect() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -195,6 +197,7 @@ async fn security_classic_mode_disabled_masks_valid_length_payload() {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -1,8 +1,10 @@
use super::*;
use crate::config::{UpstreamConfig, UpstreamType};
use crate::crypto::AesCtr;
use crate::crypto::sha256_hmac;
use crate::protocol::constants::ProtoTag;
use crate::crypto::{AesCtr, sha256, sha256_hmac};
use crate::protocol::constants::{
DC_IDX_POS, HANDSHAKE_LEN, IV_LEN, PREKEY_LEN, PROTO_TAG_POS, ProtoTag, SKIP_LEN,
TLS_RECORD_CHANGE_CIPHER,
};
use crate::protocol::tls;
use crate::proxy::handshake::HandshakeSuccess;
use crate::stream::{CryptoReader, CryptoWriter};
@@ -339,6 +341,7 @@ async fn relay_task_abort_releases_user_gate_and_ip_reservation() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -452,6 +455,7 @@ async fn relay_cutover_releases_user_gate_and_ip_reservation() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -575,6 +579,7 @@ async fn integration_route_cutover_and_quota_overlap_fails_closed_and_releases_s
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -744,6 +749,7 @@ async fn proxy_protocol_header_is_rejected_when_trust_list_is_empty() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -820,6 +826,7 @@ async fn proxy_protocol_header_from_untrusted_peer_range_is_rejected_under_load(
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -979,6 +986,7 @@ async fn short_tls_probe_is_masked_through_client_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1066,6 +1074,7 @@ async fn tls12_record_probe_is_masked_through_client_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1151,6 +1160,7 @@ async fn handle_client_stream_increments_connects_all_exactly_once() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1243,6 +1253,7 @@ async fn running_client_handler_increments_connects_all_exactly_once() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1310,6 +1321,163 @@ async fn running_client_handler_increments_connects_all_exactly_once() {
);
}
// Regression test for the generic stream entry point (`handle_client_stream`):
// a peer that connects and never sends a byte must end with an `Ok` result and
// must not be counted as a handshake timeout or as a bad connection.
// Runs with paused tokio time so the idle window can be skipped deterministically.
#[tokio::test(start_paused = true)]
async fn idle_pooled_connection_closes_cleanly_in_generic_stream_path() {
let mut cfg = ProxyConfig::default();
cfg.general.beobachten = false;
// Shortest first-byte idle window (going by the field name), so advancing
// the clock by two seconds below is guaranteed to exceed it.
cfg.timeouts.client_first_byte_idle_secs = 1;
let config = Arc::new(cfg);
let stats = Arc::new(Stats::new());
let upstream_manager = Arc::new(UpstreamManager::new(
vec![UpstreamConfig {
upstream_type: UpstreamType::Direct {
interface: None,
bind_addresses: None,
},
weight: 1,
enabled: true,
scopes: String::new(),
selected_scope: String::new(),
}],
1,
1,
1,
// NOTE(review): presumably the upstream connect timeout in seconds that
// this change threads through `UpstreamManager::new` — confirm against
// the constructor signature.
10,
1,
false,
stats.clone(),
));
let replay_checker = Arc::new(ReplayChecker::new(128, Duration::from_secs(60)));
let buffer_pool = Arc::new(BufferPool::new());
let rng = Arc::new(SecureRandom::new());
let route_runtime = Arc::new(RouteRuntimeController::new(RelayRouteMode::Direct));
let ip_tracker = Arc::new(UserIpTracker::new());
let beobachten = Arc::new(BeobachtenStore::new());
// `_client_side` is bound (not discarded) so the peer half stays open and
// silent for the whole test instead of producing an immediate EOF.
let (server_side, _client_side) = duplex(4096);
let peer: SocketAddr = "198.51.100.169:55200".parse().unwrap();
let handler = tokio::spawn(handle_client_stream(
server_side,
peer,
config,
stats.clone(),
upstream_manager,
replay_checker,
buffer_pool,
rng,
None,
route_runtime,
None,
ip_tracker,
beobachten,
false,
));
// Let the spawned handler arm the idle-phase timeout before advancing paused time.
tokio::task::yield_now().await;
tokio::time::advance(Duration::from_secs(2)).await;
tokio::task::yield_now().await;
// Bounded wait: a handler that never finishes fails the test here instead
// of hanging it.
let result = tokio::time::timeout(Duration::from_secs(1), handler)
.await
.unwrap()
.unwrap();
assert!(result.is_ok());
assert_eq!(stats.get_handshake_timeouts(), 0);
assert_eq!(stats.get_connects_bad(), 0);
}
// Same scenario as the generic-stream idle test, but driven through
// `ClientHandler::run` over a real loopback TCP connection: a client that
// connects and stays silent must yield an `Ok` result with no handshake
// timeout and no bad-connection counter increments.
#[tokio::test(start_paused = true)]
async fn idle_pooled_connection_closes_cleanly_in_client_handler_path() {
// Port 0 lets the OS pick a free port; the actual address is read back below.
let front_listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
let front_addr = front_listener.local_addr().unwrap();
let mut cfg = ProxyConfig::default();
cfg.general.beobachten = false;
// Shortest first-byte idle window (going by the field name), exceeded by
// the two-second clock advance below.
cfg.timeouts.client_first_byte_idle_secs = 1;
let config = Arc::new(cfg);
let stats = Arc::new(Stats::new());
let upstream_manager = Arc::new(UpstreamManager::new(
vec![UpstreamConfig {
upstream_type: UpstreamType::Direct {
interface: None,
bind_addresses: None,
},
weight: 1,
enabled: true,
scopes: String::new(),
selected_scope: String::new(),
}],
1,
1,
1,
// NOTE(review): presumably the upstream connect timeout in seconds that
// this change threads through `UpstreamManager::new` — confirm against
// the constructor signature.
10,
1,
false,
stats.clone(),
));
let replay_checker = Arc::new(ReplayChecker::new(128, Duration::from_secs(60)));
let buffer_pool = Arc::new(BufferPool::new());
let rng = Arc::new(SecureRandom::new());
let route_runtime = Arc::new(RouteRuntimeController::new(RelayRouteMode::Direct));
let ip_tracker = Arc::new(UserIpTracker::new());
let beobachten = Arc::new(BeobachtenStore::new());
// Clone the shared handles into the block so the originals (notably `stats`)
// remain usable for the assertions at the end of the test.
let server_task = {
let config = config.clone();
let stats = stats.clone();
let upstream_manager = upstream_manager.clone();
let replay_checker = replay_checker.clone();
let buffer_pool = buffer_pool.clone();
let rng = rng.clone();
let route_runtime = route_runtime.clone();
let ip_tracker = ip_tracker.clone();
let beobachten = beobachten.clone();
tokio::spawn(async move {
let (stream, peer) = front_listener.accept().await.unwrap();
let real_peer_report = Arc::new(std::sync::Mutex::new(None));
ClientHandler::new(
stream,
peer,
config,
stats,
upstream_manager,
replay_checker,
buffer_pool,
rng,
None,
route_runtime,
None,
ip_tracker,
beobachten,
false,
real_peer_report,
)
.run()
.await
})
};
// `_client` is kept bound so the TCP connection stays open and silent.
let _client = TcpStream::connect(front_addr).await.unwrap();
// Let the accepted connection reach the idle wait before advancing paused time.
tokio::task::yield_now().await;
tokio::time::advance(Duration::from_secs(2)).await;
tokio::task::yield_now().await;
// Bounded wait: a handler that never finishes fails the test here instead
// of hanging it.
let result = tokio::time::timeout(Duration::from_secs(1), server_task)
.await
.unwrap()
.unwrap();
assert!(result.is_ok());
assert_eq!(stats.get_handshake_timeouts(), 0);
assert_eq!(stats.get_connects_bad(), 0);
}
#[tokio::test]
async fn partial_tls_header_stall_triggers_handshake_timeout() {
let mut cfg = ProxyConfig::default();
@@ -1332,6 +1500,7 @@ async fn partial_tls_header_stall_triggers_handshake_timeout() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1477,6 +1646,148 @@ fn wrap_tls_application_data(payload: &[u8]) -> Vec<u8> {
record
}
/// Builds a six-byte TLS ChangeCipherSpec record for tests: the record type
/// byte, version bytes `03 03`, a big-endian length of 1, and the single
/// payload byte `0x01`.
fn wrap_tls_ccs_record() -> Vec<u8> {
    let [len_hi, len_lo] = 1u16.to_be_bytes();
    vec![TLS_RECORD_CHANGE_CIPHER, 0x03, 0x03, len_hi, len_lo, 0x01]
}
/// Builds a handshake packet for tests that carries `proto_tag` and `dc_idx`
/// once its tail is XORed with the keystream derived from the packet itself.
///
/// Layout of the returned buffer:
/// - every byte starts as the filler `0x5A`;
/// - the prekey + IV region (starting at `SKIP_LEN`) is overwritten with the
///   ramp `1, 2, 3, ...`;
/// - bytes from `PROTO_TAG_POS` to the end are the target plaintext (protocol
///   tag, little-endian DC index, zeros elsewhere) XORed with the keystream.
///
/// The key is `sha256(prekey || secret)` and the IV is the IV region read as a
/// big-endian `u128`; the keystream is obtained by encrypting an all-zero
/// buffer of `HANDSHAKE_LEN` bytes.
///
/// # Panics
/// Panics if `secret_hex` is not valid hex.
fn make_valid_mtproto_handshake(
    secret_hex: &str,
    proto_tag: ProtoTag,
    dc_idx: i16,
) -> [u8; HANDSHAKE_LEN] {
    const KEY_START: usize = SKIP_LEN;
    const IV_START: usize = SKIP_LEN + PREKEY_LEN;
    const IV_END: usize = SKIP_LEN + PREKEY_LEN + IV_LEN;

    let secret = hex::decode(secret_hex).expect("secret hex must decode for mtproto test helper");

    let mut packet = [0x5Au8; HANDSHAKE_LEN];
    for (offset, byte) in packet[KEY_START..IV_END].iter_mut().enumerate() {
        *byte = (offset as u8).wrapping_add(1);
    }

    // Key material is the prekey followed by the user secret.
    let key_material = [&packet[KEY_START..IV_START], secret.as_slice()].concat();
    let key = sha256(&key_material);

    let mut iv_bytes = [0u8; IV_LEN];
    iv_bytes.copy_from_slice(&packet[IV_START..IV_END]);
    let iv = u128::from_be_bytes(iv_bytes);

    // Encrypting zeros exposes the raw keystream for every packet offset.
    let mut cipher = AesCtr::new(&key, iv);
    let keystream = cipher.encrypt(&[0u8; HANDSHAKE_LEN]);

    let mut plain = [0u8; HANDSHAKE_LEN];
    plain[PROTO_TAG_POS..PROTO_TAG_POS + 4].copy_from_slice(&proto_tag.to_bytes());
    plain[DC_IDX_POS..DC_IDX_POS + 2].copy_from_slice(&dc_idx.to_le_bytes());

    // Only the tail is masked; the prekey/IV prefix must stay readable as-is.
    for pos in PROTO_TAG_POS..HANDSHAKE_LEN {
        packet[pos] = plain[pos] ^ keystream[pos];
    }
    packet
}
// Verifies that an MTProto handshake delivered in three TLS application-data
// records, with a ChangeCipherSpec record between each pair, is reassembled
// byte-for-byte by the TLS reader and then accepted by the MTProto handshake
// with the expected user, protocol tag and DC index.
#[tokio::test]
async fn fragmented_tls_mtproto_with_interleaved_ccs_is_accepted() {
let secret_hex = "55555555555555555555555555555555";
// Same secret as `secret_hex`, in raw-byte form for the ClientHello helper.
let secret = [0x55u8; 16];
let client_hello = make_valid_tls_client_hello(&secret, 0);
let mtproto_handshake = make_valid_mtproto_handshake(secret_hex, ProtoTag::Secure, 2);
let mut cfg = ProxyConfig::default();
cfg.general.beobachten = false;
cfg.access.ignore_time_skew = true;
cfg.access
.users
.insert("user".to_string(), secret_hex.to_string());
let config = Arc::new(cfg);
let replay_checker = Arc::new(ReplayChecker::new(128, Duration::from_secs(60)));
let rng = SecureRandom::new();
let (server_side, mut client_side) = duplex(131072);
let peer: SocketAddr = "198.51.100.85:55007".parse().unwrap();
let (read_half, write_half) = tokio::io::split(server_side);
let (mut tls_reader, tls_writer, tls_user) = match handle_tls_handshake(
&client_hello,
read_half,
write_half,
peer,
&config,
&replay_checker,
&rng,
None,
)
.await
{
HandshakeResult::Success(result) => result,
_ => panic!("expected successful TLS handshake"),
};
// Consume the first server record (5-byte header, then the body) so the
// test has read the server's reply before it starts sending fragments.
let mut tls_response_head = [0u8; 5];
client_side
.read_exact(&mut tls_response_head)
.await
.unwrap();
// 0x16 is the TLS handshake record content type.
assert_eq!(tls_response_head[0], 0x16);
let tls_response_len =
u16::from_be_bytes([tls_response_head[3], tls_response_head[4]]) as usize;
let mut tls_response_body = vec![0u8; tls_response_len];
client_side
.read_exact(&mut tls_response_body)
.await
.unwrap();
// Send the handshake as fragments [..13], [13..37], [37..], separated by
// ChangeCipherSpec records that must not end up in the reassembled payload.
client_side
.write_all(&wrap_tls_application_data(&mtproto_handshake[..13]))
.await
.unwrap();
client_side.write_all(&wrap_tls_ccs_record()).await.unwrap();
client_side
.write_all(&wrap_tls_application_data(&mtproto_handshake[13..37]))
.await
.unwrap();
client_side.write_all(&wrap_tls_ccs_record()).await.unwrap();
client_side
.write_all(&wrap_tls_application_data(&mtproto_handshake[37..]))
.await
.unwrap();
// The reader must return exactly the original handshake bytes.
let mtproto_data = tls_reader.read_exact(HANDSHAKE_LEN).await.unwrap();
assert_eq!(&mtproto_data[..], &mtproto_handshake);
let mtproto_handshake: [u8; HANDSHAKE_LEN] = mtproto_data[..].try_into().unwrap();
let (_, _, success) = match handle_mtproto_handshake(
&mtproto_handshake,
tls_reader,
tls_writer,
peer,
&config,
&replay_checker,
true,
Some(tls_user.as_str()),
)
.await
{
HandshakeResult::Success(result) => result,
_ => panic!("expected successful MTProto handshake"),
};
assert_eq!(success.user, "user");
assert_eq!(success.proto_tag, ProtoTag::Secure);
assert_eq!(success.dc_idx, 2);
}
#[tokio::test]
async fn valid_tls_path_does_not_fall_back_to_mask_backend() {
let listener = TcpListener::bind("127.0.0.1:0").await.unwrap();
@@ -1514,6 +1825,7 @@ async fn valid_tls_path_does_not_fall_back_to_mask_backend() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1622,6 +1934,7 @@ async fn valid_tls_with_invalid_mtproto_falls_back_to_mask_backend() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1728,6 +2041,7 @@ async fn client_handler_tls_bad_mtproto_is_forwarded_to_mask_backend() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1849,6 +2163,7 @@ async fn alpn_mismatch_tls_probe_is_masked_through_client_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1941,6 +2256,7 @@ async fn invalid_hmac_tls_probe_is_masked_through_client_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -2039,6 +2355,7 @@ async fn burst_invalid_tls_probes_are_masked_verbatim() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -2217,14 +2534,16 @@ async fn tcp_limit_rejection_does_not_reserve_ip_or_trigger_rollback() {
}
#[tokio::test]
async fn zero_tcp_limit_rejects_without_ip_or_counter_side_effects() {
async fn zero_tcp_limit_uses_global_fallback_and_rejects_without_side_effects() {
let mut config = ProxyConfig::default();
config
.access
.user_max_tcp_conns
.insert("user".to_string(), 0);
config.access.user_max_tcp_conns_global_each = 1;
let stats = Stats::new();
stats.increment_user_curr_connects("user");
let ip_tracker = UserIpTracker::new();
let peer_addr: SocketAddr = "198.51.100.211:50001".parse().unwrap();
@@ -2241,10 +2560,75 @@ async fn zero_tcp_limit_rejects_without_ip_or_counter_side_effects() {
result,
Err(ProxyError::ConnectionLimitExceeded { user }) if user == "user"
));
assert_eq!(
stats.get_user_curr_connects("user"),
1,
"TCP-limit rejection must keep pre-existing in-flight connection count unchanged"
);
assert_eq!(ip_tracker.get_active_ip_count("user").await, 0);
}
/// A per-user TCP limit of 0 combined with a global per-user fallback of 0
/// must admit the connection, and the check itself must leave both the
/// connection counter and the active-IP count at zero.
#[tokio::test]
async fn zero_tcp_limit_with_disabled_global_fallback_is_unlimited() {
    const USER: &str = "user";

    let peer_addr = "198.51.100.212:50002".parse::<SocketAddr>().unwrap();
    let stats = Stats::new();
    let ip_tracker = UserIpTracker::new();

    let mut config = ProxyConfig::default();
    config.access.user_max_tcp_conns_global_each = 0;
    config.access.user_max_tcp_conns.insert(USER.to_string(), 0);

    let outcome =
        RunningClientHandler::check_user_limits_static(USER, &config, &stats, peer_addr, &ip_tracker)
            .await;

    assert!(
        outcome.is_ok(),
        "per-user zero with global fallback disabled must not enforce a TCP limit"
    );
    assert_eq!(stats.get_user_curr_connects(USER), 0);
    assert_eq!(ip_tracker.get_active_ip_count(USER).await, 0);
}
/// With no per-user TCP limit configured, the global per-user fallback of 1
/// must reject a user who already has one connection counted, without
/// changing that counter or reserving an IP.
#[tokio::test]
async fn global_tcp_fallback_applies_when_per_user_limit_is_missing() {
    const USER: &str = "user";

    let peer_addr = "198.51.100.213:50003".parse::<SocketAddr>().unwrap();
    let ip_tracker = UserIpTracker::new();

    let mut config = ProxyConfig::default();
    config.access.user_max_tcp_conns_global_each = 1;

    // Pre-existing connection that already consumes the whole allowance.
    let stats = Stats::new();
    stats.increment_user_curr_connects(USER);

    let outcome =
        RunningClientHandler::check_user_limits_static(USER, &config, &stats, peer_addr, &ip_tracker)
            .await;

    assert!(matches!(
        outcome,
        Err(ProxyError::ConnectionLimitExceeded { user }) if user == "user"
    ));
    assert_eq!(
        stats.get_user_curr_connects(USER),
        1,
        "Global fallback TCP-limit rejection must keep pre-existing counter unchanged"
    );
    assert_eq!(ip_tracker.get_active_ip_count(USER).await, 0);
}
#[tokio::test]
async fn check_user_limits_static_success_does_not_leak_counter_or_ip_reservation() {
let user = "check-helper-user";
@@ -2876,6 +3260,7 @@ async fn relay_connect_error_releases_user_and_ip_before_return() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -3436,6 +3821,7 @@ async fn untrusted_proxy_header_source_is_rejected() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -3505,6 +3891,7 @@ async fn empty_proxy_trusted_cidrs_rejects_proxy_header_by_default() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -3601,6 +3988,7 @@ async fn oversized_tls_record_is_masked_in_generic_stream_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -3703,6 +4091,7 @@ async fn oversized_tls_record_is_masked_in_client_handler_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -3819,6 +4208,7 @@ async fn tls_record_len_min_minus_1_is_rejected_in_generic_stream_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -3921,6 +4311,7 @@ async fn tls_record_len_min_minus_1_is_rejected_in_client_handler_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -4026,6 +4417,7 @@ async fn tls_record_len_16384_is_accepted_in_generic_stream_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -4126,6 +4518,7 @@ async fn tls_record_len_16384_is_accepted_in_client_handler_pipeline() {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -33,6 +33,7 @@ fn make_test_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -35,6 +35,7 @@ fn make_test_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -36,6 +36,7 @@ fn make_test_upstream_manager(stats: Arc<Stats>) -> Arc<UpstreamManager> {
1,
1,
1,
10,
1,
false,
stats,

View File

@@ -50,6 +50,7 @@ fn build_harness(secret_hex: &str, mask_port: u16) -> PipelineHarness {
1,
1,
1,
10,
1,
false,
stats.clone(),

View File

@@ -1302,6 +1302,7 @@ async fn direct_relay_abort_midflight_releases_route_gauge() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1408,6 +1409,7 @@ async fn direct_relay_cutover_midflight_releases_route_gauge() {
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1529,6 +1531,7 @@ async fn direct_relay_cutover_storm_multi_session_keeps_generic_errors_and_relea
1,
1,
1,
10,
1,
false,
stats.clone(),
@@ -1761,6 +1764,7 @@ async fn negative_direct_relay_dc_connection_refused_fails_fast() {
1,
100,
5000,
10,
3,
false,
stats.clone(),
@@ -1851,6 +1855,7 @@ async fn adversarial_direct_relay_cutover_integrity() {
1,
100,
5000,
10,
3,
false,
stats.clone(),

View File

@@ -1643,6 +1643,32 @@ fn auth_probe_capacity_fresh_full_map_still_tracks_newcomer_with_bounded_evictio
);
}
/// Walks the unknown-SNI WARN cooldown through one full window: the first
/// event is WARN-eligible, an event one second later is not, and an event
/// exactly `UNKNOWN_SNI_WARN_COOLDOWN_SECS` after the first is eligible again.
#[test]
fn unknown_sni_warn_cooldown_first_event_is_warn_and_repeated_events_are_info_until_window_expires()
{
    // Serialize against other tests touching the shared cooldown state.
    let _serial = unknown_sni_warn_test_lock()
        .lock()
        .unwrap_or_else(|poisoned| poisoned.into_inner());
    clear_unknown_sni_warn_state_for_testing();

    let first_event = Instant::now();
    let inside_window = first_event + Duration::from_secs(1);
    let window_end = first_event + Duration::from_secs(UNKNOWN_SNI_WARN_COOLDOWN_SECS);

    assert!(
        should_emit_unknown_sni_warn_for_testing(first_event),
        "first unknown SNI event must be eligible for WARN emission"
    );
    assert!(
        !should_emit_unknown_sni_warn_for_testing(inside_window),
        "events inside cooldown window must be demoted from WARN to INFO"
    );
    assert!(
        should_emit_unknown_sni_warn_for_testing(window_end),
        "once cooldown expires, next unknown SNI event must be WARN-eligible again"
    );
}
#[test]
fn stress_auth_probe_full_map_churn_keeps_bound_and_tracks_newcomers() {
let _guard = auth_probe_test_lock()

View File

@@ -562,9 +562,10 @@ async fn timing_classifier_light_fuzz_pairwise_bucketed_accuracy_stays_bounded_u
if low_info_pair_count > 0 {
let low_info_baseline_avg = low_info_baseline_sum / low_info_pair_count as f64;
let low_info_hardened_avg = low_info_hardened_sum / low_info_pair_count as f64;
let low_info_avg_jitter_budget = 0.40 + acc_quant_step;
assert!(
low_info_hardened_avg <= low_info_baseline_avg + 0.40,
"normalization low-info average drift exceeded jitter budget: baseline_avg={low_info_baseline_avg:.3} hardened_avg={low_info_hardened_avg:.3}"
low_info_hardened_avg <= low_info_baseline_avg + low_info_avg_jitter_budget,
"normalization low-info average drift exceeded jitter budget: baseline_avg={low_info_baseline_avg:.3} hardened_avg={low_info_hardened_avg:.3} tolerated={low_info_avg_jitter_budget:.3}"
);
}

View File

@@ -126,6 +126,7 @@ async fn c2me_channel_full_path_yields_then_sends() {
payload: make_pooled_payload(&[0xBB, 0xCC]),
flags: 2,
},
None,
)
.await
});

388
src/service/mod.rs Normal file
View File

@@ -0,0 +1,388 @@
//! Service manager integration for telemt.
//!
//! Supports generating service files for:
//! - systemd (Linux)
//! - OpenRC (Alpine, Gentoo)
//! - rc.d (FreeBSD)
use std::path::Path;
/// Detected init/service system.
///
/// Produced by [`detect_init_system`] and consumed by [`service_file_path`],
/// [`generate_service_file`] and [`installation_instructions`] to pick the
/// matching service-file format, install location and operator instructions.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InitSystem {
/// systemd (most modern Linux distributions)
Systemd,
/// OpenRC (Alpine, Gentoo, some BSDs)
OpenRC,
/// FreeBSD rc.d
FreeBSDRc,
/// No known init system detected.
///
/// [`generate_service_file`] falls back to the systemd unit format for this
/// variant, while [`service_file_path`] returns the `/etc/init.d/telemt` path.
Unknown,
}
impl std::fmt::Display for InitSystem {
    /// Writes the human-readable name of the init system (for example `"FreeBSD rc.d"`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Pick the label first, then emit it with a single write.
        let label = match *self {
            InitSystem::Systemd => "systemd",
            InitSystem::OpenRC => "OpenRC",
            InitSystem::FreeBSDRc => "FreeBSD rc.d",
            InitSystem::Unknown => "unknown",
        };
        f.write_str(label)
    }
}
/// Detects the init system in use on the current host.
///
/// Detection is a sequence of filesystem existence probes, evaluated in this order:
///
/// 1. `/run/systemd/system` exists → [`InitSystem::Systemd`]
/// 2. `/sbin/openrc-run` or `/sbin/openrc` exists → [`InitSystem::OpenRC`]
/// 3. both `/etc/rc.subr` and `/etc/rc.d` exist → [`InitSystem::FreeBSDRc`]
/// 4. `/usr/bin/systemctl` or `/bin/systemctl` exists → [`InitSystem::Systemd`]
///
/// If none of the probes match, [`InitSystem::Unknown`] is returned.
pub fn detect_init_system() -> InitSystem {
    let present = |candidate: &str| Path::new(candidate).exists();

    if present("/run/systemd/system") {
        // Runtime directory check comes first (most common on Linux).
        InitSystem::Systemd
    } else if present("/sbin/openrc-run") || present("/sbin/openrc") {
        InitSystem::OpenRC
    } else if present("/etc/rc.subr") && present("/etc/rc.d") {
        InitSystem::FreeBSDRc
    } else if present("/usr/bin/systemctl") || present("/bin/systemctl") {
        // Fallback: the systemctl binary exists even without /run/systemd.
        InitSystem::Systemd
    } else {
        InitSystem::Unknown
    }
}
/// Returns the default service file path for the given init system.
///
/// [`InitSystem::Unknown`] shares the `/etc/init.d/telemt` location with OpenRC.
pub fn service_file_path(init_system: InitSystem) -> &'static str {
    const SYSTEMD_UNIT: &str = "/etc/systemd/system/telemt.service";
    const INIT_D_SCRIPT: &str = "/etc/init.d/telemt";
    const FREEBSD_RC_SCRIPT: &str = "/usr/local/etc/rc.d/telemt";

    match init_system {
        InitSystem::Systemd => SYSTEMD_UNIT,
        InitSystem::FreeBSDRc => FREEBSD_RC_SCRIPT,
        InitSystem::OpenRC | InitSystem::Unknown => INIT_D_SCRIPT,
    }
}
/// Options for generating service files.
///
/// All fields are borrowed; the generators only read them to fill in the
/// service-file templates. See the `Default` impl for the stock values.
pub struct ServiceOptions<'a> {
/// Path to the telemt executable
pub exe_path: &'a Path,
/// Path to the configuration file (passed as the trailing command argument)
pub config_path: &'a Path,
/// User to run as (optional).
///
/// When `None`, the systemd unit omits the `User=` line, and the OpenRC and
/// FreeBSD scripts fall back to `root`.
pub user: Option<&'a str>,
/// Group to run as (optional).
///
/// When `None`, the systemd unit omits the `Group=` line, the OpenRC script
/// falls back to `root` and the FreeBSD script to `wheel`.
pub group: Option<&'a str>,
/// PID file path (passed to the executable via `--pid-file`)
pub pid_file: &'a str,
/// Working directory.
///
/// Only the systemd generator reads this (as `WorkingDirectory=`); the OpenRC
/// and FreeBSD templates do not reference it.
pub working_dir: Option<&'a str>,
/// Description
pub description: &'a str,
}
impl<'a> Default for ServiceOptions<'a> {
fn default() -> Self {
Self {
exe_path: Path::new("/usr/local/bin/telemt"),
config_path: Path::new("/etc/telemt/config.toml"),
user: Some("telemt"),
group: Some("telemt"),
pid_file: "/var/run/telemt.pid",
working_dir: Some("/var/lib/telemt"),
description: "Telemt MTProxy - Telegram MTProto Proxy",
}
}
}
/// Generates a service file for the given init system.
///
/// Dispatches to the systemd, OpenRC or FreeBSD rc.d generator. For
/// [`InitSystem::Unknown`] the systemd unit format is used as the default.
///
/// NOTE(review): `service_file_path(InitSystem::Unknown)` returns
/// `/etc/init.d/telemt`, so a caller combining both functions would place a
/// systemd-format unit at an init.d path — confirm this pairing is intended.
pub fn generate_service_file(init_system: InitSystem, opts: &ServiceOptions) -> String {
    match init_system {
        // Unknown defaults to the systemd format.
        InitSystem::Systemd | InitSystem::Unknown => generate_systemd_unit(opts),
        InitSystem::OpenRC => generate_openrc_script(opts),
        InitSystem::FreeBSDRc => generate_freebsd_rc_script(opts),
    }
}
/// Generates an enhanced systemd unit file.
///
/// The unit runs the executable in the foreground (`Type=simple`) with the PID
/// file and config path as arguments, restarts it on exit, and applies a set
/// of resource limits and sandboxing directives. `User=`, `Group=` and
/// `WorkingDirectory=` are emitted only when the matching option is set; an
/// unset option leaves an empty line in its place.
///
/// NOTE(review): `ReadWritePaths=` is a fixed list and does not follow
/// `opts.working_dir` or the directory of `opts.pid_file` — confirm that
/// non-default paths are meant to be adjusted by hand.
fn generate_systemd_unit(opts: &ServiceOptions) -> String {
    // Renders `Key=value`, or an empty string when the option is absent.
    let directive = |key: &str, value: Option<&str>| match value {
        Some(v) => format!("{}={}", key, v),
        None => String::new(),
    };

    let user = directive("User", opts.user);
    let group = directive("Group", opts.group);
    let working_dir = directive("WorkingDirectory", opts.working_dir);
    let description = opts.description;
    let exe = opts.exe_path.display();
    let config = opts.config_path.display();
    let pid_file = opts.pid_file;

    // Template placeholders are filled from the locals above.
    format!(
        r#"[Unit]
Description={description}
Documentation=https://github.com/telemt/telemt
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
ExecStart={exe} --foreground --pid-file {pid_file} {config}
ExecReload=/bin/kill -HUP $MAINPID
PIDFile={pid_file}
Restart=always
RestartSec=5
{user}
{group}
{working_dir}
# Resource limits
LimitNOFILE=65535
LimitNPROC=4096
# Security hardening
NoNewPrivileges=true
ProtectSystem=strict
ProtectHome=true
PrivateTmp=true
PrivateDevices=true
ProtectKernelTunables=true
ProtectKernelModules=true
ProtectControlGroups=true
RestrictAddressFamilies=AF_INET AF_INET6 AF_UNIX
RestrictNamespaces=true
RestrictRealtime=true
RestrictSUIDSGID=true
MemoryDenyWriteExecute=true
LockPersonality=true
# Allow binding to privileged ports and writing to specific paths
AmbientCapabilities=CAP_NET_BIND_SERVICE
CapabilityBoundingSet=CAP_NET_BIND_SERVICE
ReadWritePaths=/etc/telemt /var/run /var/lib/telemt
[Install]
WantedBy=multi-user.target
"#
    )
}
/// Generates an OpenRC init script.
///
/// The script starts the executable with `--daemon --syslog`, runs it as
/// `user:group` (each defaulting to `root` when unset), declares its
/// dependencies, prepares directories in `start_pre`, and adds a `reload`
/// action that sends `HUP` to the process recorded in the PID file.
///
/// NOTE(review): `start_pre` applies `--owner {user}:{group}` to `/var/run`
/// itself, which looks like it would hand the shared runtime directory to the
/// service account — confirm whether a dedicated subdirectory was intended.
/// The directory list is also fixed and ignores `opts.working_dir`.
fn generate_openrc_script(opts: &ServiceOptions) -> String {
    let user = match opts.user {
        Some(name) => name,
        None => "root",
    };
    let group = match opts.group {
        Some(name) => name,
        None => "root",
    };
    let description = opts.description;
    let exe = opts.exe_path.display();
    let config = opts.config_path.display();
    let pid_file = opts.pid_file;

    // `{{` / `}}` are literal shell braces; single-brace names come from the locals above.
    format!(
        r#"#!/sbin/openrc-run
# OpenRC init script for telemt
description="{description}"
command="{exe}"
command_args="--daemon --syslog --pid-file {pid_file} {config}"
command_user="{user}:{group}"
pidfile="{pid_file}"
depend() {{
need net
use logger
after firewall
}}
start_pre() {{
checkpath --directory --owner {user}:{group} --mode 0755 /var/run
checkpath --directory --owner {user}:{group} --mode 0755 /var/lib/telemt
checkpath --directory --owner {user}:{group} --mode 0755 /var/log/telemt
}}
reload() {{
ebegin "Reloading ${{RC_SVCNAME}}"
start-stop-daemon --signal HUP --pidfile "${{pidfile}}"
eend $?
}}
"#
    )
}
/// Generates a FreeBSD rc.d script.
///
/// The generated script sources `/etc/rc.subr`, exposes `telemt_enable`,
/// `telemt_config`, `telemt_user`, `telemt_group` and `telemt_pidfile` as
/// rc.conf-overridable settings (seeded from `opts`, with `root` / `wheel` as
/// the user / group fallbacks), starts the executable with `--daemon --syslog`,
/// and registers a `reload` command that sends `HUP` to the PID in the PID file.
///
/// NOTE(review): `telemt_prestart` runs `install -d -o … -g …` on `/var/run`
/// itself, which looks like it would change ownership of the shared runtime
/// directory — confirm whether a dedicated subdirectory was intended.
fn generate_freebsd_rc_script(opts: &ServiceOptions) -> String {
    let user = match opts.user {
        Some(name) => name,
        None => "root",
    };
    let group = match opts.group {
        Some(name) => name,
        None => "wheel",
    };
    let description = opts.description;
    let exe = opts.exe_path.display();
    let config = opts.config_path.display();
    let pid_file = opts.pid_file;

    // `{{` / `}}` are literal shell braces; single-brace names come from the locals above.
    format!(
        r#"#!/bin/sh
#
# PROVIDE: telemt
# REQUIRE: LOGIN NETWORKING
# KEYWORD: shutdown
#
# Add the following lines to /etc/rc.conf to enable telemt:
#
# telemt_enable="YES"
# telemt_config="/etc/telemt/config.toml" # optional
# telemt_user="telemt" # optional
# telemt_group="telemt" # optional
#
. /etc/rc.subr
name="telemt"
rcvar="telemt_enable"
desc="{description}"
load_rc_config $name
: ${{telemt_enable:="NO"}}
: ${{telemt_config:="{config}"}}
: ${{telemt_user:="{user}"}}
: ${{telemt_group:="{group}"}}
: ${{telemt_pidfile:="{pid_file}"}}
pidfile="${{telemt_pidfile}}"
command="{exe}"
command_args="--daemon --syslog --pid-file ${{telemt_pidfile}} ${{telemt_config}}"
start_precmd="telemt_prestart"
reload_cmd="telemt_reload"
extra_commands="reload"
telemt_prestart() {{
install -d -o ${{telemt_user}} -g ${{telemt_group}} -m 755 /var/run
install -d -o ${{telemt_user}} -g ${{telemt_group}} -m 755 /var/lib/telemt
}}
telemt_reload() {{
if [ -f "${{pidfile}}" ]; then
echo "Reloading ${{name}} configuration."
kill -HUP $(cat ${{pidfile}})
else
echo "${{name}} is not running."
return 1
fi
}}
run_rc_command "$1"
"#
    )
}
/// Installation instructions for each init system.
///
/// Returns a static, operator-facing text block listing the commands to
/// enable, start, inspect and reload the service. For
/// [`InitSystem::Unknown`] the text explains that no supported init system was
/// found and shows how to run the binary directly.
pub fn installation_instructions(init_system: InitSystem) -> &'static str {
    const SYSTEMD_STEPS: &str = r#"To install and enable the service:
sudo systemctl daemon-reload
sudo systemctl enable telemt
sudo systemctl start telemt
To check status:
sudo systemctl status telemt
To view logs:
journalctl -u telemt -f
To reload configuration:
sudo systemctl reload telemt
"#;
    const OPENRC_STEPS: &str = r#"To install and enable the service:
sudo chmod +x /etc/init.d/telemt
sudo rc-update add telemt default
sudo rc-service telemt start
To check status:
sudo rc-service telemt status
To reload configuration:
sudo rc-service telemt reload
"#;
    const FREEBSD_STEPS: &str = r#"To install and enable the service:
sudo chmod +x /usr/local/etc/rc.d/telemt
sudo sysrc telemt_enable="YES"
sudo service telemt start
To check status:
sudo service telemt status
To reload configuration:
sudo service telemt reload
"#;
    const MANUAL_STEPS: &str = r#"No supported init system detected.
You may need to create a service file manually or run telemt directly:
telemt start /etc/telemt/config.toml
"#;

    match init_system {
        InitSystem::Systemd => SYSTEMD_STEPS,
        InitSystem::OpenRC => OPENRC_STEPS,
        InitSystem::FreeBSDRc => FREEBSD_STEPS,
        InitSystem::Unknown => MANUAL_STEPS,
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every marker occurs somewhere in the rendered text.
    fn assert_contains_all(rendered: &str, markers: &[&str]) {
        for marker in markers {
            assert!(rendered.contains(marker));
        }
    }

    /// The systemd unit contains all three sections plus reload and PID-file directives.
    #[test]
    fn test_systemd_unit_generation() {
        let rendered = generate_systemd_unit(&ServiceOptions::default());
        assert_contains_all(
            &rendered,
            &["[Unit]", "[Service]", "[Install]", "ExecReload=", "PIDFile="],
        );
    }

    /// The OpenRC script carries the openrc-run shebang and the expected functions.
    #[test]
    fn test_openrc_script_generation() {
        let rendered = generate_openrc_script(&ServiceOptions::default());
        assert_contains_all(&rendered, &["#!/sbin/openrc-run", "depend()", "reload()"]);
    }

    /// The FreeBSD script carries the sh shebang, the PROVIDE header and the rc entry point.
    #[test]
    fn test_freebsd_rc_script_generation() {
        let rendered = generate_freebsd_rc_script(&ServiceOptions::default());
        assert_contains_all(
            &rendered,
            &["#!/bin/sh", "PROVIDE: telemt", "run_rc_command"],
        );
    }

    /// Default install locations for the three supported init systems.
    #[test]
    fn test_service_file_paths() {
        let expectations = [
            (InitSystem::Systemd, "/etc/systemd/system/telemt.service"),
            (InitSystem::OpenRC, "/etc/init.d/telemt"),
            (InitSystem::FreeBSDRc, "/usr/local/etc/rc.d/telemt"),
        ];
        for (init_system, expected_path) in expectations {
            assert_eq!(service_file_path(init_system), expected_path);
        }
    }
}

View File

@@ -128,6 +128,8 @@ pub struct Stats {
me_crc_mismatch: AtomicU64,
me_seq_mismatch: AtomicU64,
me_endpoint_quarantine_total: AtomicU64,
me_endpoint_quarantine_unexpected_total: AtomicU64,
me_endpoint_quarantine_draining_suppressed_total: AtomicU64,
me_kdf_drift_total: AtomicU64,
me_kdf_port_only_drift_total: AtomicU64,
me_hardswap_pending_reuse_total: AtomicU64,
@@ -234,6 +236,7 @@ pub struct Stats {
me_writer_restored_same_endpoint_total: AtomicU64,
me_writer_restored_fallback_total: AtomicU64,
me_no_writer_failfast_total: AtomicU64,
me_hybrid_timeout_total: AtomicU64,
me_async_recovery_trigger_total: AtomicU64,
me_inline_recovery_total: AtomicU64,
ip_reservation_rollback_tcp_limit_total: AtomicU64,
@@ -1203,6 +1206,11 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed);
}
}
/// Adds one to `me_hybrid_timeout_total`, but only while
/// `telemetry_me_allows_normal()` reports true.
pub fn increment_me_hybrid_timeout_total(&self) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let counter = &self.me_hybrid_timeout_total;
    counter.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_me_async_recovery_trigger_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_async_recovery_trigger_total
@@ -1245,6 +1253,18 @@ impl Stats {
.fetch_add(1, Ordering::Relaxed);
}
}
/// Adds one to `me_endpoint_quarantine_unexpected_total`, but only while
/// `telemetry_me_allows_normal()` reports true.
pub fn increment_me_endpoint_quarantine_unexpected_total(&self) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let counter = &self.me_endpoint_quarantine_unexpected_total;
    counter.fetch_add(1, Ordering::Relaxed);
}
/// Adds one to `me_endpoint_quarantine_draining_suppressed_total`, but only
/// while `telemetry_me_allows_normal()` reports true.
pub fn increment_me_endpoint_quarantine_draining_suppressed_total(&self) {
    if !self.telemetry_me_allows_normal() {
        return;
    }
    let counter = &self.me_endpoint_quarantine_draining_suppressed_total;
    counter.fetch_add(1, Ordering::Relaxed);
}
pub fn increment_me_kdf_drift_total(&self) {
if self.telemetry_me_allows_normal() {
self.me_kdf_drift_total.fetch_add(1, Ordering::Relaxed);
@@ -1497,6 +1517,14 @@ impl Stats {
/// Returns the current value of `me_endpoint_quarantine_total` (relaxed load).
pub fn get_me_endpoint_quarantine_total(&self) -> u64 {
    let counter = &self.me_endpoint_quarantine_total;
    counter.load(Ordering::Relaxed)
}
/// Returns the current value of `me_endpoint_quarantine_unexpected_total` (relaxed load).
pub fn get_me_endpoint_quarantine_unexpected_total(&self) -> u64 {
    let counter = &self.me_endpoint_quarantine_unexpected_total;
    counter.load(Ordering::Relaxed)
}
/// Returns the current value of `me_endpoint_quarantine_draining_suppressed_total`
/// (relaxed load).
pub fn get_me_endpoint_quarantine_draining_suppressed_total(&self) -> u64 {
    let counter = &self.me_endpoint_quarantine_draining_suppressed_total;
    counter.load(Ordering::Relaxed)
}
/// Returns the current value of `me_kdf_drift_total` (relaxed load).
pub fn get_me_kdf_drift_total(&self) -> u64 {
    let counter = &self.me_kdf_drift_total;
    counter.load(Ordering::Relaxed)
}
@@ -1876,6 +1904,9 @@ impl Stats {
/// Returns the current value of `me_no_writer_failfast_total` (relaxed load).
pub fn get_me_no_writer_failfast_total(&self) -> u64 {
    let counter = &self.me_no_writer_failfast_total;
    counter.load(Ordering::Relaxed)
}
/// Returns the current value of `me_hybrid_timeout_total` (relaxed load).
pub fn get_me_hybrid_timeout_total(&self) -> u64 {
    let counter = &self.me_hybrid_timeout_total;
    counter.load(Ordering::Relaxed)
}
/// Returns the current value of `me_async_recovery_trigger_total` (relaxed load).
pub fn get_me_async_recovery_trigger_total(&self) -> u64 {
    let counter = &self.me_async_recovery_trigger_total;
    counter.load(Ordering::Relaxed)
}

View File

@@ -244,10 +244,9 @@ fn order_profiles(
if let Some(pos) = ordered
.iter()
.position(|profile| *profile == cached.profile)
&& pos != 0
{
if pos != 0 {
ordered.swap(0, pos);
}
ordered.swap(0, pos);
}
}

View File

@@ -314,53 +314,6 @@ async fn run_update_cycle(
reinit_tx: &mpsc::Sender<MeReinitTrigger>,
) {
let upstream = pool.upstream.clone();
pool.update_runtime_reinit_policy(
cfg.general.hardswap,
cfg.general.me_pool_drain_ttl_secs,
cfg.general.me_instadrain,
cfg.general.me_pool_drain_threshold,
cfg.general.me_pool_drain_soft_evict_enabled,
cfg.general.me_pool_drain_soft_evict_grace_secs,
cfg.general.me_pool_drain_soft_evict_per_writer,
cfg.general.me_pool_drain_soft_evict_budget_per_core,
cfg.general.me_pool_drain_soft_evict_cooldown_ms,
cfg.general.effective_me_pool_force_close_secs(),
cfg.general.me_pool_min_fresh_ratio,
cfg.general.me_hardswap_warmup_delay_min_ms,
cfg.general.me_hardswap_warmup_delay_max_ms,
cfg.general.me_hardswap_warmup_extra_passes,
cfg.general.me_hardswap_warmup_pass_backoff_base_ms,
cfg.general.me_bind_stale_mode,
cfg.general.me_bind_stale_ttl_secs,
cfg.general.me_secret_atomic_snapshot,
cfg.general.me_deterministic_writer_sort,
cfg.general.me_writer_pick_mode,
cfg.general.me_writer_pick_sample_size,
cfg.general.me_single_endpoint_shadow_writers,
cfg.general.me_single_endpoint_outage_mode_enabled,
cfg.general.me_single_endpoint_outage_disable_quarantine,
cfg.general.me_single_endpoint_outage_backoff_min_ms,
cfg.general.me_single_endpoint_outage_backoff_max_ms,
cfg.general.me_single_endpoint_shadow_rotate_every_secs,
cfg.general.me_floor_mode,
cfg.general.me_adaptive_floor_idle_secs,
cfg.general.me_adaptive_floor_min_writers_single_endpoint,
cfg.general.me_adaptive_floor_min_writers_multi_endpoint,
cfg.general.me_adaptive_floor_recover_grace_secs,
cfg.general.me_adaptive_floor_writers_per_core_total,
cfg.general.me_adaptive_floor_cpu_cores_override,
cfg.general
.me_adaptive_floor_max_extra_writers_single_per_core,
cfg.general
.me_adaptive_floor_max_extra_writers_multi_per_core,
cfg.general.me_adaptive_floor_max_active_writers_per_core,
cfg.general.me_adaptive_floor_max_warm_writers_per_core,
cfg.general.me_adaptive_floor_max_active_writers_global,
cfg.general.me_adaptive_floor_max_warm_writers_global,
cfg.general.me_health_interval_ms_unhealthy,
cfg.general.me_health_interval_ms_healthy,
cfg.general.me_warn_rate_limit_ms,
);
let required_cfg_snapshots = cfg.general.me_config_stable_snapshots.max(1);
let required_secret_snapshots = cfg.general.proxy_secret_stable_snapshots.max(1);

View File

@@ -161,7 +161,7 @@ impl MePool {
} else {
let connect_fut = async {
if addr.is_ipv6()
&& let Some(v6) = self.detected_ipv6
&& let Some(v6) = self.nat_runtime.detected_ipv6
{
match TcpSocket::new_v6() {
Ok(sock) => {
@@ -305,7 +305,7 @@ impl MePool {
}
MeSocksKdfPolicy::Compat => {
self.stats.increment_me_socks_kdf_compat_fallback();
if self.nat_probe {
if self.nat_runtime.nat_probe {
let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress);
self.maybe_reflect_public_addr(family, bind_ip).await
} else {
@@ -313,7 +313,7 @@ impl MePool {
}
}
}
} else if self.nat_probe {
} else if self.nat_runtime.nat_probe {
let bind_ip = Self::direct_bind_ip_for_stun(family, upstream_egress);
self.maybe_reflect_public_addr(family, bind_ip).await
} else {
@@ -343,7 +343,10 @@ impl MePool {
.unwrap_or_default()
.as_secs() as u32;
let secret_atomic_snapshot = self.secret_atomic_snapshot.load(Ordering::Relaxed);
let secret_atomic_snapshot = self
.writer_selection_policy
.secret_atomic_snapshot
.load(Ordering::Relaxed);
let (ks, secret) = if secret_atomic_snapshot {
let snapshot = self.secret_snapshot().await;
(snapshot.key_selector, snapshot.secret)

View File

@@ -7,6 +7,8 @@ use std::sync::Arc;
use std::time::{Duration, Instant};
use rand::RngExt;
use tokio::sync::Semaphore;
use tokio::task::JoinSet;
use tracing::{debug, info, warn};
use crate::config::MeFloorMode;
@@ -14,6 +16,7 @@ use crate::crypto::SecureRandom;
use crate::network::IpFamily;
use super::MePool;
use super::pool::MeFamilyRuntimeState;
const JITTER_FRAC_NUM: u64 = 2; // jitter up to 50% of backoff
#[allow(dead_code)]
@@ -27,6 +30,9 @@ const HEALTH_RECONNECT_BUDGET_PER_CORE: usize = 2;
const HEALTH_RECONNECT_BUDGET_PER_DC: usize = 1;
const HEALTH_RECONNECT_BUDGET_MIN: usize = 4;
const HEALTH_RECONNECT_BUDGET_MAX: usize = 128;
// Fail streak (consecutive degraded updates) at which an IP family is moved to Suppressed.
const FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD: u32 = 5;
// Seconds added to "now" to compute the suppressed-until timestamp on suppression.
const FAMILY_SUPPRESS_DURATION_SECS: u64 = 60;
// Non-degraded updates a non-healthy family needs in a row before it is Healthy again.
const FAMILY_RECOVER_SUCCESS_STREAK_TARGET: u32 = 2;
const HEALTH_DRAIN_CLOSE_BUDGET_PER_CORE: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MIN: usize = 16;
const HEALTH_DRAIN_CLOSE_BUDGET_MAX: usize = 256;
@@ -56,6 +62,17 @@ struct FamilyFloorPlan {
target_writers_total: usize,
}
/// Result of one spawned per-DC reconnect task in the health check.
///
/// The task returns this so the caller can update backoff, next-attempt and
/// in-flight bookkeeping after joining it.
#[derive(Debug)]
struct FamilyReconnectOutcome {
// (dc, family) key used for the backoff / next-attempt / inflight maps.
key: (i32, IpFamily),
// DC the reconnect attempts were made for (used in log fields).
dc: i32,
// IP family the reconnect attempts were made for (used in log fields).
family: IpFamily,
// Writers alive before the task ran; the caller adds `restored` to get the new count.
alive: usize,
// Writer count the DC must reach to be considered restored.
required: usize,
// Number of endpoints the task was given for this DC.
endpoint_count: usize,
// Writers the task brought back (successful reconnects plus idle swaps).
restored: usize,
}
pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_connections: usize) {
let mut backoff: HashMap<(i32, IpFamily), u64> = HashMap::new();
let mut next_attempt: HashMap<(i32, IpFamily), Instant> = HashMap::new();
@@ -78,6 +95,7 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
};
tokio::time::sleep(interval).await;
pool.prune_closed_writers().await;
pool.sweep_endpoint_quarantine().await;
reap_draining_writers(&pool, &mut drain_warn_next_allowed).await;
let v4_degraded = check_family(
IpFamily::V4,
@@ -113,6 +131,8 @@ pub async fn me_health_monitor(pool: Arc<MePool>, rng: Arc<SecureRandom>, _min_c
&mut floor_warn_next_allowed,
)
.await;
update_family_runtime_state(&pool, IpFamily::V4, v4_degraded);
update_family_runtime_state(&pool, IpFamily::V6, v6_degraded);
degraded_interval = v4_degraded || v6_degraded;
}
}
@@ -135,9 +155,11 @@ pub(super) async fn reap_draining_writers(
let now_epoch_secs = MePool::now_epoch_secs();
let now = Instant::now();
let drain_ttl_secs = pool
.drain_runtime
.me_pool_drain_ttl_secs
.load(std::sync::atomic::Ordering::Relaxed);
let drain_threshold = pool
.drain_runtime
.me_pool_drain_threshold
.load(std::sync::atomic::Ordering::Relaxed);
let activity = pool.registry.writer_activity_snapshot().await;
@@ -221,7 +243,10 @@ pub(super) async fn reap_draining_writers(
endpoint = %writer.addr,
generation = writer.generation,
drain_ttl_secs,
force_close_secs = pool.me_pool_force_close_secs.load(std::sync::atomic::Ordering::Relaxed),
force_close_secs = pool
.drain_runtime
.me_pool_force_close_secs
.load(std::sync::atomic::Ordering::Relaxed),
allow_drain_fallback = writer.allow_drain_fallback,
"ME draining writer remains non-empty past drain TTL"
);
@@ -365,7 +390,8 @@ async fn check_family(
endpoints.sort_unstable();
endpoints.dedup();
}
let mut reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
let reconnect_budget = health_reconnect_budget(pool, dc_endpoints.len());
let reconnect_sem = Arc::new(Semaphore::new(reconnect_budget));
if pool.floor_mode() == MeFloorMode::Static {
adaptive_idle_since.clear();
@@ -422,6 +448,10 @@ async fn check_family(
floor_plan.active_writers_current,
floor_plan.warm_writers_current,
);
let live_writer_ids_by_addr = Arc::new(live_writer_ids_by_addr);
let writer_idle_since = Arc::new(writer_idle_since);
let bound_clients_by_writer = Arc::new(bound_clients_by_writer);
let mut reconnect_set = JoinSet::<FamilyReconnectOutcome>::new();
for (dc, endpoints) in dc_endpoints {
if endpoints.is_empty() {
@@ -461,7 +491,7 @@ async fn check_family(
required,
outage_backoff,
outage_next_attempt,
&mut reconnect_budget,
&reconnect_sem,
)
.await;
continue;
@@ -495,9 +525,9 @@ async fn check_family(
&endpoints,
alive,
required,
&live_writer_ids_by_addr,
&writer_idle_since,
&bound_clients_by_writer,
live_writer_ids_by_addr.as_ref(),
writer_idle_since.as_ref(),
bound_clients_by_writer.as_ref(),
idle_refresh_next_attempt,
)
.await;
@@ -510,8 +540,8 @@ async fn check_family(
&endpoints,
alive,
required,
&live_writer_ids_by_addr,
&bound_clients_by_writer,
live_writer_ids_by_addr.as_ref(),
bound_clients_by_writer.as_ref(),
shadow_rotate_deadline,
)
.await;
@@ -521,8 +551,8 @@ async fn check_family(
family_degraded = true;
let now = Instant::now();
if reconnect_budget == 0 {
let base_ms = pool.me_reconnect_backoff_base.as_millis() as u64;
if reconnect_sem.available_permits() == 0 {
let base_ms = pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64;
let next_ms = (*backoff.get(&key).unwrap_or(&base_ms)).max(base_ms);
let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms)
@@ -545,7 +575,10 @@ async fn check_family(
continue;
}
let max_concurrent = pool.me_reconnect_max_concurrent_per_dc.max(1) as usize;
let max_concurrent = pool
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc
.max(1) as usize;
if *inflight.get(&key).unwrap_or(&0) >= max_concurrent {
continue;
}
@@ -564,117 +597,165 @@ async fn check_family(
continue;
}
*inflight.entry(key).or_insert(0) += 1;
let mut restored = 0usize;
for _ in 0..missing {
if reconnect_budget == 0 {
break;
}
reconnect_budget = reconnect_budget.saturating_sub(1);
if pool.active_contour_writer_count_total().await
>= floor_plan.active_cap_effective_total
{
let swapped = maybe_swap_idle_writer_for_cap(
pool,
rng,
dc,
family,
&endpoints,
&live_writer_ids_by_addr,
&writer_idle_since,
&bound_clients_by_writer,
let pool_for_reconnect = pool.clone();
let rng_for_reconnect = rng.clone();
let reconnect_sem_for_dc = reconnect_sem.clone();
let endpoints_for_dc = endpoints.clone();
let live_writer_ids_by_addr_for_dc = live_writer_ids_by_addr.clone();
let writer_idle_since_for_dc = writer_idle_since.clone();
let bound_clients_by_writer_for_dc = bound_clients_by_writer.clone();
let active_cap_effective_total = floor_plan.active_cap_effective_total;
reconnect_set.spawn(async move {
let mut restored = 0usize;
for _ in 0..missing {
let Ok(reconnect_permit) = reconnect_sem_for_dc.clone().try_acquire_owned() else {
break;
};
if pool_for_reconnect.active_contour_writer_count_total().await
>= active_cap_effective_total
{
let swapped = maybe_swap_idle_writer_for_cap(
&pool_for_reconnect,
&rng_for_reconnect,
dc,
family,
&endpoints_for_dc,
live_writer_ids_by_addr_for_dc.as_ref(),
writer_idle_since_for_dc.as_ref(),
bound_clients_by_writer_for_dc.as_ref(),
)
.await;
if swapped {
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_total();
restored += 1;
continue;
}
pool_for_reconnect
.stats
.increment_me_floor_cap_block_total();
pool_for_reconnect
.stats
.increment_me_floor_swap_idle_failed_total();
debug!(
dc = %dc,
?family,
alive,
required,
active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked"
);
break;
}
let res = tokio::time::timeout(
pool_for_reconnect.reconnect_runtime.me_one_timeout,
pool_for_reconnect.connect_endpoints_round_robin(
dc,
&endpoints_for_dc,
rng_for_reconnect.as_ref(),
),
)
.await;
if swapped {
pool.stats.increment_me_floor_swap_idle_total();
restored += 1;
continue;
match res {
Ok(true) => {
restored += 1;
pool_for_reconnect.stats.increment_me_reconnect_success();
}
Ok(false) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
}
Err(_) => {
pool_for_reconnect.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME reconnect timed out");
}
}
pool.stats.increment_me_floor_cap_block_total();
pool.stats.increment_me_floor_swap_idle_failed_total();
debug!(
dc = %dc,
?family,
alive,
required,
active_cap_effective_total = floor_plan.active_cap_effective_total,
"Adaptive floor cap reached, reconnect attempt blocked"
);
break;
drop(reconnect_permit);
}
let res = tokio::time::timeout(
pool.me_one_timeout,
pool.connect_endpoints_round_robin(dc, &endpoints, rng.as_ref()),
)
.await;
match res {
Ok(true) => {
restored += 1;
pool.stats.increment_me_reconnect_success();
}
Ok(false) => {
pool.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME round-robin reconnect failed")
}
Err(_) => {
pool.stats.increment_me_reconnect_attempt();
debug!(dc = %dc, ?family, "ME reconnect timed out");
}
}
}
let now_alive = alive + restored;
if now_alive >= required {
info!(
dc = %dc,
?family,
alive = now_alive,
FamilyReconnectOutcome {
key,
dc,
family,
alive,
required,
endpoint_count = endpoints.len(),
endpoint_count: endpoints_for_dc.len(),
restored,
}
});
}
while let Some(joined) = reconnect_set.join_next().await {
let outcome = match joined {
Ok(outcome) => outcome,
Err(join_error) => {
debug!(error = %join_error, "Health reconnect task failed");
continue;
}
};
let now = Instant::now();
let now_alive = outcome.alive + outcome.restored;
if now_alive >= outcome.required {
info!(
dc = %outcome.dc,
family = ?outcome.family,
alive = now_alive,
required = outcome.required,
endpoint_count = outcome.endpoint_count,
"ME writer floor restored for DC"
);
backoff.insert(key, pool.me_reconnect_backoff_base.as_millis() as u64);
let jitter = pool.me_reconnect_backoff_base.as_millis() as u64 / JITTER_FRAC_NUM;
let wait = pool.me_reconnect_backoff_base
backoff.insert(
outcome.key,
pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64,
);
let jitter = pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64
/ JITTER_FRAC_NUM;
let wait = pool.reconnect_runtime.me_reconnect_backoff_base
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(key, now + wait);
next_attempt.insert(outcome.key, now + wait);
} else {
let curr = *backoff
.get(&key)
.unwrap_or(&(pool.me_reconnect_backoff_base.as_millis() as u64));
let next_ms =
(curr.saturating_mul(2)).min(pool.me_reconnect_backoff_cap.as_millis() as u64);
backoff.insert(key, next_ms);
.get(&outcome.key)
.unwrap_or(&(pool.reconnect_runtime.me_reconnect_backoff_base.as_millis() as u64));
let next_ms = (curr.saturating_mul(2))
.min(pool.reconnect_runtime.me_reconnect_backoff_cap.as_millis() as u64);
backoff.insert(outcome.key, next_ms);
let jitter = next_ms / JITTER_FRAC_NUM;
let wait = Duration::from_millis(next_ms)
+ Duration::from_millis(rand::rng().random_range(0..=jitter.max(1)));
next_attempt.insert(key, now + wait);
next_attempt.insert(outcome.key, now + wait);
if pool.is_runtime_ready() {
let warn_cooldown = pool.warn_rate_limit_duration();
if should_emit_rate_limited_warn(floor_warn_next_allowed, key, now, warn_cooldown) {
if should_emit_rate_limited_warn(
floor_warn_next_allowed,
outcome.key,
now,
warn_cooldown,
) {
warn!(
dc = %dc,
?family,
dc = %outcome.dc,
family = ?outcome.family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
required = outcome.required,
endpoint_count = outcome.endpoint_count,
backoff_ms = next_ms,
"DC writer floor is below required level, scheduled reconnect"
);
}
} else {
info!(
dc = %dc,
?family,
dc = %outcome.dc,
family = ?outcome.family,
alive = now_alive,
required,
endpoint_count = endpoints.len(),
required = outcome.required,
endpoint_count = outcome.endpoint_count,
backoff_ms = next_ms,
"DC writer floor is below required level during startup, scheduled reconnect"
);
}
}
if let Some(v) = inflight.get_mut(&key) {
if let Some(v) = inflight.get_mut(&outcome.key) {
*v = v.saturating_sub(1);
}
}
@@ -691,6 +772,68 @@ fn health_reconnect_budget(pool: &Arc<MePool>, dc_groups: usize) -> usize {
.clamp(HEALTH_RECONNECT_BUDGET_MIN, HEALTH_RECONNECT_BUDGET_MAX)
}
/// Advances the per-family runtime state machine after one health pass.
///
/// Reads the family's previous state, suppression deadline and streak counters
/// from `pool`, derives the next values from `degraded`, and stores them back
/// with `set_family_runtime_state`:
///
/// - While the suppression deadline is still in the future the family stays
///   `Suppressed`; the fail streak keeps growing on degraded passes.
/// - A degraded pass outside suppression bumps the fail streak and yields
///   `Degraded`, or `Suppressed` (for `FAMILY_SUPPRESS_DURATION_SECS`) once the
///   streak reaches `FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD`.
/// - A clean pass keeps a `Healthy` family healthy; any other previous state
///   moves through `Recovering` until `FAMILY_RECOVER_SUCCESS_STREAK_TARGET`
///   clean passes have accumulated.
///
/// The "state since" timestamp is reset whenever the state changes or was
/// never set (zero).
fn update_family_runtime_state(pool: &Arc<MePool>, family: IpFamily, degraded: bool) {
    let now = MePool::now_epoch_secs();
    let prev_state = pool.family_runtime_state(family);
    let prev_since = pool.family_runtime_state_since_epoch_secs(family);
    let prev_suppressed_until = pool.family_suppressed_until_epoch_secs(family);
    let prev_fail_streak = pool.family_fail_streak(family);
    let prev_recover_streak = pool.family_recover_success_streak(family);

    let still_suppressed = prev_suppressed_until > now;
    let bumped_fail_streak = prev_fail_streak.saturating_add(1);

    // Tuple layout: (state, suppressed-until, fail streak, recover streak).
    let (next_state, suppressed_until, fail_streak, recover_streak) =
        match (still_suppressed, degraded) {
            (true, true) => (
                MeFamilyRuntimeState::Suppressed,
                prev_suppressed_until,
                bumped_fail_streak,
                0,
            ),
            (true, false) => (
                MeFamilyRuntimeState::Suppressed,
                prev_suppressed_until,
                prev_fail_streak,
                0,
            ),
            (false, true) if bumped_fail_streak >= FAMILY_SUPPRESS_FAIL_STREAK_THRESHOLD => (
                MeFamilyRuntimeState::Suppressed,
                now.saturating_add(FAMILY_SUPPRESS_DURATION_SECS),
                bumped_fail_streak,
                0,
            ),
            (false, true) => (MeFamilyRuntimeState::Degraded, 0, bumped_fail_streak, 0),
            (false, false) if matches!(prev_state, MeFamilyRuntimeState::Healthy) => {
                (MeFamilyRuntimeState::Healthy, 0, 0, 0)
            }
            (false, false) => {
                let streak = prev_recover_streak.saturating_add(1);
                if streak >= FAMILY_RECOVER_SUCCESS_STREAK_TARGET {
                    (MeFamilyRuntimeState::Healthy, 0, 0, 0)
                } else {
                    (MeFamilyRuntimeState::Recovering, 0, 0, streak)
                }
            }
        };

    // Restart the "in this state since" clock on a transition or when unset.
    let state_since = if next_state != prev_state || prev_since == 0 {
        now
    } else {
        prev_since
    };

    pool.set_family_runtime_state(
        family,
        next_state,
        state_since,
        suppressed_until,
        fail_streak,
        recover_streak,
    );
}
fn should_emit_rate_limited_warn(
next_allowed: &mut HashMap<(i32, IpFamily), Instant>,
key: (i32, IpFamily),
@@ -715,6 +858,7 @@ fn adaptive_floor_class_min(
) -> usize {
if endpoint_count <= 1 {
let min_single = (pool
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint
.load(std::sync::atomic::Ordering::Relaxed) as usize)
.max(1);
@@ -971,7 +1115,7 @@ async fn maybe_swap_idle_writer_for_cap(
};
let connected = match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
)
.await
@@ -1077,7 +1221,7 @@ async fn maybe_refresh_idle_writer_for_dc(
};
let rotate_ok = match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
)
.await
@@ -1188,7 +1332,7 @@ async fn recover_single_endpoint_outage(
required: usize,
outage_backoff: &mut HashMap<(i32, IpFamily), u64>,
outage_next_attempt: &mut HashMap<(i32, IpFamily), Instant>,
reconnect_budget: &mut usize,
reconnect_sem: &Arc<Semaphore>,
) {
let now = Instant::now();
if let Some(ts) = outage_next_attempt.get(&key)
@@ -1198,7 +1342,7 @@ async fn recover_single_endpoint_outage(
}
let (min_backoff_ms, max_backoff_ms) = pool.single_endpoint_outage_backoff_bounds_ms();
if *reconnect_budget == 0 {
if reconnect_sem.available_permits() == 0 {
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
debug!(
dc = %key.0,
@@ -1209,7 +1353,17 @@ async fn recover_single_endpoint_outage(
);
return;
}
*reconnect_budget = (*reconnect_budget).saturating_sub(1);
let Ok(_reconnect_permit) = reconnect_sem.clone().try_acquire_owned() else {
outage_next_attempt.insert(key, now + Duration::from_millis(min_backoff_ms.max(250)));
debug!(
dc = %key.0,
family = ?key.1,
%endpoint,
required,
"Single-endpoint outage reconnect deferred by semaphore saturation"
);
return;
};
pool.stats
.increment_me_single_endpoint_outage_reconnect_attempt_total();
@@ -1218,7 +1372,7 @@ async fn recover_single_endpoint_outage(
pool.stats
.increment_me_single_endpoint_quarantine_bypass_total();
match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, key.0, rng.as_ref()),
)
.await
@@ -1247,7 +1401,7 @@ async fn recover_single_endpoint_outage(
} else {
let one_endpoint = [endpoint];
match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_endpoints_round_robin(key.0, &one_endpoint, rng.as_ref()),
)
.await
@@ -1372,7 +1526,7 @@ async fn maybe_rotate_single_endpoint_shadow(
};
let rotate_ok = match tokio::time::timeout(
pool.me_one_timeout,
pool.reconnect_runtime.me_one_timeout,
pool.connect_one_for_dc(endpoint, dc, rng.as_ref()),
)
.await
@@ -1687,6 +1841,8 @@ mod tests {
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
)

File diff suppressed because it is too large Load Diff

View File

@@ -72,7 +72,7 @@ impl MePool {
}
if changed {
self.rebuild_endpoint_dc_map().await;
self.writer_available.notify_waiters();
self.notify_writer_epoch();
}
if changed {
SnapshotApplyOutcome::AppliedChanged
@@ -112,7 +112,7 @@ impl MePool {
pub async fn reconnect_all(self: &Arc<Self>) {
let ws = self.writers.read().await.clone();
for w in ws {
for w in ws.iter() {
if let Ok(()) = self
.connect_one_for_dc(w.addr, w.writer_dc, self.rng.as_ref())
.await

View File

@@ -14,7 +14,10 @@ use super::pool::MePool;
impl MePool {
pub async fn init(self: &Arc<Self>, pool_size: usize, rng: &Arc<SecureRandom>) -> Result<()> {
let family_order = self.family_order();
let connect_concurrency = self.me_reconnect_max_concurrent_per_dc.max(1) as usize;
let connect_concurrency = self
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc
.max(1) as usize;
let ks = self.key_selector().await;
info!(
me_servers = self.proxy_map_v4.read().await.len(),
@@ -250,10 +253,12 @@ impl MePool {
return false;
}
if self.me_warmup_stagger_enabled {
let jitter =
rand::rng().random_range(0..=self.me_warmup_step_jitter.as_millis() as u64);
let delay_ms = self.me_warmup_step_delay.as_millis() as u64 + jitter;
if self.reconnect_runtime.me_warmup_stagger_enabled {
let jitter = rand::rng().random_range(
0..=self.reconnect_runtime.me_warmup_step_jitter.as_millis() as u64,
);
let delay_ms =
self.reconnect_runtime.me_warmup_step_delay.as_millis() as u64 + jitter;
tokio::time::sleep(std::time::Duration::from_millis(delay_ms)).await;
}
}

View File

@@ -42,10 +42,10 @@ pub async fn detect_public_ip() -> Option<IpAddr> {
impl MePool {
fn configured_stun_servers(&self) -> Vec<String> {
if !self.nat_stun_servers.is_empty() {
return self.nat_stun_servers.clone();
if !self.nat_runtime.nat_stun_servers.is_empty() {
return self.nat_runtime.nat_stun_servers.clone();
}
if let Some(s) = &self.nat_stun
if let Some(s) = &self.nat_runtime.nat_stun
&& !s.trim().is_empty()
{
return vec![s.clone()];
@@ -64,7 +64,7 @@ impl MePool {
let mut next_idx = 0usize;
let mut live_servers = Vec::new();
let mut best_by_ip: HashMap<IpAddr, (usize, std::net::SocketAddr)> = HashMap::new();
let concurrency = self.nat_probe_concurrency.max(1);
let concurrency = self.nat_runtime.nat_probe_concurrency.max(1);
while next_idx < servers.len() || !join_set.is_empty() {
while next_idx < servers.len() && join_set.len() < concurrency {
@@ -137,9 +137,13 @@ impl MePool {
}
pub(super) fn translate_ip_for_nat(&self, ip: IpAddr) -> IpAddr {
let nat_ip = self
.nat_ip_cfg
.or_else(|| self.nat_ip_detected.try_read().ok().and_then(|g| *g));
let nat_ip = self.nat_runtime.nat_ip_cfg.or_else(|| {
self.nat_runtime
.nat_ip_detected
.try_read()
.ok()
.and_then(|g| *g)
});
let Some(nat_ip) = nat_ip else {
return ip;
@@ -163,7 +167,7 @@ impl MePool {
addr: std::net::SocketAddr,
reflected: Option<std::net::SocketAddr>,
) -> std::net::SocketAddr {
let ip = if let Some(nat_ip) = self.nat_ip_cfg {
let ip = if let Some(nat_ip) = self.nat_runtime.nat_ip_cfg {
match (addr.ip(), nat_ip) {
(IpAddr::V4(_), IpAddr::V4(dst)) => IpAddr::V4(dst),
(IpAddr::V6(_), IpAddr::V6(dst)) => IpAddr::V6(dst),
@@ -185,22 +189,22 @@ impl MePool {
}
pub(super) async fn maybe_detect_nat_ip(&self, local_ip: IpAddr) -> Option<IpAddr> {
if self.nat_ip_cfg.is_some() {
return self.nat_ip_cfg;
if self.nat_runtime.nat_ip_cfg.is_some() {
return self.nat_runtime.nat_ip_cfg;
}
if !(is_bogon(local_ip) || local_ip.is_loopback() || local_ip.is_unspecified()) {
return None;
}
if let Some(ip) = *self.nat_ip_detected.read().await {
if let Some(ip) = *self.nat_runtime.nat_ip_detected.read().await {
return Some(ip);
}
match fetch_public_ipv4_with_retry().await {
Ok(Some(ip)) => {
{
let mut guard = self.nat_ip_detected.write().await;
let mut guard = self.nat_runtime.nat_ip_detected.write().await;
*guard = Some(IpAddr::V4(ip));
}
info!(public_ip = %ip, "Auto-detected public IP for NAT translation");
@@ -231,10 +235,10 @@ impl MePool {
}
// Backoff window
if use_shared_cache
&& let Some(until) = *self.stun_backoff_until.read().await
&& let Some(until) = *self.nat_runtime.stun_backoff_until.read().await
&& Instant::now() < until
{
if let Ok(cache) = self.nat_reflection_cache.try_lock() {
if let Ok(cache) = self.nat_runtime.nat_reflection_cache.try_lock() {
let slot = match family {
IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6,
@@ -244,7 +248,8 @@ impl MePool {
return None;
}
if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if use_shared_cache && let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
@@ -258,18 +263,18 @@ impl MePool {
let _singleflight_guard = if use_shared_cache {
Some(match family {
IpFamily::V4 => self.nat_reflection_singleflight_v4.lock().await,
IpFamily::V6 => self.nat_reflection_singleflight_v6.lock().await,
IpFamily::V4 => self.nat_runtime.nat_reflection_singleflight_v4.lock().await,
IpFamily::V6 => self.nat_runtime.nat_reflection_singleflight_v6.lock().await,
})
} else {
None
};
if use_shared_cache
&& let Some(until) = *self.stun_backoff_until.read().await
&& let Some(until) = *self.nat_runtime.stun_backoff_until.read().await
&& Instant::now() < until
{
if let Ok(cache) = self.nat_reflection_cache.try_lock() {
if let Ok(cache) = self.nat_runtime.nat_reflection_cache.try_lock() {
let slot = match family {
IpFamily::V4 => cache.v4,
IpFamily::V6 => cache.v6,
@@ -279,7 +284,8 @@ impl MePool {
return None;
}
if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if use_shared_cache && let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
@@ -292,13 +298,14 @@ impl MePool {
}
let attempt = if use_shared_cache {
self.nat_probe_attempts
self.nat_runtime
.nat_probe_attempts
.fetch_add(1, std::sync::atomic::Ordering::Relaxed)
} else {
0
};
let configured_servers = self.configured_stun_servers();
let live_snapshot = self.nat_stun_live_servers.read().await.clone();
let live_snapshot = self.nat_runtime.nat_stun_live_servers.read().await.clone();
let primary_servers = if live_snapshot.is_empty() {
configured_servers.clone()
} else {
@@ -322,14 +329,15 @@ impl MePool {
let live_server_count = live_servers.len();
if !live_servers.is_empty() {
*self.nat_stun_live_servers.write().await = live_servers;
*self.nat_runtime.nat_stun_live_servers.write().await = live_servers;
} else {
self.nat_stun_live_servers.write().await.clear();
self.nat_runtime.nat_stun_live_servers.write().await.clear();
}
if let Some(reflected_addr) = selected_reflected {
if use_shared_cache {
self.nat_probe_attempts
self.nat_runtime
.nat_probe_attempts
.store(0, std::sync::atomic::Ordering::Relaxed);
}
info!(
@@ -338,7 +346,9 @@ impl MePool {
"STUN-Quorum reached, IP: {}",
reflected_addr.ip()
);
if use_shared_cache && let Ok(mut cache) = self.nat_reflection_cache.try_lock() {
if use_shared_cache
&& let Ok(mut cache) = self.nat_runtime.nat_reflection_cache.try_lock()
{
let slot = match family {
IpFamily::V4 => &mut cache.v4,
IpFamily::V6 => &mut cache.v6,
@@ -350,7 +360,7 @@ impl MePool {
if use_shared_cache {
let backoff = Duration::from_secs(60 * 2u64.pow((attempt as u32).min(6)));
*self.stun_backoff_until.write().await = Some(Instant::now() + backoff);
*self.nat_runtime.stun_backoff_until.write().await = Some(Instant::now() + backoff);
}
None
}

View File

@@ -13,13 +13,40 @@ use super::pool::{MePool, RefillDcKey, RefillEndpointKey, WriterContour};
const ME_FLAP_UPTIME_THRESHOLD_SECS: u64 = 20;
const ME_FLAP_QUARANTINE_SECS: u64 = 25;
const ME_FLAP_MIN_UPTIME_MILLIS: u64 = 500;
const ME_REFILL_TOTAL_ATTEMPT_CAP: u32 = 20;
impl MePool {
/// Prunes the endpoint quarantine table.
///
/// An entry survives only if its expiry is still in the future and its
/// address is currently a key of `endpoint_dc_map`.
pub(super) async fn sweep_endpoint_quarantine(&self) {
    // Snapshot the configured endpoints in a dedicated scope so the map's
    // read guard is released before the quarantine lock is requested.
    let known_endpoints: HashSet<SocketAddr> = {
        let dc_map = self.endpoint_dc_map.read().await;
        dc_map.keys().copied().collect()
    };
    let sweep_started = Instant::now();
    self.endpoint_quarantine
        .lock()
        .await
        .retain(|endpoint, until| {
            let expired = *until <= sweep_started;
            let orphaned = !known_endpoints.contains(endpoint);
            !(expired || orphaned)
        });
}
pub(super) async fn maybe_quarantine_flapping_endpoint(
&self,
addr: SocketAddr,
uptime: Duration,
reason: &'static str,
) {
if uptime < Duration::from_millis(ME_FLAP_MIN_UPTIME_MILLIS) {
debug!(
%addr,
reason,
uptime_ms = uptime.as_millis(),
min_uptime_ms = ME_FLAP_MIN_UPTIME_MILLIS,
"Skipping flap quarantine for ultra-short writer lifetime"
);
return;
}
if uptime > Duration::from_secs(ME_FLAP_UPTIME_THRESHOLD_SECS) {
return;
}
@@ -31,6 +58,7 @@ impl MePool {
self.stats.increment_me_endpoint_quarantine_total();
warn!(
%addr,
reason,
uptime_ms = uptime.as_millis(),
quarantine_secs = ME_FLAP_QUARANTINE_SECS,
"ME endpoint temporarily quarantined due to rapid writer flap"
@@ -205,11 +233,16 @@ impl MePool {
}
async fn refill_writer_after_loss(self: &Arc<Self>, addr: SocketAddr, writer_dc: i32) -> bool {
let fast_retries = self.me_reconnect_fast_retry_count.max(1);
let fast_retries = self.reconnect_runtime.me_reconnect_fast_retry_count.max(1);
let mut total_attempts = 0u32;
let same_endpoint_quarantined = self.is_endpoint_quarantined(addr).await;
if !same_endpoint_quarantined {
for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
}
total_attempts = total_attempts.saturating_add(1);
self.stats.increment_me_reconnect_attempt();
match self
.connect_one_for_dc(addr, writer_dc, self.rng.as_ref())
@@ -250,6 +283,10 @@ impl MePool {
}
for attempt in 0..fast_retries {
if total_attempts >= ME_REFILL_TOTAL_ATTEMPT_CAP {
break;
}
total_attempts = total_attempts.saturating_add(1);
self.stats.increment_me_reconnect_attempt();
if self
.connect_endpoints_round_robin(writer_dc, &dc_endpoints, self.rng.as_ref())

View File

@@ -37,16 +37,23 @@ impl MePool {
}
/// Resets the pending-hardswap bookkeeping: stores 0 (with `Ordering::Relaxed`)
/// into the pending hardswap generation, its start timestamp (epoch seconds),
/// its map hash, and the warm generation.
fn clear_pending_hardswap_state(&self) {
self.pending_hardswap_generation.store(0, Ordering::Relaxed);
self.pending_hardswap_started_at_epoch_secs
self.reinit
.pending_hardswap_generation
.store(0, Ordering::Relaxed);
self.pending_hardswap_map_hash.store(0, Ordering::Relaxed);
self.warm_generation.store(0, Ordering::Relaxed);
self.reinit
.pending_hardswap_started_at_epoch_secs
.store(0, Ordering::Relaxed);
self.reinit
.pending_hardswap_map_hash
.store(0, Ordering::Relaxed);
self.reinit.warm_generation.store(0, Ordering::Relaxed);
}
async fn promote_warm_generation_to_active(&self, generation: u64) {
self.active_generation.store(generation, Ordering::Relaxed);
self.warm_generation.store(0, Ordering::Relaxed);
self.reinit
.active_generation
.store(generation, Ordering::Relaxed);
self.reinit.warm_generation.store(0, Ordering::Relaxed);
let ws = self.writers.read().await;
for writer in ws.iter() {
@@ -184,8 +191,14 @@ impl MePool {
}
fn hardswap_warmup_connect_delay_ms(&self) -> u64 {
let min_ms = self.me_hardswap_warmup_delay_min_ms.load(Ordering::Relaxed);
let max_ms = self.me_hardswap_warmup_delay_max_ms.load(Ordering::Relaxed);
let min_ms = self
.reinit
.me_hardswap_warmup_delay_min_ms
.load(Ordering::Relaxed);
let max_ms = self
.reinit
.me_hardswap_warmup_delay_max_ms
.load(Ordering::Relaxed);
let (min_ms, max_ms) = if min_ms <= max_ms {
(min_ms, max_ms)
} else {
@@ -199,9 +212,11 @@ impl MePool {
fn hardswap_warmup_backoff_ms(&self, pass_idx: usize) -> u64 {
let base_ms = self
.reinit
.me_hardswap_warmup_pass_backoff_base_ms
.load(Ordering::Relaxed);
let cap_ms = (self.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
let cap_ms =
(self.reconnect_runtime.me_reconnect_backoff_cap.as_millis() as u64).max(base_ms);
let shift = (pass_idx as u32).min(20);
let scaled = base_ms.saturating_mul(1u64 << shift);
let core = scaled.min(cap_ms);
@@ -244,6 +259,7 @@ impl MePool {
desired_by_dc: &HashMap<i32, HashSet<SocketAddr>>,
) {
let extra_passes = self
.reinit
.me_hardswap_warmup_extra_passes
.load(Ordering::Relaxed)
.min(10) as usize;
@@ -369,13 +385,20 @@ impl MePool {
let desired_map_hash = Self::desired_map_hash(&desired_by_dc);
let previous_generation = self.current_generation();
let hardswap = self.hardswap.load(Ordering::Relaxed);
let hardswap = self.reinit.hardswap.load(Ordering::Relaxed);
let generation = if hardswap {
let pending_generation = self.pending_hardswap_generation.load(Ordering::Relaxed);
let pending_generation = self
.reinit
.pending_hardswap_generation
.load(Ordering::Relaxed);
let pending_started_at = self
.reinit
.pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed);
let pending_map_hash = self.pending_hardswap_map_hash.load(Ordering::Relaxed);
let pending_map_hash = self
.reinit
.pending_hardswap_map_hash
.load(Ordering::Relaxed);
let pending_age_secs = now_epoch_secs.saturating_sub(pending_started_at);
let pending_ttl_expired =
pending_started_at > 0 && pending_age_secs > ME_HARDSWAP_PENDING_TTL_SECS;
@@ -405,24 +428,30 @@ impl MePool {
"ME hardswap pending generation expired by TTL; starting fresh generation"
);
}
let next_generation = self.generation.fetch_add(1, Ordering::Relaxed) + 1;
self.pending_hardswap_generation
let next_generation = self.reinit.generation.fetch_add(1, Ordering::Relaxed) + 1;
self.reinit
.pending_hardswap_generation
.store(next_generation, Ordering::Relaxed);
self.pending_hardswap_started_at_epoch_secs
self.reinit
.pending_hardswap_started_at_epoch_secs
.store(now_epoch_secs, Ordering::Relaxed);
self.pending_hardswap_map_hash
self.reinit
.pending_hardswap_map_hash
.store(desired_map_hash, Ordering::Relaxed);
self.warm_generation
self.reinit
.warm_generation
.store(next_generation, Ordering::Relaxed);
next_generation
}
} else {
self.clear_pending_hardswap_state();
self.generation.fetch_add(1, Ordering::Relaxed) + 1
self.reinit.generation.fetch_add(1, Ordering::Relaxed) + 1
};
if hardswap {
self.warm_generation.store(generation, Ordering::Relaxed);
self.reinit
.warm_generation
.store(generation, Ordering::Relaxed);
self.warmup_generation_for_all_dcs(rng, generation, &desired_by_dc)
.await;
} else {
@@ -436,7 +465,8 @@ impl MePool {
.map(|w| (w.writer_dc, w.addr))
.collect();
let min_ratio = Self::permille_to_ratio(
self.me_pool_min_fresh_ratio_permille
self.drain_runtime
.me_pool_min_fresh_ratio_permille
.load(Ordering::Relaxed),
);
let (coverage_ratio, missing_dc) =

View File

@@ -94,9 +94,9 @@ impl MePool {
pub(crate) async fn api_nat_stun_snapshot(&self) -> MeApiNatStunSnapshot {
let now = Instant::now();
let mut configured_servers = if !self.nat_stun_servers.is_empty() {
self.nat_stun_servers.clone()
} else if let Some(stun) = &self.nat_stun {
let mut configured_servers = if !self.nat_runtime.nat_stun_servers.is_empty() {
self.nat_runtime.nat_stun_servers.clone()
} else if let Some(stun) = &self.nat_runtime.nat_stun {
if stun.trim().is_empty() {
Vec::new()
} else {
@@ -108,11 +108,11 @@ impl MePool {
configured_servers.sort();
configured_servers.dedup();
let mut live_servers = self.nat_stun_live_servers.read().await.clone();
let mut live_servers = self.nat_runtime.nat_stun_live_servers.read().await.clone();
live_servers.sort();
live_servers.dedup();
let reflection = self.nat_reflection_cache.lock().await;
let reflection = self.nat_runtime.nat_reflection_cache.lock().await;
let reflection_v4 = reflection.v4.map(|(ts, addr)| MeApiNatReflectionSnapshot {
addr,
age_secs: now.saturating_duration_since(ts).as_secs(),
@@ -123,17 +123,19 @@ impl MePool {
});
drop(reflection);
let backoff_until = *self.stun_backoff_until.read().await;
let backoff_until = *self.nat_runtime.stun_backoff_until.read().await;
let stun_backoff_remaining_ms = backoff_until.and_then(|until| {
(until > now).then_some(until.duration_since(now).as_millis() as u64)
});
MeApiNatStunSnapshot {
nat_probe_enabled: self.nat_probe,
nat_probe_enabled: self.nat_runtime.nat_probe,
nat_probe_disabled_runtime: self
.nat_runtime
.nat_probe_disabled
.load(std::sync::atomic::Ordering::Relaxed),
nat_probe_attempts: self
.nat_runtime
.nat_probe_attempts
.load(std::sync::atomic::Ordering::Relaxed),
configured_servers,

View File

@@ -160,7 +160,7 @@ impl MePool {
let writers = self.writers.read().await.clone();
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
for writer in writers {
for writer in writers.iter() {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
@@ -197,7 +197,7 @@ impl MePool {
let writers = self.writers.read().await.clone();
let mut live_writers_by_dc = HashMap::<i16, usize>::new();
for writer in writers {
for writer in writers.iter() {
if writer.draining.load(Ordering::Relaxed) {
continue;
}
@@ -224,7 +224,10 @@ impl MePool {
pub(crate) async fn api_status_snapshot(&self) -> MeApiStatusSnapshot {
let now_epoch_secs = Self::now_epoch_secs();
let active_generation = self.current_generation();
let drain_ttl_secs = self.me_pool_drain_ttl_secs.load(Ordering::Relaxed);
let drain_ttl_secs = self
.drain_runtime
.me_pool_drain_ttl_secs
.load(Ordering::Relaxed);
let mut endpoints_by_dc = BTreeMap::<i16, BTreeSet<SocketAddr>>::new();
if self.decision.ipv4_me {
@@ -255,7 +258,7 @@ impl MePool {
let mut dc_rtt_agg = HashMap::<i16, (f64, u64)>::new();
let mut writer_rows = Vec::<MeApiWriterStatusSnapshot>::with_capacity(writers.len());
for writer in writers {
for writer in writers.iter() {
let endpoint = writer.addr;
let dc = i16::try_from(writer.writer_dc).ok();
let draining = writer.draining.load(Ordering::Relaxed);
@@ -336,6 +339,7 @@ impl MePool {
let mut fresh_alive_writers = 0usize;
let floor_mode = self.floor_mode();
let adaptive_cpu_cores = (self
.floor_runtime
.me_adaptive_floor_cpu_cores_effective
.load(Ordering::Relaxed) as usize)
.max(1);
@@ -350,22 +354,26 @@ impl MePool {
self.required_writers_for_dc_with_floor_mode(endpoint_count, false);
let floor_min = if endpoint_count <= 1 {
(self
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed) as usize)
.max(1)
.min(base_required.max(1))
} else {
(self
.floor_runtime
.me_adaptive_floor_min_writers_multi_endpoint
.load(Ordering::Relaxed) as usize)
.max(1)
.min(base_required.max(1))
};
let extra_per_core = if endpoint_count <= 1 {
self.me_adaptive_floor_max_extra_writers_single_per_core
self.floor_runtime
.me_adaptive_floor_max_extra_writers_single_per_core
.load(Ordering::Relaxed) as usize
} else {
self.me_adaptive_floor_max_extra_writers_multi_per_core
self.floor_runtime
.me_adaptive_floor_max_extra_writers_multi_per_core
.load(Ordering::Relaxed) as usize
};
let floor_max =
@@ -436,6 +444,7 @@ impl MePool {
let now = Instant::now();
let now_epoch_secs = Self::now_epoch_secs();
let pending_started_at = self
.reinit
.pending_hardswap_started_at_epoch_secs
.load(Ordering::Relaxed);
let pending_hardswap_age_secs =
@@ -477,119 +486,175 @@ impl MePool {
}
MeApiRuntimeSnapshot {
active_generation: self.active_generation.load(Ordering::Relaxed),
warm_generation: self.warm_generation.load(Ordering::Relaxed),
pending_hardswap_generation: self.pending_hardswap_generation.load(Ordering::Relaxed),
active_generation: self.reinit.active_generation.load(Ordering::Relaxed),
warm_generation: self.reinit.warm_generation.load(Ordering::Relaxed),
pending_hardswap_generation: self
.reinit
.pending_hardswap_generation
.load(Ordering::Relaxed),
pending_hardswap_age_secs,
hardswap_enabled: self.hardswap.load(Ordering::Relaxed),
hardswap_enabled: self.reinit.hardswap.load(Ordering::Relaxed),
floor_mode: floor_mode_label(self.floor_mode()),
adaptive_floor_idle_secs: self.me_adaptive_floor_idle_secs.load(Ordering::Relaxed),
adaptive_floor_idle_secs: self
.floor_runtime
.me_adaptive_floor_idle_secs
.load(Ordering::Relaxed),
adaptive_floor_min_writers_single_endpoint: self
.floor_runtime
.me_adaptive_floor_min_writers_single_endpoint
.load(Ordering::Relaxed),
adaptive_floor_min_writers_multi_endpoint: self
.floor_runtime
.me_adaptive_floor_min_writers_multi_endpoint
.load(Ordering::Relaxed),
adaptive_floor_recover_grace_secs: self
.floor_runtime
.me_adaptive_floor_recover_grace_secs
.load(Ordering::Relaxed),
adaptive_floor_writers_per_core_total: self
.floor_runtime
.me_adaptive_floor_writers_per_core_total
.load(Ordering::Relaxed) as u16,
adaptive_floor_cpu_cores_override: self
.floor_runtime
.me_adaptive_floor_cpu_cores_override
.load(Ordering::Relaxed) as u16,
adaptive_floor_max_extra_writers_single_per_core: self
.floor_runtime
.me_adaptive_floor_max_extra_writers_single_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_extra_writers_multi_per_core: self
.floor_runtime
.me_adaptive_floor_max_extra_writers_multi_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_active_writers_per_core: self
.floor_runtime
.me_adaptive_floor_max_active_writers_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_warm_writers_per_core: self
.floor_runtime
.me_adaptive_floor_max_warm_writers_per_core
.load(Ordering::Relaxed)
as u16,
adaptive_floor_max_active_writers_global: self
.floor_runtime
.me_adaptive_floor_max_active_writers_global
.load(Ordering::Relaxed),
adaptive_floor_max_warm_writers_global: self
.floor_runtime
.me_adaptive_floor_max_warm_writers_global
.load(Ordering::Relaxed),
adaptive_floor_cpu_cores_detected: self
.floor_runtime
.me_adaptive_floor_cpu_cores_detected
.load(Ordering::Relaxed),
adaptive_floor_cpu_cores_effective: self
.floor_runtime
.me_adaptive_floor_cpu_cores_effective
.load(Ordering::Relaxed),
adaptive_floor_global_cap_raw: self
.floor_runtime
.me_adaptive_floor_global_cap_raw
.load(Ordering::Relaxed),
adaptive_floor_global_cap_effective: self
.floor_runtime
.me_adaptive_floor_global_cap_effective
.load(Ordering::Relaxed),
adaptive_floor_target_writers_total: self
.floor_runtime
.me_adaptive_floor_target_writers_total
.load(Ordering::Relaxed),
adaptive_floor_active_cap_configured: self
.floor_runtime
.me_adaptive_floor_active_cap_configured
.load(Ordering::Relaxed),
adaptive_floor_active_cap_effective: self
.floor_runtime
.me_adaptive_floor_active_cap_effective
.load(Ordering::Relaxed),
adaptive_floor_warm_cap_configured: self
.floor_runtime
.me_adaptive_floor_warm_cap_configured
.load(Ordering::Relaxed),
adaptive_floor_warm_cap_effective: self
.floor_runtime
.me_adaptive_floor_warm_cap_effective
.load(Ordering::Relaxed),
adaptive_floor_active_writers_current: self
.floor_runtime
.me_adaptive_floor_active_writers_current
.load(Ordering::Relaxed),
adaptive_floor_warm_writers_current: self
.floor_runtime
.me_adaptive_floor_warm_writers_current
.load(Ordering::Relaxed),
me_keepalive_enabled: self.me_keepalive_enabled,
me_keepalive_interval_secs: self.me_keepalive_interval.as_secs(),
me_keepalive_jitter_secs: self.me_keepalive_jitter.as_secs(),
me_keepalive_payload_random: self.me_keepalive_payload_random,
rpc_proxy_req_every_secs: self.rpc_proxy_req_every_secs.load(Ordering::Relaxed),
me_reconnect_max_concurrent_per_dc: self.me_reconnect_max_concurrent_per_dc,
me_reconnect_backoff_base_ms: self.me_reconnect_backoff_base.as_millis() as u64,
me_reconnect_backoff_cap_ms: self.me_reconnect_backoff_cap.as_millis() as u64,
me_reconnect_fast_retry_count: self.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: self.me_pool_drain_ttl_secs.load(Ordering::Relaxed),
me_pool_force_close_secs: self.me_pool_force_close_secs.load(Ordering::Relaxed),
me_keepalive_enabled: self.writer_lifecycle.me_keepalive_enabled,
me_keepalive_interval_secs: self.writer_lifecycle.me_keepalive_interval.as_secs(),
me_keepalive_jitter_secs: self.writer_lifecycle.me_keepalive_jitter.as_secs(),
me_keepalive_payload_random: self.writer_lifecycle.me_keepalive_payload_random,
rpc_proxy_req_every_secs: self
.writer_lifecycle
.rpc_proxy_req_every_secs
.load(Ordering::Relaxed),
me_reconnect_max_concurrent_per_dc: self
.reconnect_runtime
.me_reconnect_max_concurrent_per_dc,
me_reconnect_backoff_base_ms: self
.reconnect_runtime
.me_reconnect_backoff_base
.as_millis() as u64,
me_reconnect_backoff_cap_ms: self.reconnect_runtime.me_reconnect_backoff_cap.as_millis()
as u64,
me_reconnect_fast_retry_count: self.reconnect_runtime.me_reconnect_fast_retry_count,
me_pool_drain_ttl_secs: self
.drain_runtime
.me_pool_drain_ttl_secs
.load(Ordering::Relaxed),
me_pool_force_close_secs: self
.drain_runtime
.me_pool_force_close_secs
.load(Ordering::Relaxed),
me_pool_min_fresh_ratio: Self::permille_to_ratio(
self.me_pool_min_fresh_ratio_permille
self.drain_runtime
.me_pool_min_fresh_ratio_permille
.load(Ordering::Relaxed),
),
me_bind_stale_mode: bind_stale_mode_label(self.bind_stale_mode()),
me_bind_stale_ttl_secs: self.me_bind_stale_ttl_secs.load(Ordering::Relaxed),
me_bind_stale_ttl_secs: self
.binding_policy
.me_bind_stale_ttl_secs
.load(Ordering::Relaxed),
me_single_endpoint_shadow_writers: self
.single_endpoint_runtime
.me_single_endpoint_shadow_writers
.load(Ordering::Relaxed),
me_single_endpoint_outage_mode_enabled: self
.single_endpoint_runtime
.me_single_endpoint_outage_mode_enabled
.load(Ordering::Relaxed),
me_single_endpoint_outage_disable_quarantine: self
.single_endpoint_runtime
.me_single_endpoint_outage_disable_quarantine
.load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_min_ms: self
.single_endpoint_runtime
.me_single_endpoint_outage_backoff_min_ms
.load(Ordering::Relaxed),
me_single_endpoint_outage_backoff_max_ms: self
.single_endpoint_runtime
.me_single_endpoint_outage_backoff_max_ms
.load(Ordering::Relaxed),
me_single_endpoint_shadow_rotate_every_secs: self
.single_endpoint_runtime
.me_single_endpoint_shadow_rotate_every_secs
.load(Ordering::Relaxed),
me_deterministic_writer_sort: self.me_deterministic_writer_sort.load(Ordering::Relaxed),
me_deterministic_writer_sort: self
.writer_selection_policy
.me_deterministic_writer_sort
.load(Ordering::Relaxed),
me_writer_pick_mode: writer_pick_mode_label(self.writer_pick_mode()),
me_writer_pick_sample_size: self.writer_pick_sample_size() as u8,
me_socks_kdf_policy: socks_kdf_policy_label(self.socks_kdf_policy()),

View File

@@ -1,3 +1,4 @@
use std::collections::HashMap;
use std::io::ErrorKind;
use std::net::SocketAddr;
use std::sync::Arc;
@@ -25,6 +26,7 @@ const ME_ACTIVE_PING_SECS: u64 = 25;
const ME_ACTIVE_PING_JITTER_SECS: i64 = 5;
const ME_IDLE_KEEPALIVE_MAX_SECS: u64 = 5;
const ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS: u64 = 700;
const ME_PING_TRACKER_CLEANUP_EVERY: u32 = 32;
#[derive(Clone, Copy)]
enum WriterTeardownMode {
@@ -36,6 +38,240 @@ fn is_me_peer_closed_error(error: &ProxyError) -> bool {
matches!(error, ProxyError::Io(ioe) if ioe.kind() == ErrorKind::UnexpectedEof)
}
/// Reason a writer's lifecycle ended.
///
/// NOTE(review): the consumer of this enum is outside this view; presumably each
/// variant names the lifecycle task that finished first — confirm at the use site.
enum WriterLifecycleExit {
/// Carries a `Result<()>`; presumably the reader task's outcome — confirm.
Reader(Result<()>),
/// Carries a `Result<()>`; presumably the outcome of `writer_command_loop` — confirm.
Writer(Result<()>),
/// No payload; presumably `ping_loop` returned — confirm.
Ping,
/// No payload; presumably `rpc_proxy_req_signal_loop` returned — confirm.
Signal,
/// No payload; presumably the writer's cancellation token fired — confirm.
Cancelled,
}
async fn writer_command_loop(
mut rx: mpsc::Receiver<WriterCommand>,
mut rpc_writer: RpcWriter,
cancel: CancellationToken,
) -> Result<()> {
loop {
tokio::select! {
cmd = rx.recv() => {
match cmd {
Some(WriterCommand::Data(payload)) => {
rpc_writer.send(&payload).await?;
}
Some(WriterCommand::DataAndFlush(payload)) => {
rpc_writer.send_and_flush(&payload).await?;
}
Some(WriterCommand::Close) | None => return Ok(()),
}
}
_ = cancel.cancelled() => return Ok(()),
}
}
}
/// Keepalive ping task for a single ME writer.
///
/// After a jittered startup delay the loop repeatedly sleeps, records a fresh
/// ping id in `ping_tracker_ping`, and queues an `RPC_PING_U32` frame on
/// `tx_ping`. The task returns when `cancel_ping_token` fires, when the pool
/// behind `pool_ping` can no longer be upgraded (keepalive mode only), or when
/// the writer channel rejects the frame.
#[allow(clippy::too_many_arguments)]
async fn ping_loop(
    pool_ping: std::sync::Weak<MePool>,
    writer_id: u64,
    tx_ping: mpsc::Sender<WriterCommand>,
    ping_tracker_ping: Arc<tokio::sync::Mutex<HashMap<i64, Instant>>>,
    stats_ping: Arc<crate::stats::Stats>,
    keepalive_enabled: bool,
    keepalive_interval: Duration,
    keepalive_jitter: Duration,
    cancel_ping_token: CancellationToken,
) {
    // Uniform jitter in `[0, bound]` ms, where `bound` is the configured jitter
    // capped at half of `interval` and never below 1 ms.
    fn capped_jitter(interval: Duration, jitter: Duration) -> Duration {
        let half_interval_ms = interval.as_millis() / 2;
        let bound_ms = jitter.as_millis().min(half_interval_ms).max(1);
        Duration::from_millis(rand::rng().random_range(0..=bound_ms as u64))
    }

    // Delay used when keepalive is disabled: ME_ACTIVE_PING_SECS shifted by a
    // random offset within ±ME_ACTIVE_PING_JITTER_SECS, floored at 5 seconds.
    fn active_ping_delay() -> Duration {
        let offset =
            rand::rng().random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
        Duration::from_secs((ME_ACTIVE_PING_SECS as i64 + offset).max(5) as u64)
    }

    let mut next_ping_id: i64 = rand::random::<i64>();
    let mut cleanup_tick: u32 = 0;
    let idle_interval_cap = Duration::from_secs(ME_IDLE_KEEPALIVE_MAX_SECS);

    // Per-writer jittered start to avoid phase sync.
    let startup_delay = if !keepalive_enabled {
        active_ping_delay()
    } else {
        let Some(pool) = pool_ping.upgrade() else {
            return;
        };
        // A writer with no bound connections uses the shorter idle interval cap.
        let interval = if pool.registry.is_writer_empty(writer_id).await {
            keepalive_interval.min(idle_interval_cap)
        } else {
            keepalive_interval
        };
        capped_jitter(interval, keepalive_jitter)
    };
    tokio::select! {
        _ = cancel_ping_token.cancelled() => return,
        _ = tokio::time::sleep(startup_delay) => {}
    }

    loop {
        let wait = if !keepalive_enabled {
            active_ping_delay()
        } else {
            let Some(pool) = pool_ping.upgrade() else {
                return;
            };
            let interval = if pool.registry.is_writer_empty(writer_id).await {
                keepalive_interval.min(idle_interval_cap)
            } else {
                keepalive_interval
            };
            interval + capped_jitter(interval, keepalive_jitter)
        };
        tokio::select! {
            _ = cancel_ping_token.cancelled() => return,
            _ = tokio::time::sleep(wait) => {}
        }

        // Frame layout: RPC_PING_U32 followed by the ping id, both little-endian.
        let sent_id = next_ping_id;
        next_ping_id = next_ping_id.wrapping_add(1);
        let mut frame = Vec::with_capacity(12);
        frame.extend_from_slice(&RPC_PING_U32.to_le_bytes());
        frame.extend_from_slice(&sent_id.to_le_bytes());

        {
            let mut in_flight = ping_tracker_ping.lock().await;
            cleanup_tick = cleanup_tick.wrapping_add(1);
            // Every ME_PING_TRACKER_CLEANUP_EVERY pings, evict entries older than
            // 120 seconds and count each eviction as a keepalive timeout.
            if cleanup_tick.is_multiple_of(ME_PING_TRACKER_CLEANUP_EVERY) {
                let len_before = in_flight.len();
                in_flight.retain(|_, sent_at| sent_at.elapsed() < Duration::from_secs(120));
                let evicted = len_before.saturating_sub(in_flight.len());
                if evicted > 0 {
                    stats_ping.increment_me_keepalive_timeout_by(evicted as u64);
                }
            }
            in_flight.insert(sent_id, std::time::Instant::now());
        }

        stats_ping.increment_me_keepalive_sent();
        let queued = tx_ping
            .send(WriterCommand::DataAndFlush(Bytes::from(frame)))
            .await;
        if queued.is_err() {
            stats_ping.increment_me_keepalive_failed();
            debug!("ME ping failed, removing dead writer");
            return;
        }
    }
}
/// Periodic service `RPC_PROXY_REQ` signal task for a single ME writer.
///
/// When `rpc_proxy_req_every_secs` is 0 the task only waits for cancellation.
/// Otherwise, after a jittered startup delay, each round sleeps for the interval
/// plus jitter, then registers a temporary connection id, queues a proxy request
/// built from the writer's last known metadata, waits up to
/// `ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS` for a reply, queues an `RPC_CLOSE_EXT_U32`
/// frame for that id, and unregisters it. The task returns on cancellation, when
/// the pool can no longer be upgraded, or when the writer channel rejects a frame.
#[allow(clippy::too_many_arguments)]
async fn rpc_proxy_req_signal_loop(
    pool_signal: std::sync::Weak<MePool>,
    writer_id: u64,
    tx_signal: mpsc::Sender<WriterCommand>,
    stats_signal: Arc<crate::stats::Stats>,
    cancel_signal: CancellationToken,
    keepalive_jitter_signal: Duration,
    rpc_proxy_req_every_secs: u64,
) {
    if rpc_proxy_req_every_secs == 0 {
        // A disabled signal loop has to stay parked until the writer is cancelled:
        // returning right away would complete the surrounding `select!` and tear
        // down the writer lifecycle.
        cancel_signal.cancelled().await;
        return;
    }

    let interval = Duration::from_secs(rpc_proxy_req_every_secs);
    // Jitter bound: configured jitter capped at half the interval, at least 1 ms.
    // Both inputs are fixed for the lifetime of the task, so compute it once.
    let jitter_bound_ms = keepalive_jitter_signal
        .as_millis()
        .min(interval.as_millis() / 2)
        .max(1) as u64;
    let draw_jitter =
        move || Duration::from_millis(rand::rng().random_range(0..=jitter_bound_ms));

    let startup_delay = draw_jitter();
    tokio::select! {
        _ = cancel_signal.cancelled() => return,
        _ = tokio::time::sleep(startup_delay) => {}
    }

    loop {
        let wait = interval + draw_jitter();
        tokio::select! {
            _ = cancel_signal.cancelled() => return,
            _ = tokio::time::sleep(wait) => {}
        }

        let Some(pool) = pool_signal.upgrade() else {
            return;
        };
        let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
            // No metadata recorded for this writer: skip this round.
            stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
            continue;
        };

        let (conn_id, mut service_rx) = pool.registry.register().await;

        // The service RPC_PROXY_REQ signal path is deliberately route-only: the
        // synthetic conn_id must not be bound into regular writer/client accounting.
        let request = build_proxy_req_payload(
            conn_id,
            meta.client_addr,
            meta.our_addr,
            &[],
            pool.proxy_tag.as_deref(),
            meta.proto_flags,
        );
        let request_queued = tx_signal
            .send(WriterCommand::DataAndFlush(request))
            .await
            .is_ok();
        if !request_queued {
            stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
            let _ = pool.registry.unregister(conn_id).await;
            return;
        }
        stats_signal.increment_me_rpc_proxy_req_signal_sent_total();

        // Only a message that arrives within the wait window counts as a response.
        let reply = tokio::time::timeout(
            Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
            service_rx.recv(),
        )
        .await;
        if let Ok(Some(_)) = reply {
            stats_signal.increment_me_rpc_proxy_req_signal_response_total();
        }

        // Close frame layout: RPC_CLOSE_EXT_U32 followed by conn_id, little-endian.
        let mut close_frame = Vec::with_capacity(12);
        close_frame.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
        close_frame.extend_from_slice(&conn_id.to_le_bytes());
        let close_queued = tx_signal
            .send(WriterCommand::DataAndFlush(Bytes::from(close_frame)))
            .await
            .is_ok();
        if close_queued {
            stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
        } else {
            stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
        }
        // The temporary registration is removed whether or not the close was queued.
        let _ = pool.registry.unregister(conn_id).await;
        if !close_queued {
            return;
        }
    }
}
impl MePool {
pub(crate) async fn prune_closed_writers(self: &Arc<Self>) {
let closed_writer_ids: Vec<u64> = {
@@ -136,46 +372,15 @@ impl MePool {
let draining_started_at_epoch_secs = Arc::new(AtomicU64::new(0));
let drain_deadline_epoch_secs = Arc::new(AtomicU64::new(0));
let allow_drain_fallback = Arc::new(AtomicBool::new(false));
let (tx, mut rx) = mpsc::channel::<WriterCommand>(self.writer_cmd_channel_capacity);
let mut rpc_writer = RpcWriter {
let (tx, rx) =
mpsc::channel::<WriterCommand>(self.writer_lifecycle.writer_cmd_channel_capacity);
let rpc_writer = RpcWriter {
writer: hs.wr,
key: hs.write_key,
iv: hs.write_iv,
seq_no: 0,
crc_mode: hs.crc_mode,
};
let cancel_wr = cancel.clone();
let cleanup_done = Arc::new(AtomicBool::new(false));
let cleanup_for_writer = cleanup_done.clone();
let pool_writer_task = Arc::downgrade(self);
tokio::spawn(async move {
loop {
tokio::select! {
cmd = rx.recv() => {
match cmd {
Some(WriterCommand::Data(payload)) => {
if rpc_writer.send(&payload).await.is_err() { break; }
}
Some(WriterCommand::DataAndFlush(payload)) => {
if rpc_writer.send_and_flush(&payload).await.is_err() { break; }
}
Some(WriterCommand::Close) | None => break,
}
}
_ = cancel_wr.cancelled() => break,
}
}
if cleanup_for_writer
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
if let Some(pool) = pool_writer_task.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
cancel_wr.cancel();
}
}
});
let writer = MeWriter {
id: writer_id,
addr,
@@ -193,329 +398,135 @@ impl MePool {
drain_deadline_epoch_secs: drain_deadline_epoch_secs.clone(),
allow_drain_fallback: allow_drain_fallback.clone(),
};
self.writers.write().await.push(writer.clone());
self.writers
.update(|writers| writers.push(writer.clone()))
.await;
self.registry.register_writer(writer_id, tx.clone()).await;
self.registry.mark_writer_idle(writer_id).await;
self.conn_count.fetch_add(1, Ordering::Relaxed);
self.writer_available.notify_one();
self.notify_writer_epoch();
let reg = self.registry.clone();
let writers_arc = self.writers_arc();
let ping_tracker = self.ping_tracker.clone();
let ping_tracker = Arc::new(tokio::sync::Mutex::new(HashMap::<i64, Instant>::new()));
let ping_tracker_reader = ping_tracker.clone();
let ping_tracker_ping = ping_tracker.clone();
let rtt_stats = self.rtt_stats.clone();
let stats_reader = self.stats.clone();
let stats_reader_close = self.stats.clone();
let stats_ping = self.stats.clone();
let pool = Arc::downgrade(self);
let cancel_ping = cancel.clone();
let tx_ping = tx.clone();
let ping_tracker_ping = ping_tracker.clone();
let cleanup_for_reader = cleanup_done.clone();
let cleanup_for_ping = cleanup_done.clone();
let keepalive_enabled = self.me_keepalive_enabled;
let keepalive_interval = self.me_keepalive_interval;
let keepalive_jitter = self.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self.rpc_proxy_req_every_secs.load(Ordering::Relaxed);
let tx_signal = tx.clone();
let stats_signal = self.stats.clone();
let cancel_signal = cancel.clone();
let cleanup_for_signal = cleanup_done.clone();
let pool_signal = Arc::downgrade(self);
let keepalive_jitter_signal = self.me_keepalive_jitter;
let cancel_reader_token = cancel.clone();
let cancel_ping_token = cancel_ping.clone();
let reader_route_data_wait_ms = self.me_reader_route_data_wait_ms.clone();
tokio::spawn(async move {
let res = reader_loop(
hs.rd,
hs.read_key,
hs.read_iv,
hs.crc_mode,
reg.clone(),
BytesMut::new(),
BytesMut::new(),
tx.clone(),
ping_tracker_reader,
rtt_stats.clone(),
stats_reader,
writer_id,
degraded.clone(),
rtt_ema_ms_x10.clone(),
reader_route_data_wait_ms,
cancel_reader_token.clone(),
)
.await;
let idle_close_by_peer = if let Err(e) = res.as_ref() {
is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
} else {
false
};
if idle_close_by_peer {
stats_reader_close.increment_me_idle_close_by_peer_total();
info!(writer_id, "ME socket closed by peer on idle writer");
}
if cleanup_for_reader
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
if let Some(pool) = pool.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
// Fallback for shutdown races: make writer task exit quickly so stale
// channels are observable by periodic prune.
cancel_reader_token.cancel();
}
}
if let Err(e) = res
&& !idle_close_by_peer
{
warn!(error = %e, "ME reader ended");
}
let remaining = writers_arc.read().await.len();
debug!(writer_id, remaining, "ME reader task finished");
});
let pool_lifecycle = Arc::downgrade(self);
let pool_ping = Arc::downgrade(self);
let pool_signal = Arc::downgrade(self);
let tx_reader = tx.clone();
let tx_ping = tx.clone();
let tx_signal = tx.clone();
let keepalive_enabled = self.writer_lifecycle.me_keepalive_enabled;
let keepalive_interval = self.writer_lifecycle.me_keepalive_interval;
let keepalive_jitter = self.writer_lifecycle.me_keepalive_jitter;
let keepalive_jitter_signal = self.writer_lifecycle.me_keepalive_jitter;
let rpc_proxy_req_every_secs = self
.writer_lifecycle
.rpc_proxy_req_every_secs
.load(Ordering::Relaxed);
let cancel_reader = cancel.clone();
let cancel_writer = cancel.clone();
let cancel_ping = cancel.clone();
let cancel_signal = cancel.clone();
let cancel_select = cancel.clone();
let cancel_cleanup = cancel.clone();
let reader_route_data_wait_ms = self.transport_policy.me_reader_route_data_wait_ms.clone();
tokio::spawn(async move {
let mut ping_id: i64 = rand::random::<i64>();
let idle_interval_cap = Duration::from_secs(ME_IDLE_KEEPALIVE_MAX_SECS);
// Per-writer jittered start to avoid phase sync.
let startup_jitter = if keepalive_enabled {
let mut interval = keepalive_interval;
if let Some(pool) = pool_ping.upgrade() {
if pool.registry.is_writer_empty(writer_id).await {
interval = interval.min(idle_interval_cap);
}
} else {
return;
// Reader MUST be the first branch in biased select! to avoid read starvation.
let exit = tokio::select! {
biased;
reader_res = reader_loop(
hs.rd,
hs.read_key,
hs.read_iv,
hs.crc_mode,
reg.clone(),
BytesMut::new(),
BytesMut::new(),
tx_reader,
ping_tracker_reader,
rtt_stats,
stats_reader,
writer_id,
degraded,
rtt_ema_ms_x10,
reader_route_data_wait_ms,
cancel_reader,
) => WriterLifecycleExit::Reader(reader_res),
writer_res = writer_command_loop(rx, rpc_writer, cancel_writer) => {
WriterLifecycleExit::Writer(writer_res)
}
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
Duration::from_millis(rand::rng().random_range(0..=effective_jitter_ms as u64))
} else {
let jitter = rand::rng()
.random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
let wait = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
Duration::from_secs(wait)
_ = ping_loop(
pool_ping,
writer_id,
tx_ping,
ping_tracker_ping,
stats_ping,
keepalive_enabled,
keepalive_interval,
keepalive_jitter,
cancel_ping,
) => WriterLifecycleExit::Ping,
_ = rpc_proxy_req_signal_loop(
pool_signal,
writer_id,
tx_signal,
stats_signal,
cancel_signal,
keepalive_jitter_signal,
rpc_proxy_req_every_secs,
) => WriterLifecycleExit::Signal,
_ = cancel_select.cancelled() => WriterLifecycleExit::Cancelled,
};
tokio::select! {
_ = cancel_ping_token.cancelled() => return,
_ = tokio::time::sleep(startup_jitter) => {}
}
loop {
let wait = if keepalive_enabled {
let mut interval = keepalive_interval;
if let Some(pool) = pool_ping.upgrade() {
if pool.registry.is_writer_empty(writer_id).await {
interval = interval.min(idle_interval_cap);
}
match exit {
WriterLifecycleExit::Reader(res) => {
let idle_close_by_peer = if let Err(e) = res.as_ref() {
is_me_peer_closed_error(e) && reg.is_writer_empty(writer_id).await
} else {
break;
false
};
if idle_close_by_peer {
stats_reader_close.increment_me_idle_close_by_peer_total();
info!(writer_id, "ME socket closed by peer on idle writer");
}
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms =
keepalive_jitter.as_millis().min(jitter_cap_ms).max(1);
interval
+ Duration::from_millis(
rand::rng().random_range(0..=effective_jitter_ms as u64),
)
} else {
let jitter = rand::rng()
.random_range(-ME_ACTIVE_PING_JITTER_SECS..=ME_ACTIVE_PING_JITTER_SECS);
let secs = (ME_ACTIVE_PING_SECS as i64 + jitter).max(5) as u64;
Duration::from_secs(secs)
};
tokio::select! {
_ = cancel_ping_token.cancelled() => {
break;
}
_ = tokio::time::sleep(wait) => {}
}
let sent_id = ping_id;
let mut p = Vec::with_capacity(12);
p.extend_from_slice(&RPC_PING_U32.to_le_bytes());
p.extend_from_slice(&sent_id.to_le_bytes());
{
let mut tracker = ping_tracker_ping.lock().await;
let now_epoch_ms = std::time::SystemTime::now()
.duration_since(std::time::UNIX_EPOCH)
.unwrap_or_default()
.as_millis() as u64;
let mut run_cleanup = false;
if let Some(pool) = pool_ping.upgrade() {
let last_cleanup_ms = pool
.ping_tracker_last_cleanup_epoch_ms
.load(Ordering::Relaxed);
if now_epoch_ms.saturating_sub(last_cleanup_ms) >= 30_000
&& pool
.ping_tracker_last_cleanup_epoch_ms
.compare_exchange(
last_cleanup_ms,
now_epoch_ms,
Ordering::AcqRel,
Ordering::Relaxed,
)
.is_ok()
{
run_cleanup = true;
}
}
if run_cleanup {
let before = tracker.len();
tracker.retain(|_, (ts, _)| ts.elapsed() < Duration::from_secs(120));
let expired = before.saturating_sub(tracker.len());
if expired > 0 {
stats_ping.increment_me_keepalive_timeout_by(expired as u64);
}
}
tracker.insert(sent_id, (std::time::Instant::now(), writer_id));
}
ping_id = ping_id.wrapping_add(1);
stats_ping.increment_me_keepalive_sent();
if tx_ping
.send(WriterCommand::DataAndFlush(Bytes::from(p)))
.await
.is_err()
{
stats_ping.increment_me_keepalive_failed();
debug!("ME ping failed, removing dead writer");
cancel_ping.cancel();
if cleanup_for_ping
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
&& let Some(pool) = pool_ping.upgrade()
if let Err(e) = res
&& !idle_close_by_peer
{
pool.remove_writer_and_close_clients(writer_id).await;
warn!(error = %e, "ME reader ended");
}
break;
}
}
});
tokio::spawn(async move {
if rpc_proxy_req_every_secs == 0 {
return;
}
let interval = Duration::from_secs(rpc_proxy_req_every_secs);
let startup_jitter_ms = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
rand::rng().random_range(0..=effective_jitter_ms as u64)
};
tokio::select! {
_ = cancel_signal.cancelled() => return,
_ = tokio::time::sleep(Duration::from_millis(startup_jitter_ms)) => {}
}
loop {
let wait = {
let jitter_cap_ms = interval.as_millis() / 2;
let effective_jitter_ms = keepalive_jitter_signal
.as_millis()
.min(jitter_cap_ms)
.max(1);
interval
+ Duration::from_millis(
rand::rng().random_range(0..=effective_jitter_ms as u64),
)
};
tokio::select! {
_ = cancel_signal.cancelled() => break,
_ = tokio::time::sleep(wait) => {}
}
let Some(pool) = pool_signal.upgrade() else {
break;
};
let Some(meta) = pool.registry.get_last_writer_meta(writer_id).await else {
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
};
let (conn_id, mut service_rx) = pool.registry.register().await;
if !pool
.registry
.bind_writer(conn_id, writer_id, meta.clone())
.await
{
let _ = pool.registry.unregister(conn_id).await;
stats_signal.increment_me_rpc_proxy_req_signal_skipped_no_meta_total();
continue;
}
let payload = build_proxy_req_payload(
conn_id,
meta.client_addr,
meta.our_addr,
&[],
pool.proxy_tag.as_deref(),
meta.proto_flags,
);
if tx_signal
.send(WriterCommand::DataAndFlush(payload))
.await
.is_err()
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
WriterLifecycleExit::Writer(res) => {
if let Err(e) = res {
warn!(error = %e, "ME writer command loop ended");
}
break;
}
stats_signal.increment_me_rpc_proxy_req_signal_sent_total();
if matches!(
tokio::time::timeout(
Duration::from_millis(ME_RPC_PROXY_REQ_RESPONSE_WAIT_MS),
service_rx.recv(),
)
.await,
Ok(Some(_))
) {
stats_signal.increment_me_rpc_proxy_req_signal_response_total();
WriterLifecycleExit::Ping => {
debug!(writer_id, "ME ping loop finished");
}
let mut close_payload = Vec::with_capacity(12);
close_payload.extend_from_slice(&RPC_CLOSE_EXT_U32.to_le_bytes());
close_payload.extend_from_slice(&conn_id.to_le_bytes());
if tx_signal
.send(WriterCommand::DataAndFlush(Bytes::from(close_payload)))
.await
.is_err()
{
stats_signal.increment_me_rpc_proxy_req_signal_failed_total();
let _ = pool.registry.unregister(conn_id).await;
cancel_signal.cancel();
if cleanup_for_signal
.compare_exchange(false, true, Ordering::AcqRel, Ordering::Relaxed)
.is_ok()
{
pool.remove_writer_and_close_clients(writer_id).await;
}
break;
WriterLifecycleExit::Signal => {
debug!(writer_id, "ME rpc_proxy_req signal loop finished");
}
stats_signal.increment_me_rpc_proxy_req_signal_close_sent_total();
let _ = pool.registry.unregister(conn_id).await;
WriterLifecycleExit::Cancelled => {}
}
if let Some(pool) = pool_lifecycle.upgrade() {
pool.remove_writer_and_close_clients(writer_id).await;
} else {
// Fallback for shutdown races: make lifecycle exit observable by prune.
cancel_cleanup.cancel();
}
let remaining = writers_arc.read().await.len();
debug!(writer_id, remaining, "ME writer lifecycle task finished");
});
Ok(())
@@ -594,23 +605,36 @@ impl MePool {
// The close command below is only a best-effort accelerator for task shutdown.
// Cleanup progress must never depend on command-channel availability.
let _ = self.registry.writer_lost(writer_id).await;
{
let mut tracker = self.ping_tracker.lock().await;
tracker.retain(|_, (_, wid)| *wid != writer_id);
}
self.rtt_stats.lock().await.remove(&writer_id);
if let Some(tx) = close_tx {
let _ = tx.send(WriterCommand::Close).await;
// Keep teardown critical path non-blocking: close is best-effort only.
let _ = tx.try_send(WriterCommand::Close);
}
if let Some(addr) = removed_addr {
if let Some(uptime) = removed_uptime {
// Quarantine flapping endpoints regardless of draining state.
self.maybe_quarantine_flapping_endpoint(addr, uptime).await;
// Quarantine contract: only unexpected removals are considered endpoint flap.
if trigger_refill {
self.stats
.increment_me_endpoint_quarantine_unexpected_total();
self.maybe_quarantine_flapping_endpoint(addr, uptime, "unexpected")
.await;
} else {
self.stats
.increment_me_endpoint_quarantine_draining_suppressed_total();
debug!(
%addr,
uptime_ms = uptime.as_millis(),
"Skipping endpoint quarantine for draining writer removal"
);
}
}
if trigger_refill && let Some(writer_dc) = removed_dc {
self.trigger_immediate_refill_for_dc(addr, writer_dc);
}
}
if removed {
self.notify_writer_epoch();
}
removed
}
@@ -676,7 +700,10 @@ impl MePool {
MeBindStaleMode::Never => false,
MeBindStaleMode::Always => true,
MeBindStaleMode::Ttl => {
let ttl_secs = self.me_bind_stale_ttl_secs.load(Ordering::Relaxed);
let ttl_secs = self
.binding_policy
.me_bind_stale_ttl_secs
.load(Ordering::Relaxed);
if ttl_secs == 0 {
return true;
}

View File

@@ -32,10 +32,10 @@ pub(crate) async fn reader_loop(
enc_leftover: BytesMut,
mut dec: BytesMut,
tx: mpsc::Sender<WriterCommand>,
ping_tracker: Arc<Mutex<HashMap<i64, (Instant, u64)>>>,
ping_tracker: Arc<Mutex<HashMap<i64, Instant>>>,
rtt_stats: Arc<Mutex<HashMap<u64, (f64, f64)>>>,
stats: Arc<Stats>,
_writer_id: u64,
writer_id: u64,
degraded: Arc<AtomicBool>,
writer_rtt_ema_ms_x10: Arc<AtomicU32>,
reader_route_data_wait_ms: Arc<AtomicU64>,
@@ -45,7 +45,7 @@ pub(crate) async fn reader_loop(
let mut expected_seq: i32 = 0;
loop {
let mut tmp = [0u8; 16_384];
let mut tmp = [0u8; 65_536];
let n = tokio::select! {
res = rd.read(&mut tmp) => res.map_err(ProxyError::Io)?,
_ = cancel.cancelled() => return Ok(()),
@@ -203,13 +203,13 @@ pub(crate) async fn reader_loop(
} else if pt == RPC_PONG_U32 && body.len() >= 8 {
let ping_id = i64::from_le_bytes(body[0..8].try_into().unwrap());
stats.increment_me_keepalive_pong();
if let Some((sent, wid)) = {
if let Some(sent) = {
let mut guard = ping_tracker.lock().await;
guard.remove(&ping_id)
} {
let rtt = sent.elapsed().as_secs_f64() * 1000.0;
let mut stats = rtt_stats.lock().await;
let entry = stats.entry(wid).or_insert((rtt, rtt));
let entry = stats.entry(writer_id).or_insert((rtt, rtt));
entry.1 = entry.1 * 0.8 + rtt * 0.2;
if rtt < entry.0 {
entry.0 = rtt;
@@ -224,7 +224,7 @@ pub(crate) async fn reader_loop(
Ordering::Relaxed,
);
trace!(
writer_id = wid,
writer_id,
rtt_ms = rtt,
ema_ms = entry.1,
base_ms = entry.0,

View File

@@ -3,8 +3,9 @@ use std::net::SocketAddr;
use std::sync::atomic::{AtomicU8, AtomicU64, Ordering};
use std::time::{Duration, SystemTime, UNIX_EPOCH};
use dashmap::DashMap;
use tokio::sync::mpsc::error::TrySendError;
use tokio::sync::{RwLock, mpsc};
use tokio::sync::{Mutex, mpsc};
use super::MeResponse;
use super::codec::WriterCommand;
@@ -50,8 +51,15 @@ pub(super) struct WriterActivitySnapshot {
pub active_sessions_by_target_dc: HashMap<i16, usize>,
}
struct RegistryInner {
map: HashMap<u64, mpsc::Sender<MeResponse>>,
struct RoutingTable {
map: DashMap<u64, mpsc::Sender<MeResponse>>,
}
struct BindingState {
inner: Mutex<BindingInner>,
}
struct BindingInner {
writers: HashMap<u64, mpsc::Sender<WriterCommand>>,
writer_for_conn: HashMap<u64, u64>,
conns_for_writer: HashMap<u64, HashSet<u64>>,
@@ -60,10 +68,9 @@ struct RegistryInner {
writer_idle_since_epoch_secs: HashMap<u64, u64>,
}
impl RegistryInner {
impl BindingInner {
fn new() -> Self {
Self {
map: HashMap::new(),
writers: HashMap::new(),
writer_for_conn: HashMap::new(),
conns_for_writer: HashMap::new(),
@@ -75,7 +82,8 @@ impl RegistryInner {
}
pub struct ConnRegistry {
inner: RwLock<RegistryInner>,
routing: RoutingTable,
binding: BindingState,
next_id: AtomicU64,
route_channel_capacity: usize,
route_backpressure_base_timeout_ms: AtomicU64,
@@ -94,7 +102,12 @@ impl ConnRegistry {
pub fn with_route_channel_capacity(route_channel_capacity: usize) -> Self {
let start = rand::random::<u64>() | 1;
Self {
inner: RwLock::new(RegistryInner::new()),
routing: RoutingTable {
map: DashMap::new(),
},
binding: BindingState {
inner: Mutex::new(BindingInner::new()),
},
next_id: AtomicU64::new(start),
route_channel_capacity: route_channel_capacity.max(1),
route_backpressure_base_timeout_ms: AtomicU64::new(ROUTE_BACKPRESSURE_BASE_TIMEOUT_MS),
@@ -130,14 +143,14 @@ impl ConnRegistry {
/// Allocates the next connection id from `next_id` and creates a bounded response
/// channel of capacity `route_channel_capacity` for it.
///
/// The sending half is stored in the route map under the new id; the id and the
/// receiving half are returned to the caller.
pub async fn register(&self) -> (u64, mpsc::Receiver<MeResponse>) {
let id = self.next_id.fetch_add(1, Ordering::Relaxed);
let (tx, rx) = mpsc::channel(self.route_channel_capacity);
self.inner.write().await.map.insert(id, tx);
self.routing.map.insert(id, tx);
(id, rx)
}
pub async fn register_writer(&self, writer_id: u64, tx: mpsc::Sender<WriterCommand>) {
let mut inner = self.inner.write().await;
inner.writers.insert(writer_id, tx);
inner
let mut binding = self.binding.inner.lock().await;
binding.writers.insert(writer_id, tx);
binding
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new);
@@ -145,18 +158,18 @@ impl ConnRegistry {
/// Unregister connection, returning associated writer_id if any.
pub async fn unregister(&self, id: u64) -> Option<u64> {
let mut inner = self.inner.write().await;
inner.map.remove(&id);
inner.meta.remove(&id);
if let Some(writer_id) = inner.writer_for_conn.remove(&id) {
let became_empty = if let Some(set) = inner.conns_for_writer.get_mut(&writer_id) {
self.routing.map.remove(&id);
let mut binding = self.binding.inner.lock().await;
binding.meta.remove(&id);
if let Some(writer_id) = binding.writer_for_conn.remove(&id) {
let became_empty = if let Some(set) = binding.conns_for_writer.get_mut(&writer_id) {
set.remove(&id);
set.is_empty()
} else {
false
};
if became_empty {
inner
binding
.writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs());
}
@@ -167,10 +180,7 @@ impl ConnRegistry {
#[allow(dead_code)]
pub async fn route(&self, id: u64, resp: MeResponse) -> RouteResult {
let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let tx = self.routing.map.get(&id).map(|entry| entry.value().clone());
let Some(tx) = tx else {
return RouteResult::NoConn;
@@ -223,10 +233,7 @@ impl ConnRegistry {
}
pub async fn route_nowait(&self, id: u64, resp: MeResponse) -> RouteResult {
let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let tx = self.routing.map.get(&id).map(|entry| entry.value().clone());
let Some(tx) = tx else {
return RouteResult::NoConn;
@@ -249,10 +256,7 @@ impl ConnRegistry {
return self.route_nowait(id, resp).await;
}
let tx = {
let inner = self.inner.read().await;
inner.map.get(&id).cloned()
};
let tx = self.routing.map.get(&id).map(|entry| entry.value().clone());
let Some(tx) = tx else {
return RouteResult::NoConn;
@@ -291,33 +295,39 @@ impl ConnRegistry {
}
pub async fn bind_writer(&self, conn_id: u64, writer_id: u64, meta: ConnMeta) -> bool {
let mut inner = self.inner.write().await;
if !inner.writers.contains_key(&writer_id) {
let mut binding = self.binding.inner.lock().await;
// ROUTING IS THE SOURCE OF TRUTH:
// never keep/attach writer binding for a connection that is already
// absent from the routing table.
if !self.routing.map.contains_key(&conn_id) {
return false;
}
if !binding.writers.contains_key(&writer_id) {
return false;
}
let previous_writer_id = inner.writer_for_conn.insert(conn_id, writer_id);
let previous_writer_id = binding.writer_for_conn.insert(conn_id, writer_id);
if let Some(previous_writer_id) = previous_writer_id
&& previous_writer_id != writer_id
{
let became_empty =
if let Some(set) = inner.conns_for_writer.get_mut(&previous_writer_id) {
if let Some(set) = binding.conns_for_writer.get_mut(&previous_writer_id) {
set.remove(&conn_id);
set.is_empty()
} else {
false
};
if became_empty {
inner
binding
.writer_idle_since_epoch_secs
.insert(previous_writer_id, Self::now_epoch_secs());
}
}
inner.meta.insert(conn_id, meta.clone());
inner.last_meta_for_writer.insert(writer_id, meta);
inner.writer_idle_since_epoch_secs.remove(&writer_id);
inner
binding.meta.insert(conn_id, meta.clone());
binding.last_meta_for_writer.insert(writer_id, meta);
binding.writer_idle_since_epoch_secs.remove(&writer_id);
binding
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new)
@@ -326,32 +336,32 @@ impl ConnRegistry {
}
pub async fn mark_writer_idle(&self, writer_id: u64) {
let mut inner = self.inner.write().await;
inner
let mut binding = self.binding.inner.lock().await;
binding
.conns_for_writer
.entry(writer_id)
.or_insert_with(HashSet::new);
inner
binding
.writer_idle_since_epoch_secs
.entry(writer_id)
.or_insert(Self::now_epoch_secs());
}
/// Returns a clone of the entry stored in `last_meta_for_writer` for `writer_id`,
/// or `None` when no metadata is recorded for that writer.
pub async fn get_last_writer_meta(&self, writer_id: u64) -> Option<ConnMeta> {
let inner = self.inner.read().await;
inner.last_meta_for_writer.get(&writer_id).cloned()
let binding = self.binding.inner.lock().await;
binding.last_meta_for_writer.get(&writer_id).cloned()
}
/// Returns a copy of the whole `writer_idle_since_epoch_secs` map
/// (writer id -> recorded idle-since value) as it is at the time of the call.
pub async fn writer_idle_since_snapshot(&self) -> HashMap<u64, u64> {
let inner = self.inner.read().await;
inner.writer_idle_since_epoch_secs.clone()
let binding = self.binding.inner.lock().await;
binding.writer_idle_since_epoch_secs.clone()
}
pub async fn writer_idle_since_for_writer_ids(&self, writer_ids: &[u64]) -> HashMap<u64, u64> {
let inner = self.inner.read().await;
let binding = self.binding.inner.lock().await;
let mut out = HashMap::<u64, u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids {
if let Some(idle_since) = inner.writer_idle_since_epoch_secs.get(writer_id).copied() {
if let Some(idle_since) = binding.writer_idle_since_epoch_secs.get(writer_id).copied() {
out.insert(*writer_id, idle_since);
}
}
@@ -359,14 +369,14 @@ impl ConnRegistry {
}
pub(super) async fn writer_activity_snapshot(&self) -> WriterActivitySnapshot {
let inner = self.inner.read().await;
let binding = self.binding.inner.lock().await;
let mut bound_clients_by_writer = HashMap::<u64, usize>::new();
let mut active_sessions_by_target_dc = HashMap::<i16, usize>::new();
for (writer_id, conn_ids) in &inner.conns_for_writer {
for (writer_id, conn_ids) in &binding.conns_for_writer {
bound_clients_by_writer.insert(*writer_id, conn_ids.len());
}
for conn_meta in inner.meta.values() {
for conn_meta in binding.meta.values() {
if conn_meta.target_dc == 0 {
continue;
}
@@ -382,9 +392,39 @@ impl ConnRegistry {
}
pub async fn get_writer(&self, conn_id: u64) -> Option<ConnWriter> {
let inner = self.inner.read().await;
let writer_id = inner.writer_for_conn.get(&conn_id).cloned()?;
let writer = inner.writers.get(&writer_id).cloned()?;
let mut binding = self.binding.inner.lock().await;
// ROUTING IS THE SOURCE OF TRUTH:
// stale bindings are ignored and lazily cleaned when routing no longer
// contains the connection.
if !self.routing.map.contains_key(&conn_id) {
binding.meta.remove(&conn_id);
if let Some(stale_writer_id) = binding.writer_for_conn.remove(&conn_id)
&& let Some(conns) = binding.conns_for_writer.get_mut(&stale_writer_id)
{
conns.remove(&conn_id);
if conns.is_empty() {
binding
.writer_idle_since_epoch_secs
.insert(stale_writer_id, Self::now_epoch_secs());
}
}
return None;
}
let writer_id = binding.writer_for_conn.get(&conn_id).copied()?;
let Some(writer) = binding.writers.get(&writer_id).cloned() else {
binding.writer_for_conn.remove(&conn_id);
binding.meta.remove(&conn_id);
if let Some(conns) = binding.conns_for_writer.get_mut(&writer_id) {
conns.remove(&conn_id);
if conns.is_empty() {
binding
.writer_idle_since_epoch_secs
.insert(writer_id, Self::now_epoch_secs());
}
}
return None;
};
Some(ConnWriter {
writer_id,
tx: writer,
@@ -392,16 +432,16 @@ impl ConnRegistry {
}
/// Returns the ids of all connections that currently have an entry in
/// `writer_for_conn`, i.e. connections bound to some writer. Order is unspecified.
pub async fn active_conn_ids(&self) -> Vec<u64> {
let inner = self.inner.read().await;
inner.writer_for_conn.keys().copied().collect()
let binding = self.binding.inner.lock().await;
binding.writer_for_conn.keys().copied().collect()
}
pub async fn writer_lost(&self, writer_id: u64) -> Vec<BoundConn> {
let mut inner = self.inner.write().await;
inner.writers.remove(&writer_id);
inner.last_meta_for_writer.remove(&writer_id);
inner.writer_idle_since_epoch_secs.remove(&writer_id);
let conns = inner
let mut binding = self.binding.inner.lock().await;
binding.writers.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id);
let conns = binding
.conns_for_writer
.remove(&writer_id)
.unwrap_or_default()
@@ -410,11 +450,11 @@ impl ConnRegistry {
let mut out = Vec::new();
for conn_id in conns {
if inner.writer_for_conn.get(&conn_id).copied() != Some(writer_id) {
if binding.writer_for_conn.get(&conn_id).copied() != Some(writer_id) {
continue;
}
inner.writer_for_conn.remove(&conn_id);
if let Some(m) = inner.meta.get(&conn_id) {
binding.writer_for_conn.remove(&conn_id);
if let Some(m) = binding.meta.get(&conn_id) {
out.push(BoundConn {
conn_id,
meta: m.clone(),
@@ -426,13 +466,13 @@ impl ConnRegistry {
/// Returns a clone of the `ConnMeta` stored for `conn_id`, or `None` when the
/// connection has no metadata entry.
#[allow(dead_code)]
pub async fn get_meta(&self, conn_id: u64) -> Option<ConnMeta> {
let inner = self.inner.read().await;
inner.meta.get(&conn_id).cloned()
let binding = self.binding.inner.lock().await;
binding.meta.get(&conn_id).cloned()
}
pub async fn is_writer_empty(&self, writer_id: u64) -> bool {
let inner = self.inner.read().await;
inner
let binding = self.binding.inner.lock().await;
binding
.conns_for_writer
.get(&writer_id)
.map(|s| s.is_empty())
@@ -441,8 +481,8 @@ impl ConnRegistry {
#[allow(dead_code)]
pub async fn unregister_writer_if_empty(&self, writer_id: u64) -> bool {
let mut inner = self.inner.write().await;
let Some(conn_ids) = inner.conns_for_writer.get(&writer_id) else {
let mut binding = self.binding.inner.lock().await;
let Some(conn_ids) = binding.conns_for_writer.get(&writer_id) else {
// Writer is already absent from the registry.
return true;
};
@@ -450,19 +490,19 @@ impl ConnRegistry {
return false;
}
inner.writers.remove(&writer_id);
inner.last_meta_for_writer.remove(&writer_id);
inner.writer_idle_since_epoch_secs.remove(&writer_id);
inner.conns_for_writer.remove(&writer_id);
binding.writers.remove(&writer_id);
binding.last_meta_for_writer.remove(&writer_id);
binding.writer_idle_since_epoch_secs.remove(&writer_id);
binding.conns_for_writer.remove(&writer_id);
true
}
#[allow(dead_code)]
pub(super) async fn non_empty_writer_ids(&self, writer_ids: &[u64]) -> HashSet<u64> {
let inner = self.inner.read().await;
let binding = self.binding.inner.lock().await;
let mut out = HashSet::<u64>::with_capacity(writer_ids.len());
for writer_id in writer_ids {
if let Some(conns) = inner.conns_for_writer.get(writer_id)
if let Some(conns) = binding.conns_for_writer.get(writer_id)
&& !conns.is_empty()
{
out.insert(*writer_id);

View File

@@ -26,6 +26,9 @@ use rand::seq::SliceRandom;
const IDLE_WRITER_PENALTY_MID_SECS: u64 = 45;
const IDLE_WRITER_PENALTY_HIGH_SECS: u64 = 55;
const HYBRID_GLOBAL_BURST_PERIOD_ROUNDS: u32 = 4;
const HYBRID_RECENT_SUCCESS_WINDOW_MS: u64 = 120_000;
const HYBRID_TIMEOUT_WARN_RATE_LIMIT_MS: u64 = 5_000;
const HYBRID_RECOVERY_TRIGGER_MIN_INTERVAL_MS: u64 = 5_000;
const PICK_PENALTY_WARM: u64 = 200;
const PICK_PENALTY_DRAINING: u64 = 600;
const PICK_PENALTY_STALE: u64 = 300;
@@ -68,8 +71,11 @@ impl MePool {
},
)
};
let no_writer_mode =
MeRouteNoWriterMode::from_u8(self.me_route_no_writer_mode.load(Ordering::Relaxed));
let no_writer_mode = MeRouteNoWriterMode::from_u8(
self.route_runtime
.me_route_no_writer_mode
.load(Ordering::Relaxed),
);
let (routed_dc, unknown_target_dc) =
self.resolve_target_dc_for_routing(target_dc as i32).await;
let mut no_writer_deadline: Option<Instant> = None;
@@ -77,7 +83,11 @@ impl MePool {
let mut async_recovery_triggered = false;
let mut hybrid_recovery_round = 0u32;
let mut hybrid_last_recovery_at: Option<Instant> = None;
let hybrid_wait_step = self.me_route_no_writer_wait.max(Duration::from_millis(50));
let mut hybrid_total_deadline: Option<Instant> = None;
let hybrid_wait_step = self
.route_runtime
.me_route_no_writer_wait
.max(Duration::from_millis(50));
let mut hybrid_wait_current = hybrid_wait_step;
loop {
@@ -92,9 +102,13 @@ impl MePool {
.tx
.try_send(WriterCommand::Data(current_payload.clone()))
{
Ok(()) => return Ok(()),
Ok(()) => {
self.note_hybrid_route_success();
return Ok(());
}
Err(TrySendError::Full(cmd)) => {
if current.tx.send(cmd).await.is_ok() {
self.note_hybrid_route_success();
return Ok(());
}
warn!(writer_id = current.writer_id, "ME writer channel closed");
@@ -118,7 +132,7 @@ impl MePool {
match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.me_route_no_writer_wait
Instant::now() + self.route_runtime.me_route_no_writer_wait
});
if !async_recovery_triggered && !unknown_target_dc {
let triggered =
@@ -139,7 +153,9 @@ impl MePool {
MeRouteNoWriterMode::InlineRecoveryLegacy => {
self.stats.increment_me_inline_recovery_total();
if !unknown_target_dc {
for _ in 0..self.me_route_inline_recovery_attempts.max(1) {
for _ in
0..self.route_runtime.me_route_inline_recovery_attempts.max(1)
{
for family in self.family_order() {
let map = match family {
IpFamily::V4 => self.proxy_map_v4.read().await.clone(),
@@ -168,7 +184,7 @@ impl MePool {
continue;
}
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.me_route_inline_recovery_wait
Instant::now() + self.route_runtime.me_route_inline_recovery_wait
});
if !self.wait_for_writer_until(deadline).await {
if !self.writers.read().await.is_empty() {
@@ -182,6 +198,15 @@ impl MePool {
continue;
}
MeRouteNoWriterMode::HybridAsyncPersistent => {
let total_deadline = *hybrid_total_deadline.get_or_insert_with(|| {
Instant::now() + self.hybrid_total_wait_budget()
});
if Instant::now() >= total_deadline {
self.on_hybrid_timeout(total_deadline, routed_dc);
return Err(ProxyError::Proxy(
"ME writer not available within hybrid timeout".into(),
));
}
if !unknown_target_dc {
self.maybe_trigger_hybrid_recovery(
routed_dc,
@@ -214,8 +239,9 @@ impl MePool {
let pick_mode = self.writer_pick_mode();
match no_writer_mode {
MeRouteNoWriterMode::AsyncRecoveryFailfast => {
let deadline = *no_writer_deadline
.get_or_insert_with(|| Instant::now() + self.me_route_no_writer_wait);
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.route_runtime.me_route_no_writer_wait
});
if !async_recovery_triggered && !unknown_target_dc {
let triggered =
self.trigger_async_recovery_for_target_dc(routed_dc).await;
@@ -238,7 +264,7 @@ impl MePool {
self.stats.increment_me_inline_recovery_total();
if unknown_target_dc {
let deadline = *no_writer_deadline.get_or_insert_with(|| {
Instant::now() + self.me_route_inline_recovery_wait
Instant::now() + self.route_runtime.me_route_inline_recovery_wait
});
if self.wait_for_candidate_until(routed_dc, deadline).await {
continue;
@@ -250,7 +276,9 @@ impl MePool {
"No ME writers available for target DC".into(),
));
}
if emergency_attempts >= self.me_route_inline_recovery_attempts.max(1) {
if emergency_attempts
>= self.route_runtime.me_route_inline_recovery_attempts.max(1)
{
self.stats
.increment_me_writer_pick_no_candidate_total(pick_mode);
self.stats.increment_me_no_writer_failfast_total();
@@ -292,6 +320,16 @@ impl MePool {
}
}
MeRouteNoWriterMode::HybridAsyncPersistent => {
let total_deadline = *hybrid_total_deadline.get_or_insert_with(|| {
Instant::now() + self.hybrid_total_wait_budget()
});
if Instant::now() >= total_deadline {
self.on_hybrid_timeout(total_deadline, routed_dc);
return Err(ProxyError::Proxy(
"No ME writers available for target DC within hybrid timeout"
.into(),
));
}
if !unknown_target_dc {
self.maybe_trigger_hybrid_recovery(
routed_dc,
@@ -332,7 +370,11 @@ impl MePool {
pick_sample_size,
)
} else {
if self.me_deterministic_writer_sort.load(Ordering::Relaxed) {
if self
.writer_selection_policy
.me_deterministic_writer_sort
.load(Ordering::Relaxed)
{
candidate_indices.sort_by(|lhs, rhs| {
let left = &writers_snapshot[*lhs];
let right = &writers_snapshot[*rhs];
@@ -423,6 +465,7 @@ impl MePool {
"Selected stale ME writer for fallback bind"
);
}
self.note_hybrid_route_success();
return Ok(());
}
Err(TrySendError::Full(_)) => {
@@ -453,7 +496,19 @@ impl MePool {
.increment_me_writer_pick_blocking_fallback_total();
let effective_our_addr = SocketAddr::new(w.source_ip, our_addr.port());
let (payload, meta) = build_routed_payload(effective_our_addr);
match w.tx.clone().reserve_owned().await {
let reserve_result =
if let Some(timeout) = self.route_runtime.me_route_blocking_send_timeout {
match tokio::time::timeout(timeout, w.tx.clone().reserve_owned()).await {
Ok(result) => result,
Err(_) => {
self.stats.increment_me_writer_pick_full_total(pick_mode);
continue;
}
}
} else {
w.tx.clone().reserve_owned().await
};
match reserve_result {
Ok(permit) => {
if !self.registry.bind_writer(conn_id, w.id, meta).await {
debug!(
@@ -471,6 +526,7 @@ impl MePool {
if w.generation < self.current_generation() {
self.stats.increment_pool_stale_pick_total();
}
self.note_hybrid_route_success();
return Ok(());
}
Err(_) => {
@@ -483,7 +539,7 @@ impl MePool {
}
async fn wait_for_writer_until(&self, deadline: Instant) -> bool {
let waiter = self.writer_available.notified();
let mut rx = self.writer_epoch.subscribe();
if !self.writers.read().await.is_empty() {
return true;
}
@@ -492,13 +548,14 @@ impl MePool {
return !self.writers.read().await.is_empty();
}
let timeout = deadline.saturating_duration_since(now);
if tokio::time::timeout(timeout, waiter).await.is_ok() {
return true;
if tokio::time::timeout(timeout, rx.changed()).await.is_ok() {
return !self.writers.read().await.is_empty();
}
!self.writers.read().await.is_empty()
}
async fn wait_for_candidate_until(&self, routed_dc: i32, deadline: Instant) -> bool {
let mut rx = self.writer_epoch.subscribe();
loop {
if self.has_candidate_for_target_dc(routed_dc).await {
return true;
@@ -509,7 +566,6 @@ impl MePool {
return self.has_candidate_for_target_dc(routed_dc).await;
}
let waiter = self.writer_available.notified();
if self.has_candidate_for_target_dc(routed_dc).await {
return true;
}
@@ -517,7 +573,7 @@ impl MePool {
if remaining.is_zero() {
return self.has_candidate_for_target_dc(routed_dc).await;
}
if tokio::time::timeout(remaining, waiter).await.is_err() {
if tokio::time::timeout(remaining, rx.changed()).await.is_err() {
return self.has_candidate_for_target_dc(routed_dc).await;
}
}
@@ -587,6 +643,9 @@ impl MePool {
hybrid_last_recovery_at: &mut Option<Instant>,
hybrid_wait_step: Duration,
) {
if !self.try_consume_hybrid_recovery_trigger_slot(HYBRID_RECOVERY_TRIGGER_MIN_INTERVAL_MS) {
return;
}
if let Some(last) = *hybrid_last_recovery_at
&& last.elapsed() < hybrid_wait_step
{
@@ -602,6 +661,78 @@ impl MePool {
*hybrid_last_recovery_at = Some(Instant::now());
}
/// Returns the total time a hybrid-mode route attempt may wait for a writer.
///
/// The configured `me_route_hybrid_max_wait` is floored at 50 ms. When the
/// last recorded route success is at most `HYBRID_RECENT_SUCCESS_WINDOW_MS`
/// milliseconds old, the budget is doubled (saturating). A stored timestamp
/// of `0` never doubles the budget.
fn hybrid_total_wait_budget(&self) -> Duration {
    let floor = Duration::from_millis(50);
    let budget = self.route_runtime.me_route_hybrid_max_wait.max(floor);

    let current_ms = Self::now_epoch_millis();
    let last_ok_ms = self
        .route_runtime
        .me_route_last_success_epoch_ms
        .load(Ordering::Relaxed);

    let succeeded_recently = last_ok_ms != 0
        && current_ms.saturating_sub(last_ok_ms) <= HYBRID_RECENT_SUCCESS_WINDOW_MS;
    if succeeded_recently {
        budget.saturating_mul(2)
    } else {
        budget
    }
}
/// Stores the current epoch-millisecond timestamp in
/// `me_route_last_success_epoch_ms`, marking a successful route.
fn note_hybrid_route_success(&self) {
    let last_success = &self.route_runtime.me_route_last_success_epoch_ms;
    let stamp_ms = Self::now_epoch_millis();
    last_success.store(stamp_ms, Ordering::Relaxed);
}
/// Records a hybrid-mode routing timeout and emits a rate-limited warning.
///
/// Always calls `stats.increment_me_hybrid_timeout_total()`. A `warn!` is
/// logged only by the call that wins the compare-exchange on
/// `me_route_hybrid_timeout_warn_epoch_ms`, and only when at least
/// `HYBRID_TIMEOUT_WARN_RATE_LIMIT_MS` milliseconds separate "now" from the
/// stored timestamp.
///
/// * `deadline` - the hybrid deadline instant that was exceeded.
/// * `routed_dc` - target DC attached to the log record.
fn on_hybrid_timeout(&self, deadline: Instant, routed_dc: i32) {
self.stats.increment_me_hybrid_timeout_total();
let now_ms = Self::now_epoch_millis();
let mut last_warn_ms = self
.route_runtime
.me_route_hybrid_timeout_warn_epoch_ms
.load(Ordering::Relaxed);
// Keep trying to claim the warn slot while the stored timestamp is old
// enough; the loop ends silently once another call has stamped a recent one.
while now_ms.saturating_sub(last_warn_ms) >= HYBRID_TIMEOUT_WARN_RATE_LIMIT_MS {
match self
.route_runtime
.me_route_hybrid_timeout_warn_epoch_ms
.compare_exchange_weak(last_warn_ms, now_ms, Ordering::AcqRel, Ordering::Relaxed)
{
Ok(_) => {
// NOTE(review): `budget_ms` is recomputed here and depends on the last
// success timestamp, so it may differ from the budget that produced
// `deadline` — confirm this is acceptable for diagnostics.
// NOTE(review): `deadline.elapsed()` is the time elapsed since `deadline`
// (i.e. how far past the deadline we are), not the total wait — confirm
// the `elapsed_ms` field name matches the intended meaning.
warn!(
routed_dc,
budget_ms = self.hybrid_total_wait_budget().as_millis() as u64,
elapsed_ms = deadline.elapsed().as_millis() as u64,
"ME hybrid route timeout reached"
);
break;
}
// The exchange failed (lost race or spurious weak failure): re-evaluate
// the rate limit against the value actually stored.
Err(actual) => last_warn_ms = actual,
}
}
}
/// Attempts to claim the shared hybrid-recovery trigger slot.
///
/// Returns `true`, after stamping `me_async_recovery_last_trigger_epoch_ms`
/// with the current epoch milliseconds, when at least `min_interval_ms`
/// separate "now" from the stored timestamp. Returns `false` and leaves the
/// timestamp untouched when the previous trigger is still too recent.
fn try_consume_hybrid_recovery_trigger_slot(&self, min_interval_ms: u64) -> bool {
    let stamp_ms = Self::now_epoch_millis();
    self.route_runtime
        .me_async_recovery_last_trigger_epoch_ms
        .fetch_update(Ordering::AcqRel, Ordering::Relaxed, |previous_ms| {
            // `None` aborts the update; `Some` retries the exchange until it lands.
            let waited_ms = stamp_ms.saturating_sub(previous_ms);
            (waited_ms >= min_interval_ms).then_some(stamp_ms)
        })
        .is_ok()
}
pub async fn send_close(self: &Arc<Self>, conn_id: u64) -> Result<()> {
if let Some(w) = self.registry.get_writer(conn_id).await {
let mut p = Vec::with_capacity(12);
@@ -749,7 +880,7 @@ impl MePool {
(self.writer_idle_rank_for_selection(writer, idle_since_by_writer, now_epoch_secs)
as u64)
* 100;
let queue_cap = self.writer_cmd_channel_capacity.max(1) as u64;
let queue_cap = self.writer_lifecycle.writer_cmd_channel_capacity.max(1) as u64;
let queue_remaining = writer.tx.capacity() as u64;
let queue_used = queue_cap.saturating_sub(queue_remaining.min(queue_cap));
let queue_util_pct = queue_used.saturating_mul(100) / queue_cap;

View File

@@ -113,6 +113,8 @@ async fn make_pool(
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
);

View File

@@ -111,6 +111,8 @@ async fn make_pool(
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
);

View File

@@ -106,6 +106,8 @@ async fn make_pool(me_pool_drain_threshold: u64) -> Arc<MePool> {
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
)

View File

@@ -95,6 +95,8 @@ async fn make_pool() -> Arc<MePool> {
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
)

View File

@@ -35,7 +35,7 @@ async fn make_pool() -> Arc<MePool> {
NetworkDecision::default(),
None,
Arc::new(SecureRandom::new()),
Arc::new(Stats::default()),
Arc::new(Stats::new()),
general.me_keepalive_enabled,
general.me_keepalive_interval_secs,
general.me_keepalive_jitter_secs,
@@ -100,6 +100,8 @@ async fn make_pool() -> Arc<MePool> {
general.me_warn_rate_limit_ms,
MeRouteNoWriterMode::default(),
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
)
@@ -171,10 +173,15 @@ async fn bind_conn_to_writer(pool: &Arc<MePool>, writer_id: u64, port: u16) -> u
}
#[tokio::test]
async fn remove_draining_writer_still_quarantines_flapping_endpoint() {
async fn remove_draining_writer_does_not_quarantine_flapping_endpoint() {
let pool = make_pool().await;
let writer_id = 77;
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 77)), 443);
let before_total = pool.stats.get_me_endpoint_quarantine_total();
let before_unexpected = pool.stats.get_me_endpoint_quarantine_unexpected_total();
let before_suppressed = pool
.stats
.get_me_endpoint_quarantine_draining_suppressed_total();
insert_writer(
&pool,
writer_id,
@@ -198,8 +205,18 @@ async fn remove_draining_writer_still_quarantines_flapping_endpoint() {
"writer must be removed from pool after cleanup"
);
assert!(
pool.is_endpoint_quarantined(addr).await,
"draining removals must still quarantine flapping endpoints"
!pool.is_endpoint_quarantined(addr).await,
"draining removals must not quarantine endpoint"
);
assert_eq!(pool.stats.get_me_endpoint_quarantine_total(), before_total);
assert_eq!(
pool.stats.get_me_endpoint_quarantine_unexpected_total(),
before_unexpected
);
assert_eq!(
pool.stats
.get_me_endpoint_quarantine_draining_suppressed_total(),
before_suppressed + 1
);
assert_eq!(pool.conn_count.load(Ordering::Relaxed), 0);
}
@@ -255,16 +272,21 @@ async fn edge_draining_only_detach_rejects_active_writer() {
}
#[tokio::test]
async fn adversarial_blackhat_single_remove_establishes_single_quarantine_entry() {
async fn adversarial_blackhat_single_unexpected_remove_establishes_single_quarantine_entry() {
let pool = make_pool().await;
let writer_id = 93;
let addr = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 93)), 443);
let before_total = pool.stats.get_me_endpoint_quarantine_total();
let before_unexpected = pool.stats.get_me_endpoint_quarantine_unexpected_total();
let before_suppressed = pool
.stats
.get_me_endpoint_quarantine_draining_suppressed_total();
insert_writer(
&pool,
writer_id,
2,
addr,
true,
false,
Instant::now() - Duration::from_secs(1),
)
.await;
@@ -272,6 +294,49 @@ async fn adversarial_blackhat_single_remove_establishes_single_quarantine_entry(
pool.remove_writer_and_close_clients(writer_id).await;
assert!(pool.is_endpoint_quarantined(addr).await);
assert_eq!(pool.endpoint_quarantine.lock().await.len(), 1);
assert_eq!(
pool.stats.get_me_endpoint_quarantine_total(),
before_total + 1
);
assert_eq!(
pool.stats.get_me_endpoint_quarantine_unexpected_total(),
before_unexpected + 1
);
assert_eq!(
pool.stats
.get_me_endpoint_quarantine_draining_suppressed_total(),
before_suppressed
);
}
/// Removing a writer inserted with an instant only 50 ms in the past must
/// bump the unexpected-quarantine counter without quarantining the endpoint
/// or changing the quarantine total.
#[tokio::test]
async fn remove_ultra_short_uptime_writer_skips_flap_quarantine() {
    let pool = make_pool().await;
    let id = 931;
    let endpoint = SocketAddr::new(IpAddr::V4(Ipv4Addr::new(127, 12, 0, 131)), 443);

    // Snapshot the counters so the assertions are relative, not absolute.
    let quarantine_total_before = pool.stats.get_me_endpoint_quarantine_total();
    let unexpected_before = pool.stats.get_me_endpoint_quarantine_unexpected_total();

    let backdated = Instant::now() - Duration::from_millis(50);
    insert_writer(&pool, id, 2, endpoint, false, backdated).await;
    pool.remove_writer_and_close_clients(id).await;

    let quarantined = pool.is_endpoint_quarantined(endpoint).await;
    assert!(
        !quarantined,
        "ultra-short unexpected lifetime must not quarantine endpoint"
    );

    let quarantine_total_after = pool.stats.get_me_endpoint_quarantine_total();
    assert_eq!(quarantine_total_after, quarantine_total_before);

    let unexpected_after = pool.stats.get_me_endpoint_quarantine_unexpected_total();
    assert_eq!(unexpected_after, unexpected_before + 1);
}
#[tokio::test]

View File

@@ -106,6 +106,8 @@ async fn make_pool() -> (Arc<MePool>, Arc<SecureRandom>) {
general.me_warn_rate_limit_ms,
general.me_route_no_writer_mode,
general.me_route_no_writer_wait_ms,
general.me_route_hybrid_max_wait_ms,
general.me_route_blocking_send_timeout_ms,
general.me_route_inline_recovery_attempts,
general.me_route_inline_recovery_wait_ms,
);

View File

@@ -34,8 +34,6 @@ const NUM_DCS: usize = 5;
/// Timeout for individual DC ping attempt
const DC_PING_TIMEOUT_SECS: u64 = 5;
/// Timeout for direct TG DC TCP connect readiness.
const DIRECT_CONNECT_TIMEOUT_SECS: u64 = 10;
/// Interval between upstream health-check cycles.
const HEALTH_CHECK_INTERVAL_SECS: u64 = 30;
/// Timeout for a single health-check connect attempt.
@@ -319,6 +317,8 @@ pub struct UpstreamManager {
connect_retry_attempts: u32,
connect_retry_backoff: Duration,
connect_budget: Duration,
/// Per-attempt TCP connect timeout to Telegram DC (`[general] tg_connect`, seconds).
tg_connect_timeout_secs: u64,
unhealthy_fail_threshold: u32,
connect_failfast_hard_errors: bool,
no_upstreams_warn_epoch_ms: Arc<AtomicU64>,
@@ -332,6 +332,7 @@ impl UpstreamManager {
connect_retry_attempts: u32,
connect_retry_backoff_ms: u64,
connect_budget_ms: u64,
tg_connect_timeout_secs: u64,
unhealthy_fail_threshold: u32,
connect_failfast_hard_errors: bool,
stats: Arc<Stats>,
@@ -347,6 +348,7 @@ impl UpstreamManager {
connect_retry_attempts: connect_retry_attempts.max(1),
connect_retry_backoff: Duration::from_millis(connect_retry_backoff_ms),
connect_budget: Duration::from_millis(connect_budget_ms.max(1)),
tg_connect_timeout_secs: tg_connect_timeout_secs.max(1),
unhealthy_fail_threshold: unhealthy_fail_threshold.max(1),
connect_failfast_hard_errors,
no_upstreams_warn_epoch_ms: Arc::new(AtomicU64::new(0)),
@@ -798,7 +800,7 @@ impl UpstreamManager {
}
let remaining_budget = self.connect_budget.saturating_sub(elapsed);
let attempt_timeout =
Duration::from_secs(DIRECT_CONNECT_TIMEOUT_SECS).min(remaining_budget);
Duration::from_secs(self.tg_connect_timeout_secs).min(remaining_budget);
if attempt_timeout.is_zero() {
last_error = Some(ProxyError::ConnectionTimeout {
addr: target.to_string(),
@@ -1901,6 +1903,7 @@ mod tests {
1,
100,
1000,
10,
1,
false,
Arc::new(Stats::new()),