Merge pull request #585 from aaddrick/feature/583-gpu-mitigations

launcher+doctor: GPU FATAL mitigations (mitigates #583)
This commit is contained in:
Aaddrick
2026-05-14 05:26:38 -04:00
committed by GitHub
5 changed files with 321 additions and 3 deletions

View File

@@ -436,6 +436,66 @@ JSEOF
fi
}
# Surface a warning when systemd-coredump shows N+ recent Electron
# crashes. The most common cause on Linux is the GPU process FATAL
# exhaustion tracked in #583 — workaround for affected users is the
# upstream Settings → disable hardware acceleration toggle, or
# CLAUDE_DISABLE_GPU=1 in the environment for headless persistence.
#
# Arguments: $1 = electron path (e.g.,
# /usr/lib/claude-desktop/node_modules/electron/dist/electron)
# Used to filter results to claude-desktop's electron when possible;
# falls back to all-electron crashes when the path doesn't match
# (e.g., AppImage mount paths are transient).
_doctor_check_recent_crashes() {
local electron_path="${1:-}"
command -v coredumpctl &>/dev/null || return 0
# `coredumpctl list electron` filters by COMM=electron. If the
# exact electron_path matches any entry's EXE column, prefer that
# tighter count; otherwise fall back to all-electron entries.
local listing total_count path_count
listing=$(coredumpctl list electron \
--since='7 days ago' --no-pager 2>/dev/null) || return 0
[[ -n $listing ]] || return 0
# Drop the header line; count remaining entries.
total_count=$(awk 'NR>1 && NF>0' <<< "$listing" | wc -l)
((total_count == 0)) && return 0
if [[ -n $electron_path ]]; then
path_count=$(awk -v p="$electron_path" \
'NR>1 && index($0, p)' <<< "$listing" | wc -l)
else
path_count=0
fi
# Use the path-matched count when available; else the unfiltered
# count with a footnote so the user knows it may include other
# Electron apps (Slack, VSCode, etc.).
local count footnote=''
if ((path_count > 0)); then
count=$path_count
else
count=$total_count
footnote=' (some entries may be from other Electron apps)'
fi
if ((count >= 3)); then
_warn "Recent Electron crashes: $count in last 7 days$footnote"
_info \
'Most common cause: Chromium GPU process FATAL (#583).' \
'Try one of:'
_info ' Settings → toggle hardware acceleration off → restart'
_info ' or set CLAUDE_DISABLE_GPU=1 in the environment'
_info \
'Tracking:' \
'https://github.com/aaddrick/claude-desktop-debian/issues/583'
elif ((count > 0)); then
_info "Recent Electron crashes: $count in last 7 days$footnote"
fi
}
# Run all diagnostic checks and print results
# Arguments: $1 = electron path (optional, for package-specific checks)
run_doctor() {
@@ -923,6 +983,11 @@ print(len(servers))
fi
fi
# -- Recent crashes --
# Surfaces the GPU process FATAL pattern (#583) before users
# notice the in-app "Claude crashed repeatedly" prompt.
_doctor_check_recent_crashes "$electron_path"
# -- Log file --
local log_path
log_path="${XDG_CACHE_HOME:-$HOME/.cache}"

View File

@@ -42,7 +42,8 @@ log_session_env() {
QT_IM_MODULE \
CLAUDE_USE_WAYLAND \
CLAUDE_TITLEBAR_STYLE \
CLAUDE_GTK_IM_MODULE
CLAUDE_GTK_IM_MODULE \
CLAUDE_DISABLE_GPU
do
log_message " $key=${!key:-}"
done
@@ -140,10 +141,24 @@ build_electron_args() {
loginctl show-session "$XDG_SESSION_ID" \
-p Type --value 2>/dev/null
)
# Track GPU-disable decision so XRDP and CLAUDE_DISABLE_GPU don't
# stack duplicate flags. Either signal is sufficient.
local _disable_gpu=false
if [[ -n ${XRDP_SESSION:-} || $rdp_session_type == xrdp ]]; then
electron_args+=('--disable-gpu' '--disable-software-rasterizer')
_disable_gpu=true
log_message 'XRDP session detected - GPU compositing disabled'
fi
# CLAUDE_DISABLE_GPU=1: opt-in workaround for users hitting the
# Chromium GPU process FATAL exhaustion (#583). The same upstream
# behaviour is reachable via Settings → disable hardware
# acceleration; this lets users persist it via the env without
# having to reach the Settings UI through repeated crashes.
if [[ ${CLAUDE_DISABLE_GPU:-} == '1' ]]; then
_disable_gpu=true
log_message 'CLAUDE_DISABLE_GPU=1 - hardware acceleration disabled'
fi
[[ $_disable_gpu == true ]] \
&& electron_args+=('--disable-gpu' '--disable-software-rasterizer')
# X11 session - no special flags needed
if [[ $is_wayland != true ]]; then

View File

@@ -229,3 +229,94 @@ _skip_gtk_query() {
[[ $output != *'[WARN]'* ]]
[[ $output != *'ibus-gtk3'* ]]
}
# =============================================================================
# _doctor_check_recent_crashes: GPU FATAL crash counter (#583)
# =============================================================================
# Install a coredumpctl shim. $1 is the coredumpctl-list-style
# multi-line output to emit (header + entry rows). The shim ignores
# its arguments — tests don't exercise the filter syntax.
_install_coredumpctl_shim() {
mkdir -p "$TEST_TMP/bin"
cat > "$TEST_TMP/bin/coredumpctl" <<SHIM
#!/usr/bin/env bash
cat <<'OUT'
$1
OUT
SHIM
chmod +x "$TEST_TMP/bin/coredumpctl"
export PATH="$TEST_TMP/bin:$PATH"
}
@test "_doctor_check_recent_crashes: no coredumpctl on PATH — silent" {
# Force coredumpctl off PATH so the helper short-circuits.
# Restore PATH before returning so teardown's rm works.
local saved_path="$PATH"
export PATH="/no-such-dir-for-test"
run _doctor_check_recent_crashes \
'/usr/lib/claude-desktop/node_modules/electron/dist/electron'
export PATH="$saved_path"
[[ $status -eq 0 ]]
[[ -z $output ]]
}
@test "_doctor_check_recent_crashes: zero crashes — silent" {
# Listing has the header line only, no entry rows.
_install_coredumpctl_shim 'TIME PID UID GID SIG COREFILE EXE SIZE'
run _doctor_check_recent_crashes \
'/usr/lib/claude-desktop/node_modules/electron/dist/electron'
[[ $status -eq 0 ]]
[[ -z $output ]]
}
@test "_doctor_check_recent_crashes: 1 crash — info line, no warn" {
_install_coredumpctl_shim 'TIME PID UID GID SIG COREFILE EXE SIZE
Wed 2026-05-06 08:00:21 EDT 130375 1000 1000 SIGTRAP present /usr/lib/claude-desktop/node_modules/electron/dist/electron 21.6M'
run _doctor_check_recent_crashes \
'/usr/lib/claude-desktop/node_modules/electron/dist/electron'
[[ $status -eq 0 ]]
[[ $output == *'Recent Electron crashes: 1'* ]]
[[ $output != *'[WARN]'* ]]
}
@test "_doctor_check_recent_crashes: 3+ crashes — warn + #583 pointer" {
_install_coredumpctl_shim 'TIME PID UID GID SIG COREFILE EXE SIZE
Wed 2026-05-06 08:00:21 EDT 130375 1000 1000 SIGTRAP present /usr/lib/claude-desktop/node_modules/electron/dist/electron 21.6M
Mon 2026-05-04 07:44:48 EDT 930532 1000 1000 SIGTRAP present /usr/lib/claude-desktop/node_modules/electron/dist/electron 22.8M
Sun 2026-05-03 14:34:10 EDT 567221 1000 1000 SIGTRAP present /usr/lib/claude-desktop/node_modules/electron/dist/electron 12.4M'
run _doctor_check_recent_crashes \
'/usr/lib/claude-desktop/node_modules/electron/dist/electron'
[[ $status -eq 0 ]]
[[ $output == *'[WARN]'* ]]
[[ $output == *'Recent Electron crashes: 3'* ]]
[[ $output == *'CLAUDE_DISABLE_GPU=1'* ]]
[[ $output == *'/issues/583'* ]]
}
@test "_doctor_check_recent_crashes: path mismatch falls back with footnote" {
# Three crashes from a DIFFERENT electron binary (e.g., Slack).
# Caller passes claude-desktop's electron path, which doesn't
# match — helper falls back to total count and adds the footnote
# so the user knows the count may be cross-app.
_install_coredumpctl_shim 'TIME PID UID GID SIG COREFILE EXE SIZE
Wed 2026-05-06 09:00:00 EDT 200001 1000 1000 SIGSEGV present /usr/lib/slack/electron 30M
Wed 2026-05-05 09:00:00 EDT 200002 1000 1000 SIGSEGV present /usr/lib/slack/electron 30M
Wed 2026-05-04 09:00:00 EDT 200003 1000 1000 SIGSEGV present /usr/lib/slack/electron 30M'
run _doctor_check_recent_crashes \
'/usr/lib/claude-desktop/node_modules/electron/dist/electron'
[[ $status -eq 0 ]]
[[ $output == *'[WARN]'* ]]
[[ $output == *'may be from other Electron apps'* ]]
}
@test "_doctor_check_recent_crashes: empty electron_path falls back" {
_install_coredumpctl_shim 'TIME PID UID GID SIG COREFILE EXE SIZE
Wed 2026-05-06 08:00:21 EDT 130375 1000 1000 SIGTRAP present /usr/lib/claude-desktop/node_modules/electron/dist/electron 21.6M'
# Caller didn't pass an electron_path — helper still counts and
# emits the info line based on the unfiltered total.
run _doctor_check_recent_crashes ''
[[ $status -eq 0 ]]
[[ $output == *'Recent Electron crashes: 1'* ]]
[[ $output == *'may be from other Electron apps'* ]]
}

View File

@@ -107,6 +107,7 @@ teardown() {
CLAUDE_USE_WAYLAND='1'
CLAUDE_TITLEBAR_STYLE='hybrid'
CLAUDE_GTK_IM_MODULE='xim'
CLAUDE_DISABLE_GPU='1'
log_session_env
run cat "$log_file"
@@ -123,7 +124,8 @@ teardown() {
[[ "${lines[8]}" == ' CLAUDE_USE_WAYLAND=1' ]]
[[ "${lines[9]}" == ' CLAUDE_TITLEBAR_STYLE=hybrid' ]]
[[ "${lines[10]}" == ' CLAUDE_GTK_IM_MODULE=xim' ]]
[[ "${lines[11]}" == '}' ]]
[[ "${lines[11]}" == ' CLAUDE_DISABLE_GPU=1' ]]
[[ "${lines[12]}" == '}' ]]
}
@test "log_session_env: unset/empty values render as 'KEY=' (no value)" {
@@ -146,6 +148,7 @@ teardown() {
[[ "${lines[8]}" == ' CLAUDE_USE_WAYLAND=' ]]
[[ "${lines[9]}" == ' CLAUDE_TITLEBAR_STYLE=' ]]
[[ "${lines[10]}" == ' CLAUDE_GTK_IM_MODULE=' ]]
[[ "${lines[11]}" == ' CLAUDE_DISABLE_GPU=' ]]
}
# =============================================================================

View File

@@ -0,0 +1,144 @@
#!/usr/bin/env bats
#
# launcher-disable-gpu.bats
# Tests for the CLAUDE_DISABLE_GPU env var handling in
# build_electron_args (scripts/launcher-common.sh). The var is an
# opt-in workaround for the Chromium GPU process FATAL exhaustion
# tracked in #583. CLAUDE_DISABLE_GPU=1 adds --disable-gpu and
# --disable-software-rasterizer; co-occurrence with XRDP must not
# stack duplicate flags.
#
SCRIPT_DIR="$(cd "$(dirname "${BATS_TEST_FILENAME}")" && pwd)"
LAUNCHER_COMMON="${SCRIPT_DIR}/../scripts/launcher-common.sh"
setup() {
TEST_TMP=$(mktemp -d)
export TEST_TMP
# loginctl shim — same pattern as launcher-xrdp-detection.bats.
# Defaults to a non-XRDP session so CLAUDE_DISABLE_GPU is the
# only signal in play unless a test overrides MOCK_LOGINCTL_TYPE.
mkdir -p "$TEST_TMP/bin"
cat > "$TEST_TMP/bin/loginctl" <<'SHIM'
#!/usr/bin/env bash
printf '%s\n' "${MOCK_LOGINCTL_TYPE:-x11}"
SHIM
chmod +x "$TEST_TMP/bin/loginctl"
export PATH="$TEST_TMP/bin:$PATH"
log_file="$TEST_TMP/launcher.log"
: > "$log_file"
unset CLAUDE_DISABLE_GPU
unset XRDP_SESSION
unset XDG_SESSION_ID
unset MOCK_LOGINCTL_TYPE
# shellcheck disable=SC1090
source "$LAUNCHER_COMMON"
is_wayland=false
use_x11_on_wayland=true
}
teardown() {
if [[ -n ${TEST_TMP:-} && -d $TEST_TMP ]]; then
rm -rf "$TEST_TMP"
fi
}
args_contain() {
local needle="$1"
local arg
for arg in "${electron_args[@]}"; do
[[ $arg == "$needle" ]] && return 0
done
return 1
}
args_count() {
local needle="$1"
local arg count=0
for arg in "${electron_args[@]}"; do
[[ $arg == "$needle" ]] && ((count++))
done
printf '%d' "$count"
}
# =============================================================================
# CLAUDE_DISABLE_GPU=1 — flags must be added
# =============================================================================
@test "disable-gpu: CLAUDE_DISABLE_GPU=1 adds flags + logs message" {
export CLAUDE_DISABLE_GPU=1
build_electron_args deb
args_contain '--disable-gpu'
args_contain '--disable-software-rasterizer'
grep -q 'CLAUDE_DISABLE_GPU=1' "$log_file"
}
# =============================================================================
# Co-occurrence with XRDP — no duplicate flags
# =============================================================================
@test "disable-gpu: with XRDP_SESSION, flags added exactly once (no dup)" {
export CLAUDE_DISABLE_GPU=1
export XRDP_SESSION=1
export XDG_SESSION_ID=5
export MOCK_LOGINCTL_TYPE=xrdp
build_electron_args deb
[[ "$(args_count '--disable-gpu')" -eq 1 ]]
[[ "$(args_count '--disable-software-rasterizer')" -eq 1 ]]
# Both signals should still log (independent diagnostic value),
# but only one set of flags should reach electron_args.
grep -q 'XRDP session detected' "$log_file"
grep -q 'CLAUDE_DISABLE_GPU=1' "$log_file"
}
# =============================================================================
# Off-states — flags must NOT be added
# =============================================================================
@test "disable-gpu: unset — flags NOT added" {
build_electron_args deb
run args_contain '--disable-gpu'
[[ "$status" -ne 0 ]]
run args_contain '--disable-software-rasterizer'
[[ "$status" -ne 0 ]]
}
@test "disable-gpu: empty string — flags NOT added" {
export CLAUDE_DISABLE_GPU=''
build_electron_args deb
run args_contain '--disable-gpu'
[[ "$status" -ne 0 ]]
}
@test "disable-gpu: =0 — flags NOT added (only literal '1' opts in)" {
export CLAUDE_DISABLE_GPU=0
build_electron_args deb
run args_contain '--disable-gpu'
[[ "$status" -ne 0 ]]
}
@test "disable-gpu: =true — flags NOT added (no boolean aliases)" {
# Documents the strict equality check. If we ever add aliases,
# update this test to match. Strict-only matches the existing
# CLAUDE_USE_WAYLAND pattern.
export CLAUDE_DISABLE_GPU=true
build_electron_args deb
run args_contain '--disable-gpu'
[[ "$status" -ne 0 ]]
}