Files
claude-desktop-debian/scripts/launcher-common.sh

354 lines
13 KiB
Bash
Raw Normal View History

#!/usr/bin/env bash
# Common launcher functions for Claude Desktop (AppImage and deb)
# This file is sourced by both launchers to avoid code duplication
# Prepare the launcher's log location.
# The cache root is XDG_CACHE_HOME when set and non-empty, otherwise
# $HOME/.cache; the log lives in a claude-desktop-debian directory
# beneath it.
# Sets: log_dir, log_file
# Returns: 1 if the directory cannot be created (log_file is then left
#          unassigned), 0 otherwise.
setup_logging() {
  local cache_root="${XDG_CACHE_HOME:-$HOME/.cache}"

  log_dir="$cache_root/claude-desktop-debian"
  if ! mkdir -p "$log_dir"; then
    return 1
  fi
  log_file="$log_dir/launcher.log"
}
# Append one line to the launcher log file.
# Usage: log_message "message"
# Requires: log_file to be set (call setup_logging first)
# printf is used instead of echo so that a message which happens to be
# an echo option ("-n", "-e", "-E") is written verbatim rather than
# being consumed as a flag and lost.
log_message() {
  printf '%s\n' "$1" >> "$log_file"
}
launcher: log session/IME env block at startup (#570) * launcher: log session/IME env block at startup (#548) Adds log_session_env, called once per launch from each packaging target (deb, rpm, AppImage, Nix). Emits a single env={ ... } block covering display (XDG_SESSION_TYPE, WAYLAND_DISPLAY, DISPLAY, XDG_CURRENT_DESKTOP), IME (GTK_IM_MODULE, XMODIFIERS, QT_IM_MODULE), and Claude-specific overrides (CLAUDE_USE_WAYLAND, CLAUDE_TITLEBAR_STYLE, CLAUDE_GTK_IM_MODULE). Empty/unset values are emitted as `KEY=` (rather than omitted) so absence is unambiguous in bug reports. Pure observability — no behavior change. Closes #548 Co-Authored-By: Claude <claude@anthropic.com> * test: consolidate log_session_env BATS coverage (#548) Collapse the four log_session_env cases into two, and tighten the assertions in both: - Old test 1 (substring match per key) + old test 4 (block braces on their own lines) → one test using exact-line equality on the `lines` array. Locks block structure and per-key formatting in a single pass; substring matching could not catch a regression that re-ordered keys, dropped indentation, or merged lines. - Old test 2 (unset values are KEY=) + old test 3 (empty-string is KEY=) → one test covering both code paths. Exact-line match proves the value after `=` is truly empty; the previous `*'KEY='*` substring would have matched `KEY=value` and the old test-3 regex was fragile (depended on trailing newline being literal `$'\n'` vs end-of-string `$`). Net: 77 → 42 lines, 4 → 2 cases, stronger guarantees. No change to the helper itself or the call sites — issue #548 acceptance criteria still hold. --------- Co-authored-by: Claude <claude@anthropic.com>
2026-05-05 06:45:42 -04:00
# Log the session/IME environment vars that drive display and input
# decisions, so bug reports include enough context to reason about
# them without round-trip env-dump requests (#548).
#
# Emits one block:
# env={
# KEY=value
# ...
# }
#
# Empty or unset values are emitted as `KEY=` so absence is
# unambiguous (vs. silently omitted). Caller must run setup_logging
# first.
log_session_env() {
  # Variables reported, in output order: display selection first, then
  # input-method (IME) settings, then Claude-specific overrides.
  local -a keys=(
    XDG_SESSION_TYPE
    WAYLAND_DISPLAY
    DISPLAY
    XDG_CURRENT_DESKTOP
    GTK_IM_MODULE
    XMODIFIERS
    QT_IM_MODULE
    CLAUDE_USE_WAYLAND
    CLAUDE_TITLEBAR_STYLE
    CLAUDE_GTK_IM_MODULE
    CLAUDE_DISABLE_GPU
  )
  local key

  log_message 'env={'
  for key in "${keys[@]}"; do
    # ${!key:-} reads the variable named by $key; unset and empty both
    # yield "KEY=" so absence is visible in the log (and the lookup is
    # safe under `set -u`).
    log_message " $key=${!key:-}"
  done
  log_message '}'
}
# Decide which display backend the launcher should target.
# Sets: is_wayland          - true when WAYLAND_DISPLAY is non-empty
#       use_x11_on_wayland  - true unless native Wayland was requested
#                             (CLAUDE_USE_WAYLAND=1) or forced (Niri)
detect_display_backend() {
  local desktop_lc

  # A non-empty WAYLAND_DISPLAY means a Wayland compositor is running.
  if [[ -n "${WAYLAND_DISPLAY:-}" ]]; then
    is_wayland=true
  else
    is_wayland=false
  fi

  # X11/XWayland is the default on Wayland because it keeps global
  # hotkeys working; CLAUDE_USE_WAYLAND=1 opts into native Wayland
  # (global hotkeys disabled).
  if [[ "${CLAUDE_USE_WAYLAND:-}" == '1' ]]; then
    use_x11_on_wayland=false
  else
    use_x11_on_wayland=true
  fi

  # The compositor override below only matters when we are on Wayland
  # and would otherwise go through XWayland.
  if [[ $is_wayland != true || $use_x11_on_wayland != true ]]; then
    return 0
  fi

  # Fixes: #226 - Niri has no XWayland support, so it is the only
  # compositor auto-forced to native Wayland. Sway and Hyprland have
  # working XWayland; users there can set CLAUDE_USE_WAYLAND=1.
  # XDG_CURRENT_DESKTOP may be colon-separated (e.g. "niri:GNOME"), so
  # a lower-cased substring match is used rather than equality.
  desktop_lc="${XDG_CURRENT_DESKTOP:-}"
  desktop_lc="${desktop_lc,,}"
  if [[ "$desktop_lc" == *niri* || -n "${NIRI_SOCKET:-}" ]]; then
    log_message "Niri detected - forcing native Wayland"
    use_x11_on_wayland=false
  fi
}
# Check if we have a valid display (not running from TTY)
# Returns: 0 if DISPLAY or WAYLAND_DISPLAY is non-empty, 1 otherwise
# The ${VAR:-} form keeps the test safe when the caller runs under
# `set -u` and neither variable exists (the normal TTY case), matching
# how the other functions in this file read environment variables.
check_display() {
  [[ -n ${DISPLAY:-} || -n ${WAYLAND_DISPLAY:-} ]]
}
feat(linux): hybrid titlebar mode for clickable in-app topbar (#538) * feat(linux): hybrid titlebar mode for clickable in-app topbar Default `CLAUDE_TITLEBAR_STYLE` is now `hybrid`: native OS frame plus a BrowserView preload shim that convinces claude.ai's bundle to render its in-app topbar (hamburger / sidebar / search / nav / Cowork ghost). Stacked layout instead of Windows's combined bar, but every button is clickable. Why not the upstream `frame:false` + WCO config: investigation (see docs/learnings/linux-topbar-shim.md) ruled out `titleBarOverlay`, `titleBarStyle:'hidden'`, and the `.draggable` CSS class as the source of the topbar click-eating drag region. The remaining cause is a Chromium-level implicit drag region for `frame:false` windows that exists on both X11 and Wayland and has no Electron-API knob. With `frame:true` the OS handles dragging and Chromium pushes no drag-region map, so the buttons receive mouse events normally. Modes: - `hybrid` (default) — system frame + shim, topbar visible and clickable - `native` — system frame, no shim, no in-app topbar - `hidden` — frameless + WCO config, matches Windows/macOS upstream; topbar visible but not clickable on Linux. Kept for Wayland comparison and future investigation Tests: tests/launcher-common.bats grew 16 cases covering `_resolve_titlebar_style`, `build_electron_args` flag selection per mode, and `setup_electron_env` env-var wiring per mode. `claude-desktop --doctor` now reports the resolved mode and warns when `hidden` is set. Co-Authored-By: Claude <claude@anthropic.com> * docs(learnings): add hybrid-mode screenshot Visual reference of the stacked layout: DE-drawn titlebar on top with native window controls, claude.ai's in-app topbar (hamburger / search / back-forward) immediately below it. Co-Authored-By: Claude <claude@anthropic.com> * docs(learnings): fix codespell hit (Pre-emptive → Preemptive) Codespell flags hyphenated "Pre-emptive" as a misspelling of "Preemptive". 
Drops the hyphen to clear the spellcheck CI gate on PR #538. Co-Authored-By: Claude <claude@anthropic.com> --------- Co-authored-by: Claude <claude@anthropic.com>
2026-05-01 02:47:16 -04:00
# Normalise CLAUDE_TITLEBAR_STYLE to one of {hybrid,native,hidden}.
# Matching is case-insensitive; an unset, empty or unrecognised value
# resolves to 'hybrid'. The result is printed on stdout (nothing is
# exported) so callers can branch on it without touching the
# environment.
#   hybrid - recommended on Linux: native OS frame plus the in-app
#            topbar via the wco-shim
#   hidden - upstream's frameless WCO config; broken on Linux X11
#            (clicks unresponsive) but kept for Wayland/diagnostic
#            comparison
_resolve_titlebar_style() {
  local style="${CLAUDE_TITLEBAR_STYLE:-hybrid}"
  style="${style,,}"

  if [[ $style == hybrid || $style == hidden || $style == native ]]; then
    printf '%s\n' "$style"
  else
    printf '%s\n' 'hybrid'
  fi
}
# Build Electron arguments array based on display backend
# Requires: is_wayland, use_x11_on_wayland to be set
#           (call detect_display_backend first)
# Sets: electron_args array; exports GDK_BACKEND=wayland when the
#       native Wayland backend is selected
# Arguments: $1 = "appimage", "deb" or "nix" (default "deb"); affects
#            --no-sandbox behavior
build_electron_args() {
  local package_type="${1:-deb}"
  electron_args=()

  # AppImage always needs --no-sandbox due to FUSE constraints
  if [[ $package_type == 'appimage' ]]; then
    electron_args+=('--no-sandbox')
  fi

  # Native Wayland is decided up front because it also needs an
  # --enable-features switch. Chromium keeps only the LAST occurrence
  # of a repeated switch, so when both the titlebar and the Wayland
  # backend want features enabled they must share a single switch.
  local _native_wayland=false
  if [[ $is_wayland == true && $use_x11_on_wayland != true ]]; then
    _native_wayland=true
  fi

  # CLAUDE_TITLEBAR_STYLE selects between:
  #   hybrid (default) / native: --disable-features=CustomTitlebar
  #     so Chromium's drawn CSD titlebar doesn't compete with
  #     the DE-drawn one. Both modes use frame:true.
  #   hidden: --enable-features=WindowControlsOverlay because WCO
  #     is off by default on Linux Chromium (Win/macOS have
  #     it on by default). Without this flag, titleBarOverlay
  #     is silently ignored at the page level.
  local _tb
  _tb=$(_resolve_titlebar_style)
  if [[ $_tb != 'hidden' ]]; then
    electron_args+=('--disable-features=CustomTitlebar')
  elif [[ $_native_wayland != true ]]; then
    electron_args+=('--enable-features=WindowControlsOverlay')
  fi
  # (hidden + native Wayland: WindowControlsOverlay is folded into the
  # Wayland --enable-features switch further down.)

  # Remote XRDP sessions lack GPU acceleration and render a blank
  # window when GPU compositing is enabled. Detect via XRDP_SESSION
  # (set by xrdp's session init) and loginctl session Type. We do
  # not probe xrdp-sesman via pgrep because that daemon also runs
  # on hosts where the user is on a local (non-XRDP) session.
  # Fixes: #319
  local rdp_session_type=''
  if [[ -n ${XDG_SESSION_ID:-} ]]; then
    # loginctl may be missing or fail (non-systemd hosts); that only
    # means "not detected" and must not abort a `set -e` caller.
    rdp_session_type=$(
      loginctl show-session "$XDG_SESSION_ID" \
        -p Type --value 2>/dev/null
    ) || true
  fi

  # Track GPU-disable decision so XRDP and CLAUDE_DISABLE_GPU don't
  # stack duplicate flags. Either signal is sufficient.
  local _disable_gpu=false
  if [[ -n ${XRDP_SESSION:-} || $rdp_session_type == xrdp ]]; then
    _disable_gpu=true
    log_message 'XRDP session detected - GPU compositing disabled'
  fi

  # CLAUDE_DISABLE_GPU=1: opt-in workaround for users hitting the
  # Chromium GPU process FATAL exhaustion (#583). The same upstream
  # behaviour is reachable via Settings → disable hardware
  # acceleration; this lets users persist it via the env without
  # having to reach the Settings UI through repeated crashes.
  if [[ ${CLAUDE_DISABLE_GPU:-} == '1' ]]; then
    _disable_gpu=true
    log_message 'CLAUDE_DISABLE_GPU=1 - hardware acceleration disabled'
  fi

  if [[ $_disable_gpu == true ]]; then
    electron_args+=('--disable-gpu' '--disable-software-rasterizer')
  fi

  # X11 session - no special flags needed
  if [[ $is_wayland != true ]]; then
    log_message 'X11 session detected'
    return
  fi

  # Wayland: deb/nix packages need --no-sandbox in both modes
  if [[ $package_type == 'deb' || $package_type == 'nix' ]]; then
    electron_args+=('--no-sandbox')
  fi

  if [[ $_native_wayland != true ]]; then
    # Default: Use X11 via XWayland for global hotkey support
    log_message 'Using X11 backend via XWayland (for global hotkey support)'
    electron_args+=('--ozone-platform=x11')
  else
    # Native Wayland mode (CLAUDE_USE_WAYLAND=1, or forced by
    # detect_display_backend)
    log_message 'Using native Wayland backend (global hotkeys may not work)'
    local _features='UseOzonePlatform,WaylandWindowDecorations'
    if [[ $_tb == 'hidden' ]]; then
      _features+=',WindowControlsOverlay'
    fi
    electron_args+=("--enable-features=$_features")
    electron_args+=('--ozone-platform=wayland')
    electron_args+=('--enable-wayland-ime')
    electron_args+=('--wayland-text-input-version=3')
    # Override any system-wide GDK_BACKEND=x11 that would silently
    # prevent GTK from connecting to the Wayland compositor, causing
    # blurry rendering or launch failures on HiDPI displays.
    export GDK_BACKEND=wayland
  fi
}
# Kill orphaned cowork-vm-service daemon processes.
# After a crash or unclean shutdown the cowork daemon may outlive the
# main Electron UI process. The orphaned daemon holds LevelDB locks
# in ~/.config/Claude/Local Storage/ AND keeps the Unix socket at
# $XDG_RUNTIME_DIR/cowork-vm-service.sock bound, which causes a new
# launch to either silently quit (LevelDB) or connect to the stale
# daemon (socket) and hang with a blank window.
# Must run BEFORE cleanup_stale_lock / cleanup_stale_cowork_socket
# so that stale files left behind by the daemon can be cleaned up.
# Returns: 0 when no daemon is running or a live UI owns it; otherwise
#          the status of the final log_message call.
cleanup_orphaned_cowork_daemon() {
  local daemon_pattern='cowork-vm-service\.js'
  local cowork_pids
  cowork_pids=$(pgrep -f "$daemon_pattern" 2>/dev/null) \
    || return 0

  # Check if a live Claude Desktop UI process is also running.
  #
  # We can NOT use `pgrep -f 'claude-desktop'` on its own for this:
  # it matches the launcher's own bash process (this script's
  # cmdline contains "/usr/bin/claude-desktop"), any stale launcher
  # bash left stopped/zombie after a previous crash, and the cowork
  # daemon itself. Counting any of those as "the UI is alive"
  # causes a false negative and the orphan survives.
  #
  # The reliable definition of "UI is alive" is: an Electron main
  # process whose cmdline references app.asar and is NOT a Chromium
  # helper (--type=...) and NOT the cowork daemon, and is actually
  # runnable (not stopped/zombie).
  local pid cmdline state
  for pid in $(pgrep -f 'app\.asar' 2>/dev/null); do
    # Skip our own launcher bash and its parent.
    [[ $pid == "$$" || $pid == "$PPID" ]] && continue

    # 2>/dev/null comes BEFORE the input redirection on purpose:
    # redirections apply left to right, so this also silences the
    # shell's own "No such file" message when the process exited
    # between pgrep and this read.
    cmdline=$(tr '\0' ' ' 2>/dev/null < "/proc/$pid/cmdline") \
      || continue

    # Skip the cowork daemon (matches app.asar.unpacked path).
    [[ $cmdline == *cowork-vm-service* ]] && continue

    # Skip Chromium helpers: zygote, renderer, gpu, utility, etc.
    [[ $cmdline == *--type=* ]] && continue

    # Skip stopped (T/t) and zombie (Z) processes — not a live UI.
    state=$(awk '/^State:/ {print $2; exit}' \
      "/proc/$pid/status" 2>/dev/null) || continue
    [[ $state == T || $state == t || $state == Z ]] && continue

    # Found a genuine live Electron UI — daemon is expected
    return 0
  done

  # No UI process found — daemon is orphaned, terminate it.
  # Escalate to SIGKILL if a daemon is stuck and does not exit
  # after SIGTERM within ~2s, so cleanup_stale_cowork_socket
  # (which runs next) reliably sees no daemon.
  for pid in $cowork_pids; do
    kill "$pid" 2>/dev/null || true
  done

  # Poll up to 20 x 0.1s. A C-style for loop is used instead of a
  # bare ((_wait++)) statement: the latter returns status 1 when the
  # pre-increment value is 0 and would abort a `set -e` caller on the
  # first iteration.
  local _wait
  for ((_wait = 0; _wait < 20; _wait++)); do
    pgrep -f "$daemon_pattern" &>/dev/null || break
    sleep 0.1
  done

  if pgrep -f "$daemon_pattern" &>/dev/null; then
    for pid in $cowork_pids; do
      kill -KILL "$pid" 2>/dev/null || true
    done
    log_message "Killed orphaned cowork-vm-service daemon (SIGKILL, PIDs: $cowork_pids)"
  else
    log_message "Killed orphaned cowork-vm-service daemon (PIDs: $cowork_pids)"
  fi
}
# Remove Electron's SingletonLock when the process that owns it is gone.
# Electron's requestSingleInstanceLock() silently quits if the lock is
# held, so a lock left behind by a crash or unclean update blocks every
# later launch with no user-facing error message.
# The lock is a symlink whose target is "hostname-PID"; the PID is the
# text after the last '-'. Anything that is not a symlink, cannot be
# read, or does not end in a numeric PID is left alone.
cleanup_stale_lock() {
  local lock_path="${XDG_CONFIG_HOME:-$HOME/.config}/Claude/SingletonLock"
  local link_target lock_pid

  if [[ ! -L $lock_path ]]; then
    return 0
  fi

  if ! link_target="$(readlink "$lock_path" 2>/dev/null)"; then
    return 0
  fi

  # Keep only what follows the final '-' and insist it is numeric.
  lock_pid="${link_target##*-}"
  if [[ ! $lock_pid =~ ^[0-9]+$ ]]; then
    return 0
  fi

  # kill -0 sends no signal; it only reports whether the PID can be
  # signalled. Failure is treated as "owner no longer running".
  if ! kill -0 "$lock_pid" 2>/dev/null; then
    rm -f "$lock_path"
    log_message "Removed stale SingletonLock (PID $lock_pid no longer running)"
  fi
}
feat: KVM/bwrap isolation backends for cowork mode (#269) * feat: add KVM/bwrap isolation backends for cowork mode Refactor cowork-vm-service.js from monolithic VMManager into a pluggable backend architecture with three isolation levels: - HostBackend: direct execution on host (existing Phase 1 behavior) - BwrapBackend: bubblewrap namespace sandbox (PID/mount isolation) - KvmBackend: full QEMU/KVM VM with vsock, virtiofs, QMP monitor Backend auto-detected at startup (KVM > bwrap > host) or overridden via COWORK_VM_BACKEND env var. Shared helpers extracted for env filtering, arg cleanup, command resolution, and work dir handling. Also: - Add Cowork Mode section to --doctor diagnostics with per-tool checks and distro-specific install hints - Update Patch 4 to extract real win32 file entries for linux bundle manifest (enables app download infrastructure) - Update handover documentation for Phase 2/3 architecture Co-Authored-By: Claude <claude@anthropic.com> * fix: correct KvmBackend vsock port, direction, and kernel cmdline The guest sdk-daemon connects TO the host (CID=2), not the other way around. Confirmed via disassembly of the guest binary: the vsock port is 51234 (0xC822), matching the Hyper-V GUID in the Windows service. - Change VSOCK_GUEST_PORT from 2222 to 51234 - Reverse socat bridge: VSOCK-LISTEN (host listens) instead of VSOCK-CONNECT (host connecting to guest) - Add bridge server to accept persistent guest connection and route events/responses via callback map - Fix kernel cmdline: root=LABEL=cloudimg-rootfs (matches fstab) Co-Authored-By: Claude <claude@anthropic.com> * fix: patch VM download to use disk-backed temp dir on Linux Linux systems often mount /tmp as a small tmpfs (3-4GB). The VM rootfs download decompresses to ~9GB, causing ENOSPC. Patch the app's mkdtemp("wvm-") call to use the bundle directory (on real disk) instead of os.tmpdir() on Linux. 
Uses regex-based dynamic variable extraction to remain version-agnostic across minified code changes. Co-Authored-By: Claude <claude@anthropic.com> * fix: handle stale cowork socket (ECONNREFUSED) on Linux Stale sockets from previous sessions give ECONNREFUSED instead of ENOENT, bypassing the retry loop and auto-launch entirely. Fix: - Expand ENOENT check in retry loop to include ECONNREFUSED on Linux - Add cleanup_stale_cowork_socket() to launcher scripts (all formats) that removes dead sockets before Electron starts - Increase tmpdir patch search window from 1000 to 2000 chars Co-Authored-By: Claude <claude@anthropic.com> * fix: preserve DNS resolution inside bwrap sandbox On systems using systemd-resolved, /etc/resolv.conf is a symlink to /run/systemd/resolve/stub-resolv.conf. The bwrap --tmpfs /run option wiped this out, breaking DNS resolution inside the sandbox and preventing the spawned Claude process from reaching the API. Bind-mount the resolved /run/systemd/resolve/ directory back into the sandbox as read-only to restore DNS. Co-Authored-By: Claude <claude@anthropic.com> * fix: harden cowork isolation and build patches - Remove broken _setupEventForwarding (events already flow through _handleGuestData); the second bridge connection was silently ignored - Mount $HOME read-only in bwrap sandbox; only workDir and explicit mounts are writable (prevents writes to ~/.ssh, ~/.gnupg, etc.) - Scope Patch 4 win32 extraction to actual win32:{} block via brace counting to avoid crossing into darwin/linux sections - Set _qmpAvailable flag on QMP timeout instead of silently continuing - Wrap CID allocation at 65535 to prevent unbounded growth - Use execFileSync instead of execSync('which ...') in detectBackend - Coerce response ID to String for Map lookup in _handleGuestData - Use non-greedy [\s\S]*? 
in Patch 6 regex for nested brace robustness - Update patch count threshold from 4 to 5 after adding Patch 8 - Add age-based fallback for stale socket cleanup when socat is missing - Use indexOf-based splice in Patch 8 instead of string.replace() - Extract shared resolveSdkBinary helper to deduplicate SDK resolution - Remove dead retryFuncRe/retryFuncMatch variables from Patch 6 Co-Authored-By: Claude <claude@anthropic.com> * fix: address security and correctness issues from code review - Replace execSync string interpolation with execFileSync for qemu-img calls to eliminate shell injection risk - Add path validation to readFile in both LocalBackend and KvmBackend to restrict reads to within the user's home directory - Fix QMP _sendQmpCommand timer leak by clearing timeout on success - Fix _pendingCallbacks.delete() to use String(msg.id) matching the String(msg.id) used in the .get() lookup - Extract FORWARDED_EVENTS constant, cleanup helper, extractBlock helper, and consolidate doctor tool checks (from simplifier pass) Co-Authored-By: Claude <claude@anthropic.com> * docs: update README cowork notice with isolation backends and doctor info Co-Authored-By: Claude <claude@anthropic.com> --------- Co-authored-by: Claude <claude@anthropic.com>
2026-02-28 21:13:09 -05:00
# Clean up stale cowork-vm-service socket if no daemon is listening.
# The service daemon creates a Unix socket at
# $XDG_RUNTIME_DIR/cowork-vm-service.sock. After a crash or unclean
# shutdown, the socket file persists but nothing is listening, causing
# ECONNREFUSED instead of ENOENT when the app tries to connect.
fix: launcher-common.sh self-match and stale socket cleanup (#407) (#425) * fix: launcher-common.sh self-match and stale socket cleanup (#407) Three related bugs in scripts/launcher-common.sh that combine to break Claude Desktop startup after any crash that reparents the cowork daemon on Debian/Ubuntu/Mint systems. 1. cleanup_orphaned_cowork_daemon — the old pgrep pattern 'claude-desktop' self-matches the launcher's own bash process (cmdline `bash /usr/bin/claude-desktop`), causing the function to return early on every invocation. The SIGTERM loop never runs. Replaced with `pgrep -f 'app\.asar'` plus $$/$PPID exclusion, --type= filter (skips chromium helpers), and /proc/*/status check (skips stopped/zombie launcher bashes). Added SIGKILL escalation after ~2s so cleanup_stale_cowork_socket reliably sees no daemon. 2. cleanup_stale_cowork_socket — the old implementation required socat (not preinstalled on Debian/Ubuntu/Mint) and fell through to a find -mmin +1440 check that ignored any socket younger than 24h. Rewritten to use the ordering invariant: cleanup_orphaned_cowork_daemon runs first and kills any orphan, so at this point an extant daemon proves the socket is live and an absent daemon proves the socket is stale. No socat dependency. 3. run_doctor orphan check — same self-match flaw as (1). claude-desktop --doctor reported [PASS] Cowork daemon: running (parent alive) on systems with a genuine orphan, actively misleading users trying to diagnose this failure. Applied the same detection primitive as (1). Complements #410 (daemon-side crash recovery): #410 reduces how often orphans are created; this ensures the launcher actually cleans them up when they are. 
Fixes #407 Co-Authored-By: martin152 <martin152@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * docs: credit martin152 in Acknowledgments for #407 launcher fix Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * style: quote RHS of $$/$PPID comparisons (SC2053) Shellcheck SC2053: quote RHS in [[ ]] equality tests to prevent glob matching. No behavior change — $$ and $PPID are always numeric PIDs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: martin152 <martin152@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 19:28:22 -05:00
#
# NOTE: this function MUST run after cleanup_orphaned_cowork_daemon,
# which is responsible for killing any orphaned daemon. Given that
# ordering, the presence of a live daemon proves the socket is in
# use; the absence of a daemon proves the socket is stale.
# We use that invariant directly instead of depending on socat (not
# shipped by default on Debian/Ubuntu) or an age heuristic (the old
# 24h fallback effectively disabled the cleanup for any recent
# crash).
feat: KVM/bwrap isolation backends for cowork mode (#269) * feat: add KVM/bwrap isolation backends for cowork mode Refactor cowork-vm-service.js from monolithic VMManager into a pluggable backend architecture with three isolation levels: - HostBackend: direct execution on host (existing Phase 1 behavior) - BwrapBackend: bubblewrap namespace sandbox (PID/mount isolation) - KvmBackend: full QEMU/KVM VM with vsock, virtiofs, QMP monitor Backend auto-detected at startup (KVM > bwrap > host) or overridden via COWORK_VM_BACKEND env var. Shared helpers extracted for env filtering, arg cleanup, command resolution, and work dir handling. Also: - Add Cowork Mode section to --doctor diagnostics with per-tool checks and distro-specific install hints - Update Patch 4 to extract real win32 file entries for linux bundle manifest (enables app download infrastructure) - Update handover documentation for Phase 2/3 architecture Co-Authored-By: Claude <claude@anthropic.com> * fix: correct KvmBackend vsock port, direction, and kernel cmdline The guest sdk-daemon connects TO the host (CID=2), not the other way around. Confirmed via disassembly of the guest binary: the vsock port is 51234 (0xC822), matching the Hyper-V GUID in the Windows service. - Change VSOCK_GUEST_PORT from 2222 to 51234 - Reverse socat bridge: VSOCK-LISTEN (host listens) instead of VSOCK-CONNECT (host connecting to guest) - Add bridge server to accept persistent guest connection and route events/responses via callback map - Fix kernel cmdline: root=LABEL=cloudimg-rootfs (matches fstab) Co-Authored-By: Claude <claude@anthropic.com> * fix: patch VM download to use disk-backed temp dir on Linux Linux systems often mount /tmp as a small tmpfs (3-4GB). The VM rootfs download decompresses to ~9GB, causing ENOSPC. Patch the app's mkdtemp("wvm-") call to use the bundle directory (on real disk) instead of os.tmpdir() on Linux. 
Uses regex-based dynamic variable extraction to remain version-agnostic across minified code changes. Co-Authored-By: Claude <claude@anthropic.com> * fix: handle stale cowork socket (ECONNREFUSED) on Linux Stale sockets from previous sessions give ECONNREFUSED instead of ENOENT, bypassing the retry loop and auto-launch entirely. Fix: - Expand ENOENT check in retry loop to include ECONNREFUSED on Linux - Add cleanup_stale_cowork_socket() to launcher scripts (all formats) that removes dead sockets before Electron starts - Increase tmpdir patch search window from 1000 to 2000 chars Co-Authored-By: Claude <claude@anthropic.com> * fix: preserve DNS resolution inside bwrap sandbox On systems using systemd-resolved, /etc/resolv.conf is a symlink to /run/systemd/resolve/stub-resolv.conf. The bwrap --tmpfs /run option wiped this out, breaking DNS resolution inside the sandbox and preventing the spawned Claude process from reaching the API. Bind-mount the resolved /run/systemd/resolve/ directory back into the sandbox as read-only to restore DNS. Co-Authored-By: Claude <claude@anthropic.com> * fix: harden cowork isolation and build patches - Remove broken _setupEventForwarding (events already flow through _handleGuestData); the second bridge connection was silently ignored - Mount $HOME read-only in bwrap sandbox; only workDir and explicit mounts are writable (prevents writes to ~/.ssh, ~/.gnupg, etc.) - Scope Patch 4 win32 extraction to actual win32:{} block via brace counting to avoid crossing into darwin/linux sections - Set _qmpAvailable flag on QMP timeout instead of silently continuing - Wrap CID allocation at 65535 to prevent unbounded growth - Use execFileSync instead of execSync('which ...') in detectBackend - Coerce response ID to String for Map lookup in _handleGuestData - Use non-greedy [\s\S]*? 
in Patch 6 regex for nested brace robustness - Update patch count threshold from 4 to 5 after adding Patch 8 - Add age-based fallback for stale socket cleanup when socat is missing - Use indexOf-based splice in Patch 8 instead of string.replace() - Extract shared resolveSdkBinary helper to deduplicate SDK resolution - Remove dead retryFuncRe/retryFuncMatch variables from Patch 6 Co-Authored-By: Claude <claude@anthropic.com> * fix: address security and correctness issues from code review - Replace execSync string interpolation with execFileSync for qemu-img calls to eliminate shell injection risk - Add path validation to readFile in both LocalBackend and KvmBackend to restrict reads to within the user's home directory - Fix QMP _sendQmpCommand timer leak by clearing timeout on success - Fix _pendingCallbacks.delete() to use String(msg.id) matching the String(msg.id) used in the .get() lookup - Extract FORWARDED_EVENTS constant, cleanup helper, extractBlock helper, and consolidate doctor tool checks (from simplifier pass) Co-Authored-By: Claude <claude@anthropic.com> * docs: update README cowork notice with isolation backends and doctor info Co-Authored-By: Claude <claude@anthropic.com> --------- Co-authored-by: Claude <claude@anthropic.com>
2026-02-28 21:13:09 -05:00
cleanup_stale_cowork_socket() {
local sock="${XDG_RUNTIME_DIR:-/tmp}/cowork-vm-service.sock"
[[ -S $sock ]] || return 0
fix: launcher-common.sh self-match and stale socket cleanup (#407) (#425) * fix: launcher-common.sh self-match and stale socket cleanup (#407) Three related bugs in scripts/launcher-common.sh that combine to break Claude Desktop startup after any crash that reparents the cowork daemon on Debian/Ubuntu/Mint systems. 1. cleanup_orphaned_cowork_daemon — the old pgrep pattern 'claude-desktop' self-matches the launcher's own bash process (cmdline `bash /usr/bin/claude-desktop`), causing the function to return early on every invocation. The SIGTERM loop never runs. Replaced with `pgrep -f 'app\.asar'` plus $$/$PPID exclusion, --type= filter (skips chromium helpers), and /proc/*/status check (skips stopped/zombie launcher bashes). Added SIGKILL escalation after ~2s so cleanup_stale_cowork_socket reliably sees no daemon. 2. cleanup_stale_cowork_socket — the old implementation required socat (not preinstalled on Debian/Ubuntu/Mint) and fell through to a find -mmin +1440 check that ignored any socket younger than 24h. Rewritten to use the ordering invariant: cleanup_orphaned_cowork_daemon runs first and kills any orphan, so at this point an extant daemon proves the socket is live and an absent daemon proves the socket is stale. No socat dependency. 3. run_doctor orphan check — same self-match flaw as (1). claude-desktop --doctor reported [PASS] Cowork daemon: running (parent alive) on systems with a genuine orphan, actively misleading users trying to diagnose this failure. Applied the same detection primitive as (1). Complements #410 (daemon-side crash recovery): #410 reduces how often orphans are created; this ensures the launcher actually cleans them up when they are. 
Fixes #407 Co-Authored-By: martin152 <martin152@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * docs: credit martin152 in Acknowledgments for #407 launcher fix Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * style: quote RHS of $$/$PPID comparisons (SC2053) Shellcheck SC2053: quote RHS in [[ ]] equality tests to prevent glob matching. No behavior change — $$ and $PPID are always numeric PIDs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: martin152 <martin152@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 19:28:22 -05:00
# If a cowork daemon is alive, it owns this socket; leave it.
# cleanup_orphaned_cowork_daemon has already run and removed any
# orphan (with SIGKILL escalation), so anything still alive here
# is a non-orphaned, live daemon.
if pgrep -f 'cowork-vm-service\.js' &>/dev/null; then
return 0
feat: KVM/bwrap isolation backends for cowork mode (#269) * feat: add KVM/bwrap isolation backends for cowork mode Refactor cowork-vm-service.js from monolithic VMManager into a pluggable backend architecture with three isolation levels: - HostBackend: direct execution on host (existing Phase 1 behavior) - BwrapBackend: bubblewrap namespace sandbox (PID/mount isolation) - KvmBackend: full QEMU/KVM VM with vsock, virtiofs, QMP monitor Backend auto-detected at startup (KVM > bwrap > host) or overridden via COWORK_VM_BACKEND env var. Shared helpers extracted for env filtering, arg cleanup, command resolution, and work dir handling. Also: - Add Cowork Mode section to --doctor diagnostics with per-tool checks and distro-specific install hints - Update Patch 4 to extract real win32 file entries for linux bundle manifest (enables app download infrastructure) - Update handover documentation for Phase 2/3 architecture Co-Authored-By: Claude <claude@anthropic.com> * fix: correct KvmBackend vsock port, direction, and kernel cmdline The guest sdk-daemon connects TO the host (CID=2), not the other way around. Confirmed via disassembly of the guest binary: the vsock port is 51234 (0xC822), matching the Hyper-V GUID in the Windows service. - Change VSOCK_GUEST_PORT from 2222 to 51234 - Reverse socat bridge: VSOCK-LISTEN (host listens) instead of VSOCK-CONNECT (host connecting to guest) - Add bridge server to accept persistent guest connection and route events/responses via callback map - Fix kernel cmdline: root=LABEL=cloudimg-rootfs (matches fstab) Co-Authored-By: Claude <claude@anthropic.com> * fix: patch VM download to use disk-backed temp dir on Linux Linux systems often mount /tmp as a small tmpfs (3-4GB). The VM rootfs download decompresses to ~9GB, causing ENOSPC. Patch the app's mkdtemp("wvm-") call to use the bundle directory (on real disk) instead of os.tmpdir() on Linux. 
Uses regex-based dynamic variable extraction to remain version-agnostic across minified code changes. Co-Authored-By: Claude <claude@anthropic.com> * fix: handle stale cowork socket (ECONNREFUSED) on Linux Stale sockets from previous sessions give ECONNREFUSED instead of ENOENT, bypassing the retry loop and auto-launch entirely. Fix: - Expand ENOENT check in retry loop to include ECONNREFUSED on Linux - Add cleanup_stale_cowork_socket() to launcher scripts (all formats) that removes dead sockets before Electron starts - Increase tmpdir patch search window from 1000 to 2000 chars Co-Authored-By: Claude <claude@anthropic.com> * fix: preserve DNS resolution inside bwrap sandbox On systems using systemd-resolved, /etc/resolv.conf is a symlink to /run/systemd/resolve/stub-resolv.conf. The bwrap --tmpfs /run option wiped this out, breaking DNS resolution inside the sandbox and preventing the spawned Claude process from reaching the API. Bind-mount the resolved /run/systemd/resolve/ directory back into the sandbox as read-only to restore DNS. Co-Authored-By: Claude <claude@anthropic.com> * fix: harden cowork isolation and build patches - Remove broken _setupEventForwarding (events already flow through _handleGuestData); the second bridge connection was silently ignored - Mount $HOME read-only in bwrap sandbox; only workDir and explicit mounts are writable (prevents writes to ~/.ssh, ~/.gnupg, etc.) - Scope Patch 4 win32 extraction to actual win32:{} block via brace counting to avoid crossing into darwin/linux sections - Set _qmpAvailable flag on QMP timeout instead of silently continuing - Wrap CID allocation at 65535 to prevent unbounded growth - Use execFileSync instead of execSync('which ...') in detectBackend - Coerce response ID to String for Map lookup in _handleGuestData - Use non-greedy [\s\S]*? 
in Patch 6 regex for nested brace robustness - Update patch count threshold from 4 to 5 after adding Patch 8 - Add age-based fallback for stale socket cleanup when socat is missing - Use indexOf-based splice in Patch 8 instead of string.replace() - Extract shared resolveSdkBinary helper to deduplicate SDK resolution - Remove dead retryFuncRe/retryFuncMatch variables from Patch 6 Co-Authored-By: Claude <claude@anthropic.com> * fix: address security and correctness issues from code review - Replace execSync string interpolation with execFileSync for qemu-img calls to eliminate shell injection risk - Add path validation to readFile in both LocalBackend and KvmBackend to restrict reads to within the user's home directory - Fix QMP _sendQmpCommand timer leak by clearing timeout on success - Fix _pendingCallbacks.delete() to use String(msg.id) matching the String(msg.id) used in the .get() lookup - Extract FORWARDED_EVENTS constant, cleanup helper, extractBlock helper, and consolidate doctor tool checks (from simplifier pass) Co-Authored-By: Claude <claude@anthropic.com> * docs: update README cowork notice with isolation backends and doctor info Co-Authored-By: Claude <claude@anthropic.com> --------- Co-authored-by: Claude <claude@anthropic.com>
2026-02-28 21:13:09 -05:00
fi
fix: launcher-common.sh self-match and stale socket cleanup (#407) (#425) * fix: launcher-common.sh self-match and stale socket cleanup (#407) Three related bugs in scripts/launcher-common.sh that combine to break Claude Desktop startup after any crash that reparents the cowork daemon on Debian/Ubuntu/Mint systems. 1. cleanup_orphaned_cowork_daemon — the old pgrep pattern 'claude-desktop' self-matches the launcher's own bash process (cmdline `bash /usr/bin/claude-desktop`), causing the function to return early on every invocation. The SIGTERM loop never runs. Replaced with `pgrep -f 'app\.asar'` plus $$/$PPID exclusion, --type= filter (skips chromium helpers), and /proc/*/status check (skips stopped/zombie launcher bashes). Added SIGKILL escalation after ~2s so cleanup_stale_cowork_socket reliably sees no daemon. 2. cleanup_stale_cowork_socket — the old implementation required socat (not preinstalled on Debian/Ubuntu/Mint) and fell through to a find -mmin +1440 check that ignored any socket younger than 24h. Rewritten to use the ordering invariant: cleanup_orphaned_cowork_daemon runs first and kills any orphan, so at this point an extant daemon proves the socket is live and an absent daemon proves the socket is stale. No socat dependency. 3. run_doctor orphan check — same self-match flaw as (1). claude-desktop --doctor reported [PASS] Cowork daemon: running (parent alive) on systems with a genuine orphan, actively misleading users trying to diagnose this failure. Applied the same detection primitive as (1). Complements #410 (daemon-side crash recovery): #410 reduces how often orphans are created; this ensures the launcher actually cleans them up when they are. 
Fixes #407 Co-Authored-By: martin152 <martin152@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * docs: credit martin152 in Acknowledgments for #407 launcher fix Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * style: quote RHS of $$/$PPID comparisons (SC2053) Shellcheck SC2053: quote RHS in [[ ]] equality tests to prevent glob matching. No behavior change — $$ and $PPID are always numeric PIDs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: martin152 <martin152@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 19:28:22 -05:00
# No daemon — the socket file is left over from a crash.
feat: KVM/bwrap isolation backends for cowork mode (#269) * feat: add KVM/bwrap isolation backends for cowork mode Refactor cowork-vm-service.js from monolithic VMManager into a pluggable backend architecture with three isolation levels: - HostBackend: direct execution on host (existing Phase 1 behavior) - BwrapBackend: bubblewrap namespace sandbox (PID/mount isolation) - KvmBackend: full QEMU/KVM VM with vsock, virtiofs, QMP monitor Backend auto-detected at startup (KVM > bwrap > host) or overridden via COWORK_VM_BACKEND env var. Shared helpers extracted for env filtering, arg cleanup, command resolution, and work dir handling. Also: - Add Cowork Mode section to --doctor diagnostics with per-tool checks and distro-specific install hints - Update Patch 4 to extract real win32 file entries for linux bundle manifest (enables app download infrastructure) - Update handover documentation for Phase 2/3 architecture Co-Authored-By: Claude <claude@anthropic.com> * fix: correct KvmBackend vsock port, direction, and kernel cmdline The guest sdk-daemon connects TO the host (CID=2), not the other way around. Confirmed via disassembly of the guest binary: the vsock port is 51234 (0xC822), matching the Hyper-V GUID in the Windows service. - Change VSOCK_GUEST_PORT from 2222 to 51234 - Reverse socat bridge: VSOCK-LISTEN (host listens) instead of VSOCK-CONNECT (host connecting to guest) - Add bridge server to accept persistent guest connection and route events/responses via callback map - Fix kernel cmdline: root=LABEL=cloudimg-rootfs (matches fstab) Co-Authored-By: Claude <claude@anthropic.com> * fix: patch VM download to use disk-backed temp dir on Linux Linux systems often mount /tmp as a small tmpfs (3-4GB). The VM rootfs download decompresses to ~9GB, causing ENOSPC. Patch the app's mkdtemp("wvm-") call to use the bundle directory (on real disk) instead of os.tmpdir() on Linux. 
Uses regex-based dynamic variable extraction to remain version-agnostic across minified code changes. Co-Authored-By: Claude <claude@anthropic.com> * fix: handle stale cowork socket (ECONNREFUSED) on Linux Stale sockets from previous sessions give ECONNREFUSED instead of ENOENT, bypassing the retry loop and auto-launch entirely. Fix: - Expand ENOENT check in retry loop to include ECONNREFUSED on Linux - Add cleanup_stale_cowork_socket() to launcher scripts (all formats) that removes dead sockets before Electron starts - Increase tmpdir patch search window from 1000 to 2000 chars Co-Authored-By: Claude <claude@anthropic.com> * fix: preserve DNS resolution inside bwrap sandbox On systems using systemd-resolved, /etc/resolv.conf is a symlink to /run/systemd/resolve/stub-resolv.conf. The bwrap --tmpfs /run option wiped this out, breaking DNS resolution inside the sandbox and preventing the spawned Claude process from reaching the API. Bind-mount the resolved /run/systemd/resolve/ directory back into the sandbox as read-only to restore DNS. Co-Authored-By: Claude <claude@anthropic.com> * fix: harden cowork isolation and build patches - Remove broken _setupEventForwarding (events already flow through _handleGuestData); the second bridge connection was silently ignored - Mount $HOME read-only in bwrap sandbox; only workDir and explicit mounts are writable (prevents writes to ~/.ssh, ~/.gnupg, etc.) - Scope Patch 4 win32 extraction to actual win32:{} block via brace counting to avoid crossing into darwin/linux sections - Set _qmpAvailable flag on QMP timeout instead of silently continuing - Wrap CID allocation at 65535 to prevent unbounded growth - Use execFileSync instead of execSync('which ...') in detectBackend - Coerce response ID to String for Map lookup in _handleGuestData - Use non-greedy [\s\S]*? 
in Patch 6 regex for nested brace robustness - Update patch count threshold from 4 to 5 after adding Patch 8 - Add age-based fallback for stale socket cleanup when socat is missing - Use indexOf-based splice in Patch 8 instead of string.replace() - Extract shared resolveSdkBinary helper to deduplicate SDK resolution - Remove dead retryFuncRe/retryFuncMatch variables from Patch 6 Co-Authored-By: Claude <claude@anthropic.com> * fix: address security and correctness issues from code review - Replace execSync string interpolation with execFileSync for qemu-img calls to eliminate shell injection risk - Add path validation to readFile in both LocalBackend and KvmBackend to restrict reads to within the user's home directory - Fix QMP _sendQmpCommand timer leak by clearing timeout on success - Fix _pendingCallbacks.delete() to use String(msg.id) matching the String(msg.id) used in the .get() lookup - Extract FORWARDED_EVENTS constant, cleanup helper, extractBlock helper, and consolidate doctor tool checks (from simplifier pass) Co-Authored-By: Claude <claude@anthropic.com> * docs: update README cowork notice with isolation backends and doctor info Co-Authored-By: Claude <claude@anthropic.com> --------- Co-authored-by: Claude <claude@anthropic.com>
2026-02-28 21:13:09 -05:00
rm -f "$sock"
fix: launcher-common.sh self-match and stale socket cleanup (#407) (#425) * fix: launcher-common.sh self-match and stale socket cleanup (#407) Three related bugs in scripts/launcher-common.sh that combine to break Claude Desktop startup after any crash that reparents the cowork daemon on Debian/Ubuntu/Mint systems. 1. cleanup_orphaned_cowork_daemon — the old pgrep pattern 'claude-desktop' self-matches the launcher's own bash process (cmdline `bash /usr/bin/claude-desktop`), causing the function to return early on every invocation. The SIGTERM loop never runs. Replaced with `pgrep -f 'app\.asar'` plus $$/$PPID exclusion, --type= filter (skips chromium helpers), and /proc/*/status check (skips stopped/zombie launcher bashes). Added SIGKILL escalation after ~2s so cleanup_stale_cowork_socket reliably sees no daemon. 2. cleanup_stale_cowork_socket — the old implementation required socat (not preinstalled on Debian/Ubuntu/Mint) and fell through to a find -mmin +1440 check that ignored any socket younger than 24h. Rewritten to use the ordering invariant: cleanup_orphaned_cowork_daemon runs first and kills any orphan, so at this point an extant daemon proves the socket is live and an absent daemon proves the socket is stale. No socat dependency. 3. run_doctor orphan check — same self-match flaw as (1). claude-desktop --doctor reported [PASS] Cowork daemon: running (parent alive) on systems with a genuine orphan, actively misleading users trying to diagnose this failure. Applied the same detection primitive as (1). Complements #410 (daemon-side crash recovery): #410 reduces how often orphans are created; this ensures the launcher actually cleans them up when they are. 
Fixes #407 Co-Authored-By: martin152 <martin152@users.noreply.github.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * docs: credit martin152 in Acknowledgments for #407 launcher fix Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * style: quote RHS of $$/$PPID comparisons (SC2053) Shellcheck SC2053: quote RHS in [[ ]] equality tests to prevent glob matching. No behavior change — $$ and $PPID are always numeric PIDs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> --------- Co-authored-by: martin152 <martin152@users.noreply.github.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-18 19:28:22 -05:00
log_message "Removed stale cowork-vm-service socket (no daemon running)"
feat: KVM/bwrap isolation backends for cowork mode (#269) * feat: add KVM/bwrap isolation backends for cowork mode Refactor cowork-vm-service.js from monolithic VMManager into a pluggable backend architecture with three isolation levels: - HostBackend: direct execution on host (existing Phase 1 behavior) - BwrapBackend: bubblewrap namespace sandbox (PID/mount isolation) - KvmBackend: full QEMU/KVM VM with vsock, virtiofs, QMP monitor Backend auto-detected at startup (KVM > bwrap > host) or overridden via COWORK_VM_BACKEND env var. Shared helpers extracted for env filtering, arg cleanup, command resolution, and work dir handling. Also: - Add Cowork Mode section to --doctor diagnostics with per-tool checks and distro-specific install hints - Update Patch 4 to extract real win32 file entries for linux bundle manifest (enables app download infrastructure) - Update handover documentation for Phase 2/3 architecture Co-Authored-By: Claude <claude@anthropic.com> * fix: correct KvmBackend vsock port, direction, and kernel cmdline The guest sdk-daemon connects TO the host (CID=2), not the other way around. Confirmed via disassembly of the guest binary: the vsock port is 51234 (0xC822), matching the Hyper-V GUID in the Windows service. - Change VSOCK_GUEST_PORT from 2222 to 51234 - Reverse socat bridge: VSOCK-LISTEN (host listens) instead of VSOCK-CONNECT (host connecting to guest) - Add bridge server to accept persistent guest connection and route events/responses via callback map - Fix kernel cmdline: root=LABEL=cloudimg-rootfs (matches fstab) Co-Authored-By: Claude <claude@anthropic.com> * fix: patch VM download to use disk-backed temp dir on Linux Linux systems often mount /tmp as a small tmpfs (3-4GB). The VM rootfs download decompresses to ~9GB, causing ENOSPC. Patch the app's mkdtemp("wvm-") call to use the bundle directory (on real disk) instead of os.tmpdir() on Linux. 
Uses regex-based dynamic variable extraction to remain version-agnostic across minified code changes. Co-Authored-By: Claude <claude@anthropic.com> * fix: handle stale cowork socket (ECONNREFUSED) on Linux Stale sockets from previous sessions give ECONNREFUSED instead of ENOENT, bypassing the retry loop and auto-launch entirely. Fix: - Expand ENOENT check in retry loop to include ECONNREFUSED on Linux - Add cleanup_stale_cowork_socket() to launcher scripts (all formats) that removes dead sockets before Electron starts - Increase tmpdir patch search window from 1000 to 2000 chars Co-Authored-By: Claude <claude@anthropic.com> * fix: preserve DNS resolution inside bwrap sandbox On systems using systemd-resolved, /etc/resolv.conf is a symlink to /run/systemd/resolve/stub-resolv.conf. The bwrap --tmpfs /run option wiped this out, breaking DNS resolution inside the sandbox and preventing the spawned Claude process from reaching the API. Bind-mount the resolved /run/systemd/resolve/ directory back into the sandbox as read-only to restore DNS. Co-Authored-By: Claude <claude@anthropic.com> * fix: harden cowork isolation and build patches - Remove broken _setupEventForwarding (events already flow through _handleGuestData); the second bridge connection was silently ignored - Mount $HOME read-only in bwrap sandbox; only workDir and explicit mounts are writable (prevents writes to ~/.ssh, ~/.gnupg, etc.) - Scope Patch 4 win32 extraction to actual win32:{} block via brace counting to avoid crossing into darwin/linux sections - Set _qmpAvailable flag on QMP timeout instead of silently continuing - Wrap CID allocation at 65535 to prevent unbounded growth - Use execFileSync instead of execSync('which ...') in detectBackend - Coerce response ID to String for Map lookup in _handleGuestData - Use non-greedy [\s\S]*? 
in Patch 6 regex for nested brace robustness - Update patch count threshold from 4 to 5 after adding Patch 8 - Add age-based fallback for stale socket cleanup when socat is missing - Use indexOf-based splice in Patch 8 instead of string.replace() - Extract shared resolveSdkBinary helper to deduplicate SDK resolution - Remove dead retryFuncRe/retryFuncMatch variables from Patch 6 Co-Authored-By: Claude <claude@anthropic.com> * fix: address security and correctness issues from code review - Replace execSync string interpolation with execFileSync for qemu-img calls to eliminate shell injection risk - Add path validation to readFile in both LocalBackend and KvmBackend to restrict reads to within the user's home directory - Fix QMP _sendQmpCommand timer leak by clearing timeout on success - Fix _pendingCallbacks.delete() to use String(msg.id) matching the String(msg.id) used in the .get() lookup - Extract FORWARDED_EVENTS constant, cleanup helper, extractBlock helper, and consolidate doctor tool checks (from simplifier pass) Co-Authored-By: Claude <claude@anthropic.com> * docs: update README cowork notice with isolation backends and doctor info Co-Authored-By: Claude <claude@anthropic.com> --------- Co-authored-by: Claude <claude@anthropic.com>
2026-02-28 21:13:09 -05:00
}
# Set common environment variables for the Electron process.
#
# Globals:
#   ELECTRON_FORCE_IS_PACKAGED     exported, always set to "true"
#   ELECTRON_USE_SYSTEM_TITLE_BAR  exported as 1 unless the resolved
#                                  titlebar style is 'hidden'
#   CLAUDE_GTK_IM_MODULE           read; optional user override
#   GTK_IM_MODULE                  read for logging; overwritten and
#                                  exported when the override is non-empty
# Calls:     _resolve_titlebar_style, log_message
# Arguments: none
# Returns:   status of the last command executed (log_message's status
#            when the GTK_IM_MODULE override is applied, otherwise 0)
setup_electron_env() {
  # ELECTRON_FORCE_IS_PACKAGED makes app.isPackaged return true, which
  # causes the Claude app to resolve resources via process.resourcesPath.
  # The Nix derivation creates a custom Electron tree with the binary
  # copied and app resources co-located in resources/, so resourcesPath
  # naturally points to the right place on all package types.
  export ELECTRON_FORCE_IS_PACKAGED=true

  # ELECTRON_USE_SYSTEM_TITLE_BAR=1 forces a system titlebar at the
  # Electron level. Set in 'native' and 'hybrid' modes (both use
  # frame:true); skipped in 'hidden' mode (frame:false + WCO config).
  # Note: the variable is only ever set here, never unset, so a value
  # inherited from the caller's environment survives in 'hidden' mode.
  if [[ $(_resolve_titlebar_style) != 'hidden' ]]; then
    export ELECTRON_USE_SYSTEM_TITLE_BAR=1
  fi

  # CLAUDE_GTK_IM_MODULE: opt-in override for users hit by broken
  # IBus integration on Linux (#549). Propagated to GTK_IM_MODULE
  # so e.g. `xim` can be persisted without wrapping every launch.
  if [[ -n ${CLAUDE_GTK_IM_MODULE:-} ]]; then
    # Capture the previous value before overwriting it so the log line
    # records the transition; '<unset>' marks an absent or empty value.
    local prev="${GTK_IM_MODULE:-<unset>}"
    export GTK_IM_MODULE="$CLAUDE_GTK_IM_MODULE"
    log_message \
      "GTK_IM_MODULE override: $prev -> $GTK_IM_MODULE (via CLAUDE_GTK_IM_MODULE)"
  fi
}
#===============================================================================
# Doctor Diagnostics
#
# run_doctor and its helpers live in doctor.sh alongside this file. Sourced
# here so any consumer of launcher-common.sh gets the full run_doctor entry
# point without needing to know about the split. Each packaging target
# (deb/rpm/AppImage/Nix) installs doctor.sh next to launcher-common.sh.
#
# The path is built from this file's own location (BASH_SOURCE[0]) rather
# than the caller's working directory. The cd runs inside a command
# substitution, so the caller's working directory is left unchanged.
#===============================================================================
# shellcheck source=scripts/doctor.sh
source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/doctor.sh"