From 5ead361c04ecee67abc4b123742b3c362cde6478 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Fri, 17 Apr 2026 15:36:07 +0000 Subject: [PATCH] talk: improve startup ordering, NATS tuning, signaling timeouts, and healthcheck depth Agent-Logs-Url: https://github.com/nextcloud/all-in-one/sessions/7fd39619-0f63-494c-902c-746f00d17d02 Co-Authored-By: szaimen <42591237+szaimen@users.noreply.github.com> Signed-off-by: Simon L. --- Containers/talk/Dockerfile | 4 +++- Containers/talk/healthcheck.sh | 3 +++ Containers/talk/start.sh | 8 ++++++-- Containers/talk/supervisord.conf | 22 +++++++++++++++------- 4 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Containers/talk/Dockerfile b/Containers/talk/Dockerfile index 0d305af0..b8c1d167 100644 --- a/Containers/talk/Dockerfile +++ b/Containers/talk/Dockerfile @@ -82,7 +82,9 @@ RUN set -ex; \ touch \ /etc/nats.conf \ /etc/eturnal.yml; \ - echo "listen: 127.0.0.1:4222" | tee /etc/nats.conf; \ +# write_deadline: "10s" — pinned explicitly (this matches the documented NATS default): a subscriber that cannot accept a write within this deadline is disconnected as a slow consumer instead of stalling the broker and blocking all other signaling messages. +# max_payload: 8MB — the default is 1 MB; signaling payloads in large meetings (many participants, ICE candidates) can exceed this, causing dropped messages. + printf 'listen: 127.0.0.1:4222\nwrite_deadline: "10s"\nmax_payload: 8MB\n' | tee /etc/nats.conf; \ mkdir -p \ /var/tmp \ /conf \ diff --git a/Containers/talk/healthcheck.sh b/Containers/talk/healthcheck.sh index e4544763..7501d13b 100644 --- a/Containers/talk/healthcheck.sh +++ b/Containers/talk/healthcheck.sh @@ -5,3 +5,6 @@ nc -z 127.0.0.1 8188 || exit 1 nc -z 127.0.0.1 4222 || exit 1 nc -z 127.0.0.1 "$TALK_PORT" || exit 1 eturnalctl status || exit 1 +# Verify that the signaling server is actually serving requests, not just +# listening on the TCP port (the nc -z checks above only test that the port is open). 
+wget -q -O /dev/null http://127.0.0.1:8081/api/v1/stats || exit 1 diff --git a/Containers/talk/start.sh b/Containers/talk/start.sh index 9a04333c..07db0f87 100644 --- a/Containers/talk/start.sh +++ b/Containers/talk/start.sh @@ -91,10 +91,12 @@ if [ -z "$TALK_MAX_SCREEN_BITRATE" ]; then TALK_MAX_SCREEN_BITRATE=2097152 fi -# Signling +# Signaling cat << SIGNALING_CONF > "/conf/signaling.conf" [http] listen = 0.0.0.0:8081 +readtimeout = 15 +writetimeout = 30 [app] debug = false @@ -110,7 +112,9 @@ internalsecret = ${INTERNAL_SECRET} backends = backend-1 allowall = false timeout = 10 -connectionsperhost = 8 +# connectionsperhost: This is the HTTP keep-alive connection pool size from the signaling server to the Nextcloud backend. +# Under load (many concurrent calls joining/leaving simultaneously) a pool of 8 creates a queue bottleneck for backend authentication and session lookups, thus increasing to 32. +connectionsperhost = 32 skipverify = ${SKIP_CERT_VERIFY} [backend-1] diff --git a/Containers/talk/supervisord.conf b/Containers/talk/supervisord.conf index f83cdfd8..7751a0bf 100644 --- a/Containers/talk/supervisord.conf +++ b/Containers/talk/supervisord.conf @@ -7,19 +7,23 @@ logfile_maxbytes=50MB logfile_backups=10 loglevel=error -[program:eturnal] -stdout_logfile=/dev/stdout -stdout_logfile_maxbytes=0 -stderr_logfile=/dev/stderr -stderr_logfile_maxbytes=0 -command=eturnalctl foreground - [program:nats-server] stdout_logfile=/dev/stdout stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 command=nats-server -c /etc/nats.conf +# Start first: signaling depends on NATS being available +priority=10 + +[program:eturnal] +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 +command=eturnalctl foreground +# Start alongside Janus; independent of signaling +priority=20 [program:janus] stdout_logfile=/dev/stdout @@ -28,6 +32,8 @@ stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 # 
debug-level 3 means warning command=janus --config=/conf/janus.jcfg --disable-colors --log-stdout --full-trickle --debug-level 3 +# Start alongside eturnal; signaling connects to Janus via WebSocket +priority=20 [program:signaling] stdout_logfile=/dev/stdout @@ -35,3 +41,5 @@ stdout_logfile_maxbytes=0 stderr_logfile=/dev/stderr stderr_logfile_maxbytes=0 command=nextcloud-spreed-signaling -config /conf/signaling.conf +# Start last: signaling depends on NATS (priority=10) and Janus (priority=20). Note: priority only orders when supervisord spawns each program; it does not wait for them to be ready. +priority=30