Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5d87ebf
Squashed commit: uberjob work, protobuf removal, xrootd removal
iagaponenko Apr 5, 2024
a0b03f7
CzarFamilyMap create now waits for a successful read.
jgates108 Jul 22, 2024
bfd4ce5
Added worker executable.
jgates108 May 14, 2025
35fd6cc
Added memory/disk hybrid for transfering csv files.
jgates108 Jul 14, 2025
48ce147
Added worker exe, memory/disk hybrid for transfering csv files, fixed…
jgates108 Dec 18, 2024
b6722ab
UberJobReadyMsg transmit retries are now sent via WorkerCzarComIssue …
jgates108 Nov 19, 2025
0434b21
Rebases fixes.
jgates108 Nov 13, 2025
90c82f8
Merge pull request #964 from lsst/tickets/DM-51870
jgates108 Oct 13, 2025
aab0953
Review changes
jgates108 Dec 11, 2025
4ba9123
Merge pull request #976 from lsst/tickets/DM-52880
jgates108 Nov 17, 2025
c6a299b
Merge pull request #978 from lsst/tickets/DM-53242
jgates108 Dec 15, 2025
eb8a3cd
cmake cleanups
fritzm Oct 18, 2025
de7b1de
Whitespace cleanups
fritzm Oct 18, 2025
e809440
Don't build DEBUG by default
fritzm Oct 18, 2025
cdf94d0
Fix USING_VMUTEX compiler warn
fritzm Oct 16, 2025
33266af
Worker app cleanups
fritzm Oct 19, 2025
7179f2e
Foreman cleanups
fritzm Oct 18, 2025
41a3f64
Support explicitly named workers
fritzm Oct 25, 2025
0376870
Use password for qsreplica user
fritzm Oct 27, 2025
a7f3020
Added detection of missing tables and recovery, and improved errors.
jgates108 Dec 19, 2025
9b6c225
Support empty chunk map
fritzm Oct 26, 2025
b045ea7
Rename .cnf to .cfg consistently
fritzm Dec 10, 2025
77de976
Improved error information.
jgates108 Jan 29, 2026
629be49
Remove local db check at czar launch
fritzm Oct 29, 2025
1c8ea8e
Merge branch 'tickets/DM-53921' into tickets/DM-43715
fritzm Jan 24, 2026
9829e9d
Review changes
jgates108 Feb 10, 2026
d789ac0
Merge branch 'tickets/DM-54066' into tickets/DM-43715
fritzm Feb 6, 2026
c93338f
Fixed czar lockup due to shutdown worker.
jgates108 Feb 19, 2026
7c19d3e
Merge pull request #986 from lsst/tickets/DM-53238
jgates108 Feb 10, 2026
7723a40
Use tini in ingest helper container
fritzm Mar 12, 2026
b26c8d0
Initial helm chart
fritzm Oct 25, 2025
91328bf
Merge pull request #996 from lsst/tickets/DM-54111
jgates108 Feb 19, 2026
6b899d8
Passes integration tests in both modes.
jgates108 Mar 24, 2026
5ff2d8b
Add czar external service to Helm chart
fritzm Mar 12, 2026
9563cbc
Merge branch 'tickets/DM-52516' into tickets/DM-43715
fritzm Feb 22, 2026
a28c6aa
Improved messaging, added unit tests.
jgates108 Mar 30, 2026
70cfd98
Merge branch 'tickets/DM-54393' into tickets/DM-43715
fritzm Mar 20, 2026
caaf3f4
Merge pull request #1015 from lsst/tickets/DM-54206
jgates108 Apr 8, 2026
99f61f9
Fixed czar-http to return error messages from workers.
jgates108 Apr 16, 2026
bb553fb
Merge pull request #1020 from lsst/tickets/DM-54109
jgates108 Apr 17, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
24 changes: 4 additions & 20 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -334,37 +334,21 @@ jobs:
if: always()
run: docker logs ${USER}-czar-http-1

- name: Czar CMSD Log
if: always()
run: docker logs ${USER}-czar-cmsd-1

- name: Czar XROOTD Log
if: always()
run: docker logs ${USER}-czar-xrootd-1

- name: Czar MariaDB Log
if: always()
run: docker logs ${USER}-czar-mariadb-1

- name: Qzerv Worker 0 CMSD Log
- name: Qzerv Worker 0 worker-svc Log
if: always()
run: docker logs ${USER}-worker-cmsd-0-1

- name: Qzerv Worker 0 XROOTD Log
if: always()
run: docker logs ${USER}-worker-xrootd-0-1
run: docker logs ${USER}-worker-svc-0-1

- name: Qzerv Worker 0 MariaDB Log
if: always()
run: docker logs ${USER}-worker-mariadb-0-1

- name: Qzerv Worker 1 CMSD Log
if: always()
run: docker logs ${USER}-worker-cmsd-1-1

- name: Qzerv Worker 1 XROOTD Log
- name: Qzerv Worker 1 worker-svc Log
if: always()
run: docker logs ${USER}-worker-xrootd-1-1
run: docker logs ${USER}-worker-svc-1-1

- name: Qzerv Worker 1 MariaDB Log
if: always()
Expand Down
173 changes: 35 additions & 138 deletions deploy/compose/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,11 @@ x-log-volume:
- type: bind
source: ./log/
target: /config-etc/log/
x-worker-cmsd:
&worker-cmsd
image: "${QSERV_IMAGE:?err}"
init: true
# ports are published in worker-xrootd because this container uses that container's network stack.
x-worker-xrootd:
&worker-xrootd
x-worker-svc:
&worker-svc
image: "${QSERV_IMAGE:?err}"
init: true
expose:
- "1094"
- "2131"
- "3306" # for the worker db, which shares this container's network stack.
x-repl-worker:
&repl-worker
Expand All @@ -43,6 +36,7 @@ volumes:
volume_czar_xrootd:
volume_czar_home:
volume_czar_cfg:
volume_czar_transfer:

volume_czar_mariadb_data:
volume_czar_mariadb_cfg:
Expand All @@ -53,14 +47,12 @@ volumes:

volume_worker_0_data:
volume_worker_0_results:
volume_worker_0_xrootd:
volume_worker_0_home:
volume_worker_0_mariadb_lib:
volume_worker_0_mariadb_run:

volume_worker_1_data:
volume_worker_1_results:
volume_worker_1_xrootd:
volume_worker_1_home:
volume_worker_1_mariadb_lib:
volume_worker_1_mariadb_run:
Expand Down Expand Up @@ -97,29 +89,25 @@ services:
- type: volume
source: volume_worker_0_mariadb_run
target: /var/run/mysqld # This is where the mariadb container puts the socket file
network_mode: "service:worker-xrootd-0"
worker-xrootd-0:
<< : *worker-xrootd
network_mode: "service:worker-svc-0"

worker-svc-0:
<< : *worker-svc
command: >
entrypoint worker-xrootd
entrypoint worker-svc
--db-uri mysql://qsmaster:CHANGEME@127.0.0.1:3306
--db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306
--vnid-config "@/usr/local/lib64/libreplica.so {{db_uri}}/qservw_worker 0 0"
--repl-instance-id qserv_proj
--repl-auth-key replauthkey
--repl-admin-auth-key=repladminauthkey
--repl-registry-host repl-registry
--repl-registry-port 25082
--results-dirname /qserv/data/results
--cmsd-manager-name czar-xrootd
--log-cfg-file=/config-etc/log/log-worker-xrootd.cnf
--log-cfg-file=/config-etc/log/log-worker-svc.cfg
volumes:
- type: volume
source: volume_worker_0_results
target: /qserv/data/results
- type: volume
source: volume_worker_0_xrootd
target: /var/run/xrootd
- type: volume
source: volume_worker_0_home
target: /home/qserv
Expand All @@ -130,43 +118,14 @@ services:
networks:
default:
aliases:
- worker-cmsd-0
- worker-mariadb-0
worker-cmsd-0:
<< : *worker-cmsd
command: >
entrypoint worker-cmsd
--db-uri mysql://qsmaster:CHANGEME@worker-mariadb-0:3306
--vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster:CHANGEME@127.0.0.1:3306/qservw_worker 0 0"
--results-dirname /qserv/data/results
--repl-instance-id qserv_proj
--repl-auth-key replauthkey
--repl-admin-auth-key=repladminauthkey
--repl-registry-host repl-registry
--repl-registry-port 25082
--cmsd-manager-name czar-xrootd
network_mode: "service:worker-xrootd-0"
volumes:
- type: volume
source: volume_worker_0_results
target: /qserv/data/results
- type: volume
source: volume_worker_0_xrootd
target: /var/run/xrootd
- type: volume
source: volume_worker_0_home
target: /home/qserv
- type: volume
source: volume_worker_0_mariadb_run
target: /qserv/mariadb/run # This matches the ?socket=... location in --db-uri and --db-admin-uri
- << : *log-volume
repl-worker-0:
<< : *repl-worker
command: >
entrypoint worker-repl
--db-admin-uri mysql://root:CHANGEME@worker-mariadb-0:3306/qservw_worker
--repl-connection mysql://qsreplica@repl-mariadb:3306/qservReplica
--log-cfg-file=/config-etc/log/log-repl-worker.cnf
--repl-connection mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica
--log-cfg-file=/config-etc/log/log-repl-worker.cfg
--
--instance-id=qserv_proj
--auth-key=replauthkey
Expand All @@ -184,6 +143,7 @@ services:
source: volume_worker_0_home
target: /home/qserv
- << : *log-volume

# worker 1 uses and validates socket file (where possible) to connect to the worker-mariadb
worker-mariadb-1:
<< : *worker-mariadb
Expand All @@ -201,30 +161,26 @@ services:
- type: volume
source: volume_worker_1_mariadb_run
target: /var/run/mysqld # This is where the mariadb container puts the socket file
network_mode: "service:worker-xrootd-1"
worker-xrootd-1:
<< : *worker-xrootd
network_mode: "service:worker-svc-1"

worker-svc-1:
<< : *worker-svc
command: >
entrypoint --log-level DEBUG worker-xrootd
entrypoint --log-level DEBUG worker-svc
--db-uri mysql://qsmaster:CHANGEME@127.0.0.1:3306?socket={{db_socket}}
--db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306?socket={{db_socket}}
--vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster:CHANGEME@127.0.0.1:3306/qservw_worker 0 0"
--repl-instance-id qserv_proj
--repl-auth-key replauthkey
--repl-admin-auth-key=repladminauthkey
--repl-registry-host repl-registry
--repl-registry-port 25082
--results-dirname /qserv/data/results
--cmsd-manager-name czar-xrootd
--targs db_socket=/qserv/mariadb/run/mysqld.sock
--log-cfg-file=/config-etc/log/log-worker-xrootd.cnf
--log-cfg-file=/config-etc/log/log-worker-svc.cfg
volumes:
- type: volume
source: volume_worker_1_results
target: /qserv/data/results
- type: volume
source: volume_worker_1_xrootd
target: /var/run/xrootd
- type: volume
source: volume_worker_1_home
target: /home/qserv
Expand All @@ -235,44 +191,15 @@ services:
networks:
default:
aliases:
- worker-cmsd-1
- worker-mariadb-1
worker-cmsd-1:
<< : *worker-cmsd
command: >
entrypoint --log-level DEBUG worker-cmsd
--db-uri mysql://qsmaster:CHANGEME@worker-mariadb-1:3306?socket=/qserv/mariadb/run/mysqld.sock
--vnid-config "@/usr/local/lib64/libreplica.so mysql://qsmaster:CHANGEME@127.0.0.1:3306/qservw_worker 0 0"
--results-dirname /qserv/data/results
--repl-instance-id qserv_proj
--repl-auth-key replauthkey
--repl-admin-auth-key=repladminauthkey
--repl-registry-host repl-registry
--repl-registry-port 25082
--cmsd-manager-name czar-xrootd
network_mode: "service:worker-xrootd-1"
volumes:
- type: volume
source: volume_worker_1_results
target: /qserv/data/results
- type: volume
source: volume_worker_1_xrootd
target: /var/run/xrootd
- type: volume
source: volume_worker_1_home
target: /home/qserv
- type: volume
source: volume_worker_1_mariadb_run
target: /qserv/mariadb/run
- << : *log-volume
repl-worker-1:
<< : *repl-worker
# qserv-replica-worker app does not support socket file yet.
command: >
entrypoint worker-repl
--db-admin-uri mysql://root:CHANGEME@worker-mariadb-1:3306/qservw_worker
--repl-connection mysql://qsreplica@repl-mariadb:3306/qservReplica
--log-cfg-file=/config-etc/log/log-repl-worker.cnf
--repl-connection mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica
--log-cfg-file=/config-etc/log/log-repl-worker.cfg
--
--instance-id=qserv_proj
--auth-key=replauthkey
Expand All @@ -290,42 +217,7 @@ services:
source: volume_worker_1_home
target: /home/qserv
- << : *log-volume
czar-xrootd:
image: "${QSERV_IMAGE:?err}"
init: true
command: >
entrypoint xrootd-manager
--cmsd-manager-name czar-xrootd
hostname: czar-xrootd
expose:
- "1094"
- "2131"
volumes:
- type: volume
source: volume_czar_xrootd
target: /var/run/xrootd
- type: volume
source: volume_worker_1_home
target: /home/qserv
- << : *log-volume
networks:
default:
aliases:
- czar-cmsd
czar-cmsd:
image: "${QSERV_IMAGE:?err}"
init: true
# NOTE!! cms-delay-servers must match the number of workers being launched!
command: entrypoint cmsd-manager --cms-delay-servers 2
network_mode: "service:czar-xrootd"
volumes:
- type: volume
source: volume_czar_xrootd
target: /var/run/xrootd
- type: volume
source: volume_czar_home
target: /home/qserv
- << : *log-volume

czar-mariadb:
image: "${QSERV_MARIADB_IMAGE:?err}"
init: true
Expand All @@ -349,6 +241,7 @@ services:
- type: volume
source: volume_czar_mariadb_run
target: /var/run/mysqld

czar-proxy:
image: "${QSERV_IMAGE:?err}"
init: true
Expand All @@ -357,8 +250,7 @@ services:
--db-uri mysql://qsmaster:CHANGEME@127.0.0.1:3306?socket={{db_socket}}
--db-admin-uri mysql://root:CHANGEME@127.0.0.1:3306?socket={{db_socket}}
--targs db_socket=/qserv/mariadb/run/mysqld.sock
--xrootd-manager czar-xrootd
--log-cfg-file=/config-etc/log/log-czar-proxy.cnf
--log-cfg-file=/config-etc/log/log-czar-proxy.cfg
--repl-instance-id qserv_proj
--repl-auth-key replauthkey
--repl-admin-auth-key=repladminauthkey
Expand All @@ -377,6 +269,10 @@ services:
- type: volume
source: volume_czar_mariadb_run
target: /qserv/mariadb/run
- type: volume
source: volume_czar_transfer
target: /tmp

- << : *log-volume
expose:
- "3306" # for czar-mariadb
Expand All @@ -393,7 +289,6 @@ services:
command: >
entrypoint --log-level DEBUG czar-http
--db-uri mysql://qsmaster:CHANGEME@czar-mariadb:3306/
--xrootd-manager czar-xrootd
--czar-name http
--http-port 4048
--http-threads 4
Expand All @@ -404,7 +299,7 @@ services:
--http-conn-pool-size 2
--user qsmaster
--password CHANGEME
--log-cfg-file=/config-etc/log/log-czar-proxy.cnf
--log-cfg-file=/config-etc/log/log-czar-proxy.cfg
--repl-instance-id qserv_proj
--repl-auth-key replauthkey
--repl-admin-auth-key=repladminauthkey
Expand All @@ -414,6 +309,9 @@ services:
- type: volume
source: volume_czar_cfg
target: /config-etc
- type: volume
source: volume_czar_transfer
target: /tmp
- type: volume
source: volume_czar_home
target: /home/qserv
Expand Down Expand Up @@ -445,15 +343,14 @@ services:
init: true
command: >
entrypoint --log-level DEBUG replication-controller
--db-uri mysql://qsreplica@repl-mariadb:3306/qservReplica
--db-uri mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica
--db-admin-uri mysql://root:CHANGEME@repl-mariadb:3306/qservReplica
--qserv-czar-db=mysql://root:CHANGEME@czar-mariadb:3306/qservMeta
--log-cfg-file=/config-etc/log/log-repl-controller.cnf
--log-cfg-file=/config-etc/log/log-repl-controller.cfg
--
--instance-id=qserv_proj
--auth-key=replauthkey
--admin-auth-key=repladminauthkey
--xrootd-host=czar-xrootd
--registry-host=repl-registry
--controller-auto-register-workers=1
--qserv-sync-force
Expand All @@ -479,9 +376,9 @@ services:
init: true
command: >
entrypoint --log-level DEBUG replication-registry
--db-uri mysql://qsreplica@repl-mariadb:3306/qservReplica
--db-uri mysql://qsreplica:CHANGEME@repl-mariadb:3306/qservReplica
--db-admin-uri mysql://root:CHANGEME@repl-mariadb:3306/qservReplica
--log-cfg-file=/config-etc/log/log-repl-registry.cnf
--log-cfg-file=/config-etc/log/log-repl-registry.cfg
--
--instance-id=qserv_proj
--auth-key=replauthkey
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,3 @@ log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%d{yyyy-MM-ddTHH:mm:ss.SSSZ} LWP %-5X{LWP} %-5p %m%n

log4j.logger.lsst.qserv.xrdssi.msgs=WARN
Loading
Loading