1616# # specific language governing permissions and limitations
1717# # under the License.
1818
19- set -e
19+ set -eo pipefail
2020
2121# CloudStack B&R NAS Backup and Recovery Tool for KVM
2222
23- # TODO: do libvirt/logging etc checks
24-
2523# ## Declare variables ###
2624
2725OP=" "
@@ -31,8 +29,17 @@ NAS_ADDRESS=""
3129MOUNT_OPTS=" "
3230BACKUP_DIR=" "
3331DISK_PATHS=" "
32+ QUIESCE=" "
3433logFile=" /var/log/cloudstack/agent/agent.log"
3534
# Exit codes
EXIT_CLEANUP_FAILED=20

#######################################
# Print VALUE when it is a plain non-negative decimal integer; otherwise warn
# on stderr and print DEFAULT. Used to sanitise tunables that may be supplied
# through the environment and are later used in numeric comparisons.
# Arguments:
#   $1 - setting name (only used in the warning)
#   $2 - candidate value
#   $3 - default value
# Outputs:
#   The accepted value on stdout; a warning on stderr when falling back.
#######################################
int_or_default() {
  local name=$1 value=$2 default=$3
  if [[ "$value" =~ ^[0-9]+$ ]]; then
    # Force base 10 so a value such as "0600" is not read as octal later on.
    printf '%s\n' "$((10#$value))"
  else
    printf 'Ignoring invalid %s value "%s", using %s\n' "$name" "$value" "$default" >&2
    printf '%s\n' "$default"
  fi
}

# Backup job timeout in seconds (default: 6 hours)
BACKUP_TIMEOUT=$(int_or_default BACKUP_TIMEOUT "${BACKUP_TIMEOUT:-21600}" 21600)
# Minimum free space required on backup target in bytes (default: 1 GB)
MIN_FREE_SPACE=$(int_or_default MIN_FREE_SPACE "${MIN_FREE_SPACE:-1073741824}" 1073741824)
42+
3643log () {
3744 [[ " $verb " -eq 1 ]] && builtin echo " $@ "
3845 if [[ " $1 " == " -ne" || " $1 " == " -e" || " $1 " == " -n" ]]; then
@@ -42,6 +49,56 @@ log() {
4249 fi
4350}
4451
#######################################
# EXIT-trap handler: release everything the backup run holds.
#   1. Resume the VM if libvirt reports it as paused, so a failed backup
#      (e.g. storage full or I/O errors on the target) cannot leave it
#      paused indefinitely.
#   2. Remove the backup directory, but only when the script is exiting
#      with a non-zero status: a partial backup must not stay on the NAS,
#      and a completed one must not be deleted.
#   3. Unmount the NAS share (only if it is actually mounted) and remove
#      the temporary mount point.
# Globals:
#   VM, dest, mount_point, EXIT_CLEANUP_FAILED (all read)
# Outputs:
#   Failure messages on stdout.
# Returns:
#   Leaves the script's exit status untouched when cleanup succeeds; exits
#   with EXIT_CLEANUP_FAILED when any cleanup step fails.
#######################################
cleanup() {
  # Must be the first command: $? still holds the status the script exits with.
  local exit_status=$?
  local status=0
  local vm_state

  if [[ -n "$VM" ]]; then
    # domstate fails for unknown/undefined domains; treat that as "not paused".
    vm_state=$(virsh -c qemu:///system domstate "$VM" 2>/dev/null || true)
    if [[ "$vm_state" == "paused" ]]; then
      log -ne "Resuming paused VM $VM during backup cleanup"
      if ! virsh -c qemu:///system resume "$VM" >/dev/null 2>&1; then
        echo "Failed to resume VM $VM"
        status=1
      fi
    fi
  fi

  # Only discard the backup directory when the run failed.
  if [[ $exit_status -ne 0 && -n "$dest" && -d "$dest" ]]; then
    rm -rf -- "$dest" || { echo "Failed to delete $dest"; status=1; }
  fi

  if [[ -n "$mount_point" && -d "$mount_point" ]]; then
    # Skip umount when nothing is mounted (e.g. the mount itself failed), so
    # the original failure is not masked by a cleanup failure. If the
    # mountpoint tool is unavailable, fall back to always trying umount.
    if ! command -v mountpoint >/dev/null 2>&1 || mountpoint -q "$mount_point"; then
      umount "$mount_point" 2>/dev/null || { echo "Failed to unmount $mount_point"; status=1; }
    fi
    # Best effort: the directory is non-empty if the umount above failed.
    rmdir "$mount_point" 2>/dev/null || true
  fi

  if [[ $status -ne 0 ]]; then
    echo "Backup cleanup failed"
    exit "$EXIT_CLEANUP_FAILED"
  fi
}
83+
84+ # Trap ensures cleanup always runs on exit (error, signal, or normal exit)
85+ # This prevents orphan NFS mounts from accumulating after failed backups
86+ trap cleanup EXIT
87+
#######################################
# Abort the backup early when the mounted backup target has less free space
# than MIN_FREE_SPACE.
# Globals:
#   mount_point (read)    - directory the backup target is mounted on
#   MIN_FREE_SPACE (read) - required free space in bytes
# Outputs:
#   An error message on stdout when space is insufficient; log entries
#   otherwise.
# Returns:
#   0 when enough space is available or the free space cannot be determined
#   (the check is then skipped); exits the script with status 1 when the
#   target is too full.
#######################################
check_free_space() {
  local avail_kb free_bytes

  # -k pins the unit to 1024-byte blocks. The "|| ..." keeps a failing df from
  # aborting the script under "set -eo pipefail"; the check is best effort.
  avail_kb=$(df -Pk "$mount_point" 2>/dev/null | awk 'NR==2 {print $4}') || avail_kb=""

  if [[ ! "$avail_kb" =~ ^[0-9]+$ ]]; then
    log -ne "Warning: could not determine free space on backup target, skipping check"
    return 0
  fi

  free_bytes=$((avail_kb * 1024))
  if ((free_bytes < MIN_FREE_SPACE)); then
    echo "Insufficient free space on backup target: $((free_bytes / 1048576)) MB available, $((MIN_FREE_SPACE / 1048576)) MB required"
    exit 1
  fi
  log -ne "Backup target has $((free_bytes / 1073741824)) GB free space"
}
101+
45102vercomp () {
46103 local IFS=.
47104 local i ver1=($1 ) ver2=($3 )
@@ -88,6 +145,7 @@ sanity_checks() {
88145
89146backup_running_vm () {
90147 mount_operation
148+ check_free_space
91149 mkdir -p $dest
92150
93151 name=" root"
@@ -99,39 +157,69 @@ backup_running_vm() {
99157 done
100158 echo " </disks></domainbackup>" >> $dest /backup.xml
101159
160+ # Quiesce guest filesystem before backup if requested and agent is available
161+ if [[ " $QUIESCE " == " true" ]]; then
162+ if virsh -c qemu:///system qemu-agent-command $VM ' {"execute":"guest-ping"}' > /dev/null 2>&1 ; then
163+ log -ne " Quiescing guest filesystem on $VM "
164+ virsh -c qemu:///system domfsfreeze $VM > /dev/null 2>&1 || log -ne " Warning: fsfreeze failed on $VM , proceeding without quiesce"
165+ else
166+ log -ne " Warning: qemu-guest-agent not available on $VM , skipping quiesce"
167+ fi
168+ fi
169+
102170 # Start push backup
103- virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest /backup.xml > /dev/null 2> /dev/null
171+ if ! virsh -c qemu:///system backup-begin --domain $VM --backupxml $dest /backup.xml > /dev/null 2>&1 ; then
172+ echo " Failed to start backup for VM $VM "
173+ # Thaw filesystem if we froze it
174+ [[ " $QUIESCE " == " true" ]] && virsh -c qemu:///system domfsthaw $VM > /dev/null 2>&1 || true
175+ exit 1
176+ fi
177+
178+ # Thaw filesystem immediately after backup-begin (QEMU has its own consistent snapshot)
179+ if [[ " $QUIESCE " == " true" ]]; then
180+ virsh -c qemu:///system domfsthaw $VM > /dev/null 2>&1 || true
181+ log -ne " Thawed guest filesystem on $VM "
182+ fi
104183
105184 # Backup domain information
106185 virsh -c qemu:///system dumpxml $VM > $dest /domain-config.xml 2> /dev/null
107186 virsh -c qemu:///system dominfo $VM > $dest /dominfo.xml 2> /dev/null
108187 virsh -c qemu:///system domiflist $VM > $dest /domiflist.xml 2> /dev/null
109188 virsh -c qemu:///system domblklist $VM > $dest /domblklist.xml 2> /dev/null
110189
190+ # Wait for backup to complete with timeout
191+ local elapsed=0
111192 until virsh -c qemu:///system domjobinfo $VM --completed --keep-completed 2> /dev/null | grep " Completed" > /dev/null; do
193+ if [[ $elapsed -ge $BACKUP_TIMEOUT ]]; then
194+ echo " Backup timed out after ${BACKUP_TIMEOUT} s for VM $VM "
195+ virsh -c qemu:///system domjobabort $VM > /dev/null 2>&1 || true
196+ exit 1
197+ fi
112198 sleep 5
199+ elapsed=$(( elapsed + 5 ))
113200 done
114201 rm -f $dest /backup.xml
115202 sync
116203
117204 # Print statistics
118205 virsh -c qemu:///system domjobinfo $VM --completed
119206 du -sb $dest | cut -f1
120-
121- umount $mount_point
122- rmdir $mount_point
123207}
124208
125209backup_stopped_vm () {
126210 mount_operation
211+ check_free_space
127212 mkdir -p $dest
128213
129214 IFS=" ,"
130215
131216 name=" root"
132217 for disk in $DISK_PATHS ; do
133218 volUuid=" ${disk##*/ } "
134- qemu-img convert -O qcow2 $disk $dest /$name .$volUuid .qcow2 | tee -a " $logFile "
219+ if ! qemu-img convert -O qcow2 " $disk " " $dest /$name .$volUuid .qcow2" 2>&1 | tee -a " $logFile " ; then
220+ echo " Failed to convert disk $disk "
221+ exit 1
222+ fi
135223 name=" datadisk"
136224 done
137225 sync
@@ -142,20 +230,18 @@ backup_stopped_vm() {
#######################################
# Delete one backup directory from the NAS.
# Globals:
#   BACKUP_DIR (read) - backup path relative to the share root
#   dest (read)       - absolute path of the backup; set by mount_operation
# Outputs:
#   The list of removed files (rm -v) on stdout.
# Returns:
#   Exits the script with status 1 when BACKUP_DIR does not name a
#   sub-directory of the share.
#######################################
delete_backup() {
  # dest is "<mount point>/<BACKUP_DIR>". An empty path, or one made only of
  # "/" and "." characters, would resolve to the share root (or above it) and
  # the recursive delete below would wipe every backup on the NAS.
  if [[ "${BACKUP_DIR}" =~ ^[./]*$ ]]; then
    echo "Invalid backup path '${BACKUP_DIR}', refusing to delete"
    exit 1
  fi

  mount_operation

  rm -frv -- "$dest"
  sync
  # The EXIT trap (cleanup) unmounts the share and removes the mount point.
}
150237
151238mount_operation () {
152239 mount_point=$( mktemp -d -t csbackup.XXXXX)
153240 dest=" $mount_point /${BACKUP_DIR} "
154- if [ ${NAS_TYPE} == " cifs" ]; then
241+ if [[ " ${NAS_TYPE} " == " cifs" ] ]; then
155242 MOUNT_OPTS=" ${MOUNT_OPTS} ,nobrl"
156243 fi
157- mount -t ${NAS_TYPE} ${NAS_ADDRESS} ${mount_point} $( [[ ! -z " ${MOUNT_OPTS} " ]] && echo -o ${MOUNT_OPTS} ) | tee -a " $logFile "
158- if [ $? -eq 0 ]; then
244+ if mount -t " ${NAS_TYPE} " " ${NAS_ADDRESS} " " ${mount_point} " $( [[ -n " ${MOUNT_OPTS} " ]] && echo " -o" " ${MOUNT_OPTS} " ) 2>&1 | tee -a " $logFile " ; then
159245 log -ne " Successfully mounted ${NAS_TYPE} store"
160246 else
161247 echo " Failed to mount ${NAS_TYPE} store"
@@ -165,7 +251,17 @@ mount_operation() {
165251
# Print the command-line synopsis and the option list to stdout, then exit
# the script with status 1. The -h|--help option calls this function too, so
# asking for help also ends with a non-zero status.
166252function usage {
167253 echo " "
168- echo " Usage: $0 -o <operation> -v|--vm <domain name> -t <storage type> -s <storage address> -m <mount options> -p <backup path> -d <disks path>"
254+ echo " Usage: $0 -o <operation> -v|--vm <domain name> -t <storage type> -s <storage address> -m <mount options> -p <backup path> -d <disks path> [-q]"
255+ echo " "
256+ echo " Options:"
257+ echo " -o, --operation Operation to perform: backup, delete"
258+ echo " -v, --vm VM domain name"
259+ echo " -t, --type NAS type: nfs, cifs"
260+ echo " -s, --storage NAS address (e.g. 192.168.1.1:/share)"
261+ echo " -m, --mount Mount options"
262+ echo " -p, --path Backup directory path on NAS"
263+ echo " -d, --diskpaths Comma-separated disk paths (for stopped VM backup)"
264+ echo " -q, --quiesce Quiesce guest filesystem before backup (requires qemu-guest-agent)"
169265 echo " "
170266 exit 1
171267}
@@ -207,6 +303,10 @@ while [[ $# -gt 0 ]]; do
207303 shift
208304 shift
209305 ;;
306+ -q|--quiesce)
307+ QUIESCE=" true"
308+ shift
309+ ;;
210310 -h|--help)
211311 usage
212312 shift
0 commit comments