-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path parallel_batch_create.sh
More file actions
executable file
·54 lines (42 loc) · 2.14 KB
/
parallel_batch_create.sh
File metadata and controls
executable file
·54 lines (42 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/bin/bash
# SLURM array job: build dataset batches in parallel, one chunk of days per array task.
#SBATCH --job-name=batch_create
# takes around 5 minutes for each day of input data. So with CHUNK_SIZE=5 (5 days) it should take around 25 minutes
#SBATCH --time=1:00:00
#SBATCH --ntasks=1
# CPU option (works with reduced batch size, the cheapest)
#SBATCH --partition=rome
#SBATCH --cpus-per-task=16
# GPU option
# SBATCH --partition=gpu_h100
# SBATCH --gpus-per-node=1
# #SBATCH --mem=180GB # better to use mem-per-cpu: # SLURM_MEM_PER_CPU, SLURM_MEM_PER_GPU, and SLURM_MEM_PER_NODE are mutually exclusive.
# SBATCH --mem-per-cpu=11520MB
# SBATCH --cpus-per-task=16
# Himem option
# SBATCH --partition=himem_4tb
# SBATCH --mem=480G
# SBATCH --mem-per-cpu=30GB
# SBATCH --cpus-per-task=16
# Himem option
# SBATCH --partition=himem_8tb
# #SBATCH --mem=960G # better to use mem-per-cpu: # SLURM_MEM_PER_CPU, SLURM_MEM_PER_GPU, and SLURM_MEM_PER_NODE are mutually exclusive.
# SBATCH --mem-per-cpu=60GB
# SBATCH --cpus-per-task=16

# Strict mode AFTER the #SBATCH header: SLURM stops parsing directives at the
# first executable line, so this must not precede the active directives above.
# -e: abort on any failing command (e.g. a broken venv activation),
# -u: error on unset variables, -o pipefail: fail pipelines on any stage.
set -euo pipefail

CHUNK_SIZE=5 # this means 5 days of data in 1 job (estimated ~5 minutes for each day)
# use:
# sbatch -a 0-1632 parallel_batch_create.sh (all batches, also with missing variables: from 2020-07-01 we start missing NDVI)
# sbatch -a 0-1328 parallel_batch_create.sh (stop at 2020-06 where we have all the variables still)
# first run the following to get the maximum index
# python src/dataset_creation/parallel_batch.py get-max-index --chunk-size=$CHUNK_SIZE

# Fail fast with a clear message if the job was submitted without `sbatch -a`,
# instead of passing a missing positional argument to the python script.
: "${SLURM_ARRAY_TASK_ID:?must be submitted as an array job, e.g. sbatch -a 0-1328 parallel_batch_create.sh}"
echo "SLURM_ARRAY_TASK_ID=$SLURM_ARRAY_TASK_ID"

# Activate the project virtualenv; under `set -e` a failure here aborts the job
# rather than silently running the system python below.
source venv/bin/activate

export PYTHONUNBUFFERED=1
# srun python src/dataset_creation/parallel_batch.py run-single $SLURM_ARRAY_TASK_ID --chunk-size=$CHUNK_SIZE
python src/dataset_creation/parallel_batch.py run-single "$SLURM_ARRAY_TASK_ID" --chunk-size="$CHUNK_SIZE"
# memory profiling
# srun python -m memray run src/dataset_creation/parallel_batch.py run-single $SLURM_ARRAY_TASK_ID --chunk-size=$CHUNK_SIZE
# process profiling
# srun python -m cProfile -o stats-$SLURM_JOB_ID.prof src/dataset_creation/parallel_batch.py run-single $SLURM_ARRAY_TASK_ID --chunk-size=$CHUNK_SIZE
# then to visualize profiling
# memray -vvv flamegraph --temporal src/dataset_creation/memray-parallel_batch.py.1362661.bin
# snakeviz stats-$SLURM_JOB_ID.prof