-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy path parallel_batch_create.sh
More file actions
executable file
·54 lines (42 loc) · 2.14 KB
/
parallel_batch_create.sh
File metadata and controls
executable file
·54 lines (42 loc) · 2.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/bin/bash
# SLURM array job: build dataset batches in parallel, one chunk of days per array task.
#SBATCH --job-name=batch_create
# takes around 5 minutes for each day of input data. So with CHUNK_SIZE=5 (5 days) it should take around 25 minutes
#SBATCH --time=1:00:00
#SBATCH --ntasks=1
# CPU option (works with reduced batch size, the cheapest)
#SBATCH --partition=rome
#SBATCH --cpus-per-task=16
# GPU option
# SBATCH --partition=gpu_h100
# SBATCH --gpus-per-node=1
# #SBATCH --mem=180GB # better to use mem-per-cpu: # SLURM_MEM_PER_CPU, SLURM_MEM_PER_GPU, and SLURM_MEM_PER_NODE are mutually exclusive.
# SBATCH --mem-per-cpu=11520MB
# SBATCH --cpus-per-task=16
# Himem option
# SBATCH --partition=himem_4tb
# SBATCH --mem=480G
# SBATCH --mem-per-cpu=30GB
# SBATCH --cpus-per-task=16
# Himem option
# SBATCH --partition=himem_8tb
# #SBATCH --mem=960G # better to use mem-per-cpu: # SLURM_MEM_PER_CPU, SLURM_MEM_PER_GPU, and SLURM_MEM_PER_NODE are mutually exclusive.
# SBATCH --mem-per-cpu=60GB
# SBATCH --cpus-per-task=16

# Strict mode AFTER the #SBATCH header: SLURM stops parsing directives at the
# first executable line, so this must not precede the active directives above.
# -e: abort on any failing command (e.g. a broken venv activation),
# -u: error on unset variables, -o pipefail: fail pipelines on any stage.
set -euo pipefail

CHUNK_SIZE=5 # this means 5 days of data in 1 job (estimated ~5 minutes for each day)
# use:
# sbatch -a 0-1632 parallel_batch_create.sh (all batches, also with missing variables: from 2020-07-01 we start missing NDVI)
# sbatch -a 0-1328 parallel_batch_create.sh (stop at 2020-06 where we have all the variables still)
# first run the following to get the maximum index
# python src/dataset_creation/parallel_batch.py get-max-index --chunk-size=$CHUNK_SIZE

# Fail fast with a clear message if the job was submitted without `sbatch -a`,
# instead of passing a missing positional argument to the python script.
: "${SLURM_ARRAY_TASK_ID:?must be submitted as an array job, e.g. sbatch -a 0-1328 parallel_batch_create.sh}"
echo "SLURM_ARRAY_TASK_ID=$SLURM_ARRAY_TASK_ID"

# Activate the project virtualenv; under `set -e` a failure here aborts the job
# rather than silently running the system python below.
source venv/bin/activate

export PYTHONUNBUFFERED=1
# srun python src/dataset_creation/parallel_batch.py run-single $SLURM_ARRAY_TASK_ID --chunk-size=$CHUNK_SIZE
python src/dataset_creation/parallel_batch.py run-single "$SLURM_ARRAY_TASK_ID" --chunk-size="$CHUNK_SIZE"
# memory profiling
# srun python -m memray run src/dataset_creation/parallel_batch.py run-single $SLURM_ARRAY_TASK_ID --chunk-size=$CHUNK_SIZE
# process profiling
# srun python -m cProfile -o stats-$SLURM_JOB_ID.prof src/dataset_creation/parallel_batch.py run-single $SLURM_ARRAY_TASK_ID --chunk-size=$CHUNK_SIZE
# then to visualize profiling
# memray -vvv flamegraph --temporal src/dataset_creation/memray-parallel_batch.py.1362661.bin
# snakeviz stats-$SLURM_JOB_ID.prof