textreact/scripts/train_RCR.sh at main · thomas0809/textreact · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
#!/bin/bash
#
# Train, validate and test the `condition` task on data/RCR/ by invoking
# main.py with the allenai/scibert_scivocab_uncased encoder and the
# textreact/configs/bert_l6.json decoder config.
#
# All paths are relative, so run this from the directory containing main.py.
#
# Optional environment overrides (defaults in parentheses):
#   NUM_GPUS_PER_NODE  value passed to --gpus (4)
#   BATCH_SIZE         total batch size before it is split up (128)
#   ACCUM_STEP         value passed to --gradient_accumulation_steps (1)
#   SAVE_PATH          output directory, created if missing (output/RCR_textreact)
#   NN_PATH            directory passed to --nn_path (data/Tevatron_output/RCR/)

# Abort on command failure, on use of an unset variable, and on a failure in
# any stage of a pipeline, so training never starts from a half-prepared state.
set -euo pipefail

NUM_GPUS_PER_NODE=${NUM_GPUS_PER_NODE:-4}
BATCH_SIZE=${BATCH_SIZE:-128}
ACCUM_STEP=${ACCUM_STEP:-1}

SAVE_PATH=${SAVE_PATH:-output/RCR_textreact}
NN_PATH=${NN_PATH:-data/Tevatron_output/RCR/}

# Reject values that would make the division below meaningless (or divide by
# zero) before any directory is created or any process is launched.
if (( NUM_GPUS_PER_NODE < 1 || BATCH_SIZE < 1 || ACCUM_STEP < 1 )); then
  printf 'error: NUM_GPUS_PER_NODE, BATCH_SIZE and ACCUM_STEP must be positive integers\n' >&2
  exit 1
fi

# Value handed to --batch_size: the total batch divided by the GPU count and
# by the number of accumulation steps.
# NOTE(review): presumably main.py treats this as the per-GPU, per-step batch
# size -- confirm against main.py.
PER_STEP_BATCH=$(( BATCH_SIZE / NUM_GPUS_PER_NODE / ACCUM_STEP ))

# Integer division truncates; make a mismatch visible instead of silently
# training with a different effective batch size than the one requested.
if (( PER_STEP_BATCH * NUM_GPUS_PER_NODE * ACCUM_STEP != BATCH_SIZE )); then
  printf 'warning: BATCH_SIZE=%s is not divisible by NUM_GPUS_PER_NODE*ACCUM_STEP=%s; passing --batch_size %s\n' \
    "${BATCH_SIZE}" "$(( NUM_GPUS_PER_NODE * ACCUM_STEP ))" "${PER_STEP_BATCH}" >&2
fi

mkdir -p -- "${SAVE_PATH}"

# NCCL_P2P_DISABLE=1 is exported only into this python process; according to
# the NCCL documentation it disables the peer-to-peer (P2P) transport.
NCCL_P2P_DISABLE=1 python main.py \
    --task condition \
    --encoder allenai/scibert_scivocab_uncased \
    --decoder textreact/configs/bert_l6.json \
    --encoder_pretrained \
    --data_path data/RCR/ \
    --train_file train.csv \
    --valid_file val.csv \
    --test_file test.csv \
    --vocab_file textreact/vocab/vocab_condition.txt \
    --corpus_file data/USPTO_rxn_corpus.csv \
    --nn_path "${NN_PATH}" \
    --train_nn_file train_rank.json \
    --valid_nn_file val_rank.json \
    --test_nn_file test_rank.json \
    --num_neighbors 3 \
    --use_gold_neighbor \
    --save_path "${SAVE_PATH}" \
    --max_length 512 \
    --shuffle_smiles \
    --mlm --mlm_ratio 0.15 --mlm_layer mlp --mlm_lambda 0.1 \
    --lr 1e-4 \
    --batch_size "${PER_STEP_BATCH}" \
    --gradient_accumulation_steps "${ACCUM_STEP}" \
    --epochs 20 \
    --warmup 0.02 \
    --do_train --do_valid --do_test \
    --num_beams 15 \
    --precision 16-mixed \
    --gpus "${NUM_GPUS_PER_NODE}"