Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions instructions/installation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ conda create -n SDR python=3.7 --yes
source ~/anaconda3/etc/profile.d/conda.sh
conda activate SDR

conda install -c pytorch pytorch==1.7.0 torchvision cudatoolkit=11.0 --yes
pip install -U cython transformers==3.1.0 nltk pytorch-metric-learning joblib pytorch-lightning==1.1.8 pandas
conda install -c pytorch pytorch==1.7.0 torchvision cudatoolkit=11.0 faiss-gpu --yes
pip install -U cython transformers==3.1.0 nltk pytorch-metric-learning joblib pytorch-lightning numpy pandas

Binary file added last_numSamples_90
Binary file not shown.
13 changes: 10 additions & 3 deletions models/SDR/SDR.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ def test_epoch_end(self, outputs, recos_path=None):
section_sentences_features = [out[0] for out in outputs]
popular_titles, idxs, gt_path = get_gt_seeds_titles(titles, self.hparams.dataset_name)


#print(np.array(outputs).shape, "\n", outputs[0])

self.hparams.test_sample_size = (
self.hparams.test_sample_size if self.hparams.test_sample_size > 0 else len(popular_titles)
)
Expand Down Expand Up @@ -195,7 +198,7 @@ def add_model_specific_args(parent_parser, task_name, dataset_name, is_lowest_le
parser.add_argument("--metric_loss_func", type=str, default="ContrastiveLoss") # TripletMarginLoss #CosineLoss
parser.add_argument("--sim_loss_lambda", type=float, default=0.1)
parser.add_argument("--limit_tokens", type=int, default=64)
parser.add_argument("--limit_val_indices_batches", type=int, default=500)
#parser.add_argument("--limit_val_indices_batches", type=int, default=500)
parser.add_argument("--metric_for_similarity", type=str, choices=["cosine", "norm_euc"], default="cosine")

parser.set_defaults(
Expand Down Expand Up @@ -229,8 +232,8 @@ def prepare_data(self):
block_size=block_size,
mode="val",
)
self.val_dataset.indices_map = self.val_dataset.indices_map[: self.hparams.limit_val_indices_batches]
self.val_dataset.labels = self.val_dataset.labels[: self.hparams.limit_val_indices_batches]
#self.val_dataset.indices_map = self.val_dataset.indices_map[: self.hparams.limit_val_indices_batches]
#self.val_dataset.labels = self.val_dataset.labels[: self.hparams.limit_val_indices_batches]

self.test_dataset = WikipediaTextDatasetParagraphsSentencesTest(
tokenizer=self.tokenizer,
Expand All @@ -239,4 +242,8 @@ def prepare_data(self):
block_size=block_size,
mode="test",
)

#self.test_dataset.indices_map = self.test_dataset.indices_map[: self.hparams.limit_test_indices_batches]
#self.test_dataset.labels = self.test_dataset.labels[: self.hparams.limit_test_indices_batches]
#self.test_dataset.examples = self.test_dataset.examples[: self.hparams.limit_test_indices_batches]

2 changes: 1 addition & 1 deletion models/doc_similarity_pl_template.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(
self, hparams,
):
super(DocEmbeddingTemplate, self).__init__()
self.hparams = hparams
self.save_hyperparameters(hparams)
self.hparams.hparams_dir = extract_model_path_for_hyperparams(self.hparams.default_root_dir, self)
self.losses = {}
self.tracks = {}
Expand Down
2 changes: 1 addition & 1 deletion models/reco/hierarchical_reco.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def vectorize_reco_hierarchical(all_features, titles,gt_path, output_path=""):
sections_per_article = np.array([len(article) for article in all_features])
sections_per_article_cumsum = np.array([0,] + [len(article) for article in all_features]).cumsum()
features_per_section = [sec for article in all_features for sec in article]
features_per_section_torch = [torch.from_numpy(feat) for feat in features_per_section]
features_per_section_torch = [torch.from_numpy(feat).unsqueeze(0) for feat in features_per_section]
features_per_section_padded = torch.nn.utils.rnn.pad_sequence(
features_per_section_torch, batch_first=True, padding_value=torch.tensor(float("nan"))
).cuda()
Expand Down
23 changes: 15 additions & 8 deletions sdr_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,15 @@ def main_train(model_class_pointer, hparams,parser):
trainer = pytorch_lightning.Trainer(
num_sanity_val_steps=2,
gradient_clip_val=hparams.max_grad_norm,
callbacks=[RunValidationOnStart()],
checkpoint_callback=ModelCheckpoint(
callbacks=[ModelCheckpoint(
save_top_k=3,
save_last=True,
mode="min" if "acc" not in hparams.metric_to_track else "max",
monitor=hparams.metric_to_track,
filepath=os.path.join(model.hparams.hparams_dir, "{epoch}"),
filename=os.path.join(model.hparams.hparams_dir, "{epoch}"),
verbose=True,
),
)],
checkpoint_callback=True,
logger=logger,
max_epochs=hparams.max_epochs,
gpus=hparams.gpus,
Expand All @@ -66,14 +66,21 @@ def main_train(model_class_pointer, hparams,parser):
accumulate_grad_batches=hparams.accumulate_grad_batches,
reload_dataloaders_every_epoch=True,
# load
resume_from_checkpoint=hparams.resume_from_checkpoint,
#resume_from_checkpoint=hparams.resume_from_checkpoint,
)
if(not hparams.test_only):
if(hparams.resume_from_checkpoint is not None):
trainer.fit(model, ckpt_path=hparams.resume_from_checkpoint)
else:
trainer.fit(model)
trainer.test(model)
else:
if(hparams.resume_from_checkpoint is not None):
model = model.load_from_checkpoint(hparams.resume_from_checkpoint,hparams=hparams, map_location=torch.device(f"cpu"))
trainer.test(model)
if(hparams.resume_from_checkpoint is not None):
trainer.test(model, ckpt_path=hparams.resume_from_checkpoint)
else:
trainer.test(model)
#model = model.load_from_checkpoint(hparams.resume_from_checkpoint,hparams=hparams, map_location=torch.device(f"cpu"))



if __name__ == "__main__":
Expand Down
12 changes: 12 additions & 0 deletions utils/argparse_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,18 @@ def init_parse_argparse_default_params(parser, dataset_name=None, arch=None):
parser.add_argument(
"--limit_train_batches", default=10000, type=int,
)

parser.add_argument(
"--limit_val_batches", default=10000, type=int,
)

parser.add_argument(
"--limit_test_batches", default=10000, type=int,
)

#parser.add_argument(
# "--limit_test_indices_batches", default=10000, type=int,
#)

parser.add_argument(
"--train_log_every_n_steps", default=50, type=int,
Expand Down