-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathMakefile
More file actions
124 lines (90 loc) · 2.85 KB
/
Makefile
File metadata and controls
124 lines (90 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# Shell used to run every recipe line. Bash is needed because recipes in this
# file use `source` and `echo -e`.
SHELL := /bin/bash
# Python interpreter used by the benchmark recipes.
PYTHON := python3
# Absolute path of the directory make is running in. $(CURDIR) is maintained by
# make itself, so no shell has to be forked at parse time to obtain it.
ROOT := $(CURDIR)
# Directory holding the helper shell scripts.
SCRIPTS := scripts
# First argument handed to get_submissions.sh.
DATASET := dataset
# Value handed to generate_inputs.sh via -d.
INPUT := inputs
# Not referenced by any recipe visible in this file.
DATA := data/csv
# Directory receiving the p<num>.txt listings written by extract-dataset.
TEST := test
# Number appended to the letter `p` when calling the scripts (P=1 yields `p1`).
# Empty by default; supply it on the command line: make <recipe> P=<num>
P :=
# Extra flags forwarded verbatim to generate_inputs.sh and
# project_codenet_submissions.sh. Empty unless supplied by the caller.
FLAGS ?=
# Every target listed here is a command, not a file, so make must run its
# recipe even if a file or directory with the same name exists (this project
# has `scripts` and `inputs` directories). `.venv` is intentionally absent:
# it is a real directory target and should only be built when missing.
.PHONY: help scripts \
    submissions benchmark v2 wip inputs \
    extract-dataset copy-dataset \
    get-codenet extract-codenet verify-codenet \
    clean clean-data clean-codenet clean-dataset \
    venv clean-venv \
    embedding generate \
    tmux attach detach
# Print a short summary of the main recipes. The usage line documents the make
# variable P (read by the submissions and extract-dataset recipes as p$(P));
# it is passed as `P=<num>`, not as a dash option.
help:
	@echo "Usage: make <recipe> [P=<num>]"
	@echo -e " submissions:\t\tGets submissions in for benchmark script."
	@echo -e " benchmark:\t\t\tExecutes the benchmark script."
	@echo -e " inputs:\t\t\tCreate inputs for submissions."
	@echo -e " clean:\t\t\tDeletes benchmark files."
	@echo ""
	@echo -e " get-codenet:\t\tDownloads the compressed CodeNet dataset (7.8GB)."
	@echo -e " extract-codenet:\t\tExtracts the CodeNet dataset (~75GB)."
	@echo -e " verify-codenet:\t\tVerifies the CodeNet dataset."
	@echo -e " clean-codenet:\t\tDeletes the CodeNet dataset."
	@echo ""
	@echo -e " extract-dataset:\t\tExtracts submissions from CodeNet dataset."
	@echo -e " copy-dataset:\t\tCopies the CodeNet submissions into the local dataset."
	@echo -e " clean-dataset:\t\tDeletes the local dataset."
# Mark every *.sh file directly under $(SCRIPTS) as executable. Recipes that
# invoke one of those scripts list this target as a prerequisite.
scripts:
	@chmod +x $(SCRIPTS)/*.sh
# Benchmarking recipes
# Run get_submissions.sh with three arguments: $(DATASET), $(TEST) and p$(P).
# NOTE(review): when P is not supplied the third argument is the bare letter
# `p`; confirm the script handles that case.
submissions: scripts
	./$(SCRIPTS)/get_submissions.sh $(DATASET) $(TEST) p$(P)
# Run src/benchmark.py, showing its combined stdout/stderr on the terminal and
# saving a copy in benchmark_out.txt.
# `set -o pipefail` makes the pipeline report the Python script's exit status;
# without it only tee's status is seen and a failed run looks successful.
benchmark:
	set -o pipefail; $(PYTHON) src/benchmark.py 2>&1 | tee benchmark_out.txt
# Run src/v2.py, showing its combined stdout/stderr on the terminal and saving
# a copy in benchmark_out.txt (the same log file the benchmark recipe writes).
# `set -o pipefail` makes the pipeline report the Python script's exit status;
# without it only tee's status is seen and a failed run looks successful.
v2:
	set -o pipefail; $(PYTHON) src/v2.py 2>&1 | tee benchmark_out.txt
# Run src/wip.py, showing its combined stdout/stderr on the terminal and saving
# a copy in benchmark_out.txt (the same log file the benchmark recipe writes).
# `set -o pipefail` makes the pipeline report the Python script's exit status;
# without it only tee's status is seen and a failed run looks successful.
wip:
	set -o pipefail; $(PYTHON) src/wip.py 2>&1 | tee benchmark_out.txt
# Run generate_inputs.sh, passing $(INPUT) via -d and $(FLAGS) via -f.
# NOTE(review): FLAGS is empty unless the caller supplies it, in which case -f
# is the last word on the command line with nothing after it; confirm the
# script accepts that.
inputs: scripts
	./$(SCRIPTS)/generate_inputs.sh -d $(INPUT) -f $(FLAGS)
# Project_CodeNet dataset recipes
# Download the Project_CodeNet archive into the current directory.
# -c resumes a partially downloaded archive instead of starting again under a
# new `.1` file name, which matters for a download of this size.
get-codenet:
	wget -c https://codait-cos-dax.s3.us.cloud-object-storage.appdomain.cloud/dax-project-codenet/1.0.0/Project_CodeNet.tar.gz
# Check the integrity of the downloaded archive with `gzip -t`; unpack it into
# the current directory only when that check succeeds.
extract-codenet:
	gzip -t Project_CodeNet.tar.gz && tar -x -z -f Project_CodeNet.tar.gz
# Run dataset_verify.sh against the Project_CodeNet directory, addressed by
# absolute path through $(ROOT).
verify-codenet: scripts
	./$(SCRIPTS)/dataset_verify.sh -d=$(ROOT)/Project_CodeNet
# Local dataset recipes
# Run project_codenet_submissions.sh for p$(P) against the Project_CodeNet
# directory and store its standard output in $(TEST)/p$(P).txt.
# Requires P=<num>. The guard is the first recipe line, so it is evaluated only
# when this target is actually built and cannot abort unrelated targets;
# $(strip) also rejects a value made up solely of whitespace.
extract-dataset: scripts
	$(if $(strip $(P)),,$(error Usage: make $@ P=<num>))
	@mkdir -p $(TEST)
	./$(SCRIPTS)/project_codenet_submissions.sh -d=$(ROOT)/Project_CodeNet p$(P) $(FLAGS) > $(TEST)/p$(P).txt
# Run copy_submissions.sh with no arguments, after making sure the helper
# scripts are executable.
copy-dataset: scripts
	./$(SCRIPTS)/copy_submissions.sh
# Clean recipes
# Remove the log files left behind by benchmark runs. The -f flag keeps this
# quiet and successful when the files do not exist.
clean:
	rm -rf \
		benchmark_out.txt \
		out.txt
# Remove the inputs/ and results/ directories together with their contents.
# The -f flag keeps this quiet and successful when they do not exist.
clean-data:
	rm -rf \
		inputs/ \
		results/
# Remove the extracted Project_CodeNet directory and the downloaded archive.
# rm is invoked with -I and without -f, so removal goes through rm's own
# confirmation prompt rather than happening silently; the echo above it
# explains what that prompt is about.
clean-codenet:
	@echo "Confirm: remove Project_CodeNet dataset (~75 GB)?"
	@rm -rI Project_CodeNet/ Project_CodeNet.tar.gz
# Remove the local dataset/ directory. rm is invoked with -I and without -f,
# so removal goes through rm's own confirmation prompt.
# NOTE(review): the path is written literally as dataset/ and does not follow
# an overridden $(DATASET) value.
clean-dataset:
	@echo "Confirm: remove local dataset?"
	@rm -rI dataset/
# Python Environment
# Create the virtual environment directory. This is a real file target, so the
# recipe runs only while .venv does not exist. The interpreter comes from
# $(PYTHON) so that the venv is built with the same Python the other recipes
# use, and so that it works on systems that ship no plain `python` command.
.venv:
	@$(PYTHON) -m venv $@
# Make sure the virtual environment exists, then activate it for the duration
# of this one recipe line and install numpy and openai into it with pip.
venv: .venv
	@. .venv/bin/activate && pip install numpy openai
# Delete the virtual environment directory; succeeds even when it is absent.
clean-venv:
	rm -r -f .venv/
# RAG recipes
# Activate the virtual environment for this recipe line and run
# src/embed_dataset.py. The environment is not created here; run `make venv`
# first.
embedding:
	@. .venv/bin/activate && python3 src/embed_dataset.py
# Activate the virtual environment for this recipe line and run
# src/generate.py. The environment is not created here; run `make venv` first.
generate:
	@. .venv/bin/activate && python3 src/generate.py
# tmux commands
# Start a new tmux session named CacheRAG.
tmux:
	@tmux new-session -s CacheRAG
# Attach the terminal to the existing tmux session named CacheRAG.
attach:
	@tmux attach-session -t CacheRAG
# Detach the current tmux client from its session.
detach:
	@tmux detach-client