-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsetup_data.sh
More file actions
executable file
·144 lines (130 loc) · 4.87 KB
/
setup_data.sh
File metadata and controls
executable file
·144 lines (130 loc) · 4.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#!/bin/bash
# Setup script for PaperCode data files
# This script helps set up the large data files needed for the project
# Announce the script.
printf '%s\n' \
  "=========================================" \
  "PaperCode Data Setup Script" \
  "========================================="

# Choose the Python interpreter name. The probes run from lowest to highest
# priority, so python3 wins when both are on PATH; PYTHON_CMD stays empty
# when neither is available.
PYTHON_CMD=""
if command -v python >/dev/null 2>&1; then
  PYTHON_CMD="python"
fi
if command -v python3 >/dev/null 2>&1; then
  PYTHON_CMD="python3"
fi

# Make sure the directories inspected by the later checks exist.
printf '%s\n' "Creating directories..."
mkdir -p data backend
#######################################
# Report whether a regular file exists and, if it does, its size as
# printed by `du -h`.
# Arguments:
#   $1 - path of the file to check
# Outputs:
#   Writes one line to stdout: "✓ <path> exists (size: <size>)" when the
#   file is present, "✗ <path> not found" otherwise.
# Returns:
#   0 if $1 is a regular file, 1 otherwise.
#######################################
check_file() {
  # Declared local so a caller's own "size" variable is never overwritten.
  local size
  if [ -f "$1" ]; then
    # `--` keeps a path that starts with '-' from being parsed as an option.
    size=$(du -h -- "$1" | cut -f1)
    echo "✓ $1 exists (size: $size)"
    return 0
  else
    echo "✗ $1 not found"
    return 1
  fi
}
printf '\n'
printf '%s\n' "Checking for required data files..."
printf '%s\n' "-----------------------------------------"

# Count how many of the raw JSON datasets are absent; the total is read
# further down to decide which summary to print.
missing_files=0
for data_file in \
  "data/papers-with-abstracts.json" \
  "data/evaluation-tables.json" \
  "data/links-between-papers-and-code.json"; do
  if ! check_file "$data_file"; then
    missing_files=$((missing_files + 1))
  fi
done

printf '\n'
printf '%s\n' "Checking for database files..."
printf '%s\n' "-----------------------------------------"

# Database files are only reported on; a missing one is not counted.
for db_file in \
  "backend/paperswithcode.db" \
  "backend/paperswithcode_full.db" \
  "backend/pwc.db"; do
  check_file "$db_file"
done
# Final report.
#   - When data files are missing: print where to obtain them and how to
#     build the database afterwards.
#   - Otherwise: offer to rebuild the SQLite database via
#     backend/scripts/load_full_data.py.
# Reads:
#   missing_files - number of absent data files (treated as 0 when unset)
#   PYTHON_CMD    - interpreter name; empty when no Python was found
if [[ "${missing_files:-0}" -gt 0 ]]; then
  echo ""
  echo "========================================="
  echo "⚠️ Missing $missing_files data file(s)"
  echo "========================================="
  echo ""
  echo "Please download the missing files from:"
  echo "https://paperswithcode.com/datasets"
  echo ""
  echo "Or use one of these methods:"
  echo ""
  echo "1. Download from cloud storage (if provided by team)"
  echo "2. Use Git LFS (if configured in the repository)"
  echo "3. Download from release assets (if available)"
  echo ""
  echo "After downloading, place the files in the 'data' directory"
  if [[ -n "$PYTHON_CMD" ]]; then
    echo "Then run: cd backend && $PYTHON_CMD create_full_db.py"
  else
    echo "Then install Python and run: cd backend && python create_full_db.py"
  fi
else
  echo ""
  echo "========================================="
  echo "✅ All data files are present!"
  echo "========================================="

  # Ask if user wants to rebuild database. If read fails (for example EOF
  # on a non-interactive stdin) the answer is treated as "no".
  REPLY=""
  read -p "Do you want to rebuild the database? (y/n) " -n 1 -r || REPLY=""
  echo
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    if [[ -z "$PYTHON_CMD" ]]; then
      echo "Error: python or python3 not found. Cannot rebuild database."
    else
      echo "Rebuilding database using $PYTHON_CMD..."

      # ---
      # Option 1: (Currently Disabled) Build JSON database for simple_server.py
      # To enable, uncomment the following block and comment out Option 2.
      # ---
      # echo "Building JSON database for simple_server..."
      # cd backend
      # if [ -f "create_full_db.py" ]; then
      #   $PYTHON_CMD create_full_db.py
      #   cd ..
      #   echo "Database rebuild for simple_server complete!"
      # else
      #   echo "Warning: Database creation script for simple_server not found"
      #   cd ..
      # fi

      # ---
      # Option 2: (Currently Active) Build SQLite database for the full_api_server.
      # This is the recommended option for the full application.
      # ---
      echo "Building SQLite database for full_api_server..."
      echo "Note: This will automatically create search performance indexes"
      if [[ -f "backend/scripts/load_full_data.py" ]]; then
        # Run the loader from its own directory inside a subshell: the
        # script's working directory is never changed, and a failed cd or
        # a failing loader is detected instead of being reported as success.
        if (cd backend/scripts && "$PYTHON_CMD" load_full_data.py); then
          echo "Database rebuild for full API server complete!"
          echo "🚀 Search performance indexes have been created automatically!"
        else
          echo "Error: database rebuild failed (load_full_data.py exited with an error)." >&2
        fi
      else
        echo "Warning: 'scripts/load_full_data.py' not found."
        echo "Cannot build the database for the full API server."
      fi
    fi
  fi
fi

echo ""
echo "Setup check complete!"
# echo ""
# echo "========================================="
# echo "Generating paper thumbnails..."
# echo "========================================="
# if [ -f "backend/scripts/PDF2Image.py" ]; then
# if [ -z "$PYTHON_CMD" ]; then
# echo "Error: python or python3 not found. Cannot generate thumbnails."
# else
# echo "Generating thumbnails using $PYTHON_CMD..."
# (cd backend/scripts && $PYTHON_CMD PDF2Image.py)
# echo "Thumbnail generation complete!"
# fi
# else
# echo "Warning: PDF2Image.py script not found, skipping thumbnail generation."
# fi