-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata.sh
More file actions
65 lines (37 loc) · 3.01 KB
/
data.sh
File metadata and controls
65 lines (37 loc) · 3.01 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#!/bin/bash
#https://console.cloud.google.com/storage/browser/_details/genomics-public-data/resources/broad/hg38/v0/hapmap_3.3.hg38.vcf.gz.tbi;tab=live_object
#https://console.cloud.google.com/storage/browser/_details/gcp-public-data--broad-references/hg38/v0/1000G_omni2.5.hg38.vcf.gz.tbi;tab=live_object
# Make the script executable:  chmod +x data.sh
# Then run it with:            ./data.sh
# Directory that receives the downloaded reference/resource files.
data_dir="data"

# Base locations the resources are fetched from.
broad_refs="https://storage.googleapis.com/gcp-public-data--broad-references/hg38/v0"
genomics_public="https://storage.googleapis.com/genomics-public-data/resources/broad/hg38/v0"
broad_ftp="ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/hg38"

# Every file to download, in the order it is fetched.
resource_urls=(
  "${broad_refs}/Homo_sapiens_assembly38.fasta"
  "${broad_refs}/Homo_sapiens_assembly38.dict"
  "${broad_refs}/Homo_sapiens_assembly38.fasta.fai"
  "${broad_refs}/Homo_sapiens_assembly38.known_indels.vcf.gz"
  "${broad_refs}/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi"
  "${broad_refs}/Homo_sapiens_assembly38.dbsnp138.vcf"
  "${broad_refs}/Homo_sapiens_assembly38.dbsnp138.vcf.idx"
  "${broad_refs}/1000G_phase1.snps.high_confidence.hg38.vcf.gz"
  "${broad_refs}/1000G_phase1.snps.high_confidence.hg38.vcf.gz.tbi"
  "${broad_refs}/1000G_omni2.5.hg38.vcf.gz"
  "${broad_refs}/1000G_omni2.5.hg38.vcf.gz.tbi"
  "${genomics_public}/hapmap_3.3.hg38.vcf.gz"
  "${genomics_public}/hapmap_3.3.hg38.vcf.gz.tbi"
  "${broad_ftp}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz"
  "${broad_ftp}/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi"
  "${genomics_public}/wgs_calling_regions.hg38.interval_list"
)

# Download each file into $data_dir (wget -P creates the directory if needed).
# -c resumes a partially downloaded file instead of saving a duplicate
# "<name>.1" copy when the script is re-run after an interruption.
# A failed download does not stop the remaining ones; failures are collected
# and reported on stderr so they are not silently lost.
failed_urls=()
for resource_url in "${resource_urls[@]}"; do
  if ! wget -c -P "$data_dir" "$resource_url"; then
    failed_urls+=("$resource_url")
  fi
done

if (( ${#failed_urls[@]} > 0 )); then
  printf 'warning: %d download(s) failed:\n' "${#failed_urls[@]}" >&2
  printf '  %s\n' "${failed_urls[@]}" >&2
fi
# Directory that will hold the extracted VEP cache.
VEP_DATA_DIR="vep-data"
# Archive name and the Ensembl release-110 location it is fetched from.
vep_cache_archive="homo_sapiens_vep_110_GRCh38.tar.gz"
vep_cache_url="ftp://ftp.ensembl.org/pub/release-110/variation/indexed_vep_cache/${vep_cache_archive}"

# Create the directory and enter it. Stop if either step fails so that the
# download, extraction and rm below never run in an unintended directory.
if ! mkdir -p -- "$VEP_DATA_DIR"; then
  printf 'error: cannot create directory %s\n' "$VEP_DATA_DIR" >&2
  exit 1
fi
if ! cd -- "$VEP_DATA_DIR"; then
  printf 'error: cannot change to directory %s\n' "$VEP_DATA_DIR" >&2
  exit 1
fi

# Download the VEP cache archive into the current directory.
if ! curl -O "$vep_cache_url"; then
  printf 'error: failed to download %s\n' "$vep_cache_url" >&2
  exit 1
fi

# Extract the archive; on failure keep it in place for inspection.
if ! tar xzf "$vep_cache_archive"; then
  printf 'error: failed to extract %s\n' "$vep_cache_archive" >&2
  exit 1
fi

# The archive is no longer needed once it has been extracted successfully.
rm -- "$vep_cache_archive"
echo "VEP cache downloaded and extracted to $VEP_DATA_DIR"