-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmap_RNAseq_data.sh
More file actions
executable file
·125 lines (108 loc) · 3.88 KB
/
map_RNAseq_data.sh
File metadata and controls
executable file
·125 lines (108 loc) · 3.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#!/bin/bash
#
# Automatic ChIP-seq mapping pipeline
#
set -e
set -x
# Bedgraph folder
bedgraphs="."
mapped="."
logs="."
# data folder
data="."
#TAIR10
baseGDIR="/home/simras/Work/histone_mark_data/genome"
genome="TAIR10.fa"
#Download_Folder
downloads="."
STAR="/home/simras/Work/STAR/bin/Linux_x86_64/STAR"
# path of MACS binary
MACS=/home/simras/Work/MACS-1.4.2/bin/macs14
bam=Aligned.out.bam
adapt="AGATCGGAAGAGC"
i=0
j=0
mapped_b=0
for SRR in $(cat SRRIDs.txt)
do
tmp=${SRR%.fastq.gz}
tmp1=${tmp%_*}
SRR_ID=${tmp1##*/}
# map data to assembly 1
dat1=$downloads/$SRR_ID".fastq.gz"
dat2=$downloads/$SRR_ID"_2.fastq.gz"
if [ -f $dat2 ]
then
PAIRED="T"
dat1=$downloads/$SRR_ID"_1.fastq.gz"
else
adapt=$(./pick_adapter.sh $dat1)
PAIRED="F"
dat2=""
fi
new_bam=$mapped/$SRR_ID\_$genome.bam
sorted_bam=$mapped/$SRR_ID\_$genome"_"sorted.bam
if [ ! -f $sorted_bam ]
then
mapped_b=1
if [ $PAIRED == "T" ]
then
ad1=$(./pick_adapter.sh $data/$SRR_ID\_1.fastq.gz)
ad2=$(./pick_adapter.sh $data/$SRR_ID\_2.fastq.gz)
datno1=$downloads/$SRR_ID"_noadt_1.fastq.gz"
datno2=$downloads/$SRR_ID"_noadt_2.fastq.gz"
#cutadapt -a $ad1 -A $ad2 -o $datno1 -p $datno2 $dat1 $dat2
if [ ! -f $datno2 ]
then
cutadapt -a $ad2 -j 4 -o $datno2 $dat2
fi
if [ ! -f $datno1 ]
then
cutadapt -a $ad1 -j 4 -o $datno1 $dat1
fi
$STAR --genomeDir $baseGDIR --readFilesIn $datno1 $datno2 --runThreadN 8 --outSAMtype BAM Unsorted --readFilesCommand "zcat" --seedSearchStartLmax 30 --alignEndsType EndToEnd --alignIntronMax 1 --outSAMmultNmax 1
else
$STAR --genomeDir $baseGDIR --readFilesIn $dat1 --runThreadN 8 --outSAMtype BAM Unsorted --clip3pAdapterSeq $adapt --readFilesCommand "zcat" --seedSearchStartLmax 30 --alignEndsType EndToEnd --alignIntronMax 1 --outSAMmultNmax 1
fi
if [ $? -eq 0 ]
then
new_bam=$mapped/$SRR_ID\_$genome.bam
mv $bam $new_bam
mv Log.final.out $logs/$SRR_ID\_Log.final_2.out
mv Log.out $logs/$SRR_ID\_Log_2.out
mv Log.progress.out $logs/$SRR_ID\_Log.progress_2.out
samtools sort $new_bam > $sorted_bam
rm $new_bam
if [ $? -ne 0 ]
then
echo "samtools sort error" $SRR_ID > errors/$SRR_ID\_error.report
fi
else
echo "Mapping error " $SRR_ID > errors/$SRR_ID\_error.report
fi
fi
if [ -f $sorted_bam ] && [ ! -f $bedgraphs/$SRR_ID"_treat_afterfiting_all.bdg.gz" ]
then
#cd mapped
if [ $PAIRED == "T" ]
then
touch $bedgraphs/$SRR_ID"_treat_afterfiting_all.bdg.gz"
sorted_bam_PE=$mapped/$SRR_ID\_$genome"_"sorted_PE.bam
# histone mark data sets
#samtools view -b -f 0x2 $sorted_bam > $sorted_bam_PE
#$MACS -t $sorted_bam -f BAM -w -n $SRR_ID -S -g 1.35e+08 -m 3,50 --bdg --nomodel
bedtools genomecov -trackline -bg -ibam $sorted_bam -strand +|gzip -9 > $bedgraphs/$SRR_ID"_+_treat_afterfiting_all.bdg.gz"
bedtools genomecov -trackline -bg -ibam $sorted_bam -strand -|gzip -9 > $bedgraphs/$SRR_ID"_-_treat_afterfiting_all.bdg.gz"
bedtools genomecov -trackline -bg -ibam $sorted_bam -strand +|gzip -9 > $bedgraphs/$SRR_ID"_-+_treat_afterfiting_all.bdg.gz"
bedtools genomecov -trackline -bg -ibam $sorted_bam -strand -|gzip -9 >> $bedgraphs/$SRR_ID"_-+_treat_afterfiting_all.bdg.gz"
else
touch $bedgraphs/$SRR_ID"_treat_afterfiting_all.bdg.gz"
#$MACS -t $sorted_bam -f BAM -w -n $SRR_ID -S -g 1.35e+08 -m 3,50 --bdg
bedtools genomecov -trackline -bg -ibam $sorted_bam -strand +|gzip -9 > $bedgraphs/$SRR_ID"_+_treat_afterfiting_all.bdg.gz"
bedtools genomecov -trackline -bg -ibam $sorted_bam -strand -|gzip -9 > $bedgraphs/$SRR_ID"_-_treat_afterfiting_all.bdg.gz"
fi
# cleanup
mv $SRR_ID\_MACS_bedGraph/treat/$SRR_ID"_treat_afterfiting_all.bdg.gz" bedgraphs/
rm -r $SRR_ID"_peaks"* $SRR_ID"_summits.bed" $SRR_ID"_model.r" $SRR_ID"_MACS_bedGraph"
fi
done