@@ -4,68 +4,63 @@ biosample=$1
4
4
linkType=$2
5
5
links=$3
6
6
name=$4
7
+ blackList=$5
7
8
8
9
genome=hg19
9
10
version=V4
10
11
scriptDir=~ /Projects/Target-Gene-Prediction/Scripts/Generate-Benchmark
11
12
masterDir=~ /Lab/ENCODE/Encyclopedia/$version /Registry/$version -$genome /
12
13
masterList=$masterDir /Cell-Type-Specific/Master-Cell-List.txt
13
14
14
- if [[ $genome == " mm10" ]]
15
- then
16
- tss=~ /Lab/Reference/Mouse/GencodeM4/TSS.Filtered.4K.bed
17
- elif [[ $genome == " hg38" ]]
18
- then
19
- tss=~ /Lab/Reference/Human/$genome /GENCODE24/TSS.Filtered.4K.bed
20
- elif [[ $genome == " hg19" ]]
21
- then
22
- tss=~ /Lab/Reference/Human/$genome /Gencode19/TSS.Filtered.4K.bed
23
- fi
15
+ prox=~ /Lab/Reference/Human/$genome /Gencode19/TSS.2019.4K.bed
16
+ tss=~ /Lab/Reference/Human/$genome /Gencode19/TSS.2019.bed
24
17
25
18
file=$( awk ' {if ($9 == "' $biosample ' ") print $2}' $masterList )
26
19
27
20
bedtools intersect -v -a $masterDir /Cell-Type-Specific/Five-Group/$file * .bed \
28
- -b $tss | grep " Enhancer" > enhancers.bed
21
+ -b $prox | grep " Enhancer" > enhancers.bed
29
22
30
23
if [ $linkType == " ChIA-PET" ]
31
24
then
32
25
awk ' {if ($NF >= 4) print $0}' $links > links
33
- python $scriptDir /process.chiapet.py links enhancers.bed $tss \
34
- $name -Blacklist.txt > $name -Links.txt
26
+ python $scriptDir /process.chiapet.py links enhancers.bed $prox \
27
+ $name -Blacklist.txt $blackList > $name -Links.txt
35
28
36
29
elif [ $linkType == " Hi-C" ]
37
30
then
38
31
awk ' {if (NR != 1) print "chr"$1 "\t" $2 "\t" $3 "\t" "chr"$4 \
39
32
"\t" $5 "\t" $6}' $links > links
40
- python $scriptDir /process.chiapet.py links enhancers.bed $tss \
41
- $name -Blacklist.txt > $name -Links.txt
33
+ python $scriptDir /process.chiapet.py links enhancers.bed $prox \
34
+ $name -Blacklist.txt $blackList > $name -Links.txt
42
35
rm links
43
36
44
37
elif [ $linkType == " CHi-C" ]
45
38
then
46
39
awk ' {if (NR != 1 && $NF > 10) print $1 "\t" $2 "\t" $3 "\t" $7 \
47
40
"\t" $8 "\t" $9}' $links > links
48
- python $scriptDir /process.chiapet.py links enhancers.bed $tss \
49
- $name -Blacklist.txt > $name -Links.txt
41
+ python $scriptDir /process.chiapet.py links enhancers.bed $prox \
42
+ $name -Blacklist.txt $blackList > $name -Links.txt
50
43
rm links
51
44
52
45
elif [ $linkType == " eQTL" ]
53
46
then
54
- python $scriptDir /process.eqtl.py $links enhancers.bed $tss \
47
+ python $scriptDir /process.eqtl.py $links enhancers.bed $prox \
55
48
> $name -Links.txt
56
49
57
50
elif [ $linkType == " CRISPR" ]
58
51
then
59
- python $scriptDir /process.crispr.py $links enhancers.bed $tss \
52
+ python $scriptDir /process.crispr.py $links enhancers.bed $prox \
60
53
> $name -Links.txt
61
54
tssK562=~ /Lab/Target-Gene/Benchmark/Raw/CRISPR/Shendure/K562-V19-TSS.bed
62
55
fi
63
56
64
57
if [ $linkType == " CRISPR" ]
65
58
then
66
- cutoff=1000000
67
- python $scriptDir /calculate.distance.py $tss enhancers.bed \
68
- $name -Links.txt $name -Distance.txt
59
+ # cutoff=1000000
60
+ # python $scriptDir/calculate.distance.py $tss enhancers.bed \
61
+ # $name-Links.txt $name-Distance.txt
62
+ cutoff=$( python $scriptDir /calculate.distance.py $tss enhancers.bed \
63
+ $name -Links.txt $name -Distance.txt)
69
64
else
70
65
cutoff=$( python $scriptDir /calculate.distance.py $tss enhancers.bed \
71
66
$name -Links.txt $name -Distance.txt)
@@ -76,19 +71,43 @@ awk '{if ($3 <= '$cutoff') print $0}' $name-Distance.txt > \
76
71
77
72
if [ $linkType == " CRISPR" ]
78
73
then
74
+ # python $scriptDir/create.experiment.sets.py $name-Distance.cutoff.txt \
75
+ # $tssK562 enhancers.bed output $cutoff $name $linkType $blackList
79
76
python $scriptDir /create.experiment.sets.py $name -Distance.cutoff.txt \
80
- $tssK562 enhancers.bed output $cutoff $name $linkType
77
+ $tss enhancers.bed output $cutoff $name $linkType $blackList
81
78
else
82
79
python $scriptDir /create.experiment.sets.py $name -Distance.cutoff.txt \
83
- $tss enhancers.bed output $cutoff $name $linkType
80
+ $tss enhancers.bed output $cutoff $name $linkType $blackList
84
81
fi
85
82
86
83
awk ' {print $1}' positive | sort -u > ccre-list.txt
87
84
awk ' FNR==NR {x[$1];next} ($4 in x)' ccre-list.txt enhancers.bed > tmp.bed
88
85
awk ' {print $1 "\t" $2 "\t" 1}' positive > total
89
86
awk ' {print $1 "\t" $2 "\t" 0}' negative >> total
90
87
91
- python $scriptDir /assign.groups.py tmp.bed total > $name -Benchmark.v1.txt
88
+ p=$( wc -l positive | awk ' {print $1}' )
89
+ n=$( wc -l negative | awk ' {print $1}' )
90
+ echo -e " \t" $p " \t" $n " \t" $cutoff
91
+
92
+ if [ $blackList == " yes" ]
93
+ then
94
+ v1=" v1"
95
+ v2=" v2"
96
+ # cp
97
+ else
98
+ v1=" v3"
99
+ v2=" v4"
100
+ fi
101
+
102
+ cp $name -Distance.txt $name -Distance.$v1 .txt
103
+
104
+ python $scriptDir /assign.groups.py tmp.bed total > $name -Benchmark.$v1 .txt
105
+
106
+ python $scriptDir /select.ratio.pairs.py $name -Benchmark.$v1 .txt > new-total
107
+ awk ' {if ($3 == 1) p+=1; else n+=1}END{print "\t" p "\t" n "\t" "' $cutoff ' "}' new-total
108
+
109
+ cat skip.txt
110
+ python $scriptDir /assign.groups.py tmp.bed new-total > $name -Benchmark.$v2 .txt
92
111
93
- rm -f bed1 bed2 out1 out2 positive negative range output
112
+ rm -f bed1 bed2 out1 out2 positive negative range output new-total skip.txt
94
113
rm -f ccre-list.txt enhancers.bed intersection2.bed total tmp.bed
0 commit comments