-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgroup_humann2_uniref_abundances_to_GO.sh
executable file
·181 lines (155 loc) · 4.96 KB
/
group_humann2_uniref_abundances_to_GO.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env bash
function msg_error() {
printf "ERROR\n%15s %s\n\n" "$1"
}
function prettyOpt() {
printf "%15s %s\n" "$1" "$2"
}
function shortUsage() {
echo -e "Usage:\n\tgroup_humann2_uniref_abundances_to_GO.sh [OPTIONS] -i humann2_gene_families_abundance -m molecular_function_abundances -b biological_process_abundances -c cellular_component_abundances"
echo
echo "Required options:"
prettyOpt "-i" "Path to file with UniRef50 gene family abundance (HUMAnN2 output)"
prettyOpt "-m" "Path to file which will contain GO slim term abudances corresponding to molecular functions"
prettyOpt "-b" "Path to file which will contain GO slim term abudances corresponding to biological processes"
prettyOpt "-c" "Path to file which will contain GO slim term abudances corresponding to cellular components"
echo
echo "Other options:"
prettyOpt "-a" "Path to basic Gene Ontology file"
prettyOpt "-s" "Path to basic slim Gene Ontology file"
prettyOpt "-u" "Path to file with HUMAnN2 correspondance betwen UniRef50 and GO"
prettyOpt "-g" "Path to GoaTools scripts"
prettyOpt "-p" "Path to HUMAnN2 scripts"
prettyOpt "-h" "Print this help message"
echo
exit 0
}
MY_PATH=`dirname "$0"`
input_file=""
molecular_function_file=""
biological_processes_file=""
cellular_component_file=""
data_dir=$MY_PATH"/data/"
if [ ! -d $data_dir ]; then
mkdir $data_dir
fi
go_file=$data_dir"/go.obo"
slim_go_file=$data_dir"/goslim_metagenomics.obo"
humann2_uniref_go=$data_dir"/map_infogo1000_uniref50.txt"
goatools_path=`which map_to_slim.py | xargs -n1 dirname`
humann2_path=`which humann2 | xargs -n1 dirname`
# Manage arguments
while getopts ":i:m:b:c:a:s:u:g:p:h" opt; do
case $opt in
i)
input_file=$OPTARG >&2
;;
m)
molecular_function_file=$OPTARG >&2
;;
b)
biological_processes_file=$OPTARG >&2
;;
c)
cellular_component_file=$OPTARG >&2
;;
a)
go_file=$OPTARG >&2
;;
s)
slim_go_file=$OPTARG >&2
;;
u)
humann2_uniref_go=$OPTARG >&2
;;
g)
goatools_path=$OPTARG >&2
;;
p)
humann2_path=$OPTARG >&2
;;
h)
shortUsage ;
exit 1
;;
\?)
echo "Invalid option: -$OPTARG" >&2
exit 1
;;
:)
echo "Option -$OPTARG requires an argument." >&2
exit 1
;;
esac
done
if [ -z $input_file ]; then
msg_error "Missing argument: -i"
shortUsage;
fi
if [ -z $molecular_function_file ]; then
msg_error "Missing argument: -m"
shortUsage;
fi
if [ -z $biological_processes_file ]; then
msg_error "Missing argument: -b"
shortUsage;
fi
if [ -z $cellular_component_file ]; then
msg_error "Missing argument: -c"
shortUsage;
fi
$MY_PATH"/src/group_humann2_uniref_abundances_to_GO_download_datasets.sh" $go_file $slim_go_file $humann2_uniref_go
tmp_data_dir="tmp_data"
if [ ! -d $tmp_data_dir ]; then
mkdir $tmp_data_dir
fi
if [ -d .venv ]; then
source .venv/bin/activate
fi
echo "Format HUMAnN2 UniRef50 GO mapping"
echo "=================================="
python $MY_PATH"/src/format_humann2_uniref_go_mapping.py" \
--uniref_go_mapping_input $humann2_uniref_go \
--uniref_go_mapping_output $tmp_data_dir"/uniref_go_mapping_output.txt" \
--go_names $tmp_data_dir"/humann2_go_names.txt"
echo ""
echo "Map to slim GO"
echo "=============="
python $goatools_path"/map_to_slim.py" \
--association_file $tmp_data_dir"/humann2_go_names.txt" \
$go_file \
$slim_go_file \
> $tmp_data_dir"/humman2_go_slim.txt"
echo ""
echo "Format slim GO"
echo "=============="
python $MY_PATH"/src/format_go_correspondance.py" \
--go_correspondance_input $tmp_data_dir"/humman2_go_slim.txt" \
--go_correspondance_output $tmp_data_dir"/formatted_humman2_go_slim.txt"
echo ""
echo "Regroup UniRef50 to GO"
echo "======================"
$humann2_path/humann2_regroup_table \
-i $input_file \
-f "sum" \
-c $tmp_data_dir"/uniref_go_mapping_output.txt" \
-o $tmp_data_dir"/humann2_go_abundances.txt"
echo ""
echo "Regroup GO to slim GO"
echo "====================="
$humann2_path/humann2_regroup_table \
-i $tmp_data_dir"/humann2_go_abundances.txt" \
-f "sum" \
-c $tmp_data_dir"/formatted_humman2_go_slim.txt" \
-o $tmp_data_dir"/humann2_slim_go_abundances.txt"
echo ""
echo "Format slim GO abundance"
echo "========================"
python $MY_PATH"/src/format_humann2_output.py" \
--go_slim $slim_go_file \
--humann2_output $tmp_data_dir"/humann2_slim_go_abundances.txt" \
--molecular_function_output_file $molecular_function_file \
--biological_processes_output_file $biological_processes_file \
--cellular_component_output_file $cellular_component_file
echo ""
#rm -rf $tmp_data_dir