-
Notifications
You must be signed in to change notification settings - Fork 2
/
edam.sh
197 lines (186 loc) · 8.5 KB
/
edam.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
#!/bin/sh
# This script is written by Andreas Tille (https://github.com/tillea)
# and copied from https://github.com/bio-tools/biotoolsConnect
# Create a dataset for EDAM
#
# FIXME:
# * "main license" (Files: *)
# * Upstream-Contact
usage() {
cat >/dev/stderr <<EOT
Usage: $0 [option]
-h print this help screen
-j JSON export
-m forcing public mirror over local one
Description:
Query local or public UDD for information that could be useful for EDAM.
EOT
}
if ! which psql >/dev/null ; then
echo "E: postgresql client 'psql' not available"
exit
fi
PORT="-p 5452"
SERVICE="service=udd"
#if there is a local UDD clone just use this
if psql $PORT -l 2>/dev/null | grep -qw udd ; then
SERVICE=udd
fi
# Check UDD connection
if ! psql $PORT $SERVICE -c "" 2>/dev/null ; then
echo "I: No local UDD found, use public mirror."
PORT="--port=5432"
export PGPASSWORD="public-udd-mirror"
SERVICE="--host=public-udd-mirror.xvm.mit.edu --username=public-udd-mirror udd"
fi
EXT=txt
while getopts "hjm" o; do
case "${o}" in
h)
usage
exit 0
;;
j)
JSONBEGIN="SELECT array_to_json(array_agg(t)) FROM ("
JSONEND=") t"
EXT=json
OUTPUTFORMAT=--tuples-only
;;
m)
PORT="--port=5432"
export PGPASSWORD="public-udd-mirror"
SERVICE="--host=public-udd-mirror.xvm.mit.edu --username=public-udd-mirror udd"
;;
*)
usage
exit 1
;;
esac
done
shift $((OPTIND-1))
team="'[email protected]'"
psql $PORT $OUTPUTFORMAT $SERVICE >edam.$EXT <<EOT
$JSONBEGIN
-- If you want to make the output at source level uncomment this
-- SELECT source, array_agg(package) as packages, distribution, release, component, version, homepage FROM
-- (
SELECT DISTINCT
p.package, p.distribution, p.release, p.component,
regexp_replace(regexp_replace(regexp_replace(regexp_replace(p.version, '-[.\d]+$', ''), '\+dfsg.*$', '') , '\+lgpl.*$', ''), '-\d*biolinux\d*$', '') AS version, -- strip Debian revision and other extensions to upstream version
p.source, p.homepage,
en.description AS description, en.long_description AS long_description,
interface.tags AS interface, biology.tags AS biology, field.tags AS fields, use.tags AS use,
pop.vote || ' / ' || pop.recent || ' / ' || pop.insts as popcon,
bibdoi.value as doi,
edam.topics as topics,
edam.scopes as edam_scopes,
biotools.entry as "bio.tools",
omictools.entry as "OMICtools",
seqwiki as "SEQwiki",
scicrunch as "SciCrunch",
rrid as "RRID"
FROM (
SELECT * FROM (
SELECT DISTINCT
package, distribution, release, component, strip_binary_upload(version) AS version,
source, homepage, description, description_md5
FROM packages
WHERE package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med' AND task IN ('bio', 'bio-dev'))
UNION
SELECT DISTINCT
package, 'prospective' AS distribution, 'vcs' AS release, component, strip_binary_upload(chlog_version) AS version,
source, homepage, description, description_md5
FROM blends_prospectivepackages
WHERE package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med') -- AND task IN ('bio', 'bio-dev', 'imaging', 'imaging-dev'))
) tmp
) p
LEFT OUTER JOIN (
SELECT package, description, long_description, release, description_md5 FROM descriptions WHERE language = 'en'
AND package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med' AND task IN ('bio', 'bio-dev'))
UNION
SELECT package, description, long_description, 'vcs' AS release, description_md5 FROM blends_prospectivepackages
) en ON en.package = p.package AND (en.release = p.release OR p.release = 'vcs') AND (en.description_md5 = p.description_md5 OR en.description_md5 IS NULL)
JOIN (
-- select packages which have versions outside experimental
SELECT px.package, strip_binary_upload(px.version) AS version,
(SELECT release FROM ( SELECT release, sort FROM releases
UNION
SELECT 'vcs' AS release, 10000 AS sort
) reltmp WHERE sort = MAX(rx.sort)) AS release
FROM (
-- select highest version which is not in experimental - except if a package resides in experimental only
SELECT pex.package, CASE WHEN pnoex.version IS NOT NULL THEN pnoex.version ELSE pex.version END AS version FROM
(SELECT package, MAX(version) AS version FROM packages
WHERE package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med' AND task IN ('bio', 'bio-dev'))
GROUP BY package
) pex
LEFT OUTER JOIN
(SELECT package, MAX(version) AS version FROM packages
WHERE package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med' AND task IN ('bio', 'bio-dev'))
AND release != 'experimental'
GROUP BY package
) pnoex ON pex.package = pnoex.package
UNION
SELECT DISTINCT package, strip_binary_upload(chlog_version) AS version FROM blends_prospectivepackages
) px
JOIN (
-- select the release in which this version is available
SELECT DISTINCT package, version, release FROM packages
WHERE package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med' AND task IN ('bio', 'bio-dev'))
UNION
SELECT DISTINCT package, chlog_version AS version, 'vcs' AS release FROM blends_prospectivepackages
WHERE package IN
(SELECT DISTINCT package FROM blends_dependencies WHERE blend = 'debian-med' AND task IN ('bio', 'bio-dev'))
) py ON px.package = py.package AND px.version = py.version
JOIN ( SELECT release, sort FROM releases
UNION
SELECT 'vcs' AS release, 10000 AS sort
) rx ON py.release = rx.release
GROUP BY px.package, px.version
) pvar ON pvar.package = p.package AND pvar.version = p.version AND pvar.release = p.release
LEFT OUTER JOIN (
SELECT package, array_agg(regexp_replace(tag, 'interface::', '')) AS tags
FROM debtags
WHERE tag LIKE 'interface::%'
GROUP BY package
) interface ON interface.package = p.package
LEFT OUTER JOIN (
SELECT package, array_agg(regexp_replace(tag, 'biology::', '')) AS tags
FROM debtags
WHERE tag LIKE 'biology::%'
GROUP BY package
) biology ON biology.package = p.package
LEFT OUTER JOIN (
SELECT package, array_agg(regexp_replace(tag, 'field::', '')) AS tags
FROM debtags
WHERE tag LIKE 'field::%'
GROUP BY package
) field ON field.package = p.package
LEFT OUTER JOIN (
SELECT package, array_agg(regexp_replace(tag, 'use::', '')) AS tags
FROM debtags
WHERE tag LIKE 'use::%'
GROUP BY package
) use ON use.package = p.package
LEFT OUTER JOIN bibref bibdoi ON p.source = bibdoi.source AND bibdoi.rank = 0 AND bibdoi.key = 'doi' AND bibdoi.package = ''
LEFT OUTER JOIN popcon pop ON p.package = pop.package
LEFT OUTER JOIN edam edam ON p.source = edam.source AND p.package = edam.package
LEFT OUTER JOIN registry biotools ON p.source = biotools.source AND biotools.name = 'bio.tools'
LEFT OUTER JOIN registry omictools ON p.source = omictools.source AND omictools.name = 'OMICtools'
LEFT OUTER JOIN registry seqwiki ON p.source = seqwiki.source AND seqwiki.name = 'SEQwiki'
LEFT OUTER JOIN registry scicrunch ON p.source = scicrunch.source AND scicrunch.name = 'SciCrunch'
LEFT OUTER JOIN registry rrid ON p.source = rrid.source AND rrid.name = 'RRID'
ORDER BY source, package
-- If you want to make the output at source level uncomment this
-- ) tmp
-- GROUP BY source, distribution, release, component, version, homepage
-- ORDER BY source
$JSONEND
;
EOT