-
Notifications
You must be signed in to change notification settings - Fork 1
/
dumpSolrCore.pl
executable file
·86 lines (63 loc) · 2.04 KB
/
dumpSolrCore.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#!/usr/bin/env perl
use strict;
use warnings;
use FindBin qw($Bin);
use Getopt::Long::Descriptive;
use Data::Dumper;
use JSON;
# Maps a Solr core name to the field holding that core's unique key.
# The value looked up here is what the dump later sorts on; cores that are
# not listed are handled by the caller's fallback.
my %uid_names = (
    genome           => 'genome_id',
    genome_sequence  => 'sequence_id',
    genome_feature   => 'feature_id',
    feature_sequence => 'md5',

    # These cores all use a plain "id" field as their key.
    ( map { $_ => 'id' } qw(pathway subsystem sp_gene genome_amr) ),
);
# Base URL of the Solr server, read from the environment. Swap in the
# commented-out line to read PATRIC_SOLR_DEV instead.
#my $solrServer = $ENV{PATRIC_SOLR_DEV};
my $solrServer = $ENV{PATRIC_SOLR};

# Command line: --core names the Solr core to dump; --help/-h prints usage.
my ($opt, $usage) = describe_options(
    "%c %o",
    ["core=s", "Solr core name"],
    [],
    ["help|h", "Print usage message and exit"],
);

if ($opt->help) {
    print $usage->text;
    exit 0;
}

# --core is mandatory: say so on STDERR and exit non-zero.
unless ($opt->core) {
    print STDERR "Missing required option --core\n", $usage->text;
    exit 1;
}

# Fail early with a clear message instead of building request URLs from an
# undefined server address. Checked after --help so that help always works.
die "Environment variable PATRIC_SOLR is not set; cannot locate the Solr server\n"
    unless defined $solrServer && length $solrServer;

my $core = $opt->core;

# Field used to sort the dump; cores missing from %uid_names fall back to "id".
my $uid_name = $uid_names{$core} || "id";
# Dump the selected core page by page using Solr cursor paging.
#
# Each request sends the current cursor mark and asks for up to $rows
# documents sorted by $uid_name; the page's documents (with the _version_
# field removed) are written to "<core>_<page>.json" in the current
# directory. Paging stops when the server returns the same cursor mark that
# was sent. Any failure (wget, malformed response, file I/O) is fatal.
my $cursor      = "*";       # cursor mark sent with the first request
my $next_cursor = "";        # cursor mark returned by the latest request
my $rows        = 100000;    # documents requested per page
my $counter     = 0;         # 1-based page number, used in the file name
my $format      = "&wt=json&indent=on";

# Request pieces that do not change between pages.
# NOTE(review): the date_inserted range is hard-coded; the commented-out
# query below selects every document instead.
my $query = "/select?q=date_inserted:[2019-04-15T00:00:00Z TO 2019-04-30T00:00:00Z]";
#my $query = "/select?q=*:*";
my $fields    = "&fl=*";
my $sort      = "&sort=$uid_name+asc";
my $page_size = "&rows=$rows";
my $base_url  = $solrServer . "/$core" . $query . $fields . $sort . $page_size . $format;

while ($cursor ne $next_cursor) {
    $counter++;
    $cursor = $next_cursor unless $counter == 1;
    print "Processing $core\t$counter\t$cursor\n";
    my $outfile = $core . "_" . $counter . ".json";

    # The cursor mark is an opaque token from the server. Percent-encode every
    # byte outside the URL-unreserved set so that characters such as '+', '='
    # or '/' reach the server unchanged (escaping only '/' is not enough).
    my $cursor_param = $cursor;
    utf8::encode($cursor_param);
    $cursor_param =~ s/([^A-Za-z0-9\-._~])/sprintf("%%%02X", ord($1))/ge;

    my $solrQuery = $base_url . "&cursorMark=$cursor_param";
    print "\t$solrQuery\n";

    # Run wget through a list-form pipe: no shell is involved, so nothing in
    # the URL can be reinterpreted, and the exit status can be checked.
    open(my $wget, '-|', 'wget', '-q', '-O', '-', $solrQuery)
        or die "Cannot run wget: $!\n";
    my $result = do { local $/; <$wget> };
    close($wget)
        or die "wget failed for $solrQuery ("
             . ($! ? "error closing pipe: $!" : "exit status " . ($? >> 8))
             . ")\n";
    die "Empty response from $solrQuery\n"
        unless defined $result && length $result;

    my $resultObj = decode_json($result);

    $next_cursor = $resultObj->{nextCursorMark};
    die "Response from $solrQuery contains no nextCursorMark\n"
        unless defined $next_cursor;

    # Strip the _version_ field from every document before writing the page.
    my $docs = $resultObj->{response}->{docs} || [];
    delete $_->{_version_} for @$docs;

    # to_json returns a character string, so the handle needs an explicit
    # UTF-8 layer for non-ASCII field values to be written correctly.
    my $docsJson = to_json($docs, {pretty => 1});
    open(my $out, '>:encoding(UTF-8)', $outfile)
        or die "Cannot open $outfile for writing: $!\n";
    print {$out} $docsJson
        or die "Cannot write to $outfile: $!\n";
    close($out)
        or die "Cannot close $outfile: $!\n";
}