-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheckDST.pl
117 lines (106 loc) · 2.64 KB
/
checkDST.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#!/usr/bin/env perl
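# This script examines the stations listed in a dst file and reports the
# genuine duplicates: those that are not already listed in the APREF
# discontinuity file, a ~/renaming/*.renaming file or a ~/renaming/*.ignore file.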
# Choose the .dst file to check from the current directory and store its
# name in $dFile. A single candidate is taken automatically; with several, a
# numbered menu is printed and the choice is read from STDIN.
# Dies when there is no candidate or the reply is not one of the listed numbers.
@dstFiles = glob '*.dst';
$numFiles = scalar @dstFiles;
if ($numFiles == 1) {
    $dFile = $dstFiles[0];
}
elsif ($numFiles == 0) {
    die "There is no dst file to check.\n";
}
else {
    print "There are multiple dst files:\n";
    for my $n (1 .. $numFiles) {
        print "\t$n.\t$dstFiles[$n - 1]\n";
    }
    print "Type the number of the file you want to check: ";
    $fileNum = <STDIN>;
    $fileNum = '' unless defined $fileNum;    # EOF on STDIN is an invalid reply
    chomp $fileNum;
    $fileNum =~ s/^\s+//;
    $fileNum =~ s/\s+$//;
    # Accept whole numbers only. A purely numeric range test lets replies such
    # as "1.5" or "2x" through, and they would then silently pick a file.
    unless ($fileNum =~ /^\d+$/ && $fileNum >= 1 && $fileNum <= $numFiles) {
        print 'Invalid response. Please select a number between 1 and ';
        print "$numFiles\n";
        die "\n";
    }
    $dFile = $dstFiles[$fileNum - 1];
}
# Build %aprefStation from the APREF discontinuity file in the home directory:
# the key is the first whitespace-separated field of every line that does not
# start with '#'. When several files match the pattern, the last one returned
# by glob is used.
my $aprefFile = (glob '~/apref*.disconts')[-1];
if (!defined $aprefFile) {
    # Without this file every station in the dst file looks like a non-APREF
    # station, so say so rather than failing silently.
    warn "No ~/apref*.disconts file found; no station will be treated as an APREF station.\n";
}
elsif (open my $apref, '<', $aprefFile) {
    while (my $line = <$apref>) {
        next if $line =~ /^\#/;    # comment line
        my ($name) = split ' ', $line;
        $aprefStation{$name}++;
    }
    close $apref;
}
else {
    warn "Cannot open $aprefFile: $!\n";
}
# Build %renameStation from every ~/renaming/*.renaming file. The station name
# is the first 20 characters of a line with surrounding whitespace removed.
# Lines starting with "!#" are skipped.
for my $renameFile (glob '~/renaming/*.renaming') {
    # Three-argument open on a lexical handle: the file name is never parsed
    # for a mode, and an unreadable file is reported instead of silently
    # contributing nothing.
    my $in;
    unless (open $in, '<', $renameFile) {
        warn "Cannot open $renameFile: $!\n";
        next;
    }
    while (my $line = <$in>) {
        next if $line =~ /^!\#/;
        my $name = substr $line, 0, 20;
        $name =~ s/^\s*//;
        $name =~ s/\s*$//;
        $renameStation{$name}++;
    }
    close $in;
}
# Build %ignoreStation from every ~/renaming/*.ignore file: each line, with
# surrounding whitespace removed, is one station name.
for my $ignoreFile (glob '~/renaming/*.ignore') {
    # Checked three-argument open so an unreadable list is reported rather
    # than silently treated as empty.
    my $in;
    unless (open $in, '<', $ignoreFile) {
        warn "Cannot open $ignoreFile: $!\n";
        next;
    }
    while (my $name = <$in>) {
        chomp $name;
        $name =~ s/^\s*//;
        $name =~ s/\s*$//;
        $ignoreStation{$name}++;
    }
    close $in;
}
# Build %statJuris: station name => list of tags of the station files that
# define it. The tag ($juris, presumably a jurisdiction code) is the part of
# the file name before the first underscore, with the directory prefix removed.
# Files in stn/inApriori/ are read first, then those in stn/, so the tags for
# a station appear in that order.
# A station is recognised by a line containing <DnaStation>; the line that
# follows it is expected to hold the <Name>...</Name> element.
for my $stnDir ('stn/inApriori/', 'stn/') {
    for my $stnFile (glob "${stnDir}*.xml") {
        my $in;
        unless (open $in, '<', $stnFile) {
            warn "Cannot open $stnFile: $!\n";
            next;
        }
        my ($juris) = split '_', $stnFile;
        $juris =~ s/^\Q$stnDir\E//;
        while (my $line = <$in>) {
            next unless $line =~ /<DnaStation>/;
            my $name = <$in>;
            # File ended right after the tag: there is no name to record.
            last unless defined $name;
            chomp $name;
            $name =~ s/^\s*<Name>//;
            $name =~ s/<\/Name>\s*$//;
            push @{ $statJuris{$name} }, $juris;
        }
        close $in;
    }
}
# Read the chosen .dst file and collect into @dups every listed station that
# is not in %aprefStation, %renameStation or %ignoreStation.
# The station list starts three lines after the line beginning with
# "Default reference frame"; on each non-empty line the station name is
# everything from the fifth character onwards.
# Dies if the file cannot be opened: carrying on would report "no duplicates"
# for a file that was never read.
open my $dst, '<', $dFile or die "Cannot open $dFile: $!\n";
my $sawHeader = 0;
while (my $line = <$dst>) {
    if ($line =~ /^Default reference frame/) {
        $sawHeader = 1;
        last;
    }
}
warn "No 'Default reference frame' line found in $dFile; no stations were read.\n"
    unless $sawHeader;
scalar <$dst> for 1 .. 3;    # discard the three lines that follow the header
while (my $line = <$dst>) {
    next if $line =~ /^\n/;  # empty line
    chomp $line;
    my $name = substr $line, 4;
    next if $aprefStation{$name} or $renameStation{$name} or $ignoreStation{$name};
    push @dups, $name;
}
close $dst;
# Report the result. When duplicates were found they are written to a file
# named after the dst file with its extension changed to .dup, one station per
# line followed by a tab-separated list of its %statJuris tags.
if (@dups) {
    # Swap only the trailing extension. Replacing the first "dst" anywhere in
    # the name would turn "dstnet.dst" into "dupnet.dst", which is another
    # .dst file. If the name has no .dst extension, append .dup so the input
    # file can never be overwritten.
    my $dupFile = $dFile;
    $dupFile =~ s/\.dst$/.dup/i or $dupFile .= '.dup';
    open my $dupFh, '>', $dupFile or die "Cannot write $dupFile: $!\n";
    for my $name (@dups) {
        print {$dupFh} join("\t", $name, @{ $statJuris{$name} || [] }), "\n";
    }
    # Buffered write errors only surface at close, so check it before
    # announcing success.
    close $dupFh or die "Cannot finish writing $dupFile: $!\n";
    print "Duplicates were found and written to $dupFile\n";
}
else {
    print "Too sweet! No duplicates\n";
}