Skip to content

Commit dd43b74

Browse files
authored
Program to uniquify lists & install script
killdupes uniquifies lists, ignoring trailing whitespace and CR/LF mismatches, a major cause of frustration among wordlist fiends.
1 parent f41fb03 commit dd43b74

File tree

2 files changed

+155
-0
lines changed

2 files changed

+155
-0
lines changed

install.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Install killdupes system-wide.
# Mode 755 (not 777): everyone may run the program, but only the owner may
# modify it. A world-writable executable on the PATH lets any local user
# replace a program that other users run.
sudo chmod 755 killdupes.pl
sudo cp killdupes.pl /usr/bin/killdupes
# cp keeps the mode of an already-existing destination file, so set the mode of
# the installed copy explicitly (this also repairs earlier 777 installs).
sudo chmod 755 /usr/bin/killdupes
3+

killdupes.pl

Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
#!/usr/bin/perl

# killdupes - kill all the duplicate lines in a file and echo the result to
# standard output.
#
# Usage:
#     killdupes filename.ext [options]
#     killdupes filename.ext > output.txt
#
# The first argument is the file to read. Each line has its line terminator
# (LF or CRLF) removed and, unless -ignoretws is given, its trailing
# whitespace stripped. The first occurrence of every resulting line is then
# printed to standard output in the original order, one per line.
#
# Options (matched exactly, anywhere on the command line):
#     -ignoretws  keep trailing whitespace, i.e. compare less aggressively
#     -dupefile   append each duplicate to dupefile.txt instead of reporting
#                 it on standard error
#     -debug      print debugging information to standard error
#     -loud       accepted for compatibility; it has no effect because
#                 duplicates are already reported on standard error by default
#
# All banners, progress messages and duplicate reports go to standard error,
# so redirecting standard output captures only the unique lines.

use strict;
use warnings;

my $filename = $ARGV[0];

print STDERR "\n\n \t KILLDUPES by Batch McNulty (With thanks to Gabor Szabo) \n";
print STDERR "\n Finally you can properly kill duplicate lines in a text file";
print STDERR " without \n any nonsense about trailing whitespace or the wrong type of CR / LF.";
print STDERR "\n";

# Test for "no filename given" rather than plain truthiness, so that a file
# literally named "0" is still accepted.
if (!defined $filename || $filename eq '') {
    print STDERR "\n USAGE: killdupes filename.ext";
    print STDERR "\n\tkilldupes filename.ext > output.txt";
    print STDERR "\n";
    print STDERR "\n Eliminates all duplicate lines in filename.ext and sends the results to ";
    print STDERR "\n standard output, where you can redirect them to a file or do whatever you ";
    print STDERR "\n like. It is more aggressive than sort -u or uniq because trailing whitespace ";
    print STDERR "\n and mixed Windows/Linux style CRLFs are ignored.";
    print STDERR "\n";

    # These options are still in the program, but I didn't think they'd be any use to you.
    # Feel free to uncomment 'em though.

    # print STDERR "\n killdupes filename.ext -loud ";
    # print STDERR "\n Also prints found duplicates to standard error (usually the screen).";
    # print STDERR "\n";
    # print STDERR "\n killdupes filename.txt -dupefile ";
    # print STDERR "\n Also prints found duplicates to dupefile.txt.";
    # print STDERR "\n";
    # print STDERR "\n killdupes filename.txt -debug";
    # print STDERR "\n Also prints debugging information to standard error (Implies -loud).";
    # print STDERR "\n";
    # print STDERR "\n killdupes filename.txt -ignoretws";
    # print STDERR "\n Ignores trailing whitespace - like sort -u.";

    print STDERR "\n This program is free, but if you want to give me money, my Bitcoin address is: ";
    print STDERR "\n 1NYnGXRS4ZzNzmHu5Hsrqx169D7k7qBcYy " ;
    die "\n\nThis program requires you to enter a filename as a rider\n\n";
}
print STDERR "\n Opening $filename for killdupe... \n";

# Options are compared exactly. A substring match would let a filename such as
# "my-debug.txt" shadow the real option. grep in scalar context yields the
# number of matches, which serves as a boolean flag.
my $keep_trailing_ws = grep { $_ eq '-ignoretws' } @ARGV;
my $debug            = grep { $_ eq '-debug' }     @ARGV;
my $use_dupefile     = grep { $_ eq '-dupefile' }  @ARGV;

if ($keep_trailing_ws) {
    print STDERR "Ignoring trailing whitespace (seeking duplicates less agressively)";
}

# Three-argument open with a lexical handle: the filename is never interpreted
# as an open mode or a pipe, whatever characters it starts or ends with.
open my $in_fh, '<', $filename
    or die "\n\n Looks like you pointed me to a file that doesn't exist or is corrupt.\n ($filename: $!)\n\n";

my @lines;
while (my $line = <$in_fh>) {
    # chomp only removes "\n"; strip CR as well so that Windows and Unix line
    # endings compare equal even when trailing whitespace is being kept.
    $line =~ s/[\r\n]+\z//;
    $line =~ s/\s+\z// unless $keep_trailing_ws;    # With thanks to Perlmaven.com's Gabor Szabo (https://perlmaven.com/trim)
    push @lines, $line;
}
close $in_fh;

my %seen;      # line text => number of times it has been encountered
my @unique;    # first occurrence of every line, in file order

############ debugging ############
if ($debug) {
    print STDERR "OK, so here's the file array:";
    print STDERR "\n_____________________________________\n";
    print STDERR @lines;
    print STDERR "\n";
    print STDERR "Trailing whitespaces and cr/lfs have been removed.";
    print STDERR "Now it's time to eliminate those duplicates";
    print STDERR "lenght of file array:";
    print STDERR $#lines;
    print STDERR "test hash (shld be empty):";
    print STDERR join ",", keys %seen;
    print STDERR ".";
    print STDERR "\n\n About to process file array...\n";
}
################## /debugging ##############

# Removed trailing whitespace and cr /lf nonsense
# Now to remove duplicate lines!

my $dupe_fh;    # opened lazily, on the first duplicate, when -dupefile is given

for my $line (@lines) {
    print STDERR "\n curr value:$line." if $debug;    # debugging

    # Post-increment: false the first time a line is seen, true afterwards.
    if (!$seen{$line}++) {
        push @unique, $line;
        next;
    }

    if ($use_dupefile) {
        print STDERR "Storing dupes in dupefile...";
        if (!$dupe_fh) {
            open $dupe_fh, '>>', 'dupefile.txt'
                or die "Shit! Couldn't open dupefile!";
        }
        # print, not printf: the line is data, and any "%" in it must not be
        # interpreted as a format directive.
        print {$dupe_fh} "$line\n";
    }
    else {
        print STDERR "\n DUPE FOUND! $line.";
    }
}

# Buffered write errors only surface at close. Report them, but still emit
# the unique lines below.
if ($dupe_fh) {
    close $dupe_fh or warn "\n Could not finish writing dupefile.txt: $!\n";
}

### debugging
if ($debug) {
    print STDERR "\n\n Test hash:";
    print STDERR join ",", keys %seen;
    print STDERR "\n";
    print STDERR "\n Unique array is now ready!\n";
}
### /debugging
# Credit for the above code is also due to Gabor Szabo
# https://perlmaven.com/unique-values-in-an-array-in-perl

# Terminate every line, including the last one, so the output is a well-formed
# text file that other line-oriented tools can consume or append to.
print "$_\n" for @unique;
print STDERR "\n";
print STDERR "\n\n All done! Please Bitcoin me at: 1NYnGXRS4ZzNzmHu5Hsrqx169D7k7qBcYy \n\n";
152+

0 commit comments

Comments
 (0)