#!/usr/bin/perl

# killdupes - kill all the duplicate lines in a file and echo the result to
# standard output.
#
# Usage:
#   killdupes filename.ext [-ignoretws] [-dupefile] [-debug] [-loud]
#   killdupes filename.ext > output.txt
#
# The first argument that is not one of the flags below is taken as the input
# file.  Every line has its line terminator (any mix of CR and LF) removed and,
# by default, its trailing whitespace removed as well.  The first occurrence of
# each distinct line is then printed to standard output in original order.
# Banner text, progress messages and duplicate reports go to standard error so
# that standard output can be redirected cleanly.
#
# Flags (matched exactly):
#   -ignoretws  Leave trailing whitespace alone, so lines differing only in
#               trailing blanks are NOT treated as duplicates (like sort -u).
#   -dupefile   Append each duplicate found to dupefile.txt instead of
#               reporting it on standard error.
#   -debug      Print debugging information to standard error.
#   -loud       Accepted for compatibility.  Duplicates are already reported
#               on standard error by default, so it has no additional effect.
#
# With thanks to Gabor Szabo of Perlmaven.com:
#   https://perlmaven.com/trim
#   https://perlmaven.com/unique-values-in-an-array-in-perl

use strict;
use warnings;

# ---------------------------------------------------------------------------
# Command line: separate the recognised flags from the filename.
# ---------------------------------------------------------------------------

my %is_known_flag = map { $_ => 1 } qw(-ignoretws -debug -dupefile -loud);

my %flag_given;
my $filename;
for my $arg (@ARGV) {
    if ($is_known_flag{$arg}) {
        $flag_given{$arg} = 1;
    }
    elsif (!defined $filename) {
        # Only the first non-flag argument is used; any others are ignored.
        $filename = $arg;
    }
}

my $keep_trailing_ws = $flag_given{'-ignoretws'};
my $debug            = $flag_given{'-debug'};
my $use_dupefile     = $flag_given{'-dupefile'};

print STDERR "\n\n \t KILLDUPES by Batch McNulty (With thanks to Gabor Szabo) \n";
print STDERR "\n Finally you can properly kill duplicate lines in a text file";
print STDERR " without \n any nonsense about trailing whitespace or the wrong type of CR / LF.";
print STDERR "\n";

if (!defined $filename || $filename eq '') {
    print STDERR "\n USAGE: killdupes filename.ext";
    print STDERR "\n\tkilldupes filename.ext > output.txt";
    print STDERR "\n";
    print STDERR "\n Eliminates all duplicate lines in filename.ext and sends the results to ";
    print STDERR "\n standard output, where you can redirect them to a file or do whatever you ";
    print STDERR "\n like. It is more aggressive than sort -u or uniq because trailing whitespace ";
    print STDERR "\n and mixed Windows/Linux style CRLFs are ignored.";
    print STDERR "\n";

    # These options are still in the program, but I didn't think they'd be any use to you.
    # Feel free to uncomment 'em though.

#    print STDERR "\n killdupes filename.ext -loud ";
#    print STDERR "\n Also prints found duplicates to standard error (usually the screen).";
#    print STDERR "\n";
#    print STDERR "\n killdupes filename.txt -dupefile ";
#    print STDERR "\n Also prints found duplicates to dupefile.txt.";
#    print STDERR "\n";
#    print STDERR "\n killdupes filename.txt -debug";
#    print STDERR "\n Also prints debugging information to standard error (Implies -loud).";
#    print STDERR "\n";
#    print STDERR "\n killdupes filename.txt -ignoretws";
#    print STDERR "\n Ignores trailing whitespace - like sort -u.";

    print STDERR "\n This program is free, but if you want to give me money, my Bitcoin address is: ";
    print STDERR "\n 1NYnGXRS4ZzNzmHu5Hsrqx169D7k7qBcYy ";
    die "\n\nThis program requires you to enter a filename as a rider\n\n";
}

print STDERR "\n Opening $filename for killdupe... \n";

if ($keep_trailing_ws) {
    print STDERR "Ignoring trailing whitespace (seeking duplicates less agressively)";
}

# ---------------------------------------------------------------------------
# Read the file, normalising every line as we go.
# ---------------------------------------------------------------------------

# Three-argument open with a lexical handle: the filename comes straight from
# the command line, and the two-argument form would interpret leading/trailing
# characters such as '>' or '|' as an open mode.
open my $in_fh, '<', $filename
    or die "\n\n Looks like you pointed me to a file that doesn't exist or is corrupt.\n ($filename: $!)\n\n";

my @file_lines;
while (my $line = <$in_fh>) {
    # chomp only removes "\n", so strip CR and LF explicitly; this keeps
    # Windows- and Unix-terminated copies of a line comparable in every mode.
    $line =~ s/[\r\n]+\z//;

    # With thanks to Perlmaven.com's Gabor Szabo (https://perlmaven.com/trim)
    $line =~ s/\s+$// unless $keep_trailing_ws;

    push @file_lines, $line;
}
close $in_fh;

############ debugging ############
if ($debug) {
    print STDERR "OK, so here's the file array:";
    print STDERR "\n_____________________________________\n";
    print STDERR @file_lines;
    print STDERR "\n";
    print STDERR "Trailing whitespaces and cr/lfs have been removed.";
    print STDERR "Now it's time to eliminate those duplicates";

    print STDERR "lenght of file array:";

    # Number of lines read (not the last index).
    print STDERR scalar @file_lines;
    print STDERR "test hash (shld be empty):";
    print STDERR ".";
    print STDERR "\n\n About to process file array...\n";
}
################## /debugging ##############

# ---------------------------------------------------------------------------
# Keep the first occurrence of every line; report or store the rest.
# ---------------------------------------------------------------------------

my %seen;            # line text => number of times encountered so far
my @unique_lines;    # first occurrences, in original file order
my $dupe_fh;         # opened lazily, only when a dupe must be stored

for my $line (@file_lines) {
    print STDERR "\n curr value:$line." if $debug;

    if (!$seen{$line}++) {
        push @unique_lines, $line;
        next;
    }

    if ($use_dupefile) {
        print STDERR "Storing dupes in dupefile...";
        if (!defined $dupe_fh) {
            # Append mode, so dupes from earlier runs are preserved.
            open $dupe_fh, '>>', 'dupefile.txt'
                or die "Shit! Couldn't open dupefile! ($!)";
        }
        # Plain print, not printf: the line is data, and any '%' in it must
        # not be interpreted as a format directive.
        print {$dupe_fh} "$line\n";
    }
    else {
        print STDERR "\n DUPE FOUND! $line.";
    }
}

if (defined $dupe_fh) {
    # Buffered write errors only surface at close, so check it.
    close $dupe_fh or die "Couldn't finish writing dupefile.txt: $!\n";
}

### debugging
if ($debug) {
    print STDERR "\n\n Test hash:";
    print STDERR join ",", keys %seen;
    print STDERR "\n";
    print STDERR "\n Unique array is now ready!\n";
}
### /debugging

# Terminate every line, including the last one, so that redirected output is
# a well-formed text file.
print "$_\n" for @unique_lines;

print STDERR "\n";
print STDERR "\n\n All done! Please Bitcoin me at: 1NYnGXRS4ZzNzmHu5Hsrqx169D7k7qBcYy \n\n";