search.pl

#!/usr/bin/env perl

#<--------------------------------- MAN PAGE --------------------------------->|

=pod

=head1 NAME

search - Search for books and sort the results by popularity


=head1 SYNOPSIS

B<search.pl> 
[B<-z> F<columns>] 
[B<-r> F<number>] 
[B<-c> F<numdays>] 
[B<-o> F<filename>] 
[B<-i>]
F<keyword>...

Use quotes if you want exact matches (see examples section)

=head1 OPTIONS

Mandatory arguments to long options are mandatory for short options too.

=over 4

=item B<-z, --order>=F<columns>

sort order, all descending, comma-separated column names,
default is "stars,num_ratings,year"
(you're free to change the order but not the names)


=item B<-r, --ratings>=F<number>

only include books with N or more ratings:
a 4-stars book rated by 30 readers might be "better" than a 5-stars book rated
by 1 reader (perhaps the author). This also declutters our F<outfile>.
Use low values to cut away the nonsense; use high values only if you know
the available range, otherwise you might get zero results.
Default is 5, or 0 for an exact match.


=item B<-c, --cache>=F<numdays>

number of days to store and reuse downloaded data in F</tmp/FileCache/>,
default is 7 days. This helps when experimenting with parameters, because
loading data from Goodreads is a time-consuming process.


=item B<-o, --outfile>=F<filename>

name of the HTML file where we write results to;
for the default, see section FILES


=item B<-i, --ignore-errors>

Don't retry on errors, just keep going. 
Sometimes useful if a single Goodreads resource hangs over long periods 
and you're okay with some values missing in your result.
This option is not recommended when you run the program unattended.


=item B<-?, --help>

show full man page

=back


=head1 FILES

F<./list-out/search-$KEYWORD.html>

F</tmp/FileCache/>


=head1 EXAMPLES

$ ./search.pl linux

$ ./search.pl --ratings=10 --order=stars,num_ratings linux kernel

$ ./search.pl --order=year,num_ratings linux kernel

$ ./search.pl -r 10 -z year "linux kernel"


=head1 REPORTING BUGS

Report bugs to <datakadabra@gmail.com> or use Github's issue tracker
L<https://github.com/andre-st/goodreads-toolbox/issues>


=head1 COPYRIGHT

This is free software. You may redistribute copies of it under the terms of
the GNU General Public License L<https://www.gnu.org/licenses/gpl.html>.
There is NO WARRANTY, to the extent permitted by law.


=head1 SEE ALSO

More info in ./help/search.md


=head1 VERSION

2022-03-10 (Since 2018-07-29)

=cut

#<--------------------------------- 79 chars --------------------------------->|


use strict;
use warnings;
use locale;
use 5.18.0;

# Perl core:
use FindBin;
use local::lib "$FindBin::Bin/lib/local/";
use        lib "$FindBin::Bin/lib/";
use Time::HiRes qw( time tv_interval );
use POSIX       qw( strftime locale_h );
use File::Spec; # Platform indep. directory separator
use IO::File;
use Getopt::Long;
use Pod::Usage;
# Third party:
use List::MoreUtils qw( uniq );
# Ours:
use Goodscrapes;


# ----------------------------------------------------------------------------
# Program configuration:
# 
setlocale( LC_CTYPE, "en_US" );  # GR dates all en_US
STDOUT->autoflush( 1 );          # Unbuffered, so progress output shows up right away
gsetopt( cache_days => 7 );      # Default, can be overridden with --cache below

our $TSTART = time();  # Program start, used for the run-time report at the end
our @ORDER;            # Sort columns (lowercase names), handed to gsearch()
our $NUMRATINGS;       # Minimum number of ratings; undef until given or defaulted
our $PHRASE;           # All keyword arguments joined with a single space
our $OUTPATH;          # Path of the HTML result file
our $ISEXACT;          # True if the first keyword argument contains a space
my  $ordercsv = '';    # Raw --order value as typed by the user

# Getopt::Long calls the code refs with ( option_name, option_value ),
# which is why the cache handler reads $_[1].
GetOptions( 'ratings|r=i'     => \$NUMRATINGS,
            'order|z=s'       => \$ordercsv,
            'outfile|o=s'     => \$OUTPATH,
            'ignore-errors|i' => sub{  gsetopt( ignore_errors => 1 );   },
            'cache|c=i'       => sub{  gsetopt( cache_days => $_[1] );  },
            'help|?'          => sub{  pod2usage( -verbose => 2 );      })
	or pod2usage( 1 );

# Test the length, not the truthiness: a keyword such as "0" is a valid
# phrase but would be rejected by a plain boolean check.
$PHRASE = join( ' ', @ARGV );
pod2usage( 1 ) if !length $PHRASE;

if( !$OUTPATH )
{
	# The phrase becomes part of the file name. Directory separators in it
	# (e.g. "TCP/IP") would point into a non-existing subdirectory and make
	# the program die only after the whole search has finished, so they are
	# replaced up front.
	my $filephrase = $PHRASE =~ s{[/\\\0]}{_}gr;
	$OUTPATH = File::Spec->catfile( $FindBin::Bin, 'list-out', "search-${filephrase}.html" );
}

$ISEXACT    = index( $ARGV[0], ' ' ) > -1;  # Quoted "aaa bbb" as single argument, otherwise 2 args
$NUMRATINGS = $ISEXACT ? 0 : 5 if !defined $NUMRATINGS;
$ordercsv   =~ s/\s+//g;  # Mistakenly added spaces

# User columns first, then the default columns so that missing ones are
# appended; uniq() keeps the first occurrence. Empty names caused by stray
# commas (",stars" or "stars,,year") are dropped.
@ORDER = uniq( ( grep { length $_ } split( ',', lc $ordercsv ) ),
               qw( stars num_ratings year ) );


# ----------------------------------------------------------------------------
# Primary data structures:
# 
my @books;  # Filled by gsearch() below, read when writing the HTML file


# ----------------------------------------------------------------------------
# Load basic data:
#
# Print the effective search settings first; the line ends with "progress.. "
# and no newline, the gmeter() callback is handed to gsearch() as on_progress.
#
my $phrase_label = $ISEXACT
                 ? "$PHRASE (exact)"
                 : $PHRASE;

printf( "Searching books:\n\n about..... %s\n rated by.. %d members or more\n order by.. %s\n progress.. ",
        $phrase_label,
        $NUMRATINGS,
        join( ', ', @ORDER ) );

gsearch(
	phrase      => $PHRASE,
	ra_into     => \@books,
	is_exact    => $ISEXACT,
	ra_order_by => \@ORDER,
	num_ratings => $NUMRATINGS,
	on_progress => gmeter(),
);


# ----------------------------------------------------------------------------
# Write results to HTML file
# 
printf( "\n\nWriting search result (N=%d) to \"%s\"... ", scalar @books, $OUTPATH );

my $fh  = IO::File->new( $OUTPATH, 'w' ) or die "[FATAL] Cannot write to $OUTPATH ($!)";
my $now = strftime( '%a %b %e %H:%M:%S %Y', localtime );

# Table header: fixed columns followed by the first three sort columns
print $fh ghtmlhead( "Query: \"$PHRASE\", $now", [ '!Cover', 'Title', 'Author', ">$ORDER[0]:", "$ORDER[1]:", "$ORDER[2]:" ]);

# One table row per book. Every interpolated value goes through ghtmlsafe();
# the ${\ ... } construct is only there to call a function inside the string.
# The loop variable is deliberately not called $b, which is reserved for sort().
for my $book (@books)
{
	print $fh qq{
			<tr>
			<td><img src="${\ghtmlsafe( $book->{img_url} )}" height="80" /></td>
			<td><a  href="${\ghtmlsafe( $book->{url    } )}" target="_blank">
			              ${\ghtmlsafe( $book->{title  } )}</a></td>
			<td>
				<a href="${\ghtmlsafe( $book->{rh_author}->{url } )}" target="_blank">
				         ${\ghtmlsafe( $book->{rh_author}->{name} )}</a>
			</td>
			<td>${\ghtmlsafe( $book->{$ORDER[0]} )}</td>
			<td>${\ghtmlsafe( $book->{$ORDER[1]} )}</td>
			<td>${\ghtmlsafe( $book->{$ORDER[2]} )}</td>
			</tr>
			};
}

print $fh ghtmlfoot();

# Close explicitly and check the result: buffered write errors (e.g. disk
# full) only surface when the handle is flushed at close time.
$fh->close() or die "[FATAL] Cannot write to $OUTPATH ($!)";


# ----------------------------------------------------------------------------
# Done:
#
# Wall-clock run time since $TSTART, reported in whole minutes
my $elapsed_minutes = ( time() - $TSTART ) / 60;
printf( "\nTotal time: %.0f minutes\n", $elapsed_minutes );