Implement raise_error option for decoded_content.

gisle · gisle · commit da90ad574e0d · 2004-11-30T12:00:21.000Z
diff --git a/lib/HTTP/Message.pm b/lib/HTTP/Message.pm
@@ -1,10 +1,10 @@
 package HTTP::Message;
 
-# $Id: Message.pm,v 1.51 2004/11/30 11:37:26 gisle Exp $
+# $Id: Message.pm,v 1.52 2004/11/30 12:00:22 gisle Exp $
 
 use strict;
 use vars qw($VERSION $AUTOLOAD);
-$VERSION = sprintf("%d.%02d", q$Revision: 1.51 $ =~ /(\d+)\.(\d+)/);
+$VERSION = sprintf("%d.%02d", q$Revision: 1.52 $ =~ /(\d+)\.(\d+)/);
 
 require HTTP::Headers;
 require Carp;
@@ -159,64 +159,71 @@ sub content_ref
 sub decoded_content
 {
     my($self, %opt) = @_;
+    my $content_ref;
 
-    require HTTP::Headers::Util;
-    my($ct, %ct_param);
-    if (my @ct = HTTP::Headers::Util::split_header_words($self->header("Content-Type"))) {
-	($ct, undef, %ct_param) = @{$ct[-1]};
-	$ct = lc($ct);
+    eval {
 
-	Carp::croak("Can't decode multipart content")
-	    if $ct =~ m,^multipart/,;
-    }
+	require HTTP::Headers::Util;
+	my($ct, %ct_param);
+	if (my @ct = HTTP::Headers::Util::split_header_words($self->header("Content-Type"))) {
+	    ($ct, undef, %ct_param) = @{$ct[-1]};
+	    $ct = lc($ct);
 
-    my $content_ref = $self->content_ref;
-    Carp::croak("Can't decode ref content") if ref($content_ref) ne "SCALAR";
-
-    if (my $h = $self->header("Content-Encoding")) {
-	$h =~ s/^\s+//;
-	$h =~ s/\s+$//;
-	for my $ce (reverse split(/\s*,\s*/, lc($h))) {
-	    next unless $ce || $ce eq "identity";
-	    if ($ce eq "gzip" || $ce eq "x-gzip") {
-		require Compress::Zlib;
-		$content_ref = \Compress::Zlib::memGunzip($$content_ref);
-		Carp::croak("Can't gunzip content") unless defined $$content_ref;
-	    }
-	    elsif ($ce eq "x-bzip2") {
-		require Compress::Bzip2;
-		$content_ref = Compress::Bzip2::decompress($$content_ref);
-		Carp::croak("Can't bunzip content") unless defined $$content_ref;
-	    }
-	    elsif ($ce eq "deflate") {
-		require Compress::Zlib;
-		$content_ref = \Compress::Zlib::uncompress($$content_ref);
-		Carp::croak("Can't inflate content") unless defined $$content_ref;
-	    }
-	    elsif ($ce eq "compress" || $ce eq "x-compress") {
-		Carp::croak("Can't uncompress content");
-	    }
-	    elsif ($ce eq "base64") {  # not really C-T-E, but should be harmless
-		require MIME::Base64;
-		$content_ref = \MIME::Base64::decode($$content_ref);
-	    }
-	    elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
-		require MIME::QuotedPrint;
-		$content_ref = \MIME::QuotedPrint::decode($$content_ref);
-	    }
-	    else {
-		Carp::croak("Don't know how to decode Content-Encoding '$ce'");
+	    die "Can't decode multipart content" if $ct =~ m,^multipart/,;
+	}
+
+	$content_ref = $self->content_ref;
+	die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
+
+	if (my $h = $self->header("Content-Encoding")) {
+	    $h =~ s/^\s+//;
+	    $h =~ s/\s+$//;
+	    for my $ce (reverse split(/\s*,\s*/, lc($h))) {
+		next unless $ce || $ce eq "identity";
+		if ($ce eq "gzip" || $ce eq "x-gzip") {
+		    require Compress::Zlib;
+		    $content_ref = \Compress::Zlib::memGunzip($$content_ref);
+		    die "Can't gunzip content" unless defined $$content_ref;
+		}
+		elsif ($ce eq "x-bzip2") {
+		    require Compress::Bzip2;
+		    $content_ref = Compress::Bzip2::decompress($$content_ref);
+		    die "Can't bunzip content" unless defined $$content_ref;
+		}
+		elsif ($ce eq "deflate") {
+		    require Compress::Zlib;
+		    $content_ref = \Compress::Zlib::uncompress($$content_ref);
+		    die "Can't inflate content" unless defined $$content_ref;
+		}
+		elsif ($ce eq "compress" || $ce eq "x-compress") {
+		    die "Can't uncompress content";
+		}
+		elsif ($ce eq "base64") {  # not really C-T-E, but should be harmless
+		    require MIME::Base64;
+		    $content_ref = \MIME::Base64::decode($$content_ref);
+		}
+		elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
+		    require MIME::QuotedPrint;
+		    $content_ref = \MIME::QuotedPrint::decode($$content_ref);
+		}
+		else {
+		    die "Don't know how to decode Content-Encoding '$ce'";
+		}
 	    }
 	}
-    }
 
-    if ($ct && $ct =~ m,^text/,,) {
-	my $charset = $opt{charset} || $ct_param{charset} || $opt{default_charset} || "ISO-8859-1";
-	$charset = lc($charset);
-	if ($charset ne "none") {
-	    require Encode;
-	    $content_ref = \Encode::decode($charset, $$content_ref, Encode::FB_CROAK());
+	if ($ct && $ct =~ m,^text/,,) {
+	    my $charset = $opt{charset} || $ct_param{charset} || $opt{default_charset} || "ISO-8859-1";
+	    $charset = lc($charset);
+	    if ($charset ne "none") {
+		require Encode;
+		$content_ref = \Encode::decode($charset, $$content_ref, Encode::FB_CROAK());
+	    }
 	}
+    };
+    if ($@) {
+	Carp::croak($@) if $opt{raise_error};
+	return undef;
     }
 
     return $opt{ref} ? $content_ref : $$content_ref;
@@ -537,6 +544,13 @@ C<none> can used to suppress decoding of the charset.
 
 This override the default charset of "ISO-8859-1".
 
+=item C<raise_error>
+
+If TRUE then raise an exception if not able to decode content.  Reason
+might be that the specified C<Content-Encoding> or C<charset> is not
+supported.  If this option is FALSE, then decoded_content() will return
+C<undef> on errors, but will still set $@.
+
 =item C<ref>
 
 If TRUE then a reference to decoded content is returned.  This might
diff --git a/lib/HTTP/Response.pm b/lib/HTTP/Response.pm
@@ -1,10 +1,10 @@
 package HTTP::Response;
 
-# $Id: Response.pm,v 1.49 2004/04/09 20:30:41 gisle Exp $
+# $Id: Response.pm,v 1.50 2004/11/30 12:00:22 gisle Exp $
 
 require HTTP::Message;
 @ISA = qw(HTTP::Message);
-$VERSION = sprintf("%d.%02d", q$Revision: 1.49 $ =~ /(\d+)\.(\d+)/);
+$VERSION = sprintf("%d.%02d", q$Revision: 1.50 $ =~ /(\d+)\.(\d+)/);
 
 use strict;
 use HTTP::Status ();
@@ -302,10 +302,15 @@ headers.
 
 =item $r->content( $content )
 
-This is used to get/set the content and it is inherited from the
+This is used to get/set the raw content and it is inherited from the
 C<HTTP::Message> base class.  See L<HTTP::Message> for details and
 other methods that can be used to access the content.
 
+=item $r->decoded_content( %options )
+
+This will return the content after any C<Content-Encoding> and
+charsets have been decoded.  See L<HTTP::Message> for details.
+
 =item $r->request
 
 =item $r->request( $request )
diff --git a/lwpcook.pod b/lwpcook.pod
@@ -55,7 +55,7 @@ handle the response returned.
 
   # check the outcome
   if ($res->is_success) {
-     print $res->content;
+     print $res->decoded_content;
   }
   else {
      print "Error: " . $res->status_line . "\n";
@@ -157,7 +157,7 @@ required header, with something like this:
  $req = HTTP::Request->new('GET',"http://www.perl.com");
 
  $res = $ua->request($req);
- print $res->content if $res->is_success;
+ print $res->decoded_content if $res->is_success;
 
 Replace C<proxy.myorg.com>, C<username> and
 C<password> with something suitable for your site.
diff --git a/lwptut.pod b/lwptut.pod
@@ -111,7 +111,7 @@ illustrated:
 
   # Otherwise, process the content somehow:
   
-  if($response->content =~ m/jazz/i) {
+  if($response->decoded_content =~ m/jazz/i) {
     print "They're talking about jazz today on Fresh Air!\n";
   }
   else {
@@ -147,9 +147,9 @@ C<< $response->content_type >>
 
 =item *
 
-The actual content of the response, in C<< $response->content >>.
+The actual content of the response, in C<< $response->decoded_content >>.
 If the response is HTML, that's where the HTML source will be; if
-it's a GIF, then C<< $response->content >> will be the binary
+it's a GIF, then C<< $response->decoded_content >> will be the binary
 GIF data.
 
 =item *
@@ -311,7 +311,7 @@ the HTML the report of the number of matches:
   die "Weird content type at $url -- ", $response->content_type
    unless $response->content_type eq 'text/html';
 
-  if( $response->content =~ m{AltaVista found ([0-9,]+) results} ) {
+  if( $response->decoded_content =~ m{AltaVista found ([0-9,]+) results} ) {
     # The substring will be like "AltaVista found 2,345 results"
     print "$word: $1\n";
   }
@@ -384,7 +384,7 @@ list of new modules in CPAN:
   die "Can't get $url -- ", $response->status_line
    unless $response->is_success;
   
-  my $html = $response->content;
+  my $html = $response->decoded_content;
   while( $html =~ m/<A HREF=\"(.*?)\"/g ) {
     print "$1\n";
   }
diff --git a/t/base/message.t b/t/base/message.t
@@ -3,7 +3,7 @@
 use strict;
 use Test qw(plan ok skip);
 
-plan tests => 84;
+plan tests => 88;
 
 require HTTP::Message;
 
@@ -321,3 +321,15 @@ $@ = "";
 skip($] < 5.008 ? "No Encode module" : "",
      sub { eval { $m->decoded_content } }, "\x{FEFF}Hi there \x{263A}\n");
 ok($@ || "", "");
+
+$m->header("Content-Encoding", "foobar");
+ok($m->decoded_content, undef);
+ok($@ =~ /^Don't know how to decode Content-Encoding 'foobar'/);
+
+my $err = 0;
+eval {
+    $m->decoded_content(raise_error => 1);
+    $err++;
+};
+ok($@ =~ /Don't know how to decode Content-Encoding 'foobar'/);
+ok($err, 0);