Skip to content

Commit da90ad5

Browse files
committed
Implement raise_error option for decoded_content.
1 parent 4d6d500 commit da90ad5

File tree

5 files changed

+95
-64
lines changed

5 files changed

+95
-64
lines changed

lib/HTTP/Message.pm

Lines changed: 67 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
package HTTP::Message;
22

3-
# $Id: Message.pm,v 1.51 2004/11/30 11:37:26 gisle Exp $
3+
# $Id: Message.pm,v 1.52 2004/11/30 12:00:22 gisle Exp $
44

55
use strict;
66
use vars qw($VERSION $AUTOLOAD);
7-
$VERSION = sprintf("%d.%02d", q$Revision: 1.51 $ =~ /(\d+)\.(\d+)/);
7+
$VERSION = sprintf("%d.%02d", q$Revision: 1.52 $ =~ /(\d+)\.(\d+)/);
88

99
require HTTP::Headers;
1010
require Carp;
@@ -159,64 +159,71 @@ sub content_ref
159159
sub decoded_content
160160
{
161161
my($self, %opt) = @_;
162+
my $content_ref;
162163

163-
require HTTP::Headers::Util;
164-
my($ct, %ct_param);
165-
if (my @ct = HTTP::Headers::Util::split_header_words($self->header("Content-Type"))) {
166-
($ct, undef, %ct_param) = @{$ct[-1]};
167-
$ct = lc($ct);
164+
eval {
168165

169-
Carp::croak("Can't decode multipart content")
170-
if $ct =~ m,^multipart/,;
171-
}
166+
require HTTP::Headers::Util;
167+
my($ct, %ct_param);
168+
if (my @ct = HTTP::Headers::Util::split_header_words($self->header("Content-Type"))) {
169+
($ct, undef, %ct_param) = @{$ct[-1]};
170+
$ct = lc($ct);
172171

173-
my $content_ref = $self->content_ref;
174-
Carp::croak("Can't decode ref content") if ref($content_ref) ne "SCALAR";
175-
176-
if (my $h = $self->header("Content-Encoding")) {
177-
$h =~ s/^\s+//;
178-
$h =~ s/\s+$//;
179-
for my $ce (reverse split(/\s*,\s*/, lc($h))) {
180-
next unless $ce || $ce eq "identity";
181-
if ($ce eq "gzip" || $ce eq "x-gzip") {
182-
require Compress::Zlib;
183-
$content_ref = \Compress::Zlib::memGunzip($$content_ref);
184-
Carp::croak("Can't gunzip content") unless defined $$content_ref;
185-
}
186-
elsif ($ce eq "x-bzip2") {
187-
require Compress::Bzip2;
188-
$content_ref = Compress::Bzip2::decompress($$content_ref);
189-
Carp::croak("Can't bunzip content") unless defined $$content_ref;
190-
}
191-
elsif ($ce eq "deflate") {
192-
require Compress::Zlib;
193-
$content_ref = \Compress::Zlib::uncompress($$content_ref);
194-
Carp::croak("Can't inflate content") unless defined $$content_ref;
195-
}
196-
elsif ($ce eq "compress" || $ce eq "x-compress") {
197-
Carp::croak("Can't uncompress content");
198-
}
199-
elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
200-
require MIME::Base64;
201-
$content_ref = \MIME::Base64::decode($$content_ref);
202-
}
203-
elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
204-
require MIME::QuotedPrint;
205-
$content_ref = \MIME::QuotedPrint::decode($$content_ref);
206-
}
207-
else {
208-
Carp::croak("Don't know how to decode Content-Encoding '$ce'");
172+
die "Can't decode multipart content" if $ct =~ m,^multipart/,;
173+
}
174+
175+
$content_ref = $self->content_ref;
176+
die "Can't decode ref content" if ref($content_ref) ne "SCALAR";
177+
178+
if (my $h = $self->header("Content-Encoding")) {
179+
$h =~ s/^\s+//;
180+
$h =~ s/\s+$//;
181+
for my $ce (reverse split(/\s*,\s*/, lc($h))) {
182+
next unless $ce || $ce eq "identity";
183+
if ($ce eq "gzip" || $ce eq "x-gzip") {
184+
require Compress::Zlib;
185+
$content_ref = \Compress::Zlib::memGunzip($$content_ref);
186+
die "Can't gunzip content" unless defined $$content_ref;
187+
}
188+
elsif ($ce eq "x-bzip2") {
189+
require Compress::Bzip2;
190+
$content_ref = Compress::Bzip2::decompress($$content_ref);
191+
die "Can't bunzip content" unless defined $$content_ref;
192+
}
193+
elsif ($ce eq "deflate") {
194+
require Compress::Zlib;
195+
$content_ref = \Compress::Zlib::uncompress($$content_ref);
196+
die "Can't inflate content" unless defined $$content_ref;
197+
}
198+
elsif ($ce eq "compress" || $ce eq "x-compress") {
199+
die "Can't uncompress content";
200+
}
201+
elsif ($ce eq "base64") { # not really C-T-E, but should be harmless
202+
require MIME::Base64;
203+
$content_ref = \MIME::Base64::decode($$content_ref);
204+
}
205+
elsif ($ce eq "quoted-printable") { # not really C-T-E, but should be harmless
206+
require MIME::QuotedPrint;
207+
$content_ref = \MIME::QuotedPrint::decode($$content_ref);
208+
}
209+
else {
210+
die "Don't know how to decode Content-Encoding '$ce'";
211+
}
209212
}
210213
}
211-
}
212214

213-
if ($ct && $ct =~ m,^text/,,) {
214-
my $charset = $opt{charset} || $ct_param{charset} || $opt{default_charset} || "ISO-8859-1";
215-
$charset = lc($charset);
216-
if ($charset ne "none") {
217-
require Encode;
218-
$content_ref = \Encode::decode($charset, $$content_ref, Encode::FB_CROAK());
215+
if ($ct && $ct =~ m,^text/,,) {
216+
my $charset = $opt{charset} || $ct_param{charset} || $opt{default_charset} || "ISO-8859-1";
217+
$charset = lc($charset);
218+
if ($charset ne "none") {
219+
require Encode;
220+
$content_ref = \Encode::decode($charset, $$content_ref, Encode::FB_CROAK());
221+
}
219222
}
223+
};
224+
if ($@) {
225+
Carp::croak($@) if $opt{raise_error};
226+
return undef;
220227
}
221228

222229
return $opt{ref} ? $content_ref : $$content_ref;
@@ -537,6 +544,13 @@ C<none> can used to suppress decoding of the charset.
537544
538545
This override the default charset of "ISO-8859-1".
539546
547+
=item C<raise_error>
548+
549+
If TRUE then raise an exception if not able to decode content. Reason
550+
might be that the specified C<Content-Encoding> or C<charset> is not
551+
supported. If this option is FALSE, then decode_content() will return
552+
C<undef> on errors, but will still set $@.
553+
540554
=item C<ref>
541555
542556
If TRUE then a reference to decoded content is returned. This might

lib/HTTP/Response.pm

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
package HTTP::Response;
22

3-
# $Id: Response.pm,v 1.49 2004/04/09 20:30:41 gisle Exp $
3+
# $Id: Response.pm,v 1.50 2004/11/30 12:00:22 gisle Exp $
44

55
require HTTP::Message;
66
@ISA = qw(HTTP::Message);
7-
$VERSION = sprintf("%d.%02d", q$Revision: 1.49 $ =~ /(\d+)\.(\d+)/);
7+
$VERSION = sprintf("%d.%02d", q$Revision: 1.50 $ =~ /(\d+)\.(\d+)/);
88

99
use strict;
1010
use HTTP::Status ();
@@ -302,10 +302,15 @@ headers.
302302
303303
=item $r->content( $content )
304304
305-
This is used to get/set the content and it is inherited from the
305+
This is used to get/set the raw content and it is inherited from the
306306
C<HTTP::Message> base class. See L<HTTP::Message> for details and
307307
other methods that can be used to access the content.
308308
309+
=item $r->decoded_content( %options )
310+
311+
This will return the content after any C<Content-Encoding> and
312+
charsets has been decoded. See L<HTTP::Message> for details.
313+
309314
=item $r->request
310315
311316
=item $r->request( $request )

lwpcook.pod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ handle the response returned.
5555

5656
# check the outcome
5757
if ($res->is_success) {
58-
print $res->content;
58+
print $res->decoded_content;
5959
}
6060
else {
6161
print "Error: " . $res->status_line . "\n";
@@ -157,7 +157,7 @@ required header, with something like this:
157157
$req = HTTP::Request->new('GET',"http://www.perl.com");
158158

159159
$res = $ua->request($req);
160-
print $res->content if $res->is_success;
160+
print $res->decoded_content if $res->is_success;
161161

162162
Replace C<proxy.myorg.com>, C<username> and
163163
C<password> with something suitable for your site.

lwptut.pod

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@ illustrated:
111111

112112
# Otherwise, process the content somehow:
113113

114-
if($response->content =~ m/jazz/i) {
114+
if($response->decoded_content =~ m/jazz/i) {
115115
print "They're talking about jazz today on Fresh Air!\n";
116116
}
117117
else {
@@ -147,9 +147,9 @@ C<< $response->content_type >>
147147

148148
=item *
149149

150-
The actual content of the response, in C<< $response->content >>.
150+
The actual content of the response, in C<< $response->decoded_content >>.
151151
If the response is HTML, that's where the HTML source will be; if
152-
it's a GIF, then C<< $response->content >> will be the binary
152+
it's a GIF, then C<< $response->decoded_content >> will be the binary
153153
GIF data.
154154

155155
=item *
@@ -311,7 +311,7 @@ the HTML the report of the number of matches:
311311
die "Weird content type at $url -- ", $response->content_type
312312
unless $response->content_type eq 'text/html';
313313

314-
if( $response->content =~ m{AltaVista found ([0-9,]+) results} ) {
314+
if( $response->decoded_content =~ m{AltaVista found ([0-9,]+) results} ) {
315315
# The substring will be like "AltaVista found 2,345 results"
316316
print "$word: $1\n";
317317
}
@@ -384,7 +384,7 @@ list of new modules in CPAN:
384384
die "Can't get $url -- ", $response->status_line
385385
unless $response->is_success;
386386

387-
my $html = $response->content;
387+
my $html = $response->decoded_content;
388388
while( $html =~ m/<A HREF=\"(.*?)\"/g ) {
389389
print "$1\n";
390390
}

t/base/message.t

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
use strict;
44
use Test qw(plan ok skip);
55

6-
plan tests => 84;
6+
plan tests => 88;
77

88
require HTTP::Message;
99

@@ -321,3 +321,15 @@ $@ = "";
321321
skip($] < 5.008 ? "No Encode module" : "",
322322
sub { eval { $m->decoded_content } }, "\x{FEFF}Hi there \x{263A}\n");
323323
ok($@ || "", "");
324+
325+
$m->header("Content-Encoding", "foobar");
326+
ok($m->decoded_content, undef);
327+
ok($@ =~ /^Don't know how to decode Content-Encoding 'foobar'/);
328+
329+
my $err = 0;
330+
eval {
331+
$m->decoded_content(raise_error => 1);
332+
$err++;
333+
};
334+
ok($@ =~ /Don't know how to decode Content-Encoding 'foobar'/);
335+
ok($err, 0);

0 commit comments

Comments
 (0)