@@ -246,6 +246,64 @@ class istreambuf
246246 return static_cast <long int >(zstrm_p->total_out - static_cast <uLong>(in_avail ()));
247247 }
248248
/// Decoded view of the two-byte zlib stream header (CMF, FLG).
///
/// Layout follows RFC 1950, section 2.2
/// (https://datatracker.ietf.org/doc/html/rfc1950#section-2.2).
/// See also:
/// http://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like
struct ZlibHeader {
    // CINFO, top 4 bits of CMF: log2 of the LZ77 window size minus 8.
    // RFC 1950 only allows 0 to 7 (7 means a 32K window).
    uint8_t cminfo;
    // CM, bottom 4 bits of CMF: the compression method, always 8 (deflate).
    uint8_t cm;
    // FLEVEL, top 2 bits of FLG: 0 to 3, higher means more compressed.
    uint8_t flevel;
    // FDICT, bit 5 of FLG: usually 0, true if a preset dictionary
    // identifier follows the header.
    bool fdict;
    // FCHECK, bottom 5 bits of FLG: 0 to 31, chosen so that
    // ((cminfo * 16 + cm) * 256 + flevel * 64 + fdict * 32 + fcheck) % 31 == 0
    uint8_t fcheck;

private:
    // CMF and FLG viewed as one 16-bit unsigned integer in MSB order.
    uint16_t total;

public:
    /// Splits the raw header bytes into their bit fields.
    /// @param cmf first byte of the stream (CINFO | CM)
    /// @param flg second byte of the stream (FLEVEL | FDICT | FCHECK)
    constexpr ZlibHeader(const uint8_t cmf, const uint8_t flg) noexcept
        : cminfo(static_cast<uint8_t>(cmf >> 4)),
          cm(static_cast<uint8_t>(cmf & 0x0f)),
          flevel(static_cast<uint8_t>(flg >> 6)),
          fdict((flg & 0x20) != 0),
          fcheck(static_cast<uint8_t>(flg & 0x1f)),
          total(static_cast<uint16_t>(cmf * 256 + flg)) {
    }

    /// @return true when the two bytes form a header permitted by RFC 1950:
    ///         the method is deflate (8), the window size exponent is within
    ///         range (CINFO <= 7) and the 16-bit value is a multiple of 31.
    [[nodiscard]] constexpr bool isValid() const noexcept {
        // CINFO above 7 is not allowed by the specification, so such a
        // byte pair is not a zlib header even if the checksum happens to fit.
        return cm == 8 && cminfo <= 7 && total % 31 == 0;
    }
};
289+
290+ static bool is_compressed (const char * const buffer, const char * const end) {
291+ // Buffer too short
292+ if (buffer + 2 > end)
293+ return false ;
294+
295+ const auto b0 = static_cast <uint8_t >(buffer[0 ]);
296+ const auto b1 = static_cast <uint8_t >(buffer[1 ]);
297+
298+ // Check for Gzip magic numbers
299+ // http://en.wikipedia.org/wiki/Gzip
300+ if (b0 == 0x1F && b1 == 0x8B )
301+ return true ;
302+ if (ZlibHeader (b0, b1).isValid ())
303+ return true ;
304+ return false ;
305+ }
306+
249307 std::streambuf::int_type underflow () override
250308 {
251309 if (this ->gptr () == this ->egptr ())
@@ -272,16 +330,7 @@ class istreambuf
272330 if (auto_detect && ! auto_detect_run)
273331 {
274332 auto_detect_run = true ;
275- unsigned char b0 = *reinterpret_cast < unsigned char * >(in_buff_start);
276- unsigned char b1 = *reinterpret_cast < unsigned char * >(in_buff_start + 1 );
277- // Ref:
278- // http://en.wikipedia.org/wiki/Gzip
279- // http://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like
280- is_text = ! (in_buff_start + 2 <= in_buff_end
281- && ((b0 == 0x1F && b1 == 0x8B ) // gzip header
282- || (b0 == 0x78 && (b1 == 0x01 // zlib header
283- || b1 == 0x9C
284- || b1 == 0xDA ))));
333+ is_text = !is_compressed (in_buff_start, in_buff_end);
285334 }
286335 if (is_text)
287336 {
0 commit comments