Skip to content

Commit dc86d3d

Browse files
authored
Fix zlib header restriction is too restrictive (#74)
* Refactored header detection * Fixed typo * Renamed function * Fixed incorrectly using end instead of start of buffer * Removed unused imports * Fixed typo in bitmask * Parameter spacing
1 parent cb0ccd2 commit dc86d3d

File tree

1 file changed

+59
-10
lines changed

1 file changed

+59
-10
lines changed

src/zstr.hpp

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,64 @@ class istreambuf
246246
return static_cast<long int>(zstrm_p->total_out - static_cast<uLong>(in_avail()));
247247
}
248248

249+
struct ZlibHeader {
    // Decoded view of the two-byte zlib stream header (CMF followed by FLG).
    // Based on RFC 1950 (https://datatracker.ietf.org/doc/html/rfc1950#section-2.2)
    // See also:
    // http://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like

    // CINFO (top 4 bits of CMF): log2 of the LZ77 window size in bytes, minus 8.
    // RFC 1950 only allows 0 to 7 (7 means a 32K window).
    uint8_t cminfo;
    // CM (bottom 4 bits of CMF): the compression method; 8 means deflate,
    // which is the only method isValid() accepts.
    uint8_t cm;
    // FLEVEL (top 2 bits of FLG): 0 to 3, the compression level, higher is more compressed
    uint8_t flevel;
    // FDICT (bit 5 of FLG): usually 0, true if a preset dictionary id follows the header
    bool fdict;
    // FCHECK (bottom 5 bits of FLG): 0 to 31, chosen so that the header read as a
    // big-endian 16-bit integer is a multiple of 31:
    // ((cminfo * 16 + cm) * 256 + flevel * 64 + fdict * 32 + fcheck) % 31 == 0
    uint8_t fcheck;

private:
    // CMF and FLG reinterpreted as one 16-bit integer in MSB order (CMF * 256 + FLG)
    uint16_t total;

public:
    // Splits the raw header bytes into their bit fields.
    //   cmf: first byte of the stream (CINFO | CM)
    //   flg: second byte of the stream (FLEVEL | FDICT | FCHECK)
    ZlibHeader(const uint8_t cmf, const uint8_t flg)
        : cminfo(static_cast<uint8_t>(cmf >> 4)),
          cm(static_cast<uint8_t>(cmf & 0x0f)),
          flevel(static_cast<uint8_t>(flg >> 6)),
          fdict((flg & 0x20) != 0),
          fcheck(static_cast<uint8_t>(flg & 0x1f)),
          total(static_cast<uint16_t>(cmf * 256 + flg))
    {
    }

    // Returns true when the two bytes form a legal zlib header: the method is
    // deflate, the window size exponent is within the range RFC 1950 permits,
    // and the FCHECK checksum holds. CINFO values above 7 are rejected because
    // the RFC disallows them, so such data must not be treated as compressed.
    [[nodiscard]] bool isValid() const noexcept {
        return cm == 8 && cminfo <= 7 && total % 31 == 0;
    }
};
289+
290+
static bool is_compressed(const char* const buffer, const char* const end) {
291+
// Buffer too short
292+
if (buffer + 2 > end)
293+
return false;
294+
295+
const auto b0 = static_cast<uint8_t>(buffer[0]);
296+
const auto b1 = static_cast<uint8_t>(buffer[1]);
297+
298+
// Check for Gzip magic numbers
299+
// http://en.wikipedia.org/wiki/Gzip
300+
if (b0 == 0x1F && b1 == 0x8B)
301+
return true;
302+
if (ZlibHeader(b0, b1).isValid())
303+
return true;
304+
return false;
305+
}
306+
249307
std::streambuf::int_type underflow() override
250308
{
251309
if (this->gptr() == this->egptr())
@@ -272,16 +330,7 @@ class istreambuf
272330
if (auto_detect && ! auto_detect_run)
273331
{
274332
auto_detect_run = true;
275-
unsigned char b0 = *reinterpret_cast< unsigned char * >(in_buff_start);
276-
unsigned char b1 = *reinterpret_cast< unsigned char * >(in_buff_start + 1);
277-
// Ref:
278-
// http://en.wikipedia.org/wiki/Gzip
279-
// http://stackoverflow.com/questions/9050260/what-does-a-zlib-header-look-like
280-
is_text = ! (in_buff_start + 2 <= in_buff_end
281-
&& ((b0 == 0x1F && b1 == 0x8B) // gzip header
282-
|| (b0 == 0x78 && (b1 == 0x01 // zlib header
283-
|| b1 == 0x9C
284-
|| b1 == 0xDA))));
333+
is_text = !is_compressed(in_buff_start, in_buff_end);
285334
}
286335
if (is_text)
287336
{

0 commit comments

Comments
 (0)