From eb21d3ec1b621f6922e82a319c16b9eccfdc1dd1 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Mon, 2 Sep 2024 14:15:40 +0100 Subject: [PATCH] Improve checking of *out_size in rans4x16 & arith These should be set to a minimum of at least rans_compress_bound_4x16() or arith_compress_bound() but as they're user supplied parameters we ought to consider the possibility of them being smaller and the potential for an encode failure. (Although arguably it's a user error too as we're not using the API as intended.) This happens in many places, but not consistently when very small sizes are given such that we don't have room for the compression header meta-data. --- htscodecs/arith_dynamic.c | 29 +++++++++++++++++--- htscodecs/rANS_static4x16pr.c | 50 ++++++++++++++++++++++++++++++++--- htscodecs/tokenise_name3.c | 1 + 3 files changed, 74 insertions(+), 6 deletions(-) diff --git a/htscodecs/arith_dynamic.c b/htscodecs/arith_dynamic.c index 37aca77..6dd8f00 100644 --- a/htscodecs/arith_dynamic.c +++ b/htscodecs/arith_dynamic.c @@ -733,7 +733,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, unsigned int c_meta_len; uint8_t *rle = NULL, *packed = NULL; - if (in_size > INT_MAX) { + if (in_size > INT_MAX || (out && *out_size == 0)) { *out_size = 0; return NULL; } @@ -751,6 +751,8 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, if (order & X_CAT) { out[0] = X_CAT; c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size); + if (c_meta_len + in_size > *out_size) + return NULL; memcpy(out+c_meta_len, in, in_size); *out_size = in_size+c_meta_len; } @@ -759,6 +761,9 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, int N = (order>>8); if (N == 0) N = 4; // default for compatibility with old tests + if (N > in_size) + N = in_size; + if (N > 255) return NULL; @@ -843,6 +848,9 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, // {1, 128}}; for (j = 1; j <= m[MIN(i,3)][0]; j++) { + if (out2 - out > *out_size) + continue; // an error, but caught in best_sz check later + olen2 = *out_size - (out2 - out); //fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]); if ((order&3) == 0 && (m[MIN(i,3)][j]&1)) @@ -855,8 +863,11 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, best_j = j; } } -// if (best_j == 0) // none desireable -// return NULL; + + if (best_sz == INT_MAX) { + free(transposed); + return NULL; + } if (best_j != j-1) { olen2 = *out_size - (out2 - out); arith_compress_to(transposed+idx[i], part_len[i], @@ -962,8 +973,20 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size, } if (*out_size >= in_size) { + if (out > out_end) { + free(rle); + free(packed); + return NULL; + } + out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK out[0] |= X_CAT | no_size; + + if (out + c_meta_len + in_size > out_end) { + free(rle); + free(packed); + return NULL; + } memcpy(out+c_meta_len, in, in_size); *out_size = in_size; } diff --git a/htscodecs/rANS_static4x16pr.c b/htscodecs/rANS_static4x16pr.c index 8c9a64a..e7db682 100644 --- a/htscodecs/rANS_static4x16pr.c +++ b/htscodecs/rANS_static4x16pr.c @@ -1164,7 +1164,7 @@ void rans_set_cpu(int opts) { unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, unsigned char *out,unsigned int *out_size, int order) { - if (in_size > INT_MAX) { + if (in_size > INT_MAX || (out && *out_size == 0)) { *out_size = 0; return NULL; } @@ -1199,6 +1199,12 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, int N = (order>>8) & 0xff; if (N == 0) N = 4; // default for compatibility with old tests + if (N > in_size) + N = in_size; + + if (N > 255) + return NULL; + unsigned char *transposed = malloc(in_size); unsigned int part_len[256]; unsigned int idx[256]; @@ -1249,7 +1255,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out2_start = out2 = out+7+5*N; // shares a buffer with c_meta for (i = 0; i < N; i++) { // Brute force try all methods. - int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10; + int j, m[] = {1,64,128,0}, best_j = 0, best_sz = INT_MAX; for (j = 0; j < sizeof(m)/sizeof(*m); j++) { if ((order & m[j]) != m[j]) continue; @@ -1257,18 +1263,23 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, // order-1 *only*; bit check above cannot elide order-0 if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0) continue; + + if (out2 - out > *out_size) + continue; // an error, but caught in best_sz check later + olen2 = *out_size - (out2 - out); rans_compress_to_4x16(transposed+idx[i], part_len[i], out2, &olen2, m[j] | RANS_ORDER_NOSZ | (order&RANS_ORDER_X32)); - if (best_sz > olen2) { + if (olen2 && best_sz > olen2) { best_sz = olen2; best_j = j; if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) { unsigned char *tmp = realloc(out_best, olen2); if (!tmp) { free(out_free); + free(transposed); return NULL; } out_best = tmp; @@ -1279,6 +1290,14 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, memcpy(out_best, out2, olen2); } } + + if (best_sz == INT_MAX) { + free(out_best); + free(out_free); + free(transposed); + return NULL; + } + if (best_j < sizeof(m)/sizeof(*m)) { // Copy the best compression to output buffer if not current memcpy(out2, out_best, best_sz); @@ -1298,9 +1317,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, } if (order & RANS_ORDER_CAT) { + if (out > out_end) + return NULL; + out[0] = RANS_ORDER_CAT; c_meta_len = 1; c_meta_len += var_put_u32(&out[1], out_end, in_size); + + if (c_meta_len + in_size > *out_size) + return NULL; if (in_size) memcpy(out+c_meta_len, in, in_size); *out_size = c_meta_len + in_size; @@ -1404,6 +1429,13 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, out[0] &= ~RANS_ORDER_RLE; } + if (c_meta_len > *out_size) { + free(rle); + free(packed); + *out_size = 0; + return NULL; + } + *out_size -= c_meta_len; if (order && in_size < 8) { out[0] &= ~1; @@ -1413,8 +1445,20 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size, rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size); if (*out_size >= in_size) { + if (out > out_end) { + free(rle); + free(packed); + return NULL; + } + out[0] &= ~3; out[0] |= RANS_ORDER_CAT | no_size; + + if (out + c_meta_len + in_size > out_end) { + free(rle); + free(packed); + return NULL; + } if (in_size) memcpy(out+c_meta_len, in, in_size); *out_size = in_size; diff --git a/htscodecs/tokenise_name3.c b/htscodecs/tokenise_name3.c index 7493579..08e6261 100644 --- a/htscodecs/tokenise_name3.c +++ b/htscodecs/tokenise_name3.c @@ -1555,6 +1555,7 @@ uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith, if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level, use_arith, out, &out_len) < 0) { free_context(ctx); + free(out); return NULL; }