Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve checking of *out_size in rans4x16 & arith #127

Merged
merged 1 commit into from
Sep 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 66 additions & 18 deletions htscodecs/arith_dynamic.c
Original file line number Diff line number Diff line change
Expand Up @@ -733,15 +733,17 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
unsigned int c_meta_len;
uint8_t *rle = NULL, *packed = NULL;

if (in_size > INT_MAX) {
if (in_size > INT_MAX || (out && *out_size == 0)) {
*out_size = 0;
return NULL;
}

if (!out) {
*out_size = arith_compress_bound(in_size, order);
if (!(out = malloc(*out_size)))
if (!(out = malloc(*out_size))) {
*out_size = 0;
return NULL;
}
}
unsigned char *out_end = out + *out_size;

Expand All @@ -751,24 +753,30 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
if (order & X_CAT) {
out[0] = X_CAT;
c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size);
if (c_meta_len + in_size > *out_size) {
*out_size = 0;
return NULL;
}
memcpy(out+c_meta_len, in, in_size);
*out_size = in_size+c_meta_len;
}

if (order & X_STRIPE) {
int N = (order>>8);
int N = (order>>8) & 0xff;
if (N == 0) N = 4; // default for compatibility with old tests

if (N > 255)
return NULL;
if (N > in_size)
N = in_size;

unsigned char *transposed = malloc(in_size);
unsigned int part_len[256];
unsigned int idx[256];
if (!transposed)
if (!transposed) {
*out_size = 0;
return NULL;
int i, j, x;
}

int i, j, x;
for (i = 0; i < N; i++) {
part_len[i] = in_size / N + ((in_size % N) > i);
idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index
Expand All @@ -788,13 +796,20 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
c_meta_len = 1;
*out = order & ~X_NOSZ;
c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size);
if (c_meta_len >= *out_size) {
free(transposed);
*out_size = 0;
return NULL;
}

out[c_meta_len++] = N;

out2_start = out2 = out+7+5*N; // shares a buffer with c_meta
for (i = 0; i < N; i++) {
// Brute force try all methods.
// FIXME: optimise this bit. Maybe learn over time?
int j, best_j = 0, best_sz = INT_MAX;
uint8_t *r;

// Works OK with read names. The first byte is the most important,
// as it has most variability (little-endian). After that it's
Expand Down Expand Up @@ -843,24 +858,37 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
// {1, 128}};

for (j = 1; j <= m[MIN(i,3)][0]; j++) {
if (out2 - out > *out_size)
continue; // an error, but caught in best_sz check later

olen2 = *out_size - (out2 - out);
//fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]);
if ((order&3) == 0 && (m[MIN(i,3)][j]&1))
continue;

arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2, m[MIN(i,3)][j] | X_NOSZ);
if (best_sz > olen2) {
r = arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2, m[MIN(i,3)][j] | X_NOSZ);
if (r && olen2 && best_sz > olen2) {
best_sz = olen2;
best_j = j;
}
}
// if (best_j == 0) // none desireable
// return NULL;

if (best_sz == INT_MAX) {
free(transposed);
*out_size = 0;
return NULL;
}
if (best_j != j-1) {
olen2 = *out_size - (out2 - out);
arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2, m[MIN(i,3)][best_j] | X_NOSZ);
r = arith_compress_to(transposed+idx[i], part_len[i],
out2, &olen2,
m[MIN(i,3)][best_j] | X_NOSZ);
if (!r) {
free(transposed);
*out_size = 0;
return NULL;
}
}
out2 += olen2;
c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2);
Expand Down Expand Up @@ -893,6 +921,10 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
// PACK 2, 4 or 8 symbols into one byte.
int pmeta_len;
uint64_t packed_len;
if (c_meta_len + 256 > *out_size) {
*out_size = 0;
return NULL;
}
packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len);
if (!packed) {
out[0] &= ~X_PACK;
Expand Down Expand Up @@ -934,6 +966,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
#else
fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n");
free(out);
*out_size = 0;
return NULL;
#endif

Expand All @@ -945,25 +978,40 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
// *out_size = lzma_size;

} else {
uint8_t *r;
if (do_rle) {
if (order == 0)
arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size);
else
arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size);
} else {
//if (order == 2)
// arith_compress_O2(in, in_size, out+c_meta_len, out_size);
//else
if (order == 1)
arith_compress_O1(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O1(in, in_size, out+c_meta_len, out_size);
else
arith_compress_O0(in, in_size, out+c_meta_len, out_size);
r=arith_compress_O0(in, in_size, out+c_meta_len, out_size);
}

if (!r) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}
}

if (*out_size >= in_size) {
out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK
out[0] |= X_CAT | no_size;

if (out + c_meta_len + in_size > out_end) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}
memcpy(out+c_meta_len, in, in_size);
*out_size = in_size;
}
Expand Down
94 changes: 84 additions & 10 deletions htscodecs/rANS_static4x16pr.c
Original file line number Diff line number Diff line change
Expand Up @@ -1164,7 +1164,7 @@ void rans_set_cpu(int opts) {
unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
unsigned char *out,unsigned int *out_size,
int order) {
if (in_size > INT_MAX) {
if (in_size > INT_MAX || (out && *out_size == 0)) {
*out_size = 0;
return NULL;
}
Expand All @@ -1177,8 +1177,10 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
*out_size = rans_compress_bound_4x16(in_size, order);
if (*out_size == 0)
return NULL;
if (!(out_free = out = malloc(*out_size)))
if (!(out_free = out = malloc(*out_size))) {
*out_size = 0;
return NULL;
}
}

unsigned char *out_end = out + *out_size;
Expand All @@ -1199,11 +1201,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
int N = (order>>8) & 0xff;
if (N == 0) N = 4; // default for compatibility with old tests

if (N > in_size)
N = in_size;

unsigned char *transposed = malloc(in_size);
unsigned int part_len[256];
unsigned int idx[256];
if (!transposed) {
free(out_free);
*out_size = 0;
return NULL;
}
int i, j, x;
Expand Down Expand Up @@ -1241,6 +1247,13 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
c_meta_len = 1;
*out = order & ~RANS_ORDER_NOSZ;
c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size);
if (c_meta_len >= *out_size) {
free(out_free);
free(transposed);
*out_size = 0;
return NULL;
}

out[c_meta_len++] = N;

unsigned char *out_best = NULL;
Expand All @@ -1249,26 +1262,33 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
out2_start = out2 = out+7+5*N; // shares a buffer with c_meta
for (i = 0; i < N; i++) {
// Brute force try all methods.
int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10;
uint8_t *r;
int j, m[] = {1,64,128,0}, best_j = 0, best_sz = INT_MAX;
for (j = 0; j < sizeof(m)/sizeof(*m); j++) {
if ((order & m[j]) != m[j])
continue;

// order-1 *only*; bit check above cannot elide order-0
if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0)
continue;

if (out2 - out > *out_size)
continue; // an error, but caught in best_sz check later

olen2 = *out_size - (out2 - out);
rans_compress_to_4x16(transposed+idx[i], part_len[i],
out2, &olen2,
m[j] | RANS_ORDER_NOSZ
| (order&RANS_ORDER_X32));
if (best_sz > olen2) {
r = rans_compress_to_4x16(transposed+idx[i], part_len[i],
out2, &olen2,
m[j] | RANS_ORDER_NOSZ
| (order&RANS_ORDER_X32));
if (r && olen2 && best_sz > olen2) {
best_sz = olen2;
best_j = j;
if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) {
unsigned char *tmp = realloc(out_best, olen2);
if (!tmp) {
free(out_free);
free(transposed);
*out_size = 0;
return NULL;
}
out_best = tmp;
Expand All @@ -1279,6 +1299,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
memcpy(out_best, out2, olen2);
}
}

if (best_sz == INT_MAX) {
free(out_best);
free(out_free);
free(transposed);
*out_size = 0;
return NULL;
}

if (best_j < sizeof(m)/sizeof(*m)) {
// Copy the best compression to output buffer if not current
memcpy(out2, out_best, best_sz);
Expand All @@ -1301,6 +1330,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
out[0] = RANS_ORDER_CAT;
c_meta_len = 1;
c_meta_len += var_put_u32(&out[1], out_end, in_size);

if (c_meta_len + in_size > *out_size) {
*out_size = 0;
return NULL;
}
if (in_size)
memcpy(out+c_meta_len, in, in_size);
*out_size = c_meta_len + in_size;
Expand Down Expand Up @@ -1329,6 +1363,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
// PACK 2, 4 or 8 symbols into one byte.
int pmeta_len;
uint64_t packed_len;
if (c_meta_len + 256 > *out_size) {
free(out_free);
*out_size = 0;
return NULL;
}
packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len);
if (!packed) {
out[0] &= ~RANS_ORDER_PACK;
Expand Down Expand Up @@ -1357,6 +1396,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
c_rmeta_len = in_size+257;
if (!(meta = malloc(c_rmeta_len))) {
free(out_free);
*out_size = 0;
return NULL;
}

Expand All @@ -1380,8 +1420,23 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
// Compress lengths with O0 and literals with O0/O1 ("order" param)
int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2;
sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len);
if ((c_meta_len+sz+5) > *out_size) {
free(out_free);
free(rle);
free(meta);
free(packed);
*out_size = 0;
return NULL;
}
c_rmeta_len = *out_size - (c_meta_len+sz+5);
rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len);
if (!rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len)) {
free(out_free);
free(rle);
free(meta);
free(packed);
*out_size = 0;
return NULL;
}
if (c_rmeta_len < rmeta_len) {
sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len);
memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len);
Expand All @@ -1404,17 +1459,36 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
out[0] &= ~RANS_ORDER_RLE;
}

if (c_meta_len > *out_size) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}

*out_size -= c_meta_len;
if (order && in_size < 8) {
out[0] &= ~1;
order &= ~1;
}

rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size);
if (!rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size)) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}

if (*out_size >= in_size) {
out[0] &= ~3;
out[0] |= RANS_ORDER_CAT | no_size;

if (out + c_meta_len + in_size > out_end) {
free(rle);
free(packed);
*out_size = 0;
return NULL;
}
if (in_size)
memcpy(out+c_meta_len, in, in_size);
*out_size = in_size;
Expand Down
1 change: 1 addition & 0 deletions htscodecs/tokenise_name3.c
Original file line number Diff line number Diff line change
Expand Up @@ -1555,6 +1555,7 @@ uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith,
if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level,
use_arith, out, &out_len) < 0) {
free_context(ctx);
free(out);
return NULL;
}

Expand Down