Skip to content

Commit 027ebf6

Browse files
committed
Improve checking of *out_size in rans4x16 & arith
These should be set to a minimum of at least rans_compress_bound_4x16() or arith_compress_bound() but as they're user supplied parameters we ought to consider the possibility of them being smaller and the potential for an encode failure. (Although arguably it's a user error too as we're not using the API as intended.) This happens in many places, but not consistently when very small sizes are given such that we don't have room for the compression header meta-data.
1 parent 5179428 commit 027ebf6

File tree

3 files changed

+151
-28
lines changed

3 files changed

+151
-28
lines changed

htscodecs/arith_dynamic.c

+66-18
Original file line numberDiff line numberDiff line change
@@ -733,15 +733,17 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
733733
unsigned int c_meta_len;
734734
uint8_t *rle = NULL, *packed = NULL;
735735

736-
if (in_size > INT_MAX) {
736+
if (in_size > INT_MAX || (out && *out_size == 0)) {
737737
*out_size = 0;
738738
return NULL;
739739
}
740740

741741
if (!out) {
742742
*out_size = arith_compress_bound(in_size, order);
743-
if (!(out = malloc(*out_size)))
743+
if (!(out = malloc(*out_size))) {
744+
*out_size = 0;
744745
return NULL;
746+
}
745747
}
746748
unsigned char *out_end = out + *out_size;
747749

@@ -751,24 +753,30 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
751753
if (order & X_CAT) {
752754
out[0] = X_CAT;
753755
c_meta_len = 1 + var_put_u32(&out[1], out_end, in_size);
756+
if (c_meta_len + in_size > *out_size) {
757+
*out_size = 0;
758+
return NULL;
759+
}
754760
memcpy(out+c_meta_len, in, in_size);
755761
*out_size = in_size+c_meta_len;
756762
}
757763

758764
if (order & X_STRIPE) {
759-
int N = (order>>8);
765+
int N = (order>>8) & 0xff;
760766
if (N == 0) N = 4; // default for compatibility with old tests
761767

762-
if (N > 255)
763-
return NULL;
768+
if (N > in_size)
769+
N = in_size;
764770

765771
unsigned char *transposed = malloc(in_size);
766772
unsigned int part_len[256];
767773
unsigned int idx[256];
768-
if (!transposed)
774+
if (!transposed) {
775+
*out_size = 0;
769776
return NULL;
770-
int i, j, x;
777+
}
771778

779+
int i, j, x;
772780
for (i = 0; i < N; i++) {
773781
part_len[i] = in_size / N + ((in_size % N) > i);
774782
idx[i] = i ? idx[i-1] + part_len[i-1] : 0; // cumulative index
@@ -788,13 +796,20 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
788796
c_meta_len = 1;
789797
*out = order & ~X_NOSZ;
790798
c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size);
799+
if (c_meta_len >= *out_size) {
800+
free(transposed);
801+
*out_size = 0;
802+
return NULL;
803+
}
804+
791805
out[c_meta_len++] = N;
792806

793807
out2_start = out2 = out+7+5*N; // shares a buffer with c_meta
794808
for (i = 0; i < N; i++) {
795809
// Brute force try all methods.
796810
// FIXME: optimise this bit. Maybe learn over time?
797811
int j, best_j = 0, best_sz = INT_MAX;
812+
uint8_t *r;
798813

799814
// Works OK with read names. The first byte is the most important,
800815
// as it has most variability (little-endian). After that it's
@@ -843,24 +858,37 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
843858
// {1, 128}};
844859

845860
for (j = 1; j <= m[MIN(i,3)][0]; j++) {
861+
if (out2 - out > *out_size)
862+
continue; // an error, but caught in best_sz check later
863+
846864
olen2 = *out_size - (out2 - out);
847865
//fprintf(stderr, "order=%d m=%d\n", order&3, m[MIN(i,4)][j]);
848866
if ((order&3) == 0 && (m[MIN(i,3)][j]&1))
849867
continue;
850868

851-
arith_compress_to(transposed+idx[i], part_len[i],
852-
out2, &olen2, m[MIN(i,3)][j] | X_NOSZ);
853-
if (best_sz > olen2) {
869+
r = arith_compress_to(transposed+idx[i], part_len[i],
870+
out2, &olen2, m[MIN(i,3)][j] | X_NOSZ);
871+
if (r && olen2 && best_sz > olen2) {
854872
best_sz = olen2;
855873
best_j = j;
856874
}
857875
}
858-
// if (best_j == 0) // none desireable
859-
// return NULL;
876+
877+
if (best_sz == INT_MAX) {
878+
free(transposed);
879+
*out_size = 0;
880+
return NULL;
881+
}
860882
if (best_j != j-1) {
861883
olen2 = *out_size - (out2 - out);
862-
arith_compress_to(transposed+idx[i], part_len[i],
863-
out2, &olen2, m[MIN(i,3)][best_j] | X_NOSZ);
884+
r = arith_compress_to(transposed+idx[i], part_len[i],
885+
out2, &olen2,
886+
m[MIN(i,3)][best_j] | X_NOSZ);
887+
if (!r) {
888+
free(transposed);
889+
*out_size = 0;
890+
return NULL;
891+
}
864892
}
865893
out2 += olen2;
866894
c_meta_len += var_put_u32(out+c_meta_len, out_end, olen2);
@@ -893,6 +921,10 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
893921
// PACK 2, 4 or 8 symbols into one byte.
894922
int pmeta_len;
895923
uint64_t packed_len;
924+
if (c_meta_len + 256 > *out_size) {
925+
*out_size = 0;
926+
return NULL;
927+
}
896928
packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len);
897929
if (!packed) {
898930
out[0] &= ~X_PACK;
@@ -934,6 +966,7 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
934966
#else
935967
fprintf(stderr, "Htscodecs has been compiled without libbz2 support\n");
936968
free(out);
969+
*out_size = 0;
937970
return NULL;
938971
#endif
939972

@@ -945,25 +978,40 @@ unsigned char *arith_compress_to(unsigned char *in, unsigned int in_size,
945978
// *out_size = lzma_size;
946979

947980
} else {
981+
uint8_t *r;
948982
if (do_rle) {
949983
if (order == 0)
950-
arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size);
984+
r=arith_compress_O0_RLE(in, in_size, out+c_meta_len, out_size);
951985
else
952-
arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size);
986+
r=arith_compress_O1_RLE(in, in_size, out+c_meta_len, out_size);
953987
} else {
954988
//if (order == 2)
955989
// arith_compress_O2(in, in_size, out+c_meta_len, out_size);
956990
//else
957991
if (order == 1)
958-
arith_compress_O1(in, in_size, out+c_meta_len, out_size);
992+
r=arith_compress_O1(in, in_size, out+c_meta_len, out_size);
959993
else
960-
arith_compress_O0(in, in_size, out+c_meta_len, out_size);
994+
r=arith_compress_O0(in, in_size, out+c_meta_len, out_size);
995+
}
996+
997+
if (!r) {
998+
free(rle);
999+
free(packed);
1000+
*out_size = 0;
1001+
return NULL;
9611002
}
9621003
}
9631004

9641005
if (*out_size >= in_size) {
9651006
out[0] &= ~(3|X_EXT); // no entropy encoding, but keep e.g. PACK
9661007
out[0] |= X_CAT | no_size;
1008+
1009+
if (out + c_meta_len + in_size > out_end) {
1010+
free(rle);
1011+
free(packed);
1012+
*out_size = 0;
1013+
return NULL;
1014+
}
9671015
memcpy(out+c_meta_len, in, in_size);
9681016
*out_size = in_size;
9691017
}

htscodecs/rANS_static4x16pr.c

+84-10
Original file line numberDiff line numberDiff line change
@@ -1164,7 +1164,7 @@ void rans_set_cpu(int opts) {
11641164
unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
11651165
unsigned char *out,unsigned int *out_size,
11661166
int order) {
1167-
if (in_size > INT_MAX) {
1167+
if (in_size > INT_MAX || (out && *out_size == 0)) {
11681168
*out_size = 0;
11691169
return NULL;
11701170
}
@@ -1177,8 +1177,10 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
11771177
*out_size = rans_compress_bound_4x16(in_size, order);
11781178
if (*out_size == 0)
11791179
return NULL;
1180-
if (!(out_free = out = malloc(*out_size)))
1180+
if (!(out_free = out = malloc(*out_size))) {
1181+
*out_size = 0;
11811182
return NULL;
1183+
}
11821184
}
11831185

11841186
unsigned char *out_end = out + *out_size;
@@ -1199,11 +1201,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
11991201
int N = (order>>8) & 0xff;
12001202
if (N == 0) N = 4; // default for compatibility with old tests
12011203

1204+
if (N > in_size)
1205+
N = in_size;
1206+
12021207
unsigned char *transposed = malloc(in_size);
12031208
unsigned int part_len[256];
12041209
unsigned int idx[256];
12051210
if (!transposed) {
12061211
free(out_free);
1212+
*out_size = 0;
12071213
return NULL;
12081214
}
12091215
int i, j, x;
@@ -1241,6 +1247,13 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
12411247
c_meta_len = 1;
12421248
*out = order & ~RANS_ORDER_NOSZ;
12431249
c_meta_len += var_put_u32(out+c_meta_len, out_end, in_size);
1250+
if (c_meta_len >= *out_size) {
1251+
free(out_free);
1252+
free(transposed);
1253+
*out_size = 0;
1254+
return NULL;
1255+
}
1256+
12441257
out[c_meta_len++] = N;
12451258

12461259
unsigned char *out_best = NULL;
@@ -1249,26 +1262,33 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
12491262
out2_start = out2 = out+7+5*N; // shares a buffer with c_meta
12501263
for (i = 0; i < N; i++) {
12511264
// Brute force try all methods.
1252-
int j, m[] = {1,64,128,0}, best_j = 0, best_sz = in_size+10;
1265+
uint8_t *r;
1266+
int j, m[] = {1,64,128,0}, best_j = 0, best_sz = INT_MAX;
12531267
for (j = 0; j < sizeof(m)/sizeof(*m); j++) {
12541268
if ((order & m[j]) != m[j])
12551269
continue;
12561270

12571271
// order-1 *only*; bit check above cannot elide order-0
12581272
if ((order & RANS_ORDER_STRIPE_NO0) && (m[j]&1) == 0)
12591273
continue;
1274+
1275+
if (out2 - out > *out_size)
1276+
continue; // an error, but caught in best_sz check later
1277+
12601278
olen2 = *out_size - (out2 - out);
1261-
rans_compress_to_4x16(transposed+idx[i], part_len[i],
1262-
out2, &olen2,
1263-
m[j] | RANS_ORDER_NOSZ
1264-
| (order&RANS_ORDER_X32));
1265-
if (best_sz > olen2) {
1279+
r = rans_compress_to_4x16(transposed+idx[i], part_len[i],
1280+
out2, &olen2,
1281+
m[j] | RANS_ORDER_NOSZ
1282+
| (order&RANS_ORDER_X32));
1283+
if (r && olen2 && best_sz > olen2) {
12661284
best_sz = olen2;
12671285
best_j = j;
12681286
if (j < sizeof(m)/sizeof(*m) && olen2 > out_best_len) {
12691287
unsigned char *tmp = realloc(out_best, olen2);
12701288
if (!tmp) {
12711289
free(out_free);
1290+
free(transposed);
1291+
*out_size = 0;
12721292
return NULL;
12731293
}
12741294
out_best = tmp;
@@ -1279,6 +1299,15 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
12791299
memcpy(out_best, out2, olen2);
12801300
}
12811301
}
1302+
1303+
if (best_sz == INT_MAX) {
1304+
free(out_best);
1305+
free(out_free);
1306+
free(transposed);
1307+
*out_size = 0;
1308+
return NULL;
1309+
}
1310+
12821311
if (best_j < sizeof(m)/sizeof(*m)) {
12831312
// Copy the best compression to output buffer if not current
12841313
memcpy(out2, out_best, best_sz);
@@ -1301,6 +1330,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
13011330
out[0] = RANS_ORDER_CAT;
13021331
c_meta_len = 1;
13031332
c_meta_len += var_put_u32(&out[1], out_end, in_size);
1333+
1334+
if (c_meta_len + in_size > *out_size) {
1335+
*out_size = 0;
1336+
return NULL;
1337+
}
13041338
if (in_size)
13051339
memcpy(out+c_meta_len, in, in_size);
13061340
*out_size = c_meta_len + in_size;
@@ -1329,6 +1363,11 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
13291363
// PACK 2, 4 or 8 symbols into one byte.
13301364
int pmeta_len;
13311365
uint64_t packed_len;
1366+
if (c_meta_len + 256 > *out_size) {
1367+
free(out_free);
1368+
*out_size = 0;
1369+
return NULL;
1370+
}
13321371
packed = hts_pack(in, in_size, out+c_meta_len, &pmeta_len, &packed_len);
13331372
if (!packed) {
13341373
out[0] &= ~RANS_ORDER_PACK;
@@ -1357,6 +1396,7 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
13571396
c_rmeta_len = in_size+257;
13581397
if (!(meta = malloc(c_rmeta_len))) {
13591398
free(out_free);
1399+
*out_size = 0;
13601400
return NULL;
13611401
}
13621402

@@ -1380,8 +1420,23 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
13801420
// Compress lengths with O0 and literals with O0/O1 ("order" param)
13811421
int sz = var_put_u32(out+c_meta_len, out_end, rmeta_len*2), sz2;
13821422
sz += var_put_u32(out+c_meta_len+sz, out_end, rle_len);
1423+
if ((c_meta_len+sz+5) > *out_size) {
1424+
free(out_free);
1425+
free(rle);
1426+
free(meta);
1427+
free(packed);
1428+
*out_size = 0;
1429+
return NULL;
1430+
}
13831431
c_rmeta_len = *out_size - (c_meta_len+sz+5);
1384-
rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len);
1432+
if (!rans_enc_func(do_simd, 0)(meta, rmeta_len, out+c_meta_len+sz+5, &c_rmeta_len)) {
1433+
free(out_free);
1434+
free(rle);
1435+
free(meta);
1436+
free(packed);
1437+
*out_size = 0;
1438+
return NULL;
1439+
}
13851440
if (c_rmeta_len < rmeta_len) {
13861441
sz2 = var_put_u32(out+c_meta_len+sz, out_end, c_rmeta_len);
13871442
memmove(out+c_meta_len+sz+sz2, out+c_meta_len+sz+5, c_rmeta_len);
@@ -1404,17 +1459,36 @@ unsigned char *rans_compress_to_4x16(unsigned char *in, unsigned int in_size,
14041459
out[0] &= ~RANS_ORDER_RLE;
14051460
}
14061461

1462+
if (c_meta_len > *out_size) {
1463+
free(rle);
1464+
free(packed);
1465+
*out_size = 0;
1466+
return NULL;
1467+
}
1468+
14071469
*out_size -= c_meta_len;
14081470
if (order && in_size < 8) {
14091471
out[0] &= ~1;
14101472
order &= ~1;
14111473
}
14121474

1413-
rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size);
1475+
if (!rans_enc_func(do_simd, order)(in, in_size, out+c_meta_len, out_size)) {
1476+
free(rle);
1477+
free(packed);
1478+
*out_size = 0;
1479+
return NULL;
1480+
}
14141481

14151482
if (*out_size >= in_size) {
14161483
out[0] &= ~3;
14171484
out[0] |= RANS_ORDER_CAT | no_size;
1485+
1486+
if (out + c_meta_len + in_size > out_end) {
1487+
free(rle);
1488+
free(packed);
1489+
*out_size = 0;
1490+
return NULL;
1491+
}
14181492
if (in_size)
14191493
memcpy(out+c_meta_len, in, in_size);
14201494
*out_size = in_size;

htscodecs/tokenise_name3.c

+1
Original file line numberDiff line numberDiff line change
@@ -1555,6 +1555,7 @@ uint8_t *tok3_encode_names(char *blk, int len, int level, int use_arith,
15551555
if (compress(ctx->desc[i].buf, ctx->desc[i].buf_l, i&0xf, level,
15561556
use_arith, out, &out_len) < 0) {
15571557
free_context(ctx);
1558+
free(out);
15581559
return NULL;
15591560
}
15601561

0 commit comments

Comments
 (0)