38
38
39
39
#endif
40
40
41
+ // There appears to be an irreconcilable syntax difference between the Linux and Darwin assemblers.
42
+ // The name of a private label (i.e. one not exported to the symbol table) on Darwin has to start with "L",
43
+ // while on Linux it has to start with ".L". There's no way for one name to start with both ".L" and "L", so
44
+ // we have to use a macro.
45
+ #if defined(__APPLE__)
46
+ #define LOCAL_LABEL(label) L_ ## label
47
+ #else
48
+ #define LOCAL_LABEL(label) .L_ ## label
49
+ #endif
50
+
41
51
#if ZSTD_ENABLE_ASM_X86_64_BMI2
42
52
43
53
/* Calling convention:
@@ -198,7 +208,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
198
208
subq $24 , %rsp
199
209
.cfi_def_cfa_offset 184
200
210
201
- .L_4X1_compute_olimit :
211
+ LOCAL_LABEL(4X1_compute_olimit) :
202
212
/* Computes how many iterations we can do safely
203
213
* %r15, %rax may be clobbered
204
214
* rbx, rdx must be saved
@@ -245,19 +255,19 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
245
255
/* If (op3 == olimit) go to exit */
246
256
movq %op3, %rax /* rax = op3 */
247
257
cmpq %rax , %olimit /* op3 == olimit */
248
- je .L_4X1_exit
258
+ je LOCAL_LABEL(4X1_exit)
249
259
250
260
/* If (ip1 < ip0) go to exit */
251
261
cmpq %ip0, %ip1
252
- jb .L_4X1_exit
262
+ jb LOCAL_LABEL(4X1_exit)
253
263
254
264
/* If (ip2 < ip1) go to exit */
255
265
cmpq %ip1, %ip2
256
- jb .L_4X1_exit
266
+ jb LOCAL_LABEL(4X1_exit)
257
267
258
268
/* If (ip3 < ip2) go to exit */
259
269
cmpq %ip2, %ip3
260
- jb .L_4X1_exit
270
+ jb LOCAL_LABEL(4X1_exit)
261
271
262
272
/* Reads top 11 bits from bits[n]
263
273
* Loads dt[bits[n]] into var[n]
@@ -318,7 +328,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
318
328
319
329
.p2align 6
320
330
321
- .L_4X1_loop_body :
331
+ LOCAL_LABEL(4X1_loop_body) :
322
332
/* Decode 5 symbols in each of the 4 streams (20 total)
323
333
* Must have called GET_NEXT_DELT for each stream
324
334
*/
@@ -356,21 +366,21 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
356
366
357
367
/* If op3 < olimit: continue the loop */
358
368
cmp %op3, 24 (%rsp )
359
- ja .L_4X1_loop_body
369
+ ja LOCAL_LABEL(4X1_loop_body)
360
370
361
371
/* Reload ip[1,2,3] from stack */
362
372
movq 0 (%rsp ), %ip1
363
373
movq 8 (%rsp ), %ip2
364
374
movq 16 (%rsp ), %ip3
365
375
366
376
/* Re-compute olimit */
367
- jmp .L_4X1_compute_olimit
377
+ jmp LOCAL_LABEL(4X1_compute_olimit)
368
378
369
379
#undef GET_NEXT_DELT
370
380
#undef DECODE_FROM_DELT
371
381
#undef DECODE
372
382
#undef RELOAD_BITS
373
- .L_4X1_exit :
383
+ LOCAL_LABEL(4X1_exit) :
374
384
addq $24 , %rsp
375
385
.cfi_def_cfa_offset 160
376
386
@@ -546,7 +556,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
546
556
subq $8 , %rsp
547
557
.cfi_def_cfa_offset 192
548
558
549
- .L_4X2_compute_olimit :
559
+ LOCAL_LABEL(4X2_compute_olimit) :
550
560
/* Computes how many iterations we can do safely
551
561
* %r15, %rax may be clobbered
552
562
* rdx must be saved
@@ -610,19 +620,19 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
610
620
/* If (op3 == olimit) go to exit */
611
621
movq %op3, %rax /* rax = op3 */
612
622
cmpq %rax , %olimit /* op3 == olimit */
613
- je .L_4X2_exit
623
+ je LOCAL_LABEL(4X2_exit)
614
624
615
625
/* If (ip1 < ip0) go to exit */
616
626
cmpq %ip0, %ip1
617
- jb .L_4X2_exit
627
+ jb LOCAL_LABEL(4X2_exit)
618
628
619
629
/* If (ip2 < ip1) go to exit */
620
630
cmpq %ip1, %ip2
621
- jb .L_4X2_exit
631
+ jb LOCAL_LABEL(4X2_exit)
622
632
623
633
/* If (ip3 < ip2) go to exit */
624
634
cmpq %ip2, %ip3
625
- jb .L_4X2_exit
635
+ jb LOCAL_LABEL(4X2_exit)
626
636
627
637
#define DECODE(n, idx) \
628
638
movq %bits##n, %rax; \
@@ -649,7 +659,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
649
659
650
660
.p2align 6
651
661
652
- .L_4X2_loop_body :
662
+ LOCAL_LABEL(4X2_loop_body) :
653
663
/* We clobber r8, so store it on the stack */
654
664
movq %r8 , 0 (%rsp )
655
665
@@ -666,12 +676,12 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
666
676
FOR_EACH_STREAM(RELOAD_BITS)
667
677
668
678
cmp %op3, 48 (%rsp )
669
- ja .L_4X2_loop_body
670
- jmp .L_4X2_compute_olimit
679
+ ja LOCAL_LABEL(4X2_loop_body)
680
+ jmp LOCAL_LABEL(4X2_compute_olimit)
671
681
672
682
#undef DECODE
673
683
#undef RELOAD_BITS
674
- .L_4X2_exit :
684
+ LOCAL_LABEL(4X2_exit) :
675
685
addq $8 , %rsp
676
686
.cfi_def_cfa_offset 184
677
687
/* Restore stack (oend & olimit) */
0 commit comments