
Commit a9fc6aa

[libcpu/arm64] add C11 atomic ticket spinlock
Replace the former flag-based spinlock implementation, which is unfair. Besides, the C11 atomic implementation is more readable (it is plain C) and more maintainable: the toolchain can apply its built-in optimizations and tune for different micro-architectures. For example, Armv8.5 introduces a better instruction for this pattern, and the compiler can take advantage of it when it knows the target platform supports it. Signed-off-by: Shell <[email protected]>
1 parent aee6048 commit a9fc6aa
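
For background on the fairness claim in the message: a ticket lock serves acquirers strictly in the order they asked for the lock, whereas a plain test-and-set flag lets a lucky core re-grab the lock repeatedly. Below is a minimal C11 sketch of that scheme; it is illustrative only (the type and function names are invented here), the committed RT-Thread implementation appears in libcpu/aarch64/common/cpu.c further down.

    /* Minimal ticket-spinlock sketch using C11 atomics (illustrative only;
     * names are invented, the committed code lives in cpu.c). */
    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct
    {
        _Atomic(uint16_t) owner; /* ticket currently allowed to run */
        _Atomic(uint16_t) next;  /* next ticket to hand out */
    } ticket_lock_t;

    static void ticket_lock(ticket_lock_t *l)
    {
        /* take a ticket; relaxed is enough, ordering comes from the acquire load */
        uint16_t t = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);
        while (atomic_load_explicit(&l->owner, memory_order_acquire) != t)
        {
            /* spin until it is our turn (FIFO order) */
        }
    }

    static void ticket_unlock(ticket_lock_t *l)
    {
        /* pass the lock to the next ticket holder */
        atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
    }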

9 files changed: +238 additions, -109 deletions


libcpu/Kconfig

Lines changed: 3 additions & 0 deletions

@@ -12,6 +12,9 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
 config ARCH_HAVE_EFFICIENT_UNALIGNED_ACCESS
     bool
     default y
+config ARCH_USING_GENERIC_CPUID
+    bool "Using generic cpuid implementation"
+    default n
 endmenu
 endif

libcpu/aarch64/common/context_gcc.S

Lines changed: 4 additions & 0 deletions

@@ -44,7 +44,11 @@ int rt_hw_cpu_id(void)
 .weak rt_hw_cpu_id
 .type rt_hw_cpu_id, @function
 rt_hw_cpu_id:
+#if RT_CPUS_NR > 1
     mrs x0, tpidr_el1
+#else
+    mov x0, xzr
+#endif
     ret

 /*
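
Read as C, the routine above amounts to roughly the following sketch (not part of the commit); like the assembly, it assumes RT-Thread's SMP startup has stored the logical CPU index in TPIDR_EL1:

    /* Sketch of what rt_hw_cpu_id now does (not the committed code). */
    int rt_hw_cpu_id(void)
    {
    #if RT_CPUS_NR > 1
        unsigned long id;
        /* the per-CPU index is assumed to have been written to TPIDR_EL1 at boot */
        __asm__ volatile("mrs %0, tpidr_el1" : "=r"(id));
        return (int)id;
    #else
        return 0; /* single-core build: always CPU 0 */
    #endif
    }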

libcpu/aarch64/common/cpu.c

Lines changed: 57 additions & 43 deletions

@@ -8,6 +8,7 @@
  * 2011-09-15     Bernard      first version
  * 2019-07-28     zdzn         add smp support
  * 2023-02-21     GuEe-GUI     mov cpu ofw init to setup
+ * 2024-04-29     Shell        Add generic ticket spinlock using C11 atomic
  */

 #include <rthw.h>
@@ -55,65 +56,78 @@ rt_weak rt_uint64_t rt_cpu_mpidr_early[] =
 };
 #endif /* RT_USING_SMART */

-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-    unsigned int tmp;
-
-    asm volatile(
-        "   sevl\n"
-        "1: wfe\n"
-        "2: ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1b\n"
-        "   stxr    %w0, %w2, %1\n"
-        "   cbnz    %w0, 2b\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-}
+/* in support of C11 atomics */
+#if __STDC_VERSION__ >= 201112L
+#include <stdatomic.h>

-static inline int arch_spin_trylock(arch_spinlock_t *lock)
+union _spinlock
 {
-    unsigned int tmp;
-
-    asm volatile(
-        "   ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1f\n"
-        "   stxr    %w0, %w2, %1\n"
-        "1:\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-
-    return !tmp;
-}
+    _Atomic(rt_uint32_t) _value;
+    struct
+    {
+        _Atomic(rt_uint16_t) owner;
+        _Atomic(rt_uint16_t) next;
+    } ticket;
+};

-static inline void arch_spin_unlock(arch_spinlock_t *lock)
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
 {
-    asm volatile(
-        "   stlr    %w1, %0\n"
-        : "=Q" (lock->lock) : "r" (0) : "memory");
-}
+    union _spinlock *lock = (void *)_lock;

-void rt_hw_spin_lock_init(arch_spinlock_t *lock)
-{
-    lock->lock = 0;
+    /**
+     * just a dummy note that this is an atomic operation, though it always
+     * is one on arm64 even without the atomic API
+     */
+    atomic_store_explicit(&lock->_value, 0, memory_order_relaxed);
 }

-void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_lock(lock);
+    rt_bool_t rc;
+    rt_uint32_t readonce;
+    union _spinlock temp;
+    union _spinlock *lock = (void *)_lock;
+
+    readonce = atomic_load_explicit(&lock->_value, memory_order_acquire);
+    temp._value = readonce;
+
+    if (temp.ticket.owner != temp.ticket.next)
+    {
+        rc = RT_FALSE;
+    }
+    else
+    {
+        temp.ticket.next += 1;
+        rc = atomic_compare_exchange_strong_explicit(
+            &lock->_value, &readonce, temp._value,
+            memory_order_acquire, memory_order_relaxed);
+    }
+    return rc;
 }

-void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_lock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_unlock(lock);
+    union _spinlock *lock = (void *)_lock;
+    rt_uint64_t owner;
+    rt_uint16_t ticket =
+        atomic_fetch_add_explicit(&lock->ticket.next, 1, memory_order_relaxed);
+
+    owner = atomic_load_explicit(&lock->ticket.owner, memory_order_acquire);
+    while (owner != ticket)
+    {
+        __asm__ volatile("isb");
+        owner = atomic_load_explicit(&lock->ticket.owner, memory_order_acquire);
+    }
 }

-rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_unlock(rt_hw_spinlock_t *_lock)
 {
-    return arch_spin_trylock(lock);
+    union _spinlock *lock = (void *)_lock;
+    atomic_fetch_add_explicit(&lock->ticket.owner, 1, memory_order_release);
 }

+#endif
+
 static int _cpus_init_data_hardcoded(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_ops_t *cpu_ops[])
 {
     // load in cpu_hw_ids in cpuid_to_hwid,
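
A short usage sketch of the rt_hw_* entry points defined above; the lock variable and counter are invented for illustration, and in practice kernel code usually reaches these through the higher-level rt_spin_lock()/rt_spin_unlock() wrappers, which also manage interrupt state.

    /* Usage sketch (names invented): serialize updates of a shared counter
     * with the ticket spinlock implemented above. */
    #include <rthw.h>

    static rt_hw_spinlock_t counter_lock;
    static rt_uint32_t shared_counter;

    void counter_init(void)
    {
        rt_hw_spin_lock_init(&counter_lock);
    }

    void counter_add(void)
    {
        rt_hw_spin_lock(&counter_lock);   /* FIFO: callers are served in order */
        shared_counter++;
        rt_hw_spin_unlock(&counter_lock);
    }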

libcpu/aarch64/common/cpu_gcc.S

Lines changed: 134 additions & 61 deletions

@@ -1,103 +1,176 @@
 /*
- * Copyright (c) 2006-2020, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Date           Author       Notes
  * 2018-10-06     ZhaoXiaowei  the first version
+ * 2024-04-28     Shell        add generic spinlock implementation
  */
-
+
 .text
 .globl rt_hw_get_current_el
 rt_hw_get_current_el:
-    MRS     X0, CurrentEL
-    CMP     X0, 0xc
-    B.EQ    3f
-    CMP     X0, 0x8
-    B.EQ    2f
-    CMP     X0, 0x4
-    B.EQ    1f
-
-    LDR     X0, =0
-    B       0f
+    MRS X0, CurrentEL
+    CMP X0, 0xc
+    B.EQ 3f
+    CMP X0, 0x8
+    B.EQ 2f
+    CMP X0, 0x4
+    B.EQ 1f
+
+    LDR X0, =0
+    B 0f
 3:
-    LDR     X0, =3
-    B       0f
+    LDR X0, =3
+    B 0f
 2:
-    LDR     X0, =2
-    B       0f
+    LDR X0, =2
+    B 0f
 1:
-    LDR     X0, =1
-    B       0f
+    LDR X0, =1
+    B 0f
 0:
-    RET
+    RET


 .globl rt_hw_set_current_vbar
 rt_hw_set_current_vbar:
-    MRS     X1, CurrentEL
-    CMP     X1, 0xc
-    B.EQ    3f
-    CMP     X1, 0x8
-    B.EQ    2f
-    CMP     X1, 0x4
-    B.EQ    1f
-    B       0f
+    MRS X1, CurrentEL
+    CMP X1, 0xc
+    B.EQ 3f
+    CMP X1, 0x8
+    B.EQ 2f
+    CMP X1, 0x4
+    B.EQ 1f
+    B 0f
 3:
-    MSR     VBAR_EL3,X0
-    B       0f
+    MSR VBAR_EL3,X0
+    B 0f
 2:
-    MSR     VBAR_EL2,X0
-    B       0f
+    MSR VBAR_EL2,X0
+    B 0f
 1:
-    MSR     VBAR_EL1,X0
-    B       0f
+    MSR VBAR_EL1,X0
+    B 0f
 0:
-    RET
+    RET

 .globl rt_hw_set_elx_env
 rt_hw_set_elx_env:
-    MRS     X1, CurrentEL
-    CMP     X1, 0xc
-    B.EQ    3f
-    CMP     X1, 0x8
-    B.EQ    2f
-    CMP     X1, 0x4
-    B.EQ    1f
-    B       0f
+    MRS X1, CurrentEL
+    CMP X1, 0xc
+    B.EQ 3f
+    CMP X1, 0x8
+    B.EQ 2f
+    CMP X1, 0x4
+    B.EQ 1f
+    B 0f
 3:
-    MRS     X0, SCR_EL3
-    ORR     X0, X0, #0xF    /* SCR_EL3.NS|IRQ|FIQ|EA */
-    MSR     SCR_EL3, X0
-    B       0f
+    MRS X0, SCR_EL3
+    ORR X0, X0, #0xF /* SCR_EL3.NS|IRQ|FIQ|EA */
+    MSR SCR_EL3, X0
+    B 0f
 2:
-    MRS     X0, HCR_EL2
-    ORR     X0, X0, #0x38
-    MSR     HCR_EL2, X0
-    B       0f
+    MRS X0, HCR_EL2
+    ORR X0, X0, #0x38
+    MSR HCR_EL2, X0
+    B 0f
 1:
-    B       0f
+    B 0f
 0:
-    RET
+    RET

-.global rt_cpu_vector_set_base
+.globl rt_cpu_vector_set_base
 rt_cpu_vector_set_base:
-    MSR     VBAR_EL1,X0
+    MSR VBAR_EL1,X0
     RET


 /**
  * unsigned long rt_hw_ffz(unsigned long x)
  */
-.global rt_hw_ffz
+.globl rt_hw_ffz
 rt_hw_ffz:
-    mvn     x1, x0
-    clz     x0, x1
-    mov     x1, #0x3f
-    sub     x0, x1, x0
+    mvn x1, x0
+    clz x0, x1
+    mov x1, #0x3f
+    sub x0, x1, x0
     ret

-.global rt_hw_clz
+.globl rt_hw_clz
 rt_hw_clz:
-    clz     x0, x0
+    clz x0, x0
+    ret
+
+/**
+ * Spinlock (fallback implementation)
+ */
+
+rt_hw_spin_lock_init:
+    .weak rt_hw_spin_lock_init
+    stlr wzr, [x0]
+    ret
+
+rt_hw_spin_trylock:
+    .weak rt_hw_spin_trylock
+    sub sp, sp, #16
+    ldar w2, [x0]
+    add x1, sp, 8
+    stlr w2, [x1]
+    ldarh w1, [x1]
+    and w1, w1, 65535
+    add x3, sp, 10
+    ldarh w3, [x3]
+    cmp w1, w3, uxth
+    beq 1f
+    mov w0, 0
+    add sp, sp, 16
+    ret
+1:
+    add x1, sp, 10
+2:
+    ldaxrh w3, [x1]
+    add w3, w3, 1
+    stlxrh w4, w3, [x1]
+    cbnz w4, 2b
+    add x1, sp, 8
+    ldar w1, [x1]
+3:
+    ldaxr w3, [x0]
+    cmp w3, w2
+    bne 4f
+    stxr w4, w1, [x0]
+    cbnz w4, 3b
+4:
+    cset w0, eq
+    add sp, sp, 16
+    ret
+
+rt_hw_spin_lock:
+    .weak rt_hw_spin_lock
+    add x1, x0, 2
+1:
+    ldxrh w2, [x1]
+    add w3, w2, 1
+    stxrh w4, w3, [x1]
+    cbnz w4, 1b
+    and w2, w2, 65535
+    ldarh w1, [x0]
+    cmp w2, w1, uxth
+    beq 3f
+2:
+    isb
+    ldarh w1, [x0]
+    cmp w2, w1, uxth
+    bne 2b
+3:
+    ret
+
+rt_hw_spin_unlock:
+    .weak rt_hw_spin_unlock
+    ldxrh w1, [x0]
+    add w1, w1, 1
+    stlxrh w2, w1, [x0]
+    cbnz w2, rt_hw_spin_unlock
     ret
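
These assembly routines are declared .weak, so they only act as a fallback: when the toolchain provides C11 atomics, the strong definitions in cpu.c (guarded by __STDC_VERSION__ >= 201112L) win at link time. A tiny sketch of that weak/strong override pattern, with invented names:

    /* default.c -- weak fallback, analogous to the .weak asm routines above */
    __attribute__((weak)) void demo_op(void)
    {
        /* conservative generic body */
    }

    /* fast.c -- a normal (strong) definition in another translation unit
     * replaces the weak one at link time, just as the C11 spinlock in cpu.c
     * replaces this file's fallback when it is compiled in. */
    void demo_op(void)
    {
        /* optimized body */
    }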
