
Commit b12a82d

[libcpu/arm64] add C11 atomic ticket spinlock
Replace the former flag-based spinlock implementation, which is unfair. Besides, the C11 atomic implementation is more readable (it is plain C, after all) and more maintainable, because the toolchain can apply its built-in optimizations and tune for different micro-architectures. For example, Armv8.5 introduces a better instruction; the compiler can take advantage of it when it knows the target platform supports it.

Signed-off-by: Shell <[email protected]>
1 parent aee6048 commit b12a82d
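For background, the ticket-lock idea the commit adopts can be sketched in portable C11 on its own. This is a minimal, illustrative sketch assuming nothing beyond <stdatomic.h>; the names (ticket_lock, ticket_lock_acquire, and so on) are not from the commit, which builds the same scheme on RT-Thread's rt_hw_spinlock_t as shown in the diffs below.

#include <stdatomic.h>
#include <stdint.h>

struct ticket_lock
{
    _Atomic(uint16_t) owner; /* ticket currently being served */
    _Atomic(uint16_t) next;  /* next ticket to hand out */
};

static void ticket_lock_init(struct ticket_lock *l)
{
    atomic_store_explicit(&l->owner, 0, memory_order_relaxed);
    atomic_store_explicit(&l->next, 0, memory_order_relaxed);
}

static void ticket_lock_acquire(struct ticket_lock *l)
{
    /* take a ticket; the fetch-add only needs to be atomic, not ordered */
    uint16_t ticket = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);

    /* wait until the owner counter reaches our ticket;
     * the acquire load pairs with the release increment in unlock */
    while (atomic_load_explicit(&l->owner, memory_order_acquire) != ticket)
    {
        /* spin; a real port inserts a relaxation hint here (isb/wfe on arm64) */
    }
}

static void ticket_lock_release(struct ticket_lock *l)
{
    /* serve the next ticket; release publishes the critical section's writes */
    atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
}

Fairness follows from the FIFO order of the tickets: waiters acquire the lock strictly in the order they asked for it, which the old test-and-set flag loop could not guarantee.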

9 files changed: +188 −59 lines changed

libcpu/Kconfig
Lines changed: 3 additions & 0 deletions

@@ -12,6 +12,9 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
     config ARCH_HAVE_EFFICIENT_UNALIGNED_ACCESS
         bool
         default y
+    config ARCH_USING_GENERIC_CPUID
+        bool "Using generic cpuid implementation"
+        default n
 endmenu
 endif

libcpu/aarch64/common/context_gcc.S
Lines changed: 4 additions & 0 deletions

@@ -44,7 +44,11 @@ int rt_hw_cpu_id(void)
 .weak rt_hw_cpu_id
 .type rt_hw_cpu_id, @function
 rt_hw_cpu_id:
+#if RT_CPUS_NR > 1
     mrs x0, tpidr_el1
+#else
+    mov x0, xzr
+#endif
     ret

 /*

libcpu/aarch64/common/cpu.c
Lines changed: 57 additions & 43 deletions

@@ -8,6 +8,7 @@
  * 2011-09-15     Bernard      first version
  * 2019-07-28     zdzn         add smp support
  * 2023-02-21     GuEe-GUI     mov cpu ofw init to setup
+ * 2024-04-29     Shell        Add generic ticket spinlock using C11 atomic
  */

 #include <rthw.h>
@@ -55,65 +56,78 @@ rt_weak rt_uint64_t rt_cpu_mpidr_early[] =
 };
 #endif /* RT_USING_SMART */

-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-    unsigned int tmp;
-
-    asm volatile(
-        "   sevl\n"
-        "1: wfe\n"
-        "2: ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1b\n"
-        "   stxr    %w0, %w2, %1\n"
-        "   cbnz    %w0, 2b\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-}
+/* in support of C11 atomic */
+#if __STDC_VERSION__ >= 201112L
+#include <stdatomic.h>

-static inline int arch_spin_trylock(arch_spinlock_t *lock)
+union _spinlock
 {
-    unsigned int tmp;
-
-    asm volatile(
-        "   ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1f\n"
-        "   stxr    %w0, %w2, %1\n"
-        "1:\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-
-    return !tmp;
-}
+    _Atomic(rt_uint32_t) _value;
+    struct
+    {
+        _Atomic(rt_uint16_t) owner;
+        _Atomic(rt_uint16_t) next;
+    } ticket;
+};

-static inline void arch_spin_unlock(arch_spinlock_t *lock)
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
 {
-    asm volatile(
-        "   stlr    %w1, %0\n"
-        : "=Q" (lock->lock) : "r" (0) : "memory");
-}
+    union _spinlock *lock = (void *)_lock;

-void rt_hw_spin_lock_init(arch_spinlock_t *lock)
-{
-    lock->lock = 0;
+    /**
+     * just a dummy note that this is an atomic operation, though it always is
+     * even without usage of atomic API in arm64
+     */
+    atomic_store_explicit(&lock->_value, 0, memory_order_relaxed);
 }

-void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_lock(lock);
+    rt_bool_t rc;
+    rt_uint32_t readonce;
+    union _spinlock temp;
+    union _spinlock *lock = (void *)_lock;
+
+    readonce = atomic_load_explicit(&lock->_value, memory_order_acquire);
+    temp._value = readonce;
+
+    if (temp.ticket.owner != temp.ticket.next)
+    {
+        rc = RT_FALSE;
+    }
+    else
+    {
+        temp.ticket.next += 1;
+        rc = atomic_compare_exchange_strong_explicit(
+            &lock->_value, &readonce, temp._value,
+            memory_order_acquire, memory_order_relaxed);
+    }
+    return rc;
 }

-void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_lock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_unlock(lock);
+    union _spinlock *lock = (void *)_lock;
+    rt_uint64_t owner;
+    rt_uint16_t ticket =
+        atomic_fetch_add_explicit(&lock->ticket.next, 1, memory_order_relaxed);
+
+    owner = atomic_load_explicit(&lock->ticket.owner, memory_order_acquire);
+    while (owner != ticket)
+    {
+        __asm__ volatile("isb");
+        owner = atomic_load_explicit(&lock->ticket.owner, memory_order_acquire);
+    }
 }

-rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_unlock(rt_hw_spinlock_t *_lock)
 {
-    return arch_spin_trylock(lock);
+    union _spinlock *lock = (void *)_lock;
+    atomic_fetch_add_explicit(&lock->ticket.owner, 1, memory_order_release);
 }

+#endif
+
 static int _cpus_init_data_hardcoded(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_ops_t *cpu_ops[])
 {
     // load in cpu_hw_ids in cpuid_to_hwid,
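A hedged usage sketch (not part of the commit): the rt_hw_spin_* functions implemented above keep their existing RT-Thread contract, so callers are unchanged. The lock variable, functions, and shared state below (demo_lock, demo_init, demo_increment, shared_counter) are hypothetical, and an SMP build (RT_USING_SMP) is assumed.

#include <rthw.h>
#include <rtthread.h>

static rt_hw_spinlock_t demo_lock;   /* hypothetical lock protecting shared state */
static rt_uint32_t shared_counter;   /* hypothetical shared state */

void demo_init(void)
{
    rt_hw_spin_lock_init(&demo_lock); /* owner == next == 0, lock is free */
}

void demo_increment(void)
{
    rt_hw_spin_lock(&demo_lock);      /* take a ticket and wait to be served */
    shared_counter++;                 /* critical section */
    rt_hw_spin_unlock(&demo_lock);    /* hand the lock to the next ticket */
}

The union in cpu.c is a design choice worth noting: rt_hw_spin_trylock compares and swaps the owner/next pair as one 32-bit value, while rt_hw_spin_lock and rt_hw_spin_unlock operate on the two 16-bit halves independently.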

libcpu/aarch64/common/cpu_gcc.S
Lines changed: 84 additions & 11 deletions

@@ -1,12 +1,13 @@
 /*
- * Copyright (c) 2006-2020, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Date           Author       Notes
  * 2018-10-06     ZhaoXiaowei  the first version
+ * 2024-04-28     Shell        add generic spinlock implementation
  */
-
+
 .text
 .globl rt_hw_get_current_el
 rt_hw_get_current_el:
@@ -17,7 +18,7 @@ rt_hw_get_current_el:
     B.EQ 2f
     CMP X0, 0x4
     B.EQ 1f
-
+
     LDR X0, =0
     B 0f
 3:
@@ -80,7 +81,7 @@ rt_hw_set_elx_env:
 0:
     RET

-.global rt_cpu_vector_set_base
+.globl rt_cpu_vector_set_base
 rt_cpu_vector_set_base:
     MSR VBAR_EL1,X0
     RET
@@ -89,15 +90,87 @@ rt_cpu_vector_set_base:
 /**
  * unsigned long rt_hw_ffz(unsigned long x)
  */
-.global rt_hw_ffz
+.globl rt_hw_ffz
 rt_hw_ffz:
-    mvn x1, x0
-    clz x0, x1
-    mov x1, #0x3f
-    sub x0, x1, x0
+    mvn x1, x0
+    clz x0, x1
+    mov x1, #0x3f
+    sub x0, x1, x0
     ret

-.global rt_hw_clz
+.globl rt_hw_clz
 rt_hw_clz:
-    clz x0, x0
+    clz x0, x0
     ret
+
+/**
+ * Spinlock (fallback implementation)
+ */
+
+rt_hw_spin_lock_init:
+    .weak rt_hw_spin_lock_init
+    stlr wzr, [x0]
+    ret
+
+rt_hw_spin_trylock:
+    .weak rt_hw_spin_trylock
+    sub sp, sp, #16
+    ldar w2, [x0]
+    add x1, sp, 8
+    stlr w2, [x1]
+    ldarh w1, [x1]
+    and w1, w1, 65535
+    add x3, sp, 10
+    ldarh w3, [x3]
+    cmp w1, w3, uxth
+    beq 1f
+    mov w0, 0
+    add sp, sp, 16
+    ret
+1:
+    add x1, sp, 10
+2:
+    ldaxrh w3, [x1]
+    add w3, w3, 1
+    stlxrh w4, w3, [x1]
+    cbnz w4, 2b
+    add x1, sp, 8
+    ldar w1, [x1]
+3:
+    ldaxr w3, [x0]
+    cmp w3, w2
+    bne 4f
+    stxr w4, w1, [x0]
+    cbnz w4, 3b
+4:
+    cset w0, eq
+    add sp, sp, 16
+    ret
+
+rt_hw_spin_lock:
+    .weak rt_hw_spin_lock
+    add x1, x0, 2
+1:
+    ldxrh w2, [x1]
+    add w3, w2, 1
+    stxrh w4, w3, [x1]
+    cbnz w4, 1b
+    and w2, w2, 65535
+    ldarh w1, [x0]
+    cmp w2, w1, uxth
+    beq 3f
+2:
+    isb
+    ldarh w1, [x0]
+    cmp w2, w1, uxth
+    bne 2b
+3:
+    ret
+
+rt_hw_spin_unlock:
+    .weak rt_hw_spin_unlock
+    ldxrh w1, [x0]
+    add w1, w1, 1
+    stlxrh w2, w1, [x0]
+    cbnz w2, rt_hw_spin_unlock
+    ret

libcpu/aarch64/common/cpuport.h
Lines changed: 35 additions & 3 deletions

@@ -17,10 +17,42 @@
 #include <rtdef.h>

 #ifdef RT_USING_SMP
-typedef struct {
-    volatile unsigned int lock;
+
+/**
+ * Spinlock
+ */
+
+typedef struct
+{
+    rt_uint32_t value;
 } rt_hw_spinlock_t;
-#endif
+
+/**
+ * Generic hw-cpu-id
+ */
+#ifdef ARCH_USING_GENERIC_CPUID
+
+#if RT_CPUS_NR > 1
+
+rt_inline int rt_hw_cpu_id(void)
+{
+    long cpuid;
+    __asm__ volatile("mrs %0, tpidr_el1":"=r"(cpuid));
+    return cpuid;
+}
+
+#else
+
+rt_inline int rt_hw_cpu_id(void)
+{
+    return 0;
+}
+
+#endif /* RT_CPUS_NR > 1 */
+
+#endif /* ARCH_USING_GENERIC_CPUID */
+
+#endif /* RT_USING_SMP */

 #define rt_hw_barrier(cmd, ...) \
     __asm__ volatile (RT_STRINGIFY(cmd) " "RT_STRINGIFY(__VA_ARGS__):::"memory")

src/cpu_mp.c
Lines changed: 1 addition & 1 deletion

@@ -146,7 +146,7 @@ rt_base_t rt_cpus_lock(void)
     pcpu = rt_cpu_self();
     if (pcpu->current_thread != RT_NULL)
    {
-        register rt_ubase_t lock_nest = rt_atomic_load(&(pcpu->current_thread->cpus_lock_nest));
+        rt_ubase_t lock_nest = rt_atomic_load(&(pcpu->current_thread->cpus_lock_nest));

         rt_atomic_add(&(pcpu->current_thread->cpus_lock_nest), 1);
         if (lock_nest == 0)

src/scheduler_mp.c
Lines changed: 1 addition & 0 deletions

@@ -1089,6 +1089,7 @@ void rt_exit_critical_safe(rt_base_t critical_level)

 void rt_exit_critical_safe(rt_base_t critical_level)
 {
+    RT_UNUSED(critical_level);
     return rt_exit_critical();
 }

src/thread.c
Lines changed: 2 additions & 1 deletion

@@ -374,7 +374,8 @@ rt_thread_t rt_thread_self(void)
     self = rt_cpu_self()->current_thread;
     rt_hw_local_irq_enable(lock);
     return self;
-#else
+
+#else /* !RT_USING_SMP */
     extern rt_thread_t rt_current_thread;

     return rt_current_thread;

tools/ci/cpp_check.py
Lines changed: 1 addition & 0 deletions

@@ -27,6 +27,7 @@ def check(self):
             [
                 'cppcheck',
                 '-DRT_ASSERT(x)=',
+                '-DRTM_EXPORT(x)=',
                 '-Drt_list_for_each_entry(a,b,c)=a=(void*)b;',
                 '-I include',
                 '-I thread/components/finsh',
