
Commit a9fc6aa

[libcpu/arm64] add C11 atomic ticket spinlock
Replace the former flag-based spinlock implementation, which is unfair. Besides, the C11 atomic implementation is more readable (it is plain C) and more maintainable: the toolchain can apply its built-in optimizations and tune for different micro-architectures. For example, Armv8.5 introduces a better instruction for this pattern, and the compiler can take advantage of it when it knows the target platform supports it. Signed-off-by: Shell <[email protected]>
1 parent aee6048 commit a9fc6aa
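
For background on the fairness claim in the message: a ticket lock serves acquirers strictly in the order they asked for the lock, whereas a plain test-and-set flag lets a lucky core re-grab the lock repeatedly. Below is a minimal C11 sketch of that scheme; it is illustrative only (the type and function names are invented here), the committed RT-Thread implementation appears in libcpu/aarch64/common/cpu.c further down.

    /* Minimal ticket-spinlock sketch using C11 atomics (illustrative only;
     * names are invented, the committed code lives in cpu.c). */
    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct
    {
        _Atomic(uint16_t) owner; /* ticket currently allowed to run */
        _Atomic(uint16_t) next;  /* next ticket to hand out */
    } ticket_lock_t;

    static void ticket_lock(ticket_lock_t *l)
    {
        /* take a ticket; relaxed is enough, ordering comes from the acquire load */
        uint16_t t = atomic_fetch_add_explicit(&l->next, 1, memory_order_relaxed);
        while (atomic_load_explicit(&l->owner, memory_order_acquire) != t)
        {
            /* spin until it is our turn (FIFO order) */
        }
    }

    static void ticket_unlock(ticket_lock_t *l)
    {
        /* pass the lock to the next ticket holder */
        atomic_fetch_add_explicit(&l->owner, 1, memory_order_release);
    }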

9 files changed: +238 additions, -109 deletions


libcpu/Kconfig

Lines changed: 3 additions & 0 deletions

@@ -12,6 +12,9 @@ if ARCH_ARMV8 && ARCH_CPU_64BIT
 config ARCH_HAVE_EFFICIENT_UNALIGNED_ACCESS
     bool
     default y
+config ARCH_USING_GENERIC_CPUID
+    bool "Using generic cpuid implementation"
+    default n
 endmenu
 endif

libcpu/aarch64/common/context_gcc.S

Lines changed: 4 additions & 0 deletions

@@ -44,7 +44,11 @@ int rt_hw_cpu_id(void)
 .weak rt_hw_cpu_id
 .type rt_hw_cpu_id, @function
 rt_hw_cpu_id:
+#if RT_CPUS_NR > 1
     mrs x0, tpidr_el1
+#else
+    mov x0, xzr
+#endif
     ret

 /*
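
Read as C, the routine above amounts to roughly the following sketch (not part of the commit); like the assembly, it assumes RT-Thread's SMP startup has stored the logical CPU index in TPIDR_EL1:

    /* Sketch of what rt_hw_cpu_id now does (not the committed code). */
    int rt_hw_cpu_id(void)
    {
    #if RT_CPUS_NR > 1
        unsigned long id;
        /* the per-CPU index is assumed to have been written to TPIDR_EL1 at boot */
        __asm__ volatile("mrs %0, tpidr_el1" : "=r"(id));
        return (int)id;
    #else
        return 0; /* single-core build: always CPU 0 */
    #endif
    }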

libcpu/aarch64/common/cpu.c

Lines changed: 57 additions & 43 deletions

@@ -8,6 +8,7 @@
  * 2011-09-15     Bernard      first version
  * 2019-07-28     zdzn         add smp support
  * 2023-02-21     GuEe-GUI     mov cpu ofw init to setup
+ * 2024-04-29     Shell        Add generic ticket spinlock using C11 atomic
  */

 #include <rthw.h>
@@ -55,65 +56,78 @@ rt_weak rt_uint64_t rt_cpu_mpidr_early[] =
 };
 #endif /* RT_USING_SMART */

-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-    unsigned int tmp;
-
-    asm volatile(
-        "   sevl\n"
-        "1: wfe\n"
-        "2: ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1b\n"
-        "   stxr    %w0, %w2, %1\n"
-        "   cbnz    %w0, 2b\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-}
+/* in support of C11 atomics */
+#if __STDC_VERSION__ >= 201112L
+#include <stdatomic.h>

-static inline int arch_spin_trylock(arch_spinlock_t *lock)
+union _spinlock
 {
-    unsigned int tmp;
-
-    asm volatile(
-        "   ldaxr   %w0, %1\n"
-        "   cbnz    %w0, 1f\n"
-        "   stxr    %w0, %w2, %1\n"
-        "1:\n"
-        : "=&r" (tmp), "+Q" (lock->lock)
-        : "r" (1)
-        : "cc", "memory");
-
-    return !tmp;
-}
+    _Atomic(rt_uint32_t) _value;
+    struct
+    {
+        _Atomic(rt_uint16_t) owner;
+        _Atomic(rt_uint16_t) next;
+    } ticket;
+};

-static inline void arch_spin_unlock(arch_spinlock_t *lock)
+void rt_hw_spin_lock_init(rt_hw_spinlock_t *_lock)
 {
-    asm volatile(
-        "   stlr    %w1, %0\n"
-        : "=Q" (lock->lock) : "r" (0) : "memory");
-}
+    union _spinlock *lock = (void *)_lock;

-void rt_hw_spin_lock_init(arch_spinlock_t *lock)
-{
-    lock->lock = 0;
+    /**
+     * just a dummy note that this is an atomic operation, though it always
+     * is one on arm64 even without the atomic API
+     */
+    atomic_store_explicit(&lock->_value, 0, memory_order_relaxed);
 }

-void rt_hw_spin_lock(rt_hw_spinlock_t *lock)
+rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_lock(lock);
+    rt_bool_t rc;
+    rt_uint32_t readonce;
+    union _spinlock temp;
+    union _spinlock *lock = (void *)_lock;
+
+    readonce = atomic_load_explicit(&lock->_value, memory_order_acquire);
+    temp._value = readonce;
+
+    if (temp.ticket.owner != temp.ticket.next)
+    {
+        rc = RT_FALSE;
+    }
+    else
+    {
+        temp.ticket.next += 1;
+        rc = atomic_compare_exchange_strong_explicit(
+            &lock->_value, &readonce, temp._value,
+            memory_order_acquire, memory_order_relaxed);
+    }
+    return rc;
 }

-void rt_hw_spin_unlock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_lock(rt_hw_spinlock_t *_lock)
 {
-    arch_spin_unlock(lock);
+    union _spinlock *lock = (void *)_lock;
+    rt_uint64_t owner;
+    rt_uint16_t ticket =
+        atomic_fetch_add_explicit(&lock->ticket.next, 1, memory_order_relaxed);
+
+    owner = atomic_load_explicit(&lock->ticket.owner, memory_order_acquire);
+    while (owner != ticket)
+    {
+        __asm__ volatile("isb");
+        owner = atomic_load_explicit(&lock->ticket.owner, memory_order_acquire);
+    }
 }

-rt_bool_t rt_hw_spin_trylock(rt_hw_spinlock_t *lock)
+void rt_hw_spin_unlock(rt_hw_spinlock_t *_lock)
 {
-    return arch_spin_trylock(lock);
+    union _spinlock *lock = (void *)_lock;
+    atomic_fetch_add_explicit(&lock->ticket.owner, 1, memory_order_release);
 }

+#endif
+
 static int _cpus_init_data_hardcoded(int num_cpus, rt_uint64_t *cpu_hw_ids, struct cpu_ops_t *cpu_ops[])
 {
     // load in cpu_hw_ids in cpuid_to_hwid,
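
A short usage sketch of the rt_hw_* entry points defined above; the lock variable and counter are invented for illustration, and in practice kernel code usually reaches these through the higher-level rt_spin_lock()/rt_spin_unlock() wrappers, which also manage interrupt state.

    /* Usage sketch (names invented): serialize updates of a shared counter
     * with the ticket spinlock implemented above. */
    #include <rthw.h>

    static rt_hw_spinlock_t counter_lock;
    static rt_uint32_t shared_counter;

    void counter_init(void)
    {
        rt_hw_spin_lock_init(&counter_lock);
    }

    void counter_add(void)
    {
        rt_hw_spin_lock(&counter_lock);   /* FIFO: callers are served in order */
        shared_counter++;
        rt_hw_spin_unlock(&counter_lock);
    }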

libcpu/aarch64/common/cpu_gcc.S

Lines changed: 134 additions & 61 deletions

@@ -1,103 +1,176 @@
 /*
- * Copyright (c) 2006-2020, RT-Thread Development Team
+ * Copyright (c) 2006-2024, RT-Thread Development Team
  *
  * SPDX-License-Identifier: Apache-2.0
  *
  * Date           Author       Notes
  * 2018-10-06     ZhaoXiaowei  the first version
+ * 2024-04-28     Shell        add generic spinlock implementation
  */
-
+
 .text
 .globl rt_hw_get_current_el
 rt_hw_get_current_el:
-    MRS     X0, CurrentEL
-    CMP     X0, 0xc
-    B.EQ    3f
-    CMP     X0, 0x8
-    B.EQ    2f
-    CMP     X0, 0x4
-    B.EQ    1f
-
-    LDR     X0, =0
-    B       0f
+    MRS X0, CurrentEL
+    CMP X0, 0xc
+    B.EQ 3f
+    CMP X0, 0x8
+    B.EQ 2f
+    CMP X0, 0x4
+    B.EQ 1f
+
+    LDR X0, =0
+    B 0f
 3:
-    LDR     X0, =3
-    B       0f
+    LDR X0, =3
+    B 0f
 2:
-    LDR     X0, =2
-    B       0f
+    LDR X0, =2
+    B 0f
 1:
-    LDR     X0, =1
-    B       0f
+    LDR X0, =1
+    B 0f
 0:
-    RET
+    RET


 .globl rt_hw_set_current_vbar
 rt_hw_set_current_vbar:
-    MRS     X1, CurrentEL
-    CMP     X1, 0xc
-    B.EQ    3f
-    CMP     X1, 0x8
-    B.EQ    2f
-    CMP     X1, 0x4
-    B.EQ    1f
-    B       0f
+    MRS X1, CurrentEL
+    CMP X1, 0xc
+    B.EQ 3f
+    CMP X1, 0x8
+    B.EQ 2f
+    CMP X1, 0x4
+    B.EQ 1f
+    B 0f
 3:
-    MSR     VBAR_EL3,X0
-    B       0f
+    MSR VBAR_EL3,X0
+    B 0f
 2:
-    MSR     VBAR_EL2,X0
-    B       0f
+    MSR VBAR_EL2,X0
+    B 0f
 1:
-    MSR     VBAR_EL1,X0
-    B       0f
+    MSR VBAR_EL1,X0
+    B 0f
 0:
-    RET
+    RET

 .globl rt_hw_set_elx_env
 rt_hw_set_elx_env:
-    MRS     X1, CurrentEL
-    CMP     X1, 0xc
-    B.EQ    3f
-    CMP     X1, 0x8
-    B.EQ    2f
-    CMP     X1, 0x4
-    B.EQ    1f
-    B       0f
+    MRS X1, CurrentEL
+    CMP X1, 0xc
+    B.EQ 3f
+    CMP X1, 0x8
+    B.EQ 2f
+    CMP X1, 0x4
+    B.EQ 1f
+    B 0f
 3:
-    MRS     X0, SCR_EL3
-    ORR     X0, X0, #0xF    /* SCR_EL3.NS|IRQ|FIQ|EA */
-    MSR     SCR_EL3, X0
-    B       0f
+    MRS X0, SCR_EL3
+    ORR X0, X0, #0xF /* SCR_EL3.NS|IRQ|FIQ|EA */
+    MSR SCR_EL3, X0
+    B 0f
 2:
-    MRS     X0, HCR_EL2
-    ORR     X0, X0, #0x38
-    MSR     HCR_EL2, X0
-    B       0f
+    MRS X0, HCR_EL2
+    ORR X0, X0, #0x38
+    MSR HCR_EL2, X0
+    B 0f
 1:
-    B       0f
+    B 0f
 0:
-    RET
+    RET

-.global rt_cpu_vector_set_base
+.globl rt_cpu_vector_set_base
 rt_cpu_vector_set_base:
-    MSR     VBAR_EL1,X0
+    MSR VBAR_EL1,X0
     RET


 /**
  * unsigned long rt_hw_ffz(unsigned long x)
  */
-.global rt_hw_ffz
+.globl rt_hw_ffz
 rt_hw_ffz:
-    mvn     x1, x0
-    clz     x0, x1
-    mov     x1, #0x3f
-    sub     x0, x1, x0
+    mvn x1, x0
+    clz x0, x1
+    mov x1, #0x3f
+    sub x0, x1, x0
     ret

-.global rt_hw_clz
+.globl rt_hw_clz
 rt_hw_clz:
-    clz     x0, x0
+    clz x0, x0
+    ret
+
+/**
+ * Spinlock (fallback implementation)
+ */
+
+rt_hw_spin_lock_init:
+    .weak rt_hw_spin_lock_init
+    stlr wzr, [x0]
+    ret
+
+rt_hw_spin_trylock:
+    .weak rt_hw_spin_trylock
+    sub sp, sp, #16
+    ldar w2, [x0]
+    add x1, sp, 8
+    stlr w2, [x1]
+    ldarh w1, [x1]
+    and w1, w1, 65535
+    add x3, sp, 10
+    ldarh w3, [x3]
+    cmp w1, w3, uxth
+    beq 1f
+    mov w0, 0
+    add sp, sp, 16
+    ret
+1:
+    add x1, sp, 10
+2:
+    ldaxrh w3, [x1]
+    add w3, w3, 1
+    stlxrh w4, w3, [x1]
+    cbnz w4, 2b
+    add x1, sp, 8
+    ldar w1, [x1]
+3:
+    ldaxr w3, [x0]
+    cmp w3, w2
+    bne 4f
+    stxr w4, w1, [x0]
+    cbnz w4, 3b
+4:
+    cset w0, eq
+    add sp, sp, 16
+    ret
+
+rt_hw_spin_lock:
+    .weak rt_hw_spin_lock
+    add x1, x0, 2
+1:
+    ldxrh w2, [x1]
+    add w3, w2, 1
+    stxrh w4, w3, [x1]
+    cbnz w4, 1b
+    and w2, w2, 65535
+    ldarh w1, [x0]
+    cmp w2, w1, uxth
+    beq 3f
+2:
+    isb
+    ldarh w1, [x0]
+    cmp w2, w1, uxth
+    bne 2b
+3:
+    ret
+
+rt_hw_spin_unlock:
+    .weak rt_hw_spin_unlock
+    ldxrh w1, [x0]
+    add w1, w1, 1
+    stlxrh w2, w1, [x0]
+    cbnz w2, rt_hw_spin_unlock
     ret
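
These assembly routines are declared .weak, so they only act as a fallback: when the toolchain provides C11 atomics, the strong definitions in cpu.c (guarded by __STDC_VERSION__ >= 201112L) win at link time. A tiny sketch of that weak/strong override pattern, with invented names:

    /* default.c -- weak fallback, analogous to the .weak asm routines above */
    __attribute__((weak)) void demo_op(void)
    {
        /* conservative generic body */
    }

    /* fast.c -- a normal (strong) definition in another translation unit
     * replaces the weak one at link time, just as the C11 spinlock in cpu.c
     * replaces this file's fallback when it is compiled in. */
    void demo_op(void)
    {
        /* optimized body */
    }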
