-
-
Notifications
You must be signed in to change notification settings - Fork 107
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
There is a difference in behaviour between GCC 10.3.0 and GCC 13.2.1. Newer compiler optimises or generates code for unaligned memory accesses. This is concluded by using the -Os compiler flag, and the code is working fine.
- Loading branch information
Showing
6 changed files
with
94 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,11 @@ | ||
/** | ||
* @file net_memcpy.h | ||
* @brief Provides optimized memory manipulation functions for embedded systems. | ||
* | ||
* This header defines utility functions for memory operations such as memset and memcpy. | ||
* Optimized for performance and alignment considerations on ARM architectures. | ||
*/ | ||
/* Copyright (C) 2021-2024 by Arjan van Vught mailto:[email protected] | ||
/* Copyright (C) 2021-2025 by Arjan van Vught mailto:[email protected] | ||
* | ||
* Permission is hereby granted, free of charge, to any person obtaining a copy | ||
* of this software and associated documentation files (the "Software"), to deal | ||
|
@@ -25,15 +28,67 @@ | |
#define NET_MEMCPY_H_ | ||
|
||
#include <cstdint> | ||
#include <cstring> | ||
#include <cstddef> | ||
|
||
#include "net/protocol/ip4.h" | ||
|
||
namespace net { | ||
/** | ||
* @brief Optimized memset function for setting memory with a fixed value. | ||
* | ||
* @tparam V The constant value to set each byte to. | ||
* @tparam L The constant length of the memory to set. Must be greater than 0. | ||
* @param dest Pointer to the memory block to fill. | ||
* | ||
* This implementation optimizes for compile-time knowledge of the memory size. | ||
* For sizes greater than `sizeof(uint32_t)`, it delegates to `std::memset` for | ||
* potentially more optimized bulk operations. For smaller sizes, including the | ||
* special case of `L == sizeof(uint32_t)`, it uses a loop to ensure alignment | ||
* safety and avoid unaligned access. | ||
* | ||
* @note The implementation considers potential unaligned access issues when | ||
* `L == sizeof(uint32_t)` and ensures safe byte-by-byte copying in this case. | ||
* | ||
* @warning The caller must ensure that `dest` points to a valid memory block | ||
* of at least `L` bytes to avoid undefined behavior. | ||
*/ | ||
template <uint8_t V, size_t L> | ||
inline void memset(void *dest) { | ||
static_assert(L > 0, "Length must be greater than 0"); | ||
|
||
if constexpr (L > sizeof(uint32_t)) { | ||
// Use std::memset for larger memory blocks | ||
std::memset(dest, V, L); | ||
} else { | ||
// Handle potential unaligned access for L <= sizeof(uint32_t) | ||
#ifdef __ARM_ARCH_7A__ | ||
volatile auto *pDst = reinterpret_cast<uint8_t *>(dest); | ||
#else | ||
auto *pDst = reinterpret_cast<uint8_t *>(dest); | ||
#endif | ||
// For smaller sizes L <= sizeof(uint32_t), write bytes one at a time | ||
for (size_t i = 0; i < L; i++) { | ||
*pDst++ = V; | ||
} | ||
} | ||
} | ||
/** | ||
* @brief Optimized memcpy function for copying memory. | ||
* | ||
* @param dest Pointer to the destination memory block. | ||
* @param src Pointer to the source memory block. | ||
* @param n Number of bytes to copy. | ||
* @return A pointer to the destination memory block. | ||
* | ||
* This implementation attempts to perform word-aligned copying when both | ||
* source and destination are aligned. | ||
*/ | ||
inline void *memcpy(void *__restrict__ dest, void const *__restrict__ src, size_t n) { | ||
auto *plDst = reinterpret_cast<uintptr_t*>(dest); | ||
auto const *plSrc = reinterpret_cast<uintptr_t const*>(src); | ||
auto *plDst = reinterpret_cast<uintptr_t *>(dest); | ||
auto const *plSrc = reinterpret_cast<uintptr_t const *>(src); | ||
|
||
// Perform word-aligned copying if both pointers are aligned | ||
if (((reinterpret_cast<uintptr_t>(src) & (sizeof(uintptr_t) - 1)) == 0) | ||
&& ((reinterpret_cast<uintptr_t>(dest) & (sizeof(uintptr_t) - 1)) == 0)) { | ||
while (n >= sizeof(uintptr_t)) { | ||
|
@@ -42,8 +97,9 @@ inline void *memcpy(void *__restrict__ dest, void const *__restrict__ src, size_ | |
} | ||
} | ||
|
||
auto *pcDst = reinterpret_cast<uint8_t*>(plDst); | ||
auto const *pcSrc = reinterpret_cast<uint8_t const*>(plSrc); | ||
// Copy remaining bytes using byte pointers | ||
auto *pcDst = reinterpret_cast<uint8_t *>(plDst); | ||
auto const *pcSrc = reinterpret_cast<uint8_t const *>(plSrc); | ||
|
||
while (n--) { | ||
*pcDst++ = *pcSrc++; | ||
|
@@ -52,6 +108,12 @@ inline void *memcpy(void *__restrict__ dest, void const *__restrict__ src, size_ | |
return dest; | ||
} | ||
|
||
/** | ||
* @brief Copies an IPv4 address from a 32-bit source to a byte array. | ||
* | ||
* @param pIpAddress Pointer to the destination byte array. | ||
* @param nIpAddress 32-bit IPv4 address to copy. | ||
*/ | ||
inline void memcpy_ip(uint8_t *pIpAddress, const uint32_t nIpAddress) { | ||
#ifdef __ARM_ARCH_7A__ | ||
// Ensure destination pointer is aligned | ||
|
@@ -75,21 +137,27 @@ inline void memcpy_ip(uint8_t *pIpAddress, const uint32_t nIpAddress) { | |
auto *pSrc = src.u8; | ||
auto *pDst = pIpAddress; | ||
#endif | ||
for (uint32_t i = 0; i < IPv4_ADDR_LEN; i++) { | ||
pDst[i] = pSrc[i]; | ||
} | ||
for (size_t i = 0; i < IPv4_ADDR_LEN; i++) { | ||
*pDst++ = *pSrc++; | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* @brief Copies an IPv4 address from a byte array to a 32-bit integer. | ||
* | ||
* @param pIpAddress Pointer to the source byte array. | ||
* @return The 32-bit IPv4 address. | ||
*/ | ||
inline uint32_t memcpy_ip(const uint8_t *pIpAddress) { | ||
#ifdef __ARM_ARCH_7A__ | ||
// Ensure destination pointer is aligned | ||
// Ensure source pointer is aligned | ||
if ((reinterpret_cast<uint32_t>(pIpAddress) & ((sizeof(uint32_t) - 1))) == 0) { | ||
// Destination pointer is already aligned, perform fast copy | ||
// Source pointer is already aligned, perform fast copy | ||
return *reinterpret_cast<const uint32_t *>(pIpAddress); | ||
} else | ||
#endif | ||
{ // Destination pointer is not aligned, copy byte by byte | ||
{ // Source pointer is not aligned, copy byte by byte | ||
typedef union pcast32 { | ||
uint32_t u32; | ||
uint8_t u8[4]; | ||
|
@@ -103,9 +171,9 @@ inline uint32_t memcpy_ip(const uint8_t *pIpAddress) { | |
auto *pSrc = pIpAddress; | ||
auto *pDst = src.u8; | ||
#endif | ||
for (uint32_t i = 0; i < IPv4_ADDR_LEN; i++) { | ||
pDst[i] = pSrc[i]; | ||
} | ||
for (size_t i = 0; i < IPv4_ADDR_LEN; i++) { | ||
*pDst++ = *pSrc++; | ||
} | ||
|
||
return src.u32; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters