Skip to content

Commit

Permalink
Fixed issue with GCC 13.2.1.
Browse files Browse the repository at this point in the history
There is a difference in behaviour between GCC 10.3.0 and GCC 13.2.1. Newer compiler optimises or generates code for unaligned memory accesses. This is concluded by using the -Os compiler flag, and the code is working fine.
  • Loading branch information
vanvught committed Jan 23, 2025
1 parent 63d4564 commit 32d54fe
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 27 deletions.
6 changes: 3 additions & 3 deletions lib-network/.settings/language.settings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<project>
<configuration id="cdt.managedbuild.toolchain.gnu.cross.base.563973127" name="rpi">
<extension point="org.eclipse.cdt.core.LanguageSettingsProvider">
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="false" env-hash="1361842646839228070" id="org.eclipse.embedcdt.managedbuild.cross.arm.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT Arm Cross GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} ${cross_toolchain_flags} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="false" env-hash="-1621778804544586894" id="org.eclipse.embedcdt.managedbuild.cross.arm.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT Arm Cross GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} ${cross_toolchain_flags} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<language-scope id="org.eclipse.cdt.core.gcc"/>
<language-scope id="org.eclipse.cdt.core.g++"/>
</provider>
Expand All @@ -17,7 +17,7 @@
<provider copy-of="extension" id="org.eclipse.cdt.ui.UserLanguageSettingsProvider"/>
<provider-reference id="org.eclipse.cdt.core.ReferencedProjectsLanguageSettingsProvider" ref="shared-provider"/>
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuildCommandParser" id="org.eclipse.cdt.managedbuilder.core.GCCBuildCommandParser" keep-relative-paths="false" name="CDT GCC Build Output Parser" parameter="([^/\\\\]*)((g?cc)|([gc]\+\+)|(clang))" prefer-non-shared="true"/>
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="false" env-hash="1369006412721853526" id="org.eclipse.cdt.managedbuilder.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="false" env-hash="-1614615038661961438" id="org.eclipse.cdt.managedbuilder.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<language-scope id="org.eclipse.cdt.core.gcc"/>
<language-scope id="org.eclipse.cdt.core.g++"/>
</provider>
Expand All @@ -26,7 +26,7 @@
</configuration>
<configuration id="ilg.gnuarmeclipse.managedbuild.cross.toolchain.base.309283989.303033930" name="H3">
<extension point="org.eclipse.cdt.core.LanguageSettingsProvider">
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="true" env-hash="1915196592284708194" id="org.eclipse.embedcdt.managedbuild.cross.arm.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT Arm Cross GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} ${cross_toolchain_flags} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<provider class="org.eclipse.cdt.managedbuilder.language.settings.providers.GCCBuiltinSpecsDetector" console="true" env-hash="1352335747619580462" id="org.eclipse.embedcdt.managedbuild.cross.arm.core.GCCBuiltinSpecsDetector" keep-relative-paths="false" name="CDT Arm Cross GCC Built-in Compiler Settings" parameter="${COMMAND} ${FLAGS} ${cross_toolchain_flags} -E -P -v -dD &quot;${INPUTS}&quot;" prefer-non-shared="true">
<language-scope id="org.eclipse.cdt.core.gcc"/>
<language-scope id="org.eclipse.cdt.core.g++"/>
</provider>
Expand Down
2 changes: 1 addition & 1 deletion lib-network/src/net/arp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -263,7 +263,7 @@ static void arp_cache_clean_record(net::arp::Record& record) {
if (record.packet.p != nullptr) {
delete[] record.packet.p;
}
memset(&record, 0, sizeof(struct net::arp::Record));
std::memset(&record, 0, sizeof(struct net::arp::Record));
}

static void arp_send_request_unicast(const uint32_t nIp, const uint8_t *pMacAddress) {
Expand Down
8 changes: 4 additions & 4 deletions lib-network/src/net/dhcp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ namespace net {
static dhcp::Message s_dhcp_message SECTION_NETWORK ALIGNED;

static void message_init() {
memset(&s_dhcp_message, 0, sizeof(dhcp::Message));
std::memset(&s_dhcp_message, 0, sizeof(dhcp::Message));

s_dhcp_message.op = dhcp::OpCode::BOOTREQUEST;
s_dhcp_message.htype = dhcp::HardwareType::HTYPE_10MB; // This is the current default
Expand Down Expand Up @@ -682,7 +682,7 @@ bool dhcp_start() {
assert(nTimerId >= 0);
}

memset(dhcp, 0, sizeof(struct dhcp::Dhcp));
std::memset(dhcp, 0, sizeof(struct dhcp::Dhcp));
dhcp->nHandle = udp_begin(net::iana::IANA_PORT_DHCP_CLIENT, dhcp_input);

#ifndef NDEBUG
Expand Down Expand Up @@ -729,7 +729,7 @@ void dhcp_release_and_stop() {

/* clean old DHCP offer */
dhcp->server_ip_addr.addr = 0;
memset(&dhcp->offered, 0, sizeof(struct dhcp::Dhcp::Offered));
std::memset(&dhcp->offered, 0, sizeof(struct dhcp::Dhcp::Offered));
dhcp->t1_renew_time = dhcp->t2_rebind_time = dhcp->lease_used = dhcp->t0_timeout = 0;

if (dhcp_supplied_address()) {
Expand Down Expand Up @@ -790,7 +790,7 @@ void dhcp_process(const dhcp::Message *const pResponse, const uint32_t nSize) {
auto *dhcp = reinterpret_cast<struct dhcp::Dhcp *>(globals::netif_default.dhcp);
assert(dhcp != nullptr);

memset(&dhcp->offered, 0, sizeof(struct dhcp::Dhcp::Offered));
std::memset(&dhcp->offered, 0, sizeof(struct dhcp::Dhcp::Offered));

dhcp->server_ip_addr.addr = 0;

Expand Down
96 changes: 82 additions & 14 deletions lib-network/src/net/net_memcpy.h
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
/**
* @file net_memcpy.h
* @brief Provides optimized memory manipulation functions for embedded systems.
*
* This header defines utility functions for memory operations such as memset and memcpy.
* Optimized for performance and alignment considerations on ARM architectures.
*/
/* Copyright (C) 2021-2024 by Arjan van Vught mailto:[email protected]
/* Copyright (C) 2021-2025 by Arjan van Vught mailto:[email protected]
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -25,15 +28,67 @@
#define NET_MEMCPY_H_

#include <cstdint>
#include <cstring>
#include <cstddef>

#include "net/protocol/ip4.h"

namespace net {
/**
* @brief Optimized memset function for setting memory with a fixed value.
*
* @tparam V The constant value to set each byte to.
* @tparam L The constant length of the memory to set. Must be greater than 0.
* @param dest Pointer to the memory block to fill.
*
* This implementation optimizes for compile-time knowledge of the memory size.
* For sizes greater than `sizeof(uint32_t)`, it delegates to `std::memset` for
* potentially more optimized bulk operations. For smaller sizes, including the
* special case of `L == sizeof(uint32_t)`, it uses a loop to ensure alignment
* safety and avoid unaligned access.
*
* @note The implementation considers potential unaligned access issues when
* `L == sizeof(uint32_t)` and ensures safe byte-by-byte copying in this case.
*
* @warning The caller must ensure that `dest` points to a valid memory block
* of at least `L` bytes to avoid undefined behavior.
*/
template <uint8_t V, size_t L>
inline void memset(void *dest) {
static_assert(L > 0, "Length must be greater than 0");

if constexpr (L > sizeof(uint32_t)) {
// Use std::memset for larger memory blocks
std::memset(dest, V, L);
} else {
// Handle potential unaligned access for L <= sizeof(uint32_t)
#ifdef __ARM_ARCH_7A__
volatile auto *pDst = reinterpret_cast<uint8_t *>(dest);
#else
auto *pDst = reinterpret_cast<uint8_t *>(dest);
#endif
// For smaller sizes L <= sizeof(uint32_t), write bytes one at a time
for (size_t i = 0; i < L; i++) {
*pDst++ = V;
}
}
}
/**
* @brief Optimized memcpy function for copying memory.
*
* @param dest Pointer to the destination memory block.
* @param src Pointer to the source memory block.
* @param n Number of bytes to copy.
* @return A pointer to the destination memory block.
*
* This implementation attempts to perform word-aligned copying when both
* source and destination are aligned.
*/
inline void *memcpy(void *__restrict__ dest, void const *__restrict__ src, size_t n) {
auto *plDst = reinterpret_cast<uintptr_t*>(dest);
auto const *plSrc = reinterpret_cast<uintptr_t const*>(src);
auto *plDst = reinterpret_cast<uintptr_t *>(dest);
auto const *plSrc = reinterpret_cast<uintptr_t const *>(src);

// Perform word-aligned copying if both pointers are aligned
if (((reinterpret_cast<uintptr_t>(src) & (sizeof(uintptr_t) - 1)) == 0)
&& ((reinterpret_cast<uintptr_t>(dest) & (sizeof(uintptr_t) - 1)) == 0)) {
while (n >= sizeof(uintptr_t)) {
Expand All @@ -42,8 +97,9 @@ inline void *memcpy(void *__restrict__ dest, void const *__restrict__ src, size_
}
}

auto *pcDst = reinterpret_cast<uint8_t*>(plDst);
auto const *pcSrc = reinterpret_cast<uint8_t const*>(plSrc);
// Copy remaining bytes using byte pointers
auto *pcDst = reinterpret_cast<uint8_t *>(plDst);
auto const *pcSrc = reinterpret_cast<uint8_t const *>(plSrc);

while (n--) {
*pcDst++ = *pcSrc++;
Expand All @@ -52,6 +108,12 @@ inline void *memcpy(void *__restrict__ dest, void const *__restrict__ src, size_
return dest;
}

/**
* @brief Copies an IPv4 address from a 32-bit source to a byte array.
*
* @param pIpAddress Pointer to the destination byte array.
* @param nIpAddress 32-bit IPv4 address to copy.
*/
inline void memcpy_ip(uint8_t *pIpAddress, const uint32_t nIpAddress) {
#ifdef __ARM_ARCH_7A__
// Ensure destination pointer is aligned
Expand All @@ -75,21 +137,27 @@ inline void memcpy_ip(uint8_t *pIpAddress, const uint32_t nIpAddress) {
auto *pSrc = src.u8;
auto *pDst = pIpAddress;
#endif
for (uint32_t i = 0; i < IPv4_ADDR_LEN; i++) {
pDst[i] = pSrc[i];
}
for (size_t i = 0; i < IPv4_ADDR_LEN; i++) {
*pDst++ = *pSrc++;
}
}
}

/**
* @brief Copies an IPv4 address from a byte array to a 32-bit integer.
*
* @param pIpAddress Pointer to the source byte array.
* @return The 32-bit IPv4 address.
*/
inline uint32_t memcpy_ip(const uint8_t *pIpAddress) {
#ifdef __ARM_ARCH_7A__
// Ensure destination pointer is aligned
// Ensure source pointer is aligned
if ((reinterpret_cast<uint32_t>(pIpAddress) & ((sizeof(uint32_t) - 1))) == 0) {
// Destination pointer is already aligned, perform fast copy
// Source pointer is already aligned, perform fast copy
return *reinterpret_cast<const uint32_t *>(pIpAddress);
} else
#endif
{ // Destination pointer is not aligned, copy byte by byte
{ // Source pointer is not aligned, copy byte by byte
typedef union pcast32 {
uint32_t u32;
uint8_t u8[4];
Expand All @@ -103,9 +171,9 @@ inline uint32_t memcpy_ip(const uint8_t *pIpAddress) {
auto *pSrc = pIpAddress;
auto *pDst = src.u8;
#endif
for (uint32_t i = 0; i < IPv4_ADDR_LEN; i++) {
pDst[i] = pSrc[i];
}
for (size_t i = 0; i < IPv4_ADDR_LEN; i++) {
*pDst++ = *pSrc++;
}

return src.u32;
}
Expand Down
2 changes: 1 addition & 1 deletion lib-network/src/net/tcp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ static void tcp_bswap32_acknum_seqnum(struct t_tcp *p_tcp) {
}

static void tcp_init_tcb(struct tcb *pTcb, const uint16_t nLocalPort) {
memset(pTcb, 0, sizeof(struct tcb));
std::memset(pTcb, 0, sizeof(struct tcb));

pTcb->nLocalPort = nLocalPort;

Expand Down
7 changes: 3 additions & 4 deletions lib-network/src/net/udp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
#endif

#include <cstdint>
#include <cstring>
#include <algorithm>
#include <cassert>

Expand Down Expand Up @@ -158,10 +157,10 @@ static void udp_send_implementation(int nIndex, const uint8_t *pData, uint32_t n
net::memcpy(pOutBuffer->udp.data, pData, nSize);

if (nRemoteIp == net::IPADDR_BROADCAST) {
memset(pOutBuffer->ether.dst, 0xFF, ETH_ADDR_LEN);
memset(pOutBuffer->ip4.dst, 0xFF, IPv4_ADDR_LEN);
net::memset<0xFF, ETH_ADDR_LEN>(pOutBuffer->ether.dst);
net::memset<0xFF, IPv4_ADDR_LEN>(pOutBuffer->ip4.dst);
} else if ((nRemoteIp & net::globals::nBroadcastMask) == net::globals::nBroadcastMask) {
memset(pOutBuffer->ether.dst, 0xFF, ETH_ADDR_LEN);
net::memset<0xFF, ETH_ADDR_LEN>(pOutBuffer->ether.dst);
net::memcpy_ip(pOutBuffer->ip4.dst, nRemoteIp);
} else {
if ((nRemoteIp & 0xF0) == 0xE0) { // Multicast, we know the MAC Address
Expand Down

0 comments on commit 32d54fe

Please sign in to comment.