From 46f121d9d847bcafa8bbf56df28c7fd0c1f73615 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Fri, 15 Mar 2024 10:52:16 +1100 Subject: [PATCH 1/3] Enable Link Time Optimisation ref: https://gcc.gnu.org/wiki/LinkTimeOptimization Link Time Optimisation defers final compilation until the link stage, which allows more aggressive inlining and optimisations to occur, as more information is known at link time vs when each object is compiled. For the esp32dev platform, I have the following size reductions: Without LTO: 1313024 .pio/build/esp32dev/firmware.bin With LTO: 1214256 .pio/build/esp32dev/firmware.bin Signed-off-by: Alastair D'Silva --- pio-scripts/lto.py | 24 ++++++++++++++++++++++++ platformio.ini | 4 ++++ wled00/wled00.ino | 2 ++ 3 files changed, 30 insertions(+) create mode 100644 pio-scripts/lto.py diff --git a/pio-scripts/lto.py b/pio-scripts/lto.py new file mode 100644 index 0000000000..f6e7cab5dd --- /dev/null +++ b/pio-scripts/lto.py @@ -0,0 +1,24 @@ +Import('env') + +env.Replace(AR=env['AR'].replace('elf-ar', 'elf-gcc-ar')) +env.Replace(RANLIB=env['RANLIB'].replace('elf-ranlib', 'elf-gcc-ranlib')) + +# Something later clobbers AR & RANLIB, so until https://github.com/platformio/platform-espressif32/pull/1329 +# is available, wrap the replace function to protect them + +# Save a reference to the original env.Replace() +original_replace = env.Replace + +def create_replace_wrapper(env): + def replace_wrapper(**kw): + if 'AR' in kw: + kw.pop("AR") + if 'RANLIB' in kw: + kw.pop("RANLIB") + + original_replace(**kw) + + return replace_wrapper + +# Replace the env.Replace with the wrapper +env.Replace = create_replace_wrapper(env) diff --git a/platformio.ini b/platformio.ini index 6306595a29..3d1a85db3f 100644 --- a/platformio.ini +++ b/platformio.ini @@ -87,6 +87,7 @@ debug_flags = -D DEBUG=1 -D WLED_DEBUG # This reduces the OTA size with ~45KB, so it's especially useful on low memory boards (512k/1m). # ------------------------------------------------------------------------------ build_flags = + -flto -Wno-attributes -DMQTT_MAX_PACKET_SIZE=1024 -DSECURE_CLIENT=SECURE_CLIENT_BEARSSL @@ -103,6 +104,8 @@ build_flags = -D DECODE_LG=true -DWLED_USE_MY_CONFIG + + build_unflags = build_flags_esp8266 = ${common.build_flags} ${esp8266.build_flags} @@ -121,6 +124,7 @@ extra_scripts = post:pio-scripts/strip-floats.py pre:pio-scripts/user_config_copy.py pre:pio-scripts/build_ui.py + pre:pio-scripts/lto.py # ------------------------------------------------------------------------------ # COMMON SETTINGS: diff --git a/wled00/wled00.ino b/wled00/wled00.ino index 866543ab93..50e6561429 100644 --- a/wled00/wled00.ino +++ b/wled00/wled00.ino @@ -12,10 +12,12 @@ */ #include "wled.h" +void setup() __attribute__((used)); void setup() { WLED::instance().setup(); } +void loop() __attribute__((used)); void loop() { WLED::instance().loop(); } From b42def696284d1e5408d0c0f9381dfe82ba17068 Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Fri, 15 Mar 2024 11:27:38 +1100 Subject: [PATCH 2/3] Disable LTO for ESP8266 due to build errors These require investigation: /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: libc_replacements.cpp.o (symbol from plugin): in function `_open_r': (.text+0x0): multiple definition of `puts'; /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/lib/libc.a(lib_a-puts.o):/workdir/repo/newlib/newlib/libc/stdio/puts.c:129: first defined here /home/runner/.platformio/packages/framework-arduinoespressif8266/cores/esp8266/core_esp8266_wiring_digital.cpp:129:3: warning: type 'struct ArgStructure' violates the C++ One Definition Rule [-Wodr] 129 | } ArgStructure; | ^ /home/runner/.platformio/packages/framework-arduinoespressif8266/cores/esp8266/FunctionalInterrupt.h:26:8: note: a different type is defined in another translation unit 26 | struct ArgStructure { | ^ /home/runner/.platformio/packages/framework-arduinoespressif8266/cores/esp8266/core_esp8266_wiring_digital.cpp:128:8: note: the first difference of corresponding definitions is field 'functionInfo' 128 | void* functionInfo; | ^ /home/runner/.platformio/packages/framework-arduinoespressif8266/cores/esp8266/FunctionalInterrupt.h:28:16: note: a field of same name but different type is defined in another translation unit 28 | FunctionInfo* functionInfo = nullptr; | ^ /home/runner/.platformio/packages/framework-arduinoespressif8266/cores/esp8266/core_esp8266_wiring_digital.cpp:129:3: note: type 'void' should match type 'struct FunctionInfo' 129 | } ArgStructure; | ^ /home/runner/.platformio/packages/framework-arduinoespressif8266/cores/esp8266/FunctionalInterrupt.h:21:8: note: the incompatible type is defined here 21 | struct FunctionInfo { | ^ /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: .pio/build/nodemcuv2/firmware.elf section `.text1' will not fit in region `iram1_0_seg' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o:(.text+0x4): undefined reference to `stack_thunk_save' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o:(.text+0x8): undefined reference to `SigningVerifier_verify' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o: in function `thunk_SigningVerifier_verify': :(.text+0x1f): undefined reference to `SigningVerifier_verify' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: :(.text+0x32): undefined reference to `stack_thunk_fatal_smashing' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: :(.text+0x72): undefined reference to `stack_thunk_fatal_smashing' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o: in function `thunk_br_ssl_engine_recvapp_ack': :(.text+0xb2): undefined reference to `stack_thunk_fatal_smashing' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o: in function `thunk_br_ssl_engine_recvapp_buf': :(.text+0xf2): undefined reference to `stack_thunk_fatal_smashing' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o: in function `thunk_br_ssl_engine_recvrec_ack': :(.text+0x132): undefined reference to `stack_thunk_fatal_smashing' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o::(.text+0x172): more undefined references to `stack_thunk_fatal_smashing' follow /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans0.ltrans.o: in function `__wrap_system_restart_local': :(.text.__wrap_system_restart_local+0x2): undefined reference to `postmortem_report' /home/runner/.platformio/packages/toolchain-xtensa/bin/../lib/gcc/xtensa-lx106-elf/10.3.0/../../../../xtensa-lx106-elf/bin/ld: /tmp/firmware.elf.ojg0kn.ltrans3.ltrans.o: in function `_ZN8Espalexa18handleAlexaApiCallEP21AsyncWebServerRequest': :(.text+0x3c68): undefined reference to `_ZN23NeoEsp8266I2sMethodCore15c_StateDataSizeE' collect2: error: ld returned 1 exit status *** [.pio/build/nodemcuv2/firmware.elf] Error 1 Signed-off-by: Alastair D'Silva --- platformio.ini | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/platformio.ini b/platformio.ini index 3d1a85db3f..16c510cda2 100644 --- a/platformio.ini +++ b/platformio.ini @@ -87,7 +87,6 @@ debug_flags = -D DEBUG=1 -D WLED_DEBUG # This reduces the OTA size with ~45KB, so it's especially useful on low memory boards (512k/1m). # ------------------------------------------------------------------------------ build_flags = - -flto -Wno-attributes -DMQTT_MAX_PACKET_SIZE=1024 -DSECURE_CLIENT=SECURE_CLIENT_BEARSSL @@ -104,13 +103,15 @@ build_flags = -D DECODE_LG=true -DWLED_USE_MY_CONFIG - +lto_flags = + -flto build_unflags = -build_flags_esp8266 = ${common.build_flags} ${esp8266.build_flags} -build_flags_esp32 = ${common.build_flags} ${esp32.build_flags} -build_flags_esp32_V4= ${common.build_flags} ${esp32_idf_V4.build_flags} +# No LTO for ESP8266 until it builds without errors +build_flags_esp8266 = ${common.build_flags} ${esp8266.build_flags} +build_flags_esp32 = ${common.build_flags} ${esp32.build_flags} ${common.lto_flags} +build_flags_esp32_V4= ${common.build_flags} ${esp32_idf_V4.build_flags} ${common.lto_flags} ldscript_1m128k = eagle.flash.1m128.ld ldscript_2m512k = eagle.flash.2m512.ld From 42c313423aefda652f637234ddd5cec21f5a134d Mon Sep 17 00:00:00 2001 From: Alastair D'Silva Date: Fri, 15 Mar 2024 20:49:09 +1100 Subject: [PATCH 3/3] Squashme: allow Arduino core to be LTO'd, add comments about debug Signed-off-by: Alastair D'Silva --- platformio.ini | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/platformio.ini b/platformio.ini index 16c510cda2..3a1eb4c9ea 100644 --- a/platformio.ini +++ b/platformio.ini @@ -59,6 +59,7 @@ platform_packages = platformio/framework-arduinoespressif8266 # esp8266 : see https://docs.platformio.org/en/latest/platforms/espressif8266.html#debug-level # esp32 : see https://docs.platformio.org/en/latest/platforms/espressif32.html#debug-level # ------------------------------------------------------------------------------ +# Enabling debug? You might want to disable LTO to avoid call stacks being squashed through inlining debug_flags = -D DEBUG=1 -D WLED_DEBUG -DDEBUG_ESP_WIFI -DDEBUG_ESP_HTTP_CLIENT -DDEBUG_ESP_HTTP_UPDATE -DDEBUG_ESP_HTTP_SERVER -DDEBUG_ESP_UPDATER -DDEBUG_ESP_OTA -DDEBUG_TLS_MEM ;; for esp8266 # if needed (for memleaks etc) also add; -DDEBUG_ESP_OOM -include "umm_malloc/umm_malloc_cfg.h" @@ -107,9 +108,10 @@ lto_flags = -flto build_unflags = + -fno-lto -# No LTO for ESP8266 until it builds without errors -build_flags_esp8266 = ${common.build_flags} ${esp8266.build_flags} +# No LTO for ESP8266 as we overflow IRAM +build_flags_esp8266 = ${common.build_flags} ${esp8266.build_flags} build_flags_esp32 = ${common.build_flags} ${esp32.build_flags} ${common.lto_flags} build_flags_esp32_V4= ${common.build_flags} ${esp32_idf_V4.build_flags} ${common.lto_flags} @@ -203,6 +205,7 @@ build_flags = ; decrease code cache size and increase IRAM to fit all pixel functions -D PIO_FRAMEWORK_ARDUINO_MMU_CACHE16_IRAM48 ;; in case of linker errors like "section `.text1' will not fit in region `iram1_0_seg'" ; -D PIO_FRAMEWORK_ARDUINO_MMU_CACHE16_IRAM48_SECHEAP_SHARED ;; (experimental) adds some extra heap, but may cause slowdown + lib_deps = #https://github.com/lorol/LITTLEFS.git