diff --git a/cpu/arm7_common/arm_cpu.c b/cpu/arm7_common/arm_cpu.c index d358f3c0ae..1634349956 100644 --- a/cpu/arm7_common/arm_cpu.c +++ b/cpu/arm7_common/arm_cpu.c @@ -16,6 +16,7 @@ * @} */ +#include #include #include "arm_cpu.h" #include "irq.h" @@ -42,37 +43,54 @@ __attribute__((used, section(".svc_stack"), aligned(4))) uint8_t svc_stack[ISR_S *--------------------------------------------------------------------------*/ char *thread_stack_init(thread_task_func_t task_func, void *arg, void *stack_start, int stack_size) { - unsigned int *stk; - int i; - stk = (unsigned int *)((uintptr_t)stack_start + stack_size); - stk--; + uint32_t *stk; + uintptr_t sp; + stk = (uint32_t *)((uintptr_t)stack_start + stack_size); + /* adjust to 32 bit boundary by clearing the last two bits in the address */ + stk = (uint32_t *)(((uintptr_t)stk) & ~((uintptr_t)0x3)); + + /* stack start marker */ + stk--; *stk = STACK_MARKER; + /* make sure the stack is double word aligned (8 bytes) */ + /* This is required in order to conform with Procedure Call Standard for the + * ARM® Architecture (AAPCS) */ + /* https://web.archive.org/web/20150316114702/http://infocenter.arm.com/help/topic/com.arm.doc.ihi0042e/IHI0042E_aapcs.pdf + */ + if (((uint32_t) stk & 0x7) != 0) { + /* add a single word padding */ + --stk; + *stk = ~((uint32_t)STACK_MARKER); + } + + sp = (uintptr_t)stk; + /* set the return address (LR) */ stk--; - *stk = (unsigned int) sched_task_exit; + *stk = (uintptr_t)sched_task_exit; /* set the stack pointer (SP) */ stk--; - *stk = (unsigned int)((unsigned int)stack_start + stack_size) - 4; + *stk = sp; /* build base stack */ - for (i = REGISTER_CNT; i > 0 ; i--) { + for (int i = REGISTER_CNT; i > 0 ; i--) { stk--; *stk = i; } /* set argument to task_func */ stk--; - *stk = ((unsigned int) arg); + *stk = (uintptr_t)arg; /* set the entry point */ stk--; - *stk = ((unsigned int) task_func); + *stk = (uintptr_t)task_func; /* set the saved program status register */ stk--; - *stk = (unsigned int) NEW_TASK_CPSR; + *stk = (uint32_t)NEW_TASK_CPSR; return (char *)stk; } diff --git a/tests/thread_stack_alignment/Makefile b/tests/thread_stack_alignment/Makefile new file mode 100644 index 0000000000..439621d3cf --- /dev/null +++ b/tests/thread_stack_alignment/Makefile @@ -0,0 +1,10 @@ +include ../Makefile.tests_common + +USEMODULE += printf_float + +# On ESP* a custom sched_task_exit() is used that does not implement +# test_utils_print_stack_usage yet, which is needed by the test script +# to measure the worst case memory wasting when stacks are unaligned. +FEATURES_BLACKLIST += arch_esp + +include $(RIOTBASE)/Makefile.include diff --git a/tests/thread_stack_alignment/Makefile.ci b/tests/thread_stack_alignment/Makefile.ci new file mode 100644 index 0000000000..d5368f025e --- /dev/null +++ b/tests/thread_stack_alignment/Makefile.ci @@ -0,0 +1,12 @@ +BOARD_INSUFFICIENT_MEMORY := \ + arduino-duemilanove \ + arduino-leonardo \ + arduino-nano \ + arduino-uno \ + atmega328p \ + atmega328p-xplained-mini \ + nucleo-f031k6 \ + nucleo-l011k4 \ + samd10-xmini \ + stm32f030f4-demo \ + # diff --git a/tests/thread_stack_alignment/README.md b/tests/thread_stack_alignment/README.md new file mode 100644 index 0000000000..f179392e2e --- /dev/null +++ b/tests/thread_stack_alignment/README.md @@ -0,0 +1,22 @@ +Testing for Correct Stack Alignment +=================================== + +This test application asks the linker to align a stack to 128 B (assuming this +is the worst case alignment requirement). Not that features like the MPU may +result in much higher alignment requirements than the CPU actually has, thus +128 B is not crazy as it may sound. For each offset from 0 to 127 it will +then launch a thread using the aligned stack plus the current offset, thus +iterating over all possible stack alignments. + +The test thread run `snprintf()` to format a double, compares the output with +the expected result, and exists to allow the subsequent thread to reuse the +stack. This is a good test for two reasons: Variadic functions (such as +`snprintf()`) on some platforms have different calling conventions that may +more easily trigger alignment issues, and an FPU may have a higher alignment +requirement than the CPU has. + +The test is considered as passing if for all tested alignments the call to +`snprintf()` produces the correct result and no crash happens on the way. + +Finally, the test script will collect the output of the stack consumptions and +give out the worst case penalty a user has to face diff --git a/tests/thread_stack_alignment/main.c b/tests/thread_stack_alignment/main.c new file mode 100644 index 0000000000..818ba696d3 --- /dev/null +++ b/tests/thread_stack_alignment/main.c @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2022 Otto-von-Guericke-Universität Magdeburg + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License v2.1. See the file LICENSE in the top level + * directory for more details. + */ + +/** + * @ingroup tests + * @{ + * + * @file + * @brief Thread stack alignment test application + * + * @author Marian Buschsieweke + * + * @} + */ + +#include +#include +#include +#include +#include +/* work around bug with LLVM: needs included first, + * so do not sort this one alphabetically */ +#include + +#include "irq.h" +#include "sched.h" +#include "thread.h" +#include "thread_arch.h" + +#define PI_ROUNDED 3.14159 +#define _QUOTE(x) #x +#define QUOTE(x) _QUOTE(x) +#define ALIGNMENT 128 +#define STACKSIZE (THREAD_STACKSIZE_DEFAULT + THREAD_EXTRA_STACKSIZE_PRINTF \ + + ALIGNMENT) + +static char alignas(ALIGNMENT) stack[STACKSIZE + ALIGNMENT]; +static atomic_bool test_failed = ATOMIC_VAR_INIT(false); + +static void *thread_func(void *arg) +{ + (void)arg; + static const double pi_const = PI_ROUNDED; + static const char pi_str[] = QUOTE(PI_ROUNDED); + + /* Force compiler to not optimize out the heavy lifting by loading the + * value of pi with a "volatile" read. The compiler must assume that the + * contents of pi are only known at runtime */ + double pi = (volatile const double)pi_const; + + char buf[16] = ""; + /* Since snprintf() has a variable number of arguments, arguments are + * typically passed via stack even if the calling convention would pass + * arguments via registers most of the time. And typically double has + * the highest alignment requirement, so this is likely to run into + * issues when the stack is not aligned as the target arch requires it */ + snprintf(buf, sizeof(buf) - 1, "%1.5f", pi); + + if (0 != memcmp(pi_str, buf, sizeof(pi_str))) { + atomic_store(&test_failed, true); + puts("FAILED"); + printf("Got \"%s\", expected \"%s\"\n", buf, pi_str); + return NULL; + } + + puts("OK"); + return NULL; +} + +int main(void) +{ + bool failed = 0; + printf("Testing with a stack sized %u and an alignment up to %u\n", + (unsigned)STACKSIZE, (unsigned)ALIGNMENT); + for (size_t i = 0; i < ALIGNMENT; i++) { + atomic_store(&test_failed, false); + printf("Testing for alignment %u: ", (unsigned)i); + kernel_pid_t p; + p = thread_create(stack + i, STACKSIZE, THREAD_PRIORITY_MAIN - 1, + THREAD_CREATE_STACKTEST, + thread_func, NULL, "test"); + /* we expect that the new thread is scheduled to directly after it is + * created and this will only continue one the thread has terminated. + * But let's better be safe than sorry */ + while (thread_get(p) != NULL) { + thread_yield(); + } + + if (atomic_load(&test_failed)) { + failed = true; + } + } + + if (failed) { + puts("TEST FAILED"); + } + else { + puts("TEST PASSED"); + } + return 0; +} diff --git a/tests/thread_stack_alignment/tests/01-run.py b/tests/thread_stack_alignment/tests/01-run.py new file mode 100755 index 0000000000..9f47280b33 --- /dev/null +++ b/tests/thread_stack_alignment/tests/01-run.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python3 + +# Copyright (C) 2022 Otto-von-Guericke-Universität Magdeburg +# +# This file is subject to the terms and conditions of the GNU Lesser +# General Public License v2.1. See the file LICENSE in the top level +# directory for more details. + +# @author Marian Buschsieweke + +import sys +import json +from testrunner import run + + +def testfunc(child): + child.expect(r"Testing with a stack sized (\d+) and an alignment up to (\d+)\r\n") + stack_size = int(child.match.group(1)) + alignment = int(child.match.group(2)) + stack_used_min = stack_size + stack_used_max = 0 + for i in range(alignment): + child.expect_exact(f"Testing for alignment {i}: OK") + child.expect(r"(\{[^\n\r]*\})\r\n") + stats = json.loads(child.match.group(1))["threads"][0] + assert stats["name"] == "test" + if stack_used_max < stats["stack_used"]: + stack_used_max = stats["stack_used"] + if stack_used_min > stats["stack_used"]: + stack_used_min = stats["stack_used"] + + child.expect_exact("TEST PASSED") + alignment_loss = stack_used_max - stack_used_min + if alignment_loss > 0: + print(f"NOTE: Up to {alignment_loss} B of RAM is lost when thread " + + "stacks are not properly aligned") + + +if __name__ == "__main__": + sys.exit(run(testfunc))