1
0
mirror of https://github.com/RIOT-OS/RIOT.git synced 2025-01-18 12:52:44 +01:00

tests/bench_periph_gpio_ll: add benchmark for periph/gpio_ll

This commit is contained in:
Marian Buschsieweke 2022-01-26 16:11:55 +01:00
parent 22a17731ea
commit 28791c42a4
No known key found for this signature in database
GPG Key ID: CB8E3238CE715A94
6 changed files with 407 additions and 0 deletions

5
tests/bench_periph_gpio_ll/.gitignore vendored Normal file
View File

@ -0,0 +1,5 @@
# Ignore local Makefile.$(BOARD) files: they hold custom per-board pin mappings
# and do not need to go upstream
/Makefile.*
# but un-ignore Makefile.ci
!/Makefile.ci

View File

@ -0,0 +1,42 @@
# Build configuration of the periph/gpio vs. periph/gpio_ll benchmark.
# Board to build for unless BOARD is already set by the caller
BOARD ?= nucleo-f767zi
# Custom per-board pin configuration (e.g. for setting PORT_IN, PIN_IN_0, ...)
# can be provided in a Makefile.$(BOARD) file:
# (the leading '-' makes the include optional, so a missing file is no error;
# it is included before the defaults below so that its values take precedence
# over the `?=` assignments)
-include Makefile.$(BOARD)
# Choose two output pins that do not conflict with stdio and are not connected
# to external devices such as sensors, network devices, etc.
#
# Beware: If other pins on the output port are configured as output GPIOs, they
# might be written to during this test.
PORT_OUT ?= 0
PIN_OUT_0 ?= 0
PIN_OUT_1 ?= 1
include ../Makefile.tests_common
# The benchmark exercises both the port based periph/gpio_ll API and, as
# reference, the pin based periph/gpio API
FEATURES_REQUIRED += periph_gpio_ll
FEATURES_REQUIRED += periph_gpio
FEATURES_OPTIONAL += periph_gpio_ll_irq
FEATURES_OPTIONAL += periph_gpio_ll_irq_level_triggered_high
FEATURES_OPTIONAL += periph_gpio_ll_irq_level_triggered_low
# Microsecond timer used to measure the duration of the benchmark loops
USEMODULE += ztimer_usec
include $(RIOTBASE)/Makefile.include
# Configure if compensation of loop overhead in the estimation of the
# toggling speed should be performed. Default: Do so, except for Cortex-M7.
# For the Cortex-M7 the loop instructions are emitted together with the GPIO
# writes due to the dual issue feature. Hence, there is no loop overhead for
# Cortex-M7 to compensate for.
# NOTE(review): this check sits after Makefile.include, presumably because
# CPU_CORE is only defined once that file has been processed - confirm.
ifeq (cortex-m7,$(CPU_CORE))
COMPENSATE_OVERHEAD ?= 0
endif
COMPENSATE_OVERHEAD ?= 1
# Hand the pin selection and the compensation switch to the C code as
# preprocessor macros
CFLAGS += -DPORT_OUT=$(PORT_OUT)
CFLAGS += -DPIN_OUT_0=$(PIN_OUT_0)
CFLAGS += -DPIN_OUT_1=$(PIN_OUT_1)
CFLAGS += -DCOMPENSATE_OVERHEAD=$(COMPENSATE_OVERHEAD)

View File

@ -0,0 +1,8 @@
# Boards declared as not having enough memory for this application.
# NOTE(review): presumably evaluated by the CI build to skip these boards -
# confirm against the build system.
# The list ends with a lone '#' so that every entry, including the last one,
# can carry a trailing line continuation backslash.
BOARD_INSUFFICIENT_MEMORY := \
arduino-duemilanove \
arduino-nano \
arduino-uno \
atmega328p \
atmega328p-xplained-mini \
nucleo-l011k4 \
#

View File

@ -0,0 +1,40 @@
# Benchmark for `periph/gpio_ll`
This application will generate a square wave on two output pins with a phase
difference of zero between them using both the `periph/gpio` API (as reference)
and the `periph/gpio_ll` API. You can use a logic analyzer or scope to verify
that the square waves are indeed simultaneous (no phase difference) and to
measure their frequency. Note that with the pin based `periph/gpio` API a phase
difference is expected, but not with the port based `periph/gpio_ll` API.
In addition, a timer is used to measure the average frequency over 50,000
square wave periods. The overhead of the loop is estimated and a compensated
frequency (that would be achievable only by unrolling the loop) is calculated.
Both frequencies are printed, in addition to the number of CPU cycles per wave
period. The optimal value is 2 CPU cycles (signal is 1 cycle high and 1 cycle
low).
## Configuration
Set the number of the GPIO port to use via the `PORT_OUT` variable, either in
the `Makefile` or as an environment variable. The `PIN_OUT_0` and
`PIN_OUT_1` variables select the pins to use within that GPIO port. If possible,
choose a GPIO port that is fully broken out to pins of your board but left
unconnected. That way you can connect a scope or a logic analyzer to verify
the output.
Note that the test using `gpio_ll_write()` might cause changes to unrelated pins
on the `PORT_OUT` GPIO port, by restoring their value to what it was at the
beginning of the benchmark.
## FAQ
Why are 4 function calls used for `periph/gpio`, but only 2 for
`periph/gpio_ll`? This isn't fair!
Since in a port based API multiple pins can be accessed at once, only two
accesses are needed (one for the high and one for the low part of each square
wave period). In the pin based `periph/gpio` API, two accesses are needed per
pin. This unfair advantage in speed is one of the reasons we want a low level
port based API in RIOT - in addition to a more convenient to use and high level
pin based API.

View File

@ -0,0 +1,294 @@
/*
* Copyright (C) 2021 Otto-von-Guericke-Universität Magdeburg
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
/**
* @ingroup tests
* @{
*
* @file
* @brief Test application for the Peripheral GPIO Low-Level API
*
* @author Marian Buschsieweke <marian.buschsieweke@ovgu.de>
*
* @}
*/
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mutex.h"
#include "periph/gpio.h"
#include "periph/gpio_ll.h"
#include "test_utils/expect.h"
#include "ztimer.h"
#include "timex.h"
/* Fallback for builds that do not pass -DCOMPENSATE_OVERHEAD=...: compensate
 * for the loop overhead by default */
#ifndef COMPENSATE_OVERHEAD
#define COMPENSATE_OVERHEAD 1
#endif
/* GPIO port selected at build time via PORT_OUT; this is the port handle the
 * periph/gpio_ll benchmarks in main() operate on */
static gpio_port_t port_out = GPIO_PORT(PORT_OUT);
/**
 * @brief   Compute the square wave frequency from a loop count and a duration
 *
 * @param[in] loops     Number of square wave periods generated
 * @param[in] duration  Time the @p loops periods took in microseconds,
 *                      must not be zero
 *
 * @return  Frequency in Hz, truncated to 32 bit
 */
static uint32_t frequency_hz(uint_fast16_t loops, uint32_t duration)
{
    /* 64 bit arithmetic: US_PER_SEC * loops does not fit into 32 bit */
    return (uint32_t)((uint64_t)US_PER_SEC * loops / duration);
}

#ifdef CLOCK_CORECLOCK
/**
 * @brief   Compute the number of CPU cycles spent per square wave period
 *
 * The result is `duration * CLOCK_CORECLOCK / (US_PER_SEC * loops)` rounded to
 * the nearest integer. Multiplying before dividing keeps the full precision
 * and has no intermediate divisor that could be truncated to zero.
 *
 * @param[in] loops     Number of square wave periods generated,
 *                      must not be zero
 * @param[in] duration  Time the @p loops periods took in microseconds
 *
 * @return  CPU cycles per period, rounded to nearest and truncated to 32 bit
 */
static uint32_t cycles_per_period(uint_fast16_t loops, uint32_t duration)
{
    uint64_t denominator = (uint64_t)US_PER_SEC * loops;
    uint64_t numerator = (uint64_t)duration * (uint64_t)(CLOCK_CORECLOCK);

    return (uint32_t)((numerator + denominator / 2) / denominator);
}

/**
 * @brief   Print an emoticon rating the number of CPU cycles per period
 *
 * @param[in] cycles    CPU cycles per square wave period
 */
static void print_rating(uint32_t cycles)
{
    const char *rating = ":'-(";

    if (cycles <= 2) {
        rating = ":-D";
    }
    else if (cycles <= 4) {
        rating = ":-)";
    }
    else if (cycles <= 8) {
        rating = ":-|";
    }
    else if (cycles <= 16) {
        rating = ":-(";
    }

    puts(rating);
}
#endif /* CLOCK_CORECLOCK */

/**
 * @brief   Print the result of one benchmark with and without compensation of
 *          the loop overhead
 *
 * Prints the durations and the resulting square wave frequencies. If
 * `CLOCK_CORECLOCK` is defined, the CPU cycles per square wave period and a
 * rating of the compensated value are printed in addition.
 *
 * If @p loops or one of the durations is zero, no frequency can be computed;
 * a note is printed instead of dividing by zero.
 *
 * @param[in] loops                     Number of square wave periods generated
 * @param[in] duration                  Duration in microseconds with the loop
 *                                      overhead subtracted
 * @param[in] duration_uncompensated    Duration in microseconds as measured
 */
static void print_summary_compensated(uint_fast16_t loops, uint32_t duration,
                                      uint32_t duration_uncompensated)
{
    printf("%" PRIuFAST16 " iterations took %" PRIu32 " us "
           "(%" PRIu32 " us uncompensated)\n",
           loops, duration, duration_uncompensated);

    if ((loops == 0) || (duration == 0) || (duration_uncompensated == 0)) {
        puts("Duration too short to compute a frequency");
        return;
    }

    /* values are cast down to 32 bit before printing, as the format
     * specifiers used here are 32 bit ones */
    printf("Two square waves pins at %12" PRIu32 " Hz "
           "(%12" PRIu32 " Hz uncompensated)\n",
           frequency_hz(loops, duration),
           frequency_hz(loops, duration_uncompensated));

#ifdef CLOCK_CORECLOCK
    uint32_t cycles = cycles_per_period(loops, duration);
    uint32_t cycles_uncompensated = cycles_per_period(loops,
                                                      duration_uncompensated);

    printf("~%" PRIu32 " CPU cycles per square wave period "
           "(~%" PRIu32 " cycles uncompensated)\n",
           cycles, cycles_uncompensated);
    /* the rating is based on the compensated value */
    print_rating(cycles);
#endif
}

/**
 * @brief   Print the result of one benchmark without any compensation
 *
 * Prints the duration and the resulting square wave frequency. If
 * `CLOCK_CORECLOCK` is defined, the CPU cycles per square wave period and a
 * rating of that value are printed in addition.
 *
 * If @p loops or @p duration is zero, no frequency can be computed; a note is
 * printed instead of dividing by zero.
 *
 * @param[in] loops     Number of square wave periods generated
 * @param[in] duration  Duration in microseconds as measured
 */
static void print_summary_uncompensated(uint_fast16_t loops, uint32_t duration)
{
    printf("%" PRIuFAST16 " iterations took %" PRIu32 " us\n",
           loops, duration);

    if ((loops == 0) || (duration == 0)) {
        puts("Duration too short to compute a frequency");
        return;
    }

    printf("Two square waves pins at %12" PRIu32 " Hz\n",
           frequency_hz(loops, duration));

#ifdef CLOCK_CORECLOCK
    uint32_t cycles = cycles_per_period(loops, duration);

    printf("~%" PRIu32 " CPU cycles per square wave period\n", cycles);
    print_rating(cycles);
#endif
}
int main(void)
{
static const uint_fast16_t loops = 50000;
uint32_t loop_overhead = 0;
uword_t mask_both = (1U << PIN_OUT_0) | (1U << PIN_OUT_1);
puts("\n"
"Benchmarking GPIO APIs\n"
"======================");
if (COMPENSATE_OVERHEAD) {
puts("\n"
"estimating loop overhead for compensation\n"
"-----------------------------------------");
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
__asm__ volatile ("" : : : );
}
loop_overhead = ztimer_now(ZTIMER_USEC) - start;
printf("%" PRIu32 " us for %" PRIuFAST16 " iterations\n",
loop_overhead, loops);
}
{
puts("\n"
"periph/gpio: Using 2x gpio_set() and 2x gpio_clear()\n"
"---------------------------------------------------");
gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0);
gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1);
gpio_init(p0, GPIO_OUT);
gpio_init(p1, GPIO_OUT);
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
gpio_set(p0);
gpio_set(p1);
gpio_clear(p0);
gpio_clear(p1);
}
uint32_t duration = ztimer_now(ZTIMER_USEC) - start;
if (COMPENSATE_OVERHEAD) {
print_summary_compensated(loops, duration - loop_overhead,
duration);
}
else {
print_summary_uncompensated(loops, duration);
}
}
{
puts("\n"
"periph/gpio_ll: Using gpio_ll_set() and gpio_ll_clear()\n"
"-------------------------------------------------------");
gpio_conf_t conf = {
.state = GPIO_OUTPUT_PUSH_PULL,
.slew_rate = GPIO_SLEW_FASTEST
};
expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf));
expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf));
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
gpio_ll_set(port_out, (1UL << PIN_OUT_0) | (1UL << PIN_OUT_1));
gpio_ll_clear(port_out, (1UL << PIN_OUT_0) | (1UL << PIN_OUT_1));
}
uint32_t duration = ztimer_now(ZTIMER_USEC) - start;
if (COMPENSATE_OVERHEAD) {
print_summary_compensated(loops, duration - loop_overhead,
duration);
}
else {
print_summary_uncompensated(loops, duration);
}
}
{
puts("\n"
"periph/gpio: Using 4x gpio_toggle()\n"
"-----------------------------------");
gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0);
gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1);
gpio_init(p0, GPIO_OUT);
gpio_init(p1, GPIO_OUT);
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
gpio_toggle(p0);
gpio_toggle(p1);
gpio_toggle(p0);
gpio_toggle(p1);
}
uint32_t duration = ztimer_now(ZTIMER_USEC) - start;
if (COMPENSATE_OVERHEAD) {
print_summary_compensated(loops, duration - loop_overhead,
duration);
}
else {
print_summary_uncompensated(loops, duration);
}
}
{
puts("\n"
"periph/gpio_ll: Using 2x gpio_ll_toggle()\n"
"-----------------------------------------");
gpio_conf_t conf = {
.state = GPIO_OUTPUT_PUSH_PULL,
.slew_rate = GPIO_SLEW_FASTEST
};
expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf));
expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf));
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
gpio_ll_toggle(port_out, mask_both);
gpio_ll_toggle(port_out, mask_both);
}
uint32_t duration = ztimer_now(ZTIMER_USEC) - start;
if (COMPENSATE_OVERHEAD) {
print_summary_compensated(loops, duration - loop_overhead,
duration);
}
else {
print_summary_uncompensated(loops, duration);
}
}
{
puts("\n"
"periph/gpio: Using 4x gpio_write()\n"
"----------------------------------");
gpio_t p0 = GPIO_PIN(PORT_OUT, PIN_OUT_0);
gpio_t p1 = GPIO_PIN(PORT_OUT, PIN_OUT_1);
gpio_init(p0, GPIO_OUT);
gpio_init(p1, GPIO_OUT);
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
gpio_write(p0, 1);
gpio_write(p1, 1);
gpio_write(p0, 0);
gpio_write(p1, 0);
}
uint32_t duration = ztimer_now(ZTIMER_USEC) - start;
if (COMPENSATE_OVERHEAD) {
print_summary_compensated(loops, duration - loop_overhead,
duration);
}
else {
print_summary_uncompensated(loops, duration);
}
}
{
puts("\n"
"periph/gpio_ll: Using 2x gpio_ll_write()\n"
"----------------------------------------");
gpio_conf_t conf = {
.state = GPIO_OUTPUT_PUSH_PULL,
.slew_rate = GPIO_SLEW_FASTEST
};
expect(0 == gpio_ll_init(port_out, PIN_OUT_0, &conf));
expect(0 == gpio_ll_init(port_out, PIN_OUT_1, &conf));
uword_t both_high = gpio_ll_prepare_write(port_out, mask_both,
mask_both);
uword_t both_low = gpio_ll_prepare_write(port_out, mask_both, 0);
uint32_t start = ztimer_now(ZTIMER_USEC);
for (uint_fast16_t i = loops; i > 0; i--) {
gpio_ll_write(port_out, both_high);
gpio_ll_write(port_out, both_low);
}
uint32_t duration = ztimer_now(ZTIMER_USEC) - start;
if (COMPENSATE_OVERHEAD) {
print_summary_compensated(loops, duration - loop_overhead,
duration);
}
else {
print_summary_uncompensated(loops, duration);
}
}
puts("\n\nTEST SUCCEEDED");
return 0;
}

View File

@ -0,0 +1,18 @@
#!/usr/bin/env python3
# Copyright (C) 2022 Otto-von-Guericke-Universität Magdeburg
#
# This file is subject to the terms and conditions of the GNU Lesser
# General Public License v2.1. See the file LICENSE in the top level
# directory for more details.
import sys
from testrunner import run
def testfunc(child):
child.expect('TEST SUCCEEDED')
# Script entry point: let the test runner drive testfunc() and use its result
# as the exit status of the process
if __name__ == "__main__":
    sys.exit(run(testfunc))