From fc57bfb511831110ec755b1a7140d7adfc3bd63d Mon Sep 17 00:00:00 2001 From: Martine Lenders Date: Thu, 21 Jan 2021 17:32:19 +0100 Subject: [PATCH] congure_quic: initial import of QUIC congestion control --- sys/Makefile.dep | 4 + sys/congure/Kconfig | 2 + sys/congure/Makefile | 3 + sys/congure/quic/Kconfig | 4 + sys/congure/quic/Makefile | 3 + sys/congure/quic/congure_quic.c | 296 ++++++++++++++++++++++++++++++++ sys/include/congure/quic.h | 233 +++++++++++++++++++++++++ 7 files changed, 545 insertions(+) create mode 100644 sys/congure/quic/Kconfig create mode 100644 sys/congure/quic/Makefile create mode 100644 sys/congure/quic/congure_quic.c create mode 100644 sys/include/congure/quic.h diff --git a/sys/Makefile.dep b/sys/Makefile.dep index 270b3816a5..72b1a1bca8 100644 --- a/sys/Makefile.dep +++ b/sys/Makefile.dep @@ -37,6 +37,10 @@ ifneq (,$(filter congure_%,$(USEMODULE))) USEMODULE += congure endif +ifneq (,$(filter congure_quic,$(USEMODULE))) + USEMODULE += ztimer_msec +endif + ifneq (,$(filter congure_test,$(USEMODULE))) USEMODULE += fmt endif diff --git a/sys/congure/Kconfig b/sys/congure/Kconfig index 7971b41ac4..7b2fe4fa6a 100644 --- a/sys/congure/Kconfig +++ b/sys/congure/Kconfig @@ -9,6 +9,7 @@ menu "CongURE congestion control abstraction" depends on USEMODULE_CONGURE rsource "mock/Kconfig" +rsource "quic/Kconfig" rsource "reno/Kconfig" rsource "test/Kconfig" @@ -23,6 +24,7 @@ menuconfig MODULE_CONGURE if MODULE_CONGURE rsource "mock/Kconfig" +rsource "quic/Kconfig" rsource "reno/Kconfig" rsource "test/Kconfig" diff --git a/sys/congure/Makefile b/sys/congure/Makefile index 78bd97f60e..67edc5ed39 100644 --- a/sys/congure/Makefile +++ b/sys/congure/Makefile @@ -1,3 +1,6 @@ +ifneq (,$(filter congure_quic,$(USEMODULE))) + DIRS += quic +endif ifneq (,$(filter congure_mock,$(USEMODULE))) DIRS += mock endif diff --git a/sys/congure/quic/Kconfig b/sys/congure/quic/Kconfig new file mode 100644 index 0000000000..6fc3e009b2 --- /dev/null +++ 
b/sys/congure/quic/Kconfig
@@ -0,0 +1,4 @@
+config MODULE_CONGURE_QUIC
+    bool "CongURE implementation of QUIC's congestion control"
+    depends on MODULE_CONGURE
+    depends on MODULE_ZTIMER
diff --git a/sys/congure/quic/Makefile b/sys/congure/quic/Makefile
new file mode 100644
index 0000000000..8dcee7c215
--- /dev/null
+++ b/sys/congure/quic/Makefile
@@ -0,0 +1,3 @@
+MODULE := congure_quic
+
+include $(RIOTBASE)/Makefile.base
diff --git a/sys/congure/quic/congure_quic.c b/sys/congure/quic/congure_quic.c
new file mode 100644
index 0000000000..5b6b097759
--- /dev/null
+++ b/sys/congure/quic/congure_quic.c
@@ -0,0 +1,340 @@
+/*
+ * Copyright (C) 2021 Freie Universität Berlin
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @{
+ *
+ * @file
+ * @author  Martine Lenders
+ *
+ * See [RFC 9002, Appendix B](https://tools.ietf.org/html/rfc9002#appendix-B)
+ * and parts of [RFC 9002, Appendix A](https://tools.ietf.org/html/rfc9002#appendix-A)
+ * (for pacing calculation) as basis for this implementation.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "clist.h"
+#include "timex.h"
+#include "ztimer.h"
+
+#include "congure/quic.h"
+
+static void _snd_init(congure_snd_t *cong, void *ctx);
+static int32_t _snd_inter_msg_interval(congure_snd_t *cong, unsigned msg_size);
+static void _snd_report_msg_sent(congure_snd_t *cong, unsigned sent_size);
+static void _snd_report_msg_discarded(congure_snd_t *cong, unsigned msg_size);
+static void _snd_report_msgs_lost(congure_snd_t *cong, congure_snd_msg_t *msgs);
+static void _snd_report_msg_acked(congure_snd_t *cong, congure_snd_msg_t *msg,
+                                  congure_snd_ack_t *ack);
+static void _snd_report_ecn_ce(congure_snd_t *cong, ztimer_now_t time);
+
+/* CongURE sender driver; timeouts are reported like any other message loss */
+static const congure_snd_driver_t _driver = {
+    .init = _snd_init,
+    .inter_msg_interval = _snd_inter_msg_interval,
+    .report_msg_sent = _snd_report_msg_sent,
+    .report_msg_discarded = _snd_report_msg_discarded,
+    .report_msgs_timeout = _snd_report_msgs_lost,
+    .report_msgs_lost = _snd_report_msgs_lost,
+    .report_msg_acked = _snd_report_msg_acked,
+    .report_ecn_ce = _snd_report_ecn_ce,
+};
+
+/* A message is in recovery if it was sent no later than the start of the
+ * current recovery period */
+static inline bool _in_recov(congure_quic_snd_t *c, ztimer_now_t sent_time)
+{
+    return sent_time <= c->recovery_start;
+}
+
+/* Starts a new recovery period, shrinks ssthresh and cwnd and notifies the
+ * user, unless the message sent at `sent_time` already is in recovery */
+static void _on_congestion_event(congure_quic_snd_t *c, ztimer_now_t sent_time)
+{
+    if (_in_recov(c, sent_time)) {
+        return;
+    }
+    /* enter congestion recovery period */
+    c->recovery_start = ztimer_now(ZTIMER_MSEC);
+    c->ssthresh = (c->super.cwnd * c->consts->loss_reduction_numerator)
+                  / c->consts->loss_reduction_denominator;
+    c->super.cwnd = (c->ssthresh > c->consts->min_wnd)
+                  ? c->ssthresh : c->consts->min_wnd;
+    if (c->consts->cong_event_cb) {
+        c->consts->cong_event_cb(c->super.ctx);
+    }
+}
+
+/* Updates min_rtt, smoothed_rtt and rtt_var (all in ms) from a new RTT sample
+ * given by the send time of a message and the receive time of its ACK */
+static void _update_rtts(congure_quic_snd_t *c, ztimer_now_t msg_send_time,
+                         ztimer_now_t ack_recv_time, uint16_t ack_delay)
+{
+    uint16_t latest_rtt;
+
+    assert((ack_recv_time - msg_send_time) <= UINT16_MAX);
+    /* we assume that the RTT sample is in the uint16_t range, but just in case
+     * NDEBUG is set, let's cap it at UINT16_MAX */
+    if ((ack_recv_time - msg_send_time) > UINT16_MAX) {
+        latest_rtt = UINT16_MAX;
+    }
+    else {
+        latest_rtt = ack_recv_time - msg_send_time;
+    }
+
+    if (c->first_rtt_sample > 0) {  /* an RTT sample was taken */
+        c->min_rtt = (c->min_rtt > latest_rtt) ? latest_rtt : c->min_rtt;
+        /* adjust latest_rtt for ack_delay if plausible */
+        if (latest_rtt > (c->min_rtt + ack_delay)) {
+            latest_rtt -= ack_delay;
+        }
+        c->rtt_var = ((3U * c->rtt_var) / 4U) +
+                     (abs((int)c->smoothed_rtt - (int)latest_rtt) / 4U);
+        c->smoothed_rtt = ((7U * c->smoothed_rtt) / 8U) + (latest_rtt / 8U);
+    }
+    else {
+        c->min_rtt = latest_rtt;
+        c->smoothed_rtt = latest_rtt;
+        c->rtt_var = latest_rtt / 2;
+        c->first_rtt_sample = ztimer_now(ZTIMER_MSEC);
+    }
+}
+
+/* Collapses cwnd to the minimum window and resets the recovery start once
+ * persistent congestion was detected */
+static void _reset_cwnd_in_pc(congure_quic_snd_t *c)
+{
+    c->super.cwnd = c->consts->min_wnd;
+    if (c->ssthresh < c->consts->min_wnd) {
+        /* See https://github.com/quicwg/base-drafts/issues/4826#issuecomment-776305871
+         * XXX: this differs from the pseudo-code in Appendix B.8, which does
+         * not touch `ssthresh`: raise it to the minimum window, so it does
+         * not end up lower than `cwnd` after the reset.
+         */
+        c->ssthresh = c->consts->min_wnd;
+    }
+    c->recovery_start = 0;
+}
+
+/* Resets the congestion window if the send times of the lost messages in
+ * `msgs` span more than the persistent congestion duration */
+static void _reset_cwnd(congure_quic_snd_t *c, congure_snd_msg_t *msgs)
+{
+    /* Only messages sent after the first RTT sample was taken are considered
+     * for persistent congestion */
+    if (c->first_rtt_sample > 0U) {
+        /* clist_foreach() can not pass both `c` and the intermediate state to
+         * its callback, so the list is walked manually until it wraps around */
+        congure_snd_msg_t *ptr = (congure_snd_msg_t *)msgs->super.next;
+
+        if (ptr) {
+            ztimer_now_t latest = 0U;
+            ztimer_now_t earliest = 0U;
+            bool considered = false;
+            /* use 64 bit here to prevent overflows in the multiplication */
+            uint64_t pc_duration;
+            uint32_t rtt_var = 4U * (uint32_t)c->rtt_var;
+
+            /* max(4 * rtt_var, granularity), see RFC 9002, section 7.6.1 */
+            if (rtt_var < c->consts->granularity) {
+                rtt_var = c->consts->granularity;
+            }
+
+            pc_duration = (uint64_t)(c->smoothed_rtt + rtt_var +
+                                     c->max_ack_delay) * c->consts->pc_thresh;
+
+            do {
+                ptr = (congure_snd_msg_t *)ptr->super.next;
+                if (ptr->send_time > c->first_rtt_sample) {
+                    /* consider for persistent congestion */
+                    if (!considered || (latest < ptr->send_time)) {
+                        latest = ptr->send_time;
+                    }
+                    if (!considered || (earliest > ptr->send_time)) {
+                        earliest = ptr->send_time;
+                    }
+                    considered = true;
+                    if ((latest - earliest) > pc_duration) {
+                        /* in persistent congestion */
+                        _reset_cwnd_in_pc(c);
+                        return;
+                    }
+                }
+            } while ((&ptr->super) != msgs->super.next);
+        }
+    }
+}
+
+/* Subtracts msg_size from the in-flight size, saturating at 0 */
+static void _dec_flight_size(congure_quic_snd_t *c, unsigned msg_size)
+{
+    /* check for integer underflow */
+    if ((c->in_flight_size - msg_size) > c->in_flight_size) {
+        c->in_flight_size = 0U;
+    }
+    else {
+        c->in_flight_size -= msg_size;
+    }
+}
+
+/* (Re-)initializes the sender state from the constants that were provided
+ * with congure_quic_snd_setup() */
+static void _snd_init(congure_snd_t *cong, void *ctx)
+{
+    congure_quic_snd_t *c = (congure_quic_snd_t *)cong;
+
+    c->super.ctx = ctx;
+    c->first_rtt_sample = 0;
+    c->super.cwnd = c->consts->init_wnd;
+    c->in_flight_size = 0U;
+    c->recovery_start = 0U;
+    c->ssthresh = CONGURE_WND_SIZE_MAX;
+    c->limited = 0U;
+    c->max_ack_delay = 0U;
+    c->smoothed_rtt = c->consts->init_rtt;
+    c->rtt_var = c->consts->init_rtt / 2U;
+    c->min_rtt = 0U;
+}
+
+/* Returns the pacing interval in microseconds for a message of msg_size,
+ * saturated to INT32_MAX */
+static int32_t _snd_inter_msg_interval(congure_snd_t *cong, unsigned msg_size)
+{
+    congure_quic_snd_t *c = container_of(cong, congure_quic_snd_t, super);
+    /* interval in QUIC spec is a divisor, so flip denominator and numerator */
+    uint64_t divisor = (uint64_t)c->consts->inter_msg_interval_numerator *
+                       c->super.cwnd;
+    /* smoothed_rtt is in ms, but expected result is in us; 64 bit arithmetic
+     * keeps the product from wrapping for realistic message sizes */
+    uint64_t dividend = (uint64_t)c->consts->inter_msg_interval_denominator *
+                        c->smoothed_rtt * msg_size * US_PER_MS;
+
+    if ((divisor == 0U) || ((dividend / divisor) > INT32_MAX)) {
+        /* nothing may be sent or result out of range: longest interval */
+        return INT32_MAX;
+    }
+    return (int32_t)(dividend / divisor);
+}
+
+/* Adds sent_size to the in-flight size, capped at the congestion window */
+static void _snd_report_msg_sent(congure_snd_t *cong, unsigned sent_size)
+{
+    congure_quic_snd_t *c = (congure_quic_snd_t *)cong;
+
+    if ((c->in_flight_size + sent_size) < c->super.cwnd) {
+        c->in_flight_size += sent_size;
+    }
+    else {
+        /* state machine is dependent on flight size being smaller or equal
+         * to cwnd, as such cap the flight size here, in case caller reports
+         * a message in flight that was marked as lost, but the caller is
+         * using a later message to send another ACK. */
+        c->in_flight_size = c->super.cwnd;
+    }
+}
+
+/* Removes a discarded message of msg_size from the in-flight size */
+static void _snd_report_msg_discarded(congure_snd_t *cong, unsigned msg_size)
+{
+    congure_quic_snd_t *c = (congure_quic_snd_t *)cong;
+
+    assert(msg_size <= c->in_flight_size);
+
+    _dec_flight_size(c, msg_size);
+}
+
+/* Removes all messages in `msgs` from the in-flight size, signals a congestion
+ * event for the latest send time and checks for persistent congestion */
+static void _snd_report_msgs_lost(congure_snd_t *cong, congure_snd_msg_t *msgs)
+{
+    congure_quic_snd_t *c = (congure_quic_snd_t *)cong;
+    /* XXX need to untangle clist_foreach() to record last_lost_sent */
+    congure_snd_msg_t *ptr = (congure_snd_msg_t *)msgs->super.next;
+    ztimer_now_t last_lost_sent = 0U;
+
+    if (ptr) {
+        do {
+            ptr = (congure_snd_msg_t *)ptr->super.next;
+            _dec_flight_size(c, ptr->size);
+            if (last_lost_sent < ptr->send_time) {
+                last_lost_sent = ptr->send_time;
+            }
+        } while ((&ptr->super) != msgs->super.next);
+    }
+    if (last_lost_sent) {
+        _on_congestion_event(c, last_lost_sent);
+    }
+    _reset_cwnd(c, msgs);
+}
+
+/* Grows cwnd by `inc`, saturating at CONGURE_WND_SIZE_MAX rather than
+ * wrapping around */
+static void _inc_cwnd(congure_quic_snd_t *c, uint32_t inc)
+{
+    if (inc > (uint32_t)(CONGURE_WND_SIZE_MAX - c->super.cwnd)) {
+        c->super.cwnd = CONGURE_WND_SIZE_MAX;
+    }
+    else {
+        c->super.cwnd += inc;
+    }
+}
+
+/* Handles the ACK for `msg`: updates in-flight size and RTT estimates and, if
+ * neither limited nor in recovery, grows the congestion window */
+static void _snd_report_msg_acked(congure_snd_t *cong, congure_snd_msg_t *msg,
+                                  congure_snd_ack_t *ack)
+{
+    congure_quic_snd_t *c = (congure_quic_snd_t *)cong;
+
+    _dec_flight_size(c, msg->size);
+
+    /* https://tools.ietf.org/html/rfc9002#appendix-A.7 */
+    if ((msg->size > 0) && (ack->recv_time > 0)) {
+        _update_rtts(c, msg->send_time, ack->recv_time, ack->delay);
+    }
+    /* Do not increase congestion_window if application limited or flow control
+     * limited. */
+    if (c->limited) {
+        return;
+    }
+
+    /* do not change congestion window in recovery period */
+    if (_in_recov(c, msg->send_time)) {
+        return;
+    }
+    if (c->super.cwnd < c->ssthresh) {
+        /* in slow start mode */
+        _inc_cwnd(c, msg->size);
+    }
+    else {
+        /* congestion avoidance; 32 bit product to avoid signed overflow */
+        _inc_cwnd(c, ((uint32_t)c->consts->max_msg_size * msg->size) /
+                     c->super.cwnd);
+    }
+}
+
+/* An ECN-CE notification is treated like a congestion event at `time` */
+static void _snd_report_ecn_ce(congure_snd_t *cong, ztimer_now_t time)
+{
+    _on_congestion_event((congure_quic_snd_t *)cong, time);
+}
+
+void congure_quic_snd_setup(congure_quic_snd_t *c,
+                            const congure_quic_snd_consts_t *consts)
+{
+    assert(consts->inter_msg_interval_numerator >=
+           consts->inter_msg_interval_denominator);
+    c->super.driver = &_driver;
+    c->consts = consts;
+}
+
+/** @} */
diff --git a/sys/include/congure/quic.h b/sys/include/congure/quic.h
new file mode 100644
index 0000000000..37815333f5
--- /dev/null
+++ b/sys/include/congure/quic.h
@@ -0,0 +1,233 @@
+/*
+ * Copyright (C) 2021 Freie Universität Berlin
+ *
+ * This file is subject to the terms and conditions of the GNU Lesser
+ * General Public License v2.1. See the file LICENSE in the top level
+ * directory for more details.
+ */
+
+/**
+ * @defgroup    sys_congure_quic    CongURE implementation of QUIC's CC
+ * @ingroup     sys_congure
+ * @brief       Implementation of QUIC's congestion control algorithm for the
+ *              CongURE framework.
+ * @{
+ *
+ * @file
+ *
+ * @author      Martine S. Lenders
+ */
+#ifndef CONGURE_QUIC_H
+#define CONGURE_QUIC_H
+
+#include "ztimer.h"
+
+#include "congure.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @brief   Constants for the congestion control.
+ *
+ * Example usage (to use the same values as specified in
+ * [RFC 9002](https://tools.ietf.org/html/rfc9002#section-7.6)):
+ *
+ * ~~~~~~~~~~~~~~~~ {.c}
+ * static const congure_quic_snd_consts_t consts = {
+ *     .cong_event_cb = _maybe_send_one_pkt,
+ *     .init_wnd = 12000,      // 10 * max_datagram_size
+ *     .min_wnd = 2400,        // 2 * max_datagram_size
+ *     .init_rtt = 333,        // kInitialRtt = 333ms
+ *     .max_msg_size = 1200,   // max_datagram_size
+ *     .pc_thresh = 3000,      // kPersistentCongestionThreshold = 3s
+ *     .granularity = 1,       // kGranularity = 1ms
+ *     .loss_reduction_numerator = 1,  // kLossReductionFactor = .5
+ *     .loss_reduction_denominator = 2,
+ *     .inter_msg_interval_numerator = 5,  // Pacing factor N = 1.25
+ *     .inter_msg_interval_denominator = 4,
+ * };
+ * static congure_quic_snd_t cong;
+ *
+ * // ...
+ * congure_quic_snd_setup(&cong, &consts);
+ * ~~~~~~~~~~~~~~~~
+ */
+typedef struct {
+    /**
+     * @brief   congestion event callback
+     *
+     * This callback is called when a congestion event is detected by
+     * message loss or a CE notification. QUIC typically uses this to send
+     * a packet to speed up loss
+     * recovery.
+     *
+     * @param[in] ctx   callback context
+     */
+    void (*cong_event_cb)(void *ctx);
+
+    /**
+     * @brief   Initial congestion window size in initiator-defined units.
+     */
+    congure_wnd_size_t init_wnd;
+
+    /**
+     * @brief   Minimum congestion window size in initiator-defined units.
+     */
+    congure_wnd_size_t min_wnd;
+
+    /**
+     * @brief   The assumed RTT in milliseconds before an RTT sample is taken
+     */
+    uint16_t init_rtt;
+
+    /**
+     * @brief   Maximum message size in initiator-defined units.
+     */
+    uint16_t max_msg_size;
+
+    /**
+     * @brief   period of time in milliseconds for persistent congestion
+     *          to be established (TODO confirm: code uses it as a factor)
+     * @see [RFC 9002, section 7.6](https://tools.ietf.org/html/rfc9002#section-7.6)
+     */
+    uint16_t pc_thresh;
+
+    /**
+     * @brief   system timer granularity in milliseconds (typically 1)
+     */
+    uint16_t granularity;
+
+    /**
+     * @brief   numerator for the factor the congestion window should be
+     *          reduced by when a new loss event is detected
+     */
+    uint8_t loss_reduction_numerator;
+
+    /**
+     * @brief   denominator for the factor the congestion window should be
+     *          reduced by when a new loss event is detected
+     */
+    uint8_t loss_reduction_denominator;
+
+    /**
+     * @brief   numerator for the factor N used to adapt the message interval
+     *
+     * @see [RFC 9002, section 7.7](https://tools.ietf.org/html/rfc9002#section-7.7)
+     */
+    uint8_t inter_msg_interval_numerator;
+
+    /**
+     * @brief   denominator for the factor N used to adapt the message interval
+     *
+     * @see [RFC 9002, section 7.7](https://tools.ietf.org/html/rfc9002#section-7.7)
+     */
+    uint8_t inter_msg_interval_denominator;
+} congure_quic_snd_consts_t;
+
+/**
+ * @brief   State object for CongURE QUIC
+ *
+ * @extends congure_snd_t
+ */
+typedef struct {
+    congure_snd_t super;        /**< see @ref congure_snd_t */
+
+    /**
+     * @brief   Constants
+     */
+    const congure_quic_snd_consts_t *consts;
+
+    /**
+     * @brief   Timestamp in milliseconds of when the first RTT sample was
+     *          obtained (0 while no sample was taken yet)
+     */
+    ztimer_now_t first_rtt_sample;
+
+    /**
+     * @brief   Sum of caller-defined units of message sizes of all messages
+     *          that are not yet ack'd or declared lost
+     */
+    unsigned in_flight_size;
+
+    /**
+     * @brief   Timestamp in milliseconds of when congestion was first detected.
+     *
+     * This is the time when congestion recovery mode is entered.
+     */
+    ztimer_now_t recovery_start;
+
+    /**
+     * @brief   Slow start threshold in caller-defined units.
+     *
+     * When congure_snd_t::cwnd is below congure_quic_snd_t::ssthresh the
+     * algorithm is in slow start mode and congure_snd_t::cwnd grows in
+     * number of caller-defined units of acknowledged message sizes
+     */
+    congure_wnd_size_t ssthresh;
+
+    /**
+     * @brief   The smoothed RTT of a connection between peers in milliseconds
+     */
+    uint16_t smoothed_rtt;
+
+    /**
+     * @brief   The RTT variation in milliseconds
+     */
+    uint16_t rtt_var;
+
+    /**
+     * @brief   The minimum RTT in milliseconds seen over a period of time
+     */
+    uint16_t min_rtt;
+
+    /**
+     * @brief   Set to one if congestion control is limited by the
+     *          application or flow control
+     *
+     * Should be supplied and may be changed by user before calling a @ref
+     * sys_congure function.
+     *
+     * @see [RFC 9002, Appendix B.5](https://tools.ietf.org/html/rfc9002#appendix-B.5)
+     */
+    uint16_t limited;
+
+    /**
+     * @brief   Advertised maximum amount of time in milliseconds a receiver
+     *          intends to delay its acknowledgements
+     *
+     * Used to establish persistent congestion.
+     *
+     * Should be supplied and may be changed by user before calling a @ref
+     * sys_congure function. If this value is not provided by the protocol,
+     * leave it at 0.
+     */
+    uint16_t max_ack_delay;
+} congure_quic_snd_t;
+
+/**
+ * @brief   Sets up the driver for a CongURE QUIC object
+ *
+ * @pre inter_msg_interval_numerator of `consts` must be greater than or equal
+ *      to its inter_msg_interval_denominator.
+ *      See [RFC 9002, section 7.7](https://tools.ietf.org/html/rfc9002#section-7.7):
+ *      > Using a value for "N" that is small, but at least 1 (for
+ *      > example, 1.25) ensures that variations in round-trip time do not
+ *      > result in under-utilization of the congestion window.
+ *
+ * @param[in] c         A CongURE QUIC object.
+ * @param[in] consts    The constants to use for @p c.
+ *                      congure_quic_snd_consts_t::inter_msg_interval_numerator
+ *                      must be greater than or equal to
+ *                      congure_quic_snd_consts_t::inter_msg_interval_denominator
+ */
+void congure_quic_snd_setup(congure_quic_snd_t *c,
+                            const congure_quic_snd_consts_t *consts);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CONGURE_QUIC_H */
+/** @} */