1
0
mirror of https://github.com/RIOT-OS/RIOT.git synced 2024-12-29 04:50:03 +01:00

Merge pull request #13758 from cgundogan/uri_parser

sys/uri_parser: minimal and non-destructive URI parsing
This commit is contained in:
Martine Lenders 2020-04-01 00:40:27 +02:00 committed by GitHub
commit d9b9426a86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 714 additions and 0 deletions

109
sys/include/uri_parser.h Normal file
View File

@ -0,0 +1,109 @@
/*
* Copyright (C) 2020 HAW Hamburg
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
/**
* @defgroup sys_uri_parser A minimal, non-destructive URI parser
* @ingroup sys
* @brief A minimal, non-destructive URI parser
*
* @see https://tools.ietf.org/html/rfc3986
*
* @{
*
* @file
* @brief Handler functions for uri_parser
* @author Cenk Gündoğan <cenk.guendogan@haw-hamburg.de>
*
*/
#ifndef URI_PARSER_H
#define URI_PARSER_H
#include <string.h>
#include <stdint.h>
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief Container that holds all results of parsing a URI
 *
 * uri_parser_process() zeroes this structure before parsing, so a component
 * that was not set stays `NULL` with a length of 0. Components that are set
 * point into the URI buffer handed to the parser and are not zero-terminated;
 * always use the matching `*_len` member to delimit them.
 */
typedef struct {
char *scheme; /**< start of the scheme, without the trailing ':' */
char *userinfo; /**< start of the userinfo, without the trailing '@' */
char *host; /**< start of the host; an IPv6 literal keeps its '[' and ']' */
char *port; /**< start of the port, without the leading ':' */
char *path; /**< start of the path */
char *query; /**< start of the query, without the leading '?' */
uint16_t scheme_len; /**< length of @p scheme */
uint16_t userinfo_len; /**< length of @p userinfo */
uint16_t host_len; /**< length of @p host */
uint16_t port_len; /**< length of @p port */
uint16_t path_len; /**< length of @p path */
uint16_t query_len; /**< length of @p query */
} uri_parser_result_t;
/**
 * @brief Checks whether @p uri is absolute or relative
 *
 * A URI is reported as absolute if a ':' occurs within the first @p uri_len
 * bytes and the first character of @p uri is an ASCII letter. The scheme is
 * not validated any further.
 *
 * @param[in] uri URI to check. Must not be `NULL`
 * @param[in] uri_len Length of @p uri in bytes
 *
 * @pre `uri != NULL`
 *
 * @return true if @p uri is an absolute URI
 * @return false if @p uri is a relative URI
 */
bool uri_parser_is_absolute(const char *uri, size_t uri_len);
/**
 * @brief Checks whether the zero-terminated @p uri is absolute or relative
 *
 * Convenience wrapper around uri_parser_is_absolute() that determines the
 * length of @p uri with `strlen()`.
 *
 * @param[in] uri Zero-terminated URI to check. Must not be `NULL`
 *
 * @pre `uri != NULL`
 *
 * @return true if @p uri is an absolute URI
 * @return false if @p uri is a relative URI
 */
bool uri_parser_is_absolute_string(const char *uri);
/**
 * @brief Parse a URI given as buffer and length
 *
 * The URI is not modified. On success the members of @p result point into
 * @p uri, so @p uri has to stay valid for as long as @p result is used.
 *
 * @param[out] result pointer to a container that will hold the result
 * @param[in] uri URI to parse. Must not be `NULL`
 * @param[in] uri_len Length of @p uri in bytes
 *
 * @pre `uri != NULL`
 *
 * @return 0 on success
 * @return -1 if @p uri is `NULL` or empty, or on parsing error
 */
int uri_parser_process(uri_parser_result_t *result, const char *uri,
size_t uri_len);
/**
 * @brief Parse a zero-terminated URI
 *
 * Convenience wrapper around uri_parser_process() that determines the length
 * of @p uri with `strlen()`.
 *
 * @param[out] result pointer to a container that will hold the result
 * @param[in] uri Zero-terminated URI to parse. Must not be `NULL`
 *
 * @pre `uri != NULL`
 *
 * @return 0 on success
 * @return -1 on parsing error
 */
int uri_parser_process_string(uri_parser_result_t *result, const char *uri);
#ifdef __cplusplus
}
#endif
#endif /* URI_PARSER_H */
/** @} */

1
sys/uri_parser/Makefile Normal file
View File

@ -0,0 +1 @@
include $(RIOTBASE)/Makefile.base

256
sys/uri_parser/uri_parser.c Normal file
View File

@ -0,0 +1,256 @@
/*
* Copyright (C) 2020 HAW Hamburg
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
/**
* @ingroup sys_uri_parser
* @{
*
* @file
* @brief A minimal, non-destructive URI parser.
* @see https://tools.ietf.org/html/rfc3986
*
* @author Cenk Gündoğan <cenk.guendogan@haw-hamburg.de>
*
* @}
*/
#include <assert.h>

#include "uri_parser.h"

#define ENABLE_DEBUG (0)
#include "debug.h"
/**
 * @brief strchr() for buffers that are not zero-terminated
 *
 * @param[in] start first character to inspect
 * @param[in] stop  one past the last character to inspect
 * @param[in] c     character to look for
 *
 * @return pointer to the first occurrence of @p c in [@p start, @p stop)
 * @return NULL if @p c does not occur or the range is empty
 */
static char *_strchrb(char *start, char *stop, char c)
{
    char *cursor = start;

    while (cursor < stop) {
        if (*cursor == c) {
            return cursor;
        }
        cursor++;
    }

    return NULL;
}
/**
 * @brief Consumes the scheme of an absolute URI
 *
 * Stores everything before the first ':' as scheme in @p result and reports
 * whether an authority part, introduced by "://", follows.
 *
 * @param[out] result        container that receives scheme and scheme_len
 * @param[in]  uri           start of the URI. Must not be `NULL`
 * @param[in]  uri_end       one past the last character of the URI
 * @param[out] has_authority set to true if "://" follows the scheme, to
 *                           false otherwise. Must not be `NULL`
 *
 * @return pointer to the first character after ':' or after "://"
 * @return NULL if the scheme is empty or no ':' exists before @p uri_end
 */
static char *_consume_scheme(uri_parser_result_t *result, char *uri,
                             char *uri_end, bool *has_authority)
{
    assert(uri);
    /* has_authority is an out-parameter that is written below, so it has to
     * be a valid pointer */
    assert(has_authority);

    /* assume there is no authority until "://" is seen */
    *has_authority = false;

    char *p = _strchrb(uri, uri_end, ':');

    /* no ':' at all, or ':' is the first character (empty scheme) */
    if ((p == NULL) || (p == uri)) {
        return NULL;
    }

    result->scheme = uri;
    result->scheme_len = p - uri;

    /* check if authority part exists '://'; the length check keeps the
     * look-ahead inside the buffer, which may not be zero-terminated */
    if (((uri_end - p) > 2) && (p[1] == '/') && (p[2] == '/')) {
        *has_authority = true;
        /* skip '://' */
        return p + 3;
    }

    /* skip ':' */
    return p + 1;
}
/**
 * @brief Splits an optional userinfo off the front of the authority
 *
 * If the authority contains an '@', everything before the first '@' becomes
 * the userinfo and the host is moved behind the '@'. Without an '@' the
 * result is left untouched.
 *
 * @param[in,out] result        container; host and host_len have to describe
 *                              the whole authority starting at @p uri
 * @param[in]     uri           start of the authority
 * @param[in]     authority_end one past the last character of the authority
 */
static void _consume_userinfo(uri_parser_result_t *result, char *uri,
                              char *authority_end)
{
    /* check for userinfo within authority */
    char *userinfo_end = _strchrb(uri, authority_end, '@');

    /* check if match */
    if (userinfo_end) {
        result->userinfo = uri;
        result->userinfo_len = userinfo_end - uri;
        /* shift host part beyond userinfo and '@' */
        result->host += result->userinfo_len + 1;
        result->host_len -= result->userinfo_len + 1;
    }
}
/**
 * @brief Splits an optional port off the end of the host part
 *
 * Searches for the last ':' of the authority. For an IPv6 literal the search
 * starts at its closing ']' so that the colons inside the address are not
 * mistaken for the port separator.
 *
 * @param[in,out] result        container; host has to be set already. If a
 *                              port is found, port and port_len are set and
 *                              host_len is reduced by the port and its ':'
 * @param[in]     ipv6_end      position of the closing ']' of an IPv6
 *                              literal, `NULL` if the host is none
 * @param[in]     authority_end one past the last character of the authority
 *
 * @return true if there is no port or the port was consumed
 * @return false if the authority ends with ':' (empty port)
 */
static bool _consume_port(uri_parser_result_t *result, char *ipv6_end,
                          char *authority_end)
{
    /* check for port after host part */
    char *port_begin = NULL;

    /* repeat until last ':' in authority section */
    /* if ipv6 address, check after ipv6 end marker */
    char *p = (ipv6_end ? ipv6_end : result->host);

    while (p != NULL && (p < authority_end)) {
        /* remembers the start position first, then every ':' found */
        port_begin = p;
        p = _strchrb(p + 1, authority_end, ':');
    }

    /* check if match; port_begin is the start position if no ':' exists */
    if (port_begin && (port_begin[0] == ':')) {
        /* port should be at least one character, => + 1 */
        if (port_begin + 1 == authority_end) {
            return false;
        }
        result->port = port_begin + 1;
        result->port_len = authority_end - result->port;
        /* cut host part before port and ':' */
        result->host_len -= result->port_len + 1;
    }

    return true;
}
/**
 * @brief Consumes the authority part (userinfo, host and port) of a URI
 *
 * The authority extends from @p uri up to the first '/' or, if there is
 * none, up to @p uri_end.
 *
 * @param[out] result  container that receives userinfo, host and port
 * @param[in]  uri     first character after "://". Must not be `NULL`
 * @param[in]  uri_end one past the last character of the URI
 *
 * @return pointer to the end of the authority, i.e. to the '/' that starts
 *         the path or to @p uri_end
 * @return NULL if an IPv6 literal is not closed by ']' within the authority,
 *         if the port is empty, or if the host is empty although userinfo or
 *         port are set
 */
static char *_consume_authority(uri_parser_result_t *result, char *uri,
                                char *uri_end)
{
    assert(uri);

    /* search until first '/' */
    char *authority_end = _strchrb(uri, uri_end, '/');
    if (!authority_end) {
        authority_end = uri_end;
    }

    result->host = uri;
    result->host_len = authority_end - uri;

    /* consume userinfo, if available */
    _consume_userinfo(result, uri, authority_end);

    char *ipv6_end = NULL;
    /* validate IPv6 form; host[0] is only inspected for a non-empty host so
     * that nothing beyond the authority is read */
    if ((result->host < authority_end) && (result->host[0] == '[')) {
        /* the end marker has to be part of the authority; a missing or
         * misplaced ']' makes the literal malformed */
        ipv6_end = _strchrb(result->host, authority_end, ']');
        if (ipv6_end == NULL) {
            return NULL;
        }
    }

    /* consume port, if available */
    if (!_consume_port(result, ipv6_end, authority_end)) {
        return NULL;
    }

    /* do not allow empty host if userinfo or port are set */
    if ((result->host_len == 0) &&
        (result->userinfo || result->port)) {
        return NULL;
    }

    /* this includes the '/' */
    return authority_end;
}
/**
 * @brief Consumes the path and the optional query of a URI
 *
 * Everything from @p uri up to the first '?' becomes the path, everything
 * after that '?' becomes the query. Without a '?' the whole remainder is the
 * path and the query is left untouched.
 *
 * @param[out] result  container that receives path and query
 * @param[in]  uri     first character of the path. Must not be `NULL`
 * @param[in]  uri_end one past the last character of the URI
 *
 * @return pointer behind the last consumed character
 */
static char *_consume_path(uri_parser_result_t *result, char *uri,
                           char *uri_end)
{
    assert(uri);

    /* lengths are kept in 16 bit, exactly like the result members */
    const uint16_t remaining = (uint16_t)(uri_end - uri);
    char *qmark = _strchrb(uri, uri_end, '?');

    result->path = uri;

    if (qmark == NULL) {
        /* no query: the path covers everything that is left */
        result->path_len = remaining;
        return uri + remaining;
    }

    /* the query starts behind the '?', which belongs to neither part */
    result->query = qmark + 1;
    result->query_len = (uint16_t)(remaining - (qmark - uri) - 1);
    result->path_len = (uint16_t)(remaining - (result->query_len + 1));

    return result->query + result->query_len;
}
/**
 * @brief Parses a relative reference, i.e. a path with an optional query
 *
 * @param[out] result  container that receives path and query
 * @param[in]  uri     first character of the path
 * @param[in]  uri_end one past the last character of the URI
 *
 * @return 0 if the whole input up to @p uri_end was consumed
 * @return -1 if unconsumed input is left
 */
static int _parse_relative(uri_parser_result_t *result, char *uri,
                           char *uri_end)
{
    uri = _consume_path(result, uri, uri_end);

    /* everything has to be consumed by now. Compare against uri_end instead
     * of looking for '\0': the buffer is not required to be zero-terminated
     * and uri_end itself must not be dereferenced */
    if (uri != uri_end) {
        return -1;
    }

    return 0;
}
/**
 * @brief Parses an absolute URI: scheme, optional authority, path and query
 *
 * @param[out] result  container that receives all components
 * @param[in]  uri     first character of the URI
 * @param[in]  uri_end one past the last character of the URI
 *
 * @return 0 on success
 * @return -1 if scheme, authority or the remainder cannot be parsed
 */
static int _parse_absolute(uri_parser_result_t *result, char *uri,
                           char *uri_end)
{
    bool authority_follows = false;
    char *cursor = _consume_scheme(result, uri, uri_end, &authority_follows);

    if (cursor == NULL) {
        return -1;
    }

    if (authority_follows) {
        cursor = _consume_authority(result, cursor, uri_end);
        if (cursor == NULL) {
            return -1;
        }
    }

    /* what is left is the path (starting with '/' behind an authority) and
     * the query, which is the same as parsing a relative reference */
    return _parse_relative(result, cursor, uri_end);
}
/* A URI counts as absolute if it contains a ':' within uri_len bytes and
 * starts with an ASCII letter; everything else is relative. */
bool uri_parser_is_absolute(const char *uri, size_t uri_len)
{
    /* without a ':' there cannot be a scheme => relative */
    if (_strchrb((char *)uri, (char *)(uri + uri_len), ':') == NULL) {
        return false;
    }

    /* potentially absolute: first character should be ALPHA */
    const char first = uri[0];
    const bool is_upper = (first >= 'A') && (first <= 'Z');
    const bool is_lower = (first >= 'a') && (first <= 'z');

    return is_upper || is_lower;
}
/* Zero-terminated variant of uri_parser_is_absolute(). */
bool uri_parser_is_absolute_string(const char *uri)
{
    const size_t uri_len = strlen(uri);

    return uri_parser_is_absolute(uri, uri_len);
}
/* Parses the first uri_len bytes of uri into result.
 *
 * Returns 0 on success. Returns -1 if uri is NULL or empty, if uri_len does
 * not fit the 16 bit length members of the result, or on a parsing error. */
int uri_parser_process(uri_parser_result_t *result, const char *uri,
                       size_t uri_len)
{
    /* uri cannot be empty; uri_len is tested first so that uri[0] is never
     * read from a zero-length buffer */
    if ((NULL == uri) || (uri_len == 0) || (uri[0] == '\0')) {
        return -1;
    }

    /* all component lengths are stored as uint16_t; a longer input would
     * silently truncate them */
    if (uri_len > UINT16_MAX) {
        return -1;
    }

    memset(result, 0, sizeof(*result));

    if (uri_parser_is_absolute(uri, uri_len)) {
        return _parse_absolute(result, (char *)uri, (char *)(uri + uri_len));
    }

    return _parse_relative(result, (char *)uri, (char *)(uri + uri_len));
}
/* Zero-terminated variant of uri_parser_process().
 *
 * Returns 0 on success, -1 if uri is NULL or on a parsing error. */
int uri_parser_process_string(uri_parser_result_t *result, const char *uri)
{
    /* strlen() must not be called on NULL; report it the same way
     * uri_parser_process() reports a NULL uri */
    if (uri == NULL) {
        return -1;
    }

    return uri_parser_process(result, uri, strlen(uri));
}

View File

@ -0,0 +1 @@
include $(RIOTBASE)/Makefile.base

View File

@ -0,0 +1 @@
USEMODULE += uri_parser

View File

@ -0,0 +1,309 @@
/*
* Copyright (C) 2020 HAW Hamburg
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
/**
* @{
*
* @file
*/
#include <stdio.h>
#include "embUnit.h"
#include "uri_parser.h"
#include "unittests-constants.h"
#include "tests-uri_parser.h"
/**
 * @brief Initializer for one validate_t test vector
 *
 * Arguments in order: URI to parse (u), expected scheme (s), userinfo (us),
 * host (h), port (po), path (pa), query (q) and the expected return value (e).
 */
#define VEC(u, s, us, h, po, pa, q, e) \
{ .uri = u, .scheme = s, .userinfo = us, .host = h, .port = po, \
.path = pa, .query = q, .expected = e}
/**
 * @brief Compares one parsed component with the expectation of a test vector
 *
 * Uses `ures` (the parser result) and `i` (index into validate_uris) from the
 * scope it is expanded in. If the parsed component pointer is `NULL`, the
 * expected string has to be empty. Otherwise the parsed length has to equal
 * the length of the expected string and the bytes have to match.
 */
#define VEC_CHECK(comp) \
do { \
if (ures.comp == NULL) { \
TEST_ASSERT(validate_uris[i].comp[0] == '\0'); \
} \
else { \
TEST_ASSERT_EQUAL_INT(strlen(validate_uris[i].comp), \
ures.comp##_len); \
TEST_ASSERT_EQUAL_INT(0, \
memcmp(ures.comp, \
validate_uris[i].comp, \
strlen(validate_uris[i].comp))); \
} \
} while (0)
/**
 * @brief One test vector: a URI and the components expected from parsing it
 *
 * An expected component that is absent is given as empty string. All
 * strings are stored in fixed-size arrays, so every vector has to fit them
 * including the terminating zero.
 */
typedef struct {
char uri[64]; /* URI handed to the parser */
char scheme[8]; /* expected scheme */
char userinfo[16]; /* expected userinfo */
char host[16]; /* expected host */
char port[32]; /* expected port */
char path[48]; /* expected path */
char query[32]; /* expected query */
int expected; /* expected return value of the parser */
} validate_t;
/*
 * Test vectors, each built as
 *
 * VEC(uri_to_parse,
 *     scheme, userinfo, host, port,
 *     path, query, expected return value)
 *
 * The expected components are only compared if the expected return value
 * is 0.
 */
static const validate_t validate_uris[26] = {
/* uri to parse */
VEC("coap://RIOT:test@[2001:db8::1]:5683/.well-known/core?v=1",
/* parsed scheme */
"coap",
/* parsed userinfo */
"RIOT:test",
/* parsed host */
"[2001:db8::1]",
/* parsed port */
"5683",
/* parsed path */
"/.well-known/core",
/* parsed query */
"v=1",
/* expected return value */
0),
/* relative reference with query */
VEC("/.well-known/core?v=1",
"",
"",
"",
"",
"/.well-known/core",
"v=1",
0),
/* the port is not required to be numeric */
VEC("coap://R@[2001:db8::1]:5own/v=1",
"coap",
"R",
"[2001:db8::1]",
"5own",
"/v=1",
"",
0),
VEC("coap://R@[2001:db8::1]:5own/:v=1",
"coap",
"R",
"[2001:db8::1]",
"5own",
"/:v=1",
"",
0),
VEC("cap://R@[2001:db8::1]:5own/?v=1",
"cap",
"R",
"[2001:db8::1]",
"5own",
"/",
"v=1",
0),
/* host does not start with '[': the last ':' separates the port */
VEC("oap://Y2001:db8::1]:5own/av=1",
"oap",
"",
"Y2001:db8::1]",
"5own",
"/av=1",
"",
0),
/* does not start with a letter: parsed as relative reference */
VEC("//Rb[ʰ00J:d/5v=0",
"",
"",
"",
"",
"//Rb[ʰ00J:d/5v=0",
"",
0),
/* authority "oap:" ends with ':' but has no port */
VEC("coap://oap://P@[2001:b",
"",
"",
"",
"",
"",
"",
-1),
/* empty authority: everything after "://" is the path */
VEC("coap:///R@[2008::1]:5own//R@[2008::1]:5own/?v=1",
"coap",
"",
"",
"",
"/R@[2008::1]:5own//R@[2008::1]:5own/",
"v=1",
0),
VEC("coaP://R/RZ[2001[8:01[8::1]:5o:1]:5oTMv=1",
"coaP",
"",
"R",
"",
"/RZ[2001[8:01[8::1]:5o:1]:5oTMv=1",
"",
0),
/* userinfo without a host */
VEC("coap://R@////////////////7///v=1",
"",
"",
"",
"",
"",
"",
-1),
VEC("coa[:////[2001:db5ow:5own/Ov=1",
"coa[",
"",
"",
"",
"//[2001:db5ow:5own/Ov=1",
"",
0),
/* scheme without authority */
VEC("tel:+1-816-555-1212",
"tel",
"",
"",
"",
"+1-816-555-1212",
"",
0),
VEC("sms:+15105550101,+15105550102?body=hello%20there",
"sms",
"",
"",
"",
"+15105550101,+15105550102",
"body=hello%20there",
0),
VEC("a",
"",
"",
"",
"",
"a",
"",
0),
VEC("mailto:test@example.com",
"mailto",
"",
"",
"",
"test@example.com",
"",
0),
/* the following vectors match the example URIs of RFC 3986, section 1.1.2 */
VEC("ftp://ftp.is.co.za/rfc/rfc1808.txt",
"ftp",
"",
"ftp.is.co.za",
"",
"/rfc/rfc1808.txt",
"",
0),
VEC("http://www.ietf.org/rfc/rfc2396.txt",
"http",
"",
"www.ietf.org",
"",
"/rfc/rfc2396.txt",
"",
0),
/* only the first '?' separates path and query */
VEC("ldap://[2001:db8::7]/c=GB?objectClass?one",
"ldap",
"",
"[2001:db8::7]",
"",
"/c=GB",
"objectClass?one",
0),
VEC("mailto:John.Doe@example.com",
"mailto",
"",
"",
"",
"John.Doe@example.com",
"",
0),
VEC("news:comp.infosystems.www.servers.unix",
"news",
"",
"",
"",
"comp.infosystems.www.servers.unix",
"",
0),
/* same vector as the "tel:" entry further up */
VEC("tel:+1-816-555-1212",
"tel",
"",
"",
"",
"+1-816-555-1212",
"",
0),
VEC("telnet://192.0.2.16:80/",
"telnet",
"",
"192.0.2.16",
"80",
"/",
"",
0),
VEC("urn:oasis:names:specification:docbook:dtd:xml:4.1.2",
"urn",
"",
"",
"",
"oasis:names:specification:docbook:dtd:xml:4.1.2",
"",
0),
/* an empty URI is rejected */
VEC("",
"",
"",
"",
"",
"",
"",
-1),
VEC("/",
"",
"",
"",
"",
"/",
"",
0),
};
/* Parses every vector of validate_uris and checks the return value and, for
 * successfully parsed vectors, each component. The names `ures` and `i` are
 * fixed because VEC_CHECK() refers to them. */
static void test_uri_parser__validate(void)
{
    uri_parser_result_t ures;

    for (unsigned i = 0; i < ARRAY_SIZE(validate_uris); ++i) {
        const int res = uri_parser_process_string(&ures, validate_uris[i].uri);

        TEST_ASSERT_EQUAL_INT(validate_uris[i].expected, res);

        /* components are only meaningful after a successful parse */
        if (res != 0) {
            continue;
        }

        VEC_CHECK(scheme);
        VEC_CHECK(userinfo);
        VEC_CHECK(host);
        VEC_CHECK(port);
        VEC_CHECK(path);
        VEC_CHECK(query);
    }
}
/**
 * @brief Assembles the test caller of the uri_parser unit tests
 *
 * @return pointer to `uri_parser_tests`, cast to `Test *`
 */
Test *tests_uri_parser_tests(void)
{
/* all test functions of this suite */
EMB_UNIT_TESTFIXTURES(fixtures) {
new_TestFixture(test_uri_parser__validate),
};
/* the NULL arguments are presumably the setup and teardown hooks; confirm
 * against the embUnit headers */
EMB_UNIT_TESTCALLER(uri_parser_tests, NULL, NULL, fixtures);
return (Test *)&uri_parser_tests;
}
/* Entry point of this test suite: hands the uri_parser tests to TESTS_RUN(). */
void tests_uri_parser(void)
{
TESTS_RUN(tests_uri_parser_tests());
}
/** @} */

View File

@ -0,0 +1,37 @@
/*
* Copyright (C) 2020 HAW Hamburg
*
* This file is subject to the terms and conditions of the GNU Lesser
* General Public License v2.1. See the file LICENSE in the top level
* directory for more details.
*/
/**
* @addtogroup unittests
* @{
*
* @file
* @brief Unit tests for the uri_parser module
*
* @author Cenk Gündoğan <cenk.guendogan@haw-hamburg.de>
*/
#ifndef TESTS_URI_PARSER_H
#define TESTS_URI_PARSER_H
#include "embUnit.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief The entry point of this test suite: runs the uri_parser unit tests.
 */
void tests_uri_parser(void);
#ifdef __cplusplus
}
#endif
#endif /* TESTS_URI_PARSER_H */
/** @} */