diff options
author | David Garcia Quintas <dgq@google.com> | 2015-09-07 11:28:58 -0700 |
---|---|---|
committer | David Garcia Quintas <dgq@google.com> | 2015-09-07 15:24:31 -0700 |
commit | 634daca1a02707f5db11b9433359a22b68c75de2 (patch) | |
tree | 8a9a3cb59d47dcd9c9b1614ee01b571303b2b243 | |
parent | 02128e9bbf92c3900f0927a724482b5b3ff7541f (diff) |
Added queries and fragments to uri parser
-rw-r--r-- | src/core/client_config/uri_parser.c | 127 | ||||
-rw-r--r-- | src/core/client_config/uri_parser.h | 2 | ||||
-rw-r--r-- | test/core/client_config/uri_parser_test.c | 37 |
3 files changed, 139 insertions, 27 deletions
diff --git a/src/core/client_config/uri_parser.c b/src/core/client_config/uri_parser.c index 410a61c8cf..34b968a565 100644 --- a/src/core/client_config/uri_parser.c +++ b/src/core/client_config/uri_parser.c @@ -60,13 +60,80 @@ static grpc_uri *bad_uri(const char *uri_text, int pos, const char *section, return NULL; } -static char *copy_fragment(const char *src, int begin, int end) { +/** Returns a copy of \a src[begin, end) */ +static char *copy_component(const char *src, int begin, int end) { char *out = gpr_malloc(end - begin + 1); memcpy(out, src + begin, end - begin); out[end - begin] = 0; return out; } +/** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar + * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent + * sign not followed by two hex digits), -1 is returned. */ +static int parse_pchar(const char *uri_text, int i) { + /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * pct-encoded = "%" HEXDIG HEXDIG + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" */ + char c = uri_text[i]; + if ( ((c >= 'A') && (c <= 'Z')) || + ((c >= 'a') && (c <= 'z')) || + ((c >= '0') && (c <= '9')) || + (c == '-' || c == '.' || c == '_' || c == '~') || /* unreserved */ + + (c == '!' || c == '$' || c == '&' || c == '\'' || c == '$' || c == '&' || + c == '(' || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' || + c == '=') /* sub-delims */ ) { + return 1; + } + if (c == '%') { /* pct-encoded */ + int j; + if (uri_text[i+1] == 0 || uri_text[i+2] == 0) { + return -1; + } + for (j = i + 1; j < 2; j++) { + c = uri_text[j]; + if (!(((c >= '0') && (c <= '9')) || + ((c >= 'a') && (c <= 'f')) || + ((c >= 'A') && (c <= 'F')))) { + return -1; + } + } + return 2; + } + return 0; +} + +/* *( pchar / "?" / "/" ) */ +static int parse_query(const char *uri_text, int i) { + char c; + while ((c = uri_text[i]) != 0) { + const int advance = parse_pchar(uri_text, i); /* pchar */ + switch (advance) { + case 0: /* uri_text[i] isn't in pchar */ + /* maybe it's ? or / */ + if (uri_text[i] == '?' || uri_text[i] == '/') { + i++; + break; + } else { + return i; + } + case 1: + case 2: + i += advance; + break; + default: /* uri_text[i] introduces an invalid URI */ + return -i; + } + } + return i; /* first uri_text position past the \a query production, maybe \0 */ +} + +/* alias for consistency */ +static int (*parse_fragment)(const char *uri_text, int i) = parse_query; + grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) { grpc_uri *uri; int scheme_begin = 0; @@ -75,6 +142,10 @@ grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) { int authority_end = -1; int path_begin = -1; int path_end = -1; + int query_begin = -1; + int query_end = -1; + int fragment_begin = -1; + int fragment_end = -1; int i; for (i = scheme_begin; uri_text[i] != 0; i++) { @@ -99,15 +170,9 @@ grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) { if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') { authority_begin = scheme_end + 3; for (i = authority_begin; uri_text[i] != 0 && authority_end == -1; i++) { - if (uri_text[i] == '/') { + if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') { authority_end = i; } - if (uri_text[i] == '?') { - return bad_uri(uri_text, i, "query_not_supported", suppress_errors); - } - if (uri_text[i] == '#') { - return bad_uri(uri_text, i, "fragment_not_supported", suppress_errors); - } } if (authority_end == -1 && uri_text[i] == 0) { authority_end = i; @@ -122,20 +187,48 @@ grpc_uri *grpc_uri_parse(const char *uri_text, int suppress_errors) { } for (i = path_begin; uri_text[i] != 0; i++) { - if (uri_text[i] == '?') { - return bad_uri(uri_text, i, "query_not_supported", suppress_errors); + if (uri_text[i] == '?' || uri_text[i] == '#') { + path_end = i; + break; + } + } + if (path_end == -1 && uri_text[i] == 0) { + path_end = i; + } + if (path_end == -1) { + return bad_uri(uri_text, i, "path", suppress_errors); + } + + if (uri_text[i] == '?') { + query_begin = i + 1; + i = parse_query(uri_text, query_begin); + if (i < 0) { + return bad_uri(uri_text, -i, "query", suppress_errors); + } else if (uri_text[i] != 0 && uri_text[i] != '#') { + /* We must be at the end or at the beginning of a fragment */ + return bad_uri(uri_text, i, "query", suppress_errors); } - if (uri_text[i] == '#') { - return bad_uri(uri_text, i, "fragment_not_supported", suppress_errors); + query_end = i; + } + if (uri_text[i] == '#') { + fragment_begin = i + 1; + i = parse_fragment(uri_text, fragment_begin); + if (i < 0) { + return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors); + } else if (uri_text[i] != 0) { + /* We must be at the end */ + return bad_uri(uri_text, i, "fragment", suppress_errors); } + fragment_end = i; } - path_end = i; uri = gpr_malloc(sizeof(*uri)); memset(uri, 0, sizeof(*uri)); - uri->scheme = copy_fragment(uri_text, scheme_begin, scheme_end); - uri->authority = copy_fragment(uri_text, authority_begin, authority_end); - uri->path = copy_fragment(uri_text, path_begin, path_end); + uri->scheme = copy_component(uri_text, scheme_begin, scheme_end); + uri->authority = copy_component(uri_text, authority_begin, authority_end); + uri->path = copy_component(uri_text, path_begin, path_end); + uri->query = copy_component(uri_text, query_begin, query_end); + uri->fragment = copy_component(uri_text, fragment_begin, fragment_end); return uri; } @@ -145,5 +238,7 @@ void grpc_uri_destroy(grpc_uri *uri) { gpr_free(uri->scheme); gpr_free(uri->authority); gpr_free(uri->path); + gpr_free(uri->query); + gpr_free(uri->fragment); gpr_free(uri); } diff --git a/src/core/client_config/uri_parser.h b/src/core/client_config/uri_parser.h index ce4e6aecb0..b8daa13bd4 100644 --- a/src/core/client_config/uri_parser.h +++ b/src/core/client_config/uri_parser.h @@ -38,6 +38,8 @@ typedef struct { char *scheme; char *authority; char *path; + char *query; + char *fragment; } grpc_uri; /** parse a uri, return NULL on failure */ diff --git a/test/core/client_config/uri_parser_test.c b/test/core/client_config/uri_parser_test.c index d324029c7e..580c18b699 100644 --- a/test/core/client_config/uri_parser_test.c +++ b/test/core/client_config/uri_parser_test.c @@ -40,12 +40,15 @@ #include "test/core/util/test_config.h" static void test_succeeds(const char *uri_text, const char *scheme, - const char *authority, const char *path) { + const char *authority, const char *path, + const char *query, const char* fragment) { grpc_uri *uri = grpc_uri_parse(uri_text, 0); GPR_ASSERT(uri); GPR_ASSERT(0 == strcmp(scheme, uri->scheme)); GPR_ASSERT(0 == strcmp(authority, uri->authority)); GPR_ASSERT(0 == strcmp(path, uri->path)); + GPR_ASSERT(0 == strcmp(query, uri->query)); + GPR_ASSERT(0 == strcmp(fragment, uri->fragment)); grpc_uri_destroy(uri); } @@ -55,17 +58,29 @@ static void test_fails(const char *uri_text) { int main(int argc, char **argv) { grpc_test_init(argc, argv); - test_succeeds("http://www.google.com", "http", "www.google.com", ""); - test_succeeds("dns:///foo", "dns", "", "/foo"); - test_succeeds("http://www.google.com:90", "http", "www.google.com:90", ""); - test_succeeds("a192.4-df:foo.coom", "a192.4-df", "", "foo.coom"); - test_succeeds("a+b:foo.coom", "a+b", "", "foo.coom"); + test_succeeds("http://www.google.com", "http", "www.google.com", "", "", ""); + test_succeeds("dns:///foo", "dns", "", "/foo", "", ""); + test_succeeds("http://www.google.com:90", "http", "www.google.com:90", "", "", + ""); + test_succeeds("a192.4-df:foo.coom", "a192.4-df", "", "foo.coom", "", ""); + test_succeeds("a+b:foo.coom", "a+b", "", "foo.coom", "", ""); test_succeeds("zookeeper://127.0.0.1:2181/foo/bar", "zookeeper", - "127.0.0.1:2181", "/foo/bar"); + "127.0.0.1:2181", "/foo/bar", "", ""); + test_succeeds("http://www.google.com?yay-i'm-using-queries", "http", + "www.google.com", "", "yay-i'm-using-queries", ""); + test_succeeds("dns:foo.com#fragment-all-the-things", "dns", "", "foo.com", "", + "fragment-all-the-things"); + test_succeeds("http:?legit", "http", "", "", "legit", ""); + test_succeeds("unix:#this-is-ok-too", "unix", "", "", "", "this-is-ok-too"); + test_succeeds("http:?legit#twice", "http", "", "", "legit", "twice"); + test_succeeds("http://foo?bar#lol?", "http", "foo", "", "bar", "lol?"); + test_succeeds("http://foo?bar#lol?/", "http", "foo", "", "bar", "lol?/"); + test_fails("xyz"); - test_fails("http://www.google.com?why-are-you-using-queries"); - test_fails("dns:foo.com#fragments-arent-supported-here"); - test_fails("http:?huh"); - test_fails("unix:#yeah-right"); + test_fails("http:?dangling-pct-%0"); + test_fails("http://foo?[bar]"); + test_fails("http://foo?x[bar]"); + test_fails("http://foo?bar#lol#"); + return 0; } |