diff options
author | ncteisen <ncteisen@gmail.com> | 2018-10-31 14:14:09 -0700 |
---|---|---|
committer | ncteisen <ncteisen@gmail.com> | 2018-10-31 14:14:09 -0700 |
commit | 1a3c2d3e3ae3e7ab242c486db7e6405c9b7f8999 (patch) | |
tree | ccec148b8cb7494fee8eed6d5e3b8ea97518dd49 /src/core/lib/uri | |
parent | 1e64fa629c75ec2cf3be6dd40985f9517c4d33bc (diff) |
Move uri_parser to lib
Diffstat (limited to 'src/core/lib/uri')
-rw-r--r-- | src/core/lib/uri/uri_parser.cc | 314 | ||||
-rw-r--r-- | src/core/lib/uri/uri_parser.h | 50 |
2 files changed, 364 insertions, 0 deletions
diff --git a/src/core/lib/uri/uri_parser.cc b/src/core/lib/uri/uri_parser.cc new file mode 100644 index 0000000000..f212c7d2c0 --- /dev/null +++ b/src/core/lib/uri/uri_parser.cc @@ -0,0 +1,314 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include <grpc/support/port_platform.h> + +#include "src/core/lib/uri/uri_parser.h" + +#include <string.h> + +#include <grpc/slice_buffer.h> +#include <grpc/support/alloc.h> +#include <grpc/support/log.h> +#include <grpc/support/string_util.h> + +#include "src/core/lib/gpr/string.h" +#include "src/core/lib/slice/percent_encoding.h" +#include "src/core/lib/slice/slice_internal.h" +#include "src/core/lib/slice/slice_string_helpers.h" + +/** a size_t default value... maps to all 1's */ +#define NOT_SET (~(size_t)0) + +static grpc_uri* bad_uri(const char* uri_text, size_t pos, const char* section, + bool suppress_errors) { + char* line_prefix; + size_t pfx_len; + + if (!suppress_errors) { + gpr_asprintf(&line_prefix, "bad uri.%s: '", section); + pfx_len = strlen(line_prefix) + pos; + gpr_log(GPR_ERROR, "%s%s'", line_prefix, uri_text); + gpr_free(line_prefix); + + line_prefix = static_cast<char*>(gpr_malloc(pfx_len + 1)); + memset(line_prefix, ' ', pfx_len); + line_prefix[pfx_len] = 0; + gpr_log(GPR_ERROR, "%s^ here", line_prefix); + gpr_free(line_prefix); + } + + return nullptr; +} + +/** Returns a copy of percent decoded \a src[begin, end) */ +static char* decode_and_copy_component(const char* src, size_t begin, + size_t end) { + grpc_slice component = + (begin == NOT_SET || end == NOT_SET) + ? grpc_empty_slice() + : grpc_slice_from_copied_buffer(src + begin, end - begin); + grpc_slice decoded_component = + grpc_permissive_percent_decode_slice(component); + char* out = grpc_dump_slice(decoded_component, GPR_DUMP_ASCII); + grpc_slice_unref_internal(component); + grpc_slice_unref_internal(decoded_component); + return out; +} + +static bool valid_hex(char c) { + return ((c >= 'a') && (c <= 'f')) || ((c >= 'A') && (c <= 'F')) || + ((c >= '0') && (c <= '9')); +} + +/** Returns how many chars to advance if \a uri_text[i] begins a valid \a pchar + * production. If \a uri_text[i] introduces an invalid \a pchar (such as percent + * sign not followed by two hex digits), NOT_SET is returned. */ +static size_t parse_pchar(const char* uri_text, size_t i) { + /* pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * pct-encoded = "%" HEXDIG HEXDIG + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + / "*" / "+" / "," / ";" / "=" */ + char c = uri_text[i]; + switch (c) { + default: + if (((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || + ((c >= '0') && (c <= '9'))) { + return 1; + } + break; + case ':': + case '@': + case '-': + case '.': + case '_': + case '~': + case '!': + case '$': + case '&': + case '\'': + case '(': + case ')': + case '*': + case '+': + case ',': + case ';': + case '=': + return 1; + case '%': /* pct-encoded */ + if (valid_hex(uri_text[i + 1]) && valid_hex(uri_text[i + 2])) { + return 2; + } + return NOT_SET; + } + return 0; +} + +/* *( pchar / "?" / "/" ) */ +static int parse_fragment_or_query(const char* uri_text, size_t* i) { + char c; + while ((c = uri_text[*i]) != 0) { + const size_t advance = parse_pchar(uri_text, *i); /* pchar */ + switch (advance) { + case 0: /* uri_text[i] isn't in pchar */ + /* maybe it's ? or / */ + if (uri_text[*i] == '?' || uri_text[*i] == '/') { + (*i)++; + break; + } else { + return 1; + } + GPR_UNREACHABLE_CODE(return 0); + default: + (*i) += advance; + break; + case NOT_SET: /* uri_text[i] introduces an invalid URI */ + return 0; + } + } + /* *i is the first uri_text position past the \a query production, maybe \0 */ + return 1; +} + +static void parse_query_parts(grpc_uri* uri) { + static const char* QUERY_PARTS_SEPARATOR = "&"; + static const char* QUERY_PARTS_VALUE_SEPARATOR = "="; + GPR_ASSERT(uri->query != nullptr); + if (uri->query[0] == '\0') { + uri->query_parts = nullptr; + uri->query_parts_values = nullptr; + uri->num_query_parts = 0; + return; + } + + gpr_string_split(uri->query, QUERY_PARTS_SEPARATOR, &uri->query_parts, + &uri->num_query_parts); + uri->query_parts_values = + static_cast<char**>(gpr_malloc(uri->num_query_parts * sizeof(char**))); + for (size_t i = 0; i < uri->num_query_parts; i++) { + char** query_param_parts; + size_t num_query_param_parts; + char* full = uri->query_parts[i]; + gpr_string_split(full, QUERY_PARTS_VALUE_SEPARATOR, &query_param_parts, + &num_query_param_parts); + GPR_ASSERT(num_query_param_parts > 0); + uri->query_parts[i] = query_param_parts[0]; + if (num_query_param_parts > 1) { + /* TODO(dgq): only the first value after the separator is considered. + * Perhaps all chars after the first separator for the query part should + * be included, even if they include the separator. */ + uri->query_parts_values[i] = query_param_parts[1]; + } else { + uri->query_parts_values[i] = nullptr; + } + for (size_t j = 2; j < num_query_param_parts; j++) { + gpr_free(query_param_parts[j]); + } + gpr_free(query_param_parts); + gpr_free(full); + } +} + +grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors) { + grpc_uri* uri; + size_t scheme_begin = 0; + size_t scheme_end = NOT_SET; + size_t authority_begin = NOT_SET; + size_t authority_end = NOT_SET; + size_t path_begin = NOT_SET; + size_t path_end = NOT_SET; + size_t query_begin = NOT_SET; + size_t query_end = NOT_SET; + size_t fragment_begin = NOT_SET; + size_t fragment_end = NOT_SET; + size_t i; + + for (i = scheme_begin; uri_text[i] != 0; i++) { + if (uri_text[i] == ':') { + scheme_end = i; + break; + } + if (uri_text[i] >= 'a' && uri_text[i] <= 'z') continue; + if (uri_text[i] >= 'A' && uri_text[i] <= 'Z') continue; + if (i != scheme_begin) { + if (uri_text[i] >= '0' && uri_text[i] <= '9') continue; + if (uri_text[i] == '+') continue; + if (uri_text[i] == '-') continue; + if (uri_text[i] == '.') continue; + } + break; + } + if (scheme_end == NOT_SET) { + return bad_uri(uri_text, i, "scheme", suppress_errors); + } + + if (uri_text[scheme_end + 1] == '/' && uri_text[scheme_end + 2] == '/') { + authority_begin = scheme_end + 3; + for (i = authority_begin; uri_text[i] != 0 && authority_end == NOT_SET; + i++) { + if (uri_text[i] == '/' || uri_text[i] == '?' || uri_text[i] == '#') { + authority_end = i; + } + } + if (authority_end == NOT_SET && uri_text[i] == 0) { + authority_end = i; + } + if (authority_end == NOT_SET) { + return bad_uri(uri_text, i, "authority", suppress_errors); + } + /* TODO(ctiller): parse the authority correctly */ + path_begin = authority_end; + } else { + path_begin = scheme_end + 1; + } + + for (i = path_begin; uri_text[i] != 0; i++) { + if (uri_text[i] == '?' || uri_text[i] == '#') { + path_end = i; + break; + } + } + if (path_end == NOT_SET && uri_text[i] == 0) { + path_end = i; + } + if (path_end == NOT_SET) { + return bad_uri(uri_text, i, "path", suppress_errors); + } + + if (uri_text[i] == '?') { + query_begin = ++i; + if (!parse_fragment_or_query(uri_text, &i)) { + return bad_uri(uri_text, i, "query", suppress_errors); + } else if (uri_text[i] != 0 && uri_text[i] != '#') { + /* We must be at the end or at the beginning of a fragment */ + return bad_uri(uri_text, i, "query", suppress_errors); + } + query_end = i; + } + if (uri_text[i] == '#') { + fragment_begin = ++i; + if (!parse_fragment_or_query(uri_text, &i)) { + return bad_uri(uri_text, i - fragment_end, "fragment", suppress_errors); + } else if (uri_text[i] != 0) { + /* We must be at the end */ + return bad_uri(uri_text, i, "fragment", suppress_errors); + } + fragment_end = i; + } + + uri = static_cast<grpc_uri*>(gpr_zalloc(sizeof(*uri))); + uri->scheme = decode_and_copy_component(uri_text, scheme_begin, scheme_end); + uri->authority = + decode_and_copy_component(uri_text, authority_begin, authority_end); + uri->path = decode_and_copy_component(uri_text, path_begin, path_end); + uri->query = decode_and_copy_component(uri_text, query_begin, query_end); + uri->fragment = + decode_and_copy_component(uri_text, fragment_begin, fragment_end); + parse_query_parts(uri); + + return uri; +} + +const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key) { + GPR_ASSERT(key != nullptr); + if (key[0] == '\0') return nullptr; + + for (size_t i = 0; i < uri->num_query_parts; ++i) { + if (0 == strcmp(key, uri->query_parts[i])) { + return uri->query_parts_values[i]; + } + } + return nullptr; +} + +void grpc_uri_destroy(grpc_uri* uri) { + if (!uri) return; + gpr_free(uri->scheme); + gpr_free(uri->authority); + gpr_free(uri->path); + gpr_free(uri->query); + for (size_t i = 0; i < uri->num_query_parts; ++i) { + gpr_free(uri->query_parts[i]); + gpr_free(uri->query_parts_values[i]); + } + gpr_free(uri->query_parts); + gpr_free(uri->query_parts_values); + gpr_free(uri->fragment); + gpr_free(uri); +} diff --git a/src/core/lib/uri/uri_parser.h b/src/core/lib/uri/uri_parser.h new file mode 100644 index 0000000000..c13778bc55 --- /dev/null +++ b/src/core/lib/uri/uri_parser.h @@ -0,0 +1,50 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#ifndef GRPC_CORE_EXT_URI_URI_PARSER_H +#define GRPC_CORE_EXT_URI_URI_PARSER_H + +#include <grpc/support/port_platform.h> + +#include <stddef.h> + +typedef struct { + char* scheme; + char* authority; + char* path; + char* query; + /** Query substrings separated by '&' */ + char** query_parts; + /** Number of elements in \a query_parts and \a query_parts_values */ + size_t num_query_parts; + /** Split each query part by '='. NULL if not present. */ + char** query_parts_values; + char* fragment; +} grpc_uri; + +/** parse a uri, return NULL on failure */ +grpc_uri* grpc_uri_parse(const char* uri_text, bool suppress_errors); + +/** return the part of a query string after the '=' in "?key=xxx&...", or NULL + * if key is not present */ +const char* grpc_uri_get_query_arg(const grpc_uri* uri, const char* key); + +/** destroy a uri */ +void grpc_uri_destroy(grpc_uri* uri); + +#endif /* GRPC_CORE_EXT_URI_URI_PARSER_H */ |