diff --git a/src/core/uri/include/sourcemeta/core/uri.h b/src/core/uri/include/sourcemeta/core/uri.h index cce657529..418da9e08 100644 --- a/src/core/uri/include/sourcemeta/core/uri.h +++ b/src/core/uri/include/sourcemeta/core/uri.h @@ -469,6 +469,34 @@ class SOURCEMETA_CORE_URI_EXPORT URI { /// ``` static auto canonicalize(std::string_view input) -> std::string; + /// Check if the given string is a valid URI (scheme required, fragment + /// allowed) per RFC 3986 without constructing a full URI object. For example: + /// + /// ```cpp + /// #include <sourcemeta/core/uri.h> + /// #include <cassert> + /// + /// assert(sourcemeta::core::URI::is_uri("https://example.com/path")); + /// assert(!sourcemeta::core::URI::is_uri("://bad")); + /// assert(!sourcemeta::core::URI::is_uri("relative/path")); + /// ``` + [[nodiscard]] static auto is_uri(std::string_view input) noexcept -> bool; + + /// Check if the given string is a valid URI reference per RFC 3986 + /// (absolute or relative) without constructing a full URI object. + /// For example: + /// + /// ```cpp + /// #include <sourcemeta/core/uri.h> + /// #include <cassert> + /// + /// assert(sourcemeta::core::URI::is_uri_reference("https://example.com")); + /// assert(sourcemeta::core::URI::is_uri_reference("relative/path")); + /// assert(!sourcemeta::core::URI::is_uri_reference("://bad")); + /// ``` + [[nodiscard]] static auto is_uri_reference(std::string_view input) noexcept + -> bool; + private: auto parse(std::string_view input) -> void; diff --git a/src/core/uri/parse.cc b/src/core/uri/parse.cc index 7d18035dc..bb1f8bec5 100644 --- a/src/core/uri/parse.cc +++ b/src/core/uri/parse.cc @@ -3,12 +3,15 @@ #include "escaping.h" #include "grammar.h" -#include // assert -#include // std::isalnum, std::isxdigit, std::isalpha, std::isdigit -#include // std::uint64_t -#include // std::optional -#include // std::string, std::stoul +#include // assert +#include // std::isalnum, std::isxdigit, std::isalpha, std::isdigit +#include // std::uint64_t +#include // std::numeric_limits +#include // std::optional 
+#include // std::out_of_range +#include // std::string, std::stoul #include // std::string_view +#include // std::conditional_t namespace { @@ -86,12 +89,17 @@ auto validate_percent_encoded_utf8(const std::string_view input, return 3 * (1 + continuation_count); } +template auto parse_scheme(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || !std::isalpha(static_cast(input[position]))) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto start = position; @@ -102,21 +110,35 @@ auto parse_scheme(const std::string_view input, } if (position < input.size() && input[position] == URI_COLON) { - std::string scheme{input.substr(start, position - start)}; - position += 1; - return scheme; + if constexpr (CheckOnly) { + position += 1; + return true; + } else { + std::string scheme{input.substr(start, position - start)}; + position += 1; + return scheme; + } } position = start; - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } +template auto parse_port(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || !std::isdigit(static_cast(input[position]))) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto start = position; @@ -125,12 +147,23 @@ auto parse_port(const std::string_view input, position += 1; } - const std::string port_string{input.substr(start, position - start)}; - return std::stoul(port_string); + if constexpr (CheckOnly) { + return true; + } else { + try { + const std::string port_string{input.substr(start, position - start)}; + return std::stoul(port_string); + } catch (const std::out_of_range &) { + throw sourcemeta::core::URIParseError{ + static_cast(start + 1)}; + } + } } 
+template auto parse_ipv6(const std::string_view input, - std::string_view::size_type &position) -> std::string { + std::string_view::size_type &position) + -> std::conditional_t { assert(input[position] == URI_OPEN_BRACKET); const auto start = position; @@ -192,19 +225,34 @@ auto parse_ipv6(const std::string_view input, static_cast(start + 1)}; } - std::string ipv6{input.substr(start + 1, position - start - 1)}; - position += 1; - return ipv6; + if constexpr (CheckOnly) { + position += 1; + } else { + std::string ipv6{input.substr(start + 1, position - start - 1)}; + position += 1; + return ipv6; + } } +template auto parse_host(const std::string_view input, - std::string_view::size_type &position) -> std::string { + std::string_view::size_type &position) + -> std::conditional_t { if (position >= input.size()) { - return std::string{}; + if constexpr (!CheckOnly) { + return std::string{}; + } else { + return; + } } if (input[position] == URI_OPEN_BRACKET) { - return parse_ipv6(input, position); + if constexpr (CheckOnly) { + parse_ipv6(input, position); + return; + } else { + return parse_ipv6(input, position); + } } const auto start = position; @@ -226,23 +274,31 @@ auto parse_host(const std::string_view input, } } - if (position == start) { - return std::string{}; - } + if constexpr (!CheckOnly) { + if (position == start) { + return std::string{}; + } - return std::string{input.substr(start, position - start)}; + return std::string{input.substr(start, position - start)}; + } } +template auto parse_userinfo(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { const auto start = position; while (position < input.size()) { const auto current = input[position]; if (current == URI_AT) { - std::string userinfo{input.substr(start, position - start)}; - position += 1; - return userinfo; + if constexpr (CheckOnly) { + position += 1; + return true; + } else { + std::string userinfo{input.substr(start, position 
- start)}; + position += 1; + return userinfo; + } } if (current == URI_PERCENT) { @@ -257,19 +313,32 @@ auto parse_userinfo(const std::string_view input, } position = start; - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } +template auto parse_path(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size()) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto first_char = input[position]; if (first_char == URI_QUESTION || first_char == URI_HASH) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } const auto start = position; @@ -290,14 +359,23 @@ auto parse_path(const std::string_view input, } } - return std::string{input.substr(start, position - start)}; + if constexpr (CheckOnly) { + return true; + } else { + return std::string{input.substr(start, position - start)}; + } } +template auto parse_query(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || input[position] != URI_QUESTION) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } position += 1; @@ -321,14 +399,23 @@ auto parse_query(const std::string_view input, } } - return std::string{input.substr(start, position - start)}; + if constexpr (CheckOnly) { + return true; + } else { + return std::string{input.substr(start, position - start)}; + } } +template auto parse_fragment(const std::string_view input, std::string_view::size_type &position) - -> std::optional { + -> std::conditional_t> { if (position >= input.size() || input[position] != URI_HASH) { - return std::nullopt; + if constexpr (CheckOnly) { + return false; + } else { + return std::nullopt; + } } position += 1; @@ -349,59 
+436,83 @@ auto parse_fragment(const std::string_view input, } } - return std::string{input.substr(start, position - start)}; + if constexpr (CheckOnly) { + return true; + } else { + return std::string{input.substr(start, position - start)}; + } } -} // namespace - -namespace sourcemeta::core { - +template auto parse_authority(const std::string_view input, std::string_view::size_type &position, - std::optional &userinfo, - std::optional &host, - std::optional &port) -> void { - auto userinfo_raw = parse_userinfo(input, position); - if (userinfo_raw.has_value()) { - uri_unescape_selective_inplace(userinfo_raw.value()); - userinfo = std::move(userinfo_raw.value()); - } + [[maybe_unused]] std::optional &userinfo, + [[maybe_unused]] std::optional &host, + [[maybe_unused]] std::optional &port) + -> void { + if constexpr (CheckOnly) { + parse_userinfo(input, position); + parse_host(input, position); + } else { + auto userinfo_raw = parse_userinfo(input, position); + if (userinfo_raw.has_value()) { + uri_unescape_selective_inplace(userinfo_raw.value()); + userinfo = std::move(userinfo_raw.value()); + } - auto host_raw = parse_host(input, position); - uri_unescape_selective_inplace(host_raw); - host = std::move(host_raw); + auto host_raw = parse_host(input, position); + uri_unescape_selective_inplace(host_raw); + host = std::move(host_raw); + } // RFC 3986: authority = [ userinfo "@" ] host [ ":" port ] // port = *DIGIT (empty port after colon is valid) if (position < input.size() && input[position] == URI_COLON) { position += 1; - const auto port_value = parse_port(input, position); - if (port_value.has_value()) { - port = port_value.value(); + if constexpr (CheckOnly) { + parse_port(input, position); + } else { + const auto port_start = position; + const auto port_value = parse_port(input, position); + if (port_value.has_value()) { + if (port_value.value() > std::numeric_limits::max()) { + throw sourcemeta::core::URIParseError{ + static_cast(port_start + 1)}; + } + + 
port = static_cast(port_value.value()); + } } } if (position < input.size() && input[position] == URI_AT) { - throw URIParseError{static_cast(position + 1)}; + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; } } -auto URI::parse(const std::string_view input) -> void { - assert(!this->scheme_.has_value()); - assert(!this->userinfo_.has_value()); - assert(!this->host_.has_value()); - assert(!this->port_.has_value()); - assert(!this->path_.has_value()); - assert(!this->query_.has_value()); - assert(!this->fragment_.has_value()); - +template +auto do_parse(const std::string_view input, + [[maybe_unused]] std::optional &scheme, + [[maybe_unused]] std::optional &userinfo, + [[maybe_unused]] std::optional &host, + [[maybe_unused]] std::optional &port, + [[maybe_unused]] std::optional &path, + [[maybe_unused]] std::optional &query, + [[maybe_unused]] std::optional &fragment) -> bool { if (input.empty()) { - return; + return false; } std::string_view::size_type position{0}; - this->scheme_ = parse_scheme(input, position); + bool has_scheme; + if constexpr (CheckOnly) { + has_scheme = parse_scheme(input, position); + } else { + scheme = parse_scheme(input, position); + has_scheme = scheme.has_value(); + } const auto has_authority = position + 1 < input.size() && input[position] == URI_SLASH && @@ -409,56 +520,127 @@ auto URI::parse(const std::string_view input) -> void { if (has_authority) { position += 2; - parse_authority(input, position, this->userinfo_, this->host_, this->port_); + parse_authority(input, position, userinfo, host, port); // RFC 3986: hier-part = "//" authority path-abempty // path-abempty = *( "/" segment ), so after authority the next character // must be "/", "?", "#", or end-of-input if (position < input.size() && input[position] != URI_SLASH && input[position] != URI_QUESTION && input[position] != URI_HASH) { - throw URIParseError{static_cast(position + 1)}; + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; } 
} - auto path = parse_path(input, position); + const auto path_start = position; + bool has_path; + if constexpr (CheckOnly) { + has_path = parse_path(input, position); + } else { + auto parsed_path = parse_path(input, position); + has_path = parsed_path.has_value(); + + if (has_path) { + // RFC 3986: relative-ref without authority uses path-noscheme, + // where the first segment must not contain a colon + if (!has_scheme && !has_authority) { + const auto &path_value = parsed_path.value(); + if (!path_value.empty() && path_value[0] != URI_SLASH) { + const auto first_slash = path_value.find(URI_SLASH); + const auto colon_pos = path_value.find(URI_COLON); + if (colon_pos != std::string::npos && + (first_slash == std::string::npos || colon_pos < first_slash)) { + throw sourcemeta::core::URIParseError{ + static_cast(colon_pos + 1)}; + } + } + } - if (path.has_value()) { - // RFC 3986: relative-ref without authority uses path-noscheme, - // where the first segment must not contain a colon - if (!this->scheme_.has_value() && !has_authority) { - const auto &path_value = path.value(); - if (!path_value.empty() && path_value[0] != URI_SLASH) { - const auto first_slash = path_value.find(URI_SLASH); - const auto colon_pos = path_value.find(URI_COLON); - if (colon_pos != std::string::npos && - (first_slash == std::string::npos || colon_pos < first_slash)) { - throw URIParseError{static_cast(colon_pos + 1)}; + uri_unescape_selective_inplace(parsed_path.value()); + path = std::move(parsed_path.value()); + } else if (has_authority || has_scheme) { + if (input.ends_with(URI_SLASH) || input == "/") { + path = "/"; + } + } + } + + if constexpr (CheckOnly) { + if (has_path && !has_scheme && !has_authority) { + if (input[path_start] != URI_SLASH) { + const auto path_view = input.substr(path_start, position - path_start); + const auto first_slash = path_view.find(URI_SLASH); + const auto colon_pos = path_view.find(URI_COLON); + if (colon_pos != std::string_view::npos && + (first_slash 
== std::string_view::npos || + colon_pos < first_slash)) { + throw sourcemeta::core::URIParseError{ + static_cast(path_start + colon_pos + 1)}; } } } + } + + if constexpr (CheckOnly) { + parse_query(input, position); + parse_fragment(input, position); + } else { + auto parsed_query = parse_query(input, position); + if (parsed_query.has_value()) { + uri_unescape_selective_inplace(parsed_query.value()); + query = std::move(parsed_query.value()); + } - uri_unescape_selective_inplace(path.value()); - this->path_ = std::move(path.value()); - } else if (has_authority || this->scheme_.has_value()) { - if (input.ends_with(URI_SLASH) || input == "/") { - this->path_ = "/"; + auto parsed_fragment = parse_fragment(input, position); + if (parsed_fragment.has_value()) { + uri_unescape_selective_inplace(parsed_fragment.value()); + fragment = std::move(parsed_fragment.value()); } } - auto query = parse_query(input, position); - if (query.has_value()) { - uri_unescape_selective_inplace(query.value()); - this->query_ = std::move(query.value()); + if (position < input.size()) { + throw sourcemeta::core::URIParseError{ + static_cast(position + 1)}; } - auto fragment = parse_fragment(input, position); - if (fragment.has_value()) { - uri_unescape_selective_inplace(fragment.value()); - this->fragment_ = std::move(fragment.value()); + return has_scheme; +} + +} // namespace + +namespace sourcemeta::core { + +auto URI::parse(const std::string_view input) -> void { + assert(!this->scheme_.has_value()); + assert(!this->userinfo_.has_value()); + assert(!this->host_.has_value()); + assert(!this->port_.has_value()); + assert(!this->path_.has_value()); + assert(!this->query_.has_value()); + assert(!this->fragment_.has_value()); + do_parse(input, this->scheme_, this->userinfo_, this->host_, + this->port_, this->path_, this->query_, this->fragment_); +} + +auto URI::is_uri(const std::string_view input) noexcept -> bool { + try { + std::optional scheme, userinfo, host, path, query, fragment; + 
std::optional port; + return do_parse(input, scheme, userinfo, host, port, path, query, + fragment); + } catch (...) { + return false; } +} - if (position < input.size()) { - throw URIParseError{static_cast(position + 1)}; +auto URI::is_uri_reference(const std::string_view input) noexcept -> bool { + try { + std::optional scheme, userinfo, host, path, query, fragment; + std::optional port; + do_parse(input, scheme, userinfo, host, port, path, query, fragment); + return true; + } catch (...) { + return false; } } diff --git a/test/uri/uri_parse_test.cc b/test/uri/uri_parse_test.cc index 09d34cbb8..e90cb48dc 100644 --- a/test/uri/uri_parse_test.cc +++ b/test/uri/uri_parse_test.cc @@ -8,6 +8,8 @@ TEST(URI_parse, syntax_error_1) { EXPECT_THROW(sourcemeta::core::URI uri{"//[::44.1"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("//[::44.1")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("//[::44.1")); } // Inspired from @@ -16,11 +18,17 @@ TEST(URI_parse, syntax_error_1) { TEST(URI_parse, syntax_error_2) { EXPECT_THROW(sourcemeta::core::URI uri{"http://moo:21@moo:21@moo/"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://moo:21@moo:21@moo/")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://moo:21@moo:21@moo/")); } TEST(URI_parse, syntax_error_3) { EXPECT_THROW(sourcemeta::core::URI uri{"http://moo:21@moo:21@moo:21/"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://moo:21@moo:21@moo:21/")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://moo:21@moo:21@moo:21/")); } // Inspired from @@ -29,15 +37,24 @@ TEST(URI_parse, syntax_error_3) { TEST(URI_parse, syntax_error_4) { EXPECT_THROW(sourcemeta::core::URI uri{"http://[vA.123456"}, // missing "]" sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://[vA.123456")); + 
EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("http://[vA.123456")); } TEST(URI_parse, syntax_error_5) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.example.com#/foo%bar"}, sourcemeta::core::URIParseError); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%bar")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%bar")); } TEST(URI_parse, urn_with_slash) { // RFC 8141 explicitly allows "/" in URN NSS + EXPECT_TRUE(sourcemeta::core::URI::is_uri("urn:example:foo/bar/baz")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("urn:example:foo/bar/baz")); sourcemeta::core::URI uri{"urn:example:foo/bar/baz"}; EXPECT_EQ(uri.scheme().value(), "urn"); EXPECT_EQ(uri.path().value(), "example:foo/bar/baz"); @@ -46,6 +63,9 @@ TEST(URI_parse, urn_with_slash) { TEST(URI_parse, urn_with_numeric_path) { // Example from RFC 8141 + EXPECT_TRUE(sourcemeta::core::URI::is_uri("urn:example:1/406/47452/2")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("urn:example:1/406/47452/2")); sourcemeta::core::URI uri{"urn:example:1/406/47452/2"}; EXPECT_EQ(uri.scheme().value(), "urn"); EXPECT_EQ(uri.path().value(), "example:1/406/47452/2"); @@ -54,6 +74,8 @@ TEST(URI_parse, urn_with_numeric_path) { TEST(URI_parse, urn_with_query) { // RFC 8141 allows query components in URNs + EXPECT_TRUE(sourcemeta::core::URI::is_uri("urn:example:foo?+bar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("urn:example:foo?+bar")); sourcemeta::core::URI uri{"urn:example:foo?+bar"}; EXPECT_EQ(uri.scheme().value(), "urn"); EXPECT_EQ(uri.path().value(), "example:foo"); @@ -63,6 +85,8 @@ TEST(URI_parse, urn_with_query) { TEST(URI_parse, urn_with_fragment) { // RFC 8141 allows fragments in URNs + EXPECT_TRUE(sourcemeta::core::URI::is_uri("urn:example:foo#bar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("urn:example:foo#bar")); sourcemeta::core::URI uri{"urn:example:foo#bar"}; 
EXPECT_EQ(uri.scheme().value(), "urn"); EXPECT_EQ(uri.path().value(), "example:foo"); @@ -73,43 +97,70 @@ TEST(URI_parse, urn_with_fragment) { TEST(URI_parse, syntax_error_percent_at_end) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.example.com#/foo%"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("https://www.example.com#/foo%")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("https://www.example.com#/foo%")); } TEST(URI_parse, syntax_error_percent_one_hex) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.example.com#/foo%2"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("https://www.example.com#/foo%2")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%2")); } TEST(URI_parse, syntax_error_percent_non_hex) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.example.com#/foo%ZZ"}, sourcemeta::core::URIParseError); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%ZZ")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%ZZ")); } TEST(URI_parse, syntax_error_percent_in_path) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.example.com/foo%bar"}, sourcemeta::core::URIParseError); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri("https://www.example.com/foo%bar")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com/foo%bar")); } TEST(URI_parse, syntax_error_percent_in_query) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.example.com?foo%bar"}, sourcemeta::core::URIParseError); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri("https://www.example.com?foo%bar")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com?foo%bar")); } TEST(URI_parse, syntax_error_percent_in_host) { EXPECT_THROW(sourcemeta::core::URI uri{"https://www.exam%ple.com"}, sourcemeta::core::URIParseError); + 
EXPECT_FALSE(sourcemeta::core::URI::is_uri("https://www.exam%ple.com")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("https://www.exam%ple.com")); } // RFC 3986: fragment = *( pchar / "/" / "?" ) TEST(URI_parse, syntax_error_double_fragment_delimiter) { EXPECT_THROW(sourcemeta::core::URI uri{"http://example.com/#frag#ment"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://example.com/#frag#ment")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://example.com/#frag#ment")); } // RFC 3986: "[" and "]" are gen-delims only allowed in IP-literal within host. TEST(URI_parse, syntax_error_brackets_in_query) { EXPECT_THROW(sourcemeta::core::URI uri{"http://example.com/?q=[value]"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://example.com/?q=[value]")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://example.com/?q=[value]")); } // RFC 3986: relative-ref path-noscheme means the first segment of a @@ -117,43 +168,92 @@ TEST(URI_parse, syntax_error_brackets_in_query) { TEST(URI_parse, syntax_error_digit_prefix_scheme_like) { EXPECT_THROW(sourcemeta::core::URI uri{"2http://example.com"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("2http://example.com")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("2http://example.com")); } TEST(URI_parse, syntax_error_underscore_scheme_like) { EXPECT_THROW(sourcemeta::core::URI uri{"my_scheme://example.com"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("my_scheme://example.com")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("my_scheme://example.com")); } TEST(URI_parse, syntax_error_colon_slash_slash) { EXPECT_THROW(sourcemeta::core::URI uri{"://example.com"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("://example.com")); + 
EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("://example.com")); } TEST(URI_parse, syntax_error_hyphen_prefix_scheme_like) { EXPECT_THROW(sourcemeta::core::URI uri{"-http://example.com"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("-http://example.com")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("-http://example.com")); } // RFC 3986: path-abempty after authority must start with "/" or be empty TEST(URI_parse, syntax_error_port_trailing_alpha) { EXPECT_THROW(sourcemeta::core::URI uri{"http://example.com:80a"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://example.com:80a")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://example.com:80a")); } TEST(URI_parse, syntax_error_port_trailing_range) { EXPECT_THROW(sourcemeta::core::URI uri{"http://example.com:80-90"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://example.com:80-90")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://example.com:80-90")); +} + +TEST(URI_parse, syntax_error_port_exceeds_uint32) { + EXPECT_THROW(sourcemeta::core::URI uri{"http://example.com:4294967296"}, + sourcemeta::core::URIParseError); + // RFC 3986: port = *DIGIT, so any digit sequence is syntactically valid + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com:4294967296")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://example.com:4294967296")); +} + +TEST(URI_parse, syntax_error_port_overflow_unsigned_long) { + EXPECT_THROW( + sourcemeta::core::URI uri{ + "http://example.com:999999999999999999999999999999"}, + sourcemeta::core::URIParseError); + // RFC 3986: port = *DIGIT, so any digit sequence is syntactically valid + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://example.com:999999999999999999999999999999")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://example.com:999999999999999999999999999999")); +} + 
+TEST(URI_parse, rfc3986_port_max_uint32) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com:4294967295")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://example.com:4294967295")); + sourcemeta::core::URI uri{"http://example.com:4294967295"}; + EXPECT_EQ(uri.port().value(), 4294967295U); } TEST(URI_parse, syntax_error_bare_ipv6_no_brackets) { EXPECT_THROW(sourcemeta::core::URI uri{"http://2001:db8::1"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://2001:db8::1")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("http://2001:db8::1")); } // RFC 3986: IPv6 brackets must contain valid hex digits, colons, and dots TEST(URI_parse, syntax_error_ipv6_invalid_hex) { EXPECT_THROW(sourcemeta::core::URI uri{"http://[2001:db8::gggg]"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://[2001:db8::gggg]")); + EXPECT_FALSE( + sourcemeta::core::URI::is_uri_reference("http://[2001:db8::gggg]")); } // RFC 3986: IPvFuture = "v" 1*HEXDIG "." 
1*( unreserved / sub-delims / ":" ) @@ -161,19 +261,27 @@ TEST(URI_parse, syntax_error_ipv6_invalid_hex) { TEST(URI_parse, syntax_error_ipvfuture_bare_v) { EXPECT_THROW(sourcemeta::core::URI uri{"http://[v]"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://[v]")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("http://[v]")); } TEST(URI_parse, syntax_error_ipvfuture_missing_dot) { EXPECT_THROW(sourcemeta::core::URI uri{"http://[vabc]"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://[vabc]")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("http://[vabc]")); } TEST(URI_parse, syntax_error_ipvfuture_non_hex_version) { EXPECT_THROW(sourcemeta::core::URI uri{"http://[vZ.foo]"}, sourcemeta::core::URIParseError); + EXPECT_FALSE(sourcemeta::core::URI::is_uri("http://[vZ.foo]")); + EXPECT_FALSE(sourcemeta::core::URI::is_uri_reference("http://[vZ.foo]")); } TEST(URI_parse, success_ipvfuture_valid) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://[vFF.a:b]")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("http://[vFF.a:b]")); sourcemeta::core::URI uri{"http://[vFF.a:b]"}; EXPECT_EQ(uri.host().value(), "vFF.a:b"); EXPECT_EQ(uri.recompose(), "http://[vFF.a:b]"); @@ -181,12 +289,17 @@ TEST(URI_parse, success_ipvfuture_valid) { // RFC 3986: port = *DIGIT (empty port after colon is valid) TEST(URI_parse, rfc3986_empty_port) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com:")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("http://example.com:")); sourcemeta::core::URI uri{"http://example.com:"}; EXPECT_EQ(uri.host().value(), "example.com"); EXPECT_FALSE(uri.port().has_value()); } TEST(URI_parse, rfc3986_empty_port_with_path) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com:/path")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://example.com:/path")); sourcemeta::core::URI uri{"http://example.com:/path"}; 
EXPECT_EQ(uri.host().value(), "example.com"); EXPECT_FALSE(uri.port().has_value()); @@ -197,6 +310,10 @@ TEST(URI_parse, rfc3986_empty_port_with_path) { // https://github.com/uriparser/uriparser/blob/bf0174e83164a4659c51c135399478bec389eafa/test/test.cpp#L315 TEST(URI_parse, success_1) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri( + "//user:pass@[::1]:80/segment/index.html?query#frag")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "//user:pass@[::1]:80/segment/index.html?query#frag")); sourcemeta::core::URI uri{ "//user:pass@[::1]:80/segment/index.html?query#frag"}; EXPECT_EQ(uri.recompose(), @@ -204,11 +321,19 @@ TEST(URI_parse, success_1) { } TEST(URI_parse, success_2) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://[::1]:80/segment/index.html?query#frag")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://[::1]:80/segment/index.html?query#frag")); sourcemeta::core::URI uri{"http://[::1]:80/segment/index.html?query#frag"}; EXPECT_EQ(uri.recompose(), "http://[::1]:80/segment/index.html?query#frag"); } TEST(URI_parse, success_3) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://user:pass@[::1]/segment/index.html?query#frag")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://user:pass@[::1]/segment/index.html?query#frag")); sourcemeta::core::URI uri{ "http://user:pass@[::1]/segment/index.html?query#frag"}; EXPECT_EQ(uri.recompose(), @@ -216,11 +341,19 @@ TEST(URI_parse, success_3) { } TEST(URI_parse, success_4) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("http://user:pass@[::1]:80?query#frag")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://user:pass@[::1]:80?query#frag")); sourcemeta::core::URI uri{"http://user:pass@[::1]:80?query#frag"}; EXPECT_EQ(uri.recompose(), "http://user:pass@[::1]:80?query#frag"); } TEST(URI_parse, success_5) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://user:pass@[::1]:80/segment/index.html#frag")); + 
EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://user:pass@[::1]:80/segment/index.html#frag")); sourcemeta::core::URI uri{ "http://user:pass@[::1]:80/segment/index.html#frag"}; EXPECT_EQ(uri.recompose(), @@ -228,6 +361,10 @@ TEST(URI_parse, success_5) { } TEST(URI_parse, success_6) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://user:pass@[::1]:80/segment/index.html?query")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://user:pass@[::1]:80/segment/index.html?query")); sourcemeta::core::URI uri{ "http://user:pass@[::1]:80/segment/index.html?query"}; EXPECT_EQ(uri.recompose(), @@ -235,85 +372,139 @@ TEST(URI_parse, success_6) { } TEST(URI_parse, success_7) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("ftp://host:21/gnu/")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("ftp://host:21/gnu/")); sourcemeta::core::URI uri{"ftp://host:21/gnu/"}; EXPECT_EQ(uri.recompose(), "ftp://host:21/gnu/"); } TEST(URI_parse, success_with_percent_25_stays_encoded) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%25bar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%25bar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%25bar"}; EXPECT_EQ(uri.fragment(), "/foo%bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/foo%25bar"); } TEST(URI_parse, success_with_space_percent_20_stays_encoded) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com/foo%20bar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com/foo%20bar")); sourcemeta::core::URI uri{"https://www.example.com/foo%20bar"}; EXPECT_EQ(uri.path(), "/foo bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com/foo%20bar"); } TEST(URI_parse, success_with_equals_percent_3D_stays_encoded) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com?foo%3Dbar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + 
"https://www.example.com?foo%3Dbar")); sourcemeta::core::URI uri{"https://www.example.com?foo%3Dbar"}; EXPECT_EQ(uri.query(), "foo=bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com?foo=bar"); } TEST(URI_parse, success_with_slash_percent_2F_stays_encoded) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%2Fbar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%2Fbar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%2Fbar"}; EXPECT_EQ(uri.fragment(), "/foo/bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/foo/bar"); } TEST(URI_parse, success_with_lowercase_normalized_to_uppercase) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%2fbar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%2fbar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%2fbar"}; EXPECT_EQ(uri.fragment(), "/foo/bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/foo/bar"); } TEST(URI_parse, success_unreserved_char_decoded_hyphen) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%2Dbar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%2Dbar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%2Dbar"}; EXPECT_EQ(uri.fragment(), "/foo-bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/foo-bar"); } TEST(URI_parse, success_unreserved_char_decoded_tilde) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%7Ebar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%7Ebar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%7Ebar"}; EXPECT_EQ(uri.fragment(), "/foo~bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/foo~bar"); } TEST(URI_parse, success_unreserved_char_decoded_underscore) { + EXPECT_TRUE( + 
sourcemeta::core::URI::is_uri("https://www.example.com#/foo%5Fbar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%5Fbar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%5Fbar"}; EXPECT_EQ(uri.fragment(), "/foo_bar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/foo_bar"); } TEST(URI_parse, success_unreserved_char_decoded_letter) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("https://www.example.com#/foo%41bar")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "https://www.example.com#/foo%41bar")); sourcemeta::core::URI uri{"https://www.example.com#/foo%41bar"}; EXPECT_EQ(uri.fragment(), "/fooAbar"); EXPECT_EQ(uri.recompose(), "https://www.example.com#/fooAbar"); } TEST(URI_parse, relative_1) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("one/two/three")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("one/two/three")); sourcemeta::core::URI uri{"one/two/three"}; EXPECT_EQ(uri.recompose(), "one/two/three"); } TEST(URI_parse, relative_2) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("/one/two/three")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("/one/two/three")); sourcemeta::core::URI uri{"/one/two/three"}; EXPECT_EQ(uri.recompose(), "/one/two/three"); } TEST(URI_parse, relative_3) { + EXPECT_FALSE( + sourcemeta::core::URI::is_uri("//user:pass@localhost/one/two/three")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "//user:pass@localhost/one/two/three")); sourcemeta::core::URI uri{"//user:pass@localhost/one/two/three"}; EXPECT_EQ(uri.recompose(), "//user:pass@localhost/one/two/three"); } TEST(URI_parse, real_life_1) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://sourceforge.net/projects/uriparser/")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://sourceforge.net/projects/uriparser/")); sourcemeta::core::URI uri{"http://sourceforge.net/projects/uriparser/"}; EXPECT_EQ(uri.recompose(), "http://sourceforge.net/projects/uriparser/"); 
} TEST(URI_parse, real_life_2) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://sourceforge.net/project/platformdownload.php?group_id=182840")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://sourceforge.net/project/platformdownload.php?group_id=182840")); sourcemeta::core::URI uri{ "http://sourceforge.net/project/platformdownload.php?group_id=182840"}; EXPECT_EQ( @@ -322,36 +513,57 @@ TEST(URI_parse, real_life_2) { } TEST(URI_parse, mailto) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("mailto:test@example.com")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("mailto:test@example.com")); sourcemeta::core::URI uri{"mailto:test@example.com"}; EXPECT_EQ(uri.recompose(), "mailto:test@example.com"); } TEST(URI_parse, relative_4) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("../../")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("../../")); sourcemeta::core::URI uri{"../../"}; EXPECT_EQ(uri.recompose(), "../../"); } TEST(URI_parse, root_path) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("/")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("/")); sourcemeta::core::URI uri{"/"}; EXPECT_EQ(uri.recompose(), "/"); } TEST(URI_parse, empty_uri) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("")); sourcemeta::core::URI uri{""}; EXPECT_EQ(uri.recompose(), ""); } TEST(URI_parse, file_uri) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("file:///bin/bash")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("file:///bin/bash")); sourcemeta::core::URI uri{"file:///bin/bash"}; EXPECT_EQ(uri.recompose(), "file:///bin/bash"); } TEST(URI_parse, percent_encoding) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://www.example.com/name%20with%20spaces/")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://www.example.com/name%20with%20spaces/")); sourcemeta::core::URI uri{"http://www.example.com/name%20with%20spaces/"}; EXPECT_EQ(uri.recompose(), 
"http://www.example.com/name%20with%20spaces/"); } TEST(URI_parse, rfc3986_complete_uri) { + EXPECT_TRUE( + sourcemeta::core::URI::is_uri("http://user:pass@example.com:8080/path/to/" + "resource?query=value&key=data#section")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://user:pass@example.com:8080/path/to/" + "resource?query=value&key=data#section")); sourcemeta::core::URI uri{"http://user:pass@example.com:8080/path/to/" "resource?query=value&key=data#section"}; EXPECT_EQ(uri.scheme().value(), "http"); @@ -364,6 +576,8 @@ TEST(URI_parse, rfc3986_complete_uri) { } TEST(URI_parse, rfc3986_minimal_uri) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("s:p")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("s:p")); sourcemeta::core::URI uri{"s:p"}; EXPECT_EQ(uri.scheme().value(), "s"); EXPECT_TRUE(uri.path().has_value()); @@ -371,24 +585,35 @@ TEST(URI_parse, rfc3986_minimal_uri) { } TEST(URI_parse, rfc3986_authority_without_userinfo) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com/path")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://example.com/path")); sourcemeta::core::URI uri{"http://example.com/path"}; EXPECT_FALSE(uri.userinfo().has_value()); EXPECT_EQ(uri.host().value(), "example.com"); } TEST(URI_parse, rfc3986_authority_with_empty_userinfo) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://@example.com/path")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://@example.com/path")); sourcemeta::core::URI uri{"http://@example.com/path"}; EXPECT_TRUE(uri.userinfo().has_value()); EXPECT_EQ(uri.userinfo().value(), ""); } TEST(URI_parse, rfc3986_authority_with_userinfo_no_password) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://user@example.com/path")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://user@example.com/path")); sourcemeta::core::URI uri{"http://user@example.com/path"}; EXPECT_TRUE(uri.userinfo().has_value()); EXPECT_EQ(uri.userinfo().value(), 
"user"); } TEST(URI_parse, rfc3986_path_absolute_no_authority) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("/absolute/path")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("/absolute/path")); sourcemeta::core::URI uri{"/absolute/path"}; EXPECT_FALSE(uri.scheme().has_value()); EXPECT_FALSE(uri.host().has_value()); @@ -397,6 +622,8 @@ TEST(URI_parse, rfc3986_path_absolute_no_authority) { } TEST(URI_parse, rfc3986_path_relative_simple) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("relative/path")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("relative/path")); sourcemeta::core::URI uri{"relative/path"}; EXPECT_FALSE(uri.scheme().has_value()); EXPECT_FALSE(uri.host().has_value()); @@ -405,6 +632,8 @@ TEST(URI_parse, rfc3986_path_relative_simple) { } TEST(URI_parse, rfc3986_query_only) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("?query=value")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("?query=value")); sourcemeta::core::URI uri{"?query=value"}; EXPECT_FALSE(uri.scheme().has_value()); EXPECT_TRUE(uri.query().has_value()); @@ -413,6 +642,8 @@ TEST(URI_parse, rfc3986_query_only) { } TEST(URI_parse, rfc3986_fragment_only) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("#fragment")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("#fragment")); sourcemeta::core::URI uri{"#fragment"}; EXPECT_FALSE(uri.scheme().has_value()); EXPECT_TRUE(uri.fragment().has_value()); @@ -421,21 +652,33 @@ TEST(URI_parse, rfc3986_fragment_only) { } TEST(URI_parse, rfc3986_percent_encoded_unreserved) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com/%7Euser/path")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://example.com/%7Euser/path")); sourcemeta::core::URI uri{"http://example.com/%7Euser/path"}; EXPECT_EQ(uri.recompose(), "http://example.com/~user/path"); } TEST(URI_parse, rfc3986_percent_encoded_reserved) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://example.com/path%2Fwith%2Fslashes")); + 
EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://example.com/path%2Fwith%2Fslashes")); sourcemeta::core::URI uri{"http://example.com/path%2Fwith%2Fslashes"}; EXPECT_EQ(uri.recompose(), "http://example.com/path/with/slashes"); } TEST(URI_parse, rfc3986_mixed_case_percent_encoding) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com/%3a%3A%3b%3B")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://example.com/%3a%3A%3b%3B")); sourcemeta::core::URI uri{"http://example.com/%3a%3A%3b%3B"}; EXPECT_EQ(uri.recompose(), "http://example.com/::;;"); } TEST(URI_parse, rfc3986_authority_only) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("//example.com")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("//example.com")); sourcemeta::core::URI uri{"//example.com"}; EXPECT_FALSE(uri.scheme().has_value()); EXPECT_EQ(uri.host().value(), "example.com"); @@ -443,6 +686,8 @@ TEST(URI_parse, rfc3986_authority_only) { } TEST(URI_parse, rfc3986_authority_with_port_no_path) { + EXPECT_FALSE(sourcemeta::core::URI::is_uri("//example.com:8080")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("//example.com:8080")); sourcemeta::core::URI uri{"//example.com:8080"}; EXPECT_FALSE(uri.scheme().has_value()); EXPECT_EQ(uri.host().value(), "example.com"); @@ -451,12 +696,18 @@ TEST(URI_parse, rfc3986_authority_with_port_no_path) { } TEST(URI_parse, rfc3986_ipv4_dotted_decimal) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://127.0.0.1/path")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("http://127.0.0.1/path")); sourcemeta::core::URI uri{"http://127.0.0.1/path"}; EXPECT_EQ(uri.host().value(), "127.0.0.1"); EXPECT_EQ(uri.recompose(), "http://127.0.0.1/path"); } TEST(URI_parse, rfc3986_ipv6_full_form) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri( + "http://[2001:0db8:0000:0000:0000:0000:0000:0001]/path")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference( + "http://[2001:0db8:0000:0000:0000:0000:0000:0001]/path")); 
sourcemeta::core::URI uri{ "http://[2001:0db8:0000:0000:0000:0000:0000:0001]/path"}; // uriparser normalizes IPv6 but may not fully compress leading zeros @@ -469,18 +720,26 @@ TEST(URI_parse, rfc3986_ipv6_full_form) { } TEST(URI_parse, rfc3986_ipv6_compressed) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://[::1]/path")); + EXPECT_TRUE(sourcemeta::core::URI::is_uri_reference("http://[::1]/path")); sourcemeta::core::URI uri{"http://[::1]/path"}; EXPECT_EQ(uri.host().value(), "::1"); EXPECT_EQ(uri.recompose(), "http://[::1]/path"); } TEST(URI_parse, rfc3986_empty_path_with_query) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com?query")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://example.com?query")); sourcemeta::core::URI uri{"http://example.com?query"}; EXPECT_FALSE(uri.path().has_value()); EXPECT_EQ(uri.query().value(), "query"); } TEST(URI_parse, rfc3986_empty_path_with_fragment) { + EXPECT_TRUE(sourcemeta::core::URI::is_uri("http://example.com#fragment")); + EXPECT_TRUE( + sourcemeta::core::URI::is_uri_reference("http://example.com#fragment")); sourcemeta::core::URI uri{"http://example.com#fragment"}; EXPECT_FALSE(uri.path().has_value()); EXPECT_EQ(uri.fragment().value(), "fragment");