Skip to content

Commit b0ebae3

Browse files
toughengineer authored and BillyONeal committed
Added URI resolution according to RFC3986 (#897)
Added URI resolution according to RFC3986, Section 5 (https://tools.ietf.org/html/rfc3986#section-5)
1 parent 625c666 commit b0ebae3

File tree

4 files changed

+195
-9
lines changed

4 files changed

+195
-9
lines changed

Release/include/cpprest/base_uri.h

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -379,12 +379,20 @@ namespace web {
379379
/// <summary>
380380
/// Returns the full (encoded) URI as a string.
381381
/// </summary>
382-
/// <returns>The full encoded URI string.</returns>
382+
/// <returns>The full encoded URI string.</returns>
383383
utility::string_t to_string() const
384384
{
// Returned by value, so the caller receives its own copy of the stored m_uri string.
385385
return m_uri;
386386
}
387387

388+
/// <summary>
389+
/// Returns a URI resolved against <c>this</c> as the base URI
390+
/// according to RFC3986, Section 5 (https://tools.ietf.org/html/rfc3986#section-5).
391+
/// </summary>
392+
/// <param name="relativeUri">The relative URI to be resolved against <c>this</c> as base.</param>
393+
/// <returns>The new resolved URI string.</returns>
394+
_ASYNCRTIMP utility::string_t resolve_uri(const utility::string_t &relativeUri) const;
395+
388396
_ASYNCRTIMP bool operator == (const uri &other) const;
389397

390398
bool operator < (const uri &other) const
@@ -413,4 +421,4 @@ namespace web {
413421
details::uri_components m_components;
414422
};
415423

416-
} // namespace web
424+
} // namespace web

Release/src/uri/uri.cpp

Lines changed: 116 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,9 @@ namespace web { namespace details
1919
{
2020
namespace
2121
{
22+
// The "." path segment; removeDotSegments skips segments equal to this.
const ::utility::string_t dotSegment = _XPLATSTR(".");
23+
// The ".." path segment; removeDotSegments discards the previously kept segment when it meets this.
const ::utility::string_t dotDotSegment = _XPLATSTR("..");
24+
2225
/// <summary>
2326
/// Unreserved characters are those that are allowed in a URI but do not have a reserved purpose. They include:
2427
/// - A-Z
@@ -423,7 +426,60 @@ namespace
423426
return encoded;
424427
}
425428

426-
}
429+
// 5.2.3. Merge Paths https://tools.ietf.org/html/rfc3986#section-5.2.3
430+
utility::string_t mergePaths(const utility::string_t &base, const utility::string_t &relative)
431+
{
432+
const auto lastSlash = base.rfind(_XPLATSTR('/'));
433+
if (lastSlash == utility::string_t::npos)
434+
{
435+
return base + _XPLATSTR('/') + relative;
436+
}
437+
else if (lastSlash == base.size() - 1)
438+
{
439+
return base + relative;
440+
}
441+
// path contains and does not end with '/', we remove segment after last '/'
442+
return base.substr(0, lastSlash + 1) + relative;
443+
}
444+
445+
// 5.2.4. Remove Dot Segments https://tools.ietf.org/html/rfc3986#section-5.2.4
446+
void removeDotSegments(uri_builder &builder)
447+
{
448+
if (builder.path().find(_XPLATSTR('.')) == utility::string_t::npos)
449+
return;
450+
451+
const auto segments = uri::split_path(builder.path());
452+
std::vector<std::reference_wrapper<const utility::string_t>> result;
453+
for (auto& segment : segments)
454+
{
455+
if (segment == dotSegment)
456+
continue;
457+
else if (segment != dotDotSegment)
458+
result.push_back(segment);
459+
else if (!result.empty())
460+
result.pop_back();
461+
}
462+
if (result.empty())
463+
{
464+
builder.set_path(utility::string_t());
465+
return;
466+
}
467+
utility::string_t path = result.front().get();
468+
for (size_t i = 1; i != result.size(); ++i)
469+
{
470+
path += _XPLATSTR('/');
471+
path += result[i].get();
472+
}
473+
if (segments.back() == dotDotSegment
474+
|| segments.back() == dotSegment
475+
|| builder.path().back() == _XPLATSTR('/'))
476+
{
477+
path += _XPLATSTR('/');
478+
}
479+
480+
builder.set_path(std::move(path));
481+
}
482+
} // namespace
427483

428484
utility::string_t uri_components::join()
429485
{
@@ -448,7 +504,8 @@ utility::string_t uri_components::join()
448504

449505
if (!m_scheme.empty())
450506
{
451-
ret.append(m_scheme).append({ _XPLATSTR(':') });
507+
ret.append(m_scheme);
508+
ret.push_back(_XPLATSTR(':'));
452509
}
453510

454511
if (!m_host.empty())
@@ -473,25 +530,27 @@ utility::string_t uri_components::join()
473530
// only add the leading slash when the host is present
474531
if (!m_host.empty() && m_path.front() != _XPLATSTR('/'))
475532
{
476-
ret.append({ _XPLATSTR('/') });
533+
ret.push_back(_XPLATSTR('/'));
477534
}
478535

479536
ret.append(m_path);
480537
}
481538

482539
if (!m_query.empty())
483540
{
484-
ret.append({ _XPLATSTR('?') }).append(m_query);
541+
ret.push_back(_XPLATSTR('?'));
542+
ret.append(m_query);
485543
}
486544

487545
if (!m_fragment.empty())
488546
{
489-
ret.append({ _XPLATSTR('#') }).append(m_fragment);
547+
ret.push_back(_XPLATSTR('#'));
548+
ret.append(m_fragment);
490549
}
491550

492551
return ret;
493552
}
494-
}
553+
} // namespace details
495554

496555
uri::uri(const details::uri_components &components) : m_components(components)
497556
{
@@ -715,7 +774,7 @@ std::map<utility::string_t, utility::string_t> uri::split_query(const utility::s
715774
utility::string_t key(key_value_pair.begin(), key_value_pair.begin() + equals_index);
716775
utility::string_t value(key_value_pair.begin() + equals_index + 1, key_value_pair.end());
717776
results[key] = value;
718-
}
777+
}
719778
}
720779

721780
return results;
@@ -784,4 +843,54 @@ bool uri::operator == (const uri &other) const
784843
return true;
785844
}
786845

846+
//resolving URI according to RFC3986, Section 5 https://tools.ietf.org/html/rfc3986#section-5
847+
utility::string_t uri::resolve_uri(const utility::string_t &relativeUri) const
848+
{
849+
if (relativeUri.empty())
850+
{
851+
return to_string();
852+
}
853+
854+
if (relativeUri[0] == _XPLATSTR('/')) // starts with '/'
855+
{
856+
if (relativeUri.size() >= 2 && relativeUri[1] == _XPLATSTR('/')) // starts with '//'
857+
{
858+
return this->scheme() + _XPLATSTR(':') + relativeUri;
859+
}
860+
861+
// otherwise relative to root
862+
auto builder = uri_builder(this->authority());
863+
builder.append(relativeUri);
864+
details::removeDotSegments(builder);
865+
return builder.to_string();
866+
}
867+
868+
const auto url = uri(relativeUri);
869+
if (!url.scheme().empty())
870+
return relativeUri;
871+
872+
if (!url.authority().is_empty())
873+
{
874+
return uri_builder(url).set_scheme(this->scheme()).to_string();
875+
}
876+
877+
// relative url
878+
auto builder = uri_builder(*this);
879+
if (url.path() == _XPLATSTR("/") || url.path().empty()) // web::uri considers empty path as '/'
880+
{
881+
if (!url.query().empty())
882+
{
883+
builder.set_query(url.query());
884+
}
885+
}
886+
else if (!this->path().empty())
887+
{
888+
builder.set_path(details::mergePaths(this->path(), url.path()));
889+
details::removeDotSegments(builder);
890+
builder.set_query(url.query());
891+
}
892+
893+
return builder.set_fragment(url.fragment()).to_string();
787894
}
895+
896+
} // namespace web

Release/tests/functional/uri/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ set(SOURCES
88
operator_tests.cpp
99
splitting_tests.cpp
1010
uri_builder_tests.cpp
11+
resolve_uri_tests.cpp
1112
stdafx.cpp
1213
)
1314

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#include "stdafx.h"
2+
3+
using namespace web;
4+
using namespace utility;
5+
6+
namespace tests { namespace functional { namespace uri_tests {
7+
8+
//testing resolution against examples from Section 5.4 https://tools.ietf.org/html/rfc3986#section-5.4
9+
SUITE(resolve_uri_tests)
10+
{
11+
//5.4.1. Normal Examples https://tools.ietf.org/html/rfc3986#section-5.4.1
12+
TEST(resolve_uri_normal)
13+
{
14+
const uri baseUri = U("http://a/b/c/d;p?q");
15+
16+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g:h")), U("g:h"));
17+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g")), U("http://a/b/c/g"));
18+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("./g")), U("http://a/b/c/g"));
19+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g/")), U("http://a/b/c/g/"));
20+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("/g")), U("http://a/g"));
21+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("//g")), U("http://g"));
22+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("?y")), U("http://a/b/c/d;p?y"));
23+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g?y")), U("http://a/b/c/g?y"));
24+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("#s")), U("http://a/b/c/d;p?q#s"));
25+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g#s")), U("http://a/b/c/g#s"));
26+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g?y#s")), U("http://a/b/c/g?y#s"));
27+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U(";x")), U("http://a/b/c/;x"));
28+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g;x")), U("http://a/b/c/g;x"));
29+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g;x?y#s")), U("http://a/b/c/g;x?y#s"));
30+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("")), U("http://a/b/c/d;p?q"));
31+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U(".")), U("http://a/b/c/"));
32+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("./")), U("http://a/b/c/"));
33+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("..")), U("http://a/b/"));
34+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../")), U("http://a/b/"));
35+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../g")), U("http://a/b/g"));
36+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../..")), U("http://a/"));
37+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../../")), U("http://a/"));
38+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../../g")), U("http://a/g"));
39+
}
40+
//5.4.2. Abnormal Examples https://tools.ietf.org/html/rfc3986#section-5.4.2
41+
TEST(resolve_uri_abnormal)
42+
{
43+
const uri baseUri = U("http://a/b/c/d;p?q");
44+
45+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../../../g")), U("http://a/g"));
46+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("../../../../g")), U("http://a/g"));
47+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("/./g")), U("http://a/g"));
48+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("/../g")), U("http://a/g"));
49+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g.")), U("http://a/b/c/g."));
50+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U(".g")), U("http://a/b/c/.g"));
51+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g..")), U("http://a/b/c/g.."));
52+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("..g")), U("http://a/b/c/..g"));
53+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("./../g")), U("http://a/b/g"));
54+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("./g/.")), U("http://a/b/c/g/"));
55+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g/./h")), U("http://a/b/c/g/h"));
56+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g/../h")), U("http://a/b/c/h"));
57+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g;x=1/./y")), U("http://a/b/c/g;x=1/y"));
58+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g;x=1/../y")), U("http://a/b/c/y"));
59+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g?y/./x")), U("http://a/b/c/g?y/./x"));
60+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g?y/../x")), U("http://a/b/c/g?y/../x"));
61+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g#s/./x")), U("http://a/b/c/g#s/./x"));
62+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("g#s/../x")), U("http://a/b/c/g#s/../x"));
63+
VERIFY_ARE_EQUAL(baseUri.resolve_uri(U("http:g")), U("http:g"));
64+
}
65+
66+
} // SUITE(resolve_uri_tests)
67+
68+
}}}

0 commit comments

Comments
 (0)