forked from chromium/chromium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurl_canon_pathurl.cc
144 lines (125 loc) · 5.58 KB
/
url_canon_pathurl.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Functions for canonicalizing "path" URLs. Not to be confused with the path
// of a URL, these are URLs that have no authority section, only a path. For
// example, "javascript:" and "data:".
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
namespace url {
namespace {
// Writes the canonical form of |component| (a range inside |source|) to
// |output| and records where it ended up in |new_component|.
//
// When |separator| is non-zero (e.g. '?' or '#') it is emitted ahead of the
// component; |new_component| begins after it and so does not include it. An
// invalid |component| produces no output at all and resets |new_component|.
template <typename CHAR, typename UCHAR>
void DoCanonicalizePathComponent(const CHAR* source,
                                 const Component& component,
                                 char separator,
                                 CanonOutput* output,
                                 Component* new_component) {
  if (!component.is_valid()) {
    // Nothing to copy for an absent component.
    new_component->reset();
    return;
  }

  if (separator)
    output->push_back(separator);

  // Path URLs (think javascript:) get laxer escaping than regular paths so
  // that script text stays readable: printable ASCII is copied through
  // untouched, and everything else (C0 controls, DEL, non-ASCII) is converted
  // to UTF-8 and percent-escaped.
  // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
  // https://url.spec.whatwg.org/#c0-control-percent-encode-set
  new_component->begin = output->length();
  const size_t limit = static_cast<size_t>(component.end());
  size_t pos = static_cast<size_t>(component.begin);
  while (pos < limit) {
    const UCHAR current = static_cast<UCHAR>(source[pos]);
    const bool is_printable_ascii = current >= 0x20 && current <= 0x7E;
    if (is_printable_ascii) {
      output->push_back(static_cast<char>(current));
    } else {
      // |pos| is handed over by pointer; presumably the helper moves it
      // forward when a character spans several code units.
      AppendUTF8EscapedChar(source, &pos, limit, output);
    }
    ++pos;
  }
  new_component->len = output->length() - new_component->begin;
}
// Canonicalizes a complete path URL: scheme, path, query and ref are written
// to |output| in that order, and |new_parsed| is filled in to describe the
// result. The authority components of |new_parsed| are always reset.
//
// Returns the result of scheme canonicalization; none of the later steps
// feeds into the return value.
template <typename CHAR, typename UCHAR>
bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
                           const Parsed& parsed,
                           CanonOutput* output,
                           Parsed* new_parsed) {
  // The scheme comes first; canonicalizing it also appends the colon.
  const bool scheme_ok = CanonicalizeScheme(source.scheme, parsed.scheme,
                                            output, &new_parsed->scheme);

  // Path URLs are assumed to carry no authority section, so every authority
  // component is cleared in the result.
  new_parsed->username.reset();
  new_parsed->password.reset();
  new_parsed->host.reset();
  new_parsed->port.reset();

  // The path goes through the weaker path-URL escaping rules, with no
  // separator in front of it.
  //
  // Handling the path is never expected to fail, see
  // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
  DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
                                           output, &new_parsed->path);

  // As with mailto:, the query always uses the default UTF-8 charset
  // converter, hence the null converter argument.
  CanonicalizeQuery(source.query, parsed.query, nullptr, output,
                    &new_parsed->query);

  CanonicalizeRef(source.ref, parsed.ref, output, &new_parsed->ref);

  return scheme_ok;
}
} // namespace
bool CanonicalizePathURL(const char* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizePathURL<char, unsigned char>(
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizePathURL<char16_t, char16_t>(
URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
}
// Canonicalizes just the path |component| of a path URL stored in the
// narrow-character string |source|, appending it to |output| and describing
// the appended range in |new_component|.
void CanonicalizePathURLPath(const char* source,
                             const Component& component,
                             CanonOutput* output,
                             Component* new_component) {
  // A path has no leading separator character of its own.
  const char kNoSeparator = '\0';
  DoCanonicalizePathComponent<char, unsigned char>(
      source, component, kNoSeparator, output, new_component);
}
// char16_t overload: canonicalizes just the path |component| of a path URL
// stored in |source|, appending it to |output| and describing the appended
// range in |new_component|.
void CanonicalizePathURLPath(const char16_t* source,
                             const Component& component,
                             CanonOutput* output,
                             Component* new_component) {
  // A path has no leading separator character of its own.
  const char kNoSeparator = '\0';
  DoCanonicalizePathComponent<char16_t, char16_t>(
      source, component, kNoSeparator, output, new_component);
}
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
const Replacements<char>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupOverrideComponents(base, replacements, &source, &parsed);
return DoCanonicalizePathURL<char, unsigned char>(
source, parsed, output, new_parsed);
}
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
return DoCanonicalizePathURL<char, unsigned char>(
source, parsed, output, new_parsed);
}
} // namespace url