forked from chromium/chromium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathurl_canon_pathurl.cc
141 lines (125 loc) · 5.65 KB
/
url_canon_pathurl.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Functions for canonicalizing "path" URLs. Not to be confused with the path
// of a URL, these are URLs that have no authority section, only a path. For
// example, "javascript:" and "data:".
#include "url/url_canon.h"
#include "url/url_canon_internal.h"
namespace url {
namespace {
// Writes the canonical form of |component| (a range within |source|) to
// |output| and records where it landed in |new_component|. When |separator|
// is non-zero (e.g. '?' or '#') it is emitted to |output| just before the
// component text; the recorded range starts after the separator. An invalid
// |component| produces no output and resets |new_component|.
template <typename CHAR, typename UCHAR>
void DoCanonicalizePathComponent(const CHAR* source,
                                 const Component& component,
                                 char separator,
                                 CanonOutput* output,
                                 Component* new_component) {
  if (!component.is_valid()) {
    // Empty part.
    new_component->reset();
    return;
  }

  if (separator)
    output->push_back(separator);

  // Path URLs (think javascript:) use deliberately lax escaping: printable
  // ASCII is copied through untouched, which keeps JavaScript readable, and
  // everything else (C0 controls, DEL and non-ASCII) is converted to UTF-8
  // and percent-escaped.
  // https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
  // https://url.spec.whatwg.org/#c0-control-percent-encode-set
  new_component->begin = output->length();
  const int limit = component.end();
  int pos = component.begin;
  while (pos < limit) {
    const UCHAR unit = static_cast<UCHAR>(source[pos]);
    const bool is_printable_ascii = unit >= 0x20 && unit <= 0x7E;
    if (is_printable_ascii) {
      output->push_back(static_cast<char>(unit));
    } else {
      // |pos| is passed by pointer, so the helper may move it forward
      // (presumably past the extra code units of a multi-unit character --
      // confirm against AppendUTF8EscapedChar). Its result is intentionally
      // not checked here.
      AppendUTF8EscapedChar(source, &pos, limit, output);
    }
    ++pos;
  }
  new_component->len = output->length() - new_component->begin;
}
template <typename CHAR, typename UCHAR>
bool DoCanonicalizePathURL(const URLComponentSource<CHAR>& source,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
// Scheme: this will append the colon.
bool success = CanonicalizeScheme(source.scheme, parsed.scheme,
output, &new_parsed->scheme);
// We assume there's no authority for path URLs. Note that hosts should never
// have -1 length.
new_parsed->username.reset();
new_parsed->password.reset();
new_parsed->host.reset();
new_parsed->port.reset();
// We allow path URLs to have the path, query and fragment components, but we
// will canonicalize each of the via the weaker path URL rules.
//
// Note: parsing the path part should never cause a failure, see
// https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state
DoCanonicalizePathComponent<CHAR, UCHAR>(source.path, parsed.path, '\0',
output, &new_parsed->path);
DoCanonicalizePathComponent<CHAR, UCHAR>(source.query, parsed.query, '?',
output, &new_parsed->query);
DoCanonicalizePathComponent<CHAR, UCHAR>(source.ref, parsed.ref, '#', output,
&new_parsed->ref);
return success;
}
} // namespace
bool CanonicalizePathURL(const char* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizePathURL<char, unsigned char>(
URLComponentSource<char>(spec), parsed, output, new_parsed);
}
bool CanonicalizePathURL(const char16_t* spec,
int spec_len,
const Parsed& parsed,
CanonOutput* output,
Parsed* new_parsed) {
return DoCanonicalizePathURL<char16_t, char16_t>(
URLComponentSource<char16_t>(spec), parsed, output, new_parsed);
}
// Canonicalizes just the path |component| of an 8-bit path URL into |output|,
// recording the written range in |new_component|.
void CanonicalizePathURLPath(const char* source,
                             const Component& component,
                             CanonOutput* output,
                             Component* new_component) {
  // A zero separator means nothing is written ahead of the path text.
  constexpr char kNoSeparator = '\0';
  DoCanonicalizePathComponent<char, unsigned char>(
      source, component, kNoSeparator, output, new_component);
}
// Canonicalizes just the path |component| of a 16-bit path URL into |output|,
// recording the written range in |new_component|.
void CanonicalizePathURLPath(const char16_t* source,
                             const Component& component,
                             CanonOutput* output,
                             Component* new_component) {
  // A zero separator means nothing is written ahead of the path text.
  constexpr char kNoSeparator = '\0';
  DoCanonicalizePathComponent<char16_t, char16_t>(
      source, component, kNoSeparator, output, new_component);
}
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
const Replacements<char>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupOverrideComponents(base, replacements, &source, &parsed);
return DoCanonicalizePathURL<char, unsigned char>(
source, parsed, output, new_parsed);
}
bool ReplacePathURL(const char* base,
const Parsed& base_parsed,
const Replacements<char16_t>& replacements,
CanonOutput* output,
Parsed* new_parsed) {
RawCanonOutput<1024> utf8;
URLComponentSource<char> source(base);
Parsed parsed(base_parsed);
SetupUTF16OverrideComponents(base, replacements, &utf8, &source, &parsed);
return DoCanonicalizePathURL<char, unsigned char>(
source, parsed, output, new_parsed);
}
} // namespace url