@@ -156,6 +156,68 @@ private function absolutizeUrl()
156
156
* Parse url into components of a URI as specified by RFC 3986.
157
157
*/
158
158
private function parseUrl($url)
{
    // Splits $url into its RFC 3986 components and returns them as an
    // associative array. Only components that are present and non-empty
    // are included. Possible keys: scheme, host, port, user, pass, path,
    // query, fragment. The path is passed through percentEncodeChars();
    // every other component is returned exactly as it appears in $url.
    //
    // RFC 3986, Appendix B - Parsing a URI Reference with a Regular Expression:
    //
    //   ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?
    //    12            3  4          5       6  7        8 9
    //
    // For "http://www.ics.uci.edu/pub/ietf/uri/#Related":
    //   $1 = http:              (ignored)
    //   $2 = http               (scheme)
    //   $3 = //www.ics.uci.edu  (ignored)
    //   $4 = www.ics.uci.edu    (authority)
    //   $5 = /pub/ietf/uri/     (path)
    //   $6 = <undefined>        (ignored)
    //   $7 = <undefined>        (query)
    //   $8 = #Related           (ignored)
    //   $9 = Related            (fragment)
    $parts = array();
    $matches = array();
    if (!preg_match('/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/', $url, $matches)) {
        return $parts;
    }

    // Group 2 is the scheme without its trailing ":" (group 1 includes it).
    if (isset($matches[2]) && $matches[2] !== '') {
        $parts['scheme'] = $matches[2];
    }

    if (isset($matches[4]) && $matches[4] !== '') {
        // authority = [ userinfo "@" ] host [ ":" port ]
        $host_port = $matches[4];
        $userinfo = null;

        // Strip userinfo first: it may itself contain ":" (user:pass), which
        // must not be mistaken for the port delimiter. Splitting on the last
        // "@" keeps any stray "@" inside the userinfo rather than the host.
        $at = strrpos($host_port, '@');
        if ($at !== false) {
            $userinfo = (string) substr($host_port, 0, $at);
            $host_port = (string) substr($host_port, $at + 1);
        }

        $host = $host_port;
        $port = '';
        if ($host_port !== '' && $host_port[0] === '[') {
            // IP-literal such as "[::1]" or "[::1]:8080": the colons inside
            // the brackets belong to the address, so a port can only follow
            // the closing bracket.
            $close = strpos($host_port, ']');
            if ($close !== false) {
                $host = (string) substr($host_port, 0, $close + 1);
                $rest = (string) substr($host_port, $close + 1);
                if ($rest !== '' && $rest[0] === ':') {
                    $port = (string) substr($rest, 1);
                }
            }
        } else {
            // Registered name or IPv4 address: the port follows the last ":".
            $colon = strrpos($host_port, ':');
            if ($colon !== false) {
                $host = (string) substr($host_port, 0, $colon);
                $port = (string) substr($host_port, $colon + 1);
            }
        }

        $parts['host'] = $host;
        // An empty port ("host:") is omitted, like every other empty component.
        if ($port !== '') {
            $parts['port'] = $port;
        }

        if ($userinfo !== null) {
            // userinfo = user [ ":" password ]; only the first ":" separates
            // the two, so the password may contain further colons.
            $colon = strpos($userinfo, ':');
            if ($colon !== false) {
                $parts['user'] = (string) substr($userinfo, 0, $colon);
                $parts['pass'] = (string) substr($userinfo, $colon + 1);
            } else {
                $parts['user'] = $userinfo;
            }
        }
    }

    if (isset($matches[5]) && $matches[5] !== '') {
        $parts['path'] = $this->percentEncodeChars($matches[5]);
    }
    if (isset($matches[7]) && $matches[7] !== '') {
        $parts['query'] = $matches[7];
    }
    if (isset($matches[9]) && $matches[9] !== '') {
        $parts['fragment'] = $matches[9];
    }

    return $parts;
}
213
+
214
+ /**
215
+ * Percent-encode characters.
216
+ *
217
+ * Percent-encode characters to represent a data octet in a component when
218
+ * that octet's corresponding character is outside the allowed set.
219
+ */
220
+ private function percentEncodeChars ($ chars )
159
221
{
160
222
// ALPHA = A-Z / a-z
161
223
$ alpha = 'A-Za-z ' ;
@@ -177,14 +239,14 @@ private function parseUrl($url)
177
239
$ hexdig .= 'a-f ' ;
178
240
179
241
$ pattern = '/(?:[^ ' . $ unreserved . $ sub_delims . preg_quote (':@%/? ' , '/ ' ) . ']++|%(?![ ' . $ hexdig . ']{2}))/ ' ;
180
- $ url = preg_replace_callback (
242
+ $ percent_encoded_chars = preg_replace_callback (
181
243
$ pattern ,
182
244
function ($ matches ) {
183
245
return rawurlencode ($ matches [0 ]);
184
246
},
185
- $ url
247
+ $ chars
186
248
);
187
- return parse_url ( $ url ) ;
249
+ return $ percent_encoded_chars ;
188
250
}
189
251
190
252
/**
0 commit comments