@@ -29,6 +29,9 @@ const {
2929 Symbol,
3030 SymbolIterator,
3131 SymbolToStringTag,
32+ TypedArrayPrototypeGetBuffer,
33+ TypedArrayPrototypeGetByteLength,
34+ TypedArrayPrototypeGetByteOffset,
3235 decodeURIComponent,
3336} = primordials ;
3437
@@ -80,13 +83,17 @@ const {
8083 CHAR_LOWERCASE_Z ,
8184 CHAR_PERCENT ,
8285 CHAR_PLUS ,
86+ CHAR_COLON ,
8387} = require ( 'internal/constants' ) ;
8488const path = require ( 'path' ) ;
89+ const { Buffer } = require ( 'buffer' ) ;
8590
8691const {
8792 validateFunction,
8893} = require ( 'internal/validators' ) ;
8994
95+ const { percentDecode } = require ( 'internal/data_url' ) ;
96+
9097const querystring = require ( 'querystring' ) ;
9198
9299const bindingUrl = internalBinding ( 'url' ) ;
@@ -1481,6 +1488,76 @@ function getPathFromURLWin32(url) {
14811488 return StringPrototypeSlice ( pathname , 1 ) ;
14821489}
14831490
/**
 * Converts a `file:` URL into a Windows file path expressed as raw bytes.
 *
 * Unlike getPathFromURLWin32, this variant never inspects the path for
 * percent-encoded slashes (`%2F`, `%5C`). Those byte values are ambiguous in
 * non-UTF-8 encodings (in Shift-JIS, 0x5C is the Yen sign rather than a
 * backslash), and a single path may even mix encodings, so an `encoding`
 * option would not resolve the ambiguity. The percent-decoded bytes are
 * therefore returned exactly as they appear in the URL. Because the Windows
 * separator *is* the ASCII backslash, Shift-JIS paths remain a known
 * limitation that is not addressed here.
 * @param {URL} url A URL whose `hostname` and `pathname` are read.
 * @returns {Buffer} `\\host\path` bytes when a hostname is present (UNC),
 *   otherwise the drive-letter path with its leading separator removed.
 * @throws {ERR_INVALID_FILE_URL_PATH} For a host-less URL whose decoded path
 *   does not start with `<separator><ASCII letter>:`.
 */
function getPathBufferFromURLWin32(url) {
  const { hostname, pathname } = url;

  // Only the literal `/` separators become `\`. This happens before percent
  // decoding on purpose, so percent-encoded forward slashes are not touched.
  const windowsPathname =
    SideEffectFreeRegExpPrototypeSymbolReplace(FORWARD_SLASH, pathname, '\\');

  // percentDecode() hands back a typed array; malformed sequences such as
  // `%ZZ` are passed through literally. Re-wrap the result as a Buffer over
  // the same ArrayBuffer region.
  const decoded = percentDecode(Buffer.from(windowsPathname, 'utf8'));
  const pathBytes = Buffer.from(
    TypedArrayPrototypeGetBuffer(decoded),
    TypedArrayPrototypeGetByteOffset(decoded),
    TypedArrayPrototypeGetByteLength(decoded),
  );

  if (hostname === '') {
    // Local path: byte 0 is the leading separator, byte 1 must be an ASCII
    // letter and byte 2 must be `:`. OR-ing with 0x20 folds 'A'..'Z' onto
    // 'a'..'z' so that a single range check covers both cases.
    const driveLetter = pathBytes[1] | 0x20;
    const isAsciiLetter =
      driveLetter >= CHAR_LOWERCASE_A && driveLetter <= CHAR_LOWERCASE_Z;
    if (!isAsciiLetter || pathBytes[2] !== CHAR_COLON) {
      throw new ERR_INVALID_FILE_URL_PATH('must be absolute');
    }

    // Everything except the leading separator byte.
    return pathBytes.subarray(1);
  }

  // A hostname means a UNC path. The URL parser has already dealt with
  // percent-encoding in the host; domainToUnicode() additionally decodes
  // punycode (`xn--`) labels of IDNs. The pieces are converted to Buffers
  // individually and then concatenated.
  return Buffer.concat([
    Buffer.from('\\\\', 'ascii'),
    Buffer.from(domainToUnicode(hostname), 'utf8'),
    pathBytes,
  ]);
}
1560+
14841561function getPathFromURLPosix ( url ) {
14851562 if ( url . hostname !== '' ) {
14861563 throw new ERR_INVALID_FILE_URL_HOST ( platform ) ;
@@ -1499,6 +1576,28 @@ function getPathFromURLPosix(url) {
14991576 return decodeURIComponent ( pathname ) ;
15001577}
15011578
/**
 * Converts a `file:` URL into a POSIX file path expressed as raw bytes.
 *
 * getPathFromURLPosix rejects percent-encoded forward slashes, but that check
 * assumes ASCII/UTF-8. The same byte values can mean something else in other
 * encodings, and one path may contain segments in several encodings, so an
 * `encoding` option would not help. This variant therefore performs no slash
 * scan at all: the pathname is percent-decoded and the resulting bytes are
 * returned literally.
 * @param {URL} url A URL whose `hostname` and `pathname` are read.
 * @returns {Buffer} The percent-decoded bytes of `url.pathname`.
 * @throws {ERR_INVALID_FILE_URL_HOST} When `url.hostname` is not empty.
 */
function getPathBufferFromURLPosix(url) {
  if (url.hostname !== '') {
    throw new ERR_INVALID_FILE_URL_HOST(platform);
  }

  // percentDecode() returns a typed array; expose the same ArrayBuffer
  // region as a Buffer for the caller.
  const decoded = percentDecode(Buffer.from(url.pathname, 'utf8'));
  return Buffer.from(
    TypedArrayPrototypeGetBuffer(decoded),
    TypedArrayPrototypeGetByteOffset(decoded),
    TypedArrayPrototypeGetByteLength(decoded),
  );
}
1600+
15021601function fileURLToPath ( path , options = kEmptyObject ) {
15031602 const windows = options ?. windows ;
15041603 if ( typeof path === 'string' )
@@ -1510,6 +1609,24 @@ function fileURLToPath(path, options = kEmptyObject) {
15101609 return ( windows ?? isWindows ) ? getPathFromURLWin32 ( path ) : getPathFromURLPosix ( path ) ;
15111610}
15121611
/**
 * Buffer-returning counterpart of fileURLToPath. fileURLToPath does not
 * handle percent-encoded bytes that are not valid UTF-8, so this function
 * exists for the cases where converting the path to a string would fail.
 * @param {string|URL} path A `file:` URL, either as a string (parsed with
 *   `new URL()`) or as a URL object.
 * @param {object} [options]
 * @param {boolean} [options.windows] `true` to produce a Windows path,
 *   `false` for a POSIX path; when nullish the current platform decides.
 * @returns {Buffer} The file path as bytes.
 * @throws {ERR_INVALID_ARG_TYPE} When `path` is neither a string nor a URL.
 * @throws {ERR_INVALID_URL_SCHEME} When the URL's protocol is not `file:`.
 */
function fileURLToPathBuffer(path, options = kEmptyObject) {
  const windows = options?.windows;

  let fileURL = path;
  if (typeof path === 'string') {
    fileURL = new URL(path);
  } else if (!isURL(path)) {
    throw new ERR_INVALID_ARG_TYPE('path', ['string', 'URL'], path);
  }

  if (fileURL.protocol !== 'file:') {
    throw new ERR_INVALID_URL_SCHEME('file');
  }

  const toPathBuffer = (windows ?? isWindows) ?
    getPathBufferFromURLWin32 :
    getPathBufferFromURLPosix;
  return toPathBuffer(fileURL);
}
1629+
15131630function pathToFileURL ( filepath , options = kEmptyObject ) {
15141631 const windows = options ?. windows ?? isWindows ;
15151632 const isUNC = windows && StringPrototypeStartsWith ( filepath , '\\\\' ) ;
@@ -1570,6 +1687,7 @@ function getURLOrigin(url) {
15701687
15711688module . exports = {
15721689 fileURLToPath,
1690+ fileURLToPathBuffer,
15731691 pathToFileURL,
15741692 toPathIfFileURL,
15751693 installObjectURLMethods,
0 commit comments