Skip to content

Commit

Permalink
urlapi: CURLU_PUNY2IDN - convert from punycode to IDN name
Browse files Browse the repository at this point in the history
Assisted-by: Jay Satiro
Closes curl#11655
  • Loading branch information
bagder committed Aug 13, 2023
1 parent 0efe8b2 commit c350069
Show file tree
Hide file tree
Showing 7 changed files with 93 additions and 1 deletion.
10 changes: 10 additions & 0 deletions docs/libcurl/curl_url_get.3
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,16 @@ If libcurl is built without IDN capabilities, using this bit will make
anything outside the ASCII range.

(Added in curl 7.88.0)
.IP CURLU_PUNY2IDN
If set and asked to retrieve the \fBCURLUPART_HOST\fP or \fBCURLUPART_URL\fP
parts, libcurl returns the host name in its IDN (Internationalized Domain
Name) UTF-8 version if the stored host name is in punycode.

If libcurl is built without IDN capabilities, using this bit will make
\fIcurl_url_get(3)\fP return \fICURLUE_LACKS_IDN\fP if the host name is using
punycode.

(Added in curl 8.3.0)
.SH PARTS
.IP CURLUPART_URL
When asked to return the full URL, \fIcurl_url_get(3)\fP will return a
Expand Down
1 change: 1 addition & 0 deletions docs/libcurl/symbols-in-versions
Original file line number Diff line number Diff line change
Expand Up @@ -1063,6 +1063,7 @@ CURLU_NO_AUTHORITY 7.67.0
CURLU_NO_DEFAULT_PORT 7.62.0
CURLU_NON_SUPPORT_SCHEME 7.62.0
CURLU_PATH_AS_IS 7.62.0
CURLU_PUNY2IDN 8.3.0
CURLU_PUNYCODE 7.88.0
CURLU_URLDECODE 7.62.0
CURLU_URLENCODE 7.62.0
Expand Down
1 change: 1 addition & 0 deletions include/curl/urlapi.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ typedef enum {
scheme is unknown. */
#define CURLU_ALLOW_SPACE (1<<11) /* Allow spaces in the URL */
#define CURLU_PUNYCODE (1<<12) /* get the host name in punycode */
#define CURLU_PUNY2IDN (1<<13) /* punycode => IDN conversion */

typedef struct Curl_URL CURLU;

Expand Down
50 changes: 49 additions & 1 deletion lib/idn.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
if(in_w) {
wchar_t punycode[IDN_MAX_LENGTH];
int chars = IdnToAscii(0, in_w, -1, punycode, IDN_MAX_LENGTH);
int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
IDN_MAX_LENGTH);
curlx_unicodefree(in_w);
if(chars) {
char *mstr = curlx_convert_wchar_to_UTF8(punycode);
Expand All @@ -91,6 +92,27 @@ bool Curl_win32_idn_to_ascii(const char *in, char **out)
return success;
}

/*
 * Curl_win32_ascii_to_idn() converts the host name 'in' with the Windows
 * IdnToUnicode() API and hands the result back as a UTF-8 string.
 *
 * The input is first converted to a wide-character string, run through
 * IdnToUnicode() and the wide-character result is then converted back to
 * UTF-8.
 *
 * Returns a strdup()ed string, or NULL if any of the conversion steps or the
 * final allocation fails.
 */
char *Curl_win32_ascii_to_idn(const char *in)
{
  char *out = NULL;

  wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
  if(in_w) {
    WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
    /* wcslen() + 1 passes the terminating null along with the string */
    int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
                             IDN_MAX_LENGTH);
    /* the wide-character input is not used past this point; release it here
       so that it is freed on both the success and the failure path */
    curlx_unicodefree(in_w);
    if(chars) {
      /* 'chars' is "the number of characters retrieved" */
      char *mstr = curlx_convert_wchar_to_UTF8(idn);
      if(mstr) {
        out = strdup(mstr);
        curlx_unicodefree(mstr);
      }
    }
  }
  return out;
}

#endif /* USE_WIN32_IDN */

/*
Expand Down Expand Up @@ -144,6 +166,19 @@ static char *idn_decode(const char *input)
return decoded;
}

/*
 * idn_encode() converts the punycode host name 'puny' to its IDN form using
 * the IDN backend this build was configured with.
 *
 * With libidn2 the returned string comes from idn2_to_unicode_8z8z(); with
 * Windows IDN it comes from Curl_win32_ascii_to_idn(). When neither backend
 * is enabled this always returns NULL.
 *
 * Returns the converted string, or NULL on failure.
 */
static char *idn_encode(const char *puny)
{
  char *enc = NULL;
#ifdef USE_LIBIDN2
  /* check against libidn2's own success code instead of the libidn
     compatibility name IDNA_SUCCESS */
  if(idn2_to_unicode_8z8z(puny, &enc, 0) != IDN2_OK)
    return NULL;
#elif defined(USE_WIN32_IDN)
  enc = Curl_win32_ascii_to_idn(puny);
#else
  (void)puny; /* no IDN backend available */
#endif
  return enc;
}

char *Curl_idn_decode(const char *input)
{
char *d = idn_decode(input);
Expand All @@ -157,6 +192,19 @@ char *Curl_idn_decode(const char *input)
return d;
}

/*
 * Curl_idn_encode() returns the IDN version of the punycode host name
 * 'puny', or NULL on failure.
 *
 * In libidn2 builds the string produced by idn_encode() is duplicated with
 * strdup() and the original is released with idn2_free(), so the caller
 * never receives the libidn2-owned buffer (presumably so the result can be
 * released the same way regardless of backend - confirm against callers).
 */
char *Curl_idn_encode(const char *puny)
{
#ifdef USE_LIBIDN2
  char *copy;
  char *raw = idn_encode(puny);
  if(!raw)
    return NULL;

  /* hand back a strdup()ed copy; NULL if the duplication fails */
  copy = strdup(raw);
  idn2_free(raw);
  return copy;
#else
  return idn_encode(puny);
#endif
}

/*
* Frees data allocated by idnconvert_hostname()
*/
Expand Down
2 changes: 2 additions & 0 deletions lib/idn.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@

#ifdef USE_WIN32_IDN
bool Curl_win32_idn_to_ascii(const char *in, char **out);
char *Curl_win32_ascii_to_idn(const char *in);
#endif /* USE_WIN32_IDN */
bool Curl_is_ASCII_name(const char *hostname);
CURLcode Curl_idnconvert_hostname(struct hostname *host);
#if defined(USE_LIBIDN2) || defined(USE_WIN32_IDN)
#define USE_IDN
void Curl_free_idnconverted_hostname(struct hostname *host);
char *Curl_idn_decode(const char *input);
char *Curl_idn_encode(const char *input);
#ifdef USE_LIBIDN2
#define Curl_idn_free(x) idn2_free(x)
#else
Expand Down
27 changes: 27 additions & 0 deletions lib/urlapi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1403,6 +1403,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
bool urldecode = (flags & CURLU_URLDECODE)?1:0;
bool urlencode = (flags & CURLU_URLENCODE)?1:0;
bool punycode = FALSE;
bool depunyfy = FALSE;
bool plusdecode = FALSE;
(void)flags;
if(!u)
Expand Down Expand Up @@ -1433,6 +1434,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
ptr = u->host;
ifmissing = CURLUE_NO_HOST;
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
break;
case CURLUPART_ZONEID:
ptr = u->zoneid;
Expand Down Expand Up @@ -1483,6 +1485,7 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
char *port = u->port;
char *allochost = NULL;
punycode = (flags & CURLU_PUNYCODE)?1:0;
depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
if(u->scheme && strcasecompare("file", u->scheme)) {
url = aprintf("file://%s%s%s",
u->path,
Expand Down Expand Up @@ -1548,6 +1551,17 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
#endif
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
allochost = Curl_idn_encode(u->host);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
#endif
}
}

url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
scheme,
Expand Down Expand Up @@ -1626,6 +1640,19 @@ CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
#endif
}
}
else if(depunyfy) {
if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
#ifndef USE_IDN
return CURLUE_LACKS_IDN;
#else
char *allochost = Curl_idn_encode(*part);
if(!allochost)
return CURLUE_OUT_OF_MEMORY;
free(*part);
*part = allochost;
#endif
}
}

return CURLUE_OK;
}
Expand Down
3 changes: 3 additions & 0 deletions tests/libtest/lib1560.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ static const struct testcase get_parts_list[] ={
{"https://räksmörgås.se",
"https | [11] | [12] | [13] | xn--rksmrgs-5wao1o.se | "
"[15] | / | [16] | [17]", 0, CURLU_PUNYCODE, CURLUE_OK},
{"https://xn--rksmrgs-5wao1o.se",
"https | [11] | [12] | [13] | räksmörgås.se | "
"[15] | / | [16] | [17]", 0, CURLU_PUNY2IDN, CURLUE_OK},
#else
{"https://räksmörgås.se",
"https | [11] | [12] | [13] | [30] | [15] | / | [16] | [17]",
Expand Down

0 comments on commit c350069

Please sign in to comment.