Skip to content

Commit

Permalink
added handling of \x.
Browse files Browse the repository at this point in the history
  • Loading branch information
kramm committed Sep 22, 2004
1 parent fe997bf commit e8458c3
Showing 1 changed file with 81 additions and 8 deletions.
89 changes: 81 additions & 8 deletions src/parser.lex
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,55 @@ static void count(char*text, int len, int condition)

static char*prefix = 0;

static char utf8buf[16];
static char* getUTF8(unsigned int charnum)
{
memset(utf8buf, 0, sizeof(utf8buf));

if(charnum < 0x80) {
utf8buf[0] = charnum;
return utf8buf;
} else if(charnum <0x800) {
/* 0000 0080-0000 07FF 110xxxxx 10xxxxxx */
utf8buf[0] = 0xc0 | (charnum >> 6);
utf8buf[1] = 0x80 | (charnum & 0x3f);
return utf8buf;
} else if(charnum < 0x10000) {
/* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx */
utf8buf[0] = 0xe0 | (charnum >> 12);
utf8buf[1] = 0x80 |((charnum >> 6)&0x3f);
utf8buf[2] = 0x80 |((charnum )&0x3f);
return utf8buf;
} else if(charnum < 0x200000) {
/* 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
utf8buf[0] = 0xf0 | (charnum >> 18);
utf8buf[1] = 0x80 |((charnum >> 12)&0x3f);
utf8buf[2] = 0x80 |((charnum >> 6 )&0x3f);
utf8buf[3] = 0x80 |((charnum )&0x3f);
return utf8buf;
} else if(charnum < 0x4000000) {
/* 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx */
utf8buf[0] = 0xf8 | (charnum >> 24);
utf8buf[1] = 0x80 |((charnum >> 18)&0x3f);
utf8buf[2] = 0x80 |((charnum >> 12)&0x3f);
utf8buf[3] = 0x80 |((charnum >> 6 )&0x3f);
utf8buf[4] = 0x80 |((charnum )&0x3f);
return utf8buf;
} else if(charnum < 0x80000000) {
/* 0400 0000-7FFF FFFF 1111110x 10xxxxxx ... 10xxxxxx */
utf8buf[0] = 0xfc | (charnum >> 30);
utf8buf[1] = 0x80 |((charnum >> 24)&0x3f);
utf8buf[2] = 0x80 |((charnum >> 18)&0x3f);
utf8buf[3] = 0x80 |((charnum >> 12)&0x3f);
utf8buf[4] = 0x80 |((charnum >> 6 )&0x3f);
utf8buf[5] = 0x80 |((charnum )&0x3f);
return utf8buf;
} else {
fprintf(stderr, "Illegal character: 0x%08x\n", charnum);
return utf8buf;
}
}

static void unescapeString(string_t * tmp)
{
char *p, *p1;
Expand All @@ -39,19 +88,43 @@ static void unescapeString(string_t * tmp)

for (p1=tmp->str; (p=strchr(p1, '\\')); p1 = p+1)
{
int nr=2;
int new=1;
switch(p[1])
{
case '\\': p[1] = '\\'; tmp->len--; break;
case '"': p[1] = '"'; tmp->len--; break;
case 'b': p[1] = '\b'; tmp->len--; break;
case 'f': p[1] = '\f'; tmp->len--; break;
case 'n': p[1] = '\n'; tmp->len--; break;
case 'r': p[1] = '\r'; tmp->len--; break;
case 't': p[1] = '\t'; tmp->len--; break;
case '\\': p[0] = '\\'; break;
case '"': p[0] = '"'; break;
case 'b': p[0] = '\b'; break;
case 'f': p[0] = '\f'; break;
case 'n': p[0] = '\n'; break;
case 'r': p[0] = '\r'; break;
case 't': p[0] = '\t'; break;
case 'x': {
int num=0;
while(strchr("0123456789abcdefABCDEF", p[nr])) {
num <<= 4;
if(p[nr]>='0' && p[nr]<='9') num |= p[nr] - '0';
if(p[nr]>='a' && p[nr]<='f') num |= p[nr] - 'a' + 10;
if(p[nr]>='A' && p[nr]<='F') num |= p[nr] - 'A' + 10;
nr++;
}
char*utf8 = getUTF8(num);
new = strlen(utf8);

memcpy(p, utf8, new); // do not copy the terminating zero
break;
}
default:
continue;
}
strcpy(p, p+1);
tmp->len -= (nr-new);
int t;
char*to=p+new,*from=p+nr;
while(*from) {
*to = *from;
to++;
from++;
}
}
}

Expand Down

0 comments on commit e8458c3

Please sign in to comment.