From 66c0a2ed96c0f40b7618234f9040f03b16dea116 Mon Sep 17 00:00:00 2001
From: Rui Ueyama
Date: Sun, 26 Apr 2020 10:11:02 +0900
Subject: [PATCH] Concatenate adjacent string literals

---
 preprocess.c  | 27 +++++++++++++++++++++++++++
 test/string.c |  6 ++++++
 test/test.h   |  1 +
 3 files changed, 34 insertions(+)

diff --git a/preprocess.c b/preprocess.c
index bfaaa4a5..d47fe2a4 100644
--- a/preprocess.c
+++ b/preprocess.c
@@ -919,6 +919,32 @@ static void init_macros(void) {
   add_builtin("__LINE__", line_macro);
 }
 
+// Concatenate adjacent string literals into a single string literal
+// as per the C spec.
+static void join_adjacent_string_literals(Token *tok1) {
+  while (tok1->kind != TK_EOF) {
+    Token *tok2 = tok1->next;
+
+    if (tok1->kind != TK_STR || tok2->kind != TK_STR) {
+      tok1 = tok1->next;
+      continue;
+    }
+
+    Token *t = copy_token(tok1);
+    t->ty = array_of(tok1->ty->base, tok1->ty->array_len + tok2->ty->array_len - 1);
+    t->str = calloc(1, t->ty->size);
+    t->next = tok2->next;
+
+    int i = 0;
+    for (int j = 0; j < tok1->ty->size - tok1->ty->base->size; i++, j++)
+      t->str[i] = tok1->str[j];
+    for (int j = 0; j < tok2->ty->size; i++, j++)
+      t->str[i] = tok2->str[j];
+
+    *tok1 = *t;
+  }
+}
+
 // Entry point function of the preprocessor.
 Token *preprocess(Token *tok) {
   init_macros();
@@ -926,5 +952,6 @@ Token *preprocess(Token *tok) {
   if (cond_incl)
     error_tok(cond_incl->tok, "unterminated conditional directive");
   convert_keywords(tok);
+  join_adjacent_string_literals(tok);
   return tok;
 }
diff --git a/test/string.c b/test/string.c
index 303e72b2..25f1988a 100644
--- a/test/string.c
+++ b/test/string.c
@@ -35,6 +35,12 @@ int main() {
   ASSERT(0, "\x00"[0]);
   ASSERT(119, "\x77"[0]);
 
+  ASSERT(7, sizeof("abc" "def"));
+  ASSERT(9, sizeof("abc" "d" "efgh"));
+  ASSERT(0, strcmp("abc" "d" "\nefgh", "abcd\nefgh"));
+  ASSERT(0, !strcmp("abc" "d", "abcd\nefgh"));
+  ASSERT(0, strcmp("\x9" "0", "\t0"));
+
   printf("OK\n");
   return 0;
 }
diff --git a/test/test.h b/test/test.h
index 3d5f5a94..b2eb5d7f 100644
--- a/test/test.h
+++ b/test/test.h
@@ -4,5 +4,6 @@ int assert(int expected, int actual, char *code);
 int printf(char *fmt, ...);
 int sprintf(char *buf, char *fmt, ...);
 int strcmp(char *p, char *q);
+int strncmp(char *p, char *q, long n);
 int memcmp(char *p, char *q, long n);
 void exit(int n);