integrated-application-development · fourls · Jul 22, 2025 · cirras · Jul 25, 2025 · cirras
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Full support for the `HIGHCHARUNICODE` compiler directive.
+
 ## [1.17.2] - 2025-07-03
 
 ### Fixed

diff --git a/...i-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java b/...i-frontend/src/main/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImpl.java
@@ -20,6 +20,8 @@
 
 import au.com.integradev.delphi.antlr.ast.visitors.DelphiParserVisitor;
 import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
 import java.util.ArrayDeque;
 import java.util.Deque;
 import java.util.stream.Collectors;
@@ -28,6 +30,7 @@
 import org.apache.commons.lang3.Strings;
 import org.sonar.plugins.communitydelphi.api.ast.DelphiNode;
 import org.sonar.plugins.communitydelphi.api.ast.TextLiteralNode;
+import org.sonar.plugins.communitydelphi.api.directive.SwitchDirective.SwitchKind;
 import org.sonar.plugins.communitydelphi.api.token.DelphiTokenType;
 import org.sonar.plugins.communitydelphi.api.type.IntrinsicType;
 import org.sonar.plugins.communitydelphi.api.type.Type;
@@ -167,26 +170,38 @@ private String createSingleLineValue() {
     return imageBuilder.toString();
   }
 
-  private static char characterEscapeToChar(String image) {
+  /** Checks whether the HIGHCHARUNICODE compiler switch is active at this node's token index. */
+  private boolean isHighCharUnicode() {
+    var delphiFile = getAst().getDelphiFile();
+    var switchRegistry = delphiFile.getCompilerSwitchRegistry();
+    return switchRegistry.isActiveSwitch(SwitchKind.HIGHCHARUNICODE, getTokenIndex());
+  }
+
+  /**
+   * Returns the charset used to decode character escapes in the range 0-255 when HIGHCHARUNICODE
+   * is off.
+   *
+   * <p>The charset is named by the {@code native.encoding} system property. That property only
+   * exists on JDK 17 and later, so the JVM default charset is used when it is absent instead of
+   * passing {@code null} to {@link Charset#forName(String)}, which would throw.
+   *
+   * @return the charset for decoding single-byte character escapes
+   */
+  public Charset getAnsiCharset() {
+    String nativeEncoding = System.getProperty("native.encoding");
+    return nativeEncoding == null ? Charset.defaultCharset() : Charset.forName(nativeEncoding);
+  }
+
+  /**
+   * Converts a character escape image to the character it denotes.
+   *
+   * <p>The first character of the image is dropped, a following {@code $} selects hexadecimal
+   * (decimal otherwise), and {@code _} characters are removed before the digits are parsed.
+   *
+   * @param image the escape image, e.g. {@code #111} or {@code #$61}
+   * @return the resulting character
+   * @throws NumberFormatException if the remaining digits cannot be parsed in the selected radix
+   */
+  private char characterEscapeToChar(String image) {
     image = image.substring(1);
     int radix = 10;
 
-    switch (image.charAt(0)) {
-      case '$':
-        radix = 16;
-        image = image.substring(1);
-        break;
-      case '%':
-        radix = 2;
-        image = image.substring(1);
-        break;
-      default:
-        // do nothing
+    if (image.charAt(0) == '$') {
+      radix = 16;
+      image = image.substring(1);
     }
 
     image = StringUtils.remove(image, '_');
+    // The cast keeps only the low 16 bits of the parsed code.
+    char character = (char) Integer.parseInt(image, radix);
 
-    return (char) Integer.parseInt(image, radix);
+    if (isHighCharUnicode() || character > 255) {
+      // With HIGHCHARUNICODE ON, all escapes are interpreted as UTF-16.
+      // Escapes above 255 are always interpreted as UTF-16.
+      return character;
+    } else {
+      // With HIGHCHARUNICODE OFF, escapes between 0-255 are interpreted in the system code page.
+      // Decode the value as a single byte in the ANSI charset and take the first decoded char.
+      var buffer = ByteBuffer.allocate(1).put((byte) character).flip();
+      return getAnsiCharset().decode(buffer).get();
+    }
   }
 
   @Override

diff --git a/...ontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java b/...ontend/src/test/java/au/com/integradev/delphi/antlr/ast/node/TextLiteralNodeImplTest.java
@@ -20,17 +20,24 @@
 
 import static org.assertj.core.api.Assertions.assertThat;
 import static org.mockito.ArgumentMatchers.anyInt;
+import static org.mockito.ArgumentMatchers.eq;
+import static org.mockito.Mockito.doReturn;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.spy;
 import static org.mockito.Mockito.when;
 
 import au.com.integradev.delphi.antlr.DelphiLexer;
 import au.com.integradev.delphi.antlr.ast.DelphiAstImpl;
 import au.com.integradev.delphi.file.DelphiFile;
+import au.com.integradev.delphi.preprocessor.CompilerSwitchRegistry;
 import au.com.integradev.delphi.preprocessor.TextBlockLineEndingMode;
 import au.com.integradev.delphi.preprocessor.TextBlockLineEndingModeRegistry;
+import java.nio.charset.Charset;
 import org.antlr.runtime.CommonToken;
 import org.junit.jupiter.api.Test;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
 import org.sonar.plugins.communitydelphi.api.ast.DelphiNode;
+import org.sonar.plugins.communitydelphi.api.directive.SwitchDirective.SwitchKind;
 
 class TextLiteralNodeImplTest {
   @Test
@@ -59,22 +66,45 @@ void testMultilineImage() {
     assertThat(node.isMultiline()).isTrue();
   }
 
-  @Test
-  void testGetImageWithCharacterEscapes() {
-    TextLiteralNodeImpl node = new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral);
+  /**
+   * Verifies that character escapes are decoded according to the HIGHCHARUNICODE switch, with the
+   * ANSI charset pinned to windows-1252 so the expectation does not depend on the host platform.
+   */
+  @ParameterizedTest(name = "HIGHCHARUNICODE = {0}")
+  @ValueSource(booleans = {true, false})
+  void testGetImageWithCharacterEscapes(boolean highCharUnicode) {
+    var registry = mock(CompilerSwitchRegistry.class);
+    when(registry.isActiveSwitch(eq(SwitchKind.HIGHCHARUNICODE), anyInt()))
+        .thenReturn(highCharUnicode);
+    var file = mock(DelphiFile.class);
+    when(file.getCompilerSwitchRegistry()).thenReturn(registry);
+    var ast = mock(DelphiAstImpl.class);
+    when(ast.getDelphiFile()).thenReturn(file);
+
+    TextLiteralNodeImpl node = spy(new TextLiteralNodeImpl(DelphiLexer.TkTextLiteral));
+    // Stub with doReturn: when(spy.method()) would invoke the real getAnsiCharset() first, which
+    // reads the JVM's "native.encoding" property and so depends on the host environment.
+    doReturn(Charset.forName("windows-1252")).when(node).getAnsiCharset();
+    node.setParent(ast);
+
     node.addChild(createNode(DelphiLexer.TkQuotedString, "'F'"));
     node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111"));
     node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#111"));
     node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'"));
     node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$61"));
     node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$72"));
     node.addChild(createNode(DelphiLexer.TkQuotedString, "'B'"));
-    node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01100001"));
-    node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#%01111010"));
+    node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$80"));
+    node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$98"));
+    node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$A3"));
+    node.addChild(createNode(DelphiLexer.TkCharacterEscapeCode, "#$20AC"));
+    node.addChild(createNode(DelphiLexer.TkQuotedString, "'az'"));
 
-    assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#%01100001#%01111010");
-    assertThat(node.getValue()).isEqualTo(node.getImageWithoutQuotes()).isEqualTo("FooBarBaz");
     assertThat(node.isMultiline()).isFalse();
+    assertThat(node.getImage()).isEqualTo("'F'#111#111'B'#$61#$72'B'#$80#$98#$A3#$20AC'az'");
+    if (highCharUnicode) {
+      // Escapes are taken as UTF-16 code units, so #$80 and #$98 stay as C1 control characters.
+      assertThat(node.getValue())
+          .isEqualTo(node.getImageWithoutQuotes())
+          .isEqualTo("FooBarB\u0080\u0098£€az");
+    } else {
+      // Escapes up to 255 go through windows-1252; #$20AC is above 255 and is kept as-is.
+      assertThat(node.getValue())
+          .isEqualTo(node.getImageWithoutQuotes())
+          .isEqualTo("FooBarB€˜£€az");
+    }
   }
 
   @Test