Skip to content

Commit f405ee4

Browse files
authored
Fixed PG error positions (#9229)
1 parent fc767cd commit f405ee4

File tree

4 files changed

+55
-10
lines changed

4 files changed

+55
-10
lines changed

ydb/library/yql/parser/pg_wrapper/parser.cpp

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -208,11 +208,22 @@ void PGParse(const TString& input, IPGParseEvents& events) {
208208
};
209209

210210
if (parsetree_and_error.error) {
211-
TPosition position(1, 1);
212-
TTextWalker walker(position);
213-
size_t distance = Min(size_t(parsetree_and_error.error->cursorpos), input.Size());
214-
for (size_t i = 0; i < distance; ++i) {
215-
walker.Advance(input[i]);
211+
TPosition position(0, 1);
212+
// cursorpos is about codepoints, not bytes
213+
TTextWalker walker(position, true);
214+
auto cursorpos = parsetree_and_error.error->cursorpos;
215+
size_t codepoints = 0;
216+
if (cursorpos >= 0) {
217+
for (size_t i = 0; i < input.Size(); ++i) {
218+
if (codepoints == cursorpos) {
219+
break;
220+
}
221+
222+
if (!TTextWalker::IsUtf8Intermediate(input[i])) {
223+
++codepoints;
224+
}
225+
walker.Advance(input[i]);
226+
}
216227
}
217228

218229
events.OnError(TIssue(position, "ERROR: " + TString(parsetree_and_error.error->message) + "\n"));

ydb/library/yql/parser/pg_wrapper/ut/parser_ut.cpp

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ const TStringBuf ExpectedSelect1 = "({RAWSTMT :stmt {SELECTSTMT :distinctClause
2828

2929
const TString Error1 = "ERROR: syntax error at or near \"SELECT1\"\n";
3030

31-
Y_UNIT_TEST_SUITE(TWrapperTests) {
31+
Y_UNIT_TEST_SUITE(ParseTests) {
3232
Y_UNIT_TEST(TestOk) {
3333
TEvents events;
3434
PGParse(TString("SELECT 1"), events);
@@ -47,11 +47,34 @@ Y_UNIT_TEST_SUITE(TWrapperTests) {
4747
UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 2);
4848
UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 3);
4949
}
50+
51+
// Checks that the reported error position is counted in characters, not bytes,
// when the query text contains multi-byte UTF-8 sequences before the error.
Y_UNIT_TEST(TestErrorPosUtf8) {
    {
        // Single line: the comment "/* привет */" is 12 characters long
        // (18 bytes in UTF-8), so the offending token starts at column 13.
        TEvents events;
        PGParse(TString("/* привет */SELECT1"), events);
        UNIT_ASSERT(!events.Result);
        UNIT_ASSERT(events.Issue);
        auto msg = events.Issue->GetMessage();
        // Same offending token as in the multi-line case below, so the same
        // message is expected; previously `msg` was fetched but never checked.
        UNIT_ASSERT_NO_DIFF(msg, Error1);
        UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 1);
        UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 13);
    }

    {
        // Two line breaks after the comment: the offending token is the first
        // thing on the third row.
        TEvents events;
        PGParse(TString("/* привет */\n\nSELECT1"), events);
        UNIT_ASSERT(!events.Result);
        UNIT_ASSERT(events.Issue);
        auto msg = events.Issue->GetMessage();
        UNIT_ASSERT_NO_DIFF(msg, Error1);
        UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Row, 3);
        UNIT_ASSERT_VALUES_EQUAL(events.Issue->Position.Column, 1);
    }
}
5073
}
5174

5275
const ui32 threadsCount = 10;
5376

54-
Y_UNIT_TEST_SUITE(TMTWrapperTests) {
77+
Y_UNIT_TEST_SUITE(MTParseTests) {
5578
Y_UNIT_TEST(TestOk) {
5679
TVector<THolder<TThread>> threads;
5780
for (ui32 i = 0; i < threadsCount; ++i) {

ydb/library/yql/public/issue/yql_issue.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,13 +53,18 @@ TTextWalker& TTextWalker::Advance(char c) {
5353
return *this;
5454
}
5555

56+
ui32 charDistance = 1;
57+
if (Utf8Aware && IsUtf8Intermediate(c)) {
58+
charDistance = 0;
59+
}
60+
5661
// either not '\r' or second '\r'
5762
if (LfCount) {
5863
Position.Row += LfCount;
59-
Position.Column = 1;
64+
Position.Column = charDistance;
6065
LfCount = 0;
6166
} else {
62-
Position.Column += 1 + (HaveCr && c != '\r');
67+
Position.Column += charDistance + (HaveCr && c != '\r');
6368
}
6469
HaveCr = (c == '\r');
6570
return *this;

ydb/library/yql/public/issue/yql_issue.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,18 @@ struct TPosition {
5555

5656
class TTextWalker {
5757
public:
58-
TTextWalker(TPosition& position)
58+
TTextWalker(TPosition& position, bool utf8Aware = false)
5959
: Position(position)
60+
, Utf8Aware(utf8Aware)
6061
, HaveCr(false)
6162
, LfCount(0)
6263
{
6364
}
6465

66+
// Returns true when the two most significant bits of `c` are `10`,
// i.e. the byte has the form 10xxxxxx (a UTF-8 continuation byte).
static inline bool IsUtf8Intermediate(char c) {
    constexpr unsigned char topTwoBits = 0xC0;
    constexpr unsigned char continuationTag = 0x80;
    // Work on the raw byte value so the check reads the same regardless of
    // whether plain `char` is signed on the target platform.
    const auto byte = static_cast<unsigned char>(c);
    return (byte & topTwoBits) == continuationTag;
}
69+
6570
template<typename T>
6671
TTextWalker& Advance(const T& buf) {
6772
for (char c : buf) {
@@ -74,6 +79,7 @@ class TTextWalker {
7479

7580
private:
7681
TPosition& Position;
82+
const bool Utf8Aware;
7783
bool HaveCr;
7884
ui32 LfCount;
7985
};

0 commit comments

Comments
 (0)