@@ -123,10 +123,25 @@ std::optional<int> getHexDigit(char c) {
123123 return {};
124124}
125125
// How a lexed integer literal was signed.
enum Sign {
  // The literal carried no sign.
  NoSign,
  // The literal is marked positive.
  Pos,
  // The literal is marked negative.
  Neg
};
127+
126128// The result of lexing an integer token fragment.
127129struct LexIntResult : LexResult {
128130 uint64_t n;
129131 Sign sign;
132+
133+ template <typename T> bool isUnsigned () {
134+ static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
135+ return sign == NoSign && n <= std::numeric_limits<T>::max ();
136+ }
137+
138+ template <typename T> bool isSigned () {
139+ static_assert (std::is_integral_v<T> && std::is_signed_v<T>);
140+ if (sign == Neg) {
141+ return uint64_t (std::numeric_limits<T>::min ()) <= n || n == 0 ;
142+ }
143+ return n <= uint64_t (std::numeric_limits<T>::max ());
144+ }
130145};
131146
132147// Lexing context that accumulates lexed input to produce an integer token
@@ -887,123 +902,6 @@ std::optional<LexResult> keyword(std::string_view in) {
887902
888903} // anonymous namespace
889904
890- template <typename T> std::optional<T> Token::getU () const {
891- static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
892- if (auto * tok = std::get_if<IntTok>(&data)) {
893- if (tok->sign == NoSign && tok->n <= std::numeric_limits<T>::max ()) {
894- return T (tok->n );
895- }
896- // TODO: Add error production for unsigned overflow.
897- }
898- return {};
899- }
900-
901- template <typename T> std::optional<T> Token::getS () const {
902- static_assert (std::is_integral_v<T> && std::is_signed_v<T>);
903- if (auto * tok = std::get_if<IntTok>(&data)) {
904- if (tok->sign == Neg) {
905- if (uint64_t (std::numeric_limits<T>::min ()) <= tok->n || tok->n == 0 ) {
906- return T (tok->n );
907- }
908- } else {
909- if (tok->n <= uint64_t (std::numeric_limits<T>::max ())) {
910- return T (tok->n );
911- }
912- }
913- }
914- return {};
915- }
916-
917- template <typename T> std::optional<T> Token::getI () const {
918- static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
919- if (auto n = getU<T>()) {
920- return *n;
921- }
922- if (auto n = getS<std::make_signed_t <T>>()) {
923- return T (*n);
924- }
925- return {};
926- }
927-
928- template std::optional<uint64_t > Token::getU<uint64_t >() const ;
929- template std::optional<int64_t > Token::getS<int64_t >() const ;
930- template std::optional<uint64_t > Token::getI<uint64_t >() const ;
931- template std::optional<uint32_t > Token::getU<uint32_t >() const ;
932- template std::optional<int32_t > Token::getS<int32_t >() const ;
933- template std::optional<uint32_t > Token::getI<uint32_t >() const ;
934- template std::optional<uint16_t > Token::getU<uint16_t >() const ;
935- template std::optional<int16_t > Token::getS<int16_t >() const ;
936- template std::optional<uint16_t > Token::getI<uint16_t >() const ;
937- template std::optional<uint8_t > Token::getU<uint8_t >() const ;
938- template std::optional<int8_t > Token::getS<int8_t >() const ;
939- template std::optional<uint8_t > Token::getI<uint8_t >() const ;
940-
941- std::optional<double > Token::getF64 () const {
942- constexpr int signif = 52 ;
943- constexpr uint64_t payloadMask = (1ull << signif) - 1 ;
944- constexpr uint64_t nanDefault = 1ull << (signif - 1 );
945- if (auto * tok = std::get_if<FloatTok>(&data)) {
946- double d = tok->d ;
947- if (std::isnan (d)) {
948- // Inject payload.
949- uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
950- if (payload == 0 || payload > payloadMask) {
951- // TODO: Add error production for out-of-bounds payload.
952- return {};
953- }
954- uint64_t bits;
955- static_assert (sizeof (bits) == sizeof (d));
956- memcpy (&bits, &d, sizeof (bits));
957- bits = (bits & ~payloadMask) | payload;
958- memcpy (&d, &bits, sizeof (bits));
959- }
960- return d;
961- }
962- if (auto * tok = std::get_if<IntTok>(&data)) {
963- if (tok->sign == Neg) {
964- if (tok->n == 0 ) {
965- return -0.0 ;
966- }
967- return double (int64_t (tok->n ));
968- }
969- return double (tok->n );
970- }
971- return {};
972- }
973-
974- std::optional<float > Token::getF32 () const {
975- constexpr int signif = 23 ;
976- constexpr uint32_t payloadMask = (1u << signif) - 1 ;
977- constexpr uint64_t nanDefault = 1ull << (signif - 1 );
978- if (auto * tok = std::get_if<FloatTok>(&data)) {
979- float f = tok->d ;
980- if (std::isnan (f)) {
981- // Validate and inject payload.
982- uint64_t payload = tok->nanPayload ? *tok->nanPayload : nanDefault;
983- if (payload == 0 || payload > payloadMask) {
984- // TODO: Add error production for out-of-bounds payload.
985- return {};
986- }
987- uint32_t bits;
988- static_assert (sizeof (bits) == sizeof (f));
989- memcpy (&bits, &f, sizeof (bits));
990- bits = (bits & ~payloadMask) | payload;
991- memcpy (&f, &bits, sizeof (bits));
992- }
993- return f;
994- }
995- if (auto * tok = std::get_if<IntTok>(&data)) {
996- if (tok->sign == Neg) {
997- if (tok->n == 0 ) {
998- return -0 .0f ;
999- }
1000- return float (int64_t (tok->n ));
1001- }
1002- return float (tok->n );
1003- }
1004- return {};
1005- }
1006-
1007905void Lexer::skipSpace () {
1008906 while (true ) {
1009907 if (auto ctx = annotation (next ())) {
@@ -1020,9 +918,6 @@ void Lexer::skipSpace() {
1020918}
1021919
1022920bool Lexer::takeLParen () {
1023- if (curr) {
1024- return false ;
1025- }
1026921 if (LexCtx (next ()).startsWith (" (" sv)) {
1027922 ++index;
1028923 advance ();
@@ -1032,9 +927,6 @@ bool Lexer::takeLParen() {
1032927}
1033928
1034929bool Lexer::takeRParen () {
1035- if (curr) {
1036- return false ;
1037- }
1038930 if (LexCtx (next ()).startsWith (" )" sv)) {
1039931 ++index;
1040932 advance ();
@@ -1044,9 +936,6 @@ bool Lexer::takeRParen() {
1044936}
1045937
1046938std::optional<std::string> Lexer::takeString () {
1047- if (curr) {
1048- return std::nullopt ;
1049- }
1050939 if (auto result = str (next ())) {
1051940 index += result->span .size ();
1052941 advance ();
@@ -1060,9 +949,6 @@ std::optional<std::string> Lexer::takeString() {
1060949}
1061950
1062951std::optional<Name> Lexer::takeID () {
1063- if (curr) {
1064- return std::nullopt ;
1065- }
1066952 if (auto result = ident (next ())) {
1067953 index += result->span .size ();
1068954 advance ();
@@ -1080,9 +966,6 @@ std::optional<Name> Lexer::takeID() {
1080966}
1081967
1082968std::optional<std::string_view> Lexer::takeKeyword () {
1083- if (curr) {
1084- return std::nullopt ;
1085- }
1086969 if (auto result = keyword (next ())) {
1087970 index += result->span .size ();
1088971 advance ();
@@ -1130,20 +1013,124 @@ std::optional<uint32_t> Lexer::takeAlign() {
11301013 return std::nullopt ;
11311014}
11321015
1133- void Lexer::lexToken () {
1134- // TODO: Ensure we're getting the longest possible match.
1135- Token tok;
1136- if (auto t = integer (next ())) {
1137- tok = Token{t->span , IntTok{t->n , t->sign }};
1138- } else if (auto t = float_ (next ())) {
1139- tok = Token{t->span , FloatTok{t->nanPayload , t->d }};
1140- } else {
1141- // TODO: Do something about lexing errors.
1142- curr = std::nullopt ;
1143- return ;
1016+ template <typename T> std::optional<T> Lexer::takeU () {
1017+ static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
1018+ if (auto result = integer (next ()); result && result->isUnsigned <T>()) {
1019+ index += result->span .size ();
1020+ advance ();
1021+ return T (result->n );
1022+ }
1023+ // TODO: Add error production for unsigned overflow.
1024+ return std::nullopt ;
1025+ }
1026+
1027+ template <typename T> std::optional<T> Lexer::takeS () {
1028+ static_assert (std::is_integral_v<T> && std::is_signed_v<T>);
1029+ if (auto result = integer (next ()); result && result->isSigned <T>()) {
1030+ index += result->span .size ();
1031+ advance ();
1032+ return T (result->n );
1033+ }
1034+ return std::nullopt ;
1035+ }
1036+
1037+ template <typename T> std::optional<T> Lexer::takeI () {
1038+ static_assert (std::is_integral_v<T> && std::is_unsigned_v<T>);
1039+ if (auto result = integer (next ())) {
1040+ if (result->isUnsigned <T>() || result->isSigned <std::make_signed_t <T>>()) {
1041+ index += result->span .size ();
1042+ advance ();
1043+ return T (result->n );
1044+ }
11441045 }
1145- index += tok.span .size ();
1146- curr = {tok};
1046+ return std::nullopt ;
1047+ }
1048+
1049+ template std::optional<uint64_t > Lexer::takeU<uint64_t >();
1050+ template std::optional<int64_t > Lexer::takeS<int64_t >();
1051+ template std::optional<uint64_t > Lexer::takeI<uint64_t >();
1052+ template std::optional<uint32_t > Lexer::takeU<uint32_t >();
1053+ template std::optional<int32_t > Lexer::takeS<int32_t >();
1054+ template std::optional<uint32_t > Lexer::takeI<uint32_t >();
1055+ template std::optional<uint16_t > Lexer::takeU<uint16_t >();
1056+ template std::optional<int16_t > Lexer::takeS<int16_t >();
1057+ template std::optional<uint16_t > Lexer::takeI<uint16_t >();
1058+ template std::optional<uint8_t > Lexer::takeU<uint8_t >();
1059+ template std::optional<int8_t > Lexer::takeS<int8_t >();
1060+ template std::optional<uint8_t > Lexer::takeI<uint8_t >();
1061+
1062+ std::optional<double > Lexer::takeF64 () {
1063+ constexpr int signif = 52 ;
1064+ constexpr uint64_t payloadMask = (1ull << signif) - 1 ;
1065+ constexpr uint64_t nanDefault = 1ull << (signif - 1 );
1066+ if (auto result = float_ (next ())) {
1067+ double d = result->d ;
1068+ if (std::isnan (d)) {
1069+ // Inject payload.
1070+ uint64_t payload = result->nanPayload ? *result->nanPayload : nanDefault;
1071+ if (payload == 0 || payload > payloadMask) {
1072+ // TODO: Add error production for out-of-bounds payload.
1073+ return std::nullopt ;
1074+ }
1075+ uint64_t bits;
1076+ static_assert (sizeof (bits) == sizeof (d));
1077+ memcpy (&bits, &d, sizeof (bits));
1078+ bits = (bits & ~payloadMask) | payload;
1079+ memcpy (&d, &bits, sizeof (bits));
1080+ }
1081+ index += result->span .size ();
1082+ advance ();
1083+ return d;
1084+ }
1085+ if (auto result = integer (next ())) {
1086+ index += result->span .size ();
1087+ advance ();
1088+ if (result->sign == Neg) {
1089+ if (result->n == 0 ) {
1090+ return -0.0 ;
1091+ }
1092+ return double (int64_t (result->n ));
1093+ }
1094+ return double (result->n );
1095+ }
1096+ return std::nullopt ;
1097+ }
1098+
1099+ std::optional<float > Lexer::takeF32 () {
1100+ constexpr int signif = 23 ;
1101+ constexpr uint32_t payloadMask = (1u << signif) - 1 ;
1102+ constexpr uint64_t nanDefault = 1ull << (signif - 1 );
1103+ if (auto result = float_ (next ())) {
1104+ float f = result->d ;
1105+ if (std::isnan (f)) {
1106+ // Validate and inject payload.
1107+ uint64_t payload = result->nanPayload ? *result->nanPayload : nanDefault;
1108+ if (payload == 0 || payload > payloadMask) {
1109+ // TODO: Add error production for out-of-bounds payload.
1110+ return std::nullopt ;
1111+ }
1112+ uint32_t bits;
1113+ static_assert (sizeof (bits) == sizeof (f));
1114+ memcpy (&bits, &f, sizeof (bits));
1115+ bits = (bits & ~payloadMask) | payload;
1116+ memcpy (&f, &bits, sizeof (bits));
1117+ }
1118+ index += result->span .size ();
1119+ advance ();
1120+ return f;
1121+ }
1122+ if (auto result = integer (next ())) {
1123+ index += result->span .size ();
1124+ advance ();
1125+ if (result->sign == Neg) {
1126+ if (result->n == 0 ) {
1127+ return -0 .0f ;
1128+ }
1129+ return float (int64_t (result->n ));
1130+ }
1131+ return float (result->n );
1132+ }
1133+ return std::nullopt ;
11471134}
11481135
11491136TextPos Lexer::position (const char * c) const {
@@ -1164,52 +1151,8 @@ bool TextPos::operator==(const TextPos& other) const {
11641151 return line == other.line && col == other.col ;
11651152}
11661153
1167- bool IntTok::operator ==(const IntTok& other) const {
1168- return n == other.n && sign == other.sign ;
1169- }
1170-
1171- bool FloatTok::operator ==(const FloatTok& other) const {
1172- return std::signbit (d) == std::signbit (other.d ) &&
1173- (d == other.d || (std::isnan (d) && std::isnan (other.d ) &&
1174- nanPayload == other.nanPayload ));
1175- }
1176-
1177- bool Token::operator ==(const Token& other) const {
1178- return span == other.span &&
1179- std::visit (
1180- [](auto & t1, auto & t2) {
1181- if constexpr (std::is_same_v<decltype (t1), decltype (t2)>) {
1182- return t1 == t2;
1183- } else {
1184- return false ;
1185- }
1186- },
1187- data,
1188- other.data );
1189- }
1190-
// Writes `pos` to `os` as its line, then the separator string, then its
// column, and returns the stream so that insertions can be chained.
11911154std::ostream& operator <<(std::ostream& os, const TextPos& pos) {
11921155 return os << pos.line << " :" << pos.col ;
11931156}
11941157
1195- std::ostream& operator <<(std::ostream& os, const IntTok& tok) {
1196- return os << (tok.sign == Pos ? " +" : tok.sign == Neg ? " -" : " " ) << tok.n ;
1197- }
1198-
1199- std::ostream& operator <<(std::ostream& os, const FloatTok& tok) {
1200- if (std::isnan (tok.d )) {
1201- os << (std::signbit (tok.d ) ? " +" : " -" );
1202- if (tok.nanPayload ) {
1203- return os << " nan:0x" << std::hex << *tok.nanPayload << std::dec;
1204- }
1205- return os << " nan" ;
1206- }
1207- return os << tok.d ;
1208- }
1209-
1210- std::ostream& operator <<(std::ostream& os, const Token& tok) {
1211- std::visit ([&](const auto & t) { os << t; }, tok.data );
1212- return os << " \" " << tok.span << " \" " ;
1213- }
1214-
12151158} // namespace wasm::WATParser
0 commit comments