// Patch summary (unified diff, whitespace-collapsed) for projects/10/jackanalyzer.
// It replaces the stateful JackFile cursor with a one-shot Tokenize() function.
//
// main.cpp:
//   Tokenize(std::filesystem::path) no longer constructs a nand2tetris::JackFile
//   and steps it with token()/Advance(). It now range-iterates the container
//   returned by nand2tetris::Tokenize(jack_path.string()), logs each token's
//   ToXmlElement() when FLAGS_v is set, and streams every token into the
//   TokensXmlFile.
//
// tokenizer.cpp:
//   The JackFile constructor, Advance() and the Read* member functions are
//   removed. ReadComment, ReadIntegerConstant, ReadStringConstant and
//   ReadKeywordOrIdentifier become free functions in an anonymous namespace that
//   take std::ifstream&. The three token readers return the result of
//   std::make_unique instead of assigning `new Token(...)` to token_.
//   The new Tokenize(std::string_view) opens the file and QCHECK-fails if it is
//   not open. It then loops: skip whitespace, stop at EOF, skip comments, append
//   one token per iteration, and LOG(FATAL) on a character no branch accepts.
//   It closes the stream and returns the collected tokens.
//   Three #include lines are added.
//
// tokenizer.h:
//   class JackFile is deleted and Tokenize(std::string_view jack_file_path) is
//   declared. Two #include lines are added.
//
// NOTE(review): template argument lists and #include targets appear to be
//   missing in this copy (a bare `#include`, `std::unique_ptr ReadIntegerConstant`,
//   `std::numeric_limits::max()`, `static_cast(file.peek())`). Presumably they
//   were stripped during extraction; restore them from the upstream commit before
//   applying or compiling.
// NOTE(review): Tokenize() passes jack_file_path.data() to std::ifstream. A
//   std::string_view is not guaranteed to be NUL-terminated. The visible caller
//   passes jack_path.string(), which is; confirm for any other callers.
// NOTE(review): file.peek() returns an int that may be EOF, and it is fed to the
//   absl::ascii_* predicates before the explicit EOF comparison. Verify how those
//   predicates treat the converted EOF value.
// NOTE(review): file.close() just before `return tokens;` looks redundant, since
//   the ifstream is a local that goes out of scope there.
// NOTE(review): the main.cpp loop binds `auto& token` but only reads through it;
//   `const auto&` would presumably express the intent better.
diff --git a/projects/10/jackanalyzer/main.cpp b/projects/10/jackanalyzer/main.cpp index 8224e3c..9b921de 100644 --- a/projects/10/jackanalyzer/main.cpp +++ b/projects/10/jackanalyzer/main.cpp @@ -15,18 +15,16 @@ ABSL_FLAG(bool, v, false, "verbose output, print XML output to console"); void Tokenize(std::filesystem::path jack_path) { LOG(INFO) << "Processing Jack file: " << jack_path.string(); - nand2tetris::JackFile jack_file(jack_path.string()); std::filesystem::path xml_path = jack_path; xml_path.replace_filename( absl::StrFormat("%sT.xml", jack_path.stem().string())); nand2tetris::TokensXmlFile xml_file(xml_path.string()); - while (jack_file.token()) { + for (auto& token : nand2tetris::Tokenize(jack_path.string())) { if (absl::GetFlag(FLAGS_v)) { - LOG(INFO) << jack_file.token()->ToXmlElement(); + LOG(INFO) << token->ToXmlElement(); } - xml_file << *jack_file.token(); - jack_file.Advance(); + xml_file << *token; } } diff --git a/projects/10/jackanalyzer/tokenizer.cpp b/projects/10/jackanalyzer/tokenizer.cpp index f604de1..76e78e5 100644 --- a/projects/10/jackanalyzer/tokenizer.cpp +++ b/projects/10/jackanalyzer/tokenizer.cpp @@ -1,8 +1,11 @@ #include "tokenizer.h" +#include #include +#include #include #include +#include #include "absl/log/check.h" #include "absl/log/log.h" @@ -43,106 +46,99 @@ std::string Token::ToXmlElement() const { type_str); } -JackFile::JackFile(std::string_view path) : file_(path.data()), path_(path) { - QCHECK(file_.is_open()) << "Failed to open file: " << path; - Advance(); -} - -void JackFile::Advance() { - if (token_) { - delete token_; - token_ = nullptr; - } - - while (file_) { - // Ignore preceding spaces in stream. 
- while (absl::ascii_isspace(file_.peek())) { - file_.get(); - } - if (file_.peek() == std::char_traits::eof()) { - file_.get(); - return; - } - - if (file_.peek() == '/' && ReadComment()) { - continue; - } - - if (absl::ascii_isdigit(file_.peek())) { - ReadIntegerConstant(); - return; - } - if (file_.peek() == '"') { - ReadStringConstant(); - return; - } - if (Token::IsSymbol(file_.peek())) { - token_ = new Token(TokenType::kSymbol, std::string(1, file_.get())); - return; - } - if (absl::ascii_isalpha(file_.peek()) || file_.peek() == '_') { - ReadKeywordOrIdentifier(); - return; - } - LOG(FATAL) << "Invalid character: " << static_cast(file_.peek()); - } -} +namespace { -bool JackFile::ReadComment() { - CHECK(file_.get() == '/'); - if (file_.peek() == '/') { // Single-line comment. - file_.ignore(std::numeric_limits::max(), '\n'); +bool ReadComment(std::ifstream& file) { + CHECK(file.get() == '/'); + if (file.peek() == '/') { // Single-line comment. + file.ignore(std::numeric_limits::max(), '\n'); return true; - } else if (file_.peek() == '*') { // Multi-line comment. - file_.get(); + } else if (file.peek() == '*') { // Multi-line comment. 
+ file.get(); char c; - while (file_.get(c)) { - if (c == '*' && file_.peek() == '/') { - file_.get(); + while (file.get(c)) { + if (c == '*' && file.peek() == '/') { + file.get(); break; } } - QCHECK(file_) << "Unterminated multi-line comment"; + QCHECK(file) << "Unterminated multi-line comment"; return true; } - file_.unget(); + file.unget(); return false; } -void JackFile::ReadIntegerConstant() { +std::unique_ptr ReadIntegerConstant(std::ifstream& file) { std::string value; - while (absl::ascii_isdigit(file_.peek())) { - value.push_back(file_.get()); + while (absl::ascii_isdigit(file.peek())) { + value.push_back(file.get()); } QCHECK(!value.empty()); - token_ = new Token(TokenType::kIntegerConstant, std::move(value)); + return std::make_unique(TokenType::kIntegerConstant, std::move(value)); } -void JackFile::ReadStringConstant() { - CHECK(file_.get() == '"'); +std::unique_ptr ReadStringConstant(std::ifstream& file) { + CHECK(file.get() == '"'); std::string value; char c; - while (file_.get(c)) { + while (file.get(c)) { if (c == '"') { break; } value.push_back(c); } - QCHECK(file_) << "Unterminated string constant"; - token_ = new Token(TokenType::kStringConstant, std::move(value)); + QCHECK(file) << "Unterminated string constant"; + return std::make_unique(TokenType::kStringConstant, std::move(value)); } -void JackFile::ReadKeywordOrIdentifier() { +std::unique_ptr ReadKeywordOrIdentifier(std::ifstream& file) { std::string value; - while (absl::ascii_isalnum(file_.peek()) || file_.peek() == '_') { - value.push_back(file_.get()); + while (absl::ascii_isalnum(file.peek()) || file.peek() == '_') { + value.push_back(file.get()); } QCHECK(!value.empty()); if (Token::IsKeyword(value)) { - token_ = new Token(TokenType::kKeyword, std::move(value)); + return std::make_unique(TokenType::kKeyword, std::move(value)); } else { - token_ = new Token(TokenType::kIdentifier, std::move(value)); + return std::make_unique(TokenType::kIdentifier, std::move(value)); + } +} + +} // 
namespace + +std::vector> Tokenize(std::string_view jack_file_path) { + std::ifstream file(jack_file_path.data()); + QCHECK(file.is_open()) << "Failed to open file: " << jack_file_path; + std::vector> tokens; + while (file) { + // Ignore preceding spaces in stream. + while (absl::ascii_isspace(file.peek())) { + file.get(); + } + if (file.peek() == std::char_traits::eof()) { + break; + } + + if (file.peek() == '/' && ReadComment(file)) { + continue; + } + + if (absl::ascii_isdigit(file.peek())) { + tokens.push_back(ReadIntegerConstant(file)); + } else if (file.peek() == '"') { + tokens.push_back(ReadStringConstant(file)); + } else if (Token::IsSymbol(file.peek())) { + tokens.push_back(std::make_unique(TokenType::kSymbol, + std::string(1, file.get()))); + } else if (absl::ascii_isalpha(file.peek()) || file.peek() == '_') { + tokens.push_back(ReadKeywordOrIdentifier(file)); + } else { + LOG(FATAL) << "Invalid character: " << static_cast(file.peek()); + } } + file.close(); + return tokens; } } // namespace nand2tetris diff --git a/projects/10/jackanalyzer/tokenizer.h b/projects/10/jackanalyzer/tokenizer.h index e78527c..91e6d5e 100644 --- a/projects/10/jackanalyzer/tokenizer.h +++ b/projects/10/jackanalyzer/tokenizer.h @@ -2,8 +2,10 @@ #define NAND2TETRIS_JACKANALYZER_TOKENIZER_H_ #include +#include #include #include +#include namespace nand2tetris { @@ -46,24 +48,7 @@ class Token { std::string value_; }; -class JackFile { - public: - JackFile(std::string_view path); - - void Advance(); - - Token* token() const { return token_; } - - private: - bool ReadComment(); - void ReadIntegerConstant(); - void ReadStringConstant(); - void ReadKeywordOrIdentifier(); - - std::ifstream file_; - std::string path_; - Token* token_ = nullptr; -}; +std::vector> Tokenize(std::string_view jack_file_path); } // namespace nand2tetris