From 3701d26ac233afcb2314ec5affaaca32e3e82bc0 Mon Sep 17 00:00:00 2001 From: Thomas Brady Date: Wed, 25 Sep 2024 14:11:03 -0700 Subject: [PATCH] WIP append to trusted hash file --- src/catchup/CatchupWork.cpp | 7 +- src/catchup/VerifyLedgerChainWork.cpp | 16 +++ src/catchup/VerifyLedgerChainWork.h | 5 + src/history/test/HistoryTests.cpp | 4 +- .../WriteVerifiedCheckpointHashesWork.cpp | 116 +++++++++++++++--- .../WriteVerifiedCheckpointHashesWork.h | 9 +- src/historywork/test/HistoryWorkTests.cpp | 50 ++++++++ src/main/CommandLine.cpp | 17 ++- 8 files changed, 199 insertions(+), 25 deletions(-) diff --git a/src/catchup/CatchupWork.cpp b/src/catchup/CatchupWork.cpp index 760c15436c..ddc56e79dc 100644 --- a/src/catchup/CatchupWork.cpp +++ b/src/catchup/CatchupWork.cpp @@ -175,9 +175,12 @@ CatchupWork::downloadVerifyLedgerChain(CatchupRange const& catchupRange, auto fatalFailurePromise = std::promise(); mFatalFailureFuture = fatalFailurePromise.get_future().share(); + // TODO maybe we can set maxPrevVerified.first = verifyRange.first, the issue is + // whether we have a trusted hash in this context. 
+ LedgerNumHashPair maxPrevVerified {LedgerManager::GENESIS_LEDGER_SEQ, Hash{}}; + mVerifyLedgers = std::make_shared( - mApp, *mDownloadDir, verifyRange, mLastClosedLedgerHashPair, - mRangeEndFuture, std::move(fatalFailurePromise)); + mApp, *mDownloadDir, verifyRange, mLastClosedLedgerHashPair, maxPrevVerified, mRangeEndFuture, std::move(fatalFailurePromise)); // Never retry the sequence: downloads already have retries, and there's no // point retrying verification diff --git a/src/catchup/VerifyLedgerChainWork.cpp b/src/catchup/VerifyLedgerChainWork.cpp index 3f46f67fc8..7c52562c27 100644 --- a/src/catchup/VerifyLedgerChainWork.cpp +++ b/src/catchup/VerifyLedgerChainWork.cpp @@ -107,6 +107,7 @@ trySetFuture(std::promise& promise, T value) VerifyLedgerChainWork::VerifyLedgerChainWork( Application& app, TmpDir const& downloadDir, LedgerRange const& range, LedgerNumHashPair const& lastClosedLedger, + LedgerNumHashPair const& maxPrevVerified, std::shared_future trustedMaxLedger, std::promise&& fatalFailure, std::shared_ptr outputStream) @@ -118,6 +119,7 @@ VerifyLedgerChainWork::VerifyLedgerChainWork( : mApp.getHistoryManager().checkpointContainingLedger( mRange.last())) , mLastClosed(lastClosedLedger) + , mMaxPrevVerified(maxPrevVerified) , mFatalFailurePromise(std::move(fatalFailure)) , mTrustedMaxLedger(trustedMaxLedger) , mVerifiedMinLedgerPrevFuture(mVerifiedMinLedgerPrev.get_future().share()) @@ -240,6 +242,20 @@ VerifyLedgerChainWork::verifyHistoryOfSingleCheckpoint() mChainDisagreesWithLocalState = lclResult; } } + // If the curr history entry is the same ledger as our mMaxPrevVerified, + // verify that the hashes match. 
+ if (curr.header.ledgerSeq == mMaxPrevVerified.first && mMaxPrevVerified.first != LedgerManager::GENESIS_LEDGER_SEQ) + { + if (curr.hash != mMaxPrevVerified.second) + { + CLOG_ERROR(History, "Checkpoint {} does not agree with trusted " + "checkpoint hash {}", + LedgerManager::ledgerAbbrev(curr), + LedgerManager::ledgerAbbrev(mMaxPrevVerified.first, + *mMaxPrevVerified.second)); + return HistoryManager::VERIFY_STATUS_ERR_BAD_HASH; + } + } if (beginCheckpoint) { diff --git a/src/catchup/VerifyLedgerChainWork.h b/src/catchup/VerifyLedgerChainWork.h index 53606a1774..b2220b10ce 100644 --- a/src/catchup/VerifyLedgerChainWork.h +++ b/src/catchup/VerifyLedgerChainWork.h @@ -26,6 +26,10 @@ class VerifyLedgerChainWork : public BasicWork LedgerRange const mRange; uint32_t mCurrCheckpoint; LedgerNumHashPair const mLastClosed; + // The max ledger number and hash that we have verified up to at some time in the + // past (or genesis if we have no previous verification). Invocations of + // VerifyLedgerChainWork will verify down to this ledger. + LedgerNumHashPair const mMaxPrevVerified; // Record any instance where the chain we're verifying disagrees with the // local node state. This _might_ mean we can't possibly catch up (eg. 
we're @@ -78,6 +82,7 @@ class VerifyLedgerChainWork : public BasicWork VerifyLedgerChainWork( Application& app, TmpDir const& downloadDir, LedgerRange const& range, LedgerNumHashPair const& lastClosedLedger, + LedgerNumHashPair const& maxPrevVerified, std::shared_future trustedMaxLedger, std::promise&& fatalFailure, std::shared_ptr outputStream = nullptr); diff --git a/src/history/test/HistoryTests.cpp b/src/history/test/HistoryTests.cpp index 2b882a9099..4e88125c1e 100644 --- a/src/history/test/HistoryTests.cpp +++ b/src/history/test/HistoryTests.cpp @@ -198,6 +198,7 @@ TEST_CASE("History bucket verification", "[history][catchup]") TEST_CASE("Ledger chain verification", "[ledgerheaderverification]") { + // TODO run two versions of this -- one with an existing file, one without. Config cfg(getTestConfig(0)); VirtualClock clock; auto cg = std::make_shared(); @@ -241,8 +242,9 @@ TEST_CASE("Ledger chain verification", "[ledgerheaderverification]") auto fataFailurePromise = std::promise(); std::shared_future fatalFailureFuture = fataFailurePromise.get_future().share(); + LedgerNumHashPair maxPrevVerified{LedgerManager::GENESIS_LEDGER_SEQ, Hash{}}; auto w = wm.executeWork( - tmpDir, ledgerRange, lclPair, ledgerRangeEndFuture, + tmpDir, ledgerRange, lclPair, maxPrevVerified, ledgerRangeEndFuture, std::move(fataFailurePromise)); REQUIRE(expectedState == w->getState()); REQUIRE(fatalFailureFuture.valid()); diff --git a/src/historywork/WriteVerifiedCheckpointHashesWork.cpp b/src/historywork/WriteVerifiedCheckpointHashesWork.cpp index 200c0f9e64..8affc3a0d9 100644 --- a/src/historywork/WriteVerifiedCheckpointHashesWork.cpp +++ b/src/historywork/WriteVerifiedCheckpointHashesWork.cpp @@ -15,9 +15,44 @@ #include #include #include +#include "crypto/Hex.h" +#include namespace stellar { +LedgerNumHashPair +WriteVerifiedCheckpointHashesWork::loadLatestHashPairFromJsonOutput( + std::string const& filename) +{ + if (!std::filesystem::exists(filename)) + { + // If the file does not 
exist, the latest ledger is genesis. + return {LedgerManager::GENESIS_LEDGER_SEQ, Hash{}}; + } + + std::ifstream in (filename); + Json::Value root; + Json::Reader rdr; + if (!rdr.parse(in, root)) + { + throw std::runtime_error("failed to parse JSON input " + filename); + } + if (!root.isArray()) + { + throw std::runtime_error("expected top-level array in " + filename); + } + if (root.size() < 1) + { + return {LedgerManager::GENESIS_LEDGER_SEQ, Hash{}}; + } + // Latest hash is the first element in the array. + auto const& jpair = root[0]; + if (!jpair.isArray() || (jpair.size() != 2)) + { + throw std::runtime_error("expecting 2-element sub-array in " + filename); + } + return {jpair[0].asUInt(), hexToBin256(jpair[1].asString())}; +} Hash WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput( @@ -54,8 +89,7 @@ WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput( } WriteVerifiedCheckpointHashesWork::WriteVerifiedCheckpointHashesWork( - Application& app, LedgerNumHashPair rangeEnd, std::string const& outputFile, - uint32_t nestedBatchSize, std::shared_ptr archive) + Application& app, LedgerNumHashPair rangeEnd, std::string const& trustedHashFile, uint32_t nestedBatchSize, std::shared_ptr archive) : BatchWork(app, "write-verified-checkpoint-hashes") , mNestedBatchSize(nestedBatchSize) , mRangeEnd(rangeEnd) @@ -63,7 +97,8 @@ WriteVerifiedCheckpointHashesWork::WriteVerifiedCheckpointHashesWork( , mRangeEndFuture(mRangeEndPromise.get_future().share()) , mCurrCheckpoint(rangeEnd.first) , mArchive(archive) - , mOutputFileName(outputFile) + , mTrustedHashFileName(trustedHashFile) + , mOutputFileName(mTrustedHashFileName + ".tmp") { mRangeEndPromise.set_value(mRangeEnd); if (mArchive) @@ -71,6 +106,7 @@ WriteVerifiedCheckpointHashesWork::WriteVerifiedCheckpointHashesWork( CLOG_INFO(History, "selected archive {}", mArchive->getName()); } startOutputFile(); + parseTrustedHashFile(); } WriteVerifiedCheckpointHashesWork::~WriteVerifiedCheckpointHashesWork() @@ -81,7 
+117,7 @@ WriteVerifiedCheckpointHashesWork::~WriteVerifiedCheckpointHashesWork() bool WriteVerifiedCheckpointHashesWork::hasNext() const { - return mCurrCheckpoint != LedgerManager::GENESIS_LEDGER_SEQ; + return mCurrCheckpoint > mLatestTrustedHashPair.first; } std::shared_ptr @@ -101,10 +137,22 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork() std::make_optional(lclHe.hash)); uint32_t const span = mNestedBatchSize * freq; uint32_t const last = mCurrCheckpoint; - uint32_t const first = - last <= span ? LedgerManager::GENESIS_LEDGER_SEQ - : hm.firstLedgerInCheckpointContaining(last - span); - + // Start from the usual batch boundary, then clamp it so that we never + // re-verify ledgers below the latest trusted ledger. + uint32_t first; + if (last <= span) + { + first = LedgerManager::GENESIS_LEDGER_SEQ; + } + else + { + first = hm.firstLedgerInCheckpointContaining(last - span); + } + if (first < mLatestTrustedHashPair.first) + { + first = mLatestTrustedHashPair.first; + } + LedgerRange const ledgerRange = LedgerRange::inclusive(first, last); CheckpointRange const checkpointRange(ledgerRange, hm); @@ -138,7 +186,7 @@ WriteVerifiedCheckpointHashesWork::yieldMoreWork() : mRangeEndFuture); auto currWork = std::make_shared( - mApp, *tmpDir, ledgerRange, lcl, prevTrusted, std::promise(), + mApp, *tmpDir, ledgerRange, lcl, mLatestTrustedHashPair, prevTrusted, std::promise(), mOutputFile); auto prevWork = mPrevVerifyWork; auto predicate = [prevWork](Application&) { @@ -177,18 +225,58 @@ WriteVerifiedCheckpointHashesWork::startOutputFile() (*mOutputFile) << "["; } +void +WriteVerifiedCheckpointHashesWork::parseTrustedHashFile() +{ + auto trustedHash = loadLatestHashPairFromJsonOutput(mTrustedHashFileName); + CLOG_INFO(History, "trusted hash from {}: {}", + mTrustedHashFileName, hexAbbrev(*trustedHash.second)); + mLatestTrustedHashPair = trustedHash; +} + void WriteVerifiedCheckpointHashesWork::endOutputFile() { if (mOutputFile && mOutputFile->is_open()) { -
// Each line of output made by a VerifyLedgerChainWork has a trailing - // comma, and trailing commas are not a valid end of a JSON array; so we - // terminate the array here with an entry that does _not_ have a - // trailing comma (and identifies an invalid ledger number anyways). - (*mOutputFile) << "\n[0, \"\"]\n]\n"; + if (std::filesystem::exists(mTrustedHashFileName)) + { + // Append everything except the first line of the trusted hash file to mOutputFile. + std::ifstream trustedHashFile(mTrustedHashFileName); + if (trustedHashFile) + { + std::string line; + // Skip the first line (the opening "["). + std::getline(trustedHashFile, line); + // Append the rest of the lines to mOutputFile. + while (std::getline(trustedHashFile, line)) + { + (*mOutputFile) << line << "\n"; + } + trustedHashFile.close(); + } + else + { + CLOG_WARNING(History, "failed to open trusted hash file {}", + mTrustedHashFileName); + } + } + else + { + // Each line of output made by a VerifyLedgerChainWork has a trailing + // comma, and trailing commas are not a valid end of a JSON array; so we + // terminate the array here with an entry that does _not_ have a + // trailing comma (and identifies an invalid ledger number anyways). + (*mOutputFile) << "\n[0, \"\"]\n]\n"; + } mOutputFile->close(); mOutputFile.reset(); + // Rename mOutputFileName to mTrustedHashFileName.
+ if (std::rename(mOutputFileName.c_str(), mTrustedHashFileName.c_str())) + { + CLOG_ERROR(History, "failed to rename {} to {}", + mOutputFileName, mTrustedHashFileName); + } } } diff --git a/src/historywork/WriteVerifiedCheckpointHashesWork.h b/src/historywork/WriteVerifiedCheckpointHashesWork.h index a1eefcb3d7..279a787ff6 100644 --- a/src/historywork/WriteVerifiedCheckpointHashesWork.h +++ b/src/historywork/WriteVerifiedCheckpointHashesWork.h @@ -27,7 +27,7 @@ class WriteVerifiedCheckpointHashesWork : public BatchWork public: WriteVerifiedCheckpointHashesWork( Application& app, LedgerNumHashPair rangeEnd, - std::string const& outputFile, + std::string const& trustedHashFile, uint32_t nestedBatchSize = NESTED_DOWNLOAD_BATCH_SIZE, std::shared_ptr archive = nullptr); ~WriteVerifiedCheckpointHashesWork(); @@ -35,10 +35,13 @@ class WriteVerifiedCheckpointHashesWork : public BatchWork // Helper to load a hash back from a file produced by this class. static Hash loadHashFromJsonOutput(uint32_t seq, std::string const& filename); + // Helper to load the latest hash back from a file produced by this class. 
+ static LedgerNumHashPair loadLatestHashPairFromJsonOutput(std::string const& filename); void onSuccess() override; private: + void parseTrustedHashFile(); // This class is a batch work, but it also creates a conditional dependency // chain among its batch elements (for trusted ledger propagation): this // dependency chain can in turn cause the BatchWork logic to stall, failing @@ -78,6 +81,8 @@ class WriteVerifiedCheckpointHashesWork : public BatchWork void startOutputFile(); void endOutputFile(); std::shared_ptr mOutputFile; - std::string mOutputFileName; + std::string const mTrustedHashFileName; + std::string const mOutputFileName; + LedgerNumHashPair mLatestTrustedHashPair; }; } diff --git a/src/historywork/test/HistoryWorkTests.cpp b/src/historywork/test/HistoryWorkTests.cpp index 8cade8ec85..2c681ca4e4 100644 --- a/src/historywork/test/HistoryWorkTests.cpp +++ b/src/historywork/test/HistoryWorkTests.cpp @@ -14,6 +14,8 @@ #include #include +#include + using namespace stellar; using namespace historytestutils; @@ -39,14 +41,62 @@ TEST_CASE("write verified checkpoint hashes", "[historywork]") REQUIRE(w->getState() == BasicWork::State::WORK_SUCCESS); } + std::map first_set; + + for (auto const& p : pairs) + { + first_set.emplace(p.first, *p.second); + LOG_DEBUG(DEFAULT_LOG, "Verified {} with hash {}", p.first, + hexAbbrev(*p.second)); + Hash h = WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput( + p.first, file); + REQUIRE(h == *p.second); + } + // Check that the "latest" ledger in the file is the same as the last + // pair in the pairs vector. + auto latest = WriteVerifiedCheckpointHashesWork::loadLatestHashPairFromJsonOutput( + file); + REQUIRE(latest.first == pairs.back().first); + // Advance the simulations. 
+ auto secondCheckpointLedger = + catchupSimulation.getLastCheckpointLedger(10 * nestedBatchSize); + catchupSimulation.ensureOnlineCatchupPossible(secondCheckpointLedger, + 5 * nestedBatchSize); + pairs = catchupSimulation.getAllPublishedCheckpoints(); + pair = pairs.back(); + // Run work again with existing file. + { + auto w = wm.executeWork( + pairs.back(), file, nestedBatchSize); + REQUIRE(w->getState() == BasicWork::State::WORK_SUCCESS); + } + std::map second_set; + for (auto const& p : pairs) + { + second_set.emplace(p.first, *p.second); + } + // Ensure the file contains all pairs, from the first run and the second. + int counter = 0; for (auto const& p : pairs) { + counter++; LOG_DEBUG(DEFAULT_LOG, "Verified {} with hash {}", p.first, hexAbbrev(*p.second)); Hash h = WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput( p.first, file); + if (h != *p.second) + { + std::cout << "Hash mismatch for ledger " << p.first << ", counter=" << counter << std::endl; + Hash h = WriteVerifiedCheckpointHashesWork::loadHashFromJsonOutput( + p.first, file); + } REQUIRE(h == *p.second); } + // Check that the "latest" ledger in the file is the same as the last + // pair in the pairs vector. 
+ latest = WriteVerifiedCheckpointHashesWork::loadLatestHashPairFromJsonOutput( + file); + REQUIRE(latest.first == pairs.back().first); } TEST_CASE("check single ledger header work", "[historywork]") diff --git a/src/main/CommandLine.cpp b/src/main/CommandLine.cpp index 5210bb8fb1..a4c8d0c359 100644 --- a/src/main/CommandLine.cpp +++ b/src/main/CommandLine.cpp @@ -205,6 +205,13 @@ outputFileParser(std::string& string) return clara::Opt{string, "FILE-NAME"}["--output-file"]("output file"); } +clara::Opt +trustedHashFileParser(std::string& string) +{ + return clara::Opt{string, "FILE-NAME"}["--trusted-hash-file"]( + "file containing trusted hashes, generated by a previous call to verify-checkpoints"); +} + clara::Opt outputDirParser(std::string& string) { @@ -1012,21 +1019,20 @@ runPublish(CommandLineArgs const& args) int runWriteVerifiedCheckpointHashes(CommandLineArgs const& args) { - std::string outputFile; + std::string trustedHashFile; uint32_t startLedger = 0; std::string startHash; CommandLine::ConfigOption configOption; return runWithHelp( args, {configurationParser(configOption), historyLedgerNumber(startLedger), - historyHashParser(startHash), outputFileParser(outputFile).required()}, + historyHashParser(startHash), trustedHashFileParser(trustedHashFile).required()}, [&] { VirtualClock clock(VirtualClock::REAL_TIME); auto cfg = configOption.getConfig(); - // Set up for quick in-memory no-catchup mode. + // Set up for quick no-catchup mode. cfg.QUORUM_INTERSECTION_CHECKER = false; - cfg.setInMemoryMode(); cfg.MODE_DOES_CATCHUP = false; auto app = Application::create(clock, cfg, false); @@ -1047,8 +1053,7 @@ runWriteVerifiedCheckpointHashes(CommandLineArgs const& args) app->getOverlayManager().shutdown(); app->getHerder().shutdown(); app->getWorkScheduler() - .executeWork(authPair, - outputFile); + .executeWork(authPair, trustedHashFile); app->gracefulStop(); return 0; }