Skip to content

Commit 6747ea0

Browse files
[CAS] Add UnifiedOnDiskCache and OnDiskCAS (#114103)
Add a new abstraction layer, UnifiedOnDiskCache, that builds on top of OnDiskGraphDB and OnDiskKeyValueDB and adds disk space management and data validation. Built upon UnifiedOnDiskCache, OnDiskCAS implements the ObjectStore and ActionCache interfaces for LLVM tools to interact with CAS storage.
1 parent e876540 commit 6747ea0

26 files changed

+1970
-65
lines changed

llvm/include/llvm/CAS/ActionCache.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,9 @@ class ActionCache {
7575
CanBeDistributed);
7676
}
7777

78+
/// Validate the ActionCache contents.
79+
virtual Error validate() const = 0;
80+
7881
virtual ~ActionCache() = default;
7982

8083
protected:
@@ -97,6 +100,9 @@ class ActionCache {
97100
/// Create an action cache in memory.
98101
std::unique_ptr<ActionCache> createInMemoryActionCache();
99102

103+
/// Create an action cache on disk.
104+
Expected<std::unique_ptr<ActionCache>> createOnDiskActionCache(StringRef Path);
105+
100106
} // end namespace llvm::cas
101107

102108
#endif // LLVM_CAS_ACTIONCACHE_H
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
10+
#define LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H
11+
12+
#include "llvm/Support/Error.h"
13+
14+
namespace llvm::cas {
15+
16+
class ActionCache;
17+
class ObjectStore;
18+
19+
/// Create on-disk \c ObjectStore and \c ActionCache instances based on
20+
/// \c ondisk::UnifiedOnDiskCache, with built-in hashing.
21+
Expected<std::pair<std::unique_ptr<ObjectStore>, std::unique_ptr<ActionCache>>>
22+
createOnDiskUnifiedCASDatabases(StringRef Path);
23+
24+
/// Represents the result of validating the contents using
25+
/// \c validateOnDiskUnifiedCASDatabasesIfNeeded.
26+
///
27+
/// Note: invalid results are handled as an \c Error, so there is no
/// enumerator for data that is invalid and was not recovered.
28+
enum class ValidationResult {
29+
/// The data is already valid.
30+
Valid,
31+
/// The data was invalid, but was recovered.
/// NOTE(review): presumably only produced when recovery is allowed; confirm
/// against the implementation.
32+
Recovered,
33+
/// Validation was skipped, as it was not needed.
34+
Skipped,
35+
};
36+
37+
/// Validate the data in \p Path, if needed to ensure correctness.
38+
///
39+
/// \param Path directory for the on-disk database.
40+
/// \param CheckHash Whether to validate that hashes match the data.
41+
/// \param AllowRecovery Whether to automatically recover from invalid data by
42+
/// marking the files for garbage collection.
43+
/// \param ForceValidation Whether to force validation to occur even if it
44+
/// should not be necessary.
45+
/// \param LLVMCasBinaryPath If provided, validation is performed out-of-process
46+
/// using the given \c llvm-cas executable which protects against crashes
47+
/// during validation. Otherwise validation is performed in-process.
48+
///
49+
/// \returns \c Valid if the data is already valid, \c Recovered if data
50+
/// was invalid but has been cleared, \c Skipped if validation is not needed,
51+
/// or an \c Error if validation cannot be performed or if the data is left
52+
/// in an invalid state because \p AllowRecovery is false.
53+
Expected<ValidationResult> validateOnDiskUnifiedCASDatabasesIfNeeded(
54+
StringRef Path, bool CheckHash, bool AllowRecovery, bool ForceValidation,
55+
std::optional<StringRef> LLVMCasBinaryPath);
56+
57+
} // namespace llvm::cas
58+
59+
#endif // LLVM_CAS_BUILTINUNIFIEDCASDATABASES_H

llvm/include/llvm/CAS/ObjectStore.h

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains the declaration of the ObjectStore class.
11+
///
12+
//===----------------------------------------------------------------------===//
813

914
#ifndef LLVM_CAS_OBJECTSTORE_H
1015
#define LLVM_CAS_OBJECTSTORE_H
@@ -111,7 +116,10 @@ class ObjectStore {
111116
virtual Expected<bool> isMaterialized(ObjectRef Ref) const = 0;
112117

113118
/// Validate the underlying object referred to by CASID.
114-
virtual Error validate(const CASID &ID) = 0;
119+
virtual Error validateObject(const CASID &ID) = 0;
120+
121+
/// Validate the entire ObjectStore.
122+
virtual Error validate(bool CheckHash) const = 0;
115123

116124
protected:
117125
/// Load the object referenced by \p Ref.
@@ -215,9 +223,39 @@ class ObjectStore {
215223
return Data.size();
216224
}
217225

226+
/// Set the size limit used to bound the growth of on-disk storage. The limit
227+
/// takes effect when the instance is closed.
228+
///
229+
/// Implementations may leave this unimplemented. The default implementation
/// ignores \p SizeLimit and returns success.
230+
virtual Error setSizeLimit(std::optional<uint64_t> SizeLimit) {
231+
return Error::success();
232+
}
233+
234+
/// \returns the storage size of the on-disk CAS data.
235+
///
236+
/// Implementations that do not support this query should return
237+
/// \c std::nullopt, which is what the default implementation does.
238+
virtual Expected<std::optional<uint64_t>> getStorageSize() const {
239+
return std::nullopt;
240+
}
241+
242+
/// Prune local storage to reduce its size according to the desired size
243+
/// limit. Pruning can happen concurrently with other operations.
244+
///
245+
/// Implementations may leave this unimplemented. The default implementation
/// does nothing and returns success.
246+
virtual Error pruneStorageData() { return Error::success(); }
247+
218248
/// Validate the whole node tree.
219249
Error validateTree(ObjectRef Ref);
220250

251+
/// Import object from another CAS. This will import the full tree from the
252+
/// other CAS.
253+
Expected<ObjectRef> importObject(ObjectStore &Upstream, ObjectRef Other);
254+
255+
/// Print the ObjectStore internals for debugging purposes.
256+
virtual void print(raw_ostream &) const {}
257+
void dump() const;
258+
221259
/// Get CASContext
222260
const CASContext &getContext() const { return Context; }
223261

@@ -290,8 +328,15 @@ class ObjectProxy {
290328
ObjectHandle H;
291329
};
292330

331+
/// Create an in-memory CAS.
293332
std::unique_ptr<ObjectStore> createInMemoryCAS();
294333

334+
/// \returns true if \c LLVM_ENABLE_ONDISK_CAS configuration was enabled.
335+
bool isOnDiskCASEnabled();
336+
337+
/// Create a persistent on-disk CAS at \p Path.
338+
Expected<std::unique_ptr<ObjectStore>> createOnDiskCAS(const Twine &Path);
339+
295340
} // namespace cas
296341
} // namespace llvm
297342

llvm/include/llvm/CAS/OnDiskGraphDB.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -340,13 +340,16 @@ class OnDiskGraphDB {
340340
/// \param HashByteSize Size for the object digest hash bytes.
341341
/// \param UpstreamDB Optional on-disk store to be used for faulting-in nodes
342342
/// if they don't exist in the primary store. The upstream store is only used
343-
/// for reading nodes, new nodes are only written to the primary store.
343+
/// for reading nodes, new nodes are only written to the primary store. The
344+
/// caller must ensure that \p UpstreamDB outlives this instance of
345+
/// OnDiskGraphDB; the common usage is to have a \c UnifiedOnDiskCache
346+
/// manage both.
344347
/// \param Policy If \p UpstreamDB is provided, controls how nodes are copied
345348
/// to primary store. This is recorded at creation time and subsequent opens
346349
/// need to pass the same policy otherwise the \p open will fail.
347350
static Expected<std::unique_ptr<OnDiskGraphDB>>
348351
open(StringRef Path, StringRef HashName, unsigned HashByteSize,
349-
std::unique_ptr<OnDiskGraphDB> UpstreamDB = nullptr,
352+
OnDiskGraphDB *UpstreamDB = nullptr,
350353
FaultInPolicy Policy = FaultInPolicy::FullTree);
351354

352355
~OnDiskGraphDB();
@@ -438,8 +441,7 @@ class OnDiskGraphDB {
438441

439442
// Private constructor.
440443
OnDiskGraphDB(StringRef RootPath, OnDiskTrieRawHashMap Index,
441-
OnDiskDataAllocator DataPool,
442-
std::unique_ptr<OnDiskGraphDB> UpstreamDB,
444+
OnDiskDataAllocator DataPool, OnDiskGraphDB *UpstreamDB,
443445
FaultInPolicy Policy);
444446

445447
/// Mapping from hash to object reference.
@@ -459,7 +461,7 @@ class OnDiskGraphDB {
459461
std::string RootPath;
460462

461463
/// Optional on-disk store to be used for faulting-in nodes.
462-
std::unique_ptr<OnDiskGraphDB> UpstreamDB;
464+
OnDiskGraphDB *UpstreamDB = nullptr;
463465

464466
/// The policy used to fault in data from upstream.
465467
FaultInPolicy FIPolicy;

llvm/include/llvm/CAS/OnDiskKeyValueDB.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
namespace llvm::cas::ondisk {
2121

22+
class UnifiedOnDiskCache;
23+
2224
/// An on-disk key-value data store with the following properties:
2325
/// * Keys are fixed length binary hashes with expected normal distribution.
2426
/// * Values are buffers of the same size, specified at creation time.
@@ -59,9 +61,13 @@ class OnDiskKeyValueDB {
5961
/// \param KeySize Size for the key hash bytes.
6062
/// \param ValueName Identifier name for the values.
6163
/// \param ValueSize Size for the value bytes.
64+
/// \param UnifiedCache An optional UnifiedOnDiskCache that manages the size
65+
/// and lifetime of the CAS instance; if provided, it must own the KeyValueDB
66+
/// being opened once initialization completes.
6267
static Expected<std::unique_ptr<OnDiskKeyValueDB>>
6368
open(StringRef Path, StringRef HashName, unsigned KeySize,
64-
StringRef ValueName, size_t ValueSize);
69+
StringRef ValueName, size_t ValueSize,
70+
UnifiedOnDiskCache *UnifiedCache = nullptr);
6571

6672
using CheckValueT =
6773
function_ref<Error(FileOffset Offset, ArrayRef<char> Data)>;
@@ -70,11 +76,14 @@ class OnDiskKeyValueDB {
7076
Error validate(CheckValueT CheckValue) const;
7177

7278
private:
73-
OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache)
74-
: ValueSize(ValueSize), Cache(std::move(Cache)) {}
79+
/// Private constructor. Records \p ValueSize, takes \p Cache by move, and
/// stores the \p UnifiedCache pointer exactly as passed (a raw pointer; no
/// ownership is taken here).
OnDiskKeyValueDB(size_t ValueSize, OnDiskTrieRawHashMap Cache,
80+
UnifiedOnDiskCache *UnifiedCache)
81+
: ValueSize(ValueSize), Cache(std::move(Cache)),
82+
UnifiedCache(UnifiedCache) {}
7583

7684
const size_t ValueSize;
7785
OnDiskTrieRawHashMap Cache;
86+
UnifiedOnDiskCache *UnifiedCache = nullptr;
7887
};
7988

8089
} // namespace llvm::cas::ondisk

0 commit comments

Comments
 (0)