
Commit e74263e

spearce authored and Gerrit Code Review @ Eclipse.org committed

Merge "Support excluding objects during DFS compaction"

2 parents aa7be66 + 3c27ee1

1 file changed: 102 additions, 31 deletions

org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java

@@ -46,6 +46,7 @@
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
+import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -56,6 +57,7 @@
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.internal.storage.file.PackIndex;
+import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
 import org.eclipse.jgit.internal.storage.pack.PackWriter;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.NullProgressMonitor;
@@ -88,12 +90,18 @@ public class DfsPackCompactor {
 
 	private final List<DfsPackFile> srcPacks;
 
+	private final List<PackWriter.ObjectIdSet> exclude;
+
 	private final List<DfsPackDescription> newPacks;
 
 	private final List<PackWriter.Statistics> newStats;
 
 	private int autoAddSize;
 
+	private RevWalk rw;
+	private RevFlag added;
+	private RevFlag isBase;
+
 	/**
 	 * Initialize a pack compactor.
 	 *
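Note: RevFlag is a per-object bit flag allocated from a RevWalk, and the new rw/added/isBase fields let the compactor mark each object it has already scheduled. A minimal sketch of the flag semantics, assuming an ObjectReader and some ids are in scope (FlagDemo is an invented name, not part of the commit):

import java.io.IOException;

import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectReader;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;

class FlagDemo {
	// Visit each object at most once, using a RevFlag as the "seen" marker.
	static void visitOnce(ObjectReader reader, Iterable<ObjectId> ids)
			throws IOException {
		RevWalk walk = new RevWalk(reader);
		try {
			RevFlag seen = walk.newFlag("SEEN"); // allocate one flag bit
			for (ObjectId id : ids) {
				RevObject obj = walk.parseAny(id);
				if (obj.has(seen))
					continue; // this object was already handled
				obj.add(seen);
				// ... process obj here ...
			}
		} finally {
			walk.release(); // JGit of this era; newer releases use close()
		}
	}
}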
@@ -104,6 +112,7 @@ public DfsPackCompactor(DfsRepository repository) {
 		repo = repository;
 		autoAddSize = 5 * 1024 * 1024; // 5 MiB
 		srcPacks = new ArrayList<DfsPackFile>();
+		exclude = new ArrayList<PackWriter.ObjectIdSet>(4);
 		newPacks = new ArrayList<DfsPackDescription>(1);
 		newStats = new ArrayList<PackWriter.Statistics>(1);
 	}
@@ -141,10 +150,48 @@ public DfsPackCompactor autoAdd() throws IOException {
 			DfsPackDescription d = pack.getPackDescription();
 			if (d.getFileSize(PACK) < autoAddSize)
 				add(pack);
+			else
+				exclude(pack);
 		}
 		return this;
 	}
 
+	/**
+	 * Exclude objects from the compacted pack.
+	 *
+	 * @param set
+	 *            objects to not include.
+	 * @return {@code this}.
+	 */
+	public DfsPackCompactor exclude(PackWriter.ObjectIdSet set) {
+		exclude.add(set);
+		return this;
+	}
+
+	/**
+	 * Exclude objects from the compacted pack.
+	 *
+	 * @param pack
+	 *            objects to not include.
+	 * @return {@code this}.
+	 * @throws IOException
+	 *             pack index cannot be loaded.
+	 */
+	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
+		final PackIndex idx;
+		DfsReader ctx = (DfsReader) repo.newObjectReader();
+		try {
+			idx = pack.getPackIndex(ctx);
+		} finally {
+			ctx.release();
+		}
+		return exclude(new PackWriter.ObjectIdSet() {
+			public boolean contains(AnyObjectId id) {
+				return idx.hasObject(id);
+			}
+		});
+	}
+
 	/**
 	 * Compact the pack files together.
 	 *
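Note: a hedged usage sketch for the new exclude API (CompactSketch is invented for illustration; the commit itself adds only the methods above). It compacts a repository's small packs while keeping every object already stored in a large pack out of the output:

import java.io.IOException;

import org.eclipse.jgit.internal.storage.dfs.DfsPackCompactor;
import org.eclipse.jgit.internal.storage.dfs.DfsPackFile;
import org.eclipse.jgit.internal.storage.dfs.DfsRepository;
import org.eclipse.jgit.lib.NullProgressMonitor;

class CompactSketch {
	// Compact this repository's small packs while keeping every object
	// already stored in gcPack out of the new COMPACT pack.
	static void run(DfsRepository repo, DfsPackFile gcPack) throws IOException {
		DfsPackCompactor compactor = new DfsPackCompactor(repo);
		compactor.exclude(gcPack); // ids present in gcPack's index are skipped
		compactor.autoAdd();       // adds packs under 5 MiB to the compaction
		compactor.compact(NullProgressMonitor.INSTANCE);
	}
}

If gcPack is 5 MiB or larger, autoAdd() would now exclude it anyway; the explicit exclude(gcPack) call just makes the intent clear.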
@@ -200,6 +247,7 @@ public void compact(ProgressMonitor pm) throws IOException {
 				pw.release();
 			}
 		} finally {
+			rw = null;
 			ctx.release();
 		}
 	}
@@ -239,50 +287,73 @@ public int compare(DfsPackFile a, DfsPackFile b) {
 			}
 		});
 
-		RevWalk rw = new RevWalk(ctx);
-		RevFlag added = rw.newFlag("ADDED"); //$NON-NLS-1$
+		rw = new RevWalk(ctx);
+		added = rw.newFlag("ADDED"); //$NON-NLS-1$
+		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
+		List<RevObject> baseObjects = new BlockList<RevObject>();
 
 		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
 		for (DfsPackFile src : srcPacks) {
-			List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>();
-			for (PackIndex.MutableEntry ent : src.getPackIndex(ctx)) {
-				ObjectId id = ent.toObjectId();
-				RevObject obj = rw.lookupOrNull(id);
-				if (obj == null || !obj.has(added))
-					want.add(new ObjectIdWithOffset(id, ent.getOffset()));
-			}
+			List<ObjectIdWithOffset> want = toInclude(src, ctx);
+			if (want.isEmpty())
+				continue;
 
-			// Sort objects by the order they appear in the pack file, for
-			// two benefits. Scanning object type information is faster when
-			// the pack is traversed in order, and this allows the PackWriter
-			// to be given the new objects in a relatively sane newest-first
-			// ordering without additional logic, like unpacking commits and
-			// walking a commit queue.
-			Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
-				public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
-					return Long.signum(a.offset - b.offset);
-				}
-			});
-
-			// Only pack each object at most once into the output file. The
-			// PackWriter will later select a representation to reuse, which
-			// may be the version in this pack, or may be from another pack if
-			// the object was copied here to complete a thin pack and is larger
-			// than a delta from another pack. This is actually somewhat common
-			// if an object is modified frequently, such as the top level tree.
+			PackReverseIndex rev = src.getReverseIdx(ctx);
+			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
 			for (ObjectIdWithOffset id : want) {
 				int type = src.getObjectType(ctx, id.offset);
 				RevObject obj = rw.lookupAny(id, type);
-				if (!obj.has(added)) {
-					pm.update(1);
-					pw.addObject(obj);
-					obj.add(added);
+				if (obj.has(added))
+					continue;
+
+				pm.update(1);
+				pw.addObject(obj);
+				obj.add(added);
+
+				src.representation(rep, id.offset, ctx, rev);
+				if (rep.getFormat() != PACK_DELTA)
+					continue;
+
+				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
+				if (!base.has(added) && !base.has(isBase)) {
+					baseObjects.add(base);
+					base.add(isBase);
 				}
 			}
 		}
+		for (RevObject obj : baseObjects) {
+			if (!obj.has(added)) {
+				pm.update(1);
+				pw.addObject(obj);
+				obj.add(added);
+			}
+		}
 		pm.endTask();
 	}
 
+	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
+			throws IOException {
+		PackIndex srcIdx = src.getPackIndex(ctx);
+		List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>(
+				(int) srcIdx.getObjectCount());
+		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
+			ObjectId id = ent.toObjectId();
+			RevObject obj = rw.lookupOrNull(id);
+			if (obj != null && (obj.has(added) || obj.has(isBase)))
+				continue;
+			for (PackWriter.ObjectIdSet e : exclude)
+				if (e.contains(id))
+					continue SCAN;
+			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
+		}
+		Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
+			public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
+				return Long.signum(a.offset - b.offset);
+			}
+		});
+		return want;
+	}
+
 	private static void writePack(DfsObjDatabase objdb,
 			DfsPackDescription pack,
 			PackWriter pw, ProgressMonitor pm) throws IOException {
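Note: toInclude() only ever calls ObjectIdSet.contains(id), so exclusion can be driven by any membership source, not just a pack index as in exclude(DfsPackFile) above. A sketch of a set-backed implementation (MemoryIdSet is an invented name, not part of JGit):

import java.util.HashSet;
import java.util.Set;

import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectId;

class MemoryIdSet implements PackWriter.ObjectIdSet {
	private final Set<ObjectId> ids = new HashSet<ObjectId>();

	void add(AnyObjectId id) {
		ids.add(id.copy()); // copy() returns an immutable ObjectId
	}

	public boolean contains(AnyObjectId id) {
		// AnyObjectId hashes and compares by SHA-1, so a HashSet lookup
		// works for any ObjectId flavor passed in.
		return ids.contains(id);
	}
}

Also visible in the hunk above: when a selected object's stored representation is a delta (PACK_DELTA), its base is flagged isBase and queued in baseObjects, then appended after the main loop, keeping reused delta chains resolvable within the new pack.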
