Skip to content

parallelize creation of file history cache for individual files #3636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Jun 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ History parse(File file, Repository repos) throws HistoryException {
List<HistoryEntry> entries = new ArrayList<>();

entries.add(new HistoryEntry(
"", new Date(), "OpenGrok", null, "Workspace Root", true));
"", new Date(), "OpenGrok", "Workspace Root", true));

history = new History(entries);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
*/
package org.opengrok.indexer.history;

import static org.opengrok.indexer.history.HistoryEntry.TAGS_SEPARATOR;
import static org.opengrok.indexer.history.History.TAGS_SEPARATOR;

import java.io.BufferedReader;
import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ public void processStream(InputStream input) throws IOException {
String commit = s.substring("revision".length()).trim();
entry.setRevision(commit);
if (tags.containsKey(commit)) {
entry.setTags(tags.get(commit));
history.addTags(entry, tags.get(commit));
}
state = ParseState.METADATA;
s = in.readLine();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ private boolean next() throws IOException {

icomment = citer.next();

currentEntry = new HistoryEntry(icomment.get(1), idate, iauthor, null, icomment.get(0), true);
currentEntry = new HistoryEntry(icomment.get(1), idate, iauthor, icomment.get(0), true);
currentEntry.setFiles(hash.get(idate).get(iauthor).get(icomment));

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@

import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import org.jetbrains.annotations.TestOnly;
import org.opengrok.indexer.Metrics;
import org.opengrok.indexer.configuration.PathAccepter;
import org.opengrok.indexer.configuration.RuntimeEnvironment;
Expand Down Expand Up @@ -121,6 +122,7 @@ public void doRenamedFileHistory(String filename, File file, Repository reposito
history = repository.getHistory(file);
}

history.strip();
doFileHistory(filename, history, repository, root, true);
}

Expand All @@ -140,10 +142,6 @@ private void doFileHistory(String filename, History history, Repository reposito
return;
}

// File based history cache does not store files for individual
// changesets so strip them unless it is history for the repository.
history.strip();

// Assign tags to changesets they represent.
if (env.isTagsEnabled() && repository.hasFileBasedTags()) {
repository.assignTagsInHistory(history);
Expand Down Expand Up @@ -219,7 +217,7 @@ private static XMLDecoder getDecoder(InputStream in) {
return new XMLDecoder(in, null, null, new HistoryClassLoader());
}

// for testing
@TestOnly
static History readCache(String xmlconfig) {
final ByteArrayInputStream in = new ByteArrayInputStream(xmlconfig.getBytes());
try (XMLDecoder d = getDecoder(in)) {
Expand Down Expand Up @@ -309,16 +307,14 @@ private History mergeOldAndNewHistory(File cacheFile, History histNew, Repositor
}
history = new History(listOld);

// Retag the last changesets in case there have been some new
// Retag the changesets in case there have been some new
// tags added to the repository. Technically we should just
// retag the last revision from the listOld however this
// does not solve the problem when listNew contains new tags
// retroactively tagging changesets from listOld so we resort
// to this somewhat crude solution.
// to this somewhat crude solution of retagging from scratch.
if (env.isTagsEnabled() && repo.hasFileBasedTags()) {
for (HistoryEntry ent : history.getHistoryEntries()) {
ent.setTags(null);
}
history.strip();
repo.assignTagsInHistory(history);
}
}
Expand Down Expand Up @@ -461,32 +457,50 @@ public void store(History history, Repository repository, String tillRevision) t
}
}

// File based history cache does not store files for individual changesets so strip them.
history.strip();

File histDataDir = new File(getRepositoryHistDataDirname(repository));
if (!histDataDir.isDirectory() && !histDataDir.mkdirs()) {
LOGGER.log(Level.WARNING, "cannot create history cache directory for ''{0}''", histDataDir);
}

Set<String> regularFiles = map.keySet().stream().
filter(e -> !history.isRenamed(e)).collect(Collectors.toSet());
createDirectoriesForFiles(regularFiles);

/*
* Now traverse the list of files from the hash map built above
* and for each file store its history (saved in the value of the
* hash map entry for the file) in a file. Skip renamed files
* which will be handled separately below.
* Now traverse the list of files from the hash map built above and for each file store its history
* (saved in the value of the hash map entry for the file) in a file.
* The renamed files will be handled separately.
*/
LOGGER.log(Level.FINE, "Storing history for {0} files in repository ''{1}''",
new Object[]{map.entrySet().size(), repository.getDirectoryName()});
LOGGER.log(Level.FINE, "Storing history for {0} regular files in repository ''{1}''",
new Object[]{regularFiles.size(), repository.getDirectoryName()});
final File root = env.getSourceRootFile();
int fileHistoryCount = 0;
for (Map.Entry<String, List<HistoryEntry>> map_entry : map.entrySet()) {
if (handleRenamedFiles && history.isRenamed(map_entry.getKey())) {
continue;
}

doFileHistory(map_entry.getKey(), new History(map_entry.getValue()),
repository, root, false);
fileHistoryCount++;
final CountDownLatch latch = new CountDownLatch(regularFiles.size());
AtomicInteger fileHistoryCount = new AtomicInteger();
for (String file : regularFiles) {
env.getIndexerParallelizer().getHistoryFileExecutor().submit(() -> {
try {
doFileHistory(file, new History(map.get(file)), repository, root, false);
fileHistoryCount.getAndIncrement();
} catch (Exception ex) {
// We want to catch any exception since we are in thread.
LOGGER.log(Level.WARNING, "doFileHistory() got exception ", ex);
} finally {
latch.countDown();
}
});
}

LOGGER.log(Level.FINE, "Stored history for {0} files in repository ''{1}''",
// Wait for the executors to finish.
try {
latch.await();
} catch (InterruptedException ex) {
LOGGER.log(Level.SEVERE, "latch exception", ex);
}
LOGGER.log(Level.FINE, "Stored history for {0} regular files in repository ''{1}''",
new Object[]{fileHistoryCount, repository.getDirectoryName()});

if (!handleRenamedFiles) {
Expand Down Expand Up @@ -515,30 +529,13 @@ public void storeRenamed(Set<String> renamedFiles, Repository repository, String
LOGGER.log(Level.FINE, "Storing history for {0} renamed files in repository ''{1}''",
new Object[]{renamedFiles.size(), repository.getDirectoryName()});

// The directories for the renamed files have to be created before
// the actual files otherwise storeFile() might be racing for
// mkdirs() if there are multiple renamed files from single directory
// handled in parallel.
for (final String file : renamedFiles) {
File cache;
try {
cache = getCachedFile(new File(env.getSourceRootPath() + file));
} catch (ForbiddenSymlinkException ex) {
LOGGER.log(Level.FINER, ex.getMessage());
continue;
}
File dir = cache.getParentFile();
createDirectoriesForFiles(renamedFiles);

if (!dir.isDirectory() && !dir.mkdirs()) {
LOGGER.log(Level.WARNING,
"Unable to create cache directory ' {0} '.", dir);
}
}
final Repository repositoryF = repository;
final CountDownLatch latch = new CountDownLatch(renamedFiles.size());
AtomicInteger renamedFileHistoryCount = new AtomicInteger();
for (final String file : renamedFiles) {
env.getIndexerParallelizer().getHistoryRenamedExecutor().submit(() -> {
env.getIndexerParallelizer().getHistoryFileExecutor().submit(() -> {
try {
doRenamedFileHistory(file,
new File(env.getSourceRootPath() + file),
Expand All @@ -565,6 +562,28 @@ public void storeRenamed(Set<String> renamedFiles, Repository repository, String
new Object[]{renamedFileHistoryCount.intValue(), repository.getDirectoryName()});
}

/**
 * Pre-create the history cache directories for the given set of files.
 * <p>
 * The directories must exist before the per-file history is stored in
 * parallel, otherwise concurrent {@code storeFile()} calls could race on
 * {@code mkdirs()} when several files share the same parent directory.
 * @param files set of file paths relative to the source root
 * @throws HistoryException on error when resolving the cached file
 */
private void createDirectoriesForFiles(Set<String> files) throws HistoryException {
    for (final String path : files) {
        final File cachedFile;
        try {
            cachedFile = getCachedFile(new File(env.getSourceRootPath() + path));
        } catch (ForbiddenSymlinkException ex) {
            // Symlinked files are skipped; just record the reason at fine level.
            LOGGER.log(Level.FINER, ex.getMessage());
            continue;
        }

        final File parentDir = cachedFile.getParentFile();
        if (parentDir.isDirectory()) {
            continue;
        }
        if (!parentDir.mkdirs()) {
            LOGGER.log(Level.WARNING,
                    "Unable to create cache directory ''{0}''.", parentDir);
        }
    }
}

@Override
public History get(File file, Repository repository, boolean withFiles)
throws HistoryException, ForbiddenSymlinkException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
import org.opengrok.indexer.logger.LoggerFactory;
import org.opengrok.indexer.util.ForbiddenSymlinkException;

import static org.opengrok.indexer.history.HistoryEntry.TAGS_SEPARATOR;
import static org.opengrok.indexer.history.History.TAGS_SEPARATOR;

/**
* Access to a Git repository.
Expand Down Expand Up @@ -545,7 +545,7 @@ public History getHistory(File file, String sinceRevision, String tillRevision,
commit.getAuthorIdent().getWhen(),
commit.getAuthorIdent().getName() +
" <" + commit.getAuthorIdent().getEmailAddress() + ">",
null, commit.getFullMessage(), true);
commit.getFullMessage(), true);

if (isDirectory) {
SortedSet<String> files = new TreeSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,17 +23,25 @@
*/
package org.opengrok.indexer.history;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
* Class representing the history of a file.
*/
public class History {
public class History implements Serializable {

private static final long serialVersionUID = -1;

static final String TAGS_SEPARATOR = ", ";

/** Entries in the log. The first entry is the most recent one. */
private List<HistoryEntry> entries;
/**
Expand All @@ -42,7 +50,10 @@ public class History {
* These are relative to repository root.
*/
private final Set<String> renamedFiles;


// revision to tag list. Individual tags are joined via TAGS_SEPARATOR.
private Map<String, String> tags = new HashMap<>();

public History() {
this(new ArrayList<>());
}
Expand All @@ -61,6 +72,20 @@ public History() {
this.renamedFiles = renamed;
}

// Bean-style getter; required so the tag map survives (de)serialization
// of the cached History object. Returns the live map, not a copy.
public Map<String, String> getTags() {
    return tags;
}

// Bean-style setter; required so the tag map can be restored during
// deserialization. Replaces the current map wholesale.
public void setTags(Map<String, String> tags) {
    this.tags = tags;
}

// Replaces the list of history entries. By the class convention the first
// entry is expected to be the most recent one.
public void setEntries(List<HistoryEntry> entries) {
    this.entries = entries;
}

/**
* Set the list of log entries for the file. The first entry is the most
* recent one.
Expand Down Expand Up @@ -113,9 +138,11 @@ public boolean hasFileList() {
* tag list, {@code false} otherwise
*/
public boolean hasTags() {
return entries.stream()
.map(HistoryEntry::getTags)
.anyMatch(Objects::nonNull);
return !tags.isEmpty();
}

/**
 * Associate tags with the revision of the given history entry.
 * If the revision already has tags recorded, the new tags are appended
 * to the existing string, joined with {@code TAGS_SEPARATOR}.
 * @param entry history entry identifying the revision to tag
 * @param newTags tag string to record for that revision
 */
public void addTags(HistoryEntry entry, String newTags) {
    final String revision = entry.getRevision();
    final String existing = tags.get(revision);
    if (existing == null) {
        tags.put(revision, newTags);
    } else {
        tags.put(revision, existing + TAGS_SEPARATOR + newTags);
    }
}

/**
Expand All @@ -132,13 +159,15 @@ public Set<String> getRenamedFiles() {
}

/**
 * Strip the per-changeset data from this history: each entry's file list
 * (via {@link HistoryEntry#strip()}) and the revision-to-tags map.
 * @see HistoryEntry#strip()
 */
public void strip() {
    getHistoryEntries().forEach(HistoryEntry::strip);
    tags.clear();
}

@Override
Expand All @@ -151,16 +180,18 @@ public boolean equals(Object o) {
}
History that = (History) o;
return Objects.equals(this.getHistoryEntries(), that.getHistoryEntries()) &&
Objects.equals(this.getTags(), that.getTags()) &&
Objects.equals(this.getRenamedFiles(), that.getRenamedFiles());
}

@Override
public int hashCode() {
return Objects.hash(getHistoryEntries(), getRenamedFiles());
return Objects.hash(getHistoryEntries(), getTags(), getRenamedFiles());
}

@Override
public String toString() {
return this.getHistoryEntries().toString() + ", renamed files: " + this.getRenamedFiles().toString();
return this.getHistoryEntries().toString() + ", renamed files: " + this.getRenamedFiles().toString() +
" , tags: " + getTags();
}
}
Loading