Skip to content

Commit c73a00a

Browse files
authored
Assembler: clone-all from link-index registry (#1311)
* Assembler: clone-all from link-index registry
* Add comments
* Return empty HeadReference on failure
* Use switch case for checkout strategy and disable sparse-checkout for CheckoutStrategy.Full
1 parent da2667b commit c73a00a

File tree

3 files changed

+133
-81
lines changed

3 files changed

+133
-81
lines changed

src/Elastic.Documentation.Configuration/Assembler/Repository.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// Elasticsearch B.V licenses this file to you under the Apache 2.0 License.
33
// See the LICENSE file in the project root for more information
44

5+
using System.Runtime.Serialization;
56
using YamlDotNet.Serialization;
67

78
namespace Elastic.Documentation.Configuration.Assembler;
@@ -12,6 +13,14 @@ public record NarrativeRepository : Repository
1213
public override string Name { get; set; } = RepositoryName;
1314
}
1415

16+
public enum CheckoutStrategy
17+
{
18+
[EnumMember(Value = "partial")]
19+
Partial,
20+
[EnumMember(Value = "full")]
21+
Full
22+
}
23+
1524
public record Repository
1625
{
1726
[YamlIgnore]
@@ -27,7 +36,7 @@ public record Repository
2736
public string GitReferenceNext { get; set; } = "main";
2837

2938
[YamlMember(Alias = "checkout_strategy")]
30-
public string CheckoutStrategy { get; set; } = "partial";
39+
public CheckoutStrategy CheckoutStrategy { get; set; } = CheckoutStrategy.Partial;
3140

3241
[YamlMember(Alias = "skip")]
3342
public bool Skip { get; set; }

src/tooling/docs-assembler/Cli/RepositoryCommands.cs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,10 @@
33
// See the LICENSE file in the project root for more information
44

55
using System.Collections.Concurrent;
6+
using System.ComponentModel;
67
using System.Diagnostics;
78
using System.Diagnostics.CodeAnalysis;
9+
using System.Globalization;
810
using System.IO.Abstractions;
911
using System.Net.Mime;
1012
using Actions.Core.Services;
@@ -39,11 +41,13 @@ private void AssignOutputLogger()
3941
/// <summary> Clones all repositories </summary>
4042
/// <param name="strict"> Treat warnings as errors and fail the build on warnings</param>
4143
/// <param name="environment"> The environment to build</param>
44+
/// <param name="fetchLatest"> If true, fetch the latest commit of the branch instead of the ref recorded in the link registry entry</param>
4245
/// <param name="ctx"></param>
4346
[Command("clone-all")]
4447
public async Task<int> CloneAll(
4548
bool? strict = null,
4649
string? environment = null,
50+
bool? fetchLatest = null,
4751
Cancel ctx = default
4852
)
4953
{
@@ -55,7 +59,8 @@ public async Task<int> CloneAll(
5559

5660
var assembleContext = new AssembleContext(environment, collector, new FileSystem(), new FileSystem(), null, null);
5761
var cloner = new AssemblerRepositorySourcer(logger, assembleContext);
58-
_ = await cloner.AcquireAllLatest(ctx);
62+
63+
_ = await cloner.CloneAll(fetchLatest ?? false, ctx);
5964

6065
await collector.StopAsync(ctx);
6166

@@ -138,7 +143,6 @@ public async Task<int> UpdateLinkIndexAll(ContentSource contentSource, Cancel ct
138143
// It's only used to get the list of repositories.
139144
var assembleContext = new AssembleContext("prod", collector, new FileSystem(), new FileSystem(), null, null);
140145
var cloner = new RepositorySourcer(logger, assembleContext.CheckoutDirectory, new FileSystem(), collector);
141-
var dict = new ConcurrentDictionary<string, Stopwatch>();
142146
var repositories = new Dictionary<string, Repository>(assembleContext.Configuration.ReferenceRepositories)
143147
{
144148
{ NarrativeRepository.RepositoryName, assembleContext.Configuration.Narrative }
@@ -152,8 +156,7 @@ await Parallel.ForEachAsync(repositories,
152156
{
153157
try
154158
{
155-
var name = kv.Key.Trim();
156-
var checkout = cloner.CloneOrUpdateRepository(kv.Value, name, kv.Value.GetBranch(contentSource), dict);
159+
var checkout = cloner.CloneRef(kv.Value, kv.Value.GetBranch(contentSource), true);
157160
var outputPath = Directory.CreateTempSubdirectory(checkout.Repository.Name).FullName;
158161
var context = new BuildContext(
159162
collector,

src/tooling/docs-assembler/Sourcing/RepositorySourcesFetcher.cs

Lines changed: 116 additions & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
using System.IO.Abstractions;
99
using Elastic.Documentation.Configuration.Assembler;
1010
using Elastic.Documentation.Diagnostics;
11+
using Elastic.Documentation.LinkIndex;
1112
using Elastic.Markdown.IO;
1213
using Microsoft.Extensions.Logging;
1314
using ProcNet;
@@ -46,129 +47,169 @@ public IReadOnlyCollection<Checkout> GetAll()
4647
return checkouts;
4748
}
4849

49-
public async Task<IReadOnlyCollection<Checkout>> AcquireAllLatest(Cancel ctx = default)
50+
public async Task<IReadOnlyCollection<Checkout>> CloneAll(bool fetchLatest, Cancel ctx = default)
5051
{
51-
_logger.LogInformation(
52-
"Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
52+
_logger.LogInformation("Cloning all repositories for environment {EnvironmentName} using '{ContentSourceStrategy}' content sourcing strategy",
5353
PublishEnvironment.Name,
5454
PublishEnvironment.ContentSource.ToStringFast(true)
5555
);
56+
var checkouts = new ConcurrentBag<Checkout>();
57+
58+
ILinkIndexReader linkIndexReader = Aws3LinkIndexReader.CreateAnonymous();
59+
var linkRegistry = await linkIndexReader.GetRegistry(ctx);
5660

5761
var repositories = new Dictionary<string, Repository>(Configuration.ReferenceRepositories)
5862
{
5963
{ NarrativeRepository.RepositoryName, Configuration.Narrative }
6064
};
61-
return await RepositorySourcer.AcquireAllLatest(repositories, PublishEnvironment.ContentSource, ctx);
62-
}
63-
}
64-
65-
public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector)
66-
{
67-
private readonly ILogger<RepositorySourcer> _logger = logger.CreateLogger<RepositorySourcer>();
6865

69-
public async Task<IReadOnlyCollection<Checkout>> AcquireAllLatest(Dictionary<string, Repository> repositories, ContentSource source, Cancel ctx = default)
70-
{
71-
var dict = new ConcurrentDictionary<string, Stopwatch>();
72-
var checkouts = new ConcurrentBag<Checkout>();
7366
await Parallel.ForEachAsync(repositories,
7467
new ParallelOptions
7568
{
7669
CancellationToken = ctx,
7770
MaxDegreeOfParallelism = Environment.ProcessorCount
78-
}, async (kv, c) =>
71+
}, async (repo, c) =>
7972
{
8073
await Task.Run(() =>
8174
{
82-
var name = kv.Key.Trim();
83-
var repo = kv.Value;
84-
var clone = CloneOrUpdateRepository(kv.Value, name, repo.GetBranch(source), dict);
85-
checkouts.Add(clone);
75+
if (!linkRegistry.Repositories.TryGetValue(repo.Key, out var entry))
76+
{
77+
context.Collector.EmitError("", $"'{repo.Key}' does not exist in link index");
78+
return;
79+
}
80+
var branch = repo.Value.GetBranch(PublishEnvironment.ContentSource);
81+
var gitRef = branch;
82+
if (!fetchLatest)
83+
{
84+
if (!entry.TryGetValue(branch, out var entryInfo))
85+
{
86+
context.Collector.EmitError("", $"'{repo.Key}' does not have a '{branch}' entry in link index");
87+
return;
88+
}
89+
gitRef = entryInfo.GitReference;
90+
}
91+
checkouts.Add(RepositorySourcer.CloneRef(repo.Value, gitRef, fetchLatest));
8692
}, c);
8793
}).ConfigureAwait(false);
88-
89-
return checkouts.ToList().AsReadOnly();
94+
return checkouts;
9095
}
96+
}
9197

92-
public Checkout CloneOrUpdateRepository(Repository repository, string name, string branch, ConcurrentDictionary<string, Stopwatch> dict)
93-
{
94-
var fs = readFileSystem;
95-
var checkoutFolder = fs.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, name));
96-
var relativePath = Path.GetRelativePath(Paths.WorkingDirectoryRoot.FullName, checkoutFolder.FullName);
97-
var sw = Stopwatch.StartNew();
9898

99-
_ = dict.AddOrUpdate($"{name} ({branch})", sw, (_, _) => sw);
99+
public class RepositorySourcer(ILoggerFactory logger, IDirectoryInfo checkoutDirectory, IFileSystem readFileSystem, DiagnosticsCollector collector)
100+
{
101+
private readonly ILogger<RepositorySourcer> _logger = logger.CreateLogger<RepositorySourcer>();
100102

101-
string? head;
102-
if (checkoutFolder.Exists)
103+
/// <summary>
104+
/// Clones the repository to the checkout directory and checks out the specified git reference.
105+
/// </summary>
106+
/// <param name="repository">The repository to clone.</param>
107+
/// <param name="gitRef">The git reference to check out: a branch, commit, or tag.</param>
108+
public Checkout CloneRef(Repository repository, string gitRef, bool pull = false, int attempt = 1)
109+
{
110+
var checkoutFolder = readFileSystem.DirectoryInfo.New(Path.Combine(checkoutDirectory.FullName, repository.Name));
111+
if (attempt > 3)
103112
{
104-
if (!TryUpdateSource(name, branch, relativePath, checkoutFolder, out head))
105-
head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder);
113+
collector.EmitError("", $"Failed to clone repository {repository.Name}@{gitRef} after 3 attempts");
114+
return new Checkout
115+
{
116+
Directory = checkoutFolder,
117+
HeadReference = "",
118+
Repository = repository,
119+
};
106120
}
107-
else
108-
head = CheckoutFromScratch(repository, name, branch, relativePath, checkoutFolder);
109-
110-
sw.Stop();
111-
112-
return new Checkout
121+
_logger.LogInformation("{RepositoryName}: Cloning repository {RepositoryName}@{Commit} to {CheckoutFolder}", repository.Name, repository.Name, gitRef,
122+
checkoutFolder.FullName);
123+
if (!checkoutFolder.Exists)
113124
{
114-
Repository = repository,
115-
Directory = checkoutFolder,
116-
HeadReference = head
117-
};
118-
}
119-
120-
private bool TryUpdateSource(string name, string branch, string relativePath, IDirectoryInfo checkoutFolder, [NotNullWhen(true)] out string? head)
121-
{
122-
head = null;
123-
try
125+
checkoutFolder.Create();
126+
checkoutFolder.Refresh();
127+
}
128+
var isGitInitialized = GitInit(repository, checkoutFolder);
129+
string? head = null;
130+
if (isGitInitialized)
124131
{
125-
_logger.LogInformation("Pull: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath);
126-
// --allow-unrelated-histories due to shallow clones not finding a common ancestor
127-
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff");
132+
try
133+
{
134+
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
135+
}
136+
catch (Exception e)
137+
{
138+
_logger.LogError(e, "{RepositoryName}: Failed to acquire current commit, falling back to recreating from scratch", repository.Name);
139+
checkoutFolder.Delete(true);
140+
checkoutFolder.Refresh();
141+
return CloneRef(repository, gitRef, pull, attempt + 1);
142+
}
128143
}
129-
catch (Exception e)
144+
// Repository already checked out the same commit
145+
if (head != null && head == gitRef)
146+
// nothing to do, already at the right commit
147+
_logger.LogInformation("{RepositoryName}: HEAD already at {GitRef}", repository.Name, gitRef);
148+
else
130149
{
131-
_logger.LogError(e, "Failed to update {Name} from {RelativePath}, falling back to recreating from scratch", name, relativePath);
132-
if (checkoutFolder.Exists)
150+
FetchAndCheckout(repository, gitRef, checkoutFolder);
151+
if (!pull)
152+
{
153+
return new Checkout
154+
{
155+
Directory = checkoutFolder,
156+
HeadReference = gitRef,
157+
Repository = repository,
158+
};
159+
}
160+
try
161+
{
162+
ExecIn(checkoutFolder, "git", "pull", "--depth", "1", "--allow-unrelated-histories", "--no-ff", "origin", gitRef);
163+
}
164+
catch (Exception e)
133165
{
166+
_logger.LogError(e, "{RepositoryName}: Failed to update {GitRef} from {RelativePath}, falling back to recreating from scratch",
167+
repository.Name, gitRef, checkoutFolder.FullName);
134168
checkoutFolder.Delete(true);
135169
checkoutFolder.Refresh();
170+
return CloneRef(repository, gitRef, pull, attempt + 1);
136171
}
137-
return false;
138172
}
139173

140-
head = Capture(checkoutFolder, "git", "rev-parse", "HEAD");
174+
return new Checkout
175+
{
176+
Directory = checkoutFolder,
177+
HeadReference = gitRef,
178+
Repository = repository,
179+
};
180+
}
141181

142-
return true;
182+
/// <summary>
183+
/// Initializes the git repository if it is not already initialized.
184+
/// Returns true if the repository was already initialized.
185+
/// </summary>
186+
private bool GitInit(Repository repository, IDirectoryInfo checkoutFolder)
187+
{
188+
var isGitAlreadyInitialized = Directory.Exists(Path.Combine(checkoutFolder.FullName, ".git"));
189+
if (isGitAlreadyInitialized)
190+
return true;
191+
ExecIn(checkoutFolder, "git", "init");
192+
ExecIn(checkoutFolder, "git", "remote", "add", "origin", repository.Origin);
193+
return false;
143194
}
144195

145-
private string CheckoutFromScratch(Repository repository, string name, string branch, string relativePath, IDirectoryInfo checkoutFolder)
196+
private void FetchAndCheckout(Repository repository, string gitRef, IDirectoryInfo checkoutFolder)
146197
{
147-
_logger.LogInformation("Checkout: {Name}\t{Branch}\t{RelativePath}", name, branch, relativePath);
198+
ExecIn(checkoutFolder, "git", "fetch", "--no-tags", "--prune", "--no-recurse-submodules", "--depth", "1", "origin", gitRef);
148199
switch (repository.CheckoutStrategy)
149200
{
150-
case "full":
151-
Exec("git", "clone", repository.Origin, checkoutFolder.FullName,
152-
"--depth", "1", "--single-branch",
153-
"--branch", branch
154-
);
201+
case CheckoutStrategy.Full:
202+
ExecIn(checkoutFolder, "git", "sparse-checkout", "disable");
155203
break;
156-
case "partial":
157-
Exec(
158-
"git", "clone", "--filter=blob:none", "--no-checkout", repository.Origin, checkoutFolder.FullName
159-
);
160-
161-
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "--cone");
162-
ExecIn(checkoutFolder, "git", "checkout", branch);
204+
case CheckoutStrategy.Partial:
163205
ExecIn(checkoutFolder, "git", "sparse-checkout", "set", "docs");
164206
break;
207+
default:
208+
throw new ArgumentOutOfRangeException(nameof(repository), repository.CheckoutStrategy, null);
165209
}
166-
167-
return Capture(checkoutFolder, "git", "rev-parse", "HEAD");
210+
ExecIn(checkoutFolder, "git", "checkout", "--force", gitRef);
168211
}
169212

170-
private void Exec(string binary, params string[] args) => ExecIn(null, binary, args);
171-
172213
private void ExecIn(IDirectoryInfo? workingDirectory, string binary, params string[] args)
173214
{
174215
var arguments = new ExecArguments(binary, args)
@@ -221,7 +262,6 @@ string CaptureOutput()
221262
return line;
222263
}
223264
}
224-
225265
}
226266

227267
public class NoopConsoleWriter : IConsoleOutWriter

0 commit comments

Comments
 (0)