Skip to content

Commit e193846

Browse files
Wrote necessary plus some additional tests to
investigate possible alignment and versification issues with deuterocanonical books
1 parent 3a9b17e commit e193846

30 files changed

+17006
-1
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
using System.Collections.Generic;
2+
3+
namespace SIL.Scripture.Extensions
4+
{
5+
public static class ScrVersExtensions
{
    /// <summary>
    /// Collects the verse references of one book according to a versification.
    /// </summary>
    /// <param name="scrVers">The versification that supplies chapter and verse counts and exclusions.</param>
    /// <param name="bookNum">The number of the book to enumerate.</param>
    /// <returns>
    /// One <see cref="VerseRef"/> for every verse from 1 up to the last verse of each chapter
    /// (chapters 1 up to the last chapter of the book), leaving out verses the versification
    /// reports as excluded. The result is fully built before it is returned.
    /// </returns>
    public static IEnumerable<VerseRef> GetReferencesForBook(this ScrVers scrVers, int bookNum)
    {
        var verseRefs = new List<VerseRef>();
        int chapterCount = scrVers.GetLastChapter(bookNum);

        for (int chapter = 1; chapter <= chapterCount; chapter++)
        {
            int verseCount = scrVers.GetLastVerse(bookNum, chapter);

            for (int verse = 1; verse <= verseCount; verse++)
            {
                // Exclusions are looked up by the combined book/chapter/verse number.
                if (scrVers.IsExcluded(VerseRef.GetBBBCCCVVV(bookNum, chapter, verse)))
                {
                    continue;
                }

                verseRefs.Add(new VerseRef(bookNum, chapter, verse, scrVers));
            }
        }

        return verseRefs;
    }
}
32+
}

tests/SIL.Machine.Tests/Corpora/CorporaTestHelpers.cs

+132
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
using System.IO.Compression;
2+
using System.Text.RegularExpressions;
23
using NUnit.Framework.Constraints;
4+
using SIL.Scripture;
35

46
namespace SIL.Machine.Corpora;
57

@@ -18,6 +20,16 @@ internal static class CorporaTestHelpers
1820
/// <summary>Path built from <c>TestDataPath</c>, "usfm" and "source".</summary>
public static readonly string UsfmSourceProjectPath = Path.Combine(TestDataPath, "usfm", "source");

/// <summary>Path built from <c>TestDataPath</c>, "usx" and "Tes".</summary>
public static readonly string UsxTestProjectPath = Path.Combine(TestDataPath, "usx", "Tes");

/// <summary>Path built from <c>TestDataPath</c> and "txt".</summary>
public static readonly string TextTestProjectPath = Path.Combine(TestDataPath, "txt");

/// <summary>Path built from <c>TestDataPath</c>, "deuterocanonicals" and "source".</summary>
public static readonly string DeuterocanonicalsSourcePath = Path.Combine(TestDataPath, "deuterocanonicals", "source");

/// <summary>Path built from <c>TestDataPath</c>, "deuterocanonicals" and "target".</summary>
public static readonly string DeuterocanonicalsTargetPath = Path.Combine(TestDataPath, "deuterocanonicals", "target");
2133

2234
public static string CreateTestDblBundle()
2335
{
@@ -43,4 +55,124 @@ public static EqualConstraint IgnoreLineEndings(this EqualConstraint constraint)
4355
(actual, expected) => actual.ReplaceLineEndings() == expected.ReplaceLineEndings()
4456
);
4557
}
58+
59+
/// <summary>
/// Creates a Paratext text corpus over <see cref="DeuterocanonicalsSourcePath"/>,
/// with the <c>includeAllText</c> option switched on.
/// </summary>
/// <returns>A newly constructed source corpus.</returns>
public static ParatextTextCorpus GetDeuterocanonicalSourceCorpus() =>
    new ParatextTextCorpus(CorporaTestHelpers.DeuterocanonicalsSourcePath, includeAllText: true);
67+
68+
/// <summary>
/// Creates a Paratext text corpus over <see cref="DeuterocanonicalsTargetPath"/>,
/// with the <c>includeAllText</c> option switched on.
/// </summary>
/// <returns>A newly constructed target corpus.</returns>
public static ParatextTextCorpus GetDeuterocanonicalTargetCorpus() =>
    new ParatextTextCorpus(CorporaTestHelpers.DeuterocanonicalsTargetPath, includeAllText: true);
76+
77+
/// <summary>
/// Turns a reference such as "S3Y 1:1-29" into one reference per verse.
/// A reference without a hyphen in its verse part is parsed and returned on its own.
/// </summary>
/// <param name="verseRange">A reference of the form "BOOK C:V" or "BOOK C:V1-V2".</param>
/// <param name="versification">The versification handed to <c>ScriptureRef.Parse</c>.</param>
/// <returns>
/// A lazily evaluated sequence of references; parsing (and any parsing failure) happens
/// only while the sequence is being enumerated.
/// </returns>
public static IEnumerable<ScriptureRef> ExpandVerseRange(string verseRange, ScrVers versification)
{
    string[] segments = verseRange.Split(':');
    string chapterPrefix = segments[0].Trim();
    string versePart = segments[1];

    // No hyphen: the input already names a single verse, so parse it unchanged.
    if (!versePart.Contains('-'))
    {
        yield return ScriptureRef.Parse(verseRange, versification);
        yield break;
    }

    int[] bounds = versePart.Split('-').Select(int.Parse).ToArray();
    int firstVerse = bounds[0];
    int lastVerse = bounds[1];

    int current = firstVerse;
    while (current <= lastVerse)
    {
        yield return ScriptureRef.Parse($"{chapterPrefix}:{current}", versification);
        current++;
    }
}
102+
103+
/// <summary>
/// Expands range-to-range mappings (e.g. "S3Y 1:1-29" to "DAG 3:24-52") into verse-to-verse mappings.
/// </summary>
/// <param name="mappings">Source references keyed to their target references; each may be a single verse or a hyphenated range.</param>
/// <returns>
/// A new dictionary. Pairs where both sides are a single verse are copied unchanged;
/// every other pair contributes one "BOOK C:V" entry per verse, matched by position in the range.
/// </returns>
/// <exception cref="InvalidOperationException">The two sides of a mapping cover a different number of verses.</exception>
public static Dictionary<string, string> ExpandVerseMappings(Dictionary<string, string> mappings)
{
    Dictionary<string, string> result = new();

    foreach (KeyValuePair<string, string> pair in mappings)
    {
        var source = ParseRange(pair.Key);
        var target = ParseRange(pair.Value);

        // Only when BOTH sides are a single verse is the pair copied as written.
        bool bothSingle = source.IsSingleVerse && target.IsSingleVerse;
        if (bothSingle)
        {
            result[pair.Key] = pair.Value;
        }
        else
        {
            int sourceSpan = source.EndVerse - source.StartVerse + 1;
            int targetSpan = target.EndVerse - target.StartVerse + 1;

            if (sourceSpan != targetSpan)
            {
                throw new InvalidOperationException(
                    "Source and target verse ranges must have the same number of verses."
                );
            }

            // Pair the verses off by their offset from the start of each range.
            for (int offset = 0; offset < sourceSpan; offset++)
            {
                string from = $"{source.Book} {source.Chapter}:{source.StartVerse + offset}";
                string to = $"{target.Book} {target.Chapter}:{target.StartVerse + offset}";
                result[from] = to;
            }
        }
    }

    return result;
}
140+
141+
/// <summary>
/// Splits a reference of the form "BOOK C:V" or "BOOK C:V1-V2" into its components.
/// </summary>
/// <param name="range">
/// The reference text. The book token and the chapter:verse token are separated by spaces;
/// leading, trailing and repeated spaces are ignored.
/// </param>
/// <returns>
/// The book token, the chapter number, the first and last verse numbers (equal when no
/// hyphenated end verse is given) and a flag telling whether first and last verse are equal.
/// </returns>
public static (string Book, int Chapter, int StartVerse, int EndVerse, bool IsSingleVerse) ParseRange(string range)
{
    // Empty entries are dropped so that extra spaces cannot push the chapter:verse token
    // out of position 1 (a plain Split(' ') would yield "" there and make int.Parse throw).
    string[] parts = range.Split(' ', StringSplitOptions.RemoveEmptyEntries);
    string book = parts[0];

    string[] chapterAndVerses = parts[1].Split(':');
    int chapter = int.Parse(chapterAndVerses[0]);

    string[] verseRange = chapterAndVerses[1].Split('-');

    int startVerse = int.Parse(verseRange[0]);
    // Without a hyphenated end verse the range collapses to the start verse.
    int endVerse = verseRange.Length > 1 ? int.Parse(verseRange[1]) : startVerse;

    bool isSingleVerse = startVerse == endVerse;

    return (book, chapter, startVerse, endVerse, isSingleVerse);
}
158+
159+
/// <summary>
/// Strips each of the given substrings out of a corpus string.
/// </summary>
/// <param name="input">The text to clean.</param>
/// <param name="unwanted">The substrings to remove, processed in array order.</param>
/// <returns>
/// The text after every removal. Surrounding whitespace is trimmed after each removal,
/// so the input comes back untouched (and untrimmed) when <paramref name="unwanted"/> is empty.
/// </returns>
public static string CleanString(string input, string[] unwanted)
{
    string cleaned = input;

    for (int index = 0; index < unwanted.Length; index++)
    {
        string stripped = cleaned.Replace(unwanted[index], "");
        cleaned = stripped.Trim();
    }

    return cleaned;
}
170+
171+
/// <summary>
/// Collapses every run of one or more whitespace characters (spaces, tabs, line breaks)
/// into a single space character.
/// </summary>
/// <param name="input">The text to normalize.</param>
/// <returns>The text with each whitespace run replaced by one space; leading and trailing runs are kept as a single space.</returns>
public static string NormalizeSpaces(string input)
{
    var whitespaceRun = new Regex(@"\s+");
    return whitespaceRun.Replace(input, " ");
}
46178
}

0 commit comments

Comments
 (0)