1
1
using System . IO . Compression ;
2
+ using System . Text . RegularExpressions ;
2
3
using NUnit . Framework . Constraints ;
4
+ using SIL . Scripture ;
3
5
4
6
namespace SIL . Machine . Corpora ;
5
7
@@ -18,6 +20,16 @@ internal static class CorporaTestHelpers
18
20
/// <summary>Path built from <c>TestDataPath</c> + "usfm" + "source".</summary>
public static readonly string UsfmSourceProjectPath =
    Path.Combine(TestDataPath, "usfm", "source");

/// <summary>Path built from <c>TestDataPath</c> + "usx" + "Tes".</summary>
public static readonly string UsxTestProjectPath =
    Path.Combine(TestDataPath, "usx", "Tes");

/// <summary>Path built from <c>TestDataPath</c> + "txt".</summary>
public static readonly string TextTestProjectPath =
    Path.Combine(TestDataPath, "txt");

/// <summary>Path built from <c>TestDataPath</c> + "deuterocanonicals" + "source".</summary>
public static readonly string DeuterocanonicalsSourcePath =
    Path.Combine(TestDataPath, "deuterocanonicals", "source");

/// <summary>Path built from <c>TestDataPath</c> + "deuterocanonicals" + "target".</summary>
public static readonly string DeuterocanonicalsTargetPath =
    Path.Combine(TestDataPath, "deuterocanonicals", "target");
21
33
22
34
public static string CreateTestDblBundle ( )
23
35
{
@@ -43,4 +55,124 @@ public static EqualConstraint IgnoreLineEndings(this EqualConstraint constraint)
43
55
( actual , expected ) => actual . ReplaceLineEndings ( ) == expected . ReplaceLineEndings ( )
44
56
) ;
45
57
}
58
+
59
/// <summary>
/// Creates a <see cref="ParatextTextCorpus"/> from <see cref="DeuterocanonicalsSourcePath"/>.
/// </summary>
/// <returns>
/// A new corpus instance constructed with <c>includeAllText</c> set to <c>true</c>.
/// </returns>
public static ParatextTextCorpus GetDeuterocanonicalSourceCorpus() =>
    new(CorporaTestHelpers.DeuterocanonicalsSourcePath, includeAllText: true);
67
+
68
/// <summary>
/// Creates a <see cref="ParatextTextCorpus"/> from <see cref="DeuterocanonicalsTargetPath"/>.
/// </summary>
/// <returns>
/// A new corpus instance constructed with <c>includeAllText</c> set to <c>true</c>.
/// </returns>
public static ParatextTextCorpus GetDeuterocanonicalTargetCorpus() =>
    new(CorporaTestHelpers.DeuterocanonicalsTargetPath, includeAllText: true);
76
+
77
/// <summary>
/// Enumerates the individual verse references covered by a reference such as "S3Y 1:1-29".
/// </summary>
/// <param name="verseRange">
/// A reference whose text after the colon is either a single verse or a hyphenated
/// "first-last" pair of verse numbers.
/// </param>
/// <param name="versification">Versification handed to <c>ScriptureRef.Parse</c> for every reference.</param>
/// <returns>
/// One reference per verse from first through last (inclusive) when the verse part contains a
/// hyphen; otherwise the single reference parsed from <paramref name="verseRange"/> as given.
/// </returns>
/// <remarks>
/// This is an iterator: nothing is parsed, and no exception is raised, until the result is enumerated.
/// </remarks>
public static IEnumerable<ScriptureRef> ExpandVerseRange(string verseRange, ScrVers versification)
{
    string[] colonSplit = verseRange.Split(':');
    string chapterPrefix = colonSplit[0].Trim();
    string versePart = colonSplit[1];

    // No hyphen: the input is already a single reference, so pass it through untouched.
    if (!versePart.Contains('-'))
    {
        yield return ScriptureRef.Parse(verseRange, versification);
        yield break;
    }

    int[] bounds = Array.ConvertAll(versePart.Split('-'), int.Parse);
    int current = bounds[0];
    int last = bounds[1];

    while (current <= last)
    {
        yield return ScriptureRef.Parse($"{chapterPrefix}:{current}", versification);
        current++;
    }
}
102
+
103
/// <summary>
/// Expands range-to-range verse mappings into verse-to-verse mappings.
/// </summary>
/// <param name="mappings">
/// Pairs of references in the format accepted by <see cref="ParseRange"/>
/// (for example "S3Y 1:1-29").
/// </param>
/// <returns>
/// A new dictionary. Entries where both sides are a single verse are copied with their
/// original key and value strings; every other entry is replaced by one
/// "BOOK chapter:verse" pair per verse, pairing the n-th source verse with the n-th target verse.
/// </returns>
/// <exception cref="InvalidOperationException">
/// A source range and its target range cover a different number of verses.
/// </exception>
public static Dictionary<string, string> ExpandVerseMappings(Dictionary<string, string> mappings)
{
    Dictionary<string, string> result = new();

    foreach (var (sourceRange, targetRange) in mappings)
    {
        var source = ParseRange(sourceRange);
        var target = ParseRange(targetRange);

        // Only when BOTH sides are a single verse is the pair copied verbatim.
        if (source.IsSingleVerse && target.IsSingleVerse)
        {
            result[sourceRange] = targetRange;
        }
        else
        {
            int sourceLength = source.EndVerse - source.StartVerse + 1;
            int targetLength = target.EndVerse - target.StartVerse + 1;

            if (sourceLength != targetLength)
            {
                throw new InvalidOperationException(
                    "Source and target verse ranges must have the same number of verses."
                );
            }

            // Walk both ranges in lock-step, one verse at a time.
            int sourceVerse = source.StartVerse;
            int targetVerse = target.StartVerse;
            while (sourceVerse <= source.EndVerse)
            {
                string sourceKey = $"{source.Book} {source.Chapter}:{sourceVerse}";
                string targetValue = $"{target.Book} {target.Chapter}:{targetVerse}";
                result[sourceKey] = targetValue;

                sourceVerse++;
                targetVerse++;
            }
        }
    }

    return result;
}
140
+
141
/// <summary>
/// Parses a reference of the form "BOOK chapter:verse" or "BOOK chapter:first-last"
/// (for example "S3Y 1:1-29") into its components.
/// </summary>
/// <param name="range">
/// The reference text. Leading, trailing and repeated spaces between the book and the
/// chapter are tolerated.
/// </param>
/// <returns>
/// The book token, the chapter number, the first and last verse numbers (equal when no
/// hyphenated range is given) and a flag that is <c>true</c> when first and last verse are equal.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="range"/> is <c>null</c>.</exception>
/// <exception cref="FormatException">
/// The text has no "chapter:verse" part after the book, or a chapter/verse number is not an integer.
/// </exception>
public static (string Book, int Chapter, int StartVerse, int EndVerse, bool IsSingleVerse) ParseRange(string range)
{
    ArgumentNullException.ThrowIfNull(range);

    // Drop empty entries so that "  S3Y   1:1 " parses the same as "S3Y 1:1".
    string[] parts = range.Split(
        ' ',
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries
    );
    if (parts.Length < 2)
    {
        throw new FormatException(
            $"Expected a reference like \"BOOK chapter:verse[-verse]\" but got \"{range}\"."
        );
    }

    string book = parts[0];

    string[] chapterAndVerses = parts[1].Split(':');
    if (chapterAndVerses.Length < 2)
    {
        throw new FormatException(
            $"Expected \"chapter:verse[-verse]\" after the book but got \"{parts[1]}\"."
        );
    }

    // References are machine-readable test data, so parse numbers culture-independently.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    int chapter = int.Parse(chapterAndVerses[0], invariant);

    string[] verseBounds = chapterAndVerses[1].Split('-');
    int startVerse = int.Parse(verseBounds[0], invariant);
    int endVerse = verseBounds.Length > 1 ? int.Parse(verseBounds[1], invariant) : startVerse;

    return (book, chapter, startVerse, endVerse, startVerse == endVerse);
}
158
+
159
/// <summary>
/// Strips every occurrence of each unwanted substring from a corpus string.
/// </summary>
/// <param name="input">The text to clean.</param>
/// <param name="unwanted">Substrings to remove, processed in array order.</param>
/// <returns>
/// The text after all removals. Surrounding whitespace is trimmed after each removal, so
/// <paramref name="input"/> is returned unchanged (and untrimmed) when
/// <paramref name="unwanted"/> is empty.
/// </returns>
public static string CleanString(string input, string[] unwanted)
{
    string cleaned = input;

    for (int index = 0; index < unwanted.Length; index++)
    {
        string stripped = cleaned.Replace(unwanted[index], "");
        cleaned = stripped.Trim();
    }

    return cleaned;
}
170
+
171
/// <summary>
/// Collapses every run of whitespace (spaces, tabs, line breaks) into a single space.
/// </summary>
/// <param name="input">The text to normalize.</param>
/// <returns>
/// The text with each match of <c>\s+</c> replaced by one space; leading and trailing
/// whitespace is collapsed to a single space rather than removed.
/// </returns>
public static string NormalizeSpaces(string input) =>
    Regex.Replace(input, pattern: @"\s+", replacement: " ");
46
178
}
0 commit comments