@@ -19,7 +19,7 @@ use crate::unicode;
1919//
2020// Some of the implementation complexity here is a result of me wanting to
2121// preserve the sequential representation without using additional memory.
22- // In many cases, we do use linear extra memory, but it is at most 2x and it
22+ // In some cases, we do use linear extra memory, but it is at most 2x and it
2323// is amortized. If we relaxed the memory requirements, this implementation
2424// could become much simpler. The extra memory is honestly probably OK, but
2525// character classes (especially of the Unicode variety) can become quite
@@ -81,14 +81,45 @@ impl<I: Interval> IntervalSet<I> {
8181
8282 /// Add a new interval to this set.
8383 pub fn push ( & mut self , interval : I ) {
84- // TODO: This could be faster. e.g., Push the interval such that
85- // it preserves canonicalization.
86- self . ranges . push ( interval) ;
87- self . canonicalize ( ) ;
8884 // We don't know whether the new interval added here is considered
8985 // case folded, so we conservatively assume that the entire set is
9086 // no longer case folded if it was previously.
9187 self . folded = false ;
88+
89+ if self . ranges . is_empty ( ) {
90+ self . ranges . push ( interval) ;
91+ return ;
92+ }
93+
94+ // Find the first range that is not greater than the new interval.
95+ // This is the first range that could possibly be unioned with the
96+ // new interval.
97+ let mut drain_end = self . ranges . len ( ) ;
98+ while drain_end > 0
99+ && self . ranges [ drain_end - 1 ] . lower ( ) > interval. upper ( )
100+ && !self . ranges [ drain_end - 1 ] . is_contiguous ( & interval)
101+ {
102+ drain_end -= 1 ;
103+ }
104+
105+ // Try to union the new interval with old intervals backwards.
106+ if drain_end > 0 && self . ranges [ drain_end - 1 ] . is_contiguous ( & interval)
107+ {
108+ self . ranges [ drain_end - 1 ] =
109+ self . ranges [ drain_end - 1 ] . union ( & interval) . unwrap ( ) ;
110+ for i in ( 0 ..drain_end - 1 ) . rev ( ) {
111+ if let Some ( union) =
112+ self . ranges [ drain_end - 1 ] . union ( & self . ranges [ i] )
113+ {
114+ self . ranges [ drain_end - 1 ] = union;
115+ } else {
116+ self . ranges . drain ( i + 1 ..drain_end - 1 ) ;
117+ break ;
118+ }
119+ }
120+ } else {
121+ self . ranges . insert ( drain_end, interval) ;
122+ }
92123 }
93124
94125 /// Return an iterator over all intervals in this set.
@@ -192,34 +223,13 @@ impl<I: Interval> IntervalSet<I> {
192223 // Folks seem to suggest interval or segment trees, but I'd like to
193224 // avoid the overhead (both runtime and conceptual) of that.
194225 //
195- // The following is basically my Shitty First Draft. Therefore, in
196- // order to grok it, you probably need to read each line carefully.
197- // Simplifications are most welcome!
198- //
199226 // Remember, we can assume the canonical format invariant here, which
200227 // says that all ranges are sorted, not overlapping and not adjacent in
201228 // each class.
202229 let drain_end = self . ranges . len ( ) ;
203- let ( mut a, mut b) = ( 0 , 0 ) ;
204- ' LOOP : while a < drain_end && b < other. ranges . len ( ) {
205- // Basically, the easy cases are when neither range overlaps with
206- // each other. If the `b` range is less than our current `a`
207- // range, then we can skip it and move on.
208- if other. ranges [ b] . upper ( ) < self . ranges [ a] . lower ( ) {
209- b += 1 ;
210- continue ;
211- }
212- // ... similarly for the `a` range. If it's less than the smallest
213- // `b` range, then we can add it as-is.
214- if self . ranges [ a] . upper ( ) < other. ranges [ b] . lower ( ) {
215- let range = self . ranges [ a] ;
216- self . ranges . push ( range) ;
217- a += 1 ;
218- continue ;
219- }
220- // Otherwise, we have overlapping ranges.
221- assert ! ( !self . ranges[ a] . is_intersection_empty( & other. ranges[ b] ) ) ;
222230
231+ let mut b = 0 ;
232+ for a in 0 ..drain_end {
223233 // This part is tricky and was non-obvious to me without looking
224234 // at explicit examples (see the tests). The trickiness stems from
225235 // two things: 1) subtracting a range from another range could
@@ -231,47 +241,34 @@ impl<I: Interval> IntervalSet<I> {
231241 // For example, if our `a` range is `a-t` and our next three `b`
232242 // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
233243 // subtraction three times before moving on to the next `a` range.
234- let mut range = self . ranges [ a] ;
244+ self . ranges . push ( self . ranges [ a] ) ;
245+ // Only when `b` is not above `a`, `b` might apply to current
246+ // `a` range.
235247 while b < other. ranges . len ( )
236- && !range . is_intersection_empty ( & other. ranges [ b] )
248+ && other. ranges [ b] . lower ( ) <= self . ranges [ a ] . upper ( )
237249 {
238- let old_range = range;
239- range = match range. difference ( & other. ranges [ b] ) {
240- ( None , None ) => {
241- // We lost the entire range, so move on to the next
242- // without adding this one.
243- a += 1 ;
244- continue ' LOOP ;
250+ match self . ranges . pop ( ) . unwrap ( ) . difference ( & other. ranges [ b] ) {
251+ ( Some ( range1) , None ) | ( None , Some ( range1) ) => {
252+ self . ranges . push ( range1) ;
245253 }
246- ( Some ( range1) , None ) | ( None , Some ( range1) ) => range1,
247254 ( Some ( range1) , Some ( range2) ) => {
248255 self . ranges . push ( range1) ;
249- range2
256+ self . ranges . push ( range2) ;
250257 }
251- } ;
252- // It's possible that the `b` range has more to contribute
253- // here. In particular, if it is greater than the original
254- // range, then it might impact the next `a` range *and* it
255- // has impacted the current `a` range as much as possible,
256- // so we can quit. We don't bump `b` so that the next `a`
257- // range can apply it.
258- if other. ranges [ b] . upper ( ) > old_range. upper ( ) {
259- break ;
258+ ( None , None ) => { }
260259 }
261- // Otherwise, the next `b` range might apply to the current
260+ // The next `b` range might apply to the current
262261 // `a` range.
263262 b += 1 ;
264263 }
265- self . ranges . push ( range) ;
266- a += 1 ;
267- }
268- while a < drain_end {
269- let range = self . ranges [ a] ;
270- self . ranges . push ( range) ;
271- a += 1 ;
264+ // It's possible that the last `b` range has more to
265+ // contribute to the next `a`. We don't bump the last
266+ // `b` so that the next `a` range can apply it.
267+ b = b. saturating_sub ( 1 ) ;
272268 }
269+
273270 self . ranges . drain ( ..drain_end) ;
274- self . folded = self . folded && other. folded ;
271+ self . folded = self . ranges . is_empty ( ) || ( self . folded && other. folded ) ;
275272 }
276273
277274 /// Compute the symmetric difference of the two sets, in place.
@@ -282,11 +279,83 @@ impl<I: Interval> IntervalSet<I> {
282279 /// set. That is, the set will contain all elements in either set,
283280 /// but will not contain any elements that are in both sets.
284281 pub fn symmetric_difference ( & mut self , other : & IntervalSet < I > ) {
285- // TODO(burntsushi): Fix this so that it amortizes allocation.
286- let mut intersection = self . clone ( ) ;
287- intersection. intersect ( other) ;
288- self . union ( other) ;
289- self . difference ( & intersection) ;
282+ if self . ranges . is_empty ( ) {
283+ self . ranges . extend ( & other. ranges ) ;
284+ self . folded = other. folded ;
285+ return ;
286+ }
287+ if other. ranges . is_empty ( ) {
288+ return ;
289+ }
290+
291+ // There should be a way to do this in-place with constant memory,
292+ // but I couldn't figure out a simple way to do it. So just append
293+ // the symmetric difference to the end of this range, and then drain
294+ // it before we're done.
295+ let drain_end = self . ranges . len ( ) ;
296+ let mut b = 0 ;
297+ let mut b_range = Some ( other. ranges [ b] ) ;
298+ for a in 0 ..drain_end {
299+ self . ranges . push ( self . ranges [ a] ) ;
300+ while b_range
301+ . map_or ( false , |r| r. lower ( ) <= self . ranges [ a] . upper ( ) )
302+ {
303+ let ( range1, range2) = match self
304+ . ranges
305+ . pop ( )
306+ . unwrap ( )
307+ . symmetric_difference ( & b_range. as_ref ( ) . unwrap ( ) )
308+ {
309+ ( Some ( range1) , None ) | ( None , Some ( range1) ) => {
310+ ( Some ( range1) , None )
311+ }
312+ ( Some ( range1) , Some ( range2) ) => {
313+ ( Some ( range1) , Some ( range2) )
314+ }
315+ ( None , None ) => ( None , None ) ,
316+ } ;
317+ if let Some ( range) = range1 {
318+ if self . ranges . len ( ) > drain_end
319+ && self . ranges . last ( ) . unwrap ( ) . is_contiguous ( & range)
320+ {
321+ self . ranges
322+ . last_mut ( )
323+ . map ( |last| * last = last. union ( & range) . unwrap ( ) ) ;
324+ } else {
325+ self . ranges . push ( range) ;
326+ }
327+ }
328+ if let Some ( range) = range2 {
329+ self . ranges . push ( range) ;
330+ }
331+
332+ b_range = if self . ranges . len ( ) > drain_end
333+ && self . ranges . last ( ) . unwrap ( ) . upper ( )
334+ > self . ranges [ a] . upper ( )
335+ {
336+ Some ( * self . ranges . last ( ) . unwrap ( ) )
337+ } else {
338+ b += 1 ;
339+ other. ranges . get ( b) . cloned ( )
340+ } ;
341+ }
342+ }
343+ while let Some ( range) = b_range {
344+ if self . ranges . len ( ) > drain_end
345+ && self . ranges . last ( ) . unwrap ( ) . is_contiguous ( & range)
346+ {
347+ self . ranges
348+ . last_mut ( )
349+ . map ( |last| * last = last. union ( & range) . unwrap ( ) ) ;
350+ } else {
351+ self . ranges . push ( range) ;
352+ }
353+ b += 1 ;
354+ b_range = other. ranges . get ( b) . cloned ( ) ;
355+ }
356+
357+ self . ranges . drain ( ..drain_end) ;
358+ self . folded = self . ranges . is_empty ( ) || ( self . folded && other. folded ) ;
290359 }
291360
292361 /// Negate this interval set.
@@ -302,28 +371,44 @@ impl<I: Interval> IntervalSet<I> {
302371 return ;
303372 }
304373
305- // There should be a way to do this in-place with constant memory,
306- // but I couldn't figure out a simple way to do it. So just append
307- // the negation to the end of this range, and then drain it before
308- // we're done.
309- let drain_end = self . ranges . len ( ) ;
310-
311374 // We do checked arithmetic below because of the canonical ordering
312375 // invariant.
313376 if self . ranges [ 0 ] . lower ( ) > I :: Bound :: min_value ( ) {
314- let upper = self . ranges [ 0 ] . lower ( ) . decrement ( ) ;
315- self . ranges . push ( I :: create ( I :: Bound :: min_value ( ) , upper) ) ;
316- }
317- for i in 1 ..drain_end {
318- let lower = self . ranges [ i - 1 ] . upper ( ) . increment ( ) ;
319- let upper = self . ranges [ i] . lower ( ) . decrement ( ) ;
320- self . ranges . push ( I :: create ( lower, upper) ) ;
321- }
322- if self . ranges [ drain_end - 1 ] . upper ( ) < I :: Bound :: max_value ( ) {
323- let lower = self . ranges [ drain_end - 1 ] . upper ( ) . increment ( ) ;
324- self . ranges . push ( I :: create ( lower, I :: Bound :: max_value ( ) ) ) ;
377+ let mut pre_upper = self . ranges [ 0 ] . upper ( ) ;
378+ self . ranges [ 0 ] = I :: create (
379+ I :: Bound :: min_value ( ) ,
380+ self . ranges [ 0 ] . lower ( ) . decrement ( ) ,
381+ ) ;
382+ for i in 1 ..self . ranges . len ( ) {
383+ let lower = pre_upper. increment ( ) ;
384+ pre_upper = self . ranges [ i] . upper ( ) ;
385+ self . ranges [ i] =
386+ I :: create ( lower, self . ranges [ i] . lower ( ) . decrement ( ) ) ;
387+ }
388+ if pre_upper < I :: Bound :: max_value ( ) {
389+ self . ranges . push ( I :: create (
390+ pre_upper. increment ( ) ,
391+ I :: Bound :: max_value ( ) ,
392+ ) ) ;
393+ }
394+ } else {
395+ for i in 1 ..self . ranges . len ( ) {
396+ self . ranges [ i - 1 ] = I :: create (
397+ self . ranges [ i - 1 ] . upper ( ) . increment ( ) ,
398+ self . ranges [ i] . lower ( ) . decrement ( ) ,
399+ ) ;
400+ }
401+ if self . ranges . last ( ) . unwrap ( ) . upper ( ) < I :: Bound :: max_value ( ) {
402+ self . ranges . last_mut ( ) . map ( |range| {
403+ * range = I :: create (
404+ range. upper ( ) . increment ( ) ,
405+ I :: Bound :: max_value ( ) ,
406+ )
407+ } ) ;
408+ } else {
409+ self . ranges . pop ( ) ;
410+ }
325411 }
326- self . ranges . drain ( ..drain_end) ;
327412 // We don't need to update whether this set is folded or not, because
328413 // it is conservatively preserved through negation. Namely, if a set
329414 // is not folded, then it is possible that its negation is folded, for
@@ -337,6 +422,7 @@ impl<I: Interval> IntervalSet<I> {
337422 // of case folded characters. Negating it in turn means that all
338423 // equivalence classes in the set are negated, and any equivalence
339424 // class that was previously not in the set is now entirely in the set.
425+ self . folded = self . ranges . is_empty ( ) || self . folded ;
340426 }
341427
342428 /// Converts this set into a canonical ordering.
@@ -347,24 +433,20 @@ impl<I: Interval> IntervalSet<I> {
347433 self . ranges . sort ( ) ;
348434 assert ! ( !self . ranges. is_empty( ) ) ;
349435
350- // Is there a way to do this in-place with constant memory? I couldn't
351- // figure out a way to do it. So just append the canonicalization to
352- // the end of this range, and then drain it before we're done.
353- let drain_end = self . ranges . len ( ) ;
354- for oldi in 0 ..drain_end {
355- // If we've added at least one new range, then check if we can
356- // merge this range in the previously added range.
357- if self . ranges . len ( ) > drain_end {
358- let ( last, rest) = self . ranges . split_last_mut ( ) . unwrap ( ) ;
359- if let Some ( union) = last. union ( & rest[ oldi] ) {
360- * last = union;
361- continue ;
362- }
436+ // We maintain the canonicalization results in-place at `0..newi`.
437+ // `newi` will keep track of the end of the canonicalized ranges.
438+ let mut newi = 0 ;
439+ for oldi in 1 ..self . ranges . len ( ) {
440+ // The last new range gets merged with currnet old range when
441+ // unionable. If not, we update `newi` and store it as a new range.
442+ if let Some ( union) = self . ranges [ newi] . union ( & self . ranges [ oldi] ) {
443+ self . ranges [ newi] = union;
444+ } else {
445+ newi += 1 ;
446+ self . ranges [ newi] = self . ranges [ oldi] ;
363447 }
364- let range = self . ranges [ oldi] ;
365- self . ranges . push ( range) ;
366448 }
367- self . ranges . drain ( ..drain_end ) ;
449+ self . ranges . truncate ( newi + 1 ) ;
368450 }
369451
370452 /// Returns true if and only if this class is in a canonical ordering.
@@ -486,7 +568,13 @@ pub trait Interval:
486568 other : & Self ,
487569 ) -> ( Option < Self > , Option < Self > ) {
488570 let union = match self . union ( other) {
489- None => return ( Some ( self . clone ( ) ) , Some ( other. clone ( ) ) ) ,
571+ None => {
572+ return if self . upper ( ) < other. lower ( ) {
573+ ( Some ( self . clone ( ) ) , Some ( other. clone ( ) ) )
574+ } else {
575+ ( Some ( other. clone ( ) ) , Some ( self . clone ( ) ) )
576+ }
577+ }
490578 Some ( union) => union,
491579 } ;
492580 let intersection = match self . intersect ( other) {
0 commit comments