@@ -28,6 +28,7 @@ import qualified Data.Vector.Unboxed.Mutable as VUM
2828
2929import Control.Exception (throw )
3030import Control.Monad.ST (runST )
31+ import Data.Kind (Type )
3132import Data.Maybe
3233import Data.Type.Equality (TestEquality (.. ))
3334import DataFrame.Errors
@@ -139,20 +140,18 @@ instance Eq Column where
139140 Just Refl -> a == b
140141 (==) _ _ = False
141142
{- | Heterogeneous less-than-or-equal comparison.

The two arguments may have different types. Their runtime type
representations are compared first: when the types coincide the values are
compared with '<=', otherwise the result is 'False'.
-}
generalLEQ ::
    forall a b. (Typeable a, Typeable b, Ord a, Ord b) => a -> b -> Bool
generalLEQ lhs rhs
    | Just Refl <- testEquality (typeRep @a) (typeRep @b) = lhs <= rhs
    | otherwise = False
149+
{- | Ordering on columns.

Two columns are only compared when they were built with the same
constructor; the wrapped vectors are then handed to 'generalLEQ', which
yields 'False' when their types differ. Every other pairing of constructors
is 'False' as well, so for such pairs neither @x <= y@ nor @y <= x@ holds.
-}
instance Ord Column where
    (<=) :: Column -> Column -> Bool
    lhs <= rhs = case (lhs, rhs) of
        (BoxedColumn xs, BoxedColumn ys) -> generalLEQ xs ys
        (OptionalColumn xs, OptionalColumn ys) -> generalLEQ xs ys
        (UnboxedColumn xs, UnboxedColumn ys) -> generalLEQ xs ys
        _ -> False
157156
158157{- | A class for converting a vector to a column of the appropriate type.
@@ -239,63 +238,71 @@ fromList ::
239238 [a ] -> Column
240239fromList = toColumnRep @ (KindOf a ) . VB. fromList
241240
{- | Build the 'Left' result describing a type mismatch between a column and
the function applied to it.

Despite the name nothing is thrown: the failure is returned as a value.
Both types are supplied by type application:

  * @a@ is recorded as the expected type.
  * @b@ is recorded as the user type.

The calling function is always reported as @mapColumn@; a caller that needs
a different name in the error context has to build the context itself.
-}
throwTypeMismatch ::
    forall (a :: Type) (b :: Type).
    (Typeable a, Typeable b) => Either DataFrameException Column
throwTypeMismatch =
    Left $
        TypeMismatchException
            MkTypeErrorContext
                { userType = Right (typeRep @b)
                , expectedType = Right (typeRep @a)
                , callingFunctionName = Just "mapColumn"
                , errorColumnName = Nothing
                }
253+
{- | An internal function to map a function over the values of a column.

The column's element type must be exactly @b@. When it is, the mapped
values are packed into a new column: through 'fromVector' for boxed and
optional input, and for unboxed input either directly as an 'UnboxedColumn'
or through 'fromVector', depending on what 'sUnbox' reports for @c@. When
the types disagree the result is the 'Left' built by 'throwTypeMismatch'.
-}
mapColumn ::
    forall b c.
    (Columnable b, Columnable c) =>
    (b -> c) -> Column -> Either DataFrameException Column
mapColumn f = \case
    BoxedColumn values -> overBoxed values
    OptionalColumn values -> overBoxed values
    UnboxedColumn values -> overUnboxed values
  where
    -- Shared by the boxed and optional constructors: both wrap a boxed vector.
    overBoxed ::
        forall a. (Typeable a) => VB.Vector a -> Either DataFrameException Column
    overBoxed values
        | Just Refl <- testEquality (typeRep @a) (typeRep @b) =
            Right (fromVector @c (VB.map f values))
        | otherwise = throwTypeMismatch @a @b

    overUnboxed ::
        forall a.
        (Typeable a, VU.Unbox a) => VU.Vector a -> Either DataFrameException Column
    overUnboxed values
        | Just Refl <- testEquality (typeRep @a) (typeRep @b) =
            Right $ case sUnbox @c of
                -- The result can be kept in an unboxed vector.
                STrue -> UnboxedColumn (VU.map f values)
                -- Otherwise build a boxed vector element by element.
                SFalse ->
                    fromVector @c
                        (VB.generate (VU.length values) (f . VU.unsafeIndex values))
        | otherwise = throwTypeMismatch @a @b
{-# SPECIALIZE mapColumn ::
    (Double -> Double) -> Column -> Either DataFrameException Column
    #-}
{-# INLINEABLE mapColumn #-}
298281
{- | Map an index-aware function over the values of a column.

The function receives each element's position together with its value.
The column's element type must be exactly @b@; otherwise a
'TypeMismatchException' naming @imapColumn@ as the calling function is
returned in 'Left'.

The result is packed the same way 'mapColumn' packs it: through
'fromVector' in every case except an unboxed input whose result 'sUnbox'
reports as unboxable, which is stored as an 'UnboxedColumn'.
-}
imapColumn ::
    forall b c.
    (Columnable b, Columnable c) =>
    (Int -> b -> c) -> Column -> Either DataFrameException Column
imapColumn f = \case
    BoxedColumn (col :: VB.Vector a) -> run col
    OptionalColumn (col :: VB.Vector a) -> run col
    UnboxedColumn (col :: VU.Vector a) -> runUnboxed col
  where
    run :: forall a. (Typeable a) => VB.Vector a -> Either DataFrameException Column
    run col = case testEquality (typeRep @a) (typeRep @b) of
        Just Refl -> Right (fromVector @c (VB.imap f col))
        Nothing -> mismatch @a

    runUnboxed ::
        forall a.
        (Typeable a, VU.Unbox a) => VU.Vector a -> Either DataFrameException Column
    runUnboxed col = case testEquality (typeRep @a) (typeRep @b) of
        Just Refl -> Right $ case sUnbox @c of
            STrue -> UnboxedColumn (VU.imap f col)
            -- Go through 'fromVector' (not a bare 'BoxedColumn') so the
            -- constructor is chosen from @c@, exactly as in the boxed case.
            SFalse -> fromVector @c (VB.imap f (VG.convert col))
        Nothing -> mismatch @a

    -- Built locally so that the error context names this function rather
    -- than @mapColumn@. @a@ is the column's element type.
    mismatch :: forall a. (Typeable a) => Either DataFrameException Column
    mismatch =
        Left $
            TypeMismatchException
                MkTypeErrorContext
                    { userType = Right (typeRep @b)
                    , expectedType = Right (typeRep @a)
                    , callingFunctionName = Just "imapColumn"
                    , errorColumnName = Nothing
                    }
305+
299306-- | O(1) Gets the number of elements in the column.
300307columnLength :: Column -> Int
301308columnLength (BoxedColumn xs) = VG. length xs
@@ -373,116 +380,46 @@ findIndices ::
373380 (a -> Bool ) ->
374381 Column ->
375382 Either DataFrameException (VU. Vector Int )
376- findIndices pred (BoxedColumn (column :: VB. Vector b )) = case testEquality (typeRep @ a ) (typeRep @ b ) of
377- Just Refl -> pure $ VG. convert (VG. findIndices pred column)
378- Nothing ->
379- Left $
380- TypeMismatchException
381- ( MkTypeErrorContext
382- { userType = Right (typeRep @ a )
383- , expectedType = Right (typeRep @ b )
384- , callingFunctionName = Just " findIndices"
385- , errorColumnName = Nothing
386- }
387- )
388- findIndices pred (UnboxedColumn (column :: VU. Vector b )) = case testEquality (typeRep @ a ) (typeRep @ b ) of
389- Just Refl -> pure $ VG. findIndices pred column
390- Nothing ->
391- Left $
392- TypeMismatchException
393- ( MkTypeErrorContext
394- { userType = Right (typeRep @ a )
395- , expectedType = Right (typeRep @ b )
396- , callingFunctionName = Just " findIndices"
397- , errorColumnName = Nothing
398- }
399- )
400- findIndices pred (OptionalColumn (column :: VB. Vector b )) = case testEquality (typeRep @ a ) (typeRep @ b ) of
401- Just Refl -> pure $ VG. convert (VG. findIndices pred column)
402- Nothing ->
403- Left $
404- TypeMismatchException
405- ( MkTypeErrorContext
406- { userType = Right (typeRep @ a )
407- , expectedType = Right (typeRep @ b )
408- , callingFunctionName = Just " findIndices"
409- , errorColumnName = Nothing
410- }
411- )
412-
413- -- | An internal function that returns a vector of how indexes change after a column is sorted.
414- sortedIndexes :: Bool -> Column -> VU. Vector Int
415- sortedIndexes asc (BoxedColumn column) = runST $ do
416- withIndexes <- VG. thaw $ VG. indexed column
417- VA. sortBy
418- (\ (a, b) (a', b') -> (if asc then compare else flip compare ) b b')
419- withIndexes
420- sorted <- VG. unsafeFreeze withIndexes
421- return $ VU. generate (VG. length column) (\ i -> fst (sorted VG. ! i))
422- sortedIndexes asc (UnboxedColumn column) = runST $ do
423- withIndexes <- VG. thaw $ VG. indexed column
424- VA. sortBy
425- (\ (a, b) (a', b') -> (if asc then compare else flip compare ) b b')
426- withIndexes
427- sorted <- VG. unsafeFreeze withIndexes
428- return $ VU. generate (VG. length column) (\ i -> fst (sorted VG. ! i))
429- sortedIndexes asc (OptionalColumn column) = runST $ do
430- withIndexes <- VG. thaw $ VG. indexed column
431- VA. sortBy
432- (\ (a, b) (a', b') -> (if asc then compare else flip compare ) b b')
433- withIndexes
434- sorted <- VG. unsafeFreeze withIndexes
435- return $ VU. generate (VG. length column) (\ i -> fst (sorted VG. ! i))
436- {-# INLINE sortedIndexes #-}
437-
438- -- | Applies a function that returns an unboxed result to an unboxed vector, storing the result in a column.
439- imapColumn ::
440- forall b c .
441- (Columnable b , Columnable c ) =>
442- (Int -> b -> c ) -> Column -> Either DataFrameException Column
443- imapColumn f = \ case
444- BoxedColumn (col :: VB. Vector a )
445- | Just Refl <- testEquality (typeRep @ a ) (typeRep @ b ) ->
446- pure (fromVector @ c (VB. imap f col))
447- | otherwise ->
448- Left $
449- TypeMismatchException
450- ( MkTypeErrorContext
451- { userType = Right (typeRep @ b )
452- , expectedType = Right (typeRep @ a )
453- , callingFunctionName = Just " imapColumn"
454- , errorColumnName = Nothing
455- }
456- )
457- UnboxedColumn (col :: VU. Vector a )
458- | Just Refl <- testEquality (typeRep @ a ) (typeRep @ b ) ->
459- pure $
460- case sUnbox @ c of
461- STrue -> UnboxedColumn (VU. imap f col)
462- SFalse -> fromVector @ c (VB. imap f (VB. convert col))
463- | otherwise ->
464- Left $
465- TypeMismatchException
466- ( MkTypeErrorContext
467- { userType = Right (typeRep @ b )
468- , expectedType = Right (typeRep @ a )
469- , callingFunctionName = Just " imapColumn"
470- , errorColumnName = Nothing
471- }
472- )
473- OptionalColumn (col :: VB. Vector a )
474- | Just Refl <- testEquality (typeRep @ a ) (typeRep @ b ) ->
475- pure (fromVector @ c (VB. imap f col))
476- | otherwise ->
383+ findIndices pred = \ case
384+ BoxedColumn (v :: VB. Vector b ) -> run v VG. convert
385+ OptionalColumn (v :: VB. Vector b ) -> run v VG. convert
386+ UnboxedColumn (v :: VU. Vector b ) -> run v id
387+ where
388+ run ::
389+ forall b v .
390+ (Typeable b , VG. Vector v b , VG. Vector v Int ) =>
391+ v b ->
392+ (v Int -> VU. Vector Int ) ->
393+ Either DataFrameException (VU. Vector Int )
394+ run column finalize = case testEquality (typeRep @ a ) (typeRep @ b ) of
395+ Just Refl -> Right . finalize $ VG. findIndices pred column
396+ Nothing ->
477397 Left $
478398 TypeMismatchException
479- ( MkTypeErrorContext
480- { userType = Right (typeRep @ b )
481- , expectedType = Right (typeRep @ a )
482- , callingFunctionName = Just " imapColumn "
399+ MkTypeErrorContext
400+ { userType = Right (typeRep @ a )
401+ , expectedType = Right (typeRep @ b )
402+ , callingFunctionName = Just " findIndices "
483403 , errorColumnName = Nothing
484404 }
485- )
405+
{- | An internal function that returns a vector of how indexes change after a column is sorted.

Every element is paired with its original position, the pairs are sorted by
element only, and the positions are then read back in sorted order. The
flag selects ascending ('True') or descending ('False') order.
-}
sortedIndexes :: Bool -> Column -> VU.Vector Int
sortedIndexes asc = \case
    BoxedColumn values -> permutation values
    UnboxedColumn values -> permutation values
    OptionalColumn values -> permutation values
  where
    permutation ::
        (VG.Vector v a, Ord a, VG.Vector v (Int, a), VG.Vector v Int) =>
        v a -> VU.Vector Int
    permutation values = runST $ do
        -- Sort a mutable copy of the (position, element) pairs in place.
        tagged <- VG.thaw (VG.indexed values)
        VA.sortBy (\(_, x) (_, y) -> byDirection x y) tagged
        -- The mutable vector is not touched again, so it is frozen without copying.
        VG.convert . VG.map fst <$> VG.unsafeFreeze tagged

    -- Compares two elements in the requested direction.
    byDirection x y
        | asc = compare x y
        | otherwise = compare y x
{-# INLINE sortedIndexes #-}
486423
487424-- | Fold (right) column with index.
488425ifoldrColumn ::
0 commit comments