Skip to content

Commit c70718d

Browse files
committed
refactor: Start cleaning up redundant code in internal modules.
1 parent 4c6d249 commit c70718d

File tree

1 file changed

+103
-166
lines changed

1 file changed

+103
-166
lines changed

src/DataFrame/Internal/Column.hs

Lines changed: 103 additions & 166 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import qualified Data.Vector.Unboxed.Mutable as VUM
2828

2929
import Control.Exception (throw)
3030
import Control.Monad.ST (runST)
31+
import Data.Kind (Type)
3132
import Data.Maybe
3233
import Data.Type.Equality (TestEquality (..))
3334
import DataFrame.Errors
@@ -139,20 +140,18 @@ instance Eq Column where
139140
Just Refl -> a == b
140141
(==) _ _ = False
141142

{- | Heterogeneous @<=@: compares two values whose types may differ.

The runtime type representations of @a@ and @b@ are tested for equality.
When they match, the result is the ordinary @x <= y@; when they do not,
the result is 'False'.
-}
generalLEQ ::
    forall a b. (Typeable a, Typeable b, Ord a, Ord b) => a -> b -> Bool
generalLEQ x y
    | Just Refl <- testEquality (typeRep @a) (typeRep @b) = x <= y
    | otherwise = False
149+
{- | Ordering on columns, defined through @<=@ only.

Two columns built with the same constructor are compared with 'generalLEQ'
on their underlying vectors. Columns built with different constructors are
never @<=@ each other.
-}
instance Ord Column where
    (<=) :: Column -> Column -> Bool
    BoxedColumn xs <= BoxedColumn ys = generalLEQ xs ys
    OptionalColumn xs <= OptionalColumn ys = generalLEQ xs ys
    UnboxedColumn xs <= UnboxedColumn ys = generalLEQ xs ys
    _ <= _ = False
157156

158157
{- | A class for converting a vector to a column of the appropriate type.
@@ -239,63 +238,71 @@ fromList ::
239238
[a] -> Column
240239
fromList = toColumnRep @(KindOf a) . VB.fromList
241240

{- | Build the 'Left' result describing a type mismatch.

Despite the name nothing is thrown: the 'TypeMismatchException' is returned
in 'Left'. The type argument @a@ is reported as the expected type and @b@
as the user-supplied type.

NOTE: the reported calling function is always @"mapColumn"@, so a caller
with a different name needs to build its own error context.
-}
throwTypeMismatch ::
    forall (a :: Type) (b :: Type).
    (Typeable a, Typeable b) => Either DataFrameException Column
throwTypeMismatch = Left (TypeMismatchException context)
  where
    context =
        MkTypeErrorContext
            { userType = Right (typeRep @b)
            , expectedType = Right (typeRep @a)
            , callingFunctionName = Just "mapColumn"
            , errorColumnName = Nothing
            }
253+
{- | An internal function to map a function over the values of a column.

The mapping succeeds only when the column's element type is exactly @b@;
otherwise the mismatch built by 'throwTypeMismatch' is returned in 'Left'.
-}
mapColumn ::
    forall b c.
    (Columnable b, Columnable c) =>
    (b -> c) -> Column -> Either DataFrameException Column
mapColumn f = \case
    BoxedColumn values -> overBoxed values
    OptionalColumn values -> overBoxed values
    UnboxedColumn values -> overUnboxed values
  where
    -- Shared by both constructors that carry a boxed vector.
    overBoxed ::
        forall a. (Typeable a) => VB.Vector a -> Either DataFrameException Column
    overBoxed values
        | Just Refl <- testEquality (typeRep @a) (typeRep @b) =
            Right (fromVector @c (VB.map f values))
        | otherwise = throwTypeMismatch @a @b

    overUnboxed ::
        forall a.
        (Typeable a, VU.Unbox a) => VU.Vector a -> Either DataFrameException Column
    overUnboxed values
        | Just Refl <- testEquality (typeRep @a) (typeRep @b) =
            Right $ case sUnbox @c of
                -- The result type is unboxable: stay in an unboxed vector.
                STrue -> UnboxedColumn (VU.map f values)
                -- Otherwise fill a boxed vector element by element.
                SFalse ->
                    fromVector @c
                        (VB.generate (VU.length values) (\i -> f (VU.unsafeIndex values i)))
        | otherwise = throwTypeMismatch @a @b
{-# SPECIALIZE mapColumn ::
    (Double -> Double) -> Column -> Either DataFrameException Column
    #-}
{-# INLINEABLE mapColumn #-}
298281

{- | An internal function to map an index-aware function over the values of a column.

The function receives each element's position together with its value.
Boxed, optional and unboxed columns are all handled. The mapping succeeds
only when the column's element type is exactly @b@; otherwise a
'TypeMismatchException' naming @"imapColumn"@ as the calling function is
returned in 'Left'.
-}
imapColumn ::
    forall b c.
    (Columnable b, Columnable c) =>
    (Int -> b -> c) -> Column -> Either DataFrameException Column
imapColumn f = \case
    BoxedColumn (col :: VB.Vector a) -> run col
    OptionalColumn (col :: VB.Vector a) -> run col
    UnboxedColumn (col :: VU.Vector a) -> runUnboxed col
  where
    run :: forall a. (Typeable a) => VB.Vector a -> Either DataFrameException Column
    run col = case testEquality (typeRep @a) (typeRep @b) of
        Just Refl -> Right (fromVector @c (VB.imap f col))
        Nothing -> mismatch @a

    runUnboxed ::
        forall a.
        (Typeable a, VU.Unbox a) => VU.Vector a -> Either DataFrameException Column
    runUnboxed col = case testEquality (typeRep @a) (typeRep @b) of
        Just Refl -> Right $ case sUnbox @c of
            STrue -> UnboxedColumn (VU.imap f col)
            -- Go through 'fromVector', exactly as the boxed branches do, so the
            -- resulting column constructor is chosen from @c@ instead of being
            -- forced to 'BoxedColumn'.
            SFalse -> fromVector @c (VB.imap f (VG.convert col))
        Nothing -> mismatch @a

    -- Built locally rather than via a shared helper so that the error
    -- reports this function's own name.
    mismatch :: forall a. (Typeable a) => Either DataFrameException Column
    mismatch =
        Left $
            TypeMismatchException
                MkTypeErrorContext
                    { userType = Right (typeRep @b)
                    , expectedType = Right (typeRep @a)
                    , callingFunctionName = Just "imapColumn"
                    , errorColumnName = Nothing
                    }
305+
299306
-- | O(1) Gets the number of elements in the column.
300307
columnLength :: Column -> Int
301308
columnLength (BoxedColumn xs) = VG.length xs
@@ -373,116 +380,46 @@ findIndices ::
373380
(a -> Bool) ->
374381
Column ->
375382
Either DataFrameException (VU.Vector Int)
376-
findIndices pred (BoxedColumn (column :: VB.Vector b)) = case testEquality (typeRep @a) (typeRep @b) of
377-
Just Refl -> pure $ VG.convert (VG.findIndices pred column)
378-
Nothing ->
379-
Left $
380-
TypeMismatchException
381-
( MkTypeErrorContext
382-
{ userType = Right (typeRep @a)
383-
, expectedType = Right (typeRep @b)
384-
, callingFunctionName = Just "findIndices"
385-
, errorColumnName = Nothing
386-
}
387-
)
388-
findIndices pred (UnboxedColumn (column :: VU.Vector b)) = case testEquality (typeRep @a) (typeRep @b) of
389-
Just Refl -> pure $ VG.findIndices pred column
390-
Nothing ->
391-
Left $
392-
TypeMismatchException
393-
( MkTypeErrorContext
394-
{ userType = Right (typeRep @a)
395-
, expectedType = Right (typeRep @b)
396-
, callingFunctionName = Just "findIndices"
397-
, errorColumnName = Nothing
398-
}
399-
)
400-
findIndices pred (OptionalColumn (column :: VB.Vector b)) = case testEquality (typeRep @a) (typeRep @b) of
401-
Just Refl -> pure $ VG.convert (VG.findIndices pred column)
402-
Nothing ->
403-
Left $
404-
TypeMismatchException
405-
( MkTypeErrorContext
406-
{ userType = Right (typeRep @a)
407-
, expectedType = Right (typeRep @b)
408-
, callingFunctionName = Just "findIndices"
409-
, errorColumnName = Nothing
410-
}
411-
)
412-
413-
-- | An internal function that returns a vector of how indexes change after a column is sorted.
414-
sortedIndexes :: Bool -> Column -> VU.Vector Int
415-
sortedIndexes asc (BoxedColumn column) = runST $ do
416-
withIndexes <- VG.thaw $ VG.indexed column
417-
VA.sortBy
418-
(\(a, b) (a', b') -> (if asc then compare else flip compare) b b')
419-
withIndexes
420-
sorted <- VG.unsafeFreeze withIndexes
421-
return $ VU.generate (VG.length column) (\i -> fst (sorted VG.! i))
422-
sortedIndexes asc (UnboxedColumn column) = runST $ do
423-
withIndexes <- VG.thaw $ VG.indexed column
424-
VA.sortBy
425-
(\(a, b) (a', b') -> (if asc then compare else flip compare) b b')
426-
withIndexes
427-
sorted <- VG.unsafeFreeze withIndexes
428-
return $ VU.generate (VG.length column) (\i -> fst (sorted VG.! i))
429-
sortedIndexes asc (OptionalColumn column) = runST $ do
430-
withIndexes <- VG.thaw $ VG.indexed column
431-
VA.sortBy
432-
(\(a, b) (a', b') -> (if asc then compare else flip compare) b b')
433-
withIndexes
434-
sorted <- VG.unsafeFreeze withIndexes
435-
return $ VU.generate (VG.length column) (\i -> fst (sorted VG.! i))
436-
{-# INLINE sortedIndexes #-}
437-
438-
-- | Applies a function that returns an unboxed result to an unboxed vector, storing the result in a column.
439-
imapColumn ::
440-
forall b c.
441-
(Columnable b, Columnable c) =>
442-
(Int -> b -> c) -> Column -> Either DataFrameException Column
443-
imapColumn f = \case
444-
BoxedColumn (col :: VB.Vector a)
445-
| Just Refl <- testEquality (typeRep @a) (typeRep @b) ->
446-
pure (fromVector @c (VB.imap f col))
447-
| otherwise ->
448-
Left $
449-
TypeMismatchException
450-
( MkTypeErrorContext
451-
{ userType = Right (typeRep @b)
452-
, expectedType = Right (typeRep @a)
453-
, callingFunctionName = Just "imapColumn"
454-
, errorColumnName = Nothing
455-
}
456-
)
457-
UnboxedColumn (col :: VU.Vector a)
458-
| Just Refl <- testEquality (typeRep @a) (typeRep @b) ->
459-
pure $
460-
case sUnbox @c of
461-
STrue -> UnboxedColumn (VU.imap f col)
462-
SFalse -> fromVector @c (VB.imap f (VB.convert col))
463-
| otherwise ->
464-
Left $
465-
TypeMismatchException
466-
( MkTypeErrorContext
467-
{ userType = Right (typeRep @b)
468-
, expectedType = Right (typeRep @a)
469-
, callingFunctionName = Just "imapColumn"
470-
, errorColumnName = Nothing
471-
}
472-
)
473-
OptionalColumn (col :: VB.Vector a)
474-
| Just Refl <- testEquality (typeRep @a) (typeRep @b) ->
475-
pure (fromVector @c (VB.imap f col))
476-
| otherwise ->
383+
findIndices pred = \case
384+
BoxedColumn (v :: VB.Vector b) -> run v VG.convert
385+
OptionalColumn (v :: VB.Vector b) -> run v VG.convert
386+
UnboxedColumn (v :: VU.Vector b) -> run v id
387+
where
388+
run ::
389+
forall b v.
390+
(Typeable b, VG.Vector v b, VG.Vector v Int) =>
391+
v b ->
392+
(v Int -> VU.Vector Int) ->
393+
Either DataFrameException (VU.Vector Int)
394+
run column finalize = case testEquality (typeRep @a) (typeRep @b) of
395+
Just Refl -> Right . finalize $ VG.findIndices pred column
396+
Nothing ->
477397
Left $
478398
TypeMismatchException
479-
( MkTypeErrorContext
480-
{ userType = Right (typeRep @b)
481-
, expectedType = Right (typeRep @a)
482-
, callingFunctionName = Just "imapColumn"
399+
MkTypeErrorContext
400+
{ userType = Right (typeRep @a)
401+
, expectedType = Right (typeRep @b)
402+
, callingFunctionName = Just "findIndices"
483403
, errorColumnName = Nothing
484404
}
485-
)
405+
{- | An internal function that returns a vector of how indexes change after a column is sorted.

Each element is paired with its original position, the pairs are sorted by
element value (ascending when the flag is 'True', descending otherwise),
and the original positions are returned in sorted order.
-}
sortedIndexes :: Bool -> Column -> VU.Vector Int
sortedIndexes asc = \case
    BoxedColumn values -> permutationOf values
    UnboxedColumn values -> permutationOf values
    OptionalColumn values -> permutationOf values
  where
    -- Direction of the comparison, fixed once by the flag.
    ordering :: (Ord a) => a -> a -> Ordering
    ordering
        | asc = compare
        | otherwise = flip compare

    permutationOf ::
        (VG.Vector v a, Ord a, VG.Vector v (Int, a), VG.Vector v Int) =>
        v a -> VU.Vector Int
    permutationOf values = runST $ do
        paired <- VG.thaw (VG.indexed values)
        -- Only the element decides the order; the index just rides along.
        VA.sortBy (\l r -> ordering (snd l) (snd r)) paired
        frozen <- VG.unsafeFreeze paired
        pure (VG.convert (VG.map fst frozen))
{-# INLINE sortedIndexes #-}
486423

487424
-- | Fold (right) column with index.
488425
ifoldrColumn ::

0 commit comments

Comments
 (0)