Skip to content
This repository was archived by the owner on Aug 3, 2024. It is now read-only.

Commit 740458a

Browse files
authored
Hyperlinker: Avoid linear lookup in enrichToken (#669)
* Make Span strict in Position * Hyperlinker: Use a proper map to enrich tokens
1 parent f7032e5 commit 740458a

File tree

2 files changed

+53
-41
lines changed

2 files changed

+53
-41
lines changed

haddock-api/src/Haddock/Backends/Hyperlinker/Ast.hs

Lines changed: 43 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE BangPatterns #-}
12
{-# LANGUAGE RankNTypes #-}
23
{-# LANGUAGE ScopedTypeVariables #-}
34
{-# LANGUAGE RecordWildCards #-}
@@ -13,9 +14,13 @@ import Haddock.Backends.Hyperlinker.Types
1314
import qualified GHC
1415

1516
import Control.Applicative
17+
import Control.Monad (guard)
1618
import Data.Data
19+
import qualified Data.Map.Strict as Map
1720
import Data.Maybe
1821

22+
import Prelude hiding (span)
23+
1924
everythingInRenamedSource :: (Alternative f, Data x)
2025
=> (forall a. Data a => a -> f r) -> x -> f r
2126
everythingInRenamedSource f = Syb.everythingButType @GHC.Name (<|>) f
@@ -28,33 +33,53 @@ enrich src =
2833
, rtkDetails = enrichToken token detailsMap
2934
}
3035
where
31-
detailsMap = concatMap ($ src)
32-
[ variables
33-
, types
34-
, decls
35-
, binds
36-
, imports
37-
]
36+
detailsMap =
37+
mkDetailsMap (concatMap ($ src)
38+
[ variables
39+
, types
40+
, decls
41+
, binds
42+
, imports
43+
])
44+
45+
type LTokenDetails = [(GHC.SrcSpan, TokenDetails)]
3846

3947
-- | A map containing association between source locations and "details" of
4048
-- this location.
4149
--
42-
-- For the time being, it is just a list of pairs. However, looking up things
43-
-- in such structure has linear complexity. We cannot use any hashmap-like
44-
-- stuff because source locations are not ordered. In the future, this should
45-
-- be replaced with interval tree data structure.
46-
type DetailsMap = [(GHC.SrcSpan, TokenDetails)]
50+
type DetailsMap = Map.Map Position (Span, TokenDetails)
51+
52+
mkDetailsMap :: [(GHC.SrcSpan, TokenDetails)] -> DetailsMap
53+
mkDetailsMap xs =
54+
Map.fromListWith select_details [ (start, (token_span, token_details))
55+
| (ghc_span, token_details) <- xs
56+
, Just !token_span <- [ghcSrcSpanToSpan ghc_span]
57+
, let start = spStart token_span
58+
]
59+
where
60+
-- favour token details which appear earlier in the list
61+
select_details _new old = old
4762

4863
lookupBySpan :: Span -> DetailsMap -> Maybe TokenDetails
49-
lookupBySpan tspan = listToMaybe . map snd . filter (matches tspan . fst)
64+
lookupBySpan span details = do
65+
(_, (tok_span, tok_details)) <- Map.lookupLE (spStart span) details
66+
guard (tok_span `containsSpan` span )
67+
return tok_details
68+
69+
ghcSrcSpanToSpan :: GHC.SrcSpan -> Maybe Span
70+
ghcSrcSpanToSpan (GHC.RealSrcSpan span) =
71+
Just (Span { spStart = Position (GHC.srcSpanStartLine span) (GHC.srcSpanStartCol span)
72+
, spEnd = Position (GHC.srcSpanEndLine span) (GHC.srcSpanEndCol span)
73+
})
74+
ghcSrcSpanToSpan _ = Nothing
5075

5176
enrichToken :: Token -> DetailsMap -> Maybe TokenDetails
5277
enrichToken (Token typ _ spn) dm
5378
| typ `elem` [TkIdentifier, TkOperator] = lookupBySpan spn dm
5479
enrichToken _ _ = Nothing
5580

5681
-- | Obtain details map for variables ("normally" used identifiers).
57-
variables :: GHC.RenamedSource -> DetailsMap
82+
variables :: GHC.RenamedSource -> LTokenDetails
5883
variables =
5984
everythingInRenamedSource (var `Syb.combine` rec)
6085
where
@@ -70,7 +95,7 @@ variables =
7095
_ -> empty
7196

7297
-- | Obtain details map for types.
73-
types :: GHC.RenamedSource -> DetailsMap
98+
types :: GHC.RenamedSource -> LTokenDetails
7499
types = everythingInRenamedSource ty
75100
where
76101
ty term = case cast term of
@@ -84,7 +109,7 @@ types = everythingInRenamedSource ty
84109
-- ordinary assignment (in top-level declarations, let-expressions and where
85110
-- clauses).
86111

87-
binds :: GHC.RenamedSource -> DetailsMap
112+
binds :: GHC.RenamedSource -> LTokenDetails
88113
binds = everythingInRenamedSource
89114
(fun `Syb.combine` pat `Syb.combine` tvar)
90115
where
@@ -112,7 +137,7 @@ binds = everythingInRenamedSource
112137
_ -> empty
113138

114139
-- | Obtain details map for top-level declarations.
115-
decls :: GHC.RenamedSource -> DetailsMap
140+
decls :: GHC.RenamedSource -> LTokenDetails
116141
decls (group, _, _, _) = concatMap ($ group)
117142
[ concat . map typ . concat . map GHC.group_tyclds . GHC.hs_tyclds
118143
, everythingInRenamedSource fun . GHC.hs_valds
@@ -151,7 +176,7 @@ decls (group, _, _, _) = concatMap ($ group)
151176
--
152177
-- This map also includes type and variable details for items in export and
153178
-- import lists.
154-
imports :: GHC.RenamedSource -> DetailsMap
179+
imports :: GHC.RenamedSource -> LTokenDetails
155180
imports src@(_, imps, _, _) =
156181
everythingInRenamedSource ie src ++ mapMaybe (imp . GHC.unLoc) imps
157182
where
@@ -168,22 +193,3 @@ imports src@(_, imps, _, _) =
168193
let (GHC.L sspan name) = GHC.ideclName idecl
169194
in Just (sspan, RtkModule name)
170195
imp _ = Nothing
171-
172-
-- | Check whether token stream span matches GHC source span.
173-
--
174-
-- Currently, it is implemented as checking whether "our" span is contained
175-
-- in GHC span. The reason for that is because GHC span are generally wider
176-
-- and may spread across couple tokens. For example, @(>>=)@ consists of three
177-
-- tokens: @(@, @>>=@, @)@, but GHC source span associated with @>>=@ variable
178-
-- contains @(@ and @)@. Similarly, qualified identifiers like @Foo.Bar.quux@
179-
-- are tokenized as @Foo@, @.@, @Bar@, @.@, @quux@ but GHC source span
180-
-- associated with @quux@ contains all five elements.
181-
matches :: Span -> GHC.SrcSpan -> Bool
182-
matches tspan (GHC.RealSrcSpan aspan)
183-
| saspan <= stspan && etspan <= easpan = True
184-
where
185-
stspan = (posRow . spStart $ tspan, posCol . spStart $ tspan)
186-
etspan = (posRow . spEnd $ tspan, posCol . spEnd $ tspan)
187-
saspan = (GHC.srcSpanStartLine aspan, GHC.srcSpanStartCol aspan)
188-
easpan = (GHC.srcSpanEndLine aspan, GHC.srcSpanEndCol aspan)
189-
matches _ _ = False

haddock-api/src/Haddock/Backends/Hyperlinker/Types.hs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,22 +10,28 @@ import qualified Data.Map as Map
1010
data Token = Token
1111
{ tkType :: TokenType
1212
, tkValue :: String
13-
, tkSpan :: Span
13+
, tkSpan :: {-# UNPACK #-} !Span
1414
}
1515
deriving (Show)
1616

1717
data Position = Position
1818
{ posRow :: !Int
1919
, posCol :: !Int
2020
}
21-
deriving (Show)
21+
deriving (Eq, Ord, Show)
2222

2323
data Span = Span
24-
{ spStart :: Position
25-
, spEnd :: Position
24+
{ spStart :: !Position
25+
, spEnd :: !Position
2626
}
2727
deriving (Show)
2828

29+
-- | Tests whether the first span "contains" the other span, meaning
30+
-- that it covers at least as much source code. True where spans are equal.
31+
containsSpan :: Span -> Span -> Bool
32+
containsSpan s1 s2 =
33+
spStart s1 <= spStart s2 && spEnd s1 >= spEnd s2
34+
2935
data TokenType
3036
= TkIdentifier
3137
| TkKeyword

0 commit comments

Comments
 (0)