Skip to content

Commit

Permalink
Store basis type separately in GEP/ConstGEP
Browse files Browse the repository at this point in the history
With opaque pointers, one cannot tell what the basis type for a `getelementptr`
instruction (or constant expression) is by inspecting the parent pointer. As a
result, we now store the basis type separately in `GEP`/`ConstGEP` so that it
can be determined regardless of whether opaque pointers are used or not.

This also requires tweaking the types of the `resolveGepFull` and `resolveGep`
functions to make the basis type separate from the parent pointer type.

Because this requires making a backwards-incompatible change to `ConstGEP`, I
took the opportunity to include the parent pointer value as a distinguished
field in `ConstGEP`. The `GEP` data constructor already does this, and it seems
oddly asymmetric to not have `ConstGEP` do the same, especially since LLVM
requires it to be present.

See GaloisInc#102.
  • Loading branch information
RyanGlScott committed May 30, 2023
1 parent 1c4b6de commit 16bc5bd
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 28 deletions.
2 changes: 1 addition & 1 deletion src/Text/LLVM.hs
Original file line number Diff line number Diff line change
Expand Up @@ -695,7 +695,7 @@ select c t f = observe (typedType t)

getelementptr :: IsValue a
=> Type -> Typed a -> [Typed Value] -> BB (Typed Value)
getelementptr ty ptr ixs = observe ty (GEP False (toValue `fmap` ptr) ixs)
getelementptr ty ptr ixs = observe ty (GEP False ty (toValue `fmap` ptr) ixs)

-- | Emit a call instruction, and generate a new variable for its result.
call :: IsValue a => Typed a -> [Typed Value] -> BB (Typed Value)
Expand Down
51 changes: 36 additions & 15 deletions src/Text/LLVM/AST.hs
Original file line number Diff line number Diff line change
Expand Up @@ -881,10 +881,11 @@ data Instr' lab
* Middle of basic block.
* Returns a value of the specified type. -}

| GEP Bool (Typed (Value' lab)) [Typed (Value' lab)]
| GEP Bool Type (Typed (Value' lab)) [Typed (Value' lab)]
{- ^ * "Get element pointer",
compute the address of a field in a structure:
inbounds check (value poisoned if this fails);
type to use as a basis for calculations;
pointer to parent structure;
path to a sub-component of a structure.
* Middle of basic block.
Expand Down Expand Up @@ -1118,8 +1119,11 @@ extendMetadata md stmt = case stmt of
-- Constant Expressions --------------------------------------------------------

data ConstExpr' lab
= ConstGEP Bool (Maybe Word64) (Maybe Type) [Typed (Value' lab)]
-- ^ Element type introduced in LLVM 3.7
= ConstGEP Bool (Maybe Word64) Type (Typed (Value' lab)) [Typed (Value' lab)]
-- ^ Since LLVM 3.7, constant @getelementptr@ expressions include an explicit
-- type to use as a basis for calculations. For older versions of LLVM, this
-- type can be reconstructed by inspecting the pointee type of the parent
-- pointer value.
| ConstConv ConvOp (Typed (Value' lab)) Type
| ConstSelect (Typed (Value' lab)) (Typed (Value' lab)) (Typed (Value' lab))
| ConstBlockAddr (Typed (Value' lab)) lab
Expand Down Expand Up @@ -1432,10 +1436,11 @@ isInvalid ir = case ir of
-- on unknown type aliases will return 'Nothing'
resolveGepFull ::
(Ident -> Maybe Type) {- ^ Type alias resolution -} ->
Type {- ^ Pointer type -} ->
Type {- ^ Base type used for calculations -} ->
Typed (Value' lab) {- ^ Pointer value -} ->
[Typed (Value' lab)] {- ^ Path -} ->
Maybe Type {- ^ Type of result -}
resolveGepFull env t ixs = go (resolveGep t ixs)
resolveGepFull env baseTy tv ixs = go (resolveGep baseTy tv ixs)
where
go Invalid = Nothing
go (HasType result) = Just result
Expand All @@ -1444,16 +1449,32 @@ resolveGepFull env t ixs = go (resolveGep t ixs)

-- | Resolve the type of a GEP instruction. Note that the type produced is the
-- type of the result, not necessarily a pointer.
resolveGep :: Type -> [Typed (Value' lab)] -> IndexResult
resolveGep (PtrTo ty0) (v:ixs0)
| isGepIndex v =
resolveGepBody ty0 ixs0
resolveGep ty0@PtrTo{} (v:ixs0)
| Just i <- elimAlias (typedType v) =
Resolve i (\ty' -> resolveGep ty0 (Typed ty' (typedValue v):ixs0))
resolveGep (Alias i) ixs =
Resolve i (\ty' -> resolveGep ty' ixs)
resolveGep _ _ = Invalid
resolveGep :: Type -> Typed (Value' lab) -> [Typed (Value' lab)] -> IndexResult
resolveGep baseTy tv ixs =
case ixs of
v:ixs0
| -- If headed by a pointer and the first index value has a valid GEP
-- index type, proceed to resolve the body of the GEP instruction.
isPointer t
, isGepIndex v
-> resolveGepBody baseTy ixs0

| -- If headed by a pointer and the first index has an alias type,
-- resolve the alias and try again.
isPointer t
, Just i <- elimAlias (typedType v)
-> Resolve i (\ty' -> resolveGep baseTy tv (Typed ty' (typedValue v):ixs0))

_ | -- If headed by a value with an alias type, resolve the alias and
-- try again.
Alias i <- t
-> Resolve i (\ty' -> resolveGep baseTy (Typed ty' (typedValue tv)) ixs)

| -- Otherwise, the GEP instruction is invalid.
otherwise
-> Invalid
where
t = typedType tv

-- | Resolve the type of a GEP instruction. This assumes that the input has
-- already been processed as a pointer.
Expand Down
2 changes: 1 addition & 1 deletion src/Text/LLVM/Labels.hs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ instance HasLabel Instr' where
relabel f (FCmp op l r) = FCmp op
<$> traverse (relabel f) l
<*> relabel f r
relabel f (GEP ib a is) = GEP ib
relabel f (GEP ib t a is) = GEP ib t
<$> traverse (relabel f) a
<*> traverse (traverse (relabel f)) is
relabel f (Select c l r) = Select
Expand Down
20 changes: 9 additions & 11 deletions src/Text/LLVM/PP.hs
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ ppInstr instr = case instr of
ShuffleVector a b m -> "shufflevector" <+> ppTyped ppValue a
<> comma <+> ppTyped ppValue (b <$ a)
<> comma <+> ppTyped ppValue m
GEP ib ptr ixs -> ppGEP ib ptr ixs
GEP ib ty ptr ixs -> ppGEP ib ty ptr ixs
Comment str -> char ';' <+> text str
Jump i -> "br"
<+> ppTypedLabel i
Expand Down Expand Up @@ -711,17 +711,15 @@ ppCallSym ty val = pp_ty <+> ppValue val
-> ppType res
_ -> ppType ty

ppGEP :: LLVM => Bool -> Typed Value -> [Typed Value] -> Doc
ppGEP ib ptr ixs = "getelementptr" <+> inbounds
<+> (if isImplicit then empty else explicit)
<+> commas (map (ppTyped ppValue) (ptr:ixs))
ppGEP :: LLVM => Bool -> Type -> Typed Value -> [Typed Value] -> Doc
ppGEP ib ty ptr ixs =
"getelementptr" <+> inbounds
<+> (if isImplicit then empty else explicit)
<+> commas (map (ppTyped ppValue) (ptr:ixs))
where
isImplicit = checkConfig cfgGEPImplicitType

explicit =
case typedType ptr of
PtrTo ty -> ppType ty <> comma
ty -> ppType ty <> comma
explicit = ppType ty <> comma

inbounds | ib = "inbounds"
| otherwise = empty
Expand Down Expand Up @@ -869,10 +867,10 @@ ppAsm s a i c =
ppConstExpr' :: LLVM => (i -> Doc) -> ConstExpr' i -> Doc
ppConstExpr' pp expr =
case expr of
ConstGEP inb _mix mp ixs ->
ConstGEP inb _mix ty ptr ixs ->
"getelementptr"
<+> opt inb "inbounds"
<+> parens (mcommas ((ppType <$> mp) : (map (pure . ppTyp') ixs)))
<+> parens (commas (ppType ty : map ppTyp' (ptr:ixs)))
ConstConv op tv t -> ppConvOp op <+> parens (ppTyp' tv <+> "to" <+> ppType t)
ConstSelect c l r ->
"select" <+> parens (commas [ ppTyp' c, ppTyp' l , ppTyp' r])
Expand Down

0 comments on commit 16bc5bd

Please sign in to comment.