Skip to content

Fix the Ord instance for Ident + some other small fixes #42

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
name: CI

on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]

# The CACHE_VERSION can be updated to force the use of a new cache if
# the current cache contents become corrupted/invalid. This can
# sometimes happen when (for example) the OS version is changed but
# older .so files are cached, which can have various effects
# (e.g. cabal complains it can't find a valid version of the "happy"
# tool).
env:
CACHE_VERSION: 1

jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-22.04]
ghc: ["9.4.8", "9.6.6", "9.8.2"]
cabal: ["3.10.3.0"]

steps:
- uses: actions/checkout@v4
with:
submodules: true

- uses: haskell-actions/setup@v2
id: setup-haskell
with:
ghc-version: ${{ matrix.ghc }}
cabal-version: ${{ matrix.cabal }}

- uses: actions/cache/restore@v4
name: Restore cabal store cache
with:
path: |
${{ steps.setup-haskell.outputs.cabal-store }}
dist-newstyle
key: ${{ env.CACHE_VERSION }}-cabal-${{ matrix.os }}-${{ matrix.ghc }}-${{ hashFiles(format('cabal.GHC-{0}.config', matrix.ghc)) }}-${{ github.sha }}
restore-keys: |
${{ env.CACHE_VERSION }}-cabal-${{ matrix.os }}-${{ matrix.ghc }}-${{ hashFiles(format('cabal.GHC-{0}.config', matrix.ghc)) }}-

- name: Update
run: cabal update
- name: Configure
run: cabal configure --enable-tests
- name: Build
run: cabal build
- name: Run tests
run: cabal test

- uses: actions/cache/save@v4
name: Save cabal store cache
if: always()
with:
path: |
${{ steps.setup-haskell.outputs.cabal-store }}
dist-newstyle
key: ${{ env.CACHE_VERSION }}-cabal-${{ matrix.os }}-${{ matrix.ghc }}-${{ hashFiles(format('cabal.GHC-{0}.config', matrix.ghc)) }}-${{ github.sha }}
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@ sample-sources/
!sample-sources/statement-expressions.rs
!sample-sources/statements.rs
!sample-sources/types.rs

# Unicode-related autogenerated files
DerivedCoreProperties.txt
UnicodeLexer.x
36 changes: 0 additions & 36 deletions .travis.yml

This file was deleted.

9 changes: 6 additions & 3 deletions language-rust.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ library
-Wincomplete-uni-patterns
-Wmissing-signatures

build-tools: alex >=3.1, happy >=1.19.8
build-tools: alex >=3.1, happy >=1.19.8 && < 2.1 || >= 2.1.1
-- We restrict to < 2.1 || >= 2.1.1, because of
-- https://github.com/haskell/happy/issues/320

default-language: Haskell2010

exposed-modules: Language.Rust.Syntax
Expand Down Expand Up @@ -86,9 +89,9 @@ library

build-depends: base >=4.9 && <5.0
, prettyprinter >=1.0 && <2.0
, transformers >=0.4 && <0.6
, transformers >=0.4 && <0.7
, array >=0.5 && <0.6
, deepseq >=1.1 && <1.5
, deepseq >=1.1 && <1.6

if flag(useByteStrings)
cpp-options: -DUSE_BYTESTRING
Expand Down
167 changes: 167 additions & 0 deletions scripts/unicode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/usr/bin/env python
#
# Copyright 2011-2015 The Rust Project Developers
# 2024 Galois Inc.
#
# This script was originally created by the Rust Project Developers as part of
# the `unicode-xid` crate:
#
# https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/scripts/unicode.py
#
# See the COPYRIGHT file in the `unicode-xid` crate:
#
# https://github.com/unicode-rs/unicode-xid/blob/b3a2718b062da229c0a50d12281de0e5d8e8cff6/COPYRIGHT
#
# Galois Inc. has modified the script to generate an `alex`-based lexer instead
# of a Rust-based lexer.
#
# Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
# http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
# <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
# option. This file may not be copied, modified, or distributed
# except according to those terms.

import fileinput, re, os, sys

unicode_version = (15, 1, 0)

preamble = '''-- NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
--
-- If you need to update this code, perform the following steps:
--
-- 1. (Optional) Update the value of `unicode_version` in "scripts/unicode.py".
-- 2. Run the "scripts/unicode.py" script.
-- 3. Copy the code (including the comments) in the autogenerated `UnicodeLexer.x` file.
-- 4. Replace the existing autogenerated code here.
'''

postamble = '''-- End of code generated by "scripts/unicode.py".
'''

def unicode_url(f):
    """Return the download URL of UCD data file `f` for `unicode_version`.

    The URL is built from the three components of the module-level
    `unicode_version` tuple followed by the file name.
    """
    major, minor, patch = unicode_version
    return "http://www.unicode.org/Public/%s.%s.%s/ucd/%s" % (major, minor, patch, f)

def fetch(f):
    """Make sure the data file `f` is present in the current directory.

    If no file named after the basename of `f` exists, it is downloaded
    with `curl -O` from the address given by `unicode_url(f)`. If the file
    is still missing afterwards, an error is written to stderr and the
    process exits with status 1.
    """
    local_name = os.path.basename(f)

    if not os.path.exists(local_name):
        os.system("curl -O %s" % unicode_url(f))

    if not os.path.exists(local_name):
        # Terminate the message with a newline so it does not run into the
        # shell prompt, and use sys.exit: the bare `exit` builtin is injected
        # by the `site` module and is not guaranteed to be available.
        sys.stderr.write("cannot load %s\n" % f)
        sys.exit(1)

def group_cat(cat):
    """Collapse a collection of code points into sorted inclusive ranges.

    Duplicates are removed and the values are sorted; every maximal run of
    consecutive integers becomes one `(start, end)` tuple.

    Returns a list of `(start, end)` tuples in ascending order, or an empty
    list when `cat` is empty.
    """
    letters = sorted(set(cat))
    if not letters:
        # Nothing to group; the previous implementation raised IndexError here.
        return []

    cat_out = []
    cur_start = cur_end = letters[0]
    for letter in letters[1:]:
        # Holds by construction (sorted, de-duplicated input); kept as a
        # sanity check.
        assert letter > cur_end, \
            "cur_end: %s, letter: %s" % (hex(cur_end), hex(letter))
        if letter == cur_end + 1:
            # Extends the current run.
            cur_end = letter
        else:
            # Gap found: close the current run and start a new one.
            cat_out.append((cur_start, cur_end))
            cur_start = cur_end = letter
    cat_out.append((cur_start, cur_end))
    return cat_out

def ungroup_cat(cat):
    """Expand inclusive `(lo, hi)` ranges into a flat list of integers.

    Ranges are expanded in the order given; a range whose `lo` exceeds its
    `hi` contributes nothing.
    """
    expanded = []
    for first, last in cat:
        expanded.extend(range(first, last + 1))
    return expanded

def format_table_content(f, content, indent):
    """Write `content` to `f` as one alternative per line.

    `content` is split on "|". Each piece is written on its own line,
    indented by `indent` spaces; the first piece is prefixed with "= " and
    every later piece with "| ". A blank line follows the table.
    """
    pad = " " * indent
    rows = []
    for position, chunk in enumerate(content.split("|")):
        marker = "= " if position == 0 else "| "
        rows.append(pad + marker + chunk + "\n")
    f.write("".join(rows) + '\n')

def load_properties(f, interestingprops):
    """Parse a UCD property file into a map from property name to ranges.

    The file is obtained with `fetch(f)` and then read from the current
    directory under its basename. Two kinds of lines are recognised:
    a single code point (`XXXX ; Prop`) and a range (`XXXX..YYYY ; Prop`),
    with the code points in upper-case hex. All other lines are skipped.

    `interestingprops` restricts the result to the listed property names;
    a falsy value keeps every property.

    Returns a dict mapping each property name to a sorted list of inclusive
    `(lo, hi)` integer ranges, merged where adjacent.
    """
    fetch(f)
    props = {}
    # Raw strings: `\w` and `\.` are regex escapes, not string escapes. In a
    # plain string literal they are invalid escape sequences, which newer
    # Python versions warn about.
    re1 = re.compile(r"^ *([0-9A-F]+) *; *(\w+)")
    re2 = re.compile(r"^ *([0-9A-F]+)\.\.([0-9A-F]+) *; *(\w+)")

    for line in fileinput.input(os.path.basename(f)):
        m = re1.match(line)
        if m:
            # Single code point: the range starts and ends on it.
            d_lo = d_hi = m.group(1)
            prop = m.group(2)
        else:
            m = re2.match(line)
            if not m:
                # Neither form matched (comment, blank line, ...).
                continue
            d_lo, d_hi, prop = m.groups()
        if interestingprops and prop not in interestingprops:
            continue
        props.setdefault(prop, []).append((int(d_lo, 16), int(d_hi, 16)))

    # optimize if possible: expand and regroup so adjacent ranges are merged
    for prop in props:
        props[prop] = group_cat(ungroup_cat(props[prop]))

    return props

def escape_char(c):
    """Render the integer code point `c` as a backslash-x hex escape.

    The hex digits are lower-case and zero-padded to at least four places.
    """
    return "\\x%04x" % (c,)

def emit_table(f, name, t_data):
    """Write one named table of code-point ranges to `f`.

    A header line "@<name>" is written first. Each `(lo, hi)` entry of
    `t_data` is then rendered as a single escaped character when `lo == hi`
    and as a bracketed `[lo-hi]` range otherwise. The entries are joined
    with "|" and laid out by `format_table_content` with an indent of 2.
    """
    f.write("@%s\n" % name)

    def render(entry):
        # A one-element range is written as a bare character.
        if entry[0] == entry[1]:
            return "%s" % escape_char(entry[0])
        return "[%s-%s]" % (escape_char(entry[0]), escape_char(entry[1]))

    alternatives = "|".join(render(entry) for entry in t_data)
    format_table_content(f, alternatives, 2)

def emit_property_module(f, mod, tbl, emit):
    """Emit one table per property named in `emit`, in that order.

    Each table's ranges are looked up in `tbl` by property name and written
    to `f` with `emit_table`. The `mod` argument is accepted but not used.
    """
    for prop_name in emit:
        ranges = tbl[prop_name]
        emit_table(f, prop_name, ranges)

if __name__ == "__main__":
    # Script entry point: regenerate "UnicodeLexer.x" in the current directory
    # from the XID_Start / XID_Continue properties of the configured Unicode
    # version.

    # One name for the UCD data file, used for both the URL printed in the
    # generated header and the actual load. The header previously used a
    # misspelled name ("DerviedCoreProperties.txt"), giving a wrong URL.
    ucd_file = "DerivedCoreProperties.txt"

    r = "UnicodeLexer.x"
    if os.path.exists(r):
        os.remove(r)
    with open(r, "w") as rf:
        # write the file's preamble
        rf.write(preamble)

        # record which Unicode version and table the output is based on
        rf.write('''
-- Based on Unicode %s.%s.%s, using the following Unicode table:
-- %s

''' % (unicode_version + (unicode_url(ucd_file),)))

        # download and parse all the data
        want_derived = ["XID_Start", "XID_Continue"]
        derived = load_properties(ucd_file, want_derived)
        emit_property_module(rf, "derived_property", derived, want_derived)

        # write the file's postamble
        rf.write(postamble)
36 changes: 19 additions & 17 deletions src/Language/Rust/Data/Ident.hs
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,22 @@ import Data.Char ( ord )
import Data.String ( IsString(..) )
import Data.Semigroup as Sem

-- | An identifier
-- | An identifier.
-- Note that the order of the fields is important, so that
-- when we derive `Eq` and `Ord` the hash is compared first.
data Ident
= Ident { name :: Name -- ^ payload of the identifier
= Ident { hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparision
, name :: Name -- ^ payload of the identifier
, raw :: Bool -- ^ whether the identifier is raw
, hash :: {-# UNPACK #-} !Int -- ^ hash for quick comparision
} deriving (Data, Typeable, Generic, NFData)
}
deriving ( Data, Typeable, Generic, NFData
-- | Note that this instance takes the 'raw' field into account, so
-- the identifiers @x@ and @r#x@ are judged /not/ to be equal.
, Eq
-- | Note that this instance takes the 'raw' field into account, so
-- the identifiers @x@ and @r#x@ are judged /not/ to be equal.
, Ord
)

-- | Shows the identifier as a string (for use with @-XOverloadedStrings@)
instance Show Ident where
Expand All @@ -40,30 +50,22 @@ instance Show Ident where
instance IsString Ident where
fromString = mkIdent

-- | Uses 'hash' to short-circuit
instance Eq Ident where
i1 == i2 = hash i1 == hash i2 && name i1 == name i2 && raw i1 == raw i2
i1 /= i2 = hash i1 /= hash i2 || name i1 /= name i2 || raw i1 /= raw i2

-- | Uses 'hash' to short-circuit
instance Ord Ident where
compare i1 i2 = case compare i1 i2 of
EQ -> compare (raw i1, name i1) (raw i2, name i2)
rt -> rt

-- | "Forgets" about whether either argument was raw
instance Monoid Ident where
mappend = (<>)
mempty = mkIdent ""

-- | "Forgets" about whether either argument was raw
instance Sem.Semigroup Ident where
Ident n1 _ _ <> Ident n2 _ _ = mkIdent (n1 <> n2)
i1 <> i2 = mkIdent (name i1 <> name i2)


-- | Smart constructor for making an 'Ident'.
mkIdent :: String -> Ident
mkIdent s = Ident s False (hashString s)
mkIdent s = Ident { hash = hashString s
, name = s
, raw = False
}

-- | Hash a string into an 'Int'
hashString :: String -> Int
Expand Down
8 changes: 4 additions & 4 deletions src/Language/Rust/Parser/Internal.y
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ self_or_ident :: { Spanned Ident }
-----------

lifetime :: { Lifetime Span }
: LIFETIME { let Spanned (LifetimeTok (Ident l _ _)) s = $1 in Lifetime l s }
: LIFETIME { let Spanned (LifetimeTok l) s = $1 in Lifetime (name l) s }

-- parse_trait_ref()
trait_ref :: { TraitRef Span }
Expand Down Expand Up @@ -1125,7 +1125,7 @@ blockpostfix_expr :: { Expr Span }

-- labels on loops
label :: { Label Span }
: LIFETIME { let Spanned (LifetimeTok (Ident l _ _)) s = $1 in Label l s }
: LIFETIME { let Spanned (LifetimeTok l) s = $1 in Label (name l) s }

-- Literal expressions (composed of just literals)
lit_expr :: { Expr Span }
Expand Down Expand Up @@ -1904,8 +1904,8 @@ addAttrs as (Yield as' e s) = Yield (as ++ as') e s
-- | Given a 'LitTok' token that is expected to result in a valid literal, construct the associated
-- literal. Note that this should _never_ fail on a token produced by the lexer.
lit :: Spanned Token -> Lit Span
lit (Spanned (IdentTok (Ident "true" False _)) s) = Bool True Unsuffixed s
lit (Spanned (IdentTok (Ident "false" False _)) s) = Bool False Unsuffixed s
lit (Spanned (IdentTok Ident { name = "true", raw = False }) s) = Bool True Unsuffixed s
lit (Spanned (IdentTok Ident { name = "false", raw = False }) s) = Bool False Unsuffixed s
lit (Spanned (LiteralTok litTok suffix_m) s) = translateLit litTok suffix s
where
suffix = case suffix_m of
Expand Down
Loading