Skip to content

Commit

Permalink
change code unit width to 8
Browse files Browse the repository at this point in the history
  • Loading branch information
natfrp-bot committed Sep 2, 2024
1 parent 5f3687a commit 4620c32
Show file tree
Hide file tree
Showing 20 changed files with 259 additions and 491 deletions.
11 changes: 11 additions & 0 deletions charLowerUpper.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package regexp2

import "unicode"

// toLowerChar lower-cases a single 8-bit code unit.
//
// Only ASCII bytes are folded. A byte above unicode.MaxASCII is returned
// unchanged: when the input originates from a Go string such a byte is a
// UTF-8 lead or continuation byte, not a Latin-1 code point, so widening it
// to a rune and folding it (for example 0xC3 -> 0xE3) would rewrite part of
// a multi-byte sequence.
func toLowerChar(ch byte) byte {
	if ch > unicode.MaxASCII {
		return ch
	}
	// For ASCII input the result is always ASCII, so the narrowing
	// conversion back to byte cannot lose information.
	return byte(unicode.ToLower(rune(ch)))
}

// toUpperChar upper-cases a single 8-bit code unit.
//
// Only ASCII bytes are folded. A byte above unicode.MaxASCII is returned
// unchanged, for two reasons:
//   - some Latin-1 code points upper-case to runes outside the byte range
//     (0xFF -> U+0178, 0xB5 -> U+039C), and narrowing those back to a byte
//     silently truncates them to unrelated values ('x' and 0x9C);
//   - when the input originates from a Go string, such a byte is a UTF-8
//     lead or continuation byte rather than a Latin-1 code point.
func toUpperChar(ch byte) byte {
	if ch > unicode.MaxASCII {
		return ch
	}
	// For ASCII input the result is always ASCII, so the narrowing
	// conversion back to byte cannot lose information.
	return byte(unicode.ToUpper(rune(ch)))
}
24 changes: 12 additions & 12 deletions match.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ type Group struct {
// Capture is a single capture of text within the larger original string
type Capture struct {
// the original string
text []rune
// Index is the position in the underlying rune slice where the first character of
// captured substring was found. Even if you pass in a string this will be in Runes.
text []byte
// Index is the position in the underlying byte slice where the first character of
// captured substring was found. Even if you pass in a string this will be in Bytes.
Index int
// Length is the number of runes in the captured substring.
// Length is the number of bytes in the captured substring.
Length int
}

Expand All @@ -56,12 +56,12 @@ func (c *Capture) String() string {
return string(c.text[c.Index : c.Index+c.Length])
}

// Runes returns the captured text as a rune slice
func (c *Capture) Runes() []rune {
// Bytes returns the captured text as a byte slice
func (c *Capture) Bytes() []byte {
return c.text[c.Index : c.Index+c.Length]
}

func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
func newMatch(regex *Regexp, capcount int, text []byte, startpos int) *Match {
m := Match{
regex: regex,
matchcount: make([]int, capcount),
Expand All @@ -75,13 +75,13 @@ func newMatch(regex *Regexp, capcount int, text []rune, startpos int) *Match {
return &m
}

func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []rune, startpos int) *Match {
func newMatchSparse(regex *Regexp, caps map[int]int, capcount int, text []byte, startpos int) *Match {
m := newMatch(regex, capcount, text, startpos)
m.sparseCaps = caps
return m
}

func (m *Match) reset(text []rune, textstart int) {
func (m *Match) reset(text []byte, textstart int) {
m.text = text
m.textstart = textstart
for i := 0; i < len(m.matchcount); i++ {
Expand Down Expand Up @@ -288,11 +288,11 @@ func (m *Match) groupValueAppendToBuf(groupnum int, buf *bytes.Buffer) {
last := index + matches[(c*2)-1]

for ; index < last; index++ {
buf.WriteRune(m.text[index])
buf.WriteByte(m.text[index])
}
}

func newGroup(name string, text []rune, caps []int, capcount int) Group {
func newGroup(name string, text []byte, caps []int, capcount int) Group {
g := Group{}
g.text = text
if capcount > 0 {
Expand All @@ -315,7 +315,7 @@ func newGroup(name string, text []rune, caps []int, capcount int) Group {

func (m *Match) dump() string {
buf := &bytes.Buffer{}
buf.WriteRune('\n')
buf.WriteByte('\n')
if len(m.sparseCaps) > 0 {
for k, v := range m.sparseCaps {
fmt.Fprintf(buf, "Slot %v -> %v\n", k, v)
Expand Down
44 changes: 22 additions & 22 deletions regexp.go
Original file line number Diff line number Diff line change
Expand Up @@ -178,12 +178,12 @@ func (re *Regexp) ReplaceFunc(input string, evaluator MatchEvaluator, startAt, c

// FindStringMatch searches the input string for a Regexp match
func (re *Regexp) FindStringMatch(s string) (*Match, error) {
// convert string to runes
return re.run(false, -1, getRunes(s))
// convert string to bytes
return re.run(false, -1, getBytes(s))
}

// FindRunesMatch searches the input rune slice for a Regexp match
func (re *Regexp) FindRunesMatch(r []rune) (*Match, error) {
// FindBytesMatch searches the input byte slice for a Regexp match
func (re *Regexp) FindBytesMatch(r []byte) (*Match, error) {
return re.run(false, -1, r)
}

Expand All @@ -192,17 +192,17 @@ func (re *Regexp) FindStringMatchStartingAt(s string, startAt int) (*Match, erro
if startAt > len(s) {
return nil, errors.New("startAt must be less than the length of the input string")
}
r, startAt := re.getRunesAndStart(s, startAt)
r, startAt := re.getBytesAndStart(s, startAt)
if startAt == -1 {
// we didn't find our start index in the string -- that's a problem
return nil, errors.New("startAt must align to the start of a valid rune in the input string")
return nil, errors.New("startAt must align to the start of a valid byte in the input string")
}

return re.run(false, startAt, r)
}

// FindRunesMatchStartingAt searches the input rune slice for a Regexp match starting at the startAt index
func (re *Regexp) FindRunesMatchStartingAt(r []rune, startAt int) (*Match, error) {
// FindBytesMatchStartingAt searches the input byte slice for a Regexp match starting at the startAt index
func (re *Regexp) FindBytesMatchStartingAt(r []byte, startAt int) (*Match, error) {
return re.run(false, startAt, r)
}

Expand Down Expand Up @@ -233,44 +233,44 @@ func (re *Regexp) FindNextMatch(m *Match) (*Match, error) {
// MatchString returns true if the string matches the regex
// error will be set if a timeout occurs
func (re *Regexp) MatchString(s string) (bool, error) {
m, err := re.run(true, -1, getRunes(s))
m, err := re.run(true, -1, getBytes(s))
if err != nil {
return false, err
}
return m != nil, nil
}

func (re *Regexp) getRunesAndStart(s string, startAt int) ([]rune, int) {
func (re *Regexp) getBytesAndStart(s string, startAt int) ([]byte, int) {
if startAt < 0 {
if re.RightToLeft() {
r := getRunes(s)
r := getBytes(s)
return r, len(r)
}
return getRunes(s), 0
return getBytes(s), 0
}
ret := make([]rune, len(s))
ret := make([]byte, len(s))
i := 0
runeIdx := -1
for strIdx, r := range s {
byteIdx := -1
for strIdx, r := range []byte(s) {
if strIdx == startAt {
runeIdx = i
byteIdx = i
}
ret[i] = r
i++
}
if startAt == len(s) {
runeIdx = i
byteIdx = i
}
return ret[:i], runeIdx
return ret[:i], byteIdx
}

func getRunes(s string) []rune {
return []rune(s)
func getBytes(s string) []byte {
return []byte(s)
}

// MatchRunes return true if the runes matches the regex
// MatchBytes returns true if the bytes match the regex
// error will be set if a timeout occurs
func (re *Regexp) MatchRunes(r []rune) (bool, error) {
func (re *Regexp) MatchBytes(r []byte) (bool, error) {
m, err := re.run(true, -1, r)
if err != nil {
return false, err
Expand Down
4 changes: 2 additions & 2 deletions regexp_pcre_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,14 +268,14 @@ func containsEnder(line string, ender byte, allowFirst bool) bool {
}

func unEscapeToMatch(line string) string {
idx := strings.IndexRune(line, '\\')
idx := strings.IndexByte(line, '\\')
// no slashes means no unescape needed
if idx == -1 {
return line
}

buf := bytes.NewBufferString(line[:idx])
// get the runes for the rest of the string -- we're going full parser scan on this
// get the bytes for the rest of the string -- we're going full parser scan on this

inEscape := false
// take any \'s and convert them
Expand Down
26 changes: 13 additions & 13 deletions regexp_performance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,17 +86,17 @@ func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
b.StopTimer()

data := "abcdefghijklmnopqrstuvwxyz"
x := make([]rune, 32768*len(data))
x := make([]byte, 32768*len(data))
for i := 0; i < 32768; /*(2^15)*/ i++ {
for j := 0; j < len(data); j++ {
x[i*len(data)+j] = rune(data[j])
x[i*len(data)+j] = byte(data[j])
}
}

re := MustCompile("^zbc(d|e)", 0)
b.StartTimer()
for i := 0; i < b.N; i++ {
if m, err := re.MatchRunes(x); m || err != nil {
if m, err := re.MatchBytes(x); m || err != nil {
b.Fatalf("unexpected match or error! %v", err)
}
}
Expand All @@ -117,17 +117,17 @@ func BenchmarkAnchoredShortMatch(b *testing.B) {
func BenchmarkAnchoredLongMatch(b *testing.B) {
b.StopTimer()
data := "abcdefghijklmnopqrstuvwxyz"
x := make([]rune, 32768*len(data))
x := make([]byte, 32768*len(data))
for i := 0; i < 32768; /*(2^15)*/ i++ {
for j := 0; j < len(data); j++ {
x[i*len(data)+j] = rune(data[j])
x[i*len(data)+j] = byte(data[j])
}
}

re := MustCompile("^.bc(d|e)", 0)
b.StartTimer()
for i := 0; i < b.N; i++ {
if m, err := re.MatchRunes(x); !m || err != nil {
if m, err := re.MatchBytes(x); !m || err != nil {
b.Fatalf("no match or error! %v", err)
}
}
Expand Down Expand Up @@ -205,13 +205,13 @@ func BenchmarkOnePassLongNotPrefix(b *testing.B) {
}
}

var text []rune
var text []byte

func makeText(n int) []rune {
func makeText(n int) []byte {
if len(text) >= n {
return text[:n]
}
text = make([]rune, n)
text = make([]byte, n)
x := ^uint32(0)
for i := range text {
x += x
Expand All @@ -222,7 +222,7 @@ func makeText(n int) []rune {
if x%31 == 0 {
text[i] = '\n'
} else {
text[i] = rune(x%(0x7E+1-0x20) + 0x20)
text[i] = byte(x%(0x7E+1-0x20) + 0x20)
}
}
return text
Expand All @@ -234,7 +234,7 @@ func benchmark(b *testing.B, re string, n int) {
b.ResetTimer()
b.SetBytes(int64(n))
for i := 0; i < b.N; i++ {
if m, err := r.MatchRunes(t); m {
if m, err := r.MatchBytes(t); m {
b.Fatal("match!")
} else if err != nil {
b.Fatalf("Err %v", err)
Expand Down Expand Up @@ -301,7 +301,7 @@ func BenchmarkLeading(b *testing.B) {
inp := makeText(1000000)
b.StartTimer()
for i := 0; i < b.N; i++ {
if m, err := r.MatchRunes(inp); !m {
if m, err := r.MatchBytes(inp); !m {
b.Errorf("Expected match")
} else if err != nil {
b.Errorf("Error: %v", err)
Expand Down Expand Up @@ -331,7 +331,7 @@ func BenchmarkShortSearch(b *testing.B) {
t := makeText(100)
b.SetBytes(int64(len(t)))
matchOnce := func(r *Regexp) {
if m, err := r.MatchRunes(t); m {
if m, err := r.MatchBytes(t); m {
b.Fatal("match!")
} else if err != nil {
b.Fatalf("Err %v", err)
Expand Down
38 changes: 0 additions & 38 deletions regexp_re2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,33 +122,6 @@ func TestRE2Dollar_Multiline(t *testing.T) {
}
}

// TestRE2ExtendedZero checks that, when compiled with the RE2 option, ^\d$
// does not match the character U+07C0 and ^\D$ does.
func TestRE2ExtendedZero(t *testing.T) {
	const notZero = "߀" // \u07c0

	digit := MustCompile(`^\d$`, RE2)
	if matched, _ := digit.MatchString(notZero); matched {
		t.Fatal("Expected no match")
	}

	nonDigit := MustCompile(`^\D$`, RE2)
	matched, _ := nonDigit.MatchString(notZero)
	if !matched {
		t.Fatal("Expected match")
	}
}

// TestRegularExtendedZero checks that, when compiled with no options, ^\d$
// matches the character U+07C0 and ^\D$ does not.
func TestRegularExtendedZero(t *testing.T) {
	const notZero = "߀" // \u07c0

	digit := MustCompile(`^\d$`, 0)
	matched, _ := digit.MatchString(notZero)
	if !matched {
		t.Fatal("Expected match")
	}

	nonDigit := MustCompile(`^\D$`, 0)
	if matched, _ := nonDigit.MatchString(notZero); matched {
		t.Fatal("Expected no match")
	}
}

func TestRE2Word(t *testing.T) {
r := MustCompile(`\w`, RE2)
if m, _ := r.MatchString("å"); m {
Expand All @@ -162,17 +135,6 @@ func TestRE2Word(t *testing.T) {

}

// TestRegularWord checks that, when compiled with no options, \w matches
// the string "å" and \W does not.
func TestRegularWord(t *testing.T) {
	const input = "å"

	word := MustCompile(`\w`, 0)
	if ok, _ := word.MatchString(input); !ok {
		t.Fatal("Expected match")
	}

	nonWord := MustCompile(`\W`, 0)
	if ok, _ := nonWord.MatchString(input); ok {
		t.Fatal("Expected no match")
	}
}

func TestRE2Space(t *testing.T) {
r := MustCompile(`\s`, RE2)
if m, _ := r.MatchString("\x0b"); m {
Expand Down
Loading

0 comments on commit 4620c32

Please sign in to comment.