Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
79 commits
Select commit Hold shift + click to select a range
e03a643
Update minify.go
Rudxain Oct 10, 2022
e2ff7ab
Update minify.go
Rudxain Oct 10, 2022
54d301d
Update minify.go
Rudxain Oct 10, 2022
7c37afc
Update minify.go
Rudxain Oct 10, 2022
833e53b
Add validator
Rudxain Oct 10, 2022
25be574
remove my useless comment
Rudxain Oct 11, 2022
b5a56d7
I realized that infinite loops exist lol
Rudxain Oct 11, 2022
cbc9524
remove validator (for now)
Rudxain Oct 11, 2022
2a7b588
Merge branch 'baris-inandi:main' into pr-min
Rudxain Oct 13, 2022
d43d95a
Update minify.go
Rudxain Oct 13, 2022
50edc00
Update minify.go
Rudxain Oct 13, 2022
843a102
Update minify.go
Rudxain Oct 13, 2022
e23145f
Update minify.go
Rudxain Oct 13, 2022
7995a68
`:=` -> `var`
Rudxain Nov 24, 2022
238eeb8
fmt, rename func
Rudxain Nov 24, 2022
0d6eb3a
comment on panic
Rudxain Nov 24, 2022
2f8657b
remove useless `ReplaceAll`
Rudxain Nov 24, 2022
afa390e
bring back `ReplaceAll`
Rudxain Nov 24, 2022
f628c3b
comment-out static simulator
Rudxain Nov 24, 2022
7d0c173
mutual-cancel opt
Rudxain Nov 24, 2022
2845152
comment-out `indexNoBrace`
Rudxain Nov 24, 2022
77a6a83
declare `size`, no assign
Rudxain Nov 24, 2022
c7a2837
using regexp instead of strings
Rudxain Nov 25, 2022
8c2ec28
reset opt
Rudxain Nov 25, 2022
d35bd0f
fix odd regex bug
Rudxain Nov 25, 2022
5d197b7
merge +- regexes
Rudxain Nov 25, 2022
c17f425
opt compression ratio
Rudxain Nov 25, 2022
a3f49c9
smarter compression
Rudxain Nov 25, 2022
f5231d2
comment about compressor optimizer
Rudxain Nov 25, 2022
c48c3dd
idiomatic Go
Rudxain Nov 25, 2022
0c1ae91
concise and objective comment
Rudxain Dec 2, 2022
a77c752
remove special-casing. concise comments
Rudxain Dec 2, 2022
c800e30
"modularize" CRO
Rudxain Dec 2, 2022
4672513
simulator
Rudxain Dec 2, 2022
0e5892e
more idiomatic
Rudxain Dec 2, 2022
9700cf7
subjective FP comment
Rudxain Dec 2, 2022
493cff7
comment: order of opts
Rudxain Dec 2, 2022
56dd461
convert simulator to func
Rudxain Dec 2, 2022
d8070de
memSimulator doc
Rudxain Dec 2, 2022
b61bb81
move regexes out of `minify`
Rudxain Dec 2, 2022
215262b
Update minify.go
Rudxain Dec 2, 2022
25ee7bf
prefix reset
Rudxain Dec 3, 2022
144c40b
remove possessive quantifier
Rudxain Dec 3, 2022
3b13ff8
fix infinite loop
Rudxain Dec 3, 2022
cbcd3f4
"cell" is redundant
Rudxain Dec 3, 2022
b84531e
better docs
Rudxain Dec 3, 2022
505ab22
minor correction
Rudxain Dec 3, 2022
7352840
getMatchingBraces
Rudxain Dec 3, 2022
e939170
last dot
Rudxain Dec 3, 2022
b962da2
add doc, better name
Rudxain Dec 3, 2022
d3fbb36
specialize `isPrefixReset`, for speed
Rudxain Dec 3, 2022
4be0593
simpler compressor
Rudxain Dec 5, 2022
9021638
corrected bias in compressor
Rudxain Dec 5, 2022
0c0b5bc
inline `IsMorePlusThanMinus`
Rudxain Dec 6, 2022
ab570c9
{ODD|EVEN}_RESET
Rudxain Dec 6, 2022
0ba6830
naming, comments
Rudxain Dec 7, 2022
1f3eca9
Update minify.go
Rudxain Dec 7, 2022
6c9bc91
fmt
Rudxain Dec 7, 2022
0972561
minor corrections, comment about speed
Rudxain Dec 7, 2022
3869b96
Merge branch 'baris-inandi:main' into pr-min
Rudxain Dec 17, 2022
cfc441e
Merge branch 'baris-inandi:main' into pr-min
Rudxain Jan 24, 2023
3fed9f2
forgot a doc
Rudxain Feb 17, 2023
302f5ff
fix bug in `noOutputRemover`
Rudxain Feb 17, 2023
402dd9a
single-quote literals
Rudxain Feb 17, 2023
3cd3711
move doc-comment in utils
Rudxain Feb 17, 2023
6c79e6c
fix RuneInSlice doc-comment?
Rudxain Feb 17, 2023
1062a73
add RelativeIndex to utils
Rudxain Feb 17, 2023
87f240d
use `RelativeIndex`
Rudxain Feb 17, 2023
7c78170
inc `go.mod` version to support generic fn applier
Rudxain Feb 18, 2023
dd23f37
rename some helper funcs
Rudxain Feb 18, 2023
31dd0b1
Merge branch 'main' into pr-min
Rudxain Apr 2, 2024
c8c09a4
Merge branch 'baris-inandi:main' into pr-min
Rudxain Apr 15, 2024
f54293a
mem-sim pseudo-code
Rudxain Apr 15, 2024
8841359
improve `removeAfterLastDot`, better comments
Apr 18, 2024
33de383
`getMatchingBraces` update
Apr 18, 2024
2fbef02
impl `zeroLoopRemover`
Apr 18, 2024
120161c
rm outdated doc
Apr 18, 2024
812d5d0
impl multi-pass minify
Apr 18, 2024
fa8e050
Merge branch 'baris-inandi:main' into pr-min
Rudxain May 29, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
269 changes: 266 additions & 3 deletions bffmt/minify.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,277 @@ package bffmt

import (
"os"
"regexp"
"strings"

"github.com/baris-inandi/bfgo/lang/readcode"
"github.com/baris-inandi/bfgo/utils"
)

func MinifyFile(files ...string) {
/*
// Minification level
type Level int

// enum emulation
// https://stackoverflow.com/a/14426447
const (

// only removes non-BF chars,
// therefore it has maximum portability
// across implementations.
BASIC Level = iota
// assumes code isn't a main program.
// useful for "libraries" and "modules", such as subroutines.
LIB Level = iota
// assume all code will be run as-is
MAIN Level = iota

)
*/
func MinifyFile( /*l Level,*/ files ...string) {
// canonical `-` unconditional cell resetter
const ODD_RESET = "[-]"

// canonical `-` conditional (break-if-even) cell resetter
const EVEN_RESET = "[--]"

// matches any loop whose body is an odd-length run of only "+" or only "-",
// except ODD_RESET itself.
//
// assuming wrapping cells whose size is a power of two, an odd step always
// reaches 0, so every such loop behaves like ODD_RESET.
var isOddReset = regexp.MustCompile(`\[(?:(?:\+\+)*\+|(?:--)+-)\]`)

// matches any loop whose body is a run of only "+" or only "-" whose length
// is 2, 6, 10, ... (2 modulo 4), except EVEN_RESET itself.
//
// only those lengths are interchangeable with EVEN_RESET: assuming wrapping
// cells whose size is a power of two (at least 4 values), a step of 2 mod 4
// halts exactly when the cell is even, like "[--]" does.
// a step that is a multiple of 4 (e.g. "[++++]") only halts when the cell is
// itself a multiple of 4, so rewriting it to "[--]" would make a loop that
// never ends on a cell holding 2 suddenly terminate.
var isEvenReset = regexp.MustCompile(`\[(?:(?:\+{4})*\+\+|(?:-{4})+--)\]`)

// matches ODD_RESET, preceded by 1 or more "+" or "-" (mixed).
// whatever the prefix did to the cell is erased by the reset that follows
var isPrefixedReset = regexp.MustCompile(`[+-]+\[-\]`)

// locates the 1st "[" at or after `start` and the "]" that closes it.
//
// returns (open, close) as byte indices into s.
// (-1, -1) when there is no "[" to be found,
// (open, -1) when the "[" is never closed.
//
// a negative `start` counts from the end of s.
var getMatchingBraces = func(s string, start int) (int, int) {
	pos := utils.RelativeIndex(start, len(s))

	// skip everything that precedes the opening brace
	for pos < len(s) && s[pos] != '[' {
		pos++
	}
	if pos >= len(s) {
		return -1, -1
	}
	open := pos

	// resume right after "[", so it isn't counted as a nested brace.
	// when "[" is the very last byte this loop simply never runs
	nested := 0
	for pos = open + 1; pos < len(s); pos++ {
		switch s[pos] {
		case '[':
			nested++
		case ']':
			if nested == 0 {
				return open, pos
			}
			nested--
		}
	}
	return open, -1
}

// returns the index of the 1st byte, at or after `start`,
// that is none of "[", "]", "," and ".". -1 when every remaining byte is one of those.
//
// a negative `start` counts from the end of s.
var indexNoIOBrace = func(s string, start int) int {
	for pos := utils.RelativeIndex(start, len(s)); pos < len(s); pos++ {
		switch s[pos] {
		case '[', ']', ',', '.':
			// IO or brace: keep looking
		default:
			return pos
		}
	}
	return -1
}

// meant to remove consecutive loops, keeping only the 1st one.
//
// not implemented yet: the code is handed back untouched.
var rmLoopLoop = func(code string) string {
	return code
}

// removes every loop found before the 1st possible memory write.
//
// this is safe, because memory is all-zeros, and loops are guaranteed to never run.
//
// scanning stops at the 1st ",", "+" or "-" (from there on the cell can't be
// proven to be 0), or at an unmatched "[" (there's no complete loop to remove).
var rm0Loop = func(s string) string {
	i := 0
	for i < len(s) {
		switch s[i] {
		case ',', '+', '-':
			// can't guarantee cell is 0
			return s
		case '[':
			_, end := getMatchingBraces(s, i)
			if end < 0 {
				// unmatched "[": keep the rest as-is.
				// slicing with end == -1 would append a full copy of s instead
				return s
			}
			s = s[:i] + s[end+1:]
			// i is deliberately not advanced: the byte that followed the
			// removed loop now sits at i, and it must be inspected as well
		default:
			i++
		}
	}
	return s
}

// trims s right after its last ".", "," or "]".
// s is returned whole when it holds none of them.
//
// whatever follows that byte can't produce output, consume input or loop,
// so `stdin` side effects still happen and infinite loops are still executed.
//
// a mismatched '[' doesn't matter, because it either:
//
// 1. continues execution
//
// 2. halts/crashes the program
var rmAfterEffects = func(s string) string {
	// walk backwards; `end` is the length of the candidate prefix
	for end := len(s); end > 0; end-- {
		switch s[end-1] {
		case '.', ',', ']':
			return s[:end]
		}
	}
	return s
}

// # Memory Simulator
//
// Intended to statically analyse code and remove some no-ops,
// treating every `IOBrace` byte as a black-box with potential side effects.
//
// For now it only replays "+", "-", ">" and "<" on a scratch tape
// and returns s unchanged.
var memSim = func(s string) string {
	// simulated BF tape: relative cell index -> cell value
	tape := map[int]uint8{}
	// pointer position, relative to where it started
	head := 0

	/* # pseudo-code
	0. split s by IOBrace (consecutives are treated as 1).
	1. sim each substr in the resulting array,
	such that each sub has its own isolated mem.
	2. replace each substr by its "canonical form"
	derived from mem.
	3. re-insert delimiters at corresponding positions.
	*/
	// we need an outer loop to cleanup mem.
	// and inner loop should break whenever it finds IOBrace
	i := indexNoIOBrace(s, 0)
	for i > -1 && i < len(s) {
		switch s[i] {
		case '+':
			tape[head]++
		case '-':
			tape[head]--
		case '>':
			head++
		case '<':
			head--
		}
		i = indexNoIOBrace(s, i+1)
	}
	return s
}

// # Compression Ratio Optimizer
//
// Uses [frequency analysis] to increase compression-ratio by 3rd-party algorithms:
// when "+" is the more common of the two symbols, the "-" resetters are
// rewritten with "+", which makes the dominant symbol even more dominant.
//
// Only ODD_RESET and EVEN_RESET are rewritten; "+" resetters are assumed absent.
//
// [frequency analysis]: https://en.wikipedia.org/wiki/Frequency_analysis
var optimizeCompress = func(s string) string {
	odd := strings.Count(s, ODD_RESET)
	even := strings.Count(s, EVEN_RESET)

	// the "-" inside the resetters (1 per ODD_RESET, 2 per EVEN_RESET) would
	// flip along with them, so they're left out of the tally to keep it unbiased
	looseMinus := strings.Count(s, "-") - odd - 2*even
	if strings.Count(s, "+") <= looseMinus {
		return s
	}

	// plain passes over s: time stays O(n) and no index list is allocated,
	// so extra space stays O(1) (ignoring s)
	s = strings.ReplaceAll(s, ODD_RESET, "[+]")
	return strings.ReplaceAll(s, EVEN_RESET, "[++]")
}

// # Advanced BF minifier
//
// Explained in [#2]. It assumes s only has valid ops.
//
// Runs the whole set of passes over and over, for as long as a round makes
// the code strictly shorter. The result of the last shrinking round is then
// handed to optimizeCompress.
//
// [#2]: https://github.com/baris-inandi/brainfuck-go/issues/2
var minify = func(s string) string {
	for {
		before := s

		s = utils.Apply(
			before,
			// calling this 1st may speed up the others
			rmAfterEffects,
			// order matters, (from this point onwards)
			memSim,
			rmLoopLoop,
			rm0Loop,
		)
		// these 3 are "amplified" by mem-sim
		s = isEvenReset.ReplaceAllLiteralString(s, EVEN_RESET)
		s = isOddReset.ReplaceAllLiteralString(s, ODD_RESET)
		s = isPrefixedReset.ReplaceAllLiteralString(s, ODD_RESET)

		if len(s) < len(before) {
			// progress was made, go for another round
			continue
		}
		// a round that didn't shrink the code ends the loop
		// (same length counts too, which rules out looping forever and OOM).
		// that round's output is dropped in favor of its input
		return optimizeCompress(before)
	}
}

for _, f := range files {
minified := readcode.ReadBFCode(f)
err := os.WriteFile(f, []byte(minified), 0644)
minified := minify(readcode.ReadBFCode(f))
err := os.WriteFile(f, []byte(minified), 0o644)
if err != nil {
panic(err)
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
module github.com/baris-inandi/bfgo

go 1.17
go 1.18

require (
github.com/urfave/cli/v2 v2.27.1
Expand Down
32 changes: 25 additions & 7 deletions utils/utils.go
Original file line number Diff line number Diff line change
@@ -1,17 +1,35 @@
// other utility functions
package utils

// other utility functions
// RuneInSlice reports whether the rune a is an element of list.
func RuneInSlice(a rune, list []rune) bool {
	for i := 0; i < len(list); i++ {
		if list[i] == a {
			return true
		}
	}
	return false
}

// RelativeIndex resolves i against a sequence of length size.
//
// A non-negative i is returned as-is (it is not checked against size).
// A negative i counts from the end, so -1 resolves to size-1.
// It panics when a negative i reaches before the start.
func RelativeIndex(i, size int) int {
	if i >= 0 {
		return i
	}
	if fromEnd := i + size; fromEnd >= 0 {
		return fromEnd
	}
	// should it return an err instead of panicking?
	panic("Index out of bounds")
}

// Apply emulates a "pipeline operator": it feeds s to the 1st function,
// that result to the 2nd, and so on, returning the last result.
// With no functions, s is returned as-is.
func Apply[T any](s T, fs ...func(T) T) T {
	acc := s
	for i := range fs {
		acc = fs[i](acc)
	}
	return acc
}