You can now use pyminifier as an executable module: `python -m pyminifier` will work just like running '/usr/local/bin/pyminifier'.

Fixed issue #33, where minifying multiple files into a destination directory (`--destdir`) was not working properly.  Thanks to Sjef de Krijger for emailing me a patch.  His patch also fixed the `--prepend` option, which was broken when minifying/obfuscating multiple files.
__init__.py:  Added some of the recently-added features to the global docstring.
__init__.py:  Added a docstring to the `pyminify()` function.
__main__.py:  Added a docstring to the `main()` function.
analyze.py:  The `enumerate_imports()` and `enumerate_global_imports()` functions now skip 'from' import lines such as `from foo import bar`, since those aren't handled correctly by the `enumerate_local_modules()` function inside obfuscate.py.  This is a temporary measure until the logic for differentiating between `from whatever import foo` and having a `foo.py` in the same directory is sorted out.
obfuscate.py:  Fixed a bug where variables being passed as arguments (right of an equal sign) were not being obfuscated when they should be.
All over:  Modernized some strings here and there by converting them to use `format()` and cleaned up a few things to improve code readability/presentation.
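
For context, the headline change relies on Python's standard `__main__.py` convention: any package containing a `__main__.py` can be executed with `python -m <package>`. A minimal sketch of the entry point (simplified from what this commit actually does; the real file registers the full option set that `pyminify()` expects):

    # Simplified sketch of pyminifier/__main__.py -- not the complete file.
    import sys
    from optparse import OptionParser
    from pyminifier import pyminify, __version__

    def main():
        usage = '%prog [options] "<input file>"'
        if '__main__.py' in sys.argv[0]:  # Invoked as ``python -m pyminifier``
            usage = 'pyminifier [options] "<input file>"'
        parser = OptionParser(usage=usage, version=__version__)
        parser.add_option(
            "--bzip2", action="store_true", dest="bzip2", default=False,
            help="bzip2-compress the result.")  # ...plus many more options
        options, files = parser.parse_args()
        if not files:  # No file arguments given: print usage and exit non-zero
            parser.print_help()
            sys.exit(2)
        pyminify(options, files)

    if __name__ == "__main__":
        main()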
liftoff committed Feb 6, 2015
1 parent 0c3f8dd commit 63240c3
Showing 5 changed files with 101 additions and 43 deletions.
71 changes: 53 additions & 18 deletions pyminifier/__init__.py
@@ -5,8 +5,8 @@
# For license information see LICENSE.txt

# Meta
__version__ = '2.1'
__version_info__ = (2, 1)
__version__ = '2.2'
__version_info__ = (2, 2)
__license__ = "GPLv3" # See LICENSE.txt
__author__ = 'Dan McDougall <daniel.mcdougall@liftoffsoftware.com>'

@@ -25,7 +25,9 @@
* Removes docstrings.
* Removes comments.
* Minimizes code indentation.
* Removes trailing commas.
* Joins multiline pairs of parentheses, braces, and brackets (and removes extraneous whitespace within).
* Joins disjointed strings like ``("some" "disjointed" "string")`` into single strings: ``('''some disjointed string''')``
* Preserves shebangs and encoding info (e.g. "# -*- coding: utf-8 -*-").
* Optionally, produces a bzip2 or gzip-compressed self-extracting python script containing the minified source for ultimate minification. *Added in version 1.4*
* Optionally, obfuscates the code using the shortest possible combination of letters and numbers for one or all of class names, function/method names, and variables. The options are ``--obfuscate`` or ``-O`` to obfuscate everything, ``--obfuscate-variables``, ``--obfuscate-functions``, and ``--obfuscate-classes`` to obfuscate things individually (say, if you wanted to keep your module usable by external programs). *Added in version 2.0*
@@ -66,6 +68,7 @@
# Import built-in modules
import os, sys, re, io
from optparse import OptionParser
from collections import Iterable

# Import our own modules
from . import minification
@@ -132,16 +135,37 @@ def test_function():
test_string_inside_operators = imaginary_function(
"This string was indented but the tokenizer won't see it that way."
) # To understand how this could mess up docstring removal code see the
# minification.minification.remove_comments_and_docstrings() function starting at this line:
# minification.minification.remove_comments_and_docstrings() function
# starting at this line:
# "elif token_type == tokenize.STRING:"
# This tests remove_extraneous_spaces():
this_line_has_leading_indentation = '''<--That extraneous space should be
removed''' # But not these spaces

def pyminify(options, *files):
global name_generator
def is_iterable(obj):
"""
Returns `True` if *obj* is iterable but *not* if *obj* is a string, bytes,
or a bytearray.
"""
if isinstance(obj, (str, bytes, bytearray)):
return False
return isinstance(obj, Iterable)

def pyminify(options, files):
"""
Given an *options* object (from `optparse.OptionParser` or similar),
performs minification and/or obfuscation on the given *files* (any iterable
containing file paths) based on said *options*.
All accepted options can be listed by running ``python __main__.py -h`` or
examining the :py:func:`__init__.main` function.
"""
global name_generator
if not is_iterable(files):
print(
"Error: The 'files' argument must be a list, tuple, etc of files. "
"Strings and bytes won't work.")
sys.exit(1)
if options.pyz:
# Check to make sure we were only passed one script (only one at a time)
if len(files) > 1:
@@ -166,7 +190,8 @@ def pyminify(options, *files):
options.obf_functions, options.obf_variables,
options.obf_builtins, options.obf_import_methods)

# Automatically enable obfuscation if --nonlatin (implied if no explicit obfuscation is stated)
# Automatically enable obfuscation if --nonlatin (implied if no explicit
# obfuscation is stated)
if options.use_nonlatin and not any(obfuscations):
options.obfuscate = True
if len(files) > 1: # We're dealing with more than one file
@@ -212,7 +237,10 @@ def pyminify(options, *files):
table=table
)
# Convert back to text
result = token_utils.untokenize(tokens)
result = ''
if prepend:
result += prepend
result += token_utils.untokenize(tokens)
# Compress it if we were asked to do so
if options.bzip2:
result = compression.bz2_pack(result)
@@ -236,16 +264,19 @@ def pyminify(options, *files):
cumulative_new += new_filesize
percent_saved = round(
(float(new_filesize) / float(filesize)) * 100, 2)
print("%s (%s) reduced to %s bytes (%s%% of original size)" % (
sourcefile, filesize, new_filesize, percent_saved))
print("Overall size reduction: %s%% of original size" %
round((float(cumulative_new) / float(cumulative_size) * 100), 2))
print((
"{sourcefile} ({filesize}) reduced to {new_filesize} bytes "
"({percent_saved}% of original size)").format(**locals()))
p_saved = round(
(float(cumulative_new) / float(cumulative_size) * 100), 2)
print("Overall size reduction: {0}% of original size".format(p_saved))
else:
# Get the module name from the path
module = os.path.split(files[0])[1]
_file = files[0]
module = os.path.split(_file)[1]
module = ".".join(module.split('.')[:-1])
filesize = os.path.getsize(files[0])
source = open(files[0]).read()
filesize = os.path.getsize(_file)
source = open(_file).read()
# Convert the tokens from a tuple of tuples to a list of lists so we can
# update in-place.
tokens = token_utils.listified_tokenizer(source)
@@ -262,7 +293,10 @@ def pyminify(options, *files):
identifier_length=identifier_length)
obfuscate.obfuscate(module, tokens, options)
# Convert back to text
result = token_utils.untokenize(tokens)
result = ''
if prepend:
result += prepend
result += token_utils.untokenize(tokens)
# Compress it if we were asked to do so
if options.bzip2:
result = compression.bz2_pack(result)
@@ -279,8 +313,9 @@ def pyminify(options, *files):
f.write(result)
f.close()
new_filesize = os.path.getsize(options.outfile)
print("%s (%s) reduced to %s bytes (%s%% of original size)" % (
files[0], filesize, new_filesize,
round(float(new_filesize)/float(filesize) * 100, 2)))
percent_saved = round(float(new_filesize)/float(filesize) * 100, 2)
print((
"{_file} ({filesize}) reduced to {new_filesize} bytes "
"({percent_saved}% of original size)".format(**locals())))
else:
print(result)
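
Why the new `is_iterable()` guard matters: a bare string is itself iterable, so calling `pyminify(options, "foo.py")` would otherwise "work" by iterating over the path one character at a time. A quick demonstration of the check (same logic as the hunk above; on modern Python 3 the import would come from `collections.abc` instead):

    from collections import Iterable

    def is_iterable(obj):
        # Iterable, but deliberately excluding str/bytes/bytearray
        if isinstance(obj, (str, bytes, bytearray)):
            return False
        return isinstance(obj, Iterable)

    assert is_iterable(["foo.py", "bar.py"])  # a list of paths passes
    assert not is_iterable("foo.py")          # a lone path string is rejected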
20 changes: 11 additions & 9 deletions pyminifier/__main__.py
@@ -14,7 +14,13 @@
pass

def main():
"""
Sets up our command line options, prints the usage/help (if warranted), and
runs :py:func:`pyminifier.pyminify` with the given command line options.
"""
usage = '%prog [options] "<input file>"'
if '__main__.py' in sys.argv[0]: # python -m pyminifier
usage = 'pyminifier [options] "<input file>"'
parser = OptionParser(usage=usage, version=__version__)
parser.disable_interspersed_args()
parser.add_option(
@@ -39,14 +45,14 @@ def main():
dest="nominify",
default=False,
help="Don't bother minifying (only used with --pyz).",
)
)
parser.add_option(
"--use-tabs",
action="store_true",
dest="tabs",
default=False,
help="Use tabs for indentation instead of spaces.",
)
)
parser.add_option(
"--bzip2",
action="store_true",
@@ -157,15 +163,11 @@ def main():
),
metavar="<file path>"
)
options, args = parser.parse_args()
try:
pyz_file = args[0]
except Exception as err: # Note: This syntax requires Python 2.6+
print(err) # Just in case it is something wierd
options, files = parser.parse_args()
if not files:
parser.print_help()
sys.exit(2)

pyminify(options, pyz_file)
pyminify(options, files)


if __name__ == "__main__":
34 changes: 24 additions & 10 deletions pyminifier/analyze.py
@@ -202,35 +202,44 @@ def enumerate_imports(tokens):
"""
Iterates over *tokens* and returns a list of all imported modules.
**Note:** This is intelligent about the use of the 'as' keyword.
.. note:: This ignores imports using the 'as' and 'from' keywords.
"""
imported_modules = []
import_line = False
from_import = False
for index, tok in enumerate(tokens):
token_type = tok[0]
token_string = tok[1]
if token_type == tokenize.NEWLINE:
import_line = False
from_import = False
elif token_string == "import":
import_line = True
elif token_string == "from":
from_import = True
elif import_line:
if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
if token_string not in reserved_words:
if token_string not in imported_modules:
imported_modules.append(token_string)
if not from_import:
if token_string not in reserved_words:
if token_string not in imported_modules:
imported_modules.append(token_string)
return imported_modules

def enumerate_global_imports(tokens):
"""
Returns a list of all globally imported modules (skips modules imported
inside of classes, methods, or functions).
inside of classes, methods, or functions). Example::
Example:
>>> enumerate_global_imports(tokens)
['sys', 'os', 'tokenize', 're']
.. note::
Does not enumerate imports using the 'from' or 'as' keywords.
"""
imported_modules = []
import_line = False
from_import = False
parent_module = ""
function_count = 0
indentation = 0
@@ -243,6 +252,7 @@ def enumerate_global_imports(tokens):
indentation -= 1
elif token_type == tokenize.NEWLINE:
import_line = False
from_import = False
elif token_type == tokenize.NAME:
if token_string in ["def", "class"]:
function_count += 1
@@ -251,15 +261,20 @@ def enumerate_global_imports(tokens):
elif function_count >= indentation:
if token_string == "import":
import_line = True
elif token_string == "from":
from_import = True
elif import_line:
if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
if token_string not in reserved_words:
if token_type == tokenize.NAME \
and tokens[index+1][1] != 'as':
if not from_import \
and token_string not in reserved_words:
if token_string not in imported_modules:
if tokens[index+1][1] == '.': # module.module
parent_module = token_string + '.'
else:
if parent_module:
module_string = parent_module + token_string
module_string = (
parent_module + token_string)
imported_modules.append(module_string)
parent_module = ''
else:
@@ -335,7 +350,6 @@ def enumerate_builtins(tokens):
for index, tok in enumerate(tokens):
token_type = tok[0]
token_string = tok[1]
#if token_type == tokenize.NAME:
if token_string in builtins:
# Note: I need to test if print can be replaced in Python 3
special_special = ['print'] # Print is special in Python 2
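
A quick sanity check of the new 'from' skipping (hypothetical driver; assumes Python 3 and that pyminifier is importable, with the expected output following from the logic in the hunk above):

    import io
    import tokenize
    from pyminifier import analyze

    source = "import os\nfrom foo import bar\nimport sys\n"
    readline = io.StringIO(source).readline
    tokens = [list(tok) for tok in tokenize.generate_tokens(readline)]
    print(analyze.enumerate_imports(tokens))  # expected: ['os', 'sys']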
17 changes: 12 additions & 5 deletions pyminifier/obfuscate.py
@@ -120,6 +120,7 @@ def find_obfuscatables(tokens, obfunc, ignore_length=False):
keyword_args = analyze.enumerate_keyword_args(tokens)
global imported_modules
imported_modules = analyze.enumerate_imports(tokens)
#print("imported_modules: %s" % imported_modules)
skip_line = False
skip_next = False
obfuscatables = []
@@ -183,6 +184,8 @@ def obfuscatable_variable(tokens, index, ignore_length=False):
if next_tok_string == ".":
if token_string in imported_modules:
return None
if prev_tok_string == 'import':
return '__skipline__'
if prev_tok_string == ".":
return '__skipnext__'
if prev_tok_string == "for":
@@ -194,7 +197,7 @@ def obfuscatable_variable(tokens, index, ignore_length=False):
return None
if token_string in ["def", "class", 'if', 'elif', 'import']:
return '__skipline__'
if prev_tok_type != tokenize.INDENT and '=' not in line:
if prev_tok_type != tokenize.INDENT and next_tok_string != '=':
return '__skipline__'
if not ignore_length:
if len(token_string) < 3:
@@ -334,7 +337,10 @@ def replace_obfuscatables(module, tokens, obfunc, replace, name_generator, table
elif result == '__comma__':
right_of_equal = False
elif result == '__right_of_equal__':
right_of_equal = True
# We only care if we're right of the equal sign outside of
# parens (which indicates arguments)
if not inside_parens:
right_of_equal = True
else:
if table: # Save it for later use in other files
combined_name = "%s.%s" % (module, token_string)
@@ -575,8 +581,11 @@ def obfuscate_global_import_methods(module, tokens, name_generator, table=None):
to be looked up there before generating a new unique name.
"""
global_imports = analyze.enumerate_global_imports(tokens)
#print("global_imports: %s" % global_imports)
local_imports = analyze.enumerate_local_modules(tokens, os.getcwd())
#print("local_imports: %s" % local_imports)
module_methods = analyze.enumerate_import_methods(tokens)
#print("module_methods: %s" % module_methods)
# Make a 1-to-1 mapping dict of module_method<->replacement:
if table:
replacement_dict = {}
@@ -667,8 +676,7 @@ def obfuscate(module, tokens, options, name_generator=None, table=None):
ignore_length = True
if sys.version_info[0] == 3:
name_generator = obfuscation_machine(
use_unicode=True, identifier_length=identifier_length
)
use_unicode=True, identifier_length=identifier_length)
else:
print(
"ERROR: You can't use nonlatin characters without Python 3")
@@ -704,7 +712,6 @@ def obfuscate(module, tokens, options, name_generator=None, table=None):
replace_obfuscatables(
module, tokens, obfuscate_class, _class, name_generator, table)
obfuscate_global_import_methods(module, tokens, name_generator, table)
#print("# table: \n%s" % table)
obfuscate_builtins(module, tokens, name_generator, table)
else:
if options.obf_classes:
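
To illustrate the `__right_of_equal__` change with a hypothetical input:

    counter = 10
    result = frobnicate(width=counter)   # 'width=' is a keyword argument

Previously the '=' inside the parentheses flipped the right-of-equal flag, so `counter` on the second line was skipped; now `inside_parens` is consulted first, and an '=' inside parentheses (a keyword argument) no longer blocks obfuscation of the value being passed.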
2 changes: 1 addition & 1 deletion pyminifier/token_utils.py
@@ -11,7 +11,7 @@
except ImportError: # We're using Python 3
import io

def untokenize(tokens): ###
def untokenize(tokens):
"""
Converts the output of tokenize.generate_tokens back into a human-readable
string (that doesn't contain oddly-placed whitespace everywhere).
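
A round-trip sketch using `token_utils` (assumes pyminifier is importable):

    from pyminifier import token_utils

    source = 'x = 1\nprint( x )\n'
    tokens = token_utils.listified_tokenizer(source)
    print(token_utils.untokenize(tokens))  # prints the reconstructed source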
