You can now use pyminifier as an executable module: `python -m pyminifier` will work just like running '/usr/local/bin/pyminifier'.

Fixed issue #33, where minifying multiple files into a destination directory (`--destdir`) was not working properly.  Thanks to Sjef de Krijger for emailing me a patch.  His patch also fixed the `--prepend` option, which was broken when minifying/obfuscating multiple files.
__init__.py:  Added some of the recently-added features to the global docstring.
__init__.py:  Added a docstring to the `pyminify()` function.
__main__.py:  Added a docstring to the `main()` function.
analyze.py:  The `enumerate_imports()` and `enumerate_global_imports()` functions now skip 'from' import lines such as `from foo import bar`, since those aren't handled correctly by the `enumerate_local_modules()` function inside obfuscate.py.  This is a temporary measure until the logic for differentiating between `from whatever import foo` and having a `foo.py` in the same directory is sorted out.
obfuscate.py:  Fixed a bug where variables being passed as arguments (right of an equal sign) were not being obfuscated when they should be.
All over:  Modernized some strings here and there by converting them to use `format()` and cleaned up a few things to improve code readability/presentation.
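
For context, the headline change relies on Python's standard `__main__.py` convention: any package containing a `__main__.py` can be executed with `python -m <package>`. A minimal sketch of the entry point (simplified from what this commit actually does; the real file registers the full option set that `pyminify()` expects):

    # Simplified sketch of pyminifier/__main__.py -- not the complete file.
    import sys
    from optparse import OptionParser
    from pyminifier import pyminify, __version__

    def main():
        usage = '%prog [options] "<input file>"'
        if '__main__.py' in sys.argv[0]:  # Invoked as ``python -m pyminifier``
            usage = 'pyminifier [options] "<input file>"'
        parser = OptionParser(usage=usage, version=__version__)
        parser.add_option(
            "--bzip2", action="store_true", dest="bzip2", default=False,
            help="bzip2-compress the result.")  # ...plus many more options
        options, files = parser.parse_args()
        if not files:  # No file arguments given: print usage and exit non-zero
            parser.print_help()
            sys.exit(2)
        pyminify(options, files)

    if __name__ == "__main__":
        main()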
liftoff committed Feb 6, 2015
1 parent 0c3f8dd commit 63240c3
Showing 5 changed files with 101 additions and 43 deletions.
71 changes: 53 additions & 18 deletions pyminifier/__init__.py
@@ -5,8 +5,8 @@
# For license information see LICENSE.txt

# Meta
__version__ = '2.1'
__version_info__ = (2, 1)
__version__ = '2.2'
__version_info__ = (2, 2)
__license__ = "GPLv3" # See LICENSE.txt
__author__ = 'Dan McDougall <daniel.mcdougall@liftoffsoftware.com>'

@@ -25,7 +25,9 @@
* Removes docstrings.
* Removes comments.
* Minimizes code indentation.
* Removes trailing commas.
* Joins multiline pairs of parentheses, braces, and brackets (and removes extraneous whitespace within).
* Joins disjointed strings like ``("some" "disjointed" "string")`` into single strings: ``('''some disjointed string''')``
* Preserves shebangs and encoding info (e.g. "# -*- coding: utf-8 -*-").
* Optionally, produces a bzip2 or gzip-compressed self-extracting python script containing the minified source for ultimate minification. *Added in version 1.4*
* Optionally, obfuscates the code using the shortest possible combination of letters and numbers for one or all of class names, function/method names, and variables. The options are ``--obfuscate`` or ``-O`` to obfuscate everything, ``--obfuscate-variables``, ``--obfuscate-functions``, and ``--obfuscate-classes`` to obfuscate things individually (say, if you wanted to keep your module usable by external programs). *Added in version 2.0*
@@ -66,6 +68,7 @@
# Import built-in modules
import os, sys, re, io
from optparse import OptionParser
from collections import Iterable

# Import our own modules
from . import minification
@@ -132,16 +135,37 @@ def test_function():
test_string_inside_operators = imaginary_function(
"This string was indented but the tokenizer won't see it that way."
) # To understand how this could mess up docstring removal code see the
# minification.minification.remove_comments_and_docstrings() function starting at this line:
# minification.minification.remove_comments_and_docstrings() function
# starting at this line:
# "elif token_type == tokenize.STRING:"
# This tests remove_extraneous_spaces():
this_line_has_leading_indentation = '''<--That extraneous space should be
removed''' # But not these spaces

def pyminify(options, *files):
global name_generator
def is_iterable(obj):
"""
Returns `True` if *obj* is iterable but *not* if *obj* is a string, bytes,
or a bytearray.
"""
if isinstance(obj, (str, bytes, bytearray)):
return False
return isinstance(obj, Iterable)

def pyminify(options, files):
"""
Given an *options* object (from `optparse.OptionParser` or similar),
performs minification and/or obfuscation on the given *files* (any iterable
containing file paths) based on said *options*.
All accepted options can be listed by running ``python __main__.py -h`` or
examining the :py:func:`__init__.main` function.
"""
global name_generator
if not is_iterable(files):
print(
"Error: The 'files' argument must be a list, tuple, etc of files. "
"Strings and bytes won't work.")
sys.exit(1)
if options.pyz:
# Check to make sure we were only passed one script (only one at a time)
if len(files) > 1:
@@ -166,7 +190,8 @@ def pyminify(options, *files):
options.obf_functions, options.obf_variables,
options.obf_builtins, options.obf_import_methods)

# Automatically enable obfuscation if --nonlatin (implied if no explicit obfuscation is stated)
# Automatically enable obfuscation if --nonlatin (implied if no explicit
# obfuscation is stated)
if options.use_nonlatin and not any(obfuscations):
options.obfuscate = True
if len(files) > 1: # We're dealing with more than one file
@@ -212,7 +237,10 @@ def pyminify(options, *files):
table=table
)
# Convert back to text
result = token_utils.untokenize(tokens)
result = ''
if prepend:
result += prepend
result += token_utils.untokenize(tokens)
# Compress it if we were asked to do so
if options.bzip2:
result = compression.bz2_pack(result)
@@ -236,16 +264,19 @@ def pyminify(options, *files):
cumulative_new += new_filesize
percent_saved = round(
(float(new_filesize) / float(filesize)) * 100, 2)
print("%s (%s) reduced to %s bytes (%s%% of original size)" % (
sourcefile, filesize, new_filesize, percent_saved))
print("Overall size reduction: %s%% of original size" %
round((float(cumulative_new) / float(cumulative_size) * 100), 2))
print((
"{sourcefile} ({filesize}) reduced to {new_filesize} bytes "
"({percent_saved}% of original size)").format(**locals()))
p_saved = round(
(float(cumulative_new) / float(cumulative_size) * 100), 2)
print("Overall size reduction: {0}% of original size".format(p_saved))
else:
# Get the module name from the path
module = os.path.split(files[0])[1]
_file = files[0]
module = os.path.split(_file)[1]
module = ".".join(module.split('.')[:-1])
filesize = os.path.getsize(files[0])
source = open(files[0]).read()
filesize = os.path.getsize(_file)
source = open(_file).read()
# Convert the tokens from a tuple of tuples to a list of lists so we can
# update in-place.
tokens = token_utils.listified_tokenizer(source)
@@ -262,7 +293,10 @@ def pyminify(options, *files):
identifier_length=identifier_length)
obfuscate.obfuscate(module, tokens, options)
# Convert back to text
result = token_utils.untokenize(tokens)
result = ''
if prepend:
result += prepend
result += token_utils.untokenize(tokens)
# Compress it if we were asked to do so
if options.bzip2:
result = compression.bz2_pack(result)
@@ -279,8 +313,9 @@ def pyminify(options, *files):
f.write(result)
f.close()
new_filesize = os.path.getsize(options.outfile)
print("%s (%s) reduced to %s bytes (%s%% of original size)" % (
files[0], filesize, new_filesize,
round(float(new_filesize)/float(filesize) * 100, 2)))
percent_saved = round(float(new_filesize)/float(filesize) * 100, 2)
print((
"{_file} ({filesize}) reduced to {new_filesize} bytes "
"({percent_saved}% of original size)".format(**locals())))
else:
print(result)
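
Why the new `is_iterable()` guard matters: a bare string is itself iterable, so calling `pyminify(options, "foo.py")` would otherwise "work" by iterating over the path one character at a time. A quick demonstration of the check (same logic as the hunk above; on modern Python 3 the import would come from `collections.abc` instead):

    from collections import Iterable

    def is_iterable(obj):
        # Iterable, but deliberately excluding str/bytes/bytearray
        if isinstance(obj, (str, bytes, bytearray)):
            return False
        return isinstance(obj, Iterable)

    assert is_iterable(["foo.py", "bar.py"])  # a list of paths passes
    assert not is_iterable("foo.py")          # a lone path string is rejected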
20 changes: 11 additions & 9 deletions pyminifier/__main__.py
@@ -14,7 +14,13 @@
pass

def main():
"""
Sets up our command line options, prints the usage/help (if warranted), and
runs :py:func:`pyminifier.pyminify` with the given command line options.
"""
usage = '%prog [options] "<input file>"'
if '__main__.py' in sys.argv[0]: # python -m pyminifier
usage = 'pyminifier [options] "<input file>"'
parser = OptionParser(usage=usage, version=__version__)
parser.disable_interspersed_args()
parser.add_option(
@@ -39,14 +45,14 @@ def main():
dest="nominify",
default=False,
help="Don't bother minifying (only used with --pyz).",
)
)
parser.add_option(
"--use-tabs",
action="store_true",
dest="tabs",
default=False,
help="Use tabs for indentation instead of spaces.",
)
)
parser.add_option(
"--bzip2",
action="store_true",
@@ -157,15 +163,11 @@ def main():
),
metavar="<file path>"
)
options, args = parser.parse_args()
try:
pyz_file = args[0]
except Exception as err: # Note: This syntax requires Python 2.6+
print(err) # Just in case it is something wierd
options, files = parser.parse_args()
if not files:
parser.print_help()
sys.exit(2)

pyminify(options, pyz_file)
pyminify(options, files)


if __name__ == "__main__":
34 changes: 24 additions & 10 deletions pyminifier/analyze.py
@@ -202,35 +202,44 @@ def enumerate_imports(tokens):
"""
Iterates over *tokens* and returns a list of all imported modules.
**Note:** This is intelligent about the use of the 'as' keyword.
.. note:: This ignores imports using the 'as' and 'from' keywords.
"""
imported_modules = []
import_line = False
from_import = False
for index, tok in enumerate(tokens):
token_type = tok[0]
token_string = tok[1]
if token_type == tokenize.NEWLINE:
import_line = False
from_import = False
elif token_string == "import":
import_line = True
elif token_string == "from":
from_import = True
elif import_line:
if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
if token_string not in reserved_words:
if token_string not in imported_modules:
imported_modules.append(token_string)
if not from_import:
if token_string not in reserved_words:
if token_string not in imported_modules:
imported_modules.append(token_string)
return imported_modules

def enumerate_global_imports(tokens):
"""
Returns a list of all globally imported modules (skips modules imported
inside of classes, methods, or functions).
inside of classes, methods, or functions). Example::
Example:
>>> enumerate_global_imports(tokens)
['sys', 'os', 'tokenize', 're']
.. note::
Does not enumerate imports using the 'from' or 'as' keywords.
"""
imported_modules = []
import_line = False
from_import = False
parent_module = ""
function_count = 0
indentation = 0
@@ -243,6 +252,7 @@ def enumerate_global_imports(tokens):
indentation -= 1
elif token_type == tokenize.NEWLINE:
import_line = False
from_import = False
elif token_type == tokenize.NAME:
if token_string in ["def", "class"]:
function_count += 1
@@ -251,15 +261,20 @@ def enumerate_global_imports(tokens):
elif function_count >= indentation:
if token_string == "import":
import_line = True
elif token_string == "from":
from_import = True
elif import_line:
if token_type == tokenize.NAME and tokens[index+1][1] != 'as':
if token_string not in reserved_words:
if token_type == tokenize.NAME \
and tokens[index+1][1] != 'as':
if not from_import \
and token_string not in reserved_words:
if token_string not in imported_modules:
if tokens[index+1][1] == '.': # module.module
parent_module = token_string + '.'
else:
if parent_module:
module_string = parent_module + token_string
module_string = (
parent_module + token_string)
imported_modules.append(module_string)
parent_module = ''
else:
@@ -335,7 +350,6 @@ def enumerate_builtins(tokens):
for index, tok in enumerate(tokens):
token_type = tok[0]
token_string = tok[1]
#if token_type == tokenize.NAME:
if token_string in builtins:
# Note: I need to test if print can be replaced in Python 3
special_special = ['print'] # Print is special in Python 2
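
A quick sanity check of the new 'from' skipping (hypothetical driver; assumes Python 3 and that pyminifier is importable, with the expected output following from the logic in the hunk above):

    import io
    import tokenize
    from pyminifier import analyze

    source = "import os\nfrom foo import bar\nimport sys\n"
    readline = io.StringIO(source).readline
    tokens = [list(tok) for tok in tokenize.generate_tokens(readline)]
    print(analyze.enumerate_imports(tokens))  # expected: ['os', 'sys']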
17 changes: 12 additions & 5 deletions pyminifier/obfuscate.py
@@ -120,6 +120,7 @@ def find_obfuscatables(tokens, obfunc, ignore_length=False):
keyword_args = analyze.enumerate_keyword_args(tokens)
global imported_modules
imported_modules = analyze.enumerate_imports(tokens)
#print("imported_modules: %s" % imported_modules)
skip_line = False
skip_next = False
obfuscatables = []
@@ -183,6 +184,8 @@ def obfuscatable_variable(tokens, index, ignore_length=False):
if next_tok_string == ".":
if token_string in imported_modules:
return None
if prev_tok_string == 'import':
return '__skipline__'
if prev_tok_string == ".":
return '__skipnext__'
if prev_tok_string == "for":
@@ -194,7 +197,7 @@ def obfuscatable_variable(tokens, index, ignore_length=False):
return None
if token_string in ["def", "class", 'if', 'elif', 'import']:
return '__skipline__'
if prev_tok_type != tokenize.INDENT and '=' not in line:
if prev_tok_type != tokenize.INDENT and next_tok_string != '=':
return '__skipline__'
if not ignore_length:
if len(token_string) < 3:
@@ -334,7 +337,10 @@ def replace_obfuscatables(module, tokens, obfunc, replace, name_generator, table
elif result == '__comma__':
right_of_equal = False
elif result == '__right_of_equal__':
right_of_equal = True
# We only care if we're right of the equal sign outside of
# parens (which indicates arguments)
if not inside_parens:
right_of_equal = True
else:
if table: # Save it for later use in other files
combined_name = "%s.%s" % (module, token_string)
@@ -575,8 +581,11 @@ def obfuscate_global_import_methods(module, tokens, name_generator, table=None):
to be looked up there before generating a new unique name.
"""
global_imports = analyze.enumerate_global_imports(tokens)
#print("global_imports: %s" % global_imports)
local_imports = analyze.enumerate_local_modules(tokens, os.getcwd())
#print("local_imports: %s" % local_imports)
module_methods = analyze.enumerate_import_methods(tokens)
#print("module_methods: %s" % module_methods)
# Make a 1-to-1 mapping dict of module_method<->replacement:
if table:
replacement_dict = {}
@@ -667,8 +676,7 @@ def obfuscate(module, tokens, options, name_generator=None, table=None):
ignore_length = True
if sys.version_info[0] == 3:
name_generator = obfuscation_machine(
use_unicode=True, identifier_length=identifier_length
)
use_unicode=True, identifier_length=identifier_length)
else:
print(
"ERROR: You can't use nonlatin characters without Python 3")
@@ -704,7 +712,6 @@ def obfuscate(module, tokens, options, name_generator=None, table=None):
replace_obfuscatables(
module, tokens, obfuscate_class, _class, name_generator, table)
obfuscate_global_import_methods(module, tokens, name_generator, table)
#print("# table: \n%s" % table)
obfuscate_builtins(module, tokens, name_generator, table)
else:
if options.obf_classes:
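
To illustrate the `__right_of_equal__` change with a hypothetical input:

    counter = 10
    result = frobnicate(width=counter)   # 'width=' is a keyword argument

Previously the '=' inside the parentheses flipped the right-of-equal flag, so `counter` on the second line was skipped; now `inside_parens` is consulted first, and an '=' inside parentheses (a keyword argument) no longer blocks obfuscation of the value being passed.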
2 changes: 1 addition & 1 deletion pyminifier/token_utils.py
@@ -11,7 +11,7 @@
except ImportError: # We're using Python 3
import io

def untokenize(tokens): ###
def untokenize(tokens):
"""
Converts the output of tokenize.generate_tokens back into a human-readable
string (that doesn't contain oddly-placed whitespace everywhere).
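
A round-trip sketch using `token_utils` (assumes pyminifier is importable):

    from pyminifier import token_utils

    source = 'x = 1\nprint( x )\n'
    tokens = token_utils.listified_tokenizer(source)
    print(token_utils.untokenize(tokens))  # prints the reconstructed source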
