|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +import pytest |
| 3 | +import python_minifier |
| 4 | +import tempfile |
| 5 | +import os |
| 6 | +import codecs |
| 7 | + |
| 8 | + |
def test_minify_utf8_file():
    """Minify Python source read back from a UTF-8 encoded temporary file.

    The fixture source contains characters that cannot be represented in
    Windows-1252 (Greek, Cyrillic, mathematical symbols and arrows). It is
    written to disk as UTF-8 bytes, read back as text, minified, and the
    minified result is executed so the returned strings can be compared
    with the originals.
    """

    # Create Python source with UTF-8 characters that are not in Windows-1252
    # Using Greek letters, Cyrillic, and mathematical symbols
    #
    # Every function called from the fixture's ``__main__`` guard is defined
    # in the fixture, so the fixture is a valid standalone script. The guard
    # body is not expected to run under exec() below, because the globals
    # passed to exec() do not set ``__name__`` to "__main__".
    source_code = u'''"""
This module contains UTF-8 characters that are not in Windows-1252 encoding:
- Greek: α β γ δ ε ζ η θ ι κ λ μ ν ξ ο π ρ σ τ υ φ χ ψ ω
- Cyrillic: а б в г д е ё ж з и й к л м н о п р с т у ф х ц ч ш щ ъ ы ь э ю я
- Mathematical: ∀ ∃ ∈ ∉ ∅ ∞ ∑ ∏ √ ∫ ∇ ∂ ≠ ≤ ≥ ≈ ≡ ⊂ ⊃ ⊆ ⊇
- Arrows: ← → ↑ ↓ ↔ ↕ ↖ ↗ ↘ ↙
"""

def greet_in_greek():
    return u"Γεια σας κόσμος" # "Hello world" in Greek

def greet_in_russian():
    return u"Привет, мир" # "Hello, world" in Russian

def mathematical_formula():
    # Using mathematical symbols in comments
    # ∀x ∈ ℝ: x² ≥ 0
    return u"∑ from i=1 to ∞ of 1/i² = π²/6"

def arrow_symbols():
    directions = {
        u"left": u"←",
        u"right": u"→",
        u"up": u"↑",
        u"down": u"↓"
    }
    return directions

if __name__ == "__main__":
    print(greet_in_greek())
    print(greet_in_russian())
    print(mathematical_formula())
    print(arrow_symbols())
'''

    # Write to temporary file with UTF-8 encoding
    # Python 2.7 doesn't support encoding parameter, so use binary mode
    # delete=False keeps the file on disk after the handle is closed so it
    # can be reopened by name; it is removed in the ``finally`` below.
    with tempfile.NamedTemporaryFile(mode='wb', suffix='.py', delete=False) as f:
        f.write(source_code.encode('utf-8'))
        temp_file = f.name

    try:
        # Read the file and minify it
        # Python 2.7 doesn't support encoding parameter in open()
        with codecs.open(temp_file, 'r', encoding='utf-8') as f:
            original_content = f.read()

        # This should work - minify the UTF-8 content
        minified = python_minifier.minify(original_content)

        # Verify the minified code still contains the UTF-8 characters
        # On Python 2.7, Unicode characters in string literals are escaped but preserved
        # Test by executing the minified code and checking the actual values
        minified_globals = {}
        exec(minified, minified_globals)

        # The minified code should contain the same functions that return Unicode
        assert 'greet_in_greek' in minified_globals
        assert u"Γεια σας κόσμος" == minified_globals['greet_in_greek']()

        # Cyrillic string literals must survive minification as well
        assert 'greet_in_russian' in minified_globals
        assert u"Привет, мир" == minified_globals['greet_in_russian']()

        # Test that mathematical symbols are also preserved
        assert 'mathematical_formula' in minified_globals
        assert u"∑ from i=1 to ∞" in minified_globals['mathematical_formula']()

    finally:
        # Clean up
        os.unlink(temp_file)
| 78 | + |
| 79 | + |
def test_minify_utf8_file_direct():
    """Minify an in-memory source string containing non-ASCII characters.

    No file is involved: the fixture text is handed straight to
    ``python_minifier.minify``. The minified output is executed and the
    strings produced by the resulting function and class are checked for
    the expected Unicode fragments.
    """

    # Fixture: one function and one class whose string literals and
    # docstrings contain emoji, accented letters and symbols.
    fixture = u'''# UTF-8 test file
def emoji_function():
    """Function with emoji and special characters: 🐍 ∆ ∑ ∫ ∞"""
    return u"Python is 🐍 awesome! Math symbols: ∆x ≈ 0, ∑∞ = ∞"

class UnicodeClass:
    """Class with unicode: ñ ü ö ä ë ï ÿ"""
    def __init__(self):
        self.message = u"Héllö Wörld with àccénts!"

    def get_symbols(self):
        return u"Symbols: ™ © ® ° ± × ÷ ≠ ≤ ≥"
'''

    # Minify the text directly and run the result in a fresh namespace so
    # the definitions it creates can be inspected.
    namespace = {}
    exec(python_minifier.minify(fixture), namespace)

    # The function's return value must still carry the emoji and delta.
    emoji_text = namespace['emoji_function']()
    for fragment in (u"🐍", u"∆"):
        assert fragment in emoji_text

    # The class must still be constructible and expose the same strings.
    instance = namespace['UnicodeClass']()
    for fragment in (u"Héllö", u"àccénts"):
        assert fragment in instance.message

    symbols = instance.get_symbols()
    for fragment in (u"™", u"©"):
        assert fragment in symbols
0 commit comments