Skip to content

Commit 341f6cb

Browse files
author
luke@maurits.id.au
committed
Lots of work to support appropriate alignment/padding/etc. of text using non-Latin characters (e.g. Kanji, Cyrillic, etc.). Note that this has currently been achieved by using a modified version of code from the "kitchen" library. Kitchen is LGPL, and since I had to modify their code (to get Python 3 support working), if I were to distribute the current trunk code it would need to be LGPLed instead of BSD licensed. If I don't want to switch to LGPL for PrettyTable I need to either get permission from everyone who has copyright on the modified code (could be hard) or do a clean room reimplementation. Fun times ahead!
git-svn-id: http://prettytable.googlecode.com/svn/trunk@78 0f58610c-415a-11de-9c03-5d6cfad8e937
1 parent be785df commit 341f6cb

File tree

1 file changed

+201
-10
lines changed

1 file changed

+201
-10
lines changed

prettytable.py

Lines changed: 201 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
#!/usr/bin/env python
23
#
34
# Copyright (c) 2009, Luke Maurits <luke@maurits.id.au>
@@ -36,11 +37,19 @@
3637
import random
3738
import sys
3839
import textwrap
40+
import itertools
41+
import unicodedata
3942

4043
py3k = sys.version_info[0] >= 3
4144
if py3k:
4245
unicode = str
4346
basestring = str
47+
itermap = map
48+
iterzip = zip
49+
else:
50+
itermap = itertools.imap
51+
iterzip = itertools.izip
52+
4453
if py3k and sys.version_info[1] >= 2:
4554
from html import escape
4655
else:
@@ -60,7 +69,7 @@
6069
def _get_size(text):
    """Return the ``(width, height)`` of a possibly multi-line string.

    Height is the number of newline-separated lines.  Width is the largest
    display width of any of those lines as reported by ``textual_width``,
    so it is not simply a character count.
    """
    rows = text.split("\n")
    widest = max(textual_width(row) for row in rows)
    return (widest, len(rows))
6574

6675
class PrettyTable(object):
@@ -153,6 +162,27 @@ def _unicode(self, value):
153162
value = unicode(value, self.encoding, "strict")
154163
return value
155164

165+
def _justify(self, text, width, align):
    """Pad ``text`` with spaces so it occupies ``width`` display columns.

    The amount of padding is based on ``textual_width(text)`` rather than
    ``len(text)``.

    Arguments:

    text - the string to pad
    width - the target width in display columns
    align - "l" pads on the right, "r" pads on the left, and any other
            value centres the text

    If the text is already at least ``width`` columns wide it is returned
    without any padding.
    """
    shown = textual_width(text)
    excess = width - shown

    if align == "l":
        return text + " " * excess
    if align == "r":
        return " " * excess + text

    # Centred.  When the padding cannot be split evenly, the spare column
    # goes on the right for odd-width text and on the left for even-width
    # text, which mirrors what the inbuilt str.center() method does.
    left = excess // 2
    if excess % 2 and shown % 2 == 0:
        left += 1
    right = excess - left
    return " " * left + text + " " * right
185+
156186
def __getattr__(self, name):
157187

158188
if name == "rowcount":
@@ -314,7 +344,7 @@ def _validate_all_field_names(self, name, val):
314344

315345
def _validate_single_char(self, name, val):
    """Check that ``val`` is exactly one display column wide.

    Arguments:

    name - the option name, used only in the error message
    val - the value to check; its width is measured with textual_width

    Raises Exception if the measured width is not 1.
    """
    # Use an explicit comparison rather than ``assert``: assert statements
    # are removed when Python runs with -O, which would silently turn this
    # validation off.
    if textual_width(val) != 1:
        raise Exception("Invalid value for %s! Must be a string of length 1." % name)
320350

@@ -922,13 +952,13 @@ def _stringify_header(self, options):
922952
return "".join(bits)
923953

924954
def _stringify_row(self, row, options):
925-
955+
926956
for index, field, value, width, in zip(range(0,len(row)), self._field_names, row, self._widths):
927957
# Enforce max widths
928958
lines = value.split("\n")
929959
new_lines = []
930960
for line in lines:
931-
if len(line) > width:
961+
if textual_width(line) > width:
932962
line = textwrap.fill(line, width)
933963
new_lines.append(line)
934964
lines = new_lines
@@ -959,12 +989,7 @@ def _stringify_row(self, row, options):
959989
if options["fields"] and field not in options["fields"]:
960990
continue
961991

962-
if self._align[field] == "l":
963-
bits[y].append(" " * lpad + l.ljust(width) + " " * rpad)
964-
elif self._align[field] == "r":
965-
bits[y].append(" " * lpad + l.rjust(width) + " " * rpad)
966-
else:
967-
bits[y].append(" " * lpad + l.center(width) + " " * rpad)
992+
bits[y].append(" " * lpad + self._justify(l, width, self._align[field]) + " " * rpad)
968993
if options["border"]:
969994
bits[y].append(self.vertical_char)
970995

@@ -1147,3 +1172,169 @@ def main():
11471172

11481173
if __name__ == "__main__":
11491174
main()
1175+
1176+
#
1177+
# Copyright (c) 2010 Red Hat, Inc.
1178+
# Copyright (c) 2010 Ville Skyttä
1179+
# Copyright (c) 2009 Tim Lauridsen
1180+
# Copyright (c) 2007 Marcus Kuhn
1181+
#
1182+
# kitchen is free software; you can redistribute it and/or modify it under the
1183+
# terms of the GNU Lesser General Public License as published by the Free
1184+
# Software Foundation; either version 2.1 of the License, or (at your option)
1185+
# any later version.
1186+
#
1187+
# kitchen is distributed in the hope that it will be useful, but WITHOUT ANY
1188+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
1189+
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
1190+
# more details.
1191+
#
1192+
# You should have received a copy of the GNU Lesser General Public License
1193+
# along with kitchen; if not, see <http://www.gnu.org/licenses/>
1194+
#
1195+
# Authors:
1196+
# James Antill <james@fedoraproject.org>
1197+
# Marcus Kuhn
1198+
# Toshio Kuratomi <toshio@fedoraproject.org>
1199+
# Tim Lauridsen
1200+
# Ville Skyttä
1201+
#
1202+
# Portions of this are from yum/i18n.py
1203+
1204+
1205+
def _interval_bisearch(value, table):
1206+
minimum = 0
1207+
maximum = len(table) - 1
1208+
if value < table[minimum][0] or value > table[maximum][1]:
1209+
return False
1210+
1211+
while maximum >= minimum:
1212+
mid = (minimum + maximum) / 2
1213+
if value > table[mid][1]:
1214+
minimum = mid + 1
1215+
elif value < table[mid][0]:
1216+
maximum = mid - 1
1217+
else:
1218+
return True
1219+
1220+
return False
1221+
1222+
_COMBINING = (
1223+
(0x300, 0x36f), (0x483, 0x489), (0x591, 0x5bd),
1224+
(0x5bf, 0x5bf), (0x5c1, 0x5c2), (0x5c4, 0x5c5),
1225+
(0x5c7, 0x5c7), (0x600, 0x603), (0x610, 0x61a),
1226+
(0x64b, 0x65e), (0x670, 0x670), (0x6d6, 0x6e4),
1227+
(0x6e7, 0x6e8), (0x6ea, 0x6ed), (0x70f, 0x70f),
1228+
(0x711, 0x711), (0x730, 0x74a), (0x7a6, 0x7b0),
1229+
(0x7eb, 0x7f3), (0x816, 0x819), (0x81b, 0x823),
1230+
(0x825, 0x827), (0x829, 0x82d), (0x901, 0x902),
1231+
(0x93c, 0x93c), (0x941, 0x948), (0x94d, 0x94d),
1232+
(0x951, 0x954), (0x962, 0x963), (0x981, 0x981),
1233+
(0x9bc, 0x9bc), (0x9c1, 0x9c4), (0x9cd, 0x9cd),
1234+
(0x9e2, 0x9e3), (0xa01, 0xa02), (0xa3c, 0xa3c),
1235+
(0xa41, 0xa42), (0xa47, 0xa48), (0xa4b, 0xa4d),
1236+
(0xa70, 0xa71), (0xa81, 0xa82), (0xabc, 0xabc),
1237+
(0xac1, 0xac5), (0xac7, 0xac8), (0xacd, 0xacd),
1238+
(0xae2, 0xae3), (0xb01, 0xb01), (0xb3c, 0xb3c),
1239+
(0xb3f, 0xb3f), (0xb41, 0xb43), (0xb4d, 0xb4d),
1240+
(0xb56, 0xb56), (0xb82, 0xb82), (0xbc0, 0xbc0),
1241+
(0xbcd, 0xbcd), (0xc3e, 0xc40), (0xc46, 0xc48),
1242+
(0xc4a, 0xc4d), (0xc55, 0xc56), (0xcbc, 0xcbc),
1243+
(0xcbf, 0xcbf), (0xcc6, 0xcc6), (0xccc, 0xccd),
1244+
(0xce2, 0xce3), (0xd41, 0xd43), (0xd4d, 0xd4d),
1245+
(0xdca, 0xdca), (0xdd2, 0xdd4), (0xdd6, 0xdd6),
1246+
(0xe31, 0xe31), (0xe34, 0xe3a), (0xe47, 0xe4e),
1247+
(0xeb1, 0xeb1), (0xeb4, 0xeb9), (0xebb, 0xebc),
1248+
(0xec8, 0xecd), (0xf18, 0xf19), (0xf35, 0xf35),
1249+
(0xf37, 0xf37), (0xf39, 0xf39), (0xf71, 0xf7e),
1250+
(0xf80, 0xf84), (0xf86, 0xf87), (0xf90, 0xf97),
1251+
(0xf99, 0xfbc), (0xfc6, 0xfc6), (0x102d, 0x1030),
1252+
(0x1032, 0x1032), (0x1036, 0x1037), (0x1039, 0x103a),
1253+
(0x1058, 0x1059), (0x108d, 0x108d), (0x1160, 0x11ff),
1254+
(0x135f, 0x135f), (0x1712, 0x1714), (0x1732, 0x1734),
1255+
(0x1752, 0x1753), (0x1772, 0x1773), (0x17b4, 0x17b5),
1256+
(0x17b7, 0x17bd), (0x17c6, 0x17c6), (0x17c9, 0x17d3),
1257+
(0x17dd, 0x17dd), (0x180b, 0x180d), (0x18a9, 0x18a9),
1258+
(0x1920, 0x1922), (0x1927, 0x1928), (0x1932, 0x1932),
1259+
(0x1939, 0x193b), (0x1a17, 0x1a18), (0x1a60, 0x1a60),
1260+
(0x1a75, 0x1a7c), (0x1a7f, 0x1a7f), (0x1b00, 0x1b03),
1261+
(0x1b34, 0x1b34), (0x1b36, 0x1b3a), (0x1b3c, 0x1b3c),
1262+
(0x1b42, 0x1b42), (0x1b44, 0x1b44), (0x1b6b, 0x1b73),
1263+
(0x1baa, 0x1baa), (0x1c37, 0x1c37), (0x1cd0, 0x1cd2),
1264+
(0x1cd4, 0x1ce0), (0x1ce2, 0x1ce8), (0x1ced, 0x1ced),
1265+
(0x1dc0, 0x1de6), (0x1dfd, 0x1dff), (0x200b, 0x200f),
1266+
(0x202a, 0x202e), (0x2060, 0x2063), (0x206a, 0x206f),
1267+
(0x20d0, 0x20f0), (0x2cef, 0x2cf1), (0x2de0, 0x2dff),
1268+
(0x302a, 0x302f), (0x3099, 0x309a), (0xa66f, 0xa66f),
1269+
(0xa67c, 0xa67d), (0xa6f0, 0xa6f1), (0xa806, 0xa806),
1270+
(0xa80b, 0xa80b), (0xa825, 0xa826), (0xa8c4, 0xa8c4),
1271+
(0xa8e0, 0xa8f1), (0xa92b, 0xa92d), (0xa953, 0xa953),
1272+
(0xa9b3, 0xa9b3), (0xa9c0, 0xa9c0), (0xaab0, 0xaab0),
1273+
(0xaab2, 0xaab4), (0xaab7, 0xaab8), (0xaabe, 0xaabf),
1274+
(0xaac1, 0xaac1), (0xabed, 0xabed), (0xfb1e, 0xfb1e),
1275+
(0xfe00, 0xfe0f), (0xfe20, 0xfe26), (0xfeff, 0xfeff),
1276+
(0xfff9, 0xfffb), (0x101fd, 0x101fd), (0x10a01, 0x10a03),
1277+
(0x10a05, 0x10a06), (0x10a0c, 0x10a0f), (0x10a38, 0x10a3a),
1278+
(0x10a3f, 0x10a3f), (0x110b9, 0x110ba), (0x1d165, 0x1d169),
1279+
(0x1d16d, 0x1d182), (0x1d185, 0x1d18b), (0x1d1aa, 0x1d1ad),
1280+
(0x1d242, 0x1d244), (0xe0001, 0xe0001), (0xe0020, 0xe007f),
1281+
(0xe0100, 0xe01ef), )
1282+
1283+
def _ucp_width(ucs, control_chars='guess'):
1284+
# test for 8-bit control characters
1285+
1286+
# Don't understand why but this is needed for Python 3
1287+
ucs = ucs[0]
1288+
1289+
if (ucs < 32) or ((ucs < 0xa0) and (ucs >= 0x7f)):
1290+
# Control character detected
1291+
if control_chars == 'strict':
1292+
raise ControlCharError(b_('_ucp_width does not understand how to'
1293+
' assign a width value to control characters.'))
1294+
if ucs in (0x08, 0x07F, 0x94):
1295+
# Backspace, delete, and clear delete remove a single character
1296+
return -1
1297+
if ucs == 0x1b:
1298+
# Excape is tricky. It removes some number of characters that
1299+
# come after it but the amount is dependent on what is
1300+
# interpreting the code.
1301+
# So this is going to often be wrong but other values will be
1302+
# wrong as well.
1303+
return -1
1304+
# All other control characters get 0 width
1305+
return 0
1306+
1307+
if _interval_bisearch(ucs, _COMBINING):
1308+
# Combining characters return 0 width as they will be combined with
1309+
# the width from other characters
1310+
return 0
1311+
1312+
# if we arrive here, ucs is not a combining or C0/C1 control character
1313+
1314+
return (1 +
1315+
(ucs >= 0x1100 and
1316+
(ucs <= 0x115f or # Hangul Jamo init. consonants
1317+
ucs == 0x2329 or ucs == 0x232a or
1318+
(ucs >= 0x2e80 and ucs <= 0xa4cf and
1319+
ucs != 0x303f) or # CJK ... Yi
1320+
(ucs >= 0xac00 and ucs <= 0xd7a3) or # Hangul Syllables
1321+
(ucs >= 0xf900 and ucs <= 0xfaff) or # CJK Compatibility Ideographs
1322+
(ucs >= 0xfe10 and ucs <= 0xfe19) or # Vertical forms
1323+
(ucs >= 0xfe30 and ucs <= 0xfe6f) or # CJK Compatibility Forms
1324+
(ucs >= 0xff00 and ucs <= 0xff60) or # Fullwidth Forms
1325+
(ucs >= 0xffe0 and ucs <= 0xffe6) or
1326+
(ucs >= 0x20000 and ucs <= 0x2fffd) or
1327+
(ucs >= 0x30000 and ucs <= 0x3fffd))))
1328+
1329+
def textual_width(msg, control_chars='guess', encoding='utf-8',
                  errors='replace'):
    """Return the total display width of ``msg`` in columns.

    The result is the sum of ``_ucp_width`` over every character of
    ``msg``.  Each character is handed to ``_ucp_width`` as a single
    ``(ord(char), control_chars)`` tuple argument.

    Arguments:

    msg - the string to measure
    control_chars - packed into the tuple given to _ucp_width
    encoding - accepted but not used by this function
    errors - accepted but not used by this function
    """
    total = 0
    for char in msg:
        # One tuple argument per character: (int value, control_chars)
        total += _ucp_width((ord(char), control_chars))
    return total

0 commit comments

Comments
 (0)