diff --git a/googler b/googler
index 64fb7dd..d784021 100755
--- a/googler
+++ b/googler
@@ -55,6 +55,20 @@ try:
 except (ImportError, Exception):
     pass
 
+from typing import (
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    Iterator,
+    List,
+    Match,
+    Optional,
+    Tuple,
+    Union,
+    cast,
+)
+
 # Basic setup
 
 logging.basicConfig(format='[%(levelname)s] %(message)s')
@@ -152,6 +166,124 @@ def monkeypatch_textwrap_for_cjk():
 monkeypatch_textwrap_for_cjk()
 
 
+CoordinateType = Tuple[int, int]
+
+
+class TrackedTextwrap:
+    """
+    Implements a text wrapper that tracks the position of each source
+    character, and can correctly insert zero-width sequences at given
+    offsets of the source text.
+
+    Wrapping result should be the same as that from PSL textwrap.wrap
+    with default settings except expand_tabs=False.
+    """
+
+    def __init__(self, text: str, width: int):
+        """
+        Wrap text to the given width and record where each source
+        character ends up.
+
+        width is passed straight to textwrap.wrap, which rejects
+        non-positive values. Raises RuntimeError if the wrapped lines
+        cannot be matched back to the source text.
+        """
+        self._original = text
+
+        # Do the job of replace_whitespace first so that we can easily
+        # match text to wrapped lines later. Note that this operation
+        # does not change text length or offsets.
+        whitespace = "\t\n\v\f\r "
+        whitespace_trans = str.maketrans(whitespace, " " * len(whitespace))
+        text = text.translate(whitespace_trans)
+
+        # Source text that is empty or only has whitespace wraps to no
+        # lines at all. We fall back to a single empty line in order to
+        # produce meaningful coordinates.
+        self._lines = textwrap.wrap(
+            text, width, expand_tabs=False, replace_whitespace=False
+        ) or [""]
+
+        # self._coords tracks the (row, column) coordinate of each source
+        # character in the result text. It is indexed by offset in
+        # source text.
+        self._coords = []  # type: List[CoordinateType]
+        offset = 0
+        row = col = 0
+        for row, line in enumerate(self._lines):
+            # textwrap drops whitespace at the very beginning of the text
+            # if the first word does not fit on the same line. Map such
+            # characters to the first column of the line.
+            while (
+                offset < len(text)
+                and text[offset] == " "
+                and not text.startswith(line, offset)
+            ):
+                self._coords.append((row, 0))
+                offset += 1
+            # Checked explicitly rather than with assert, so that the
+            # check is not stripped under python -O.
+            if not text.startswith(line, offset):
+                raise RuntimeError(
+                    "TrackedTextwrap: the impossible happened at offset {} of text {!r}".format(
+                        offset, self._original
+                    )
+                )
+            col = len(line)
+            self._coords.extend((row, c) for c in range(col))
+            offset += col
+            # All subsequent dropped whitespace maps to the last,
+            # imaginary column (the EOL character if you wish) of the
+            # current line.
+            while offset < len(text) and text[offset] == " ":
+                self._coords.append((row, col))
+                offset += 1
+        # One past the final character (think of it as EOF) should
+        # be treated as a valid offset.
+        self._coords.append((row, col))
+
+    def insert_zero_width_sequence(self, seq: str, offset: int) -> None:
+        """
+        Insert seq in front of the source character at the given offset.
+
+        seq should be a zero-width sequence, e.g., an ANSI escape
+        sequence. The character at offset keeps its coordinate, so a
+        later insertion at the same offset lands in front of this one.
+        May raise IndexError if offset is out of bounds.
+        """
+        row, col = self._coords[offset]
+        line = self._lines[row]
+        self._lines[row] = line[:col] + seq + line[col:]
+
+        # Shift coordinates of all characters after the given character
+        # on the same line.
+        shift = len(seq)
+        for i in range(offset + 1, len(self._coords)):
+            r, c = self._coords[i]
+            if r != row:
+                break
+            self._coords[i] = (r, c + shift)
+
+    @property
+    def original(self) -> str:
+        return self._original
+
+    @property
+    def lines(self) -> List[str]:
+        return self._lines
+
+    @property
+    def wrapped(self) -> str:
+        return "\n".join(self._lines)
+
+    def get_coordinate(self, offset: int) -> CoordinateType:
+        """
+        Return the (row, column) coordinate of the source character at
+        the given offset. May raise IndexError if offset is out of bounds.
+        """
+        return self._coords[offset]
+
+
 ### begin dim (DOM implementation with CSS support) ###
 ### https://github.com/zmwangx/dim/blob/master/dim.py ###
 
@@ -162,20 +294,6 @@ from collections import OrderedDict
 from enum import Enum
 from html.parser import HTMLParser
 
-from typing import (
-    Any,
-    Dict,
-    Generator,
-    Iterable,
-    Iterator,
-    List,
-    Match,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
-
 SelectorGroupLike = Union[str, "SelectorGroup", "Selector"]
 
 
@@ -2284,27 +2402,25 @@ class Result(object):
             else:
                 print(' ' * (indent + 5) + metadata)
 
+        # An empty abstract must not turn into a zero wrapping width,
+        # which textwrap rejects.
+        fillwidth = (columns - (indent + 6)) if columns > indent + 6 else max(len(abstract), 1)
+        wrapped_abstract = TrackedTextwrap(abstract, fillwidth)
         if colors and not self.nohl:
-            # Start from the last match, as inserting the bold characters changes the offsets.
-            for match in reversed(matches or []):
-                abstract = (
-                    abstract[: match['offset']]
-                    + '\033[1m'
-                    + match['phrase']
-                    + '\033[0m'
-                    + abstract[match['offset'] + len(match['phrase']) :]
-                )
+            # Highlight matches.
+            for match in matches or []:
+                offset = match['offset']
+                span = len(match['phrase'])
+                wrapped_abstract.insert_zero_width_sequence('\x1b[1m', offset)
+                wrapped_abstract.insert_zero_width_sequence('\x1b[0m', offset + span)
+
+        if colors:
             print(colors.abstract, end='')
-        if columns > indent + 6:
-            # Try to fill to columns
-            fillwidth = columns - (indent + 6)
-            for line in textwrap.wrap(abstract.replace('\n', ''), width=fillwidth):
-                print('%s%s' % (' ' * (indent + 5), line))
-            print('')
-        else:
-            print('%s%s\n' % (' ' * (indent + 5), abstract.replace('\n', ' ')))
+        for line in wrapped_abstract.lines:
+            print('%s%s' % (' ' * (indent + 5), line))
         if colors:
             print(colors.reset, end='')
+        print('')
 
     def print(self):
         """Print the result entry."""