Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[path-] fix undercounted progress for multibyte chars #2323

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 32 additions & 14 deletions visidata/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,15 @@ def peek(self, size=-1):


class FileProgress:
'Open file in binary mode and track read() progress.'
'Open file as text or binary, then track read() progress.'
def __init__(self, path, fp, mode='r', **kwargs):
self.path = path
self.fp = fp
self.prog = None
if 'r' in mode:
gerund = 'reading'
self.prog = Progress(gerund=gerund, total=filesize(path))
self.total = filesize(path)
self.prog = Progress(gerund=gerund, total=self.total)
elif 'w' in mode:
gerund = 'writing'
self.prog = Progress(gerund=gerund)
Expand All @@ -117,6 +118,10 @@ def __init__(self, path, fp, mode='r', **kwargs):
self.fp.read = self.read
self.fp.close = self.close

self.est_charbytes = 1
self.est_sample = ''
self.est_total = 0

if self.prog:
self.prog.__enter__()

Expand All @@ -126,17 +131,32 @@ def close(self, *args, **kwargs):
self.prog = None
return self.fp_orig_close(*args, **kwargs)

def update_progress(self, r):
if not self.prog: return
if isinstance(r, bytes):
r_bytes = len(r)
self.prog.addProgress(r_bytes)
else:
self.update_estimate(r)
r_bytes = len(r) * self.est_charbytes
self.est_total += r_bytes
if self.est_total <= self.total:
self.prog.addProgress(r_bytes)

def update_estimate(self, r):
if len(self.est_sample) < self.prog.made/10000:
self.est_sample += r[:100]
self.est_charbytes = max(1, len(self.est_sample.encode(self.encoding)) / len(self.est_sample))

def read(self, size=-1):
r = self.fp_orig_read(size)
if self.prog:
if r:
self.prog.addProgress(len(r))
if r:
self.update_progress(r)
return r

def readline(self, size=-1):
r = self.fp_orig_readline(size)
if self.prog:
self.prog.addProgress(len(r))
self.update_progress(r)
return r

def __getattr__(self, k):
Expand All @@ -148,15 +168,15 @@ def __enter__(self):

def __next__(self):
r = next(self.fp)
self.prog.addProgress(len(r))
self.update_progress(r)
return r

def __iter__(self):
if not self.prog:
yield from self.fp
else:
for line in self.fp:
self.prog.addProgress(len(line))
self.update_progress(line)
yield line

def __exit__(self, type, value, tb):
Expand Down Expand Up @@ -325,11 +345,9 @@ def _open(self, *args, **kwargs):
return zopen(FileProgress(path, fp=open(path, mode='rb'), **kwargs), **kwargs)

def __iter__(self):
with Progress(total=filesize(self)) as prog:
with self.open(encoding=vd.options.encoding) as fd:
for i, line in enumerate(fd):
prog.addProgress(len(line))
yield line.rstrip('\n')
with self.open(encoding=vd.options.encoding) as fd:
for line in fd:
yield line.rstrip('\n')

def open_bytes(self, mode='rb'):
'Open the file pointed by this path and return a file object in binary mode.'
Expand Down
Loading