Skip to content

Commit

Permalink
supporting dirlist with different encodings
Browse files Browse the repository at this point in the history
  • Loading branch information
SharonBrizinov committed Jan 11, 2021
1 parent 8acaf0f commit 56870f7
Showing 1 changed file with 20 additions and 7 deletions.
27 changes: 20 additions & 7 deletions src/FSNode.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,28 @@

# Matches one dirlist row such as "2016-11-14 16:14:10 10 DirName/File.txt".
# Groups: (1) the "date time" stamp, (2) an integer field (presumably the
# entry size -- confirm against the dirlist producer), (3) the rest of the
# line, i.e. the path.
# Written as a raw string so the regex escapes are not parsed as (invalid)
# string escapes; the compiled pattern text is unchanged.
DIRLIST_REGEX = re.compile(r"(\d+\-\d+\-\d+ \d+\:\d+\:\d+)\s+(\d+)\s+(.*)")

def decode_data(data):
    """Decode raw dirlist bytes into text by guessing the encoding.

    If the data starts with a UTF-8 or UTF-16 byte-order mark, the matching
    BOM-aware codec is tried first so the mark is consumed instead of leaking
    into the text as a leading U+FEFF character. Otherwise (or if that
    attempt fails) the popular encodings are tried in order and the first
    one that decodes without error wins.

    Args:
        data: Raw file contents as ``bytes`` or ``bytearray``.

    Returns:
        The decoded ``str``, or ``None`` when ``data`` is not a bytes-like
        object and therefore cannot be decoded.
    """
    import codecs

    if not isinstance(data, (bytes, bytearray)):
        return None

    candidates = []
    if data.startswith(codecs.BOM_UTF8):
        candidates.append("utf-8-sig")
    elif data.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
        # The generic "utf-16" codec reads the BOM to pick the byte order
        # and strips it from the result.
        candidates.append("utf-16")
    # "latin-1" maps every possible byte, so it never fails and must stay
    # last. "ascii" is omitted: it is a strict subset of utf-8 and could
    # never be reached.
    candidates.extend(["utf-8", "utf-16-le", "utf-16-be", "latin-1"])

    for encoding in candidates:
        try:
            return data.decode(encoding)
        except UnicodeDecodeError:
            # Wrong guess; fall through to the next candidate encoding.
            continue
    return None

def parse_dirlist(dirlist_path):
# Dirlist:
# 2016-11-14 16:14:09 0 DirName/
# 2016-11-14 16:14:10 10 DirName/File.txt
stats = NSNodeStats()
root_node = FSNode("", None, 0)
# Read dirlist
with open(dirlist_path, "r") as f:
dirlist_data = f.read()
with open(dirlist_path, "rb") as f:
dirlist_data_raw = f.read()
dirlist_data = decode_data(dirlist_data_raw)
if not dirlist_data:
raise Exception("Could not decode dirlist. Are you sure your data is valid?")

# Parse
lines = dirlist_data.splitlines()
for i, line in enumerate(lines):
Expand Down Expand Up @@ -144,11 +157,11 @@ def process_sub_node(self, new_node):
current_node.children[path_element] = new_node

def get_how_many_childern_are_files(self):
    """Return how many direct children of this node are files.

    Iterates over the values of ``self.children`` and counts those whose
    ``is_file`` attribute is truthy. Only direct children are inspected;
    nothing is traversed recursively.
    """
    return sum(1 for node in self.children.values() if node.is_file)

def add_child(self, new_child):
self.children[new_child.basename] = new_child
Expand Down

0 comments on commit 56870f7

Please sign in to comment.