Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make minor performance improvements #470

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions lasio/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,6 @@

__version__ = version()

try:
import openpyxl
except ImportError:
pass
else:
from .excel import ExcelConverter


def read(file_ref, **kwargs):
"""Read a LAS file.
Expand Down
29 changes: 8 additions & 21 deletions lasio/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,7 @@ def get_default_items():
READ_SUBS = {
"comma-decimal-mark": [(re.compile(r"(\d),(\d)"), r"\1.\2")],
"run-on(-)": [(re.compile(r"(\d)-(\d)"), r"\1 -\2")],
"run-on(.)": [(re.compile(r"-?\d*\.\d*\.\d*"), " NaN NaN ")],
"run-on(NaN.)": [(re.compile(r"NaN[\.-]\d+"), " NaN NaN ")],
"run-on(.)": [(re.compile(r"-?\d*\.\d*\.\d*|NaN[\.-]\d+"), " NaN NaN ")],
}

NULL_POLICIES = {
Expand Down Expand Up @@ -158,35 +157,23 @@ def get_default_items():
"2147483647": [-2147483647, 2147483647],
"32767": [-32767, 32767],
"(null)": [
(re.compile(r" \(null\)"), " NaN"),
(re.compile(r"\(null\) "), "NaN "),
(re.compile(r" \(NULL\)"), " NaN"),
(re.compile(r"\(NULL\) "), "NaN "),
(re.compile(r" null"), " NaN"),
(re.compile(r"null "), "NaN "),
(re.compile(r" NULL"), " NaN"),
(re.compile(r"NULL "), "NaN "),
(re.compile(r" \(null\)|\(null\) | \(NULL\)|\(NULL\) | null|null | NULL|NULL "), " NaN "),
],
"-": [(re.compile(r" -+ "), " NaN ")],
"NA": [(re.compile(r"(#N/A)[ ]"), "NaN "), (re.compile(r"[ ](#N/A)"), " NaN")],
"NA": [(re.compile(r"(#N/A)[ ]|[ ](#N/A)"), " NaN ")],
"INF": [
(re.compile(r"(-?1\.#INF)[ ]"), "NaN "),
(re.compile(r"[ ](-?1\.#INF[0-9]*)"), " NaN"),
(re.compile(r"(-?1\.#INF)[ ]|[ ](-?1\.#INF[0-9]*)"), " NaN "),
],
"IO": [
(re.compile(r"(-?1\.#IO)[ ]"), "NaN "),
(re.compile(r"[ ](-?1\.#IO)"), " NaN"),
(re.compile(r"(-?1\.#IO)[ ]|[ ](-?1\.#IO)"), " NaN "),
],
"IND": [
(re.compile(r"(-?1\.#IND)[ ]"), "NaN "),
(re.compile(r"[ ](-?1\.#IND[0-9]*)"), " NaN"),
(re.compile(r"(-?1\.#IND)[ ]|[ ](-?1\.#IND[0-9]*)"), " NaN "),
],
"-0.0": [
(re.compile(r"(-0\.0)[ ]"), "NaN "),
(re.compile(r"[ ](-0\.00*[^1-9])"), " NaN"),
(re.compile(r"(-0\.0)[ ]|[ ](-0\.00*[^1-9])"), " NaN "),
],
"numbers-only": [
(re.compile(r"([^ 0-9.\-+]+)[ ]"), "NaN "),
(re.compile(r"[ ]([^ 0-9.\-+]+)"), " NaN"),
(re.compile(r"([^ 0-9.\-+]+)[ ]|[ ]([^ 0-9.\-+]+)"), " NaN "),
],
}
8 changes: 8 additions & 0 deletions lasio/las.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,14 @@ def to_excel(self, filename):
filename (str)

"""

try:
import openpyxl
except ImportError:
pass
else:
from .excel import ExcelConverter

from . import excel

converter = excel.ExcelConverter(self)
Expand Down
23 changes: 8 additions & 15 deletions lasio/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@
re.IGNORECASE,
)

# Tokenizer pattern for splitting a line into items. Each match is one of:
#   group 1 - a run of characters containing no whitespace and no quote marks
#   group 2 - the contents of a "double-quoted" substring (quotes excluded)
#   group 3 - the contents of a 'single-quoted' substring (quotes excluded)
# Exactly one group is non-empty per match, so "".join(match) yields the item.
sow_regex = re.compile(r"""([^\s"']+)|"([^"]*)"|'([^']*)'""")



def check_for_path_obj(file_ref):
"""Check if file_ref is a pathlib.Path object.
Expand Down Expand Up @@ -317,11 +320,6 @@ def determine_section_type(section_title):
return "Header items"


# One token is either a run of non-whitespace, non-quote characters, or the
# contents of a double-/single-quoted substring. Compiled once at import time
# because split_on_whitespace() is called for every data line.
_SPLIT_ON_WHITESPACE_RE = re.compile(r"""([^\s"']+)|"([^"]*)"|'([^']*)'""")


def split_on_whitespace(s):
    """Split a string on whitespace, keeping quoted substrings together.

    Arguments:
        s (str): the text to split.

    Returns:
        list of str: the items found in *s*, in order. Text enclosed in
        matching double or single quotes is returned as a single item with
        the surrounding quotes removed (an empty pair of quotes yields an
        empty string). An empty or all-whitespace input yields ``[]``.

    """
    # s.split() does not handle quoted substrings (#271), and shlex.split(s)
    # is too slow, hence the regular expression.
    # findall() returns one 3-tuple of groups per match, of which exactly one
    # is non-empty; joining the tuple collapses it to the matched item.
    return ["".join(groups) for groups in _SPLIT_ON_WHITESPACE_RE.findall(s)]


def inspect_data_section(file_obj, line_nos, regexp_subs, ignore_comments="#"):
"""Determine how many columns there are in the data section.
Expand Down Expand Up @@ -351,7 +349,8 @@ def inspect_data_section(file_obj, line_nos, regexp_subs, ignore_comments="#"):
else:
for pattern, sub_str in regexp_subs:
line = re.sub(pattern, sub_str, line)
n_items = len(split_on_whitespace(line))
# split line and count number of elements
n_items = len(["".join(t) for t in sow_regex.findall(line)])
logger.debug(
"Line {}: {} items counted in '{}'".format(line_no + 1, n_items, line)
)
Expand Down Expand Up @@ -401,21 +400,15 @@ def read_data_section_iterative(
title = file_obj.readline()

def items(f, start_line_no, end_line_no):
line_no = start_line_no
for line in f:
line_no += 1
logger.debug(
"Line {}: reading data '{}'".format(
line_no + 1, line.strip("\n").strip()
)
)
for line_no, line in enumerate(f, start=start_line_no+1):
if line.strip().startswith(ignore_comments):
continue
else:
for pattern, sub_str in regexp_subs:
line = re.sub(pattern, sub_str, line)
line = line.replace(chr(26), "")
for item in split_on_whitespace(line):
# for item in split_on_whitespace(line, sow_regex):
for item in ["".join(t) for t in sow_regex.findall(line)]:
try:
yield np.float64(item)
except ValueError:
Expand Down