kinverarity1 · kinverarity1 · Jun 17, 2021 · Jun 4, 2021
diff --git a/lasio/__init__.py b/lasio/__init__.py
@@ -7,13 +7,6 @@
 
 __version__ = version()
 
-try:
-    import openpyxl
-except ImportError:
-    pass
-else:
-    from .excel import ExcelConverter
-
 
 def read(file_ref, **kwargs):
     """Read a LAS file.

diff --git a/lasio/defaults.py b/lasio/defaults.py
@@ -101,8 +101,7 @@ def get_default_items():
 READ_SUBS = {
     "comma-decimal-mark": [(re.compile(r"(\d),(\d)"), r"\1.\2")],
     "run-on(-)": [(re.compile(r"(\d)-(\d)"), r"\1 -\2")],
-    "run-on(.)": [(re.compile(r"-?\d*\.\d*\.\d*"), " NaN NaN ")],
-    "run-on(NaN.)": [(re.compile(r"NaN[\.-]\d+"), " NaN NaN ")],
+    "run-on(.)": [(re.compile(r"-?\d*\.\d*\.\d*|NaN[\.-]\d+"), " NaN NaN ")],
 }
 
 NULL_POLICIES = {
@@ -158,35 +157,23 @@ def get_default_items():
     "2147483647": [-2147483647, 2147483647],
     "32767": [-32767, 32767],
     "(null)": [
-        (re.compile(r" \(null\)"), " NaN"),
-        (re.compile(r"\(null\) "), "NaN "),
-        (re.compile(r" \(NULL\)"), " NaN"),
-        (re.compile(r"\(NULL\) "), "NaN "),
-        (re.compile(r" null"), " NaN"),
-        (re.compile(r"null "), "NaN "),
-        (re.compile(r" NULL"), " NaN"),
-        (re.compile(r"NULL "), "NaN "),
+        (re.compile(r" \(null\)|\(null\) | \(NULL\)|\(NULL\) | null|null | NULL|NULL "), " NaN "),
     ],
     "-": [(re.compile(r" -+ "), " NaN ")],
-    "NA": [(re.compile(r"(#N/A)[ ]"), "NaN "), (re.compile(r"[ ](#N/A)"), " NaN")],
+    "NA": [(re.compile(r"(#N/A)[ ]|[ ](#N/A)"), " NaN ")],
     "INF": [
-        (re.compile(r"(-?1\.#INF)[ ]"), "NaN "),
-        (re.compile(r"[ ](-?1\.#INF[0-9]*)"), " NaN"),
+        (re.compile(r"(-?1\.#INF)[ ]|[ ](-?1\.#INF[0-9]*)"), " NaN "),
     ],
     "IO": [
-        (re.compile(r"(-?1\.#IO)[ ]"), "NaN "),
-        (re.compile(r"[ ](-?1\.#IO)"), " NaN"),
+        (re.compile(r"(-?1\.#IO)[ ]|[ ](-?1\.#IO)"), " NaN "),
     ],
     "IND": [
-        (re.compile(r"(-?1\.#IND)[ ]"), "NaN "),
-        (re.compile(r"[ ](-?1\.#IND[0-9]*)"), " NaN"),
+        (re.compile(r"(-?1\.#IND)[ ]|[ ](-?1\.#IND[0-9]*)"), " NaN "),
     ],
     "-0.0": [
-        (re.compile(r"(-0\.0)[ ]"), "NaN "),
-        (re.compile(r"[ ](-0\.00*[^1-9])"), " NaN"),
+        (re.compile(r"(-0\.0)[ ]|[ ](-0\.00*[^1-9])"), " NaN "),
     ],
     "numbers-only": [
-        (re.compile(r"([^ 0-9.\-+]+)[ ]"), "NaN "),
-        (re.compile(r"[ ]([^ 0-9.\-+]+)"), " NaN"),
+        (re.compile(r"([^ 0-9.\-+]+)[ ]|[ ]([^ 0-9.\-+]+)"), " NaN "),
     ],
 }
diff --git a/lasio/las.py b/lasio/las.py
@@ -461,6 +461,14 @@ def to_excel(self, filename):
             filename (str)
 
         """
+
+        # openpyxl is optional, so it is imported here on first use rather
+        # than at package import time; fail with an explicit message if absent.
+        try:
+            import openpyxl  # noqa: F401
+        except ImportError:
+            raise ImportError("to_excel() requires the optional openpyxl package")
+
         from . import excel
 
         converter = excel.ExcelConverter(self)

diff --git a/lasio/reader.py b/lasio/reader.py
@@ -46,6 +46,9 @@
     re.IGNORECASE,
 )
 
+# Tokenises a line: bare whitespace-delimited items or quoted substrings (#271).
+sow_regex = re.compile(r"""([^\s"']+)|"([^"]*)"|'([^']*)'""")
+
 
 def check_for_path_obj(file_ref):
     """Check if file_ref is a pathlib.Path object.
@@ -317,11 +320,6 @@ def determine_section_type(section_title):
         return "Header items"
 
 
-def split_on_whitespace(s):
-    # return s.split() # does not handle quoted substrings (#271)
-    # return shlex.split(s) # too slow
-    return ["".join(t) for t in re.findall(r"""([^\s"']+)|"([^"]*)"|'([^']*)'""", s)]
-
 
 def inspect_data_section(file_obj, line_nos, regexp_subs, ignore_comments="#"):
     """Determine how many columns there are in the data section.
@@ -351,7 +349,8 @@ def inspect_data_section(file_obj, line_nos, regexp_subs, ignore_comments="#"):
         else:
             for pattern, sub_str in regexp_subs:
                 line = re.sub(pattern, sub_str, line)
-            n_items = len(split_on_whitespace(line))
+            # count the items on the line: sow_regex yields one match per item
+            n_items = len(sow_regex.findall(line))
             logger.debug(
                 "Line {}: {} items counted in '{}'".format(line_no + 1, n_items, line)
             )
@@ -401,21 +400,15 @@ def read_data_section_iterative(
     title = file_obj.readline()
 
     def items(f, start_line_no, end_line_no):
-        line_no = start_line_no
-        for line in f:
-            line_no += 1
-            logger.debug(
-                "Line {}: reading data '{}'".format(
-                    line_no + 1, line.strip("\n").strip()
-                )
-            )
+        for line_no, line in enumerate(f, start=start_line_no+1):
             if line.strip().startswith(ignore_comments):
                 continue
             else:
                 for pattern, sub_str in regexp_subs:
                     line = re.sub(pattern, sub_str, line)
                 line = line.replace(chr(26), "")
-                for item in split_on_whitespace(line):
+                # split into items; each match has exactly one non-empty group
+                for item in ["".join(t) for t in sow_regex.findall(line)]:
                     try:
                         yield np.float64(item)
                     except ValueError: