pyexcel · chfw · Nov 1, 2025 · Apr 18, 2025 · Jun 26, 2025 · Oct 30, 2025
diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml
@@ -11,7 +11,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v1
         with:
-            python-version: 3.8
+            python-version: 3.11
       - name: lint
         run: |
           pip --use-deprecated=legacy-resolver install flake8

diff --git a/.github/workflows/moban-update.yml b/.github/workflows/moban-update.yml
@@ -12,11 +12,11 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v1
         with:
-          python-version: '3.7'
+          python-version: '3.11'
       - name: check changes
         run: |
           pip install markupsafe==2.0.1
-          pip install moban gitfs2 pypifs moban-jinja2-github moban-ansible
+          pip install ruamel.yaml moban gitfs2 pypifs moban-jinja2-github moban-ansible
           moban
           git status
           git diff --exit-code

diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -11,7 +11,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [Ubuntu]
-        python_version: ["3.9.16"]
+        python_version: ["3.9.24"]
 
     steps:
       - uses: actions/checkout@v2
@@ -25,6 +25,7 @@ jobs:
         run: |
           pip --use-deprecated=legacy-resolver install -r requirements.txt
           pip --use-deprecated=legacy-resolver install -r tests/requirements.txt
+          pip --use-deprecated=legacy-resolver install -r rnd_requirements.txt          
       - name: test
         run: |
           pip freeze

diff --git a/.moban.d/custom_readme.rst.jj2 b/.moban.d/custom_readme.rst.jj2
@@ -44,7 +44,7 @@ Otherwise, this library works OK with lxml 3.4.4 or above.
     >>> data = OrderedDict()
     >>> data.update({"Sheet 1": [[1, 2, 3], [4, 5, 6]]})
     >>> data.update({"Sheet 2": [[7, 8, 9], [10, 11, 12]]})
-    >>> io = StringIO()
+    >>> io = BytesIO()
     >>> save_data(io, data)
     >>> unused = io.seek(0)
     >>> # do something with the io

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -1,13 +1,25 @@
 Change log
 ================================================================================
 
+0.6.2 - 31.10.2025
+--------------------------------------------------------------------------------
+
+**Fixed**
+
+#. Fix freeze when parsing certain corrupt XLSX files
+#. Fix reading of files with more than 26 columns
+
+**Updated**
+
+#. Migrated to pytest
+
 0.6.1 - 11.11.2024
 --------------------------------------------------------------------------------
 
 **Updated**
 
-#. #9: Potential fix for incorrect reading of data with empty cells when used
-   with pyexcel 
+#. `#9 <https://github.com/pyexcel/pyexcel-xlsxr/issues/9>`_: Potential fix for
+   incorrect reading of data with empty cells when used with pyexcel 
 
 0.6.0 - 10.10.2020
 --------------------------------------------------------------------------------

diff --git a/CONTRIBUTORS.rst b/CONTRIBUTORS.rst
@@ -1,8 +1,9 @@
 
 
-1 contributors
+2 contributors
 ================================================================================
 
 In alphabetical order:
 
 * `Mark Skelton <https://github.com/mtskelton>`_
+* `Pierre-Louis Peeters <https://github.com/PLPeeters>`_
diff --git a/README.rst b/README.rst
@@ -16,6 +16,8 @@ pyexcel-xlsxr - Let you focus on data, instead of xlsx format
 
 
 
+.. image:: https://pepy.tech/badge/pyexcel-xlsxr/month
+   :target: https://pepy.tech/project/pyexcel-xlsxr
 
 
 .. image:: https://img.shields.io/gitter/room/gitterHQ/gitter.svg
@@ -47,19 +49,11 @@ Otherwise, this library works OK with lxml 3.4.4 or above.
 Support the project
 ================================================================================
 
-If your company has embedded pyexcel and its components into a revenue generating
-product, please support me on github, `patreon <https://www.patreon.com/bePatron?u=5537627>`_
-or `bounty source <https://salt.bountysource.com/teams/chfw-pyexcel>`_ to maintain
-the project and develop it further.
-
-If you are an individual, you are welcome to support me too and for however long
-you feel like. As my backer, you will receive
-`early access to pyexcel related contents <https://www.patreon.com/pyexcel/posts>`_.
-
-And your issues will get prioritized if you would like to become my patreon as `pyexcel pro user`.
-
-With your financial support, I will be able to invest
-a little bit more time in coding, documentation and writing interesting posts.
+If your company uses pyexcel and its components in a revenue-generating product,
+please consider supporting the project on GitHub or
+`Patreon <https://www.patreon.com/bePatron?u=5537627>`_. Your financial
+support will enable me to dedicate more time to coding, improving documentation,
+and creating engaging content.
 
 
 Known constraints
@@ -99,15 +93,8 @@ As a standalone library
 
     >>> import os
     >>> import sys
-    >>> if sys.version_info[0] < 3:
-    ...     from StringIO import StringIO
-    ... else:
-    ...     from io import BytesIO as StringIO
-    >>> PY2 = sys.version_info[0] == 2
-    >>> if PY2 and sys.version_info[1] < 7:
-    ...      from ordereddict import OrderedDict
-    ... else:
-    ...     from collections import OrderedDict
+    >>> from io import BytesIO
+    >>> from collections import OrderedDict
 
 
 .. testcode::
@@ -141,7 +128,7 @@ Here's the sample code:
     >>> data = OrderedDict()
     >>> data.update({"Sheet 1": [[1, 2, 3], [4, 5, 6]]})
     >>> data.update({"Sheet 2": [[7, 8, 9], [10, 11, 12]]})
-    >>> io = StringIO()
+    >>> io = BytesIO()
     >>> save_data(io, data)
     >>> unused = io.seek(0)
     >>> # do something with the io
@@ -317,15 +304,18 @@ and update changelog.yml
 .. note::
 
     As to rnd_requirements.txt, usually, it is created when a dependent
-    library is not released. Once the dependecy is installed
+    library is not released. Once the dependency is installed
     (will be released), the future
     version of the dependency in the requirements.txt will be valid.
 
 
 How to test your contribution
-------------------------------
+--------------------------------------------------------------------------------
 
-Although `nose` and `doctest` are both used in code testing, it is adviable that unit tests are put in tests. `doctest` is incorporated only to make sure the code examples in documentation remain valid across different development releases.
+Although `nose` and `doctest` are both used in code testing, it is advisable
+that unit tests are put in tests. `doctest` is incorporated only to make sure
+the code examples in documentation remain valid across different development
+releases.
 
 On Linux/Unix systems, please launch your tests like this::
 

diff --git a/changelog.yml b/changelog.yml
@@ -1,10 +1,20 @@
 name: pyexcel-xlsxr
 organisation: pyexcel
 releases:
+- changes:
+  - action: Fixed
+    details:
+    - 'Fix freeze when parsing certain corrupt XLSX files'
+    - 'Fix reading of files with more than 26 columns'
+  - action: Updated
+    details:
+    - 'Migrated to pytest'
+  date: 31.10.2025
+  version: 0.6.2
 - changes:
   - action: Updated
     details:
-    - '#9: Potential fix for incorrect reading of data with empty cells when used with pyexcel '
+    - '`#9`: Potential fix for incorrect reading of data with empty cells when used with pyexcel '
   date: 11.11.2024
   version: 0.6.1
 - changes:

diff --git a/lint.sh b/lint.sh
@@ -1,2 +1,2 @@
 pip install flake8
-flake8 --exclude=.moban.d,docs,setup.py   --builtins=unicode,xrange,long .  && python setup.py checkdocs
+flake8 --exclude=.venv,.moban.d,docs,setup.py   --builtins=unicode,xrange,long .  && python setup.py checkdocs
diff --git a/pyexcel_xlsxr/__init__.py b/pyexcel_xlsxr/__init__.py
@@ -1,10 +1,11 @@
 """
-    pyexcel_xlsxr
-    ~~~~~~~~~~~~~~~~~~~
-    The lower level xlsx file format handler using lxml
-    :copyright: (c) 2015-2020 by Onni Software Ltd & its contributors
-    :license: New BSD License
+pyexcel_xlsxr
+~~~~~~~~~~~~~~~~~~~
+The lower level xlsx file format handler using lxml
+:copyright: (c) 2015-2020 by Onni Software Ltd & its contributors
+:license: New BSD License
 """
+
 from pyexcel_io.io import get_data as read_data
 from pyexcel_io.io import isstream
 from pyexcel_io.plugins import IOPluginInfoChainV2

diff --git a/pyexcel_xlsxr/messy_xlsx.py b/pyexcel_xlsxr/messy_xlsx.py
@@ -2,24 +2,20 @@
 import re
 import zipfile
 from datetime import time, datetime, timedelta
+from functools import cache
 
 from lxml import etree
 from pyexcel_io._compact import OrderedDict
 
 STYLE_FILENAME = "xl/styles.xml"
 SHARED_STRING = "xl/sharedStrings.xml"
 WORK_BOOK = "xl/workbook.xml"
-SHEET_MATCHER = "xl/worksheets/(work)?sheet([0-9]+)?.xml"
-SHEET_INDEX_MATCHER = "xl/worksheets/(work)?sheet(([0-9]+)?).xml"
-XLSX_ROW_MATCH = re.compile(rb".*?(<row.*?<\/.*?row>).*?", re.MULTILINE)
-NUMBER_FMT_MATCHER = re.compile(
-    rb".*?(<numFmts.*?<\/.*?numFmts>).*?", re.MULTILINE
-)
-XFS_FMT_MATCHER = re.compile(
-    rb".*?(<cellXfs.*?<\/.*?cellXfs>).*?", re.MULTILINE
-)
-SHEET_FMT_MATCHER = re.compile(rb".*?(<sheet .*?\/>).*?", re.MULTILINE)
-DATE_1904_MATCHER = re.compile(rb".*?(<workbookPr.*?\/>).*?", re.MULTILINE)
+SHEET_MATCHER = re.compile(r"xl/worksheets/(?:work)?sheet([0-9]+)?\.xml")
+XLSX_ROW_MATCH = re.compile(rb"<row\b[^>]*>.*?</row>", re.DOTALL)
+NUMBER_FMT_MATCHER = re.compile(rb"<numFmts\b[^>]*>.*?</numFmts>", re.DOTALL)
+XFS_FMT_MATCHER = re.compile(rb"<cellXfs\b[^>]*>.*?</cellXfs>", re.DOTALL)
+SHEET_FMT_MATCHER = re.compile(rb"<sheet\b.*?/>", re.DOTALL)
+DATE_1904_MATCHER = re.compile(rb"<workbookPr\b.*?/>", re.DOTALL)
 # "xmlns:x14ac="http://schemas.microsoft.com/office/spreadsheetml/2009/9/ac"
 # But it not used for now
 X14AC_NAMESPACE = b'xmlns:x14ac="http://not.used.com/"'
@@ -158,14 +154,15 @@ def find_sheets(file_list):
     return [
         sheet_file
         for sheet_file in file_list
-        if re.match(SHEET_MATCHER, sheet_file)
+        if SHEET_MATCHER.match(sheet_file)
     ]
 
 
 def get_sheet_index(file_name):
-    if re.match(SHEET_MATCHER, file_name):
-        result = re.search(SHEET_INDEX_MATCHER, file_name)
-        index = int(result.group(3)) if result.group(3) else 1
+    sheet_match = SHEET_MATCHER.match(file_name)
+    # A name without digits (plain "sheet.xml") is treated as sheet number 1.
+    if sheet_match:
+        index = int(sheet_match.group(1)) if sheet_match.group(1) else 1
         return index - 1
     else:
         raise Exception("Invalid sheet file name")
@@ -182,12 +179,16 @@ def __repr__(self):
         return str(self.value)
 
 
+@cache  # NOTE(review): unbounded; confirm inputs are few distinct columns
 def column_to_number(column):
-    column = re.sub("[^A-Z]", "", column)
-    cl = len(column) - 1
-    return sum(
-        [(ord(c.upper()) - 64) + (26 * (cl - i)) for i, c in enumerate(column)]
-    )
+    """Return the 1-based column index for letters such as "A" or "AB"."""
+    # Anything outside A-Z (e.g. row digits) is dropped before converting.
+    letters = re.sub(r"[^A-Z]", "", column.upper())
+    result = 0
+    # Bijective base-26, most significant letter first: A=1, Z=26, AA=27.
+    for letter in letters:
+        result = result * 26 + (ord(letter) - ord("A") + 1)
+    return result
 
 
 def parse_row(row_xml_string, book):
@@ -338,10 +339,10 @@ def parse_book_properties(book_content):
     )
     namespaces = {"r": ns}
 
-    xlsx_header = u"<wrapper {0}>".format(
+    xlsx_header = "<wrapper {0}>".format(
         " ".join('xmlns:{0}="{1}"'.format(k, v) for k, v in namespaces.items())
     ).encode("utf-8")
-    xlsx_footer = u"</wrapper>".encode("utf-8")
+    xlsx_footer = "</wrapper>".encode("utf-8")
     sheets = SHEET_FMT_MATCHER.findall(book_content)
     for sheet in sheets:
         block = xlsx_header + sheet + xlsx_footer

diff --git a/test.bat b/test.bat
@@ -1,2 +1,2 @@
 pip freeze
-nosetests --with-coverage --cover-package pyexcel_xlsxr --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst  pyexcel_xlsxr
+coverage run -m --source=pyexcel_xlsxr pytest && coverage report --show-missing
diff --git a/test.sh b/test.sh
@@ -1,3 +1,3 @@
 #/bin/bash
 pip freeze
-nosetests --with-coverage --cover-package pyexcel_xlsxr --cover-package tests tests --with-doctest --doctest-extension=.rst README.rst  pyexcel_xlsxr
+coverage run -m --source=pyexcel_xlsxr pytest && coverage report --show-missing
diff --git a/tests/base.py b/tests/base.py
@@ -1,10 +1,5 @@
-import os  # noqa
-import datetime  # noqa
-
 import pyexcel
 
-from nose.tools import eq_, raises  # noqa
-
 
 def create_sample_file1(file):
     data = ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", 1.1, 1]