thisisparker · afontenot · Aug 6, 2024
diff --git a/xword_dl/downloader/amuniversaldownloader.py b/xword_dl/downloader/amuniversaldownloader.py
@@ -5,7 +5,6 @@
 import urllib
 import xml
 
-import puz
 import requests
 import xmltodict
 
@@ -59,26 +58,25 @@ def parse_xword(self, xword_data):
             fetched[field] = urllib.parse.unquote(
                 xword_data.get(field, '')).strip()
 
-        puzzle = puz.Puzzle()
-        puzzle.title = fetched.get('Title', '')
-        puzzle.author = ''.join([fetched.get('Author', ''),
+        self.puzzle.title = fetched.get('Title', '')
+        self.puzzle.author = ''.join([fetched.get('Author', ''),
                                  ' / Ed. ',
                                  fetched.get('Editor', '')])
-        puzzle.copyright = fetched.get('Copyright', '')
-        puzzle.width = int(xword_data.get('Width'))
-        puzzle.height = int(xword_data.get('Height'))
+        self.puzzle.copyright = fetched.get('Copyright', '')
+        self.puzzle.width = int(xword_data.get('Width'))
+        self.puzzle.height = int(xword_data.get('Height'))
 
         solution = xword_data.get('AllAnswer').replace('-', '.')
 
-        puzzle.solution = solution
+        self.puzzle.solution = solution
 
         fill = ''
         for letter in solution:
             if letter == '.':
                 fill += '.'
             else:
                 fill += '-'
-        puzzle.fill = fill
+        self.puzzle.fill = fill
 
         across_clues = xword_data['AcrossClue'].splitlines()
         down_clues = self.process_clues(xword_data['DownClue'].splitlines())
@@ -92,9 +90,9 @@ def parse_xword(self, xword_data):
 
         clues = [clue['clue'] for clue in clues_sorted]
 
-        puzzle.clues = clues
+        self.puzzle.clues = clues
 
-        return puzzle
+        return self.puzzle
 
 # As of Sept 2023, the JSON data for USA Today is not consistently populated.
 # I'd rather use the JSON data if possible, but until that's sorted, we can
@@ -166,24 +164,22 @@ def parse_xword(self, xword_data):
         except xml.parsers.expat.ExpatError:
             raise XWordDLException('Puzzle data malformed, cannot parse.')
 
-        puzzle = puz.Puzzle()
+        self.puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
+        self.puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
+        self.puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')
 
-        puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
-        puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
-        puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')
+        self.puzzle.width = int(xw.get('Width')['@v'])
+        self.puzzle.height = int(xw.get('Height')['@v'])
 
-        puzzle.width = int(xw.get('Width')['@v'])
-        puzzle.height = int(xw.get('Height')['@v'])
-
-        puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
-        puzzle.fill = ''.join([c if c == '.' else '-' for c in puzzle.solution])
+        self.puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
+        self.puzzle.fill = ''.join([c if c == '.' else '-' for c in self.puzzle.solution])
 
         xw_clues = sorted(list(xw['across'].values()) + list(xw['down'].values()),
                           key=lambda c: int(c['@cn']))
 
-        puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]
+        self.puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]
 
-        return puzzle
+        return self.puzzle
 
 
 class UniversalDownloader(AMUniversalDownloader):

diff --git a/xword_dl/downloader/amuselabsdownloader.py b/xword_dl/downloader/amuselabsdownloader.py
@@ -3,7 +3,6 @@
 import json
 import urllib
 
-import puz
 import requests
 
 import re
@@ -187,12 +186,11 @@ def amuse_b64(e, amuseKey=None):
         return xword_data
 
     def parse_xword(self, xword_data):
-        puzzle = puz.Puzzle()
-        puzzle.title = xword_data.get('title', '').strip()
-        puzzle.author = xword_data.get('author', '').strip()
-        puzzle.copyright = xword_data.get('copyright', '').strip()
-        puzzle.width = xword_data.get('w')
-        puzzle.height = xword_data.get('h')
+        self.puzzle.title = xword_data.get('title', '').strip()
+        self.puzzle.author = xword_data.get('author', '').strip()
+        self.puzzle.copyright = xword_data.get('copyright', '').strip()
+        self.puzzle.width = xword_data.get('w')
+        self.puzzle.height = xword_data.get('h')
 
         markup_data = xword_data.get('cellInfos', '')
 
@@ -228,8 +226,8 @@ def parse_xword(self, xword_data):
                     rebus_table += '{:2d}:{};'.format(rebus_index, unidecode(cell))
                     rebus_index += 1
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         placed_words = xword_data['placedWords']
         across_words = [word for word in placed_words if word['acrossNotDown']]
@@ -241,26 +239,25 @@ def parse_xword(self, xword_data):
 
         clues = [word['clue']['clue'] for word in weirdass_puz_clue_sorting]
 
-        puzzle.clues.extend(clues)
+        self.puzzle.clues.extend(clues)
 
         has_markup = b'\x80' in markup
         has_rebus = any(rebus_board)
 
         if has_markup:
-            puzzle.extensions[b'GEXT'] = markup
-            puzzle._extensions_order.append(b'GEXT')
-            puzzle.markup()
+            self.puzzle.extensions[b'GEXT'] = markup
+            self.puzzle._extensions_order.append(b'GEXT')
+            self.puzzle.markup()
 
         if has_rebus:
-            puzzle.extensions[b'GRBS'] = bytes(rebus_board)
-            puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
-            puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
-            puzzle.rebus()
+            self.puzzle.extensions[b'GRBS'] = bytes(rebus_board)
+            self.puzzle.extensions[b'RTBL'] = rebus_table.encode(self.puzzle.encoding)
+            self.puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
+            self.puzzle.rebus()
 
-        return puzzle
+        return self.puzzle
 
     def pick_filename(self, puzzle, **kwargs):
         if not self.date and self.id:
             self.guess_date_from_id(self.id)
         return super().pick_filename(puzzle, **kwargs)
-
diff --git a/xword_dl/downloader/basedownloader.py b/xword_dl/downloader/basedownloader.py
@@ -1,6 +1,7 @@
 import urllib
 
 import requests
+import puz
 
 from ..util import *
 
@@ -34,6 +35,15 @@ def __init__(self, **kwargs):
         self.session.headers.update(self.settings.get('headers', {}))
         self.session.cookies.update(self.settings.get('cookies', {}))
 
+        self.puzzle = puz.Puzzle()
+
+        if 'puzzle_v2' in kwargs:
+            # this is hack-ily patching constants that puzpy does not
+            # currently provide a method for setting
+            self.puzzle.version = b'2.0'
+            self.puzzle.fileversion = b'2.0\0'
+            self.puzzle.encoding = 'UTF-8'
+
     def pick_filename(self, puzzle, **kwargs):
         tokens = {'outlet':  self.outlet or '',
                   'prefix':  self.outlet_prefix or '',
@@ -105,6 +115,7 @@ def download(self, url):
 
         puzzle = sanitize_for_puzfile(puzzle,
                                       preserve_html=self.settings.get(
-                                                        'preserve_html'))
+                                                        'preserve_html'),
+                                      demojize=(self.puzzle.encoding != 'UTF-8'))
 
         return puzzle
diff --git a/xword_dl/downloader/compilerdownloader.py b/xword_dl/downloader/compilerdownloader.py
@@ -1,4 +1,3 @@
-import puz
 import requests
 import xmltodict
 
@@ -36,30 +35,28 @@ def parse_xword(self, xword_data, enumeration=True):
         xw_metadata = xw_puzzle['metadata']
         xw_grid = xw_puzzle['crossword']['grid']
 
-        puzzle = puz.Puzzle()
+        self.puzzle.title = xw_metadata.get('title') or ''
+        self.puzzle.author = xw_metadata.get('creator') or ''
+        self.puzzle.copyright = xw_metadata.get('copyright') or ''
 
-        puzzle.title = xw_metadata.get('title') or ''
-        puzzle.author = xw_metadata.get('creator') or ''
-        puzzle.copyright = xw_metadata.get('copyright') or ''
-
-        puzzle.width = int(xw_grid.get('@width'))
-        puzzle.height = int(xw_grid.get('@height'))
+        self.puzzle.width = int(xw_grid.get('@width'))
+        self.puzzle.height = int(xw_grid.get('@height'))
 
         solution = ''
         fill = ''
         markup = b''
 
         cells = {(int(cell.get('@x')), int(cell.get('@y'))): cell for cell in xw_grid.get('cell')}
 
-        for y in range(1, puzzle.height + 1):
-            for x in range(1, puzzle.width + 1):
+        for y in range(1, self.puzzle.height + 1):
+            for x in range(1, self.puzzle.width + 1):
                 cell = cells.get((x, y))
                 solution += cell.get('@solution', '.')
                 fill += '.' if cell.get('@type') == 'block' else '-'
                 markup += (b'\x80' if (cell.get('@background-shape') == 'circle') else b'\x00')
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         xw_clues = xw_puzzle['crossword']['clues']
 
@@ -69,13 +66,13 @@ def parse_xword(self, xword_data, enumeration=True):
                     if c.get("@format") and enumeration else '') for c in
                     sorted(all_clues, key=lambda x: int(x.get('@number')))]
 
-        puzzle.clues = clues
+        self.puzzle.clues = clues
 
         has_markup = b'\x80' in markup
 
         if has_markup:
-            puzzle.extensions[b'GEXT'] = markup
-            puzzle._extensions_order.append(b'GEXT')
-            puzzle.markup()
+            self.puzzle.extensions[b'GEXT'] = markup
+            self.puzzle._extensions_order.append(b'GEXT')
+            self.puzzle.markup()
 
-        return puzzle
+        return self.puzzle
diff --git a/xword_dl/downloader/guardiandownloader.py b/xword_dl/downloader/guardiandownloader.py
@@ -2,7 +2,6 @@
 import json
 import re
 
-import puz
 import requests
 
 from bs4 import BeautifulSoup
@@ -42,16 +41,14 @@ def fetch_data(self, solver_url):
         return xw_data
 
     def parse_xword(self, xword_data):
-        puzzle = puz.Puzzle()
+        self.puzzle.author = xword_data.get('creator', {}).get('name') or ''
+        self.puzzle.height = xword_data.get('dimensions').get('rows')
+        self.puzzle.width  = xword_data.get('dimensions').get('cols')
 
-        puzzle.author = xword_data.get('creator', {}).get('name') or ''
-        puzzle.height = xword_data.get('dimensions').get('rows')
-        puzzle.width  = xword_data.get('dimensions').get('cols')
-
-        puzzle.title = xword_data.get('name') or ''
+        self.puzzle.title = xword_data.get('name') or ''
 
         if not all(e.get('solution') for e in xword_data['entries']):
-            puzzle.title += ' - no solution provided'
+            self.puzzle.title += ' - no solution provided'
 
         self.date = datetime.datetime.fromtimestamp(
                                         xword_data.get('date') // 1000)
@@ -68,21 +65,21 @@ def parse_xword(self, xword_data):
         solution = ''
         fill = ''
 
-        for y in range(puzzle.height):
-            for x in range(puzzle.width):
+        for y in range(self.puzzle.height):
+            for x in range(self.puzzle.width):
                 sol_at_space = grid_dict.get((x,y), '.')
                 solution += sol_at_space
                 fill += '.' if sol_at_space == '.' else '-'
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         clues = [e.get('clue') for e in sorted(xword_data.get('entries'),
                     key=lambda x: (x.get('number'), x.get('direction')))]
 
-        puzzle.clues = clues
+        self.puzzle.clues = clues
 
-        return puzzle
+        return self.puzzle
 
 
 class GuardianCrypticDownloader(GuardianDownloader):

diff --git a/xword_dl/downloader/newyorktimesdownloader.py b/xword_dl/downloader/newyorktimesdownloader.py
@@ -1,7 +1,6 @@
 import datetime
 import urllib
 
-import puz
 import requests
 
 from .basedownloader import BaseDownloader
@@ -110,22 +109,20 @@ def fetch_data(self, solver_url):
         return xword_data
 
     def parse_xword(self, xword_data):
-        puzzle = puz.Puzzle()
-
-        puzzle.author = join_bylines(xword_data['constructors'], "and").strip()
-        puzzle.copyright = xword_data['copyright']
-        puzzle.height = int(xword_data['body'][0]['dimensions']['height'])
-        puzzle.width =  int(xword_data['body'][0]['dimensions']['width'])
+        self.puzzle.author = join_bylines(xword_data['constructors'], "and").strip()
+        self.puzzle.copyright = xword_data['copyright']
+        self.puzzle.height = int(xword_data['body'][0]['dimensions']['height'])
+        self.puzzle.width =  int(xword_data['body'][0]['dimensions']['width'])
 
         if not self.date:
             self.date = datetime.datetime.strptime(xword_data['publicationDate'],
                                           '%Y-%m-%d')
 
-        puzzle.title = xword_data.get('title') or self.date.strftime(
+        self.puzzle.title = xword_data.get('title') or self.date.strftime(
                 '%A, %B %d, %Y')
 
         if xword_data.get('notes'):
-            puzzle.notes = xword_data.get('notes')[0]['text']
+            self.puzzle.notes = xword_data.get('notes')[0]['text']
 
         solution = ''
         fill = ''
@@ -145,7 +142,7 @@ def parse_xword(self, xword_data):
                 rebus_board.append(0)
             else:
                 try:
-                    suitable_answer = unidecode(square.get('answer') or 
+                    suitable_answer = unidecode(square.get('answer') or
                                         square['moreAnswers']['valid'][0])
                 except (IndexError, KeyError):
                     raise XWordDLException('Unable to parse puzzle JSON. Possibly something .puz incompatible')
@@ -158,26 +155,26 @@ def parse_xword(self, xword_data):
 
             markup += (b'\x00' if square.get('type', 1) == 1 else b'\x80')
 
-        puzzle.solution = solution
-        puzzle.fill = fill
+        self.puzzle.solution = solution
+        self.puzzle.fill = fill
 
         if b'\x80' in markup:
-            puzzle.extensions[b'GEXT'] = markup
-            puzzle._extensions_order.append(b'GEXT')
-            puzzle.markup()
+            self.puzzle.extensions[b'GEXT'] = markup
+            self.puzzle._extensions_order.append(b'GEXT')
+            self.puzzle.markup()
 
         if any(rebus_board):
-            puzzle.extensions[b'GRBS'] = bytes(rebus_board)
-            puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
-            puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
-            puzzle.rebus()
+            self.puzzle.extensions[b'GRBS'] = bytes(rebus_board)
+            self.puzzle.extensions[b'RTBL'] = rebus_table.encode(self.puzzle.encoding)
+            self.puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
+            self.puzzle.rebus()
 
         clue_list = xword_data['body'][0]['clues']
         clue_list.sort(key=lambda c: (int(c['label']), c['direction']))
 
-        puzzle.clues = [c['text'][0].get('plain') or '' for c in clue_list]
+        self.puzzle.clues = [c['text'][0].get('plain') or '' for c in clue_list]
 
-        return puzzle
+        return self.puzzle
 
     def pick_filename(self, puzzle, **kwargs):
         if puzzle.title == self.date.strftime('%A, %B %d, %Y'):