Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor creation of puz files and add optional PUZv2 support #204

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions xword_dl/downloader/amuniversaldownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import urllib
import xml

import puz
import requests
import xmltodict

Expand Down Expand Up @@ -59,26 +58,25 @@ def parse_xword(self, xword_data):
fetched[field] = urllib.parse.unquote(
xword_data.get(field, '')).strip()

puzzle = puz.Puzzle()
puzzle.title = fetched.get('Title', '')
puzzle.author = ''.join([fetched.get('Author', ''),
self.puzzle.title = fetched.get('Title', '')
self.puzzle.author = ''.join([fetched.get('Author', ''),
' / Ed. ',
fetched.get('Editor', '')])
puzzle.copyright = fetched.get('Copyright', '')
puzzle.width = int(xword_data.get('Width'))
puzzle.height = int(xword_data.get('Height'))
self.puzzle.copyright = fetched.get('Copyright', '')
self.puzzle.width = int(xword_data.get('Width'))
self.puzzle.height = int(xword_data.get('Height'))

solution = xword_data.get('AllAnswer').replace('-', '.')

puzzle.solution = solution
self.puzzle.solution = solution

fill = ''
for letter in solution:
if letter == '.':
fill += '.'
else:
fill += '-'
puzzle.fill = fill
self.puzzle.fill = fill

across_clues = xword_data['AcrossClue'].splitlines()
down_clues = self.process_clues(xword_data['DownClue'].splitlines())
Expand All @@ -92,9 +90,9 @@ def parse_xword(self, xword_data):

clues = [clue['clue'] for clue in clues_sorted]

puzzle.clues = clues
self.puzzle.clues = clues

return puzzle
return self.puzzle

# As of Sept 2023, the JSON data for USA Today is not consistently populated.
# I'd rather use the JSON data if possible, but until that's sorted, we can
Expand Down Expand Up @@ -166,24 +164,22 @@ def parse_xword(self, xword_data):
except xml.parsers.expat.ExpatError:
raise XWordDLException('Puzzle data malformed, cannot parse.')

puzzle = puz.Puzzle()
self.puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
self.puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
self.puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')

puzzle.title = unquote(xw.get('Title',[]).get('@v') or '')
puzzle.author = unquote(xw.get('Author',[]).get('@v') or '')
puzzle.copyright = unquote(xw.get('Copyright',[]).get('@v') or '')
self.puzzle.width = int(xw.get('Width')['@v'])
self.puzzle.height = int(xw.get('Height')['@v'])

puzzle.width = int(xw.get('Width')['@v'])
puzzle.height = int(xw.get('Height')['@v'])

puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
puzzle.fill = ''.join([c if c == '.' else '-' for c in puzzle.solution])
self.puzzle.solution = xw.get('AllAnswer',[]).get('@v').replace('-', '.')
self.puzzle.fill = ''.join([c if c == '.' else '-' for c in self.puzzle.solution])

xw_clues = sorted(list(xw['across'].values()) + list(xw['down'].values()),
key=lambda c: int(c['@cn']))

puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]
self.puzzle.clues = [unquote(c.get('@c') or '') for c in xw_clues]

return puzzle
return self.puzzle


class UniversalDownloader(AMUniversalDownloader):
Expand Down
35 changes: 16 additions & 19 deletions xword_dl/downloader/amuselabsdownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import json
import urllib

import puz
import requests

import re
Expand Down Expand Up @@ -187,12 +186,11 @@ def amuse_b64(e, amuseKey=None):
return xword_data

def parse_xword(self, xword_data):
puzzle = puz.Puzzle()
puzzle.title = xword_data.get('title', '').strip()
puzzle.author = xword_data.get('author', '').strip()
puzzle.copyright = xword_data.get('copyright', '').strip()
puzzle.width = xword_data.get('w')
puzzle.height = xword_data.get('h')
self.puzzle.title = xword_data.get('title', '').strip()
self.puzzle.author = xword_data.get('author', '').strip()
self.puzzle.copyright = xword_data.get('copyright', '').strip()
self.puzzle.width = xword_data.get('w')
self.puzzle.height = xword_data.get('h')

markup_data = xword_data.get('cellInfos', '')

Expand Down Expand Up @@ -228,8 +226,8 @@ def parse_xword(self, xword_data):
rebus_table += '{:2d}:{};'.format(rebus_index, unidecode(cell))
rebus_index += 1

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

placed_words = xword_data['placedWords']
across_words = [word for word in placed_words if word['acrossNotDown']]
Expand All @@ -241,26 +239,25 @@ def parse_xword(self, xword_data):

clues = [word['clue']['clue'] for word in weirdass_puz_clue_sorting]

puzzle.clues.extend(clues)
self.puzzle.clues.extend(clues)

has_markup = b'\x80' in markup
has_rebus = any(rebus_board)

if has_markup:
puzzle.extensions[b'GEXT'] = markup
puzzle._extensions_order.append(b'GEXT')
puzzle.markup()
self.puzzle.extensions[b'GEXT'] = markup
self.puzzle._extensions_order.append(b'GEXT')
self.puzzle.markup()

if has_rebus:
puzzle.extensions[b'GRBS'] = bytes(rebus_board)
puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
puzzle.rebus()
self.puzzle.extensions[b'GRBS'] = bytes(rebus_board)
self.puzzle.extensions[b'RTBL'] = rebus_table.encode(self.puzzle.encoding)
self.puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
self.puzzle.rebus()

return puzzle
return self.puzzle

def pick_filename(self, puzzle, **kwargs):
    """Choose a filename for *puzzle*, filling in the date first if needed.

    When no date has been set on this downloader but a puzzle id is
    available, ``guess_date_from_id`` is called with that id before the
    parent class's ``pick_filename`` does the actual naming.
    (Presumably ``guess_date_from_id`` populates ``self.date`` -- confirm
    against its definition.)
    """
    date_missing = not self.date
    if date_missing and self.id:
        self.guess_date_from_id(self.id)

    return super().pick_filename(puzzle, **kwargs)

13 changes: 12 additions & 1 deletion xword_dl/downloader/basedownloader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import urllib

import requests
import puz

from ..util import *

Expand Down Expand Up @@ -34,6 +35,15 @@ def __init__(self, **kwargs):
self.session.headers.update(self.settings.get('headers', {}))
self.session.cookies.update(self.settings.get('cookies', {}))

self.puzzle = puz.Puzzle()

if 'puzzle_v2' in kwargs:
# HACK: puzpy does not currently provide a way to select the file version,
# so patch the version, fileversion and encoding attributes directly
self.puzzle.version = b'2.0'
self.puzzle.fileversion = b'2.0\0'
self.puzzle.encoding = 'UTF-8'

def pick_filename(self, puzzle, **kwargs):
tokens = {'outlet': self.outlet or '',
'prefix': self.outlet_prefix or '',
Expand Down Expand Up @@ -105,6 +115,7 @@ def download(self, url):

puzzle = sanitize_for_puzfile(puzzle,
preserve_html=self.settings.get(
'preserve_html'))
'preserve_html'),
demojize=(self.puzzle.encoding != 'UTF-8'))

return puzzle
31 changes: 14 additions & 17 deletions xword_dl/downloader/compilerdownloader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import puz
import requests
import xmltodict

Expand Down Expand Up @@ -36,30 +35,28 @@ def parse_xword(self, xword_data, enumeration=True):
xw_metadata = xw_puzzle['metadata']
xw_grid = xw_puzzle['crossword']['grid']

puzzle = puz.Puzzle()
self.puzzle.title = xw_metadata.get('title') or ''
self.puzzle.author = xw_metadata.get('creator') or ''
self.puzzle.copyright = xw_metadata.get('copyright') or ''

puzzle.title = xw_metadata.get('title') or ''
puzzle.author = xw_metadata.get('creator') or ''
puzzle.copyright = xw_metadata.get('copyright') or ''

puzzle.width = int(xw_grid.get('@width'))
puzzle.height = int(xw_grid.get('@height'))
self.puzzle.width = int(xw_grid.get('@width'))
self.puzzle.height = int(xw_grid.get('@height'))

solution = ''
fill = ''
markup = b''

cells = {(int(cell.get('@x')), int(cell.get('@y'))): cell for cell in xw_grid.get('cell')}

for y in range(1, puzzle.height + 1):
for x in range(1, puzzle.width + 1):
for y in range(1, self.puzzle.height + 1):
for x in range(1, self.puzzle.width + 1):
cell = cells.get((x, y))
solution += cell.get('@solution', '.')
fill += '.' if cell.get('@type') == 'block' else '-'
markup += (b'\x80' if (cell.get('@background-shape') == 'circle') else b'\x00')

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

xw_clues = xw_puzzle['crossword']['clues']

Expand All @@ -69,13 +66,13 @@ def parse_xword(self, xword_data, enumeration=True):
if c.get("@format") and enumeration else '') for c in
sorted(all_clues, key=lambda x: int(x.get('@number')))]

puzzle.clues = clues
self.puzzle.clues = clues

has_markup = b'\x80' in markup

if has_markup:
puzzle.extensions[b'GEXT'] = markup
puzzle._extensions_order.append(b'GEXT')
puzzle.markup()
self.puzzle.extensions[b'GEXT'] = markup
self.puzzle._extensions_order.append(b'GEXT')
self.puzzle.markup()

return puzzle
return self.puzzle
25 changes: 11 additions & 14 deletions xword_dl/downloader/guardiandownloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import json
import re

import puz
import requests

from bs4 import BeautifulSoup
Expand Down Expand Up @@ -42,16 +41,14 @@ def fetch_data(self, solver_url):
return xw_data

def parse_xword(self, xword_data):
puzzle = puz.Puzzle()
self.puzzle.author = xword_data.get('creator', {}).get('name') or ''
self.puzzle.height = xword_data.get('dimensions').get('rows')
self.puzzle.width = xword_data.get('dimensions').get('cols')

puzzle.author = xword_data.get('creator', {}).get('name') or ''
puzzle.height = xword_data.get('dimensions').get('rows')
puzzle.width = xword_data.get('dimensions').get('cols')

puzzle.title = xword_data.get('name') or ''
self.puzzle.title = xword_data.get('name') or ''

if not all(e.get('solution') for e in xword_data['entries']):
puzzle.title += ' - no solution provided'
self.puzzle.title += ' - no solution provided'

self.date = datetime.datetime.fromtimestamp(
xword_data.get('date') // 1000)
Expand All @@ -68,21 +65,21 @@ def parse_xword(self, xword_data):
solution = ''
fill = ''

for y in range(puzzle.height):
for x in range(puzzle.width):
for y in range(self.puzzle.height):
for x in range(self.puzzle.width):
sol_at_space = grid_dict.get((x,y), '.')
solution += sol_at_space
fill += '.' if sol_at_space == '.' else '-'

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

clues = [e.get('clue') for e in sorted(xword_data.get('entries'),
key=lambda x: (x.get('number'), x.get('direction')))]

puzzle.clues = clues
self.puzzle.clues = clues

return puzzle
return self.puzzle


class GuardianCrypticDownloader(GuardianDownloader):
Expand Down
39 changes: 18 additions & 21 deletions xword_dl/downloader/newyorktimesdownloader.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import datetime
import urllib

import puz
import requests

from .basedownloader import BaseDownloader
Expand Down Expand Up @@ -110,22 +109,20 @@ def fetch_data(self, solver_url):
return xword_data

def parse_xword(self, xword_data):
puzzle = puz.Puzzle()

puzzle.author = join_bylines(xword_data['constructors'], "and").strip()
puzzle.copyright = xword_data['copyright']
puzzle.height = int(xword_data['body'][0]['dimensions']['height'])
puzzle.width = int(xword_data['body'][0]['dimensions']['width'])
self.puzzle.author = join_bylines(xword_data['constructors'], "and").strip()
self.puzzle.copyright = xword_data['copyright']
self.puzzle.height = int(xword_data['body'][0]['dimensions']['height'])
self.puzzle.width = int(xword_data['body'][0]['dimensions']['width'])

if not self.date:
self.date = datetime.datetime.strptime(xword_data['publicationDate'],
'%Y-%m-%d')

puzzle.title = xword_data.get('title') or self.date.strftime(
self.puzzle.title = xword_data.get('title') or self.date.strftime(
'%A, %B %d, %Y')

if xword_data.get('notes'):
puzzle.notes = xword_data.get('notes')[0]['text']
self.puzzle.notes = xword_data.get('notes')[0]['text']

solution = ''
fill = ''
Expand All @@ -145,7 +142,7 @@ def parse_xword(self, xword_data):
rebus_board.append(0)
else:
try:
suitable_answer = unidecode(square.get('answer') or
suitable_answer = unidecode(square.get('answer') or
square['moreAnswers']['valid'][0])
except (IndexError, KeyError):
raise XWordDLException('Unable to parse puzzle JSON. Possibly something .puz incompatible')
Expand All @@ -158,26 +155,26 @@ def parse_xword(self, xword_data):

markup += (b'\x00' if square.get('type', 1) == 1 else b'\x80')

puzzle.solution = solution
puzzle.fill = fill
self.puzzle.solution = solution
self.puzzle.fill = fill

if b'\x80' in markup:
puzzle.extensions[b'GEXT'] = markup
puzzle._extensions_order.append(b'GEXT')
puzzle.markup()
self.puzzle.extensions[b'GEXT'] = markup
self.puzzle._extensions_order.append(b'GEXT')
self.puzzle.markup()

if any(rebus_board):
puzzle.extensions[b'GRBS'] = bytes(rebus_board)
puzzle.extensions[b'RTBL'] = rebus_table.encode(puz.ENCODING)
puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
puzzle.rebus()
self.puzzle.extensions[b'GRBS'] = bytes(rebus_board)
self.puzzle.extensions[b'RTBL'] = rebus_table.encode(self.puzzle.encoding)
self.puzzle._extensions_order.extend([b'GRBS', b'RTBL'])
self.puzzle.rebus()

clue_list = xword_data['body'][0]['clues']
clue_list.sort(key=lambda c: (int(c['label']), c['direction']))

puzzle.clues = [c['text'][0].get('plain') or '' for c in clue_list]
self.puzzle.clues = [c['text'][0].get('plain') or '' for c in clue_list]

return puzzle
return self.puzzle

def pick_filename(self, puzzle, **kwargs):
if puzzle.title == self.date.strftime('%A, %B %d, %Y'):
Expand Down
Loading
Loading