Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add additional rawr intersector implementations #302

Merged
merged 2 commits into from
Nov 17, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions config.yaml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -360,5 +360,8 @@ rawr:
table: ne_10m_urban_areas
layer: landuse
end_zoom: 10
intersect:
# type can be toi, none, all, all-parents
type: toi
# uncomment this to use RAWR tiles instead of going directly to the database.
#use-rawr-tiles: true
41 changes: 26 additions & 15 deletions tilequeue/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -1731,8 +1731,6 @@ def tilequeue_rawr_process(cfg, peripherals):
from tilequeue.rawr import RawrS3Sink
from tilequeue.rawr import RawrStoreSink
from tilequeue.rawr import RawrTileGenerationPipeline
from tilequeue.rawr import RawrToiIntersector
from tilequeue.rawr import EmptyToiIntersector
from tilequeue.stats import RawrTilePipelineStatsHandler
import boto3
# pass through the postgresql yaml config directly
Expand Down Expand Up @@ -1765,25 +1763,38 @@ def tilequeue_rawr_process(cfg, peripherals):
s3_client = boto3.client('s3', region_name=sink_region)
rawr_sink = RawrS3Sink(s3_client, bucket, prefix, suffix)

toi_yaml = cfg.yml.get('toi-store')
toi_type = toi_yaml.get('type')
if toi_type == 's3':
toi_s3_yaml = toi_yaml.get('s3')
assert toi_s3_yaml, 'Missing toi-store s3 config'
toi_bucket = toi_s3_yaml.get('bucket')
toi_key = toi_s3_yaml.get('key')
rawr_intersect_yaml = rawr_yaml.get('intersect')
assert rawr_intersect_yaml, 'Missing rawr intersect config'
intersect_type = rawr_intersect_yaml.get('type')
assert intersect_type, 'Missing rawr intersect type'

if intersect_type == 'toi':
toi_yaml = cfg.yml.get('toi-store')
toi_type = toi_yaml.get('type')
assert toi_type == 's3', 'Rawr toi intersector requires toi on s3'
toi_bucket = toi_yaml.get('bucket')
toi_key = toi_yaml.get('key')
toi_region = toi_yaml.get('region')
assert toi_bucket, 'Missing toi-store s3 bucket'
assert toi_key, 'Missing toi-store s3 key'

assert toi_region, 'Missing toi-store s3 region'
s3_client = boto3.client('s3', region_name=toi_region)
from tilequeue.rawr import RawrToiIntersector
rawr_toi_intersector = RawrToiIntersector(
s3_client, toi_bucket, toi_key)

elif toi_type == 'none':
elif intersect_type == 'none':
from tilequeue.rawr import EmptyToiIntersector
rawr_toi_intersector = EmptyToiIntersector()

elif intersect_type == 'all':
from tilequeue.rawr import RawrAllIntersector
rawr_toi_intersector = RawrAllIntersector()
elif intersect_type == 'all-parents':
from tilequeue.rawr import RawrAllWithParentsIntersector
zoom_stop_inclusive = 0
rawr_toi_intersector = \
RawrAllWithParentsIntersector(zoom_stop_inclusive)
else:
assert False, 'TOI type %r is not known. Options are s3 or none.' \
% (toi_type,)
assert 0, 'Invalid rawr intersect type: %s' % intersect_type

logger = make_logger(cfg, 'rawr_process')
rawr_source = parse_sources(rawr_source_list)
Expand Down
61 changes: 60 additions & 1 deletion tilequeue/rawr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from collections import namedtuple
from contextlib import closing
from cStringIO import StringIO
from itertools import imap
from ModestMaps.Core import Coordinate
from msgpack import Unpacker
from raw_tiles.tile import Tile
Expand Down Expand Up @@ -127,7 +128,7 @@ def __call__(self, coords):
if self.logger:
self.logger.info(
'Rawr tiles enqueued: '
'coords(%d) payloads(%d) enqueue_calls(%d))' %
'coords(%d) payloads(%d) enqueue_calls(%d)' %
(n_coords, n_payloads, n_msgs_sent))

self.stats_handler(n_coords, n_payloads, n_msgs_sent)
Expand Down Expand Up @@ -271,6 +272,64 @@ def __call__(self, coords):
return [], metrics, timing


class RawrAllIntersector(object):
    """
    Intersector that returns back the coordinates themselves.

    This is useful when we know that we enqueue the full tile pyramids in the
    message, so no coordinate needs to be filtered out.
    """

    def __call__(self, coords):
        """
        Pass ``coords`` through untouched.

        ``coords`` must support ``len()``. Returns a 3-tuple
        ``(coords, metrics, timing)`` where ``coords`` is the very same object
        that was passed in, ``metrics`` counts every coordinate as a hit, and
        ``timing`` reports 0 for both phases because nothing is fetched or
        intersected here.
        """
        n_coords = len(coords)
        metrics = dict(
            total=n_coords,
            hits=n_coords,
            misses=0,
            n_toi=0,
            cached=False,
        )
        timing = dict(
            fetch=0,
            intersect=0,
        )
        return coords, metrics, timing


class RawrAllWithParentsIntersector(object):
    """
    Intersector that returns back the coordinates together with their parents.

    Each input coordinate is expanded upwards, one zoom level at a time, for
    as long as its zoom is >= ``zoom_stop_inclusive``. Duplicates are removed.
    """

    def __init__(self, zoom_stop_inclusive):
        # lowest zoom level that is still included when walking up to parents
        self.zoom_stop_inclusive = zoom_stop_inclusive

    def __call__(self, coords):
        """
        Expand ``coords`` with all of their ancestors.

        Each coordinate must expose ``zoom``, ``zoomBy()`` and the
        ``container()`` method of the value ``zoomBy()`` returns.

        Returns a 3-tuple ``(coords, metrics, timing)``. The returned
        ``coords`` is a lazy ``imap`` iterator over the de-duplicated
        coordinates, so it can be consumed only once and has no ``len()``;
        its order follows set iteration order. ``metrics`` counts every
        resulting coordinate as a hit.
        """
        timing = dict(
            fetch=0,
            intersect=0,
        )
        # NOTE(review): time_block presumably records the elapsed time of the
        # enclosed block under timing['intersect'] -- confirm against helper.
        with time_block(timing, 'intersect'):
            # coordinates are tracked in their marshalled int form so that
            # set membership is cheap
            all_coord_ints = set()
            for coord in coords:
                while coord.zoom >= self.zoom_stop_inclusive:
                    coord_int = coord_marshall_int(coord)
                    if coord_int in all_coord_ints:
                        # as an optimization, assume that if the coord is
                        # already in the set, then all its parents will be too
                        break
                    all_coord_ints.add(coord_int)
                    # step up to the parent tile one zoom level above
                    coord = coord.zoomBy(-1).container()
        coords = imap(coord_unmarshall_int, all_coord_ints)
        n_coords = len(all_coord_ints)
        metrics = dict(
            total=n_coords,
            hits=n_coords,
            misses=0,
            n_toi=0,
            cached=False,
        )
        return coords, metrics, timing


class RawrTileGenerationPipeline(object):

"""Entry point for rawr process command"""
Expand Down