Merge pull request #1 from rollbar/sectioned-source-maps

rokob · web-flow · commit 05735efbd5c8 · 2017-12-01T14:05:09.000-08:00
add support for sectioned source maps
diff --git a/sourcemap/__init__.py b/sourcemap/__init__.py
@@ -8,7 +8,7 @@
 from .exceptions import SourceMapDecodeError  # NOQA
 from .decoder import SourceMapDecoder
 
-__version__ = '0.2.1'
+__version__ = '0.3.0'
 
 
 def load(fp, cls=None):
diff --git a/sourcemap/decoder.py b/sourcemap/decoder.py
@@ -14,7 +14,7 @@
 import sys
 from functools import partial
 from .exceptions import SourceMapDecodeError
-from .objects import Token, SourceMapIndex
+from .objects import Token, SourceMapIndex, SectionedSourceMapIndex
 try:
     import simplejson as json
 except ImportError:
@@ -63,8 +63,10 @@ def parse_vlq(self, segment):
         return values
 
     def decode(self, source):
-        """Decode a source map object into a SourceMapIndex.
+        """Decode a source map object into a SourceMapIndex or
+        SectionedSourceMapIndex.
 
+        For SourceMapIndex:
         The index is keyed on (dst_line, dst_column) for lookups,
         and a per row index is kept to help calculate which Token to retrieve.
 
@@ -102,6 +104,29 @@ def decode(self, source):
                 lte to the bisect_right: 2-1 => row[2-1] => 12
               - At this point, we know the token location, (1, 12)
               - Pull (1, 12) from index => tokens[3]
+
+        For SectionedSourceMapIndex:
+        The offsets are stored as tuples in sorted order:
+        [(0, 0), (1, 10), (1, 24), (2, 0), ...]
+
+        For each offset there is a corresponding SourceMapIndex
+        which operates as described above, except the tokens
+        are relative to their own section and must have the offset
+        applied in reverse on the destination row/col when the tokens
+        are returned.
+
+        To find the token at (1, 20):
+            - bisect_right to find the closest index (1, 20)
+            - Supposing that returns index i, we actually want (i - 1)
+              because the token we want is inside the map before that one
+            - We then have a SourceMapIndex and we perform the search
+              for (1 - offset[0], column - offset[1]). [Note this isn't
+              exactly correct as we have to account for different lines
+              being searched for and the found offset, so for the column
+              we use either offset[1] or 0 depending on if line matches
+              offset[0] or not]
+            - The token we find we then translate dst_line += offset[0],
+              and dst_col += offset[1].
         """
         # According to spec (https://docs.google.com/document/d/1U1RGAehQwRypUTovF1KRlpiOFze0b-_2gc6fAH0KY0k/edit#heading=h.h7yy76c5il9v)
         # A SouceMap may be prepended with ")]}'" to cause a Javascript error.
@@ -110,6 +135,18 @@ def decode(self, source):
             source = source.split('\n', 1)[1]
 
         smap = json.loads(source)
+        if smap.get('sections'):
+            offsets = []
+            maps = []
+            for section in smap.get('sections'):
+                offset = section.get('offset')
+                offsets.append((offset.get('line'), offset.get('column')))
+                maps.append(self._decode_map(section.get('map')))
+            return SectionedSourceMapIndex(smap, offsets, maps)
+        else:
+            return self._decode_map(smap)
+
+    def _decode_map(self, smap):
         sources = smap['sources']
         sourceRoot = smap.get('sourceRoot')
         names = list(map(text_type, smap['names']))
diff --git a/sourcemap/objects.py b/sourcemap/objects.py
@@ -61,7 +61,7 @@ def __init__(self, raw, tokens, line_index, index, sources=None):
     def lookup(self, line, column):
         try:
             # Let's hope for a direct match first
-            return self.index[(line, column)]
+            return self.index[(line, column)], self
         except KeyError:
             pass
 
@@ -75,7 +75,43 @@ def lookup(self, line, column):
         # We actually want the one less than current
         column = line_index[i - 1]
         # Return from the main index, based on the (line, column) tuple
-        return self.index[(line, column)]
+        return self.index[(line, column)], self
+
+    def columns_for_line(self, line):
+        """Return the column list kept in ``line_index`` for ``line``."""
+        return self.line_index[line]
+
+    def total_number_of_lines(self):
+        """Return the number of entries in ``line_index``."""
+        return len(self.line_index)
+
+    def files(self):
+        """Return ``[file]`` from the raw map, or None when it is unset."""
+        f = self.raw.get('file')
+        return [f] if f else None
+
+    def sources_content_map(self):
+        """Return a dict mapping each source to its content, or None."""
+        result = self._source_content_array()
+        return dict(result) if result else None
+
+    def raw_sources(self):
+        """Return the raw map's 'sources' value (None when absent)."""
+        return self.raw.get('sources')
+
+    def _source_content_array(self):
+        """Return a list of (source, content) pairs, or None.
+
+        None is returned when 'sources' or 'sourcesContent' is missing
+        or empty in the raw map.
+        """
+        sources = self.raw.get('sources')
+        content = self.raw.get('sourcesContent')
+        if sources and content:
+            # Materialize the pairs: on Python 3 a bare zip() is a one-shot
+            # iterator, so a second consumer would silently get nothing.
+            return list(zip(sources, content))
+        return None
 
     def __getitem__(self, item):
         return self.tokens[item]
@@ -88,3 +112,102 @@ def __len__(self):
 
     def __repr__(self):
         return '<SourceMapIndex: %s>' % ', '.join(map(str, self.sources))
+
+
+class SectionedSourceMapIndex(object):
+    """Index for a source map that is split into sections.
+
+    ``offsets`` is a sorted list of (line, column) tuples and ``maps``
+    holds, at the same position, the index of the section that starts at
+    that offset. Tokens are stored relative to their section and get
+    shifted to absolute destination coordinates by ``lookup``.
+    """
+
+    def __init__(self, raw, offsets, maps):
+        self.raw = raw
+        self.offsets = offsets
+        self.maps = maps
+        # ids of tokens already shifted to absolute coordinates, so that
+        # looking the same token up again does not shift it a second time.
+        self._translated = set()
+
+    def lookup(self, line, column):
+        """Return ``(token, section_map)`` for a destination position.
+
+        The token is the section's own object, shifted in place (once) by
+        the section offset. Raises IndexError when the position lies
+        before the first section.
+        """
+        map_index = bisect_right(self.offsets, (line, column)) - 1
+        if map_index < 0:
+            # Index -1 would silently pick the *last* section instead.
+            raise IndexError('no section covers (%r, %r)' % (line, column))
+        line_offset, col_offset = self.offsets[map_index]
+        # The column offset only applies on the section's first line.
+        col_offset = 0 if line != line_offset else col_offset
+        smap = self.maps[map_index]
+        result, _ = smap.lookup(line - line_offset, column - col_offset)
+        if id(result) not in self._translated:
+            self._translated.add(id(result))
+            result.dst_line += line_offset
+            result.dst_col += col_offset
+        return result, smap
+
+    def columns_for_line(self, line):
+        """Return the columns indexed on destination ``line``.
+
+        Columns of every section that has an entry for ``line`` are
+        collected in section order. Raises IndexError when no section
+        has that line.
+        """
+        last_map_index = bisect_right(self.offsets, (line + 1, 0))
+        # Clamp at 0 so a line before the first offset cannot wrap around
+        # to maps[-1].
+        first_map_index = max(bisect_right(self.offsets, (line, 0)) - 1, 0)
+        columns = []
+        found = False
+        for map_index in range(first_map_index, last_map_index):
+            smap = self.maps[map_index]
+            line_offset, col_offset = self.offsets[map_index]
+            smap_line = line - line_offset
+            if not 0 <= smap_line < smap.total_number_of_lines():
+                # This section starts after, or ends before, ``line``.
+                continue
+            found = True
+            # NOTE(review): the column offset is added on every line here,
+            # while lookup() only applies it on a section's first line;
+            # confirm which of the two is intended.
+            smap_cols = smap.columns_for_line(smap_line)
+            columns.extend([x + col_offset for x in smap_cols])
+        if not found:
+            raise IndexError('line %r is not covered by any section' % line)
+        return columns
+
+    def total_number_of_lines(self):
+        """Return the sum of the line counts of all sections."""
+        return sum(smap.total_number_of_lines() for smap in self.maps)
+
+    def files(self):
+        """Return the files of all sections as one list, or None if empty."""
+        files = []
+        for smap in self.maps:
+            files.extend(smap.files() or [])
+        return files or None
+
+    def sources_content_map(self):
+        """Return one source -> content dict for all sections, or None."""
+        content_maps = []
+        for m in self.maps:
+            content_maps.extend(m._source_content_array() or [])
+        return dict(content_maps) or None
+
+    def raw_sources(self):
+        """Return the raw sources of all sections as one flat list."""
+        sources = []
+        for m in self.maps:
+            # A section without a 'sources' key reports None; skip it.
+            sources.extend(m.raw_sources() or [])
+        return sources
+
+    def __repr__(self):
+        return '<SectionedSourceMapIndex: %s>' % ', '.join(map(str, self.maps))
diff --git a/tests/test_objects.py b/tests/test_objects.py
@@ -2,14 +2,95 @@
     import unittest2 as unittest
 except ImportError:
     import unittest
-from sourcemap.objects import Token, SourceMapIndex
+from sourcemap.objects import Token, SourceMapIndex, SectionedSourceMapIndex
 
 
 class TokenTestCase(unittest.TestCase):
     def test_eq(self):
         assert Token(1, 1, 'lol.js', 1, 1, 'lol') == Token(1, 1, 'lol.js', 1, 1, 'lol')
         assert Token(99, 1, 'lol.js', 1, 1, 'lol') != Token(1, 1, 'lol.js', 1, 1, 'lol')
 
+
+class SectionedSourceMapIndexTestCase(unittest.TestCase):
+    def get_index(self):
+        """Return (index, tokens per section) for three identical sections.
+
+        Every section holds tokens at relative (0, 0), (0, 5), (1, 0) and
+        (1, 12); the sections start at (0, 0), (1, 14) and (2, 28).
+        """
+        offsets = [(0, 0), (1, 14), (2, 28)]
+        positions = [(0, 0), (0, 5), (1, 0), (1, 12)]
+        tokens = []
+        maps = []
+        for n in range(len(offsets)):
+            section = [Token(dst_line=row, dst_col=col)
+                       for row, col in positions]
+            tokens.append(section)
+            maps.append(SourceMapIndex(
+                {'file': 'foo%d.js' % n},
+                section,
+                [[0, 5], [0, 12]],
+                dict(zip(positions, section)),
+            ))
+        return SectionedSourceMapIndex({}, offsets, maps), tokens
+
+    def test_lookup(self):
+        index, tokens = self.get_index()
+        # (line, columns, section number, token number in that section)
+        expected = [
+            (0, range(5), 0, 0),
+            (0, range(5, 10), 0, 1),
+            (1, range(12), 0, 2),
+            (1, range(12, 14), 0, 3),
+            (1, range(14, 19), 1, 0),
+            (1, range(19, 25), 1, 1),
+            (2, range(12), 1, 2),
+            (2, range(12, 28), 1, 3),
+            (2, range(28, 33), 2, 0),
+            (2, range(33, 40), 2, 1),
+            (3, range(12), 2, 2),
+            (3, range(12, 14), 2, 3),
+        ]
+        for line, columns, section, token in expected:
+            for column in columns:
+                found = index.lookup(line, column)[0]
+                assert found is tokens[section][token]
+
+    def test_columns_for_line(self):
+        index, tokens = self.get_index()
+        # Columns are plain ints, so they are compared by value; ``is``
+        # would only pass thanks to CPython's small-int caching.
+        cols = index.columns_for_line(0)
+
+        assert cols[0] == tokens[0][0].dst_col
+        assert cols[1] == tokens[0][1].dst_col
+
+        cols = index.columns_for_line(1)
+
+        assert len(cols) == 4
+        assert cols[0] == tokens[0][2].dst_col
+        assert cols[1] == tokens[0][3].dst_col
+        assert cols[2] == tokens[1][0].dst_col + index.offsets[1][1]
+        assert cols[3] == tokens[1][1].dst_col + index.offsets[1][1]
+
+        cols = index.columns_for_line(2)
+
+        assert len(cols) == 4
+        assert cols[0] == tokens[1][2].dst_col + index.offsets[1][1]
+        assert cols[1] == tokens[1][3].dst_col + index.offsets[1][1]
+        assert cols[2] == tokens[2][0].dst_col + index.offsets[2][1]
+        assert cols[3] == tokens[2][1].dst_col + index.offsets[2][1]
+
+    def test_lookup_from_columns_for_line(self):
+        index, tokens = self.get_index()
+        cols = index.columns_for_line(2)
+        t, _ = index.lookup(2, cols[2])
+        assert t is tokens[2][0]
+
+    def test_files(self):
+        index, _ = self.get_index()
+        assert len(index.files()) == 3
+
 
 class SourceMapIndexTestCase(unittest.TestCase):
     def get_index(self):
 class SourceMapIndexTestCase(unittest.TestCase):
     def get_index(self):
@@ -40,16 +172,30 @@ def test_lookup(self):
         index, tokens = self.get_index()
 
         for i in range(5):
-            assert index.lookup(0, i) is tokens[0]
+            assert index.lookup(0, i)[0] is tokens[0]
 
         for i in range(5, 10):
-            assert index.lookup(0, i) is tokens[1]
+            assert index.lookup(0, i)[0] is tokens[1]
 
         for i in range(12):
-            assert index.lookup(1, i) is tokens[2]
+            assert index.lookup(1, i)[0] is tokens[2]
 
         for i in range(12, 20):
-            assert index.lookup(1, i) is tokens[3]
+            assert index.lookup(1, i)[0] is tokens[3]
+
+    def test_columns_for_line(self):
+        """Check the first two columns of lines 0 and 1 against the tokens."""
+        index, tokens = self.get_index()
+        cols = index.columns_for_line(0)
+
+        # Columns are plain ints: compare by value, not by identity.
+        assert cols[0] == tokens[0].dst_col
+        assert cols[1] == tokens[1].dst_col
+
+        cols = index.columns_for_line(1)
+
+        assert cols[0] == tokens[2].dst_col
+        assert cols[1] == tokens[3].dst_col
 
     def test_getitem(self):
         index, tokens = self.get_index()