diff --git a/Makefile b/Makefile
index 3299e3d..3c98a4b 100644
--- a/Makefile
+++ b/Makefile
@@ -18,8 +18,6 @@ build:
 #
 .PHONY: setup
 setup:
-	test -f tree-sitter-version \
-	|| cp tree-sitter-version.default tree-sitter-version
 	./scripts/check-prerequisites
 	./scripts/install-tree-sitter-cli
 	./scripts/install-tree-sitter-lib
diff --git a/scripts/download-tree-sitter b/scripts/download-tree-sitter
index 7a8f5b7..6062705 100755
--- a/scripts/download-tree-sitter
+++ b/scripts/download-tree-sitter
@@ -41,6 +41,7 @@ while [[ $# -gt 0 ]]; do
   shift
 done
 
+./scripts/update-version-symlinks
 version=$(cat tree-sitter-version)
 
 mkdir -p downloads
diff --git a/scripts/update-version-symlinks b/scripts/update-version-symlinks
index e95f6b6..1d4b2ed 100755
--- a/scripts/update-version-symlinks
+++ b/scripts/update-version-symlinks
@@ -15,9 +15,13 @@
 #
 set -eu
 
+if [[ ! -e tree-sitter-version ]]; then
+  cp tree-sitter-version.default tree-sitter-version
+fi
 version=$(cat tree-sitter-version)
 
 echo "Updating symlinks 'downloads/tree-sitter' and 'tree-sitter'"
+mkdir -p downloads
 
 (
   cd downloads
diff --git a/src/bindings/lib/Tree_sitter_output.atd b/src/bindings/lib/Tree_sitter_output.atd
index 973d827..56480e2 100644
--- a/src/bindings/lib/Tree_sitter_output.atd
+++ b/src/bindings/lib/Tree_sitter_output.atd
@@ -42,6 +42,6 @@ type node = {
 }
 
 type position = {
-  row: int;
-  column: int;
+  row: int; (* 0-based *)
+  column: int; (* 0-based *)
 }
diff --git a/src/gen/lib/CST_grammar.ml b/src/gen/lib/CST_grammar.ml
index b0a4a03..87ac503 100644
--- a/src/gen/lib/CST_grammar.ml
+++ b/src/gen/lib/CST_grammar.ml
@@ -163,8 +163,9 @@ type grammar = {
   (* rules, grouped and sorted in dependency order. *)
 
   extras: string list;
-  (* node names that don't belong to any rule and can occur anywhere,
-     such as comments. *)
+  (* rules names for constructs that can occur anywhere independently from
+     the grammar, such as comments. Other extras such as string literals
+     and patterns were removed because we don't need them. *)
 }
 [@@deriving show {with_path = false}]
 
diff --git a/src/gen/lib/CST_grammar_conv.ml b/src/gen/lib/CST_grammar_conv.ml
index 22f162e..6e06a8b 100644
--- a/src/gen/lib/CST_grammar_conv.ml
+++ b/src/gen/lib/CST_grammar_conv.ml
@@ -5,10 +5,11 @@
 open CST_grammar
 
 (*
-   Traverse the grammar starting from the entrypoint, return the set of
-   visited rule names.
+   Traverse the grammar starting from all the entrypoints, which
+   are the main entrypoint (first rule in the tree-sitter grammar)
+   and the extras. Return the set of visited rule names.
 *)
-let detect_used ~entrypoint rules =
+let detect_used ~entrypoints rules =
   let rule_tbl = Hashtbl.create 100 in
   List.iter (fun (name, x) -> Hashtbl.add rule_tbl name x) rules;
   let get_rule name =
@@ -48,7 +49,7 @@ let detect_used ~entrypoint rules =
     | None -> ()
     | Some x -> scan x
   in
-  visit entrypoint;
+  List.iter visit entrypoints;
   was_visited
 
 let name_of_body opt_rule_name body =
@@ -241,14 +242,16 @@ let tsort_rules rules =
     ) group
   ) sorted
 
+(*
+   Extras can be rule names, strings or patterns.
+   Here we only keep rule names. We need them to identify tree nodes that
+   are extras and should be handled independently from the grammar.
+*)
 let filter_extras bodies =
   List.filter_map (fun (x : Tree_sitter_t.rule_body) ->
     match x with
     | SYMBOL name -> Some name
-    | STRING name ->
-        (* Results in tree-sitter parse error at the moment.
-           Presumably not super useful. *)
-        Some name
+    | STRING _ -> None
     | _ -> None
   ) bodies
 
@@ -259,7 +262,8 @@ let of_tree_sitter (x : Tree_sitter_t.grammar) : t =
     | (name, _) :: _ -> name
     | _ -> "program"
   in
-  let is_used = detect_used ~entrypoint x.rules in
+  let extras = filter_extras x.extras in
+  let is_used = detect_used ~entrypoints:(entrypoint :: extras) x.rules in
   let grammar_rules = translate_rules x.rules in
   let all_rules =
     make_external_rules x.externals @ grammar_rules
@@ -270,12 +274,11 @@ let of_tree_sitter (x : Tree_sitter_t.grammar) : t =
         body;
         is_rec = true; (* set correctly by tsort below *)
         is_inlined_rule = is_inlined_rule;
-        is_inlined_type = false
+        is_inlined_type = false;
       }
     )
   in
   let sorted_rules = tsort_rules all_rules in
-  let extras = filter_extras x.extras in
   {
     name = x.name;
     entrypoint;
diff --git a/src/gen/lib/Codegen.ml b/src/gen/lib/Codegen.ml
index 700625b..349a38a 100644
--- a/src/gen/lib/Codegen.ml
+++ b/src/gen/lib/Codegen.ml
@@ -50,12 +50,14 @@ let () =
     ~parse_source_file:%s.parse_source_file
     ~parse_input_tree:%s.parse_input_tree
     ~dump_tree:%s.dump_tree
+    ~dump_extras:%s.dump_extras
 "
     lib_module_name
     lang
     parse_module_name
     parse_module_name
     boilerplate_module_name
+    boilerplate_module_name
 
 let ocaml ?out_dir ~lang grammar =
   let cst_module_name = "CST" in
diff --git a/src/gen/lib/Codegen_CST.ml b/src/gen/lib/Codegen_CST.ml
index b64f2ba..950c720 100644
--- a/src/gen/lib/Codegen_CST.ml
+++ b/src/gen/lib/Codegen_CST.ml
@@ -71,10 +71,20 @@ module Fmt = struct
   let type_app param type_name =
     label param (atom type_name)
 
-  let product l =
+  (* About the need for parentheses:
+     - For polymorphic variants, the presence of parentheses changes nothing,
+       a standalone tuple is always created.
+     - For classic variants though, the presence parentheses forces the
+       creation of a tuple which is detachable from the variant constructor.
+       Without parentheses though, we save the allocation of a block. *)
+  let product ~paren l =
     match l with
     | [x] -> x
-    | l -> E.List (("(", "*", ")", Style.left_sep_paren_list), l)
+    | l ->
+        if paren then
+          E.List (("(", "*", ")", Style.left_sep_paren_list), l)
+        else
+          E.List (("", "*", "", Style.left_sep_paren_list), l)
 
   let classic_variant l =
     let cases =
@@ -110,7 +120,7 @@ module Fmt = struct
   let top_sequence l =
     E.List (("", "", "", Style.vert_seq), l)
 
-  let typedef pos (name, inlined, rhs) =
+  let typedef pos (name, inlined, opt_rhs) =
     let is_first = (pos = 0) in
     let type_ =
       if is_first then
@@ -122,7 +132,11 @@ module Fmt = struct
       if inlined then " (* inlined *)"
       else ""
     in
-    let code = def (sprintf "%s %s%s =" type_ name comment) rhs in
+    let code =
+      match opt_rhs with
+      | Some rhs -> def (sprintf "%s %s%s =" type_ name comment) rhs
+      | None -> atom (sprintf "%s %s%s" type_ name comment)
+    in
     if is_first then code
     else
       top_sequence [
@@ -206,7 +220,7 @@ let rec format_body ?def_name body : E.t =
   | Optional body ->
       Fmt.type_app (format_body body) "option"
   | Seq body_list ->
-      Fmt.product (format_seq body_list)
+      Fmt.product ~paren:true (format_seq body_list)
 
 and format_choice l =
   List.map (fun (name, body) ->
@@ -219,7 +233,29 @@ and format_seq l =
 let format_rule (rule : rule) =
   (trans rule.name,
    rule.is_inlined_type,
-   format_body ~def_name:rule.name rule.body)
+   Some (format_body ~def_name:rule.name rule.body))
+
+let format_extra_def extras =
+  let rhs =
+    match extras with
+    | [] -> None
+    | extras ->
+        let cases =
+          extras
+          |> List.map (fun rule_name ->
+            let constructor =
+              Codegen_util.translate_ident_uppercase rule_name in
+            (constructor, Some (Fmt.product ~paren:false [
+               format_body (Symbol "Loc.t");
+               format_body (Symbol rule_name);
+             ]))
+          )
+          |> Fmt.poly_variant
+        in
+        Some cases
+  in
+  [[("extra", false, rhs)];
+   [("extras", false, Some (Fmt.atom "extra list"))]]
 
 (*
    1. Identify names that are used at most once, becoming candidates
@@ -242,7 +278,7 @@ let format_types grammar =
       Fmt.recursive_typedefs x;
       Fmt.atom ""
     ]
-  ) semi_formatted_defs
+  ) (semi_formatted_defs @ format_extra_def grammar.extras)
   |> Fmt.top_sequence
 
 let generate grammar =
diff --git a/src/gen/lib/Codegen_boilerplate.ml b/src/gen/lib/Codegen_boilerplate.ml
index bf5186e..da4bd88 100644
--- a/src/gen/lib/Codegen_boilerplate.ml
+++ b/src/gen/lib/Codegen_boilerplate.ml
@@ -173,7 +173,7 @@ let gen_rule_mapper_binding ~cst_module_name (rule : rule) =
     Block (gen_mapper_body_multi env);
   ]
 
-let gen ~cst_module_name grammar =
+let gen ~cst_module_name ~is_extra grammar =
   List.filter_map (fun rule_group ->
     let is_rec =
       match rule_group with
@@ -182,7 +182,7 @@ let gen ~cst_module_name grammar =
     in
     let bindings =
       List.filter_map (fun rule ->
-        if rule.is_inlined_type then
+        if rule.is_inlined_type && not (is_extra rule.name) then
           None
         else
           Some (gen_rule_mapper_binding ~cst_module_name rule)
@@ -204,14 +204,63 @@ let generate_dumper grammar =
 
 let dump_tree root =
   map_%s () root
-  |> Tree_sitter_run.Raw_tree.to_string
-  |> print_string
+  |> Tree_sitter_run.Raw_tree.to_channel stdout
 "
     (trans grammar.entrypoint)
 
+let generate_map_extra grammar =
+  let cases =
+    grammar.extras
+    |> List.map (fun rule_name ->
+      let constructor =
+        Codegen_util.translate_ident_uppercase rule_name in
+      sprintf "  | `%s (_loc, x) -> (%S, %S, map_%s env x)\n"
+        constructor rule_name (trans rule_name) (trans rule_name)
+    )
+    |> String.concat ""
+  in
+  sprintf "\
+
+let map_extra (env : env) (x : CST.extra) =
+  match x with
+%s"
+    cases
+
+let generate_extra_dumper grammar =
+  match grammar.extras with
+  | [] ->
+      "\
+let dump_extras (extras : CST.extras) = ()
+"
+  | _ ->
+      sprintf "\
+%s
+let dump_extras (extras : CST.extras) =
+  List.iter (fun extra ->
+    let ts_rule_name, ocaml_type_name, raw_tree = map_extra () extra in
+    let details =
+      if ocaml_type_name <> ts_rule_name then
+        Printf.sprintf \" (OCaml type '%%s')\" ocaml_type_name
+      else
+        \"\"
+    in
+    Printf.printf \"%%s%%s:\\n\" ts_rule_name details;
+    Tree_sitter_run.Raw_tree.to_channel stdout raw_tree
+  ) extras
+"
+        (generate_map_extra grammar)
+
+let make_is_extra grammar =
+  let tbl = Hashtbl.create 100 in
+  List.iter (fun rule_name -> Hashtbl.replace tbl rule_name ()) grammar.extras;
+  fun rule_name ->
+    Hashtbl.mem tbl rule_name
+
 let generate ~cst_module_name grammar =
   let inline_grammar = Nice_typedefs.rearrange_rules grammar in
-  let tree = gen ~cst_module_name inline_grammar in
+  let is_extra = make_is_extra grammar in
+  let tree = gen ~cst_module_name ~is_extra inline_grammar in
   make_header grammar
   ^ Indent.to_string tree
   ^ generate_dumper grammar
+  ^ generate_extra_dumper grammar
diff --git a/src/gen/lib/Codegen_parse.ml b/src/gen/lib/Codegen_parse.ml
index 6e29c5e..bdd00f8 100644
--- a/src/gen/lib/Codegen_parse.ml
+++ b/src/gen/lib/Codegen_parse.ml
@@ -32,13 +32,13 @@ let mli_contents grammar : string =
 *)
 val string :
   ?src_file:string -> string ->
-  CST.%s Tree_sitter_run.Parsing_result.t
+  (CST.%s, CST.extra) Tree_sitter_run.Parsing_result.t
 
 (** Parse a %s program from a file into a typed OCaml CST.
     See the [string] function above for details. *)
 val file :
   string ->
-  CST.%s Tree_sitter_run.Parsing_result.t
+  (CST.%s, CST.extra) Tree_sitter_run.Parsing_result.t
 
 (** Whether to print debugging information. Default: false. *)
 val debug : bool ref
@@ -56,7 +56,7 @@ val parse_source_file : string -> Tree_sitter_run.Tree_sitter_parsing.t
 (** Parse a tree-sitter CST into an OCaml typed CST. *)
 val parse_input_tree :
   Tree_sitter_run.Tree_sitter_parsing.t ->
-  CST.%s Tree_sitter_run.Parsing_result.t
+  (CST.%s, CST.extra) Tree_sitter_run.Parsing_result.t
 |}
     lang
     lang (trans root_type)
@@ -385,24 +385,92 @@ let gen_translators ~cst_module_name grammar =
   |> Codegen_util.interleave [Line ""]
   |> List.flatten
 
+let trans_tree =
+  {|(*
+   Costly operation that translates a whole tree or subtree.
+
+   The first pass translates it into a generic tree structure suitable
+   to guess which node corresponds to each grammar rule.
+   The second pass is a translation into a typed tree where each grammar
+   node has its own type.
+
+   This function is called:
+   - once on the root of the program after removing extras
+     (comments and other nodes that occur anywhere independently from
+     the grammar);
+   - once of each extra node, resulting in its own independent tree of type
+     'extra'.
+*)
+let translate_tree src node trans_x =
+  let matched_tree = Run.match_tree children_regexps src node in
+  Option.map trans_x matched_tree
+|}
+
+let gen_trans_extras grammar =
+  let body =
+    match grammar.extras with
+    | [] -> [ Line "None" ]
+    | extras ->
+        let cases =
+          (extras
+           |> List.map (fun name ->
+             Inline [
+               Line (sprintf "| %S ->" name);
+               Block [
+                 Block [
+                   Line (sprintf
+                           "(match translate_tree src node trans_%s with"
+                           (trans name));
+                   Line "| None -> None";
+                   Line (sprintf "| Some x -> Some (`%s (Run.get_loc node, x)))"
+                           (Codegen_util.translate_ident_uppercase name));
+                 ];
+               ]
+             ]
+           ))
+          @ [ Line "| _ -> None" ]
+        in
+        [
+          Line "match node.type_ with";
+          Inline cases;
+        ]
+  in
+  [
+    Line "let translate_extra src (node : Tree_sitter_output_t.node) \
+          : CST.extra option =";
+    Block body
+  ]
+
 let gen ~cst_module_name grammar =
   let regexps = gen_regexps grammar in
   let translators = gen_translators ~cst_module_name grammar in
+  let trans_extras = gen_trans_extras grammar in
   [
     Inline regexps;
     Line "";
     Inline translators;
+    Line "";
+    Line trans_tree;
+    Line "";
+    Inline trans_extras;
   ]
 
 let ml_trailer grammar = sprintf {|
+let translate_root src root_node =
+  translate_tree src root_node trans_%s
+
 let parse_input_tree input_tree =
   let orig_root_node = Tree_sitter_parsing.root input_tree in
   let src = Tree_sitter_parsing.src input_tree in
   let errors = Run.extract_errors src orig_root_node in
-  let root_node = Run.remove_extras ~extras orig_root_node in
-  let matched_tree = Run.match_tree children_regexps src root_node in
-  let opt_program = Option.map trans_%s matched_tree in
-  Parsing_result.create src opt_program errors
+  let opt_program, extras =
+     Run.translate
+       ~extras
+       ~translate_root:(translate_root src)
+       ~translate_extra:(translate_extra src)
+       orig_root_node
+  in
+  Parsing_result.create src opt_program extras errors
 
 let string ?src_file contents =
   let input_tree = parse_source_string ?src_file contents in
diff --git a/src/gen/lib/Codegen_util.ml b/src/gen/lib/Codegen_util.ml
index 329fef4..2c6cbef 100644
--- a/src/gen/lib/Codegen_util.ml
+++ b/src/gen/lib/Codegen_util.ml
@@ -10,6 +10,22 @@ let translate_ident =
   in
   fun ident -> Protect_ident.add_translation map ident
 
+(* Capitalize an identifier to be used in OCaml code for variants and such.
+   It checks that the argument starts with an ASCII letter. *)
+let translate_ident_uppercase s =
+  let res = String.capitalize_ascii s |> translate_ident in
+  if String.length res > 0 &&
+     match res.[0] with
+     | 'A'..'Z' -> true
+     | _ -> false
+  then
+    res
+  else
+    invalid_arg (
+      sprintf "translate_ident_uppercase: cannot capitalize identifier %S"
+        s
+    )
+
 let interleave sep l =
   let rec loop = function
     | [] -> []
diff --git a/src/gen/lib/Protect_ident.ml b/src/gen/lib/Protect_ident.ml
index b403716..665f22a 100644
--- a/src/gen/lib/Protect_ident.ml
+++ b/src/gen/lib/Protect_ident.ml
@@ -77,7 +77,13 @@ let ocaml_builtin_types = [
   "option";
 ]
 
-let ocaml_reserved = ocaml_keywords @ ocaml_builtin_types
+let reserved_type_names = [
+  "extra";
+  "extras";
+]
+
+let ocaml_reserved =
+  ocaml_keywords @ ocaml_builtin_types @ reserved_type_names
 
 (*
    Map from input identifier to output identifier and vice-versa.
diff --git a/src/gen/lib/Protect_ident.mli b/src/gen/lib/Protect_ident.mli
index e786e67..c3a2d1e 100644
--- a/src/gen/lib/Protect_ident.mli
+++ b/src/gen/lib/Protect_ident.mli
@@ -41,5 +41,8 @@ val ocaml_keywords : string list
 (* Lowercase identifiers that are built-in type names in OCaml. *)
 val ocaml_builtin_types : string list
 
-(* Union of ocaml_keywords and ocaml_builtin_types *)
+(* Type names reserved for use by the code generator. *)
+val reserved_type_names : string list
+
+(* Union of ocaml_keywords, ocaml_builtin_types, reserved_type_names *)
 val ocaml_reserved : string list
diff --git a/src/run/lib/Main.ml b/src/run/lib/Main.ml
index 5b74f0a..f8928d6 100644
--- a/src/run/lib/Main.ml
+++ b/src/run/lib/Main.ml
@@ -182,6 +182,7 @@ let parse_and_dump
     ~parse_source_file
     ~parse_input_tree
     ~dump_tree
+    ~dump_extras
     conf =
   let input_tree = load_input_tree ~parse_source_file conf in
   if conf.output_json then (
@@ -197,6 +198,8 @@ let parse_and_dump
         printf "---\n";
         printf "Recovered typed CST:\n%!";
         dump_tree matched_tree;
+        printf "Extras:\n%!";
+        dump_extras res.extras;
         true
     | None ->
         eprintf "Error: \
@@ -238,7 +241,7 @@ success: %.2f%%
   in
   exit_code
 
-let run ~lang ~parse_source_file ~parse_input_tree ~dump_tree =
+let run ~lang ~parse_source_file ~parse_input_tree ~dump_tree ~dump_extras =
   let conf = parse_command_line ~lang in
   safe_run (fun () ->
     let suggested_exit_code =
@@ -246,6 +249,7 @@ let run ~lang ~parse_source_file ~parse_input_tree ~dump_tree =
         ~parse_source_file
         ~parse_input_tree
         ~dump_tree
+        ~dump_extras
         conf
     in
     exit suggested_exit_code
diff --git a/src/run/lib/Main.mli b/src/run/lib/Main.mli
index 5dd73b1..72b4c41 100644
--- a/src/run/lib/Main.mli
+++ b/src/run/lib/Main.mli
@@ -5,5 +5,7 @@
 val run :
   lang:string ->
   parse_source_file:(string -> Tree_sitter_parsing.t) ->
-  parse_input_tree:(Tree_sitter_parsing.t -> 'a Parsing_result.t) ->
-  dump_tree:('a -> unit) -> unit
+  parse_input_tree:
+    (Tree_sitter_parsing.t -> ('prog, 'extra) Parsing_result.t) ->
+  dump_tree:('prog -> unit) ->
+  dump_extras:('extra list -> unit) -> unit
diff --git a/src/run/lib/Parsing_result.ml b/src/run/lib/Parsing_result.ml
index 0138664..3929381 100644
--- a/src/run/lib/Parsing_result.ml
+++ b/src/run/lib/Parsing_result.ml
@@ -10,8 +10,9 @@ type stat = {
   error_count: int;
 }
 
-type 'a t = {
+type ('a, 'b) t = {
   program: 'a option;
+  extras: 'b list;
   errors: Tree_sitter_error.t list;
   stat: stat;
 }
@@ -40,10 +41,11 @@ let create_stat src errors =
     error_count;
   }
 
-let create src program errors =
+let create src program extras errors =
   let stat = create_stat src errors in
   {
     program;
+    extras;
     errors;
     stat;
   }
diff --git a/src/run/lib/Parsing_result.mli b/src/run/lib/Parsing_result.mli
index e64a13d..26f9e5a 100644
--- a/src/run/lib/Parsing_result.mli
+++ b/src/run/lib/Parsing_result.mli
@@ -8,12 +8,24 @@ type stat = {
   error_count: int;
 }
 
-type 'a t = {
-  program: 'a option;
+(*
+   There's one type for the program which is the grammar's entrypoint,
+   and one type for all the extras which are independent entrypoints.
+   For example:
+
+     type program = ...
+     type extra =
+     | Comment of ...
+     | Heredoc of ...
+*)
+type ('program, 'extra) t = {
+  program: 'program option;
+  extras: 'extra list;
   errors: Tree_sitter_error.t list;
   stat: stat;
 }
 
-val create : Src_file.t -> 'a option -> Tree_sitter_error.t list -> 'a t
+val create :
+  Src_file.t -> 'a option -> 'b list -> Tree_sitter_error.t list -> ('a, 'b) t
 
 val export_stat : out_file:string -> stat -> unit
diff --git a/src/run/lib/Raw_tree.ml b/src/run/lib/Raw_tree.ml
index 755a09c..3eea01c 100644
--- a/src/run/lib/Raw_tree.ml
+++ b/src/run/lib/Raw_tree.ml
@@ -64,3 +64,7 @@ let format ?(format_any = fun _ -> [Line "??"]) x =
 let to_string ?format_any x =
   format ?format_any x
   |> Tree_sitter_gen.Indent.to_string
+
+let to_channel ?format_any oc x =
+  to_string ?format_any x
+  |> output_string oc
diff --git a/src/run/lib/Raw_tree.mli b/src/run/lib/Raw_tree.mli
index baac262..ac8b692 100644
--- a/src/run/lib/Raw_tree.mli
+++ b/src/run/lib/Raw_tree.mli
@@ -49,3 +49,8 @@ val format :
 val to_string :
   ?format_any: ('a -> Tree_sitter_gen.Indent.t) ->
   'a t -> string
+
+val to_channel :
+  ?format_any: ('a -> Tree_sitter_gen.Indent.t) ->
+  out_channel ->
+  'a t -> unit
diff --git a/src/run/lib/Run.ml b/src/run/lib/Run.ml
index d780a6a..158f7c1 100644
--- a/src/run/lib/Run.ml
+++ b/src/run/lib/Run.ml
@@ -216,39 +216,81 @@ let extract_errors src root_node =
             | Error -> "???" (* should not happen *)))
   )
 
-let rec filter_nodes keep nodes =
-  List.filter_map (filter_node keep) nodes
-
-and filter_node keep node =
-  if keep node then
-    Some { node with children = Option.map (filter_nodes keep) node.children }
+(* Remove extras from the tree, leaving only nodes matching the entrypoint
+   rule. *)
+let rec remove_extras_from_opt_nodes ~keep_node opt_nodes =
+  match opt_nodes with
+  | None -> None
+  | Some nodes ->
+      Some (List.filter_map (remove_extras_from_node ~keep_node) nodes)
+
+and remove_extras_from_node ~keep_node node =
+  if keep_node node then
+    Some {
+      node with
+      children = remove_extras_from_opt_nodes ~keep_node node.children
+    }
   else
     None
 
-let make_keep ~blacklist =
-  let tbl = Hashtbl.create 100 in
-  List.iter (fun s -> Hashtbl.replace tbl s ()) blacklist;
-  let keep node =
+(*
+   Produce fast functions for identifying whether a node is an extra
+   and whether a node should be removed to allow matching with the structure
+   of the grammar. This involves removing error nodes and other extra nodes
+   tree-sitter may decide to insert.
+*)
+let make_filters ~extras =
+  let extra_tbl = Hashtbl.create 100 in
+  List.iter (fun s -> Hashtbl.replace extra_tbl s ()) extras;
+  let is_extra node = Hashtbl.mem extra_tbl node.type_ in
+  let keep_node node =
     not (node.kind = Error)
-    && not (Hashtbl.mem tbl node.type_)
+    && not (is_extra node)
   in
-  keep
+  is_extra, keep_node
 
 (*
-   Remove error nodes, missing nodes, and extra nodes.
-
-   Error nodes indicate unexpected input and can be removed from the tree
-   while respecting the grammar (because the error bubbles up until
-   it's an optional position such as in a repeat()).
-
-   Missing nodes are suggested nodes that don't exist in the input.
-   Missing nodes are left in place so as to keep the tree well-formed
-   but they're reported as errors.
-
-   Extra nodes are nodes that can appear anywhere in the tree such as comments.
+   Remove error nodes and extra nodes, which are nodes that can appear
+   anywhere in the tree.
 *)
-let remove_extras ~extras =
-  let keep = make_keep ~blacklist:extras in
-  fun root_node ->
-    { root_node with
-      children = Option.map (filter_nodes keep) root_node.children }
+let remove_extras ~keep_node root_node =
+  { root_node with
+    children = remove_extras_from_opt_nodes ~keep_node root_node.children }
+
+(* Translate extras and accumulate them into a list. *)
+let scan_node_for_extras ~is_extra ~keep_node ~translate_extra node =
+  let rec scan_nodes_for_extras acc opt_nodes =
+    match opt_nodes with
+    | None -> acc
+    | Some nodes ->
+        List.fold_left (scan_node_for_extras ~translate_extra) acc nodes
+
+  and scan_node_for_extras ~translate_extra acc node =
+    let acc =
+      if is_extra node then
+        (* We must remove the other extras from the child subtrees.
+           This is inefficient if we have large, nested extras due to the
+           subtree being rewritten each time we encounter an extra node.
+           In practice, it should be ok. *)
+        match remove_extras ~keep_node node
+              |> translate_extra with
+        | None -> acc
+        | Some x -> x :: acc
+      else
+        acc
+    in
+    (* An extra can contain other extras, so we must recurse into the children
+       regardless of whether the current node is an extra. *)
+    scan_nodes_for_extras acc node.children
+  in
+  scan_node_for_extras ~translate_extra [] node |> List.rev
+
+let translate ~extras ~translate_root ~translate_extra orig_root_node =
+  let is_extra, keep_node = make_filters ~extras in
+  let pure_root_node = remove_extras ~keep_node orig_root_node in
+  let root = translate_root pure_root_node in
+  let extras =
+    scan_node_for_extras
+      ~is_extra ~keep_node ~translate_extra orig_root_node
+  in
+  (root, extras)
diff --git a/src/run/lib/Run.mli b/src/run/lib/Run.mli
index e971a7f..daa726e 100644
--- a/src/run/lib/Run.mli
+++ b/src/run/lib/Run.mli
@@ -15,6 +15,8 @@ type exp =
 
 type capture = matcher_token Matcher.capture
 
+val get_loc : Tree_sitter_bindings.Tree_sitter_output_t.node -> Loc.t
+
 val match_tree :
   (string * exp option) list ->
   Src_file.t ->
@@ -38,6 +40,14 @@ val extract_errors :
   Tree_sitter_error.t list
 
 val remove_extras :
-  extras:string list ->
+  keep_node:(Tree_sitter_bindings.Tree_sitter_output_t.node -> bool) ->
   Tree_sitter_bindings.Tree_sitter_output_t.node ->
   Tree_sitter_bindings.Tree_sitter_output_t.node
+
+val translate :
+  extras:string list ->
+  translate_root:(Tree_sitter_bindings.Tree_sitter_output_t.node -> 'a) ->
+  translate_extra:(Tree_sitter_bindings.Tree_sitter_output_t.node ->
+                   'b option) ->
+  Tree_sitter_bindings.Tree_sitter_output_t.node ->
+  'a * 'b list
diff --git a/src/run/lib/Sample.ml b/src/run/lib/Sample.ml
index c6af8e7..3c5e678 100644
--- a/src/run/lib/Sample.ml
+++ b/src/run/lib/Sample.ml
@@ -25,6 +25,10 @@ module CST = struct
   ]
   type statement = (expression * (Loc.t * string (* ";" *)))
   type program = statement list (* zero or more *)
+  type comment = (Loc.t * string)
+  type extra = [
+    | `Comment of comment
+  ]
 end
 
 module Parse = struct
@@ -81,6 +85,10 @@ module Parse = struct
         |];
       )
     );
+    (
+      "comment",
+      None
+    );
   ]
 
   (* generated *)
@@ -130,21 +138,50 @@ module Parse = struct
         )
     | _ -> assert false
 
-  (* generated *)
+  (* generated - entrypoint *)
   let trans_program ((kind, body) : mt) =
     match body with
     | Children v ->
         Run.repeat (fun v -> trans_statement (Run.matcher_token v)) v
     | _ -> assert false
 
+  (* generated - extra *)
+  let trans_comment ((kind, body) : mt) : CST.variable =
+    match body with
+    | Leaf v -> v
+    | _ -> assert false
+
+  let translate_tree src node trans_x =
+    let matched_tree = Run.match_tree children_regexps src node in
+    Option.map trans_x matched_tree
+
+  (* generated *)
+  let translate_extra src
+      (node : Tree_sitter_bindings.Tree_sitter_output_t.node)
+    : CST.extra option =
+    match node.type_ with
+    | "comment" ->
+        (match translate_tree src node trans_comment with
+         | None -> None
+         | Some x -> Some (`Comment x))
+    | _ -> None
+
+  (* generated *)
+  let translate_root src root_node =
+    translate_tree src root_node trans_program
+
   let parse_input_tree input_tree =
     let orig_root_node = Tree_sitter_parsing.root input_tree in
     let src = Tree_sitter_parsing.src input_tree in
     let errors = Run.extract_errors src orig_root_node in
-    let root_node = Run.remove_extras ~extras orig_root_node in
-    let matched_tree = Run.match_tree children_regexps src root_node in
-    let program = Option.map trans_program matched_tree in
-    Parsing_result.create src program errors
+    let opt_program, extras =
+      Run.translate
+        ~extras
+        ~translate_root:(translate_root src)
+        ~translate_extra:(translate_extra src)
+        orig_root_node
+    in
+    Parsing_result.create src opt_program extras errors
 
   let string ?src_file contents =
     let input_tree = parse_source_string ?src_file contents in
diff --git a/test/extras/check-test-output b/test/extras/check-test-output
new file mode 100755
index 0000000..9b18100
--- /dev/null
+++ b/test/extras/check-test-output
@@ -0,0 +1,10 @@
+#! /usr/bin/env bash
+#
+# Check specific bits of output
+#
+set -eu
+
+if ! grep 'complex_extra:' test.out/ok/complex_extra.cst > /dev/null; then
+  echo "Extra 'complex_extra' is missing from test.out/ok/complex_extra.cst"
+  exit 1
+fi
diff --git a/test/extras/grammar.js b/test/extras/grammar.js
index bcf9cf8..fe9e501 100644
--- a/test/extras/grammar.js
+++ b/test/extras/grammar.js
@@ -5,10 +5,18 @@ module.exports = grammar({
       $.number
     ),
     number: $ => /[0-9]+/,
-    comment: $ => /#.*/
+    letter: $ => /[a-zA-Z]/,
+    comment: $ => /#.*/,
+    complex_extra: $ => seq('(', repeat(choice($.number, $.letter)), ')'),
+    extra: $ => 'extra',    // a rule named 'extra' (!)
+    extras: $ => 'extras',  // a rule named 'extras' (!)
   },
   extras: $ => [
-    $.comment,
-    /\s|\\\n/
+    $.comment,       // appears in the CST
+    $.complex_extra, // same
+    $.extra,
+    $.extras,
+    "IGNOREME",  // doesn't appear in the CST because it doesn't have a name
+    /\s|\\\n/    // same
   ]
 })
diff --git a/test/extras/test/ok/a_rule_named_extra b/test/extras/test/ok/a_rule_named_extra
new file mode 100644
index 0000000..072889c
--- /dev/null
+++ b/test/extras/test/ok/a_rule_named_extra
@@ -0,0 +1,5 @@
+extra
+extras
+42
+# something
+17
diff --git a/test/extras/test/ok/complex_extra b/test/extras/test/ok/complex_extra
new file mode 100644
index 0000000..5ba1689
--- /dev/null
+++ b/test/extras/test/ok/complex_extra
@@ -0,0 +1,3 @@
+# first comment
+1 2 (3 A # extra in the middle of another extra!
+ 4) 5
diff --git a/test/extras/test/ok/ex2 b/test/extras/test/ok/ex2
new file mode 100644
index 0000000..cf76a52
--- /dev/null
+++ b/test/extras/test/ok/ex2
@@ -0,0 +1,2 @@
+1 2 # hello
+3 IGNOREME 4
diff --git a/test/implicit-extra/check-test-output b/test/implicit-extra/check-test-output
index ceee77b..a9275ed 100755
--- a/test/implicit-extra/check-test-output
+++ b/test/implicit-extra/check-test-output
@@ -14,6 +14,5 @@ for actual in $targets; do
   diff -u "$expected" "$actual"
 done
 
-echo "Checking parse results."
-
-diff example-parse.expected <(./parse test/ok/example)
+echo "Checking parse results: example-parse.expected"
+diff -u example-parse.expected <(./parse test/ok/example)
diff --git a/test/implicit-extra/example-parse.expected b/test/implicit-extra/example-parse.expected
index 5285f7e..9d71a1c 100644
--- a/test/implicit-extra/example-parse.expected
+++ b/test/implicit-extra/example-parse.expected
@@ -39,6 +39,23 @@ Recovered typed CST:
     )
   ]
 )
+Extras:
+period:
+"."
+comma:
+","
+period:
+"."
+comma:
+","
+comma:
+","
+period:
+"."
+period:
+"."
+comma:
+","
 total lines: 2
 error lines: 0
 error count: 0