Skip to content

Extract code blocks by name #1326

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 28 commits into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
de30ff2
`odoc extract-code` prototype
panglesd Mar 4, 2025
bc22fa2
`odoc extract-code` cleanup and test
panglesd Mar 5, 2025
f0a7487
Complete `odoc extract-code`
panglesd Mar 5, 2025
324e602
Added changelog entry for #1326
panglesd Mar 5, 2025
ff1dc82
Make test more robust
panglesd Mar 5, 2025
ebeadf2
Test: 5.3 compatibility
panglesd Mar 5, 2025
ba9dfd4
Extract code: handle cmti files
panglesd Mar 5, 2025
a46ed82
Fix cmdliner compatibility
panglesd Mar 11, 2025
96a2661
Test compat with OCaml version
panglesd Mar 11, 2025
35b0bbb
OCaml 4.14 compatibility
panglesd Mar 11, 2025
3be6f1c
Parse code block tags
panglesd Mar 11, 2025
e9719f1
Fix unescaping of quoted strings
panglesd Mar 11, 2025
8d6e5e3
Add location for tag/binding
panglesd Mar 11, 2025
304e69f
Add location to binding key and value
panglesd Mar 11, 2025
9fb8df3
Add location to the whole tag block
panglesd Mar 11, 2025
fc1e773
Extract code: handle error
panglesd Mar 11, 2025
5a94728
Disable extract-code on OCaml < 4.10
panglesd Mar 11, 2025
4efe1b4
Fix typo
panglesd Mar 11, 2025
2ece420
Fix odoc_of_md wrt new tag type
panglesd Mar 11, 2025
64bbecf
Make ocamlformat ignore cppo file
panglesd Mar 11, 2025
9b2692f
Tag parsing: Unescape everything
panglesd Mar 12, 2025
e24f873
Warn on escaped character that does not need escaping
panglesd Mar 12, 2025
950bda3
Simplify code-block tag types and parser
jonludlam Mar 14, 2025
58dcacb
Parser: Use lexer for quoted strings in code block metadata
jonludlam Mar 14, 2025
9fff131
Parser: Allow \ddd escape sequence in code-block metadata
jonludlam Mar 14, 2025
8afa524
Parser: more tests
jonludlam Mar 14, 2025
9c9940b
Parser: Fixes following PR review
jonludlam Mar 17, 2025
b42d48a
Parser: Ensure parser can be called concurrently
jonludlam Mar 17, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

- Added support for (local) images in the latex backend (@Octachron, #1297)
- Added a `header` field to the json output (@panglesd, #1314)
- Added an `extract-code` subcommand to extract code blocks from mld files
(@panglesd, #1326)

### Changed

Expand Down
1 change: 1 addition & 0 deletions src/.ocamlformat-ignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ loader/lookup_def.ml
loader/lookup_def.mli
syntax_highlighter/syntax_highlighter.ml
model/*.cppo.ml
odoc/*.cppo.ml
html_support_files/*.ml
xref2/shape_tools.*
odoc/classify.cppo.ml
Expand Down
12 changes: 12 additions & 0 deletions src/loader/doc_attr.mli
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,15 @@ val conv_canonical_module : Odoc_model.Reference.path -> Paths.Path.Module.t
val conv_canonical_type : Odoc_model.Reference.path -> Paths.Path.Type.t option
val conv_canonical_module_type :
Odoc_model.Reference.path -> Paths.Path.ModuleType.t option

type payload = string * Location.t

type parsed_attribute =
[ `Text of payload (* Standalone comment. *)
| `Doc of payload (* Attached comment. *)
| `Stop of Location.t (* [(**/**)]. *)
| `Alert of string * payload option * Location.t
(* [`Alert (name, payload, loc)] is for [\[@@alert name "payload"\]] attributes. *)
]

val parse_attribute : Parsetree.attribute -> parsed_attribute option
2 changes: 2 additions & 0 deletions src/loader/odoc_loader.ml
Original file line number Diff line number Diff line change
Expand Up @@ -268,3 +268,5 @@ let read_cmi ~make_root ~parent ~filename ~warnings_tag =
wrap_errors ~filename (read_cmi ~make_root ~parent ~filename ~warnings_tag)

let read_location = Doc_attr.read_location

let parse_attribute = Doc_attr.parse_attribute
7 changes: 7 additions & 0 deletions src/loader/odoc_loader.mli
Original file line number Diff line number Diff line change
Expand Up @@ -41,3 +41,10 @@ val read_cmi :
(Lang.Compilation_unit.t, Error.t) result Error.with_warnings

val read_location : Location.t -> Location_.span

val wrap_errors :
filename:string ->
(unit -> 'a) ->
'a Odoc_model.Error.with_errors_and_warnings

val parse_attribute : Parsetree.attribute -> Doc_attr.parsed_attribute option
4 changes: 2 additions & 2 deletions src/markdown/doc_of_md.ml
Original file line number Diff line number Diff line change
Expand Up @@ -454,8 +454,8 @@ let code_block_to_nestable_block_element ~locator cb m (bs, warns) =
split_info_string_locs ~left_count ~right_count im
in
let env =
if env = "" then None
else Some (Loc.at (textloc_to_loc ~locator env_loc) env)
if env = "" then []
else [ `Tag (Loc.at (textloc_to_loc ~locator env_loc) env) ]
in
let lang = Loc.at (textloc_to_loc ~locator lang_loc) lang in
let metadata = Some { Ast.language = lang; tags = env } in
Expand Down
39 changes: 39 additions & 0 deletions src/odoc/bin/main.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1683,6 +1683,44 @@ module Classify = struct
are specified by the --library option."
end

module Extract_code = struct
let extract dst input line_directives names warnings_options =
Extract_code.extract ~dst ~input ~line_directives ~names ~warnings_options

let line_directives =
let doc = "Whether to include line directives in the output file" in
Arg.(value & flag & info ~doc [ "line-directives" ])

let names =
let doc =
"From which name(s) of code blocks to extract content. When no names are \
provided, extract all OCaml code blocks."
in
Arg.(value & opt_all string [] & info ~doc [ "name" ])

let input =
let doc = "Input $(i,.mld) file." in
Arg.(required & pos 0 (some file) None & info ~doc ~docv:"FILE" [])

let dst =
let doc = "Output file path." in
Arg.(
value
& opt (some string) None
& info ~docs ~docv:"PATH" ~doc [ "o"; "output" ])

let cmd =
Term.(
const handle_error
$ (const extract $ dst $ input $ line_directives $ names
$ warnings_options))

let info ~docs =
Cmd.info "extract-code" ~docs
~doc:
"Extract code blocks from mld files in order to be able to execute them"
end

let section_pipeline = "COMMANDS: Compilation pipeline"
let section_generators = "COMMANDS: Alternative generators"
let section_support = "COMMANDS: Scripting"
Expand Down Expand Up @@ -1737,6 +1775,7 @@ let () =
Css.(cmd, info ~docs:section_deprecated);
Depends.Odoc_html.(cmd, info ~docs:section_deprecated);
Classify.(cmd, info ~docs:section_pipeline);
Extract_code.(cmd, info ~docs:section_pipeline);
]
in
let main =
Expand Down
9 changes: 9 additions & 0 deletions src/odoc/dune
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,14 @@
%{workspace_root}
(run %{bin:cppo} -V OCAML:%{ocaml_version} %{x} -o %{targets}))))

(rule
(targets extract_code.ml)
(deps
(:x extract_code.cppo.ml))
(action
(chdir
%{workspace_root}
(run %{bin:cppo} -V OCAML:%{ocaml_version} %{x} -o %{targets}))))

(documentation
(package odoc))
244 changes: 244 additions & 0 deletions src/odoc/extract_code.cppo.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,244 @@
#if OCAML_VERSION >= (4,10,0)
open Odoc_utils
open Odoc_parser

let tags_included_in_names names tags =
List.exists
(function
| `Binding ({ Loc.value = "name"; _ }, { Loc.value = n; _ })
when List.exists (String.equal n) names ->
true
| _ -> false)
tags

let needs_extraction names meta =
let check_language () =
match meta with
| None -> true
| Some { Ast.language; _ } -> String.equal "ocaml" language.Loc.value
in
let check_name () =
match meta with
| Some { Ast.tags; _ } ->
tags_included_in_names names tags
| _ -> false
in
match names with [] -> check_language () | _ :: _ -> check_name ()

let print line_directives oc location value =
if line_directives then (
Printf.fprintf oc "#%d \"%s\"\n" location.Loc.start.line location.file;
Printf.fprintf oc "%s%s\n"
(String.v ~len:location.start.column (fun _ -> ' '))
value)
else Printf.fprintf oc "%s" value

let rec nestable_block_element line_directives oc names v =
match v.Loc.value with
| `Verbatim _ | `Modules _ | `Math_block _ | `Media _ | `Paragraph _ -> ()
| `Code_block { Ast.content = { value; location }; meta; _ }
when needs_extraction names meta ->
print line_directives oc location value
| `Code_block _ -> ()
| `List (_, _, l) ->
List.iter (List.iter (nestable_block_element line_directives oc names)) l
| `Table ((table, _), _) ->
List.iter
(List.iter (fun (x, _) ->
List.iter (nestable_block_element line_directives oc names) x))
table

let block_element line_directives oc names v =
match v.Loc.value with
| `Tag
( `Deprecated l
| `Param (_, l)
| `Raise (_, l)
| `Return l
| `See (_, _, l)
| `Before (_, l) ) ->
List.iter (nestable_block_element line_directives oc names) l
| `Tag
( `Author _ | `Since _ | `Version _ | `Canonical _ | `Inline | `Open
| `Children_order _ | `Toc_status _ | `Order_category _ | `Short_title _
| `Closed | `Hidden )
| `Heading _ ->
()
| #Ast.nestable_block_element as value ->
nestable_block_element line_directives oc names { v with value }

let pad_loc loc =
{ loc.Location.loc_start with pos_cnum = loc.loc_start.pos_cnum + 3 }

let iterator line_directives oc names =
let default_iterator = Tast_iterator.default_iterator in
let attribute _ attr =
match Odoc_loader.parse_attribute attr with
| None | Some (`Stop _ | `Alert _) -> ()
| Some (`Text (doc, loc) | `Doc (doc, loc)) ->
let ast_docs =
Odoc_parser.parse_comment ~location:(pad_loc loc) ~text:doc
in
let ast = Odoc_parser.ast ast_docs in
List.iter (block_element line_directives oc names) ast
in
let attributes sub attrs = List.iter (attribute sub) attrs in
(* For some reason, Tast_iterator.default_iterator does not recurse on
Tsig_attribute and on attributes... *)
let signature_item sub sig_ =
match sig_.Typedtree.sig_desc with
| Tsig_attribute attr -> attribute sub attr
| Tsig_include incl -> attributes sub incl.incl_attributes
| Tsig_open o -> attributes sub o.open_attributes
| _ -> default_iterator.signature_item sub sig_
in
let row_field sub rf =
attributes sub rf.Typedtree.rf_attributes;
default_iterator.row_field sub rf
in
let value_description sub vd =
attributes sub vd.Typedtree.val_attributes;
default_iterator.value_description sub vd
in
let label_declaration sub lbls =
List.iter (fun ld -> attributes sub ld.Typedtree.ld_attributes) lbls
in
let constructor_declaration sub cd =
(match cd.Typedtree.cd_args with
| Cstr_record lds -> label_declaration sub lds
| _ -> ());
attributes sub cd.cd_attributes
in
let type_kind sub tk =
(match tk with
| Typedtree.Ttype_record lbls -> label_declaration sub lbls
| Ttype_variant cstrs -> List.iter (constructor_declaration sub) cstrs
| _ -> ());
default_iterator.type_kind sub tk
in
let type_declaration sub decl =
attributes sub decl.Typedtree.typ_attributes;
default_iterator.type_declaration sub decl
in
let extension_constructor sub ext =
attributes sub ext.Typedtree.ext_attributes;
default_iterator.extension_constructor sub ext
in
let class_type_field sub ctf =
attributes sub ctf.Typedtree.ctf_attributes;
(match ctf.ctf_desc with
| Tctf_attribute attr -> attribute sub attr
| _ -> ());
default_iterator.class_type_field sub ctf
in
let class_type_declaration sub ctd =
attributes sub ctd.Typedtree.ci_attributes;
default_iterator.class_type_declaration sub ctd
in
let class_description sub cd =
attributes sub cd.Typedtree.ci_attributes;
default_iterator.class_description sub cd
in
(* let type_exception sub exc = *)
(* attributes sub ext.Typedtree.ext_attributes; *)
(* default_iterator.extension_constructor sub ext *)
(* in *)
let type_extension sub ext =
attributes sub ext.Typedtree.tyext_attributes;
default_iterator.type_extension sub ext
in
let module_type_declaration sub mtd =
attributes sub mtd.Typedtree.mtd_attributes;
default_iterator.module_type_declaration sub mtd
in
let module_declaration sub md =
attributes sub md.Typedtree.md_attributes;
default_iterator.module_declaration sub md
in
let module_expr sub me =
attributes sub me.Typedtree.mod_attributes;
default_iterator.module_expr sub me
in
let module_substitution sub ms =
attributes sub ms.Typedtree.ms_attributes;
default_iterator.module_substitution sub ms
in
(* let module_type_substitution sub mtd = *)
(* attributes sub mtd.Typedtree.mtd_attributes; *)
(* default_iterator.module_type_substitution sub ms *)
(* in *)
{
default_iterator with
row_field
(* ; attribute *)
(* ; attributes *);
value_description;
signature_item;
type_kind;
type_declaration;
extension_constructor;
type_extension;
class_type_field;
class_type_declaration;
class_description;
module_type_declaration;
module_declaration;
module_substitution;
module_expr;
}

let load_cmti line_directives oc names input ~warnings_options =
try
let res =
Odoc_loader.wrap_errors ~filename:input @@ fun () ->
let cmt_info = Cmt_format.read_cmt input in
match cmt_info.cmt_annots with
| Interface intf ->
let iterator = iterator line_directives oc names in
iterator.signature iterator intf;
Ok ()
| _ ->
Error
(`Msg (Format.sprintf "Provided file %s is not an interface" input))
in
Odoc_model.Error.handle_errors_and_warnings ~warnings_options res
|> Result.join
with exn ->
Error
(`Msg
(Format.sprintf
"Error while unmarshalling input file %s:\n\
%s\n\
Check that the input file is a valid cmti file"
input (Printexc.to_string exn)))

let load_mld line_directives oc names input =
let location =
{ Lexing.pos_fname = input; pos_lnum = 1; pos_bol = 0; pos_cnum = 0 }
in
let c = Io_utils.read_lines input |> String.concat ~sep:"\n" in
let parsed = parse_comment ~location ~text:c in
let ast = ast parsed in
List.iter (block_element line_directives oc names) ast;
Ok ()

let extract ~dst ~input ~names ~line_directives ~warnings_options =
let ( let* ) = Result.bind in
let* loader =
match input |> Fpath.v |> Fpath.get_ext with
| ".mld" -> Ok load_mld
| ".cmti" -> Ok (load_cmti ~warnings_options)
| _ -> Error (`Msg "Input must have either mld or cmti as extension")
in
match dst with
| None -> loader line_directives stdout names input
| Some dst ->
Io_utils.with_open_out dst @@ fun oc ->
loader line_directives oc names input

#else

let extract ~dst:_ ~input:_ ~names:_ ~line_directives:_ ~warnings_options:_ =
Error (`Msg "Extract-code is not available for OCaml < 4.10")

#endif
7 changes: 7 additions & 0 deletions src/odoc/extract_code.mli
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
val extract :
dst:string option ->
input:string ->
names:string list ->
line_directives:bool ->
warnings_options:Odoc_model.Error.warnings_options ->
(unit, [> `Msg of string ]) result
8 changes: 7 additions & 1 deletion src/parser/ast.ml
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,15 @@ type 'a row = 'a cell list
type 'a grid = 'a row list
type 'a abstract_table = 'a grid * alignment option list option

type code_block_tag =
[ `Tag of string with_location
| `Binding of string with_location * string with_location ]

type code_block_tags = code_block_tag list

type code_block_meta = {
language : string with_location;
tags : string with_location option;
tags : code_block_tags;
}

type media = Token.media
Expand Down
Loading