@@ -50,17 +50,51 @@ pub fn decode(llm_message: &str) -> (String, Option<String>) {
5050 // Detach the sentinel footnote reference.
5151 children. next ( ) . unwrap ( ) . detach ( ) ;
5252
53- for child in children {
54- match & child. data . borrow ( ) . value {
53+ for block in children {
54+ match & block. data . borrow ( ) . value {
55+ NodeValue :: Paragraph => {
56+ // Store our reconstructed markdown summary here, if it is found
57+ let mut buf: Option < String > = None ;
58+
59+ for child in block. children ( ) {
60+ // NB: We have to store this here due to more `comrak` quirks. Because `comrak`
61+ // uses an arena-based API with `RefCell`s, we cannot both mutably borrow its
62+ // inner data and also immutably generate a string from the outer container.
63+ // So, we generate the string ahead of time in case we need it.
64+ let child_text = comrak_to_string ( child) ;
65+
66+ match & mut child. data . borrow_mut ( ) . value {
67+ NodeValue :: Text ( s) if s. contains ( "[^summary]:" ) && buf. is_none ( ) => {
68+ let ( l, r) = s. split_once ( "[^summary]:" ) . unwrap ( ) ;
69+
70+ buf = Some ( r. trim_start ( ) . to_owned ( ) ) ;
71+ * s = l. trim_end ( ) . to_owned ( ) ;
72+ }
73+
74+ _ => {
75+ if let Some ( buf) = buf. as_mut ( ) {
76+ child. detach ( ) ;
77+ * buf += & child_text;
78+ buf. push ( ' ' ) ;
79+ }
80+ }
81+ }
82+ }
83+
84+ if let Some ( conclusion) = buf {
85+ return ( comrak_to_string ( root) , Some ( conclusion. trim ( ) . to_owned ( ) ) ) ;
86+ }
87+ }
88+
5589 NodeValue :: FootnoteDefinition ( def) if def. name == "summary" => ( ) ,
5690 _ => continue ,
5791 } ;
5892
59- if let Some ( first_child) = child . children ( ) . next ( ) {
93+ if let Some ( first_child) = block . children ( ) . next ( ) {
6094 if let NodeValue :: Paragraph = & first_child. data . borrow ( ) . value {
6195 // We detach the summary from the main text, so that it does not end up in the final
6296 // article output.
63- child . detach ( ) ;
97+ block . detach ( ) ;
6498 return ( comrak_to_string ( root) , Some ( comrak_to_string ( first_child) ) ) ;
6599 }
66100 }
@@ -1043,24 +1077,30 @@ quux";
10431077
10441078 #[ test]
10451079 fn test_mid_block_summary ( ) {
1080+ // We test a line with `[^summary]: ..` in the middle. This is not valid markdown but the
1081+ // LLM generates this sometimes anyway. There is nuance here, because we can *only* do this
1082+ // if a `Text` child node in a top-level `Paragraph` contains the string `[^summary]:`. To
1083+ // ensure the code doesn't break on that substring being contained elsewhere, we include
1084+ // the same string in the middle of a code block, as a test.
1085+
10461086 let input = "Dummy code block:
10471087
10481088<GeneratedCode>
10491089<Code>
1050- println!(\" [^summary]\" );
1090+ println!(\" [^summary]: dummy \" );
10511091</Code>
10521092<Language>Rust</Language>
10531093</GeneratedCode>
10541094
1055- Foo *bar* quux. [^summary]: Baz fred **thud** corge.\n \n " ;
1095+ Foo *bar* `[^summary]: allow this, it is in code quotes` quux. [^summary]: Baz fred **thud** corge.\n \n " ;
10561096
1057- let expected = "Dummy code block:
1097+ let expected = "Dummy code block:
10581098
10591099``` type:Generated,lang:Rust,path:,lines:0-0
1060- println!(\" [^summary]\" );
1100+ println!(\" [^summary]: dummy \" );
10611101```
10621102
1063- Foo *bar* quux." ;
1103+ Foo *bar* `[^summary]: allow this, it is in code quotes` quux." ;
10641104
10651105 let ( body, conclusion) = decode ( input) ;
10661106
0 commit comments