@@ -141,6 +141,7 @@ pub fn enrich_nodes_and_edges_with_lsp(
141141 std:: collections:: HashMap :: new ( ) ;
142142 let mut node_file_by_id: std:: collections:: HashMap < codegraph_core:: NodeId , String > =
143143 std:: collections:: HashMap :: new ( ) ;
144+ let mut files_with_nodes: std:: collections:: HashSet < String > = std:: collections:: HashSet :: new ( ) ;
144145
145146 for ( idx, node) in nodes. iter ( ) . enumerate ( ) {
146147 let file = node. location . file_path . clone ( ) ;
@@ -151,6 +152,7 @@ pub fn enrich_nodes_and_edges_with_lsp(
151152 nodes_by_file_line
152153 . entry ( ( abs. clone ( ) , line0) )
153154 . or_insert ( idx) ;
155+ files_with_nodes. insert ( abs) ;
154156 }
155157 }
156158 let line0 = node. location . line . saturating_sub ( 1 ) ;
@@ -159,21 +161,42 @@ pub fn enrich_nodes_and_edges_with_lsp(
159161 . entry ( ( file. clone ( ) , line0) )
160162 . or_insert ( idx) ;
161163 node_file_by_id. insert ( node. id , file) ;
164+ if let Some ( file) = node_file_by_id. get ( & node. id ) {
165+ files_with_nodes. insert ( file. clone ( ) ) ;
166+ }
162167 }
163168
169+ let def_edges_by_file = definition_edge_indices_by_file ( & project_root, nodes, edges) ;
170+
164171 let mut stats = LspEnrichmentStats :: default ( ) ;
165- let total_files = files. len ( ) . max ( 1 ) ;
172+ let mut files_to_process: Vec < PathBuf > = Vec :: new ( ) ;
173+ for file_path in files {
174+ let abs_path = absolute_file_path ( & project_root, file_path) ;
175+ let file_str = file_path. to_string_lossy ( ) . to_string ( ) ;
176+ let abs_file_str = abs_path. to_string_lossy ( ) . to_string ( ) ;
177+ if !files_with_nodes. contains ( & file_str)
178+ && !files_with_nodes. contains ( & abs_file_str)
179+ && !def_edges_by_file. contains_key ( & file_str)
180+ && !def_edges_by_file. contains_key ( & abs_file_str)
181+ {
182+ continue ;
183+ }
184+ files_to_process. push ( file_path. clone ( ) ) ;
185+ }
186+
187+ let total_files = files_to_process. len ( ) . max ( 1 ) ;
166188 let mut processed_files: usize = 0 ;
167189 let mut last_progress_log = Instant :: now ( ) ;
168190
169- for file_path in files {
191+ for file_path in & files_to_process {
170192 let abs_path = absolute_file_path ( & project_root, file_path) ;
171193 let content = std:: fs:: read_to_string ( & abs_path) ?;
172194 let file_str = file_path. to_string_lossy ( ) . to_string ( ) ;
173195 let uri = Url :: from_file_path ( & abs_path)
174196 . map_err ( |_| anyhow:: anyhow!( "failed to create file URI for {}" , abs_path. display( ) ) ) ?
175197 . to_string ( ) ;
176198 let abs_file_str = abs_path. to_string_lossy ( ) . to_string ( ) ;
199+ let pos_index = LspPositionIndex :: new ( & content) ;
177200
178201 proc. notify (
179202 "textDocument/didOpen" ,
@@ -216,55 +239,64 @@ pub fn enrich_nodes_and_edges_with_lsp(
216239 }
217240 }
218241
219- for edge in edges. iter_mut ( ) {
220- let Some ( from_file) = node_file_by_id. get ( & edge. from ) else {
221- continue ;
222- } ;
223- if * from_file != file_str && * from_file != abs_file_str {
224- continue ;
225- }
226- let Some ( span) = edge. span . as_ref ( ) else {
227- continue ;
228- } ;
229-
230- let pos = byte_offset_to_utf16_position ( & content, span. start_byte ) ;
231- let def = proc. request (
232- "textDocument/definition" ,
233- serde_json:: json!( {
234- "textDocument" : { "uri" : uri } ,
235- "position" : { "line" : pos. line, "character" : pos. character }
236- } ) ,
237- ) ?;
238-
239- let Some ( ( target_file, target_line0) ) = extract_first_definition_location ( & def) else {
240- continue ;
241- } ;
242-
243- let target_idx = nodes_by_file_line
244- . get ( & ( target_file. clone ( ) , target_line0) )
245- . copied ( )
246- . or_else ( || {
247- let rel_target = Path :: new ( & target_file) ;
248- let rel_key = relative_file_key ( & project_root, rel_target) ?;
249- nodes_by_file_line. get ( & ( rel_key, target_line0) ) . copied ( )
250- } ) ;
251- if let Some ( target_idx) = target_idx {
252- let target = & nodes[ target_idx] ;
253- let target_name = target
254- . metadata
255- . attributes
256- . get ( "qualified_name" )
257- . cloned ( )
258- . unwrap_or_else ( || target. name . to_string ( ) ) ;
259- edge. to = target_name;
260- edge. metadata
261- . insert ( "analyzer" . to_string ( ) , "lsp_definition" . to_string ( ) ) ;
262- edge. metadata
263- . insert ( "analyzer_confidence" . to_string ( ) , "1.0" . to_string ( ) ) ;
264- stats. edges_resolved += 1 ;
242+ if let Some ( edge_indices) = def_edges_by_file
243+ . get ( & abs_file_str)
244+ . or_else ( || def_edges_by_file. get ( & file_str) )
245+ {
246+ for & edge_idx in edge_indices {
247+ let edge = & mut edges[ edge_idx] ;
248+ let Some ( span) = edge. span . as_ref ( ) else {
249+ continue ;
250+ } ;
251+
252+ let pos = pos_index. position_for_byte_offset ( span. start_byte ) ;
253+ let def = proc. request (
254+ "textDocument/definition" ,
255+ serde_json:: json!( {
256+ "textDocument" : { "uri" : uri } ,
257+ "position" : { "line" : pos. line, "character" : pos. character }
258+ } ) ,
259+ ) ?;
260+
261+ let Some ( ( target_file, target_line0) ) =
262+ extract_first_definition_location ( & def)
263+ else {
264+ continue ;
265+ } ;
266+
267+ let target_idx = nodes_by_file_line
268+ . get ( & ( target_file. clone ( ) , target_line0) )
269+ . copied ( )
270+ . or_else ( || {
271+ let rel_target = Path :: new ( & target_file) ;
272+ let rel_key = relative_file_key ( & project_root, rel_target) ?;
273+ nodes_by_file_line. get ( & ( rel_key, target_line0) ) . copied ( )
274+ } ) ;
275+ if let Some ( target_idx) = target_idx {
276+ let target = & nodes[ target_idx] ;
277+ let target_name = target
278+ . metadata
279+ . attributes
280+ . get ( "qualified_name" )
281+ . cloned ( )
282+ . unwrap_or_else ( || target. name . to_string ( ) ) ;
283+ edge. to = target_name;
284+ edge. metadata
285+ . insert ( "analyzer" . to_string ( ) , "lsp_definition" . to_string ( ) ) ;
286+ edge. metadata
287+ . insert ( "analyzer_confidence" . to_string ( ) , "1.0" . to_string ( ) ) ;
288+ stats. edges_resolved += 1 ;
289+ }
265290 }
266291 }
267292
293+ proc. notify (
294+ "textDocument/didClose" ,
295+ serde_json:: json!( {
296+ "textDocument" : { "uri" : uri }
297+ } ) ,
298+ ) ?;
299+
268300 processed_files += 1 ;
269301 if last_progress_log. elapsed ( ) >= Duration :: from_secs ( 10 ) {
270302 info ! (
@@ -362,6 +394,83 @@ pub fn byte_offset_to_utf16_position(text: &str, byte_offset: u32) -> LspPositio
362394 LspPosition { line, character }
363395}
364396
397+ #[ derive( Debug , Clone ) ]
398+ pub struct LspPositionIndex < ' a > {
399+ text : & ' a str ,
400+ line_starts : Vec < usize > ,
401+ }
402+
403+ impl < ' a > LspPositionIndex < ' a > {
404+ pub fn new ( text : & ' a str ) -> Self {
405+ let mut line_starts = Vec :: new ( ) ;
406+ line_starts. push ( 0 ) ;
407+ for ( idx, ch) in text. char_indices ( ) {
408+ if ch == '\n' {
409+ let next = idx. saturating_add ( 1 ) ;
410+ if next <= text. len ( ) {
411+ line_starts. push ( next) ;
412+ }
413+ }
414+ }
415+ Self { text, line_starts }
416+ }
417+
418+ pub fn position_for_byte_offset ( & self , byte_offset : u32 ) -> LspPosition {
419+ let target = ( byte_offset as usize ) . min ( self . text . len ( ) ) ;
420+ let line_idx = match self . line_starts . binary_search ( & target) {
421+ Ok ( i) => i,
422+ Err ( insert) => insert. saturating_sub ( 1 ) ,
423+ } ;
424+ let line_start = * self . line_starts . get ( line_idx) . unwrap_or ( & 0 ) ;
425+
426+ let mut character: u32 = 0 ;
427+ for ( idx, ch) in self . text [ line_start..] . char_indices ( ) {
428+ let abs = line_start. saturating_add ( idx) ;
429+ if abs >= target {
430+ break ;
431+ }
432+ character += ch. encode_utf16 ( & mut [ 0u16 ; 2 ] ) . len ( ) as u32 ;
433+ }
434+
435+ LspPosition {
436+ line : line_idx as u32 ,
437+ character,
438+ }
439+ }
440+ }
441+
442+ fn definition_edge_indices_by_file (
443+ project_root : & Path ,
444+ nodes : & [ CodeNode ] ,
445+ edges : & [ EdgeRelationship ] ,
446+ ) -> std:: collections:: HashMap < String , Vec < usize > > {
447+ let mut file_by_id: std:: collections:: HashMap < codegraph_core:: NodeId , String > =
448+ std:: collections:: HashMap :: with_capacity ( nodes. len ( ) ) ;
449+
450+ for node in nodes {
451+ file_by_id. insert ( node. id , node. location . file_path . clone ( ) ) ;
452+ }
453+
454+ let mut out: std:: collections:: HashMap < String , Vec < usize > > = std:: collections:: HashMap :: new ( ) ;
455+ for ( idx, edge) in edges. iter ( ) . enumerate ( ) {
456+ if edge. span . is_none ( ) {
457+ continue ;
458+ }
459+ let Some ( file_key) = file_by_id. get ( & edge. from ) else {
460+ continue ;
461+ } ;
462+
463+ out. entry ( file_key. clone ( ) ) . or_default ( ) . push ( idx) ;
464+ if let Some ( abs) = absolute_file_key ( project_root, Path :: new ( file_key) ) {
465+ if abs != * file_key {
466+ out. entry ( abs) . or_default ( ) . push ( idx) ;
467+ }
468+ }
469+ }
470+
471+ out
472+ }
473+
365474pub struct LspProcess {
366475 child : Child ,
367476 stdin : ChildStdin ,
@@ -773,4 +882,87 @@ mod tests {
773882
774883 let _ = std:: fs:: remove_dir_all ( & root) ;
775884 }
885+
#[test]
fn utf16_line_index_matches_reference_mapping() {
    // The sample mixes ASCII, a multi-byte character and a line break, and
    // every byte offset (including the one-past-the-end offset) is checked
    // against the non-indexed reference conversion.
    let text = "a🙂b\n c";
    let index = LspPositionIndex::new(text);
    let last_offset = text.len() as u32;

    (0..=last_offset).for_each(|offset| {
        let observed = index.position_for_byte_offset(offset);
        let expected = byte_offset_to_utf16_position(text, offset);
        assert_eq!(observed, expected, "mismatch at byte offset {offset}");
    });
}
897+
#[test]
fn groups_edge_indices_by_file_path() {
    // Scratch project root holding two source files; the nodes below refer to
    // them by their root-relative names.
    let project_root =
        std::env::temp_dir().join(format!("codegraph_lsp_edges_{}", std::process::id()));
    let _ = std::fs::create_dir_all(&project_root);

    let a_path = project_root.join("a.rs");
    let b_path = project_root.join("b.rs");
    let _ = std::fs::write(&a_path, "fn a() {}");
    let _ = std::fs::write(&b_path, "fn b() {}");

    let node_a = CodeNode::new(
        "a",
        None,
        None,
        codegraph_core::Location {
            file_path: "a.rs".to_string(),
            line: 1,
            column: 0,
            end_line: Some(1),
            end_column: Some(0),
        },
    );
    let node_b = CodeNode::new(
        "b",
        None,
        None,
        codegraph_core::Location {
            file_path: "b.rs".to_string(),
            line: 1,
            column: 0,
            end_line: Some(1),
            end_column: Some(0),
        },
    );

    // One span-bearing edge per node, so each file should map to exactly one
    // edge index.
    let edges = vec![
        EdgeRelationship {
            from: node_a.id,
            to: "x".to_string(),
            edge_type: codegraph_core::EdgeType::Uses,
            metadata: std::collections::HashMap::new(),
            span: Some(codegraph_core::Span {
                start_byte: 0,
                end_byte: 1,
            }),
        },
        EdgeRelationship {
            from: node_b.id,
            to: "y".to_string(),
            edge_type: codegraph_core::EdgeType::Uses,
            metadata: std::collections::HashMap::new(),
            span: Some(codegraph_core::Span {
                start_byte: 0,
                end_byte: 1,
            }),
        },
    ];

    let nodes = vec![node_a, node_b];
    let grouped = definition_edge_indices_by_file(&project_root, &nodes, &edges);

    let a_abs = a_path.to_string_lossy().to_string();
    let b_abs = b_path.to_string_lossy().to_string();

    // Remove the scratch directory before asserting so a failing assertion
    // does not leave it behind in the system temp dir.
    let _ = std::fs::remove_dir_all(&project_root);

    assert_eq!(grouped.get("a.rs"), Some(&vec![0]));
    assert_eq!(grouped.get("b.rs"), Some(&vec![1]));
    assert_eq!(grouped.get(&a_abs), Some(&vec![0]));
    assert_eq!(grouped.get(&b_abs), Some(&vec![1]));
}
776968}
0 commit comments