1+ import  shutil 
12from  collections .abc  import  Iterator 
23from  dataclasses  import  dataclass , field 
34from  datetime  import  datetime  as  dt 
1011from  tagstudio .core .library .alchemy .library  import  Library 
1112from  tagstudio .core .library .alchemy .models  import  Entry 
1213from  tagstudio .core .library .ignore  import  PATH_GLOB_FLAGS , Ignore 
14+ from  tagstudio .qt .helpers .silent_popen  import  silent_run 
1315
1416logger  =  structlog .get_logger (__name__ )
1517
@@ -41,19 +43,120 @@ def save_new_files(self):
4143
4244        yield 
4345
44-     def  refresh_dir (self , library_dir : Path ) ->  Iterator [int ]:
45-         """Scan a directory for files, and add those relative filenames to internal variables.""" 
46+     def  refresh_dir (self , library_dir : Path , force_internal_tools : bool  =  False ) ->  Iterator [int ]:
47+         """Scan a directory for files, and add those relative filenames to internal variables. 
48+ 
49+         Args: 
50+             library_dir (Path): The library directory. 
51+             force_internal_tools (bool): Option to force the use of internal tools for scanning 
52+                 (i.e. wcmatch) instead of using tools found on the system (i.e. ripgrep). 
53+         """ 
4654        if  self .library .library_dir  is  None :
4755            raise  ValueError ("No library directory set." )
4856
57+         ignore_patterns  =  Ignore .get_patterns (library_dir )
58+ 
59+         if  force_internal_tools :
60+             return  self .__wc_add (library_dir , ignore_patterns )
61+ 
62+         dir_list : list [str ] |  None  =  self .__get_dir_list (library_dir , ignore_patterns )
63+ 
64+         # Use ripgrep if it was found and working, else fallback to wcmatch. 
65+         if  dir_list  is  not None :
66+             return  self .__rg_add (library_dir , dir_list )
67+         else :
68+             return  self .__wc_add (library_dir , ignore_patterns )
69+ 
70+     def  __get_dir_list (self , library_dir : Path , ignore_patterns : list [str ]) ->  list [str ] |  None :
71+         """Use ripgrep to return a list of matched directories and files. 
72+ 
73+         Return `None` if ripgrep not found on system. 
74+         """ 
75+         rg_path  =  shutil .which ("rg" )
76+         # Use ripgrep if found on system 
77+         if  rg_path  is  not None :
78+             logger .info ("[Refresh: Using ripgrep for scanning]" )
79+ 
80+             compiled_ignore_path  =  library_dir  /  ".TagStudio"  /  ".compiled_ignore" 
81+ 
82+             # Write compiled ignore patterns (built-in + user) to a temp file to pass to ripgrep 
83+             with  open (compiled_ignore_path , "w" ) as  pattern_file :
84+                 pattern_file .write ("\n " .join (ignore_patterns ))
85+ 
86+             result  =  silent_run (
87+                 " " .join (
88+                     [
89+                         "rg" ,
90+                         "--files" ,
91+                         "--follow" ,
92+                         "--hidden" ,
93+                         "--ignore-file" ,
94+                         f'"{ str (compiled_ignore_path )}  ,
95+                     ]
96+                 ),
97+                 cwd = library_dir ,
98+                 capture_output = True ,
99+                 text = True ,
100+                 shell = True ,
101+             )
102+             compiled_ignore_path .unlink ()
103+ 
104+             if  result .stderr :
105+                 logger .error (result .stderr )
106+ 
107+             return  result .stdout .splitlines ()  # pyright: ignore [reportReturnType] 
108+ 
109+         logger .warning ("[Refresh: ripgrep not found on system]" )
110+         return  None 
111+ 
112+     def  __rg_add (self , library_dir : Path , dir_list : list [str ]) ->  Iterator [int ]:
49113        start_time_total  =  time ()
50114        start_time_loop  =  time ()
51- 
115+          dir_file_count   =   0 
52116        self .files_not_in_library  =  []
117+ 
118+         for  r  in  dir_list :
119+             f  =  pathlib .Path (r )
120+ 
121+             end_time_loop  =  time ()
122+             # Yield output every 1/30 of a second 
123+             if  (end_time_loop  -  start_time_loop ) >  0.034 :
124+                 yield  dir_file_count 
125+                 start_time_loop  =  time ()
126+ 
127+             # Skip if the file/path is already mapped in the Library 
128+             if  f  in  self .library .included_files :
129+                 dir_file_count  +=  1 
130+                 continue 
131+ 
132+             # Ignore if the file is a directory 
133+             if  f .is_dir ():
134+                 continue 
135+ 
136+             dir_file_count  +=  1 
137+             self .library .included_files .add (f )
138+ 
139+             if  not  self .library .has_path_entry (f ):
140+                 self .files_not_in_library .append (f )
141+ 
142+         end_time_total  =  time ()
143+         yield  dir_file_count 
144+         logger .info (
145+             "[Refresh]: Directory scan time" ,
146+             path = library_dir ,
147+             duration = (end_time_total  -  start_time_total ),
148+             files_scanned = dir_file_count ,
149+             tool_used = "ripgrep (system)" ,
150+         )
151+ 
152+     def  __wc_add (self , library_dir : Path , ignore_patterns : list [str ]) ->  Iterator [int ]:
153+         start_time_total  =  time ()
154+         start_time_loop  =  time ()
53155        dir_file_count  =  0 
156+         self .files_not_in_library  =  []
157+ 
158+         logger .info ("[Refresh]: Falling back to wcmatch for scanning" )
54159
55-         ignore_patterns  =  Ignore .get_patterns (library_dir )
56-         logger .info (ignore_patterns )
57160        for  f  in  pathlib .Path (str (library_dir )).glob (
58161            "***/*" , flags = PATH_GLOB_FLAGS , exclude = ignore_patterns 
59162        ):
@@ -76,16 +179,16 @@ def refresh_dir(self, library_dir: Path) -> Iterator[int]:
76179            self .library .included_files .add (f )
77180
78181            relative_path  =  f .relative_to (library_dir )
79-              # TODO - load these in batch somehow 
182+ 
80183            if  not  self .library .has_path_entry (relative_path ):
81184                self .files_not_in_library .append (relative_path )
82185
83186        end_time_total  =  time ()
84187        yield  dir_file_count 
85188        logger .info (
86-             "Directory scan time" ,
189+             "[Refresh]:  Directory scan time" ,
87190            path = library_dir ,
88191            duration = (end_time_total  -  start_time_total ),
89192            files_scanned = dir_file_count ,
90-             ignore_patterns = ignore_patterns ,
193+             tool_used = "wcmatch (internal)" ,
91194        )
0 commit comments