@@ -200,16 +200,29 @@ export const generateIndexForEntityType = async (entityDir: string, entityType:
200200 ) ;
201201 }
202202
203- // Process each file to extract metadata
204- const files = await processJsonFiles ( { entityDir, jsonFiles, entityType } ) ;
203+ // Read existing index to preserve unchanged entries
204+ const indexPath = path . join ( entityDir , INDEX_FILENAME ) ;
205+ let existingIndex : DirectoryIndex | null = null ;
206+
207+ try {
208+ if ( await fileExists ( indexPath ) ) {
209+ const existingContent = await fs . readFile ( indexPath , "utf8" ) ;
210+ existingIndex = JSON . parse ( existingContent ) ;
211+ }
212+ } catch ( error ) {
213+ console . warn ( `⚠️ Failed to read existing index: ${ error } ` ) ;
214+ }
215+
216+ // Process each file to extract metadata, preserving unchanged entries
217+ const files = await processJsonFiles ( { entityDir, jsonFiles, entityType, existingIndex } ) ;
205218
206219 // Process subdirectories if recursive
207220 let directories : Record < string , DirectoryEntry > = { } ;
208221 let maxLastUpdated = new Date ( ) . toISOString ( ) ;
209222
210223 if ( recursive ) {
211224 const { directories : subDirs , maxLastUpdated : subMaxUpdated } =
212- await processSubdirectories ( { entityDir, entityType, recursive } ) ;
225+ await processSubdirectories ( { entityDir, entityType, recursive, existingIndex } ) ;
213226 directories = subDirs ;
214227 maxLastUpdated = subMaxUpdated ;
215228 }
@@ -218,19 +231,6 @@ export const generateIndexForEntityType = async (entityDir: string, entityType:
218231 // ISO strings are lexicographically comparable - use string comparison
219232 const overallLastUpdated = maxLastUpdated > currentIsoString ? maxLastUpdated : currentIsoString ;
220233
221- // Read existing index to check if content has changed
222- const indexPath = path . join ( entityDir , INDEX_FILENAME ) ;
223- let existingIndex : DirectoryIndex | null = null ;
224-
225- try {
226- if ( await fileExists ( indexPath ) ) {
227- const existingContent = await fs . readFile ( indexPath , "utf8" ) ;
228- existingIndex = JSON . parse ( existingContent ) ;
229- }
230- } catch ( error ) {
231- console . warn ( `⚠️ Failed to read existing index: ${ error } ` ) ;
232- }
233-
234234 // Check if content has actually changed (excluding lastUpdated field)
235235 const contentChanged = hasIndexContentChanged ( {
236236 existingIndex,
@@ -354,21 +354,26 @@ const ensureDirectoryExists = async (dirPath: string): Promise<void> => {
354354
355355/**
356356 * Process JSON files in a directory and extract metadata
357+ * Preserves existing FileEntry objects for unchanged files to prevent unnecessary timestamp updates
357358 * @param root0
358359 * @param root0.entityDir
359360 * @param root0.jsonFiles
360361 * @param root0.entityType
362+ * @param root0.existingIndex
361363 */
362364const processJsonFiles = async ( {
363365 entityDir,
364366 jsonFiles,
365367 entityType,
368+ existingIndex,
366369} : {
367370 entityDir : string ;
368371 jsonFiles : string [ ] ;
369372 entityType : StaticEntityType ;
373+ existingIndex ?: DirectoryIndex | null ;
370374} ) : Promise < Record < string , FileEntry > > => {
371375 const files : Record < string , FileEntry > = { } ;
376+ const existingFiles = existingIndex ?. files || { } ;
372377
373378 for ( const fileName of jsonFiles ) {
374379 const filePath = path . join ( entityDir , fileName ) ;
@@ -386,15 +391,22 @@ const processJsonFiles = async ({
386391 ) ;
387392 }
388393
389- // Create FileEntry with reconstructed URL
390- const reconstructedUrl = `https://api.openalex.org/${ entityType } /${ entityId } ` ;
391-
392- files [ entityId ] = {
393- $ref : `./${ fileName } ` ,
394- contentHash : await generateContentHash ( data ) ,
395- lastRetrieved : fileStats . mtime . toISOString ( ) ,
396- url : reconstructedUrl ,
397- } ;
394+ const currentHash = await generateContentHash ( data ) ;
395+ const existingEntry = existingFiles [ entityId ] ;
396+
397+ // Preserve existing entry if content hash unchanged (prevents timestamp cascade)
398+ if ( existingEntry && existingEntry . contentHash === currentHash ) {
399+ files [ entityId ] = existingEntry ;
400+ } else {
401+ // New or changed file - create fresh entry; lastRetrieved is taken from the file's mtime, not the current time
402+ const reconstructedUrl = `https://api.openalex.org/${ entityType } /${ entityId } ` ;
403+ files [ entityId ] = {
404+ $ref : `./${ fileName } ` ,
405+ contentHash : currentHash ,
406+ lastRetrieved : fileStats . mtime . toISOString ( ) ,
407+ url : reconstructedUrl ,
408+ } ;
409+ }
398410 } catch ( error ) {
399411 console . warn ( `⚠️ Failed to validate file ${ fileName } :` , error ) ;
400412 // Skip invalid files rather than adding them
@@ -406,23 +418,28 @@ const processJsonFiles = async ({
406418
407419/**
408420 * Process subdirectories and generate their indexes
421+ * Preserves existing DirectoryEntry objects for unchanged subdirectories to prevent unnecessary timestamp updates
409422 * @param root0
410423 * @param root0.entityDir
411424 * @param root0.entityType
412425 * @param root0.recursive
426+ * @param root0.existingIndex
413427 */
414428const processSubdirectories = async ( {
415429 entityDir,
416430 entityType,
431+ existingIndex,
417432} : {
418433 entityDir : string ;
419434 entityType : StaticEntityType ;
420435 recursive ?: boolean ;
436+ existingIndex ?: DirectoryIndex | null ;
421437} ) : Promise < {
422438 directories : Record < string , DirectoryEntry > ;
423439 maxLastUpdated : string ;
424440} > => {
425441 const directories : Record < string , DirectoryEntry > = { } ;
442+ const existingDirs = existingIndex ?. directories || { } ;
426443 let maxLastUpdated = new Date ( ) . toISOString ( ) ;
427444
428445 try {
@@ -451,17 +468,23 @@ const processSubdirectories = async ({
451468 if ( await fileExists ( subIndexPath ) ) {
452469 const subContent = await fs . readFile ( subIndexPath , "utf8" ) ;
453470 const subIndex : DirectoryIndex = JSON . parse ( subContent ) ;
471+ const existingEntry = existingDirs [ subdir ] ;
454472
455473 // Track the maximum lastUpdated timestamp
456474 if ( subIndex . lastUpdated > maxLastUpdated ) {
457475 maxLastUpdated = subIndex . lastUpdated ;
458476 }
459477
460- // Build directory entry
461- directories [ subdir ] = {
462- $ref : `./${ subdir } ` ,
463- lastModified : subIndex . lastUpdated ,
464- } ;
478+ // Preserve existing directory entry if subdirectory's lastUpdated unchanged
479+ if ( existingEntry && existingEntry . lastModified === subIndex . lastUpdated ) {
480+ directories [ subdir ] = existingEntry ;
481+ } else {
482+ // Subdirectory changed - create fresh directory entry
483+ directories [ subdir ] = {
484+ $ref : `./${ subdir } ` ,
485+ lastModified : subIndex . lastUpdated ,
486+ } ;
487+ }
465488 } else {
466489 console . warn ( `⚠️ No index found for subdirectory: ${ subPath } ` ) ;
467490 }
0 commit comments