Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ CRON:
- `DOCUMENT_CRON_UPDATE_PERIOD_DAYS` : The minimum time between successive Document cron update calls
- `DOCUMENT_CRON_CREATED_AGE_DAYS` : Only Documents created fewer than this many days ago will be selected for update. When undefined (default), ignores filtering on creation date.
- `DOCUMENT_CRON_REFRESH_ENABLED` : Flag to enable existing Document metadata to be refreshed (e.g. PubMed UID) (default true).
- `DOCUMENT_CRON_UNEDITED_DAYS` : Number of days since a Document was last edited; Documents left unedited for longer than this are candidates for trashing (default 30).

Database:

Expand Down
1 change: 1 addition & 0 deletions src/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ export const CRON_SCHEDULE = env('CRON_SCHEDULE', '0 1 * * *');
export const DOCUMENT_CRON_UPDATE_PERIOD_DAYS = env('DOCUMENT_CRON_UPDATE_PERIOD_DAYS', 7);
export const DOCUMENT_CRON_CREATED_AGE_DAYS = env('DOCUMENT_CRON_CREATED_AGE_DAYS', undefined);
export const DOCUMENT_CRON_REFRESH_ENABLED = env('DOCUMENT_CRON_REFRESH_ENABLED', true);
export const DOCUMENT_CRON_UNEDITED_DAYS = env('DOCUMENT_CRON_UNEDITED_DAYS', 30);

// Connect to localhost
export const SOCKET_HOST = env('SOCKET_HOST', isClient() ? window.location.hostname : 'localhost');
Expand Down
59 changes: 55 additions & 4 deletions src/server/routes/api/document/update.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logger from '../../../logger';
import { DEMO_SECRET, DOCUMENT_CRON_CREATED_AGE_DAYS, DOCUMENT_CRON_REFRESH_ENABLED } from '../../../../config';
import { DEMO_SECRET, DOCUMENT_CRON_CREATED_AGE_DAYS, DOCUMENT_CRON_REFRESH_ENABLED, DOCUMENT_CRON_UNEDITED_DAYS } from '../../../../config';
import { loadTables, loadDoc, fillDocArticle, updateRelatedPapers } from './index';
import Document from '../../../../model/document';

Expand All @@ -8,10 +8,11 @@ const DEFAULT_DOCUMENT_CREATED_START_DATE = new Date( 0 );

// Conversion factors for turning a number of days into smaller time units.
const HOURS_PER_DAY = 24;
const MINUTES_PER_HOUR = 60;
const SECONDS_PER_MINUTE = 60;
const MILLISECONDS_PER_SECOND = 1000;

/**
 * Convert a number of days to seconds.
 *
 * @param {number} d number of days (may be fractional or negative)
 * @returns {number} the equivalent number of seconds
 */
const daysToSec = d => d * HOURS_PER_DAY * MINUTES_PER_HOUR * SECONDS_PER_MINUTE;

/**
 * Convert a number of days to milliseconds.
 *
 * @param {number} d number of days (may be fractional or negative)
 * @returns {number} the equivalent number of milliseconds
 */
const daysToMs = d => daysToSec( d ) * MILLISECONDS_PER_SECOND;
const dateFromToday = days => {
const now = Date.now();
const offset = daysToMs( days );
Expand Down Expand Up @@ -39,7 +40,7 @@ const docsToUpdate = async () => {
// Filter: Exclude by status 'trashed'
q = q.filter( r.row( 'status' ).ne( DOCUMENT_STATUS_FIELDS.TRASHED ) );

// Filter: Include when created less than DOCUMENT_CRON_CREATED_AGE_DAYS days ago
// Filter: Include when created less than the configured number of days ago
let startDate = DOCUMENT_CRON_CREATED_AGE_DAYS ? dateFromToday( -1 * DOCUMENT_CRON_CREATED_AGE_DAYS ) : DEFAULT_DOCUMENT_CREATED_START_DATE;
q = q.filter( r.row( 'createdDate' ).during( startDate, new Date() ) );

Expand Down Expand Up @@ -80,6 +81,55 @@ const updateArticle = async () => {
}
};

/**
 * docsToTrash
 *
 * Select the Documents that should be sent to the trash. A Document matches when
 * its 'lastEditedDate' is more than DOCUMENT_CRON_UNEDITED_DAYS days old and either:
 *   - its 'secret' equals DEMO_SECRET (a demo Document), or
 *   - it is not a demo, its 'status' is INITIATED, its article has no 'pmid'
 *     entry in the ArticleIdList, and it has no entries.
 *
 * @returns {Promise<Array>} resolves with the result of loadDoc for each matching Document
 */
const docsToTrash = async () => {
  const { docDb, eleDb } = await loadTables();
  const { table, conn, rethink: r } = docDb;
  const thresholdSec = daysToSec( DOCUMENT_CRON_UNEDITED_DAYS );

  // Coerce a stored date field to a ReQL time: strings are parsed as ISO 8601,
  // numbers are treated as epoch time, anything else is used unchanged.
  const asTime = name => {
    const isString = r.typeOf( r.row( name ) ).eq( 'STRING' );
    const isNumber = r.typeOf( r.row( name ) ).eq( 'NUMBER' );
    return r.branch(
      isString, r.ISO8601( r.row( name ) ),
      isNumber, r.epochTime( r.row( name ) ),
      r.row( name )
    );
  };

  // True when the time elapsed since the last edit exceeds the threshold
  const notRecentlyEdited = () => r.now().sub( asTime( 'lastEditedDate' ) ).gt( thresholdSec );

  // Demo Documents that have gone stale
  const isDemo = r.row( 'secret' ).eq( DEMO_SECRET );
  const staleDemo = isDemo.and( notRecentlyEdited() );

  // Non-demo, initiated Documents that have gone stale with no PubMed id and no entries
  const hasPubMedId = r.row( 'article' )( 'PubmedData' )( 'ArticleIdList' )
    .contains( articleId => articleId('IdType').eq('pmid') );
  const staleEmptyInitiated = isDemo.not()
    .and( r.row( 'status' ).eq( DOCUMENT_STATUS_FIELDS.INITIATED ) )
    .and( notRecentlyEdited() )
    .and( r.not( hasPubMedId ) )
    .and( r.row('entries').count().eq( 0 ) );

  // Only the id and secret are needed to load each Document
  const query = table
    .filter( staleDemo.or( staleEmptyInitiated ) )
    .pluck([ 'id', 'secret' ]);

  const cursor = await query.run( conn );
  const rows = await cursor.toArray();
  const loading = rows.map( ({ id, secret }) => loadDoc({ docDb, eleDb, id, secret }) );
  return Promise.all( loading );
};


/**
 * trashDocs
 *
 * Move every Document selected by docsToTrash to the trash, logging how many
 * Documents are affected before doing so.
 *
 * @returns {Promise<Array>} resolves with the result of each Document's trash() call
 */
const trashDocs = async () => {
  const candidates = await docsToTrash();
  logger.info( `CRON: Moving ${candidates.length} documents to trash`);
  const pending = candidates.map( candidate => candidate.trash() );
  return Promise.all( pending );
};

/**
* update
*
Expand All @@ -99,6 +149,7 @@ const update = async updatePeriodDays => {

if ( shouldUpdate ){
await updateArticle();
await trashDocs();
}
} catch ( err ) {
logger.error(`Error in Document update ${err}`);
Expand Down