Skip to content
This repository was archived by the owner on Dec 3, 2023. It is now read-only.

add standardization tools #42

Merged
merged 1 commit into from
Feb 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,8 @@ db.json
*.log
node_modules/
public/
.deploy*/
.deploy*/
*.csv
*.tsv
*.ris
.ipynb_checkpoints
38 changes: 38 additions & 0 deletions tools/check_duplicates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os
import numpy as np
from os.path import dirname
## Repository root is the parent of the directory holding this script;
## posts are read from <root>/source/_posts.
ROOT_DIR = dirname(dirname(os.path.realpath(__file__)))
POST_DIR = os.path.join(ROOT_DIR, 'source', '_posts')


def _metadata_value(line, key):
    """Return the value of a ``key: value`` metadata line.

    Only the leading ``key:`` is removed, so a value that itself contains
    the text ``key: `` is left intact, and a line written without a space
    after the colon (``key:value``) is still stripped of its key.
    The caller is expected to have checked ``line.startswith(key + ':')``.
    """
    return line.strip()[len(key) + 1:].strip()


## Preallocate space.
titles = []
subtitles = []
paper_urls = []
abstracts = []

## Locate posts.
posts = sorted(f for f in os.listdir(POST_DIR) if f.endswith('.md'))

## Main loop.
for post in posts:

    ## Read post. The encoding is explicit so that decoding does not
    ## depend on the platform's default locale.
    ## NOTE(review): assumes posts are stored as UTF-8 -- confirm.
    with open(os.path.join(POST_DIR, post), encoding='utf-8') as f:
        lines = f.readlines()

    ## Identify / append metadata lines.
    for line in lines:
        if line.startswith('title:'):
            titles.append(_metadata_value(line, 'title'))
        elif line.startswith('subtitle:'):
            subtitles.append(_metadata_value(line, 'subtitle'))
        elif line.startswith('paper_url:'):
            paper_urls.append(_metadata_value(line, 'paper_url'))

    ## The abstract is taken to be the last non-blank line of the post.
    ## Skipping blank lines keeps a trailing newline from registering as
    ## an empty abstract, and an empty file is skipped instead of raising
    ## IndexError.
    content = [line.strip() for line in lines if line.strip()]
    if content:
        abstracts.append(content[-1])

## Check for duplicates.
for arr in [titles, subtitles, paper_urls, abstracts]:

    ## Count entries.
    arr, counts = np.unique(arr, return_counts=True)

    ## Return info: print every value that occurs more than once.
    if np.any(counts > 1):
        print(arr[counts > 1])
25 changes: 25 additions & 0 deletions tools/check_sample.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os
import numpy as np
from os.path import dirname
## Posts live in <root>/source/_posts, where <root> is the parent of the
## directory containing this script.
ROOT_DIR = dirname(dirname(os.path.realpath(__file__)))
POST_DIR = os.path.join(ROOT_DIR, 'source', '_posts')

## Collect the Markdown posts in sorted order.
posts = sorted(name for name in os.listdir(POST_DIR) if name.endswith('.md'))

## Print the filename of every post that has neither a sample-size
## metadata line nor any mention of a database.
for post in posts:

    ## Load the post as a list of lines.
    post_path = os.path.join(POST_DIR, post)
    with open(post_path) as f:
        lines = f.readlines()

    ## True when some line begins with the sample-size key.
    is_sample_size = any(line.startswith('sample_size') for line in lines)

    ## True when some line contains "database" (case-insensitive).
    is_database = any('database' in line.lower() for line in lines)

    ## Nothing to report when either marker is present.
    if is_sample_size or is_database:
        continue

    print(post)