Merge pull request #6 from SciLifeLab/master

Version sync
NationalGenomicsInfrastructure · Jan 10, 2024 · c46f07c · c46f07c
2 parents b11177c + d519d2d
commit c46f07c
Show file tree

Hide file tree

Showing 5 changed files with 178 additions and 11 deletions.
diff --git a/Gdrivebackup.sh b/Gdrivebackup.sh
@@ -8,8 +8,10 @@
 
 currentDate=`date +"%Y-%m-%d"`
 echo 'Run $currentDate'
+GDRIVE="$HOME/Google Drive/My Drive"
 #Zip GDrive Sync file
-zip -vr $HOME/opt/zipFilesTemp/QAbackup.$currentDate.zip  $HOME/Google\ Drive/
+# Quote every path: $GDRIVE contains spaces and would be word-split if left unquoted
+zip -vr "$HOME/opt/zipFilesTemp/QAbackup.$currentDate.zip" "$GDRIVE/Electronic logbooks (QA system)" \
+        "$GDRIVE/Genomics Platform" "$GDRIVE/_Quality assurance system"
 
 #make mountpoint
 mkdir -p $HOME/kvalitetssystem

diff --git a/check_dirty.py b/check_dirty.py
@@ -0,0 +1,22 @@
+#!/usr/bin/python3
+"""
+Checks if one or several repos are dirty and/or contain untracked files.
+"""
+import argparse
+import sys
+from git import Repo
+
+def check_if_dirty(repo_paths):
+    """Report on stderr each repo that is dirty and/or has untracked files.
+
+    repo_paths -- iterable of paths, each one opened with `Repo`
+    """
+    for repo_path in repo_paths:
+        repo = Repo(repo_path)
+        if repo.is_dirty():
+            print("Repo is dirty: ", repo_path, file=sys.stderr)
+        untracked = repo.untracked_files
+        if untracked:
+            print("Repo has untracked files: ", repo_path, untracked, file=sys.stderr)
+
+if __name__ == "__main__":
+    # Command-line entry point: every positional argument is a repository path
+    cli = argparse.ArgumentParser(description='check if dirty')
+    cli.add_argument(
+        'repo_paths',
+        nargs='+',
+        help='one or more paths to repositories to check',
+    )
+    check_if_dirty(cli.parse_args().repo_paths)
diff --git a/du_miarka.py b/du_miarka.py
@@ -0,0 +1,114 @@
+#!/usr/bin/python3
+"""A faster version of `du` for our cluster Miarka, using ceph supplied space consumption.
+
+Author: @alneberg
+"""
+
+
+import argparse
+import glob
+import os
+import subprocess
+import sys
+
+
+def sizeof_fmt(num, suffix="B"):
+    """Human readable format for file sizes
+
+    Divides `num` by 1024 until its absolute value is below 1024 and returns
+    it with one decimal, followed by the matching unit letter and `suffix`,
+    e.g. 1536 -> "1.5KB".
+
+    From https://stackoverflow.com/a/1094933/
+    """
+    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
+        if abs(num) < 1024.0:
+            return f"{num:3.1f}{unit}{suffix}"
+        num /= 1024.0
+    # Single letter like the units above (this used to be "Yi")
+    return f"{num:.1f}Y{suffix}"
+
+
+def print_file_size(path, bytes, raw_bytes=False):
+    """Print one line to stdout: the size, a tab, then the path.
+
+    The size is printed as given when `raw_bytes` is true, otherwise it is
+    formatted with `sizeof_fmt`.
+    """
+    shown_size = bytes if raw_bytes else sizeof_fmt(bytes)
+    print(f"{shown_size}\t{path}")
+
+
+def main(input_paths, raw_bytes=False, size_sorted=False, depth=0):
+    """Print the size of every input path, optionally descending `depth` levels.
+
+    Directories are sized by asking `getfattr` for the `ceph.dir.rbytes`
+    attribute; every other path is sized with `os.stat`. A path that cannot
+    be sized is reported on stderr and skipped.
+
+    input_paths -- iterable of paths to report on
+    raw_bytes   -- print plain byte counts instead of human readable sizes
+    size_sorted -- print the entries in ascending size order
+    depth       -- number of levels below each input path to include as well
+    """
+    paths = []
+    # Do the depth traversing
+    for path in input_paths:
+        paths.append(path)
+        pattern = path
+        # The loop variable must not be called `depth`: rebinding the parameter
+        # would shrink the traversal for every input path after the first one.
+        for _ in range(depth):
+            pattern += "/*"
+            depth_paths = glob.glob(pattern)
+            if not depth_paths:
+                break  # reached the tip of the branch
+            paths += depth_paths
+
+    # Append files and sizes to a list so that it can be sorted in the end
+    filesizes = []
+    for path in paths:
+        # Check if path is not a directory
+        if not os.path.isdir(path):
+            # getfattr didn't work for files, so we use os.stat
+            try:
+                statinfo = os.stat(path)
+            except OSError as e:
+                # This happens for example with broken links
+                print(str(e), file=sys.stderr)
+                continue
+            filesizes.append((path, statinfo.st_size))
+            continue
+
+        # .run Requires python 3.5 or higher
+        result = subprocess.run(
+            ["getfattr", "-n", "ceph.dir.rbytes", path],
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+        )
+        if result.returncode != 0:
+            print(result.stderr.decode("utf-8"), file=sys.stderr)
+            continue
+
+        # Typical output from getfattr:
+        #
+        #  # file: proj/ngi2016004/private/johannes/pipelines
+        #  ceph.dir.rbytes="3699513252"
+        #
+
+        # Fall back to the path we asked about in case no "# file:" line
+        # precedes the attribute line.
+        filename = path
+        for line in result.stdout.decode("utf-8").splitlines():
+            if line.startswith("# file:"):
+                # Split at most twice so that spaces in the file name survive
+                filename = line.split(" ", 2)[2].strip()
+            elif line.startswith("ceph.dir.rbytes"):
+                size = int((line.split("=")[1]).strip('"'))
+                filesizes.append((filename, size))
+
+    if size_sorted:
+        filesizes.sort(key=lambda x: x[1])
+
+    for filename, size in filesizes:
+        print_file_size(filename, size, raw_bytes)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: parse the options and hand them to main()
+    cli = argparse.ArgumentParser(
+        description="A faster version of `du` for our cluster Miarka, using ceph supplied space consumption."
+    )
+    cli.add_argument("paths", nargs="*", help="The paths that should be looked into")
+    cli.add_argument(
+        "-r", "--raw-bytes",
+        action="store_true",
+        help="Print sizes in bytes instead of human readable format (e.g. 1K 234M 2G)",
+    )
+    cli.add_argument("--sort", action="store_true", help="Sort by size")
+    cli.add_argument(
+        "-d", "--depth",
+        default=0,
+        type=int,
+        help="The number of levels to go down to inside the given directory",
+    )
+    options = cli.parse_args()
+
+    main(
+        options.paths,
+        raw_bytes=options.raw_bytes,
+        size_sorted=options.sort,
+        depth=options.depth,
+    )
diff --git a/statusdb_snicuser_checker.py b/statusdb_snicuser_checker.py
@@ -53,7 +53,7 @@ def update_statusdb(config, dryrun=True):
                 print(doc['project_name'], doc['details']['snic_checked'])
 
 def snic_check(email, config):
-    url = 'https://supr.snic.se/api/person/email_present/?email={}'.format(email)
+    url = 'https://api.supr.naiss.se/api/person/email_present/?email={}'.format(email)
     response = requests.get(url, auth=HTTPBasicAuth(config.get('username'), config.get('password')))
     if not response.ok and response.reason == 'Unauthorized':
         print('ERROR: SNIC API is IP restricted and this script can only be run from ngi-internal OR credentials are wrong')

diff --git a/update_exchange_rates.py b/update_exchange_rates.py
@@ -4,10 +4,37 @@
 """
 
 import argparse
-import yaml
 from couchdb import Server
 import datetime
-from forex_python.converter import CurrencyRates
+import json
+import requests
+import yaml
+
+
+class CurrencyRates(object):
+    """A class to fetch currency rates from fixer.io.
+
+    Rates are requested with SEK as base and are fetched lazily, once, on the
+    first call to `get_rate`.
+    """
+    def __init__(self, config_file):
+        """Read the API key from the `apikey` entry of the YAML `config_file`."""
+
+        self.rates_fetched = False
+        self._source_url = "https://api.apilayer.com/fixer/latest"
+
+        with open(config_file, 'r') as fh:
+            config = yaml.load(fh, Loader=yaml.SafeLoader)
+        self._apikey = config.get('apikey')
+
+    def fetch_rates(self):
+        """Fetch the USD and EUR rates (base SEK) and store them in `self.rates`.
+
+        Raises RuntimeError if the API does not answer with HTTP status 200.
+        """
+        response = requests.get(self._source_url, params={'base': 'SEK', 'symbols': 'USD, EUR'}, headers={'apikey': self._apikey})
+        # Explicit check instead of `assert`, which is stripped under `python -O`
+        if response.status_code != 200:
+            raise RuntimeError(
+                "Could not fetch rates from {}: HTTP status {}".format(
+                    self._source_url, response.status_code))
+        self.data = json.loads(response.text)
+        self.rates = self.data['rates']
+        self.rates_fetched = True
+
+    def get_rate(self, currency):
+        """Get the value in SEK of one unit of the given currency.
+
+        The stored rates have SEK as base, so the rate is inverted here.
+        """
+        if not self.rates_fetched:
+            self.fetch_rates()
+
+        return 1/self.rates[currency]
 
 
 def get_current(db, item):
@@ -26,12 +53,12 @@ def check_financial_crisis(current_val, new_val, currency):
         if abs(rel_change) > 0.20:
             raise Exception("Financial crisis or rather; something is likely wrong!")
 
-def main(config, push_to_server=False):
+def main(config, fixer_io_config, push_to_server=False):
 
-    c = CurrencyRates()
+    c = CurrencyRates(fixer_io_config)
     # Will raise RatesNotAvailableError if not able to fetch from the api
-    usd_to_sek = c.get_rate('USD', 'SEK')
-    eur_to_sek = c.get_rate('EUR', 'SEK')
+    usd_to_sek = c.get_rate('USD')
+    eur_to_sek = c.get_rate('EUR')
 
     # Inconsistent results for Euro after broken API was updated
     if isinstance(eur_to_sek, str):
@@ -42,7 +69,7 @@ def main(config, push_to_server=False):
     doc['Issued at'] = datetime.datetime.now().isoformat()
     # I know it's bad practice to call the _source_url method,
     # but if it breaks it breaks.
-    doc['Data source'] = "forex_python ({})".format(c._source_url())
+    doc['Data source'] = "Fixer.io via ({})".format(c._source_url)
     doc['USD_in_SEK'] = usd_to_sek
     doc['EUR_in_SEK'] = eur_to_sek
 
@@ -53,7 +80,7 @@ def main(config, push_to_server=False):
     url_string = 'https://{}:{}@{}'.format(
                     server_settings['statusdb'].get('username'),
                     server_settings['statusdb'].get('password'),
-                    server_settings['statusdb'].get('url'))
+                    server_settings['statusdb'].get('url')
                 )
     couch = Server(url_string)
 
@@ -87,8 +114,10 @@ def main(config, push_to_server=False):
     parser = argparse.ArgumentParser(description=__doc__)
     parser.add_argument('--statusdb_config', required=True,
                         help='The statusdb_cred.yaml file.')
+    parser.add_argument('--fixer_io_config', required=True,
+                        help='The fixer_io.yaml file.')
     parser.add_argument('--push', action='store_true', help='Use this tag to '
                         "make the script push the changes to statusdb")
 
     args = parser.parse_args()
-    main(args.statusdb_config, push_to_server=args.push)
+    main(args.statusdb_config, args.fixer_io_config, push_to_server=args.push)