diff --git a/crypt4gh/__init__.py b/crypt4gh/__init__.py index ad47ab5..bb4b35d 100644 --- a/crypt4gh/__init__.py +++ b/crypt4gh/__init__.py @@ -37,7 +37,7 @@ __title__ = 'GA4GH cryptographic utilities' -__version__ = '1.7' # VERSION in header is 1 (as 4 bytes little endian) +__version__ = '1.8' # VERSION in header is 1 (as 4 bytes little endian) __author__ = 'Frédéric Haziza' __author_email__ = 'frederic.haziza@crg.eu' __license__ = 'Apache License 2.0' diff --git a/crypt4gh/cli.py b/crypt4gh/cli.py index 345fdfe..e1205e2 100644 --- a/crypt4gh/cli.py +++ b/crypt4gh/cli.py @@ -7,6 +7,7 @@ from functools import partial from getpass import getpass import re +import datetime from docopt import docopt from nacl.public import PrivateKey @@ -26,7 +27,7 @@ Utility for the cryptographic GA4GH standard, reading from stdin and outputting to stdout. Usage: - {PROG} [-hv] [--log ] encrypt [--sk ] --recipient_pk [--recipient_pk ]... [--range ] [--header ] + {PROG} [-hv] [--log ] encrypt [--sk ] --recipient_pk [--recipient_pk ]... [--range ] [--header ] [--expiration ] {PROG} [-hv] [--log ] decrypt [--sk ] [--sender_pk ] [--range ] {PROG} [-hv] [--log ] rearrange [--sk ] --range {PROG} [-hv] [--log ] reencrypt [--sk ] --recipient_pk [--recipient_pk ]... [--trim] [--header-only] @@ -43,6 +44,7 @@ -t, --trim Keep only header packets that you can decrypt --header Where to write the header (default: stdout) --header-only Whether the input data consists only of a header (default: false) + --expiration Expiration date (in ISO format) Environment variables: C4GH_LOG If defined, it will be used as the default logger @@ -124,6 +126,12 @@ def retrieve_private_key(args, generate=False): return get_private_key(seckeypath, cb) +def make_timestamp(date): + # if date.startswith('+'): + # return int((datetime.datetime.now(datetime.UTC) + parse_deltatime(date[1:])).timestamp()) + LOG.debug("expiration: %s", date) + return int(datetime.datetime.fromisoformat(date).timestamp()) + def encrypt(args): assert( args['encrypt'] ) @@ -148,6 +156,10 @@ def build_recipients(): raise ValueError("No Recipients' Public Key found") header = args["--header"] + timestamp = args["--expiration"] + if timestamp: + timestamp = make_timestamp(timestamp) + LOG.debug("timestamp: %s", timestamp) try: if header: @@ -157,7 +169,8 @@ def build_recipients(): sys.stdout.buffer, headerfile = header, offset = range_start, - span = range_span) + span = range_span, + timestamp = timestamp) finally: if header: header.close() diff --git a/crypt4gh/header.py b/crypt4gh/header.py index 36b1378..b470aa6 100644 --- a/crypt4gh/header.py +++ b/crypt4gh/header.py @@ -3,7 +3,7 @@ import os import logging -from itertools import chain +import datetime # from types import GeneratorType from nacl.bindings import (crypto_kx_client_session_keys, @@ -85,13 +85,17 @@ def serialize(packets): # Encrypted data packet # ------------------------------------- -PACKET_TYPE_DATA_ENC = b'\x00\x00\x00\x00' # 0 little endian +PACKET_TYPE_DATA_ENC = b'\x00\x00\x00\x00' # 0 little endian PACKET_TYPE_EDIT_LIST = b'\x01\x00\x00\x00' # 1 little endian +PACKET_TYPE_TIMESTAMP = b'\x02\x00\x00\x00' # 2 little endian +PACKET_TYPE_LINK = b'\x03\x00\x00\x00' # 3 little endian -def partition_packets(packets): +def extract(packets): enc_packets = [] edits = None + timestamp = None + link = None for packet in packets: @@ -105,11 +109,21 @@ def partition_packets(packets): raise ValueError('Invalid file: Too many edit list packets') edits = packet[4:] + elif packet_type == PACKET_TYPE_TIMESTAMP: + if timestamp is not None: # reject files if many timestamp + raise ValueError('Invalid file: Too many timestamp packets') + timestamp = packet[4:] + + elif packet_type == PACKET_TYPE_LINK: + if link is not None: # reject files if many links + raise ValueError('Invalid file: Too many links') + link = packet[4:] + else: # Bark if unsupported packet. Don't just ignore it packet_type = int.from_bytes(packet_type, byteorder='little') raise ValueError(f'Invalid packet type {packet_type}') - return (enc_packets, edits) + return (enc_packets, edits, timestamp, link) # ------------------------------------- # Encrypted data packet @@ -160,6 +174,31 @@ def parse_edit_list_packet(packet): raise ValueError('Invalid edit list') return (int.from_bytes(packet[i:i+8], byteorder='little') for i in range(4, nb_lengths * 8, 8)) # generator +# ------------------------------------- +# Timestamp packet +# ------------------------------------- +def make_packet_timestamp(timestamp): + return (PACKET_TYPE_TIMESTAMP + + int(timestamp).to_bytes(8,'little')) # we drop the milliseconds + +def parse_timestamp_packet(packet): + if len(packet) != 8: + raise ValueError('Invalid timestamp packet') + return datetime.datetime.fromtimestamp(int.from_bytes(packet, byteorder='little'), datetime.UTC) + +# ------------------------------------- +# link packet +# ------------------------------------- +def make_packet_link(link): + return (PACKET_TYPE_LINK + # + len(link).to_bytes(4,'little') + + link) + +def parse_link_packet(packet): + # size = int.from_bytes(packet[:4], byteorder='little') + # return packet[:size] + return packet + # ------------------------------------- # Header Encryption Methods Conventions # ------------------------------------- @@ -315,11 +354,13 @@ def deconstruct(infile, keys, sender_pubkey=None): if not packets: # no packets were decrypted raise ValueError('No supported encryption method') - data_packets, edit_packet = partition_packets(packets) + data_packets, edit_packet, timestamp_packet, link_packet = extract(packets) # Parse returns the session key (since it should be method 0) session_keys = [parse_enc_packet(packet) for packet in data_packets] edit_list = parse_edit_list_packet(edit_packet) if edit_packet else None - return session_keys, edit_list + expiration = parse_timestamp_packet(timestamp_packet) if timestamp_packet else None + link = parse_link_packet(link_packet) if link_packet else None + return session_keys, edit_list, expiration, link # ------------------------------------- # Header Re-Encryption @@ -376,7 +417,7 @@ def rearrange(header_packets, keys, offset=0, span=None, sender_pubkey=None): if not decrypted_packets: raise ValueError('No header packet could be decrypted') - data_packets, edit_packet = partition_packets(decrypted_packets) + data_packets, edit_packet, timestamp_packet, link_packet = extract(decrypted_packets) # # Note: We do not yet implement chunking a file that already contains an Edit List @@ -416,7 +457,11 @@ def segment_oracle(): LOG.info('Reencrypting all packets') packets = [PACKET_TYPE_DATA_ENC + packet for packet in data_packets] - packets.append(edit_packet) # adding the edit list at the end + packets.append(edit_packet) + if timestamp_packet: + packets.append(timestamp_packet) + if link_packet: + packets.append(link_packet) packets = [encrypted_packet for packet in packets for encrypted_packet in encrypt(packet, keys)] return packets, segment_oracle() diff --git a/crypt4gh/lib.py b/crypt4gh/lib.py index 341795c..8ef603c 100644 --- a/crypt4gh/lib.py +++ b/crypt4gh/lib.py @@ -5,6 +5,8 @@ import logging import io import collections +from itertools import chain +import datetime from nacl.bindings import (crypto_aead_chacha20poly1305_ietf_encrypt, crypto_aead_chacha20poly1305_ietf_decrypt) @@ -14,6 +16,7 @@ from . import SEGMENT_SIZE from .exceptions import close_on_broken_pipe from . import header +from .fetcher import Fetcher, URLFetcher LOG = logging.getLogger(__name__) @@ -46,7 +49,7 @@ def _encrypt_segment(data, process, key): @close_on_broken_pipe -def encrypt(keys, infile, outfile, headerfile=None, offset=0, span=None): +def encrypt(keys, infile, outfile, headerfile=None, offset=0, span=None, timestamp=None): '''Encrypt infile into outfile, using the list of keys. @@ -88,8 +91,12 @@ def encrypt(keys, infile, outfile, headerfile=None, offset=0, span=None): # Output the header LOG.debug('Creating Crypt4GH header') - header_content = header.make_packet_data_enc(encryption_method, session_key) - header_packets = header.encrypt(header_content, keys) + header_packets = header.encrypt(header.make_packet_data_enc(encryption_method, session_key), + keys) + if timestamp: + header_packets = chain(header_packets, + header.encrypt(header.make_packet_timestamp(timestamp), + keys)) header_bytes = header.serialize(header_packets) LOG.debug('header length: %d', len(header_bytes)) @@ -380,9 +387,17 @@ def decrypt(keys, infile, outfile, sender_pubkey=None, offset=0, span=None): ) ) - session_keys, edit_list = header.deconstruct(infile, keys, sender_pubkey=sender_pubkey) + session_keys, edit_list, expiration, link = header.deconstruct(infile, keys, sender_pubkey=sender_pubkey) + + if expiration and (datetime.datetime.now(datetime.UTC) > expiration): + raise ValueError(f'Expired on {expiration}') # Infile in now positioned at the beginning of the data portion + # or we fetch the data portion from the link. + if link: + # replacing the infile with a fetcher + outfile = URLFetcher(link) + # Note: the remainder of the infile might not be empty # Generator to slice the output output = limited_output(offset=offset, limit=span, process=outfile.write) diff --git a/setup.py b/setup.py index 5b3bb88..900a974 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,7 @@ _readme = (Path(__file__).parent / "README.md").read_text() setup(name='crypt4gh', - version='1.7', + version='1.8', url='https://www.github.com/EGA-archive/crypt4gh', license='Apache License 2.0', author='Frédéric Haziza',