Skip to content

Commit

Permalink
Merge pull request #42 from jvkersch/header-stream
Browse files Browse the repository at this point in the history
Support for storing header and data in separate files
  • Loading branch information
silverdaz authored May 25, 2024
2 parents 10a05c1 + fc6d3a0 commit effb89c
Show file tree
Hide file tree
Showing 9 changed files with 115 additions and 20 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/bats.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,14 @@ jobs:

runs-on: ubuntu-latest
strategy:
max-parallel: 4
max-parallel: 6
matrix:
python-version: [3.7, 3.8, 3.9]
python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', '3.12']

steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v1
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

# Crypt4GH Encryption Utility

`crypt4gh`is a Python tool to encrypt, decrypt or re-encrypt files, according to the [GA4GH encryption file format](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/).
`crypt4gh` is a Python tool to encrypt, decrypt or re-encrypt files, according to the [GA4GH encryption file format](https://www.ga4gh.org/news/crypt4gh-a-secure-method-for-sharing-human-genetic-data/).


## Installation
Expand Down
2 changes: 1 addition & 1 deletion crypt4gh/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@


__title__ = 'GA4GH cryptographic utilities'
__version__ = '1.6' # VERSION in header is 1 (as 4 bytes little endian)
__version__ = '1.7' # VERSION in header is 1 (as 4 bytes little endian)
__author__ = 'Frédéric Haziza'
__author_email__ = 'frederic.haziza@crg.eu'
__license__ = 'Apache License 2.0'
Expand Down
29 changes: 20 additions & 9 deletions crypt4gh/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@
Utility for the cryptographic GA4GH standard, reading from stdin and outputting to stdout.
Usage:
{PROG} [-hv] [--log <file>] encrypt [--sk <path>] --recipient_pk <path> [--recipient_pk <path>]... [--range <start-end>]
{PROG} [-hv] [--log <file>] encrypt [--sk <path>] --recipient_pk <path> [--recipient_pk <path>]... [--range <start-end>] [--header <path>]
{PROG} [-hv] [--log <file>] decrypt [--sk <path>] [--sender_pk <path>] [--range <start-end>]
{PROG} [-hv] [--log <file>] rearrange [--sk <path>] --range <start-end>
{PROG} [-hv] [--log <file>] reencrypt [--sk <path>] --recipient_pk <path> [--recipient_pk <path>]... [--trim]
{PROG} [-hv] [--log <file>] reencrypt [--sk <path>] --recipient_pk <path> [--recipient_pk <path>]... [--trim] [--header-only]
Options:
-h, --help Prints this help and exit
Expand All @@ -41,7 +41,8 @@
--sender_pk <path> Peer's Curve25519-based Public key to verify provenance (akin to signature)
--range <start-end> Byte-range either as <start-end> or just <start> (Start included, End excluded)
-t, --trim Keep only header packets that you can decrypt
--header <path> Where to write the header (default: stdout)
--header-only Whether the input data consists only of a header (default: false)
Environment variables:
C4GH_LOG If defined, it will be used as the default logger
Expand Down Expand Up @@ -146,11 +147,20 @@ def build_recipients():
if not recipient_keys:
raise ValueError("No Recipients' Public Key found")

lib.encrypt(recipient_keys,
sys.stdin.buffer,
sys.stdout.buffer,
offset = range_start,
span = range_span)
header = args["--header"]

try:
if header:
header = open(header, 'wb') # let it raise exception
lib.encrypt(recipient_keys,
sys.stdin.buffer,
sys.stdout.buffer,
headerfile = header,
offset = range_start,
span = range_span)
finally:
if header:
header.close()


def decrypt(args):
Expand Down Expand Up @@ -212,4 +222,5 @@ def build_recipients():
recipient_keys,
sys.stdin.buffer,
sys.stdout.buffer,
trim=args['--trim'])
trim=args['--trim'],
header_only=args['--header-only'])
13 changes: 10 additions & 3 deletions crypt4gh/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ def _encrypt_segment(data, process, key):


@close_on_broken_pipe
def encrypt(keys, infile, outfile, offset=0, span=None):
def encrypt(keys, infile, outfile, headerfile=None, offset=0, span=None):
'''Encrypt infile into outfile, using the list of keys.
Expand All @@ -57,6 +57,8 @@ def encrypt(keys, infile, outfile, offset=0, span=None):
'''

LOG.info('Encrypting the file')

headerfile = headerfile or outfile

# Forward to start position
LOG.debug(" Start Coordinate: %s", offset)
Expand Down Expand Up @@ -91,7 +93,7 @@ def encrypt(keys, infile, outfile, offset=0, span=None):
header_bytes = header.serialize(header_packets)

LOG.debug('header length: %d', len(header_bytes))
outfile.write(header_bytes)
headerfile.write(header_bytes)

# ...and cue music
LOG.debug("Streaming content")
Expand Down Expand Up @@ -405,7 +407,7 @@ def decrypt(keys, infile, outfile, sender_pubkey=None, offset=0, span=None):


@close_on_broken_pipe
def reencrypt(keys, recipient_keys, infile, outfile, chunk_size=4096, trim=False):
def reencrypt(keys, recipient_keys, infile, outfile, chunk_size=4096, trim=False, header_only=False):
'''Extract header packets from infile and generate another one to outfile.
The encrypted data section is only copied from infile to outfile.'''

Expand All @@ -414,6 +416,11 @@ def reencrypt(keys, recipient_keys, infile, outfile, chunk_size=4096, trim=False
packets = header.reencrypt(header_packets, keys, recipient_keys, trim=trim)
outfile.write(header.serialize(packets))

# If header-only reencryption, we are done.
if header_only:
LOG.info(f'Header-only reencryption Successful')
return

# Stream the remainder
LOG.info(f'Streaming the remainder of the file')
while True:
Expand Down
2 changes: 1 addition & 1 deletion docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@
today_fmt = '%B %d, %Y'

def setup(app):
app.add_stylesheet('custom.css')
app.add_css_file('custom.css')

# -- Other stuff ----------------------------------------------------------
htmlhelp_basename = 'crypt4gh'
Expand Down
23 changes: 23 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,26 @@ Any user can generate a keypair with:
$ crypt4gh-keygen --sk user.sec --pk user.pub
The private key will be encrypted with a passphrase. The user is prompted at the terminal for that passphrase.

Storing the encrypted header separately
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The encrypted header can be stored separately from the encrypted data. This is useful, for example, when sharing the encrypted message with many recipients. In this case, only the header needs to be re-encrypted (for a specific recipient) while the encrypted data can stay the same.

To store the encrypted header in a separate file (``header.bob.c4gh`` in the example below), use the flag ``--header``:

.. code-block:: console
$ crypt4gh encrypt --sk alice.sec --recipient_pk bob.pub --header header.bob.c4gh < M > M.data.c4gh
Bob can then decrypt the message by concatenating the header and the data, and decrypting the whole file:

.. code-block:: console
$ cat header.bob.c4gh M.data.c4gh | crypt4gh decrypt --sk bob.sec > M
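To re-encrypt the message for another user Eve, with public key ``eve.pub``, Alice can run the ``crypt4gh reencrypt`` command with the ``--header-only`` flag, which indicates that the input consists of a header only. This requires a header that Alice herself can decrypt (here ``header.alice.c4gh``, i.e. a header that was encrypted with ``alice.pub`` among the recipients):

.. code-block:: console
$ crypt4gh reencrypt --sk alice.sec --recipient_pk eve.pub --header-only < header.alice.c4gh > header.eve.c4gh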
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
_readme = (Path(__file__).parent / "README.md").read_text()

setup(name='crypt4gh',
version='1.6',
version='1.7',
url='https://www.github.com/EGA-archive/crypt4gh',
license='Apache License 2.0',
author='Frédéric Haziza',
Expand Down
54 changes: 54 additions & 0 deletions tests/header_stream.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/usr/bin/env bats

load _common/helpers

function setup() {
    # Scratch directory used by the tests in this file; it is named after
    # the test file itself with a ".d" suffix.
    TESTFILES="${BATS_TEST_FILENAME}.d"
    mkdir -p "${TESTFILES}"
}

function teardown() {
    # Remove the scratch directory created by setup().
    # The expansion is quoted so that a test path containing whitespace or
    # glob characters is passed to rm as a single argument instead of being
    # word-split into several (possibly unrelated) paths.
    rm -rf "${TESTFILES}"
}

@test "Bob sends the testfile secretly (with separate header and data) to Alice" {

    # Plaintext fixture to round-trip through encrypt/decrypt.
    TESTFILE="${BATS_TEST_DIRNAME}/_common/testfile.abcd"

    # Bob encrypts the testfile for Alice, storing the header separately:
    # the header is written to the --header path, the rest goes to stdout.
    # All paths are quoted so the test also works from a directory whose
    # name contains whitespace (unquoted redirections would be ambiguous).
    export C4GH_PASSPHRASE="${BOB_PASSPHRASE}"
    crypt4gh encrypt --sk "${BOB_SECKEY}" --recipient_pk "${ALICE_PUBKEY}" --header "${TESTFILES}/header.alice.c4gh" < "${TESTFILE}" > "${TESTFILES}/data.c4gh"

    # Alice concatenates the header and the data and decrypts the combined result
    export C4GH_PASSPHRASE="${ALICE_PASSPHRASE}"
    cat "${TESTFILES}/header.alice.c4gh" "${TESTFILES}/data.c4gh" | crypt4gh decrypt --sk "${ALICE_SECKEY}" > "${TESTFILES}/message.received"

    # The decrypted output must be identical to the original plaintext.
    run diff "${TESTFILE}" "${TESTFILES}/message.received"
    [ "$status" -eq 0 ]

    unset C4GH_PASSPHRASE
}

@test "Bob encrypts the testfile for himself (with separate header) and reencrypts the header for Alice" {

    # Plaintext fixture to round-trip through encrypt/reencrypt/decrypt.
    TESTFILE="${BATS_TEST_DIRNAME}/_common/testfile.abcd"

    # Bob encrypts the testfile for himself, keeping the header in its own file.
    # All paths are quoted so the test also works from a directory whose
    # name contains whitespace (unquoted redirections would be ambiguous).
    export C4GH_PASSPHRASE="${BOB_PASSPHRASE}"
    crypt4gh encrypt --sk "${BOB_SECKEY}" --recipient_pk "${BOB_PUBKEY}" --header "${TESTFILES}/header.bob.c4gh" < "${TESTFILE}" > "${TESTFILES}/data.c4gh"

    # Bob changes the header for Alice; only the header file is fed in,
    # hence --header-only. The data file is left untouched.
    crypt4gh reencrypt --sk "${BOB_SECKEY}" --recipient_pk "${ALICE_PUBKEY}" --header-only < "${TESTFILES}/header.bob.c4gh" > "${TESTFILES}/header.alice.c4gh"

    # Alice concatenates the header and data and decrypts the results
    export C4GH_PASSPHRASE="${ALICE_PASSPHRASE}"
    cat "${TESTFILES}/header.alice.c4gh" "${TESTFILES}/data.c4gh" | crypt4gh decrypt --sk "${ALICE_SECKEY}" > "${TESTFILES}/message.received"

    # The decrypted output must be identical to the original plaintext.
    run diff "${TESTFILE}" "${TESTFILES}/message.received"
    [ "$status" -eq 0 ]

    unset C4GH_PASSPHRASE
}

0 comments on commit effb89c

Please sign in to comment.