Skip to content

Commit

Permalink
fix(lambda-python): asset hash is non-deterministic (aws#12984)
Browse files Browse the repository at this point in the history
When a Python handler uses external dependencies, the hash calculated on the output is non-determinstic due to the fact that it includes timestamps. To resolve that, change the asset hash strategy to `SOURCE` which means that the bundle will only be re-created if one of the source files changes.

Additionally, the Python image hash, which is also included in the asset hash (to ensure that if the build image changes, the bundle is invalidated) included the absolute path for the `Dockerfile`. This caused the image hash itself to change every time the image was built on a different system. To fix this, we stage the dependency files & dockerfile into a temp directory and build the image from there.

Fixes aws#12770
Fixes aws#12684


----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
Elad Ben-Israel authored Feb 24, 2021
1 parent 3cb3104 commit 37debc0
Show file tree
Hide file tree
Showing 14 changed files with 328 additions and 189 deletions.
84 changes: 67 additions & 17 deletions packages/@aws-cdk/aws-lambda-python/lib/bundling.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,56 @@ export interface BundlingOptions {
* Output path suffix ('python' for a layer, '.' otherwise)
*/
readonly outputPathSuffix: string;

/**
* Determines how asset hash is calculated. Assets will get rebuild and
* uploaded only if their hash has changed.
*
* If asset hash is set to `SOURCE` (default), then only changes to the source
* directory will cause the asset to rebuild. This means, for example, that in
* order to pick up a new dependency version, a change must be made to the
* source tree. Ideally, this can be implemented by including a dependency
* lockfile in your source tree or using fixed dependencies.
*
* If the asset hash is set to `OUTPUT`, the hash is calculated after
* bundling. This means that any change in the output will cause the asset to
* be invalidated and uploaded. Bear in mind that `pip` adds timestamps to
* dependencies it installs, which implies that in this mode Python bundles
* will _always_ get rebuild and uploaded. Normally this is an anti-pattern
* since build
*
* @default AssetHashType.SOURCE By default, hash is calculated based on the
* contents of the source directory. If `assetHash` is also specified, the
* default is `CUSTOM`. This means that only updates to the source will cause
* the asset to rebuild.
*/
readonly assetHashType?: cdk.AssetHashType;

/**
* Specify a custom hash for this asset. If `assetHashType` is set it must
* be set to `AssetHashType.CUSTOM`. For consistency, this custom hash will
* be SHA256 hashed and encoded as hex. The resulting hash will be the asset
* hash.
*
* NOTE: the hash is used in order to identify a specific revision of the asset, and
* used for optimizing and caching deployment activities related to this asset such as
* packaging, uploading to Amazon S3, etc. If you chose to customize the hash, you will
* need to make sure it is updated every time the asset changes, or otherwise it is
* possible that some deployments will not be invalidated.
*
* @default - based on `assetHashType`
*/
readonly assetHash?: string;
}

/**
* Produce bundled Lambda asset code
*/
export function bundle(options: BundlingOptions): lambda.AssetCode {
export function bundle(options: BundlingOptions): lambda.Code {
const { entry, runtime, outputPathSuffix } = options;

const hasDeps = hasDependencies(entry);
const stagedir = cdk.FileSystem.mkdtemp('python-bundling-');
const hasDeps = stageDependencies(entry, stagedir);

const depsCommand = chain([
hasDeps ? `rsync -r ${BUNDLER_DEPENDENCIES_CACHE}/. ${cdk.AssetStaging.BUNDLING_OUTPUT_DIR}/${outputPathSuffix}` : '',
Expand All @@ -54,15 +95,19 @@ export function bundle(options: BundlingOptions): lambda.AssetCode {
? 'Dockerfile.dependencies'
: 'Dockerfile';

const image = cdk.BundlingDockerImage.fromAsset(entry, {
// copy Dockerfile to workdir
fs.copyFileSync(path.join(__dirname, dockerfile), path.join(stagedir, dockerfile));

const image = cdk.BundlingDockerImage.fromAsset(stagedir, {
buildArgs: {
IMAGE: runtime.bundlingDockerImage.image,
},
file: path.join(__dirname, dockerfile),
file: dockerfile,
});

return lambda.Code.fromAsset(entry, {
assetHashType: cdk.AssetHashType.BUNDLE,
assetHashType: options.assetHashType,
assetHash: options.assetHash,
exclude: DEPENDENCY_EXCLUDES,
bundling: {
image,
Expand All @@ -75,20 +120,25 @@ export function bundle(options: BundlingOptions): lambda.AssetCode {
* Checks to see if the `entry` directory contains a type of dependency that
* we know how to install.
*/
export function hasDependencies(entry: string): boolean {
if (fs.existsSync(path.join(entry, 'Pipfile'))) {
return true;
}

if (fs.existsSync(path.join(entry, 'poetry.lock'))) {
return true;
}

if (fs.existsSync(path.join(entry, 'requirements.txt'))) {
return true;
export function stageDependencies(entry: string, stagedir: string): boolean {
const prefixes = [
'Pipfile',
'pyproject',
'poetry',
'requirements.txt',
];

let found = false;
for (const file of fs.readdirSync(entry)) {
for (const prefix of prefixes) {
if (file.startsWith(prefix)) {
fs.copyFileSync(path.join(entry, file), path.join(stagedir, file));
found = true;
}
}
}

return false;
return found;
}

function chain(commands: string[]): string {
Expand Down
42 changes: 42 additions & 0 deletions packages/@aws-cdk/aws-lambda-python/lib/function.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import * as fs from 'fs';
import * as path from 'path';
import * as lambda from '@aws-cdk/aws-lambda';
import { AssetHashType } from '@aws-cdk/core';
import { bundle } from './bundling';

// keep this import separate from other imports to reduce chance for merge conflicts with v2-main
Expand Down Expand Up @@ -37,6 +38,45 @@ export interface PythonFunctionProps extends lambda.FunctionOptions {
* @default lambda.Runtime.PYTHON_3_7
*/
readonly runtime?: lambda.Runtime;

/**
* Determines how asset hash is calculated. Assets will get rebuild and
* uploaded only if their hash has changed.
*
* If asset hash is set to `SOURCE` (default), then only changes to the source
* directory will cause the asset to rebuild. This means, for example, that in
* order to pick up a new dependency version, a change must be made to the
* source tree. Ideally, this can be implemented by including a dependency
* lockfile in your source tree or using fixed dependencies.
*
* If the asset hash is set to `OUTPUT`, the hash is calculated after
* bundling. This means that any change in the output will cause the asset to
* be invalidated and uploaded. Bear in mind that `pip` adds timestamps to
* dependencies it installs, which implies that in this mode Python bundles
* will _always_ get rebuild and uploaded. Normally this is an anti-pattern
* since build
*
* @default AssetHashType.SOURCE By default, hash is calculated based on the
* contents of the source directory. This means that only updates to the
* source will cause the asset to rebuild.
*/
readonly assetHashType?: AssetHashType;

/**
* Specify a custom hash for this asset. If `assetHashType` is set it must
* be set to `AssetHashType.CUSTOM`. For consistency, this custom hash will
* be SHA256 hashed and encoded as hex. The resulting hash will be the asset
* hash.
*
* NOTE: the hash is used in order to identify a specific revision of the asset, and
* used for optimizing and caching deployment activities related to this asset such as
* packaging, uploading to Amazon S3, etc. If you chose to customize the hash, you will
* need to make sure it is updated every time the asset changes, or otherwise it is
* possible that some deployments will not be invalidated.
*
* @default - based on `assetHashType`
*/
readonly assetHash?: string;
}

/**
Expand Down Expand Up @@ -70,6 +110,8 @@ export class PythonFunction extends lambda.Function {
runtime,
entry,
outputPathSuffix: '.',
assetHashType: props.assetHashType,
assetHash: props.assetHash,
}),
handler: `${index.slice(0, -3)}.${handler}`,
});
Expand Down
62 changes: 17 additions & 45 deletions packages/@aws-cdk/aws-lambda-python/test/bundling.test.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import * as fs from 'fs';
import * as path from 'path';
import { Code, Runtime } from '@aws-cdk/aws-lambda';
import { hasDependencies, bundle } from '../lib/bundling';
import { FileSystem } from '@aws-cdk/core';
import { stageDependencies, bundle } from '../lib/bundling';

jest.mock('@aws-cdk/aws-lambda');
const existsSyncOriginal = fs.existsSync;
const existsSyncMock = jest.spyOn(fs, 'existsSync');

jest.mock('child_process', () => ({
spawnSync: jest.fn(() => {
Expand Down Expand Up @@ -41,9 +40,6 @@ test('Bundling a function without dependencies', () => {
],
}),
}));

// Searches for requirements.txt in entry
expect(existsSyncMock).toHaveBeenCalledWith(path.join(entry, 'requirements.txt'));
});

test('Bundling a function with requirements.txt installed', () => {
Expand All @@ -63,9 +59,6 @@ test('Bundling a function with requirements.txt installed', () => {
],
}),
}));

// Searches for requirements.txt in entry
expect(existsSyncMock).toHaveBeenCalledWith(path.join(entry, 'requirements.txt'));
});

test('Bundling Python 2.7 with requirements.txt installed', () => {
Expand All @@ -85,9 +78,6 @@ test('Bundling Python 2.7 with requirements.txt installed', () => {
],
}),
}));

// Searches for requirements.txt in entry
expect(existsSyncMock).toHaveBeenCalledWith(path.join(entry, 'requirements.txt'));
});

test('Bundling a layer with dependencies', () => {
Expand Down Expand Up @@ -128,42 +118,24 @@ test('Bundling a python code layer', () => {
}));
});

describe('Dependency detection', () => {
test('Detects pipenv', () => {
existsSyncMock.mockImplementation((p: fs.PathLike) => {
if (/Pipfile/.test(p.toString())) {
return true;
}
return existsSyncOriginal(p);
});

expect(hasDependencies('/asset-input')).toEqual(true);
});

test('Detects poetry', () => {
existsSyncMock.mockImplementation((p: fs.PathLike) => {
if (/poetry.lock/.test(p.toString())) {
return true;
}
return existsSyncOriginal(p);
});

expect(hasDependencies('/asset-input')).toEqual(true);
});

test('Detects requirements.txt', () => {
existsSyncMock.mockImplementation((p: fs.PathLike) => {
if (/requirements.txt/.test(p.toString())) {
return true;
}
return existsSyncOriginal(p);
});

expect(hasDependencies('/asset-input')).toEqual(true);
describe('Dependency detection', () => {
test.each(['Pipfile', 'poetry.lock', 'requirements.txt'])('detect dependency %p', filename => {
// GIVEN
const sourcedir = FileSystem.mkdtemp('source-');
const stagedir = FileSystem.mkdtemp('stage-');
fs.writeFileSync(path.join(sourcedir, filename), 'dummy!');

// WHEN
const found = stageDependencies(sourcedir, stagedir);

// THEN
expect(found).toBeTruthy();
expect(fs.existsSync(path.join(stagedir, filename))).toBeTruthy();
});

test('No known dependencies', () => {
existsSyncMock.mockImplementation(() => false);
expect(hasDependencies('/asset-input')).toEqual(false);
const sourcedir = FileSystem.mkdtemp('source-');
expect(stageDependencies(sourcedir, '/dummy')).toEqual(false);
});
});
84 changes: 77 additions & 7 deletions packages/@aws-cdk/aws-lambda-python/test/function.test.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,36 @@
import '@aws-cdk/assert/jest';
import { Runtime } from '@aws-cdk/aws-lambda';
import { Stack } from '@aws-cdk/core';
import { Code, Runtime } from '@aws-cdk/aws-lambda';
import { AssetHashType, AssetOptions, Stack } from '@aws-cdk/core';
import { PythonFunction } from '../lib';
import { bundle } from '../lib/bundling';

jest.mock('../lib/bundling', () => {
return {
bundle: jest.fn().mockReturnValue({
bind: () => {
return { inlineCode: 'code' };
},
bindToResource: () => { return; },
bundle: jest.fn().mockImplementation((options: AssetOptions): Code => {
const mockObjectKey = (() => {
const hashType = options.assetHashType ?? (options.assetHash ? 'custom' : 'source');
switch (hashType) {
case 'source': return 'SOURCE_MOCK';
case 'output': return 'OUTPUT_MOCK';
case 'custom': {
if (!options.assetHash) { throw new Error('no custom hash'); }
return options.assetHash;
}
}

throw new Error('unexpected asset hash type');
})();

return {
isInline: false,
bind: () => ({
s3Location: {
bucketName: 'mock-bucket-name',
objectKey: mockObjectKey,
},
}),
bindToResource: () => { return; },
};
}),
hasDependencies: jest.fn().mockReturnValue(false),
};
Expand Down Expand Up @@ -73,3 +93,53 @@ test('throws with the wrong runtime family', () => {
runtime: Runtime.NODEJS_12_X,
})).toThrow(/Only `PYTHON` runtimes are supported/);
});

test('allows specifying hash type', () => {
new PythonFunction(stack, 'source1', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
});

new PythonFunction(stack, 'source2', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
assetHashType: AssetHashType.SOURCE,
});

new PythonFunction(stack, 'output', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
assetHashType: AssetHashType.OUTPUT,
});

new PythonFunction(stack, 'custom', {
entry: 'test/lambda-handler-nodeps',
index: 'index.py',
handler: 'handler',
assetHash: 'MY_CUSTOM_HASH',
});

expect(stack).toHaveResource('AWS::Lambda::Function', {
Code: {
S3Bucket: 'mock-bucket-name',
S3Key: 'SOURCE_MOCK',
},
});

expect(stack).toHaveResource('AWS::Lambda::Function', {
Code: {
S3Bucket: 'mock-bucket-name',
S3Key: 'OUTPUT_MOCK',
},
});

expect(stack).toHaveResource('AWS::Lambda::Function', {
Code: {
S3Bucket: 'mock-bucket-name',
S3Key: 'MY_CUSTOM_HASH',
},
});
});
Loading

0 comments on commit 37debc0

Please sign in to comment.