|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | +# |
| 4 | +# Copyright (c) nexB Inc. and others. All rights reserved. |
| 5 | +# ScanCode is a trademark of nexB Inc. |
| 6 | +# SPDX-License-Identifier: Apache-2.0 |
| 7 | +# See http://www.apache.org/licenses/LICENSE-2.0 for the license text. |
| 8 | +# See https://github.com/aboutcode-org/python-inspector for support or download. |
| 9 | +# See https://aboutcode.org for more information about nexB OSS projects. |
| 10 | +# |
| 11 | + |
| 12 | +import pytest |
| 13 | + |
| 14 | +from python_inspector.package_data import get_file_match_key |
| 15 | + |
| 16 | + |
class TestGetFileMatchKey:
    """Tests for the get_file_match_key function.

    Each test passes a distribution URL (and, in one case, an explicit
    ``sha256`` keyword argument) to ``get_file_match_key`` and checks the
    returned ``(filename, sha256)`` pair. The second element is expected to
    be ``None`` whenever no usable SHA256 is available.
    """

    def test_extracts_filename_from_simple_url(self):
        """A URL without a fragment yields the filename and no hash."""
        url = "https://files.pythonhosted.org/packages/numpy-1.26.4-py3-none-any.whl"
        assert get_file_match_key(url) == ("numpy-1.26.4-py3-none-any.whl", None)

    def test_extracts_filename_and_hash_from_url_with_fragment(self):
        """A ``#sha256=<64 hex chars>`` fragment is returned as the hash."""
        # A SHA256 digest is 64 hex characters long.
        expected_hash = "a" * 64
        url = (
            "https://files.pythonhosted.org/packages/numpy-1.26.4-py3-none-any.whl#sha256="
            + expected_hash
        )
        assert get_file_match_key(url) == ("numpy-1.26.4-py3-none-any.whl", expected_hash)

    def test_uses_provided_sha256_over_url_fragment(self):
        """An explicitly provided SHA256 takes precedence over the URL fragment."""
        url = "https://files.pythonhosted.org/packages/file.whl#sha256=abc123" + "0" * 58
        provided_hash = "def456" + "0" * 58
        assert get_file_match_key(url, sha256=provided_hash) == ("file.whl", provided_hash)

    def test_handles_pypi_org_style_urls(self):
        """PyPI.org style URLs with hash-like path segments yield the last component."""
        url = "https://files.pythonhosted.org/packages/c1/fa/abc123/package-1.0-py3-none-any.whl"
        assert get_file_match_key(url) == ("package-1.0-py3-none-any.whl", None)

    def test_handles_artifactory_simple_style_urls(self):
        """Artifactory /simple endpoint style URLs yield the last path component."""
        url = "https://artifactory.example.com/simple/../packages/packages/c1/fa/package-1.0.whl"
        assert get_file_match_key(url) == ("package-1.0.whl", None)

    def test_handles_artifactory_json_style_urls(self):
        """Artifactory JSON API style URLs yield the last path component."""
        url = "https://artifactory.example.com/pypi/c1/fa/package-1.0.whl"
        assert get_file_match_key(url) == ("package-1.0.whl", None)

    def test_handles_relative_urls_resolved(self):
        """An absolute URL whose path still contains ``..`` segments yields the filename."""
        url = "https://artifactory.example.com/../../packages/file.tar.gz"
        assert get_file_match_key(url) == ("file.tar.gz", None)

    def test_extracts_tar_gz_filenames(self):
        """Source distribution (.tar.gz) filenames are returned in full."""
        url = "https://pypi.org/packages/source/n/numpy/numpy-1.26.4.tar.gz"
        assert get_file_match_key(url) == ("numpy-1.26.4.tar.gz", None)

    def test_handles_empty_fragment(self):
        """A trailing ``#`` with nothing after it yields no hash."""
        url = "https://example.com/package.whl#"
        assert get_file_match_key(url) == ("package.whl", None)

    def test_ignores_non_sha256_fragments(self):
        """Fragments naming another algorithm (here md5) are ignored."""
        url = "https://example.com/package.whl#md5=abc123"
        assert get_file_match_key(url) == ("package.whl", None)

    def test_handles_sha256_fragment_with_uppercase(self):
        """Uppercase hex digits in a sha256 fragment are currently NOT extracted."""
        url = "https://example.com/file.whl#sha256=ABCDEF" + "0" * 58
        # Note from the original author: the implementation's regex uses
        # [a-f0-9], which is lowercase only. This test documents the current
        # behavior (uppercase does not match) rather than endorsing it.
        assert get_file_match_key(url) == ("file.whl", None)

    def test_rejects_invalid_sha256_length(self):
        """A sha256 fragment shorter than 64 characters is not extracted."""
        url = "https://example.com/file.whl#sha256=abc123"  # Only 6 chars
        assert get_file_match_key(url) == ("file.whl", None)

    def test_handles_complex_wheel_filename(self):
        """Wheel filenames carrying interpreter and platform tags are returned in full."""
        url = "https://example.com/packages/numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl"
        assert get_file_match_key(url) == (
            "numpy-1.26.4-cp311-cp311-macosx_10_9_x86_64.whl",
            None,
        )

    def test_handles_url_with_query_parameters(self):
        """Query parameters (unusual for packages) are in neither the filename nor the hash."""
        expected_hash = "a" * 64
        url = "https://example.com/package.whl?token=xyz#sha256=" + expected_hash
        assert get_file_match_key(url) == ("package.whl", expected_hash)
0 commit comments