Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Nov 20, 2025

📄 72% (0.72x) speedup for extract_google_drive_file_id in skyvern/forge/sdk/api/files.py

⏱️ Runtime: 1.76 milliseconds → 1.02 milliseconds (best of 227 runs)

📝 Explanation and details

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 4551 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests and Runtime
import re

# imports
import pytest  # used for our unit tests
from skyvern.forge.sdk.api.files import extract_google_drive_file_id

# unit tests

# -----------------------
# 1. Basic Test Cases
# -----------------------

def test_standard_file_url():
    # Standard Google Drive share link: the ID is the path segment after /file/d/.
    url = "https://drive.google.com/file/d/1a2B3cD4eF5gHIjKlm6n7_OPQ8r9stUvW/view?usp=sharing"
    expected_id = "1a2B3cD4eF5gHIjKlm6n7_OPQ8r9stUvW"
    codeflash_output = extract_google_drive_file_id(url) # 2.22μs -> 1.27μs (74.5% faster)
    # Check the extracted ID itself, not just that the call completes.
    assert codeflash_output == expected_id

def test_file_url_with_no_query():
    # Google Drive file URL with no query parameters after /view
    url = "https://drive.google.com/file/d/abcdef1234567890/view"
    expected_id = "abcdef1234567890"
    codeflash_output = extract_google_drive_file_id(url) # 2.05μs -> 1.17μs (75.1% faster)
    # Check the extracted ID itself, not just that the call completes.
    assert codeflash_output == expected_id

def test_file_url_with_extra_path():
    # Google Drive file URL with extra path segments after /view;
    # they must not leak into the extracted ID.
    url = "https://drive.google.com/file/d/abc123/view/more"
    expected_id = "abc123"
    codeflash_output = extract_google_drive_file_id(url) # 1.98μs -> 1.16μs (70.7% faster)
    assert codeflash_output == expected_id

def test_file_url_with_dash_and_underscore():
    # File ID contains both dashes and underscores; both are part of the ID.
    url = "https://drive.google.com/file/d/abc_DEF-123/view"
    expected_id = "abc_DEF-123"
    codeflash_output = extract_google_drive_file_id(url) # 1.95μs -> 1.19μs (64.2% faster)
    assert codeflash_output == expected_id

def test_file_url_with_short_id():
    # File ID is very short: a single character is still expected to be returned.
    url = "https://drive.google.com/file/d/a/view"
    expected_id = "a"
    codeflash_output = extract_google_drive_file_id(url) # 1.97μs -> 1.15μs (71.6% faster)
    assert codeflash_output == expected_id

# -----------------------
# 2. Edge Test Cases
# -----------------------

def test_url_with_no_file_id():
    # URL that looks like a Google Drive file link but has an empty ID segment
    # between /file/d/ and /view.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm against the function under test.
    url = "https://drive.google.com/file/d//view"
    codeflash_output = extract_google_drive_file_id(url) # 1.69μs -> 902ns (87.5% faster)

def test_url_with_similar_but_invalid_pattern():
    # Near-miss path: "/files/d/" (plural) instead of "/file/d/".
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = "https://drive.google.com/files/d/abc/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.36μs -> 626ns (117% faster)

def test_url_with_multiple_file_d_patterns():
    # URL with two /file/d/ segments: the ID after the first one is expected.
    url = "https://drive.google.com/file/d/firstID/view/file/d/secondID/view"
    expected_id = "firstID"
    codeflash_output = extract_google_drive_file_id(url) # 1.96μs -> 1.23μs (58.8% faster)
    assert codeflash_output == expected_id

def test_url_with_non_alphanumeric_id():
    # ID segment "abc$%" mixes letters with "$" and "%".
    # NOTE(review): the original remark here was "(should not match)", but
    # nothing is asserted, and test_url_with_spaces expects the leading
    # "abc" back for a similar input — confirm the intended result.
    url = "https://drive.google.com/file/d/abc$%/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.87μs -> 1.16μs (61.4% faster)

def test_url_with_no_file_d_pattern():
    # Completely unrelated URL: no /file/d/ segment anywhere.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = "https://example.com/some/path"
    codeflash_output = extract_google_drive_file_id(url) # 1.22μs -> 502ns (143% faster)

def test_empty_string():
    # Empty string as input.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = ""
    codeflash_output = extract_google_drive_file_id(url) # 1.25μs -> 470ns (166% faster)

def test_none_input():
    # None is not a valid URL argument: the call must raise TypeError.
    # pytest.raises fails the test if no TypeError is raised inside the block.
    with pytest.raises(TypeError):
        extract_google_drive_file_id(None) # 2.27μs -> 1.45μs (56.7% faster)

def test_url_with_id_at_end():
    # /file/d/{id} at the very end of the URL, with no /view suffix
    url = "https://drive.google.com/file/d/xyz123"
    expected_id = "xyz123"
    codeflash_output = extract_google_drive_file_id(url) # 2.04μs -> 1.26μs (61.9% faster)
    assert codeflash_output == expected_id

def test_url_with_uppercase_drive():
    # Host is written as "DRIVE.google.com"; the /file/d/ path is still lowercase.
    # NOTE(review): the original remark was "should not match, as regex is
    # case-sensitive", but nothing is asserted, and the unrelated-domain test
    # in this PR expects an ID regardless of host — confirm the intended result.
    url = "https://DRIVE.google.com/file/d/ABC123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.93μs -> 1.19μs (61.9% faster)

def test_url_with_fragment():
    # URL with a "#fragment" after /view; the fragment must not affect the ID.
    url = "https://drive.google.com/file/d/abc123/view#fragment"
    expected_id = "abc123"
    codeflash_output = extract_google_drive_file_id(url) # 1.94μs -> 1.14μs (70.3% faster)
    assert codeflash_output == expected_id

def test_url_with_spaces():
    # URL with a space inside the ID segment: only the characters before the
    # space ("abc") are expected back, since a space cannot be part of an ID.
    url = "https://drive.google.com/file/d/abc 123/view"
    expected_id = "abc"
    codeflash_output = extract_google_drive_file_id(url) # 1.83μs -> 1.13μs (61.9% faster)
    assert codeflash_output == expected_id

def test_url_with_trailing_slash():
    # URL that ends with a slash directly after the file ID
    url = "https://drive.google.com/file/d/abc123/"
    expected_id = "abc123"
    codeflash_output = extract_google_drive_file_id(url) # 1.89μs -> 1.11μs (69.2% faster)
    assert codeflash_output == expected_id

def test_url_with_subdomain():
    # URL with an extra subdomain in front of drive.google.com (should still match)
    url = "https://subdomain.drive.google.com/file/d/xyz987/view"
    expected_id = "xyz987"
    codeflash_output = extract_google_drive_file_id(url) # 1.86μs -> 1.11μs (67.7% faster)
    assert codeflash_output == expected_id

def test_url_with_port():
    # URL with an explicit port (":443") in the host part
    url = "https://drive.google.com:443/file/d/portID/view"
    expected_id = "portID"
    codeflash_output = extract_google_drive_file_id(url) # 1.72μs -> 1.06μs (61.9% faster)
    assert codeflash_output == expected_id

def test_url_with_multiple_query_params():
    # URL with several query parameters after /view
    url = "https://drive.google.com/file/d/abcDEF123/view?usp=sharing&foo=bar"
    expected_id = "abcDEF123"
    codeflash_output = extract_google_drive_file_id(url) # 1.73μs -> 1.15μs (50.1% faster)
    assert codeflash_output == expected_id

def test_url_with_id_containing_only_dash_underscore():
    # File ID consists solely of a dash and an underscore, with no letters or digits
    url = "https://drive.google.com/file/d/-_/view"
    expected_id = "-_"
    codeflash_output = extract_google_drive_file_id(url) # 1.83μs -> 1.09μs (68.4% faster)
    assert codeflash_output == expected_id

def test_url_with_id_too_long():
    # File ID is very long: 120 repetitions of "a".
    # NOTE(review): the result is only captured, never asserted; the test name
    # suggests a length limit, but no limit is visible here — confirm whether
    # the full ID or a "no match" value is expected.
    file_id = "a" * 120
    url = f"https://drive.google.com/file/d/{file_id}/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.95μs -> 1.24μs (56.9% faster)

def test_url_with_id_too_short():
    # File ID is empty: nothing sits between /file/d/ and /view (should not match).
    # Uses the same URL as test_url_with_no_file_id.
    # NOTE(review): the result is only captured, never asserted — confirm the
    # expected "no match" value.
    url = "https://drive.google.com/file/d//view"
    codeflash_output = extract_google_drive_file_id(url) # 1.61μs -> 866ns (85.9% faster)

# -----------------------
# 3. Large Scale Test Cases
# -----------------------

def test_many_valid_urls():
    # Run the extractor over 1000 well-formed URLs with IDs id_0000 .. id_0999.
    # NOTE(review): each result is only captured, never compared with file_id;
    # presumably the extracted ID should equal file_id — confirm.
    base_url = "https://drive.google.com/file/d/{}/view"
    for i in range(1000):
        file_id = f"id_{i:04d}"  # zero-padded to four digits
        url = base_url.format(file_id)
        codeflash_output = extract_google_drive_file_id(url) # 444μs -> 286μs (55.2% faster)

def test_many_invalid_urls():
    # Run the extractor over 1000 URLs that use "/files/d/" (plural) instead
    # of "/file/d/".
    # NOTE(review): each result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    base_url = "https://drive.google.com/files/d/{}/view"
    for i in range(1000):
        file_id = f"id_{i:04d}"  # zero-padded to four digits
        url = base_url.format(file_id)
        codeflash_output = extract_google_drive_file_id(url) # 303μs -> 152μs (98.9% faster)

def test_mixed_valid_and_invalid_urls():
    # Alternate between a well-formed URL ("/file/d/") and a near-miss URL
    # ("/files/d/") for 500 iterations.
    # NOTE(review): neither result is asserted, and the second call overwrites
    # codeflash_output from the first — confirm the expected values
    # (presumably valid_id for the valid URL and "no match" for the other).
    valid_base = "https://drive.google.com/file/d/{}/view"
    invalid_base = "https://drive.google.com/files/d/{}/view"
    for i in range(500):
        valid_id = f"valid_{i}"
        invalid_id = f"invalid_{i}"
        valid_url = valid_base.format(valid_id)
        invalid_url = invalid_base.format(invalid_id)
        codeflash_output = extract_google_drive_file_id(valid_url) # 232μs -> 150μs (55.0% faster)
        codeflash_output = extract_google_drive_file_id(invalid_url)

def test_performance_on_large_input():
    # Call the extractor on a very large URL string (should not hang):
    # a 256-character ID followed by a 1000-character query string.
    # NOTE(review): the result is only captured, never compared with file_id — confirm.
    file_id = "A" * 256
    url = "https://drive.google.com/file/d/" + file_id + "/view?" + "x" * 1000
    codeflash_output = extract_google_drive_file_id(url) # 2.25μs -> 1.37μs (64.2% faster)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
import random
# function under test is imported from the project below (not copied here)
import re
import string

# imports
import pytest  # used for our unit tests
from skyvern.forge.sdk.api.files import extract_google_drive_file_id

# unit tests

# ----------------
# BASIC TEST CASES
# ----------------

def test_basic_standard_url():
    # Standard Google Drive share link: the ID is the path segment after /file/d/.
    url = "https://drive.google.com/file/d/1A2B3C4D5E6F7G8H9I0J/view?usp=sharing"
    expected = "1A2B3C4D5E6F7G8H9I0J"
    codeflash_output = extract_google_drive_file_id(url) # 1.95μs -> 1.22μs (60.7% faster)
    # Check the extracted ID itself, not just that the call completes.
    assert codeflash_output == expected

def test_basic_url_with_no_query():
    # Standard URL with no query string after /view
    url = "https://drive.google.com/file/d/abc123DEF456ghi789/view"
    expected = "abc123DEF456ghi789"
    codeflash_output = extract_google_drive_file_id(url) # 1.69μs -> 1.02μs (65.9% faster)
    assert codeflash_output == expected

def test_basic_url_with_extra_path():
    # URL with extra path segments after the file ID; they must not leak into the ID.
    url = "https://drive.google.com/file/d/xyz-ABC_1234567/view/some/extra"
    expected = "xyz-ABC_1234567"
    codeflash_output = extract_google_drive_file_id(url) # 1.75μs -> 1.08μs (61.0% faster)
    assert codeflash_output == expected

def test_basic_url_with_dash_and_underscore():
    # File ID with dashes and underscores; both are part of the ID.
    url = "https://drive.google.com/file/d/abc_DEF-123/view"
    expected = "abc_DEF-123"
    codeflash_output = extract_google_drive_file_id(url) # 1.76μs -> 1.06μs (66.0% faster)
    assert codeflash_output == expected

def test_basic_url_with_subdomain():
    # URL with a "www." subdomain in front of drive.google.com
    url = "https://www.drive.google.com/file/d/1A2B3C4D5E6F7G8H9I0J/view"
    expected = "1A2B3C4D5E6F7G8H9I0J"
    codeflash_output = extract_google_drive_file_id(url) # 1.73μs -> 1.05μs (65.0% faster)
    assert codeflash_output == expected

# ----------------
# EDGE TEST CASES
# ----------------

def test_edge_empty_string():
    # Empty input string.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = ""
    codeflash_output = extract_google_drive_file_id(url) # 1.18μs -> 488ns (142% faster)

def test_edge_no_file_id():
    # URL with an empty ID segment between /file/d/ and /view.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = "https://drive.google.com/file/d//view"
    codeflash_output = extract_google_drive_file_id(url) # 1.62μs -> 895ns (81.1% faster)

def test_edge_no_file_d_segment():
    # URL without the /file/d/ segment: the path is "/files/abc123/view".
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = "https://drive.google.com/files/abc123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.26μs -> 593ns (113% faster)

def test_edge_file_id_at_end_of_url():
    # File ID at the very end of the URL (no /view, no trailing slash)
    url = "https://drive.google.com/file/d/abc123DEF456"
    expected = "abc123DEF456"
    codeflash_output = extract_google_drive_file_id(url) # 1.70μs -> 1.09μs (55.5% faster)
    assert codeflash_output == expected

def test_edge_file_id_with_special_chars():
    # ID segment "abc$%^&" mixes letters with punctuation characters.
    # NOTE(review): the original remark was "(should not match)", but nothing
    # is asserted, and test_url_with_spaces in this PR expects the leading
    # "abc" back for a similar input — confirm the intended result.
    url = "https://drive.google.com/file/d/abc$%^&/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.70μs -> 1.06μs (59.3% faster)

def test_edge_file_id_too_short():
    # File ID is only one character; despite the test name, that single
    # character is the expected result.
    url = "https://drive.google.com/file/d/a/view"
    expected = "a"
    codeflash_output = extract_google_drive_file_id(url) # 1.72μs -> 1.04μs (65.5% faster)
    assert codeflash_output == expected

def test_edge_file_id_with_mixed_case():
    # File ID with mixed upper and lower case; the case must be preserved.
    url = "https://drive.google.com/file/d/AbC123xYz/view"
    expected = "AbC123xYz"
    codeflash_output = extract_google_drive_file_id(url) # 1.71μs -> 1.03μs (65.5% faster)
    assert codeflash_output == expected

def test_edge_url_with_multiple_file_d_segments():
    # URL with two /file/d/ segments: the ID after the first one is expected.
    url = "https://drive.google.com/file/d/firstID/view/file/d/secondID/view"
    expected = "firstID"
    codeflash_output = extract_google_drive_file_id(url) # 1.70μs -> 1.03μs (64.4% faster)
    assert codeflash_output == expected

def test_edge_url_with_port_number():
    # URL with an explicit port (":443") in the host part
    url = "https://drive.google.com:443/file/d/PORT123/view"
    expected = "PORT123"
    codeflash_output = extract_google_drive_file_id(url) # 1.67μs -> 1.02μs (63.4% faster)
    assert codeflash_output == expected

def test_edge_url_with_fragment():
    # URL with a "#section" fragment after /view; the fragment must not affect the ID.
    url = "https://drive.google.com/file/d/fragID/view#section"
    expected = "fragID"
    codeflash_output = extract_google_drive_file_id(url) # 1.71μs -> 1.04μs (64.0% faster)
    assert codeflash_output == expected

def test_edge_url_with_query_before_file_id():
    # /file/d/ appears inside the query string (after "?foo=bar"); the ID that
    # follows it is still expected to be extracted.
    url = "https://drive.google.com/view?foo=bar/file/d/ID123/view"
    expected = "ID123"
    codeflash_output = extract_google_drive_file_id(url) # 1.71μs -> 1.07μs (59.0% faster)
    assert codeflash_output == expected

def test_edge_file_id_with_max_length():
    # File ID with maximum plausible length: 100 characters drawn at random
    # from letters, digits, "-" and "_".
    # The ID differs on every run because random is not seeded here.
    # NOTE(review): the result is only captured, never compared with file_id — confirm.
    file_id = ''.join(random.choices(string.ascii_letters + string.digits + "-_", k=100))
    url = f"https://drive.google.com/file/d/{file_id}/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.76μs -> 1.11μs (58.3% faster)

def test_edge_url_with_spaces():
    # URL with a space inside the ID segment ("abc 123").
    # NOTE(review): the original remark was "(should not match)", but nothing
    # is asserted, and test_url_with_spaces in this PR sets expected_id = "abc"
    # for this same URL — confirm the intended result.
    url = "https://drive.google.com/file/d/abc 123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.63μs -> 1.06μs (53.2% faster)

def test_edge_url_with_encoded_chars():
    # ID segment contains a percent-encoded space ("abc%20123").
    # NOTE(review): the original remark was "(should not match encoded chars)";
    # nothing is asserted, so it is unclear whether "abc" or a "no match"
    # value is expected — confirm.
    url = "https://drive.google.com/file/d/abc%20123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.60μs -> 1.00μs (59.7% faster)

def test_edge_url_with_non_ascii():
    # ID segment contains a non-ASCII character ("é") between ASCII runs
    # (should only match ASCII).
    # NOTE(review): the result is only captured, never asserted; presumably
    # the leading ASCII part "abc" is expected — confirm.
    url = "https://drive.google.com/file/d/abcé123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.78μs -> 1.16μs (52.8% faster)

def test_edge_url_with_trailing_slash_only():
    # URL ends with /file/d/ and nothing follows it.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = "https://drive.google.com/file/d/"
    codeflash_output = extract_google_drive_file_id(url) # 1.44μs -> 836ns (72.4% faster)

def test_edge_url_with_multiple_possible_matches():
    # Two /file/d/ segments separated by other path parts: only the ID after
    # the first one is expected.
    url = "https://drive.google.com/file/d/firstID/view/other/file/d/secondID/view"
    expected = "firstID"
    codeflash_output = extract_google_drive_file_id(url) # 1.70μs -> 1.03μs (63.8% faster)
    assert codeflash_output == expected

def test_edge_url_with_unrelated_domain():
    # Not a Google Drive domain, but the path still has /file/d/{id}; the ID
    # is expected regardless of the host.
    url = "https://example.com/file/d/abc123/view"
    expected = "abc123"
    codeflash_output = extract_google_drive_file_id(url) # 1.68μs -> 1.02μs (64.3% faster)
    assert codeflash_output == expected

def test_edge_url_with_uppercase_path():
    # Path is uppercase ("/FILE/D/") instead of "/file/d/" (should not match).
    # NOTE(review): the result is only captured, never asserted — confirm the
    # expected "no match" value.
    url = "https://drive.google.com/FILE/D/ABC123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.27μs -> 590ns (116% faster)

def test_edge_url_with_malformed_path():
    # Malformed path: "/file/x/" where "/file/d/" would be expected.
    # NOTE(review): the result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    url = "https://drive.google.com/file/x/abc123/view"
    codeflash_output = extract_google_drive_file_id(url) # 1.28μs -> 588ns (118% faster)

# -------------------------
# LARGE SCALE TEST CASES
# -------------------------

def test_large_scale_many_valid_urls():
    # Extract from 500 valid URLs whose 30-character IDs are drawn at random
    # from letters, digits, "-" and "_". random is not seeded here, so the
    # IDs differ on every run.
    ids = []
    urls = []
    for _ in range(500):
        file_id = ''.join(random.choices(string.ascii_letters + string.digits + "-_", k=30))
        ids.append(file_id)
        urls.append(f"https://drive.google.com/file/d/{file_id}/view")
    # All should extract correctly: each URL must yield exactly the ID it was built from.
    for url, file_id in zip(urls, ids):
        codeflash_output = extract_google_drive_file_id(url) # 235μs -> 154μs (52.0% faster)
        assert codeflash_output == file_id

def test_large_scale_many_invalid_urls():
    # Run the extractor over 1000 URLs that lack a /file/d/ segment:
    # 500 using "/file/x/" and 500 using "/files/".
    # NOTE(review): each result is only captured, never asserted; presumably a
    # "no match" value is expected — confirm.
    urls = [
        f"https://drive.google.com/file/x/{i}/view" for i in range(500)
    ] + [
        f"https://drive.google.com/files/{i}/view" for i in range(500)
    ]
    for url in urls:
        codeflash_output = extract_google_drive_file_id(url) # 293μs -> 147μs (99.3% faster)

def test_large_scale_long_url_prefix_and_suffix():
    # Very long URL text before and after the file ID: a 400-character path
    # segment precedes /file/d/ and a 400-character query string follows /view.
    # The 50-character ID is random and differs on every run (no seed is set here).
    # NOTE(review): the result is only captured, never compared with file_id — confirm.
    file_id = ''.join(random.choices(string.ascii_letters + string.digits + "-_", k=50))
    prefix = "https://drive.google.com/" + "a" * 400
    suffix = "/view?" + "b" * 400
    url = f"{prefix}/file/d/{file_id}{suffix}"
    codeflash_output = extract_google_drive_file_id(url) # 2.69μs -> 1.76μs (53.0% faster)

def test_large_scale_file_id_with_all_valid_chars():
    # File ID is a random permutation of every ASCII letter, digit, "-" and "_"
    # (64 characters, each used exactly once).
    # NOTE(review): the result is only captured, never compared with file_id — confirm.
    chars = string.ascii_letters + string.digits + "-_"
    file_id = ''.join(random.sample(chars, len(chars)))  # sample of full length == shuffle
    url = f"https://drive.google.com/file/d/{file_id}/view"
    codeflash_output = extract_google_drive_file_id(url) # 2.13μs -> 1.34μs (58.8% faster)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

To edit these changes, run `git checkout codeflash/optimize-extract_google_drive_file_id-mi7bto2m`, commit your edits, and push.

Codeflash

@codeflash-ai codeflash-ai bot requested a review from mashraf-222 November 20, 2025 11:07
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Nov 20, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant