-
Notifications
You must be signed in to change notification settings - Fork 2
improve library loading times by caching #127
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,164 @@ | ||
| """Inspection caching utilities for improved library scanning performance. | ||
|
|
||
| This module provides a context-manager-based caching mechanism for Python's | ||
| inspect module functions. The cache is automatically cleared after each scan | ||
| operation, ensuring no stale data persists across library rescans. | ||
|
|
||
| Performance Impact: | ||
| - Reduces ~1.3s of overhead from repeated inspect.getmodule() calls | ||
| - Provides ~33% improvement in library loading time | ||
| - Safe for rescan() operations due to automatic cache cleanup | ||
|
|
||
| Usage: | ||
| with enable_inspection_caching(): | ||
| # perform scanning operations | ||
| # inspect.getmodule() calls will be cached | ||
| # cache is automatically cleared here | ||
| """ | ||
|
|
||
| from contextlib import contextmanager | ||
| from contextvars import ContextVar | ||
| import inspect | ||
| from typing import Any, Dict, Optional | ||
|
|
||
| # Thread-safe context variable for storing the current cache | ||
| _inspection_cache: ContextVar[Dict[Any, Any] | None] = ContextVar( | ||
| '_inspection_cache', default=None | ||
| ) | ||
|
|
||
| # Store original functions for restoration | ||
| _original_getmodule = inspect.getmodule | ||
| _original_getsourcefile = inspect.getsourcefile | ||
| _original_getsourcelines = inspect.getsourcelines | ||
|
|
||
|
|
||
| def _cached_getmodule(obj: Any, _filename: str | None = None) -> Any: | ||
| """Cached version of inspect.getmodule(). | ||
|
|
||
| Args: | ||
| obj: The object to get the module for | ||
| _filename: Optional filename parameter (passed to inspect.getmodule) | ||
|
|
||
| Returns: | ||
| The module containing the object, or None if not found | ||
| """ | ||
| cache = _inspection_cache.get() | ||
| if cache is None: | ||
| # Not in a caching context, use original function | ||
| return _original_getmodule(obj, _filename) | ||
|
|
||
| # Use object id and filename as cache key | ||
| cache_key = (id(obj), _filename) | ||
| if cache_key not in cache: | ||
| cache[cache_key] = _original_getmodule(obj, _filename) | ||
| return cache[cache_key] | ||
|
|
||
|
|
||
| def _cached_getsourcefile(obj: Any) -> str | None: | ||
| """Cached version of inspect.getsourcefile(). | ||
|
|
||
| Args: | ||
| obj: The object to get the source file for | ||
|
|
||
| Returns: | ||
| The source file path, or None if not found | ||
| """ | ||
| cache = _inspection_cache.get() | ||
| if cache is None: | ||
| # Not in a caching context, use original function | ||
| return _original_getsourcefile(obj) | ||
|
|
||
| # Use special key format to avoid collision with getmodule cache | ||
| cache_key = ('sourcefile', id(obj)) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In |
||
| if cache_key not in cache: | ||
| cache[cache_key] = _original_getsourcefile(obj) | ||
| return cache[cache_key] | ||
|
|
||
|
|
||
| def _cached_getsourcelines(obj: Any) -> tuple: | ||
| """Cached version of inspect.getsourcelines(). | ||
|
|
||
| Args: | ||
| obj: The object to get the source lines for | ||
|
|
||
| Returns: | ||
| Tuple of (lines, line_number) | ||
| """ | ||
| cache = _inspection_cache.get() | ||
| if cache is None: | ||
| # Not in a caching context, use original function | ||
| return _original_getsourcelines(obj) | ||
|
|
||
| # Use special key format to avoid collision with other caches | ||
| cache_key = ('sourcelines', id(obj)) | ||
| if cache_key not in cache: | ||
| cache[cache_key] = _original_getsourcelines(obj) | ||
| return cache[cache_key] | ||
|
Comment on lines
+78
to
+96
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function has never been called. |
||
|
|
||
|
|
||
| @contextmanager | ||
| def enable_inspection_caching(): | ||
| """Enable inspection caching for the duration of this context. | ||
|
|
||
| This context manager temporarily replaces inspect module functions with | ||
| cached versions. The cache is automatically cleared when exiting the context, | ||
| ensuring no stale data persists. | ||
|
|
||
| The caching is safe for rescan() operations because: | ||
| 1. Cache only lives during a single scan operation | ||
| 2. Each rescan() gets a fresh cache | ||
| 3. Automatic cleanup prevents stale module references | ||
|
|
||
| Example: | ||
| >>> with enable_inspection_caching(): | ||
| ... # All inspect.getmodule() calls will be cached | ||
| ... result = scan_folder_for_instances(path) | ||
| ... # Cache is cleared here | ||
|
|
||
| Yields: | ||
| Dict containing the cache for inspection (mainly for testing) | ||
| """ | ||
| # Check if we're already in a caching context to avoid nesting | ||
| if _inspection_cache.get() is not None: | ||
| # Already caching, don't create nested context | ||
| yield _inspection_cache.get() | ||
| return | ||
|
|
||
| # Create new cache for this context | ||
| cache: Dict[Any, Any] = {} | ||
| token = _inspection_cache.set(cache) | ||
|
|
||
| try: | ||
| # Monkey-patch inspect module functions with cached versions | ||
| inspect.getmodule = _cached_getmodule | ||
| inspect.getsourcefile = _cached_getsourcefile | ||
| inspect.getsourcelines = _cached_getsourcelines | ||
|
|
||
| yield cache | ||
|
|
||
| finally: | ||
| # Always restore original functions and clear cache | ||
| inspect.getmodule = _original_getmodule | ||
| inspect.getsourcefile = _original_getsourcefile | ||
| inspect.getsourcelines = _original_getsourcelines | ||
|
|
||
| _inspection_cache.reset(token) | ||
| cache.clear() | ||
|
|
||
|
|
||
| def get_cache_stats() -> Dict[str, int]: | ||
| """Get statistics about the current cache (for debugging). | ||
|
|
||
| Returns: | ||
| Dictionary with cache statistics: | ||
| - size: Number of cached items | ||
| - active: Whether caching is currently enabled | ||
| """ | ||
| cache = _inspection_cache.get() | ||
| if cache is None: | ||
| return {"size": 0, "active": False} | ||
|
|
||
| return { | ||
| "size": len(cache), | ||
| "active": True | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm confused a little.
A large percentage of calls contain a
Frameobject as an argument.And it's a little strange to use id of
Frameas cache key. But it's working here.I'm still researching