-
Notifications
You must be signed in to change notification settings - Fork 26.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Repo checks: check documented methods exist #32320
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,7 +31,6 @@ | |
It has no auto-fix mode. | ||
""" | ||
|
||
import inspect | ||
import os | ||
import re | ||
import sys | ||
|
@@ -421,22 +420,15 @@ def get_model_modules() -> List[str]: | |
"modeling_auto", | ||
"modeling_encoder_decoder", | ||
"modeling_marian", | ||
"modeling_mmbt", | ||
"modeling_outputs", | ||
"modeling_retribert", | ||
"modeling_utils", | ||
"modeling_flax_auto", | ||
"modeling_flax_encoder_decoder", | ||
"modeling_flax_utils", | ||
"modeling_speech_encoder_decoder", | ||
"modeling_flax_speech_encoder_decoder", | ||
"modeling_flax_vision_encoder_decoder", | ||
"modeling_timm_backbone", | ||
"modeling_tf_auto", | ||
"modeling_tf_encoder_decoder", | ||
"modeling_tf_outputs", | ||
"modeling_tf_pytorch_utils", | ||
"modeling_tf_utils", | ||
"modeling_tf_vision_encoder_decoder", | ||
"modeling_vision_encoder_decoder", | ||
] | ||
|
@@ -450,8 +442,7 @@ def get_model_modules() -> List[str]: | |
for submodule in dir(model_module): | ||
if submodule.startswith("modeling") and submodule not in _ignore_modules: | ||
modeling_module = getattr(model_module, submodule) | ||
if inspect.ismodule(modeling_module): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. redundant check in our current folder structure |
||
modules.append(modeling_module) | ||
modules.append(modeling_module) | ||
return modules | ||
|
||
|
||
|
@@ -913,19 +904,26 @@ def find_all_documented_objects() -> List[str]: | |
|
||
Returns: | ||
`List[str]`: The list of all object names being documented. | ||
`Dict[str, List[str]]`: A dictionary mapping the object name (full import path, e.g. | ||
`integrations.PeftAdapterMixin`) to its documented methods | ||
""" | ||
documented_obj = [] | ||
for doc_file in Path(PATH_TO_DOC).glob("**/*.rst"): | ||
with open(doc_file, "r", encoding="utf-8", newline="\n") as f: | ||
content = f.read() | ||
raw_doc_objs = re.findall(r"(?:autoclass|autofunction):: transformers.(\S+)\s+", content) | ||
documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs] | ||
Comment on lines
-918
to
-922
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't have rst doc files for quite a long time |
||
documented_methods_map = {} | ||
for doc_file in Path(PATH_TO_DOC).glob("**/*.md"): | ||
with open(doc_file, "r", encoding="utf-8", newline="\n") as f: | ||
content = f.read() | ||
raw_doc_objs = re.findall(r"\[\[autodoc\]\]\s+(\S+)\s+", content) | ||
documented_obj += [obj.split(".")[-1] for obj in raw_doc_objs] | ||
return documented_obj | ||
|
||
for obj in raw_doc_objs: | ||
obj_public_methods = re.findall(rf"\[\[autodoc\]\] {obj}((\n\s+-.*)+)", content) | ||
# Some objects have no methods documented | ||
if len(obj_public_methods) == 0: | ||
continue | ||
else: | ||
documented_methods_map[obj] = re.findall(r"(?<=-\s).*", obj_public_methods[0][0]) | ||
|
||
return documented_obj, documented_methods_map | ||
|
||
|
||
# One good reason for not being documented is to be deprecated. Put in this list deprecated objects. | ||
|
@@ -1063,7 +1061,7 @@ def ignore_undocumented(name: str) -> bool: | |
|
||
def check_all_objects_are_documented(): | ||
"""Check all models are properly documented.""" | ||
documented_objs = find_all_documented_objects() | ||
documented_objs, documented_methods_map = find_all_documented_objects() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note: I intend to use this new mapping in |
||
modules = transformers._modules | ||
objects = [c for c in dir(transformers) if c not in modules and not c.startswith("_")] | ||
undocumented_objs = [c for c in objects if c not in documented_objs and not ignore_undocumented(c)] | ||
|
@@ -1072,8 +1070,41 @@ def check_all_objects_are_documented(): | |
"The following objects are in the public init so should be documented:\n - " | ||
+ "\n - ".join(undocumented_objs) | ||
) | ||
check_docstrings_are_in_md() | ||
check_model_type_doc_match() | ||
check_public_method_exists(documented_methods_map) | ||
|
||
|
||
def check_public_method_exists(documented_methods_map): | ||
"""Check that all explicitly documented public methods are defined in the corresponding class.""" | ||
failures = [] | ||
for obj, methods in documented_methods_map.items(): | ||
# Let's ensure there is no repetition | ||
if len(set(methods)) != len(methods): | ||
failures.append(f"Error in the documentation of {obj}: there are repeated documented methods.") | ||
|
||
# Navigates into the object, given the full import path | ||
nested_path = obj.split(".") | ||
submodule = transformers | ||
if len(nested_path) > 1: | ||
nested_submodules = nested_path[:-1] | ||
for submodule_name in nested_submodules: | ||
if submodule_name == "transformers": | ||
continue | ||
submodule = getattr(submodule, submodule_name) | ||
class_name = nested_path[-1] | ||
obj_class = getattr(submodule, class_name) | ||
# Checks that all explicitly documented methods are defined in the class | ||
for method in methods: | ||
if method == "all": # Special keyword to document all public methods | ||
continue | ||
if not hasattr(obj_class, method): | ||
failures.append( | ||
"The following public method is explicitly documented but not defined in the corresponding " | ||
f"class. class: {obj}, method: {method}" | ||
) | ||
|
||
if len(failures) > 0: | ||
raise Exception("\n".join(failures)) | ||
|
||
|
||
def check_model_type_doc_match(): | ||
|
@@ -1103,50 +1134,6 @@ def check_model_type_doc_match(): | |
) | ||
|
||
|
||
# Re pattern to catch :obj:`xx`, :class:`xx`, :func:`xx` or :meth:`xx`. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We don't have rst doc files for quite a long time |
||
_re_rst_special_words = re.compile(r":(?:obj|func|class|meth):`([^`]+)`") | ||
# Re pattern to catch things between double backquotes. | ||
_re_double_backquotes = re.compile(r"(^|[^`])``([^`]+)``([^`]|$)") | ||
# Re pattern to catch example introduction. | ||
_re_rst_example = re.compile(r"^\s*Example.*::\s*$", flags=re.MULTILINE) | ||
|
||
|
||
def is_rst_docstring(docstring: str) -> True: | ||
""" | ||
Returns `True` if `docstring` is written in rst. | ||
""" | ||
if _re_rst_special_words.search(docstring) is not None: | ||
return True | ||
if _re_double_backquotes.search(docstring) is not None: | ||
return True | ||
if _re_rst_example.search(docstring) is not None: | ||
return True | ||
return False | ||
|
||
|
||
def check_docstrings_are_in_md(): | ||
"""Check all docstrings are written in md and nor rst.""" | ||
files_with_rst = [] | ||
for file in Path(PATH_TO_TRANSFORMERS).glob("**/*.py"): | ||
with open(file, encoding="utf-8") as f: | ||
code = f.read() | ||
docstrings = code.split('"""') | ||
|
||
for idx, docstring in enumerate(docstrings): | ||
if idx % 2 == 0 or not is_rst_docstring(docstring): | ||
continue | ||
files_with_rst.append(file) | ||
break | ||
|
||
if len(files_with_rst) > 0: | ||
raise ValueError( | ||
"The following files have docstrings written in rst:\n" | ||
+ "\n".join([f"- {f}" for f in files_with_rst]) | ||
+ "\nTo fix this run `doc-builder convert path_to_py_file` after installing `doc-builder`\n" | ||
"(`pip install git+https://github.com/huggingface/doc-builder`)" | ||
) | ||
|
||
|
||
def check_deprecated_constant_is_up_to_date(): | ||
""" | ||
Check if the constant `DEPRECATED_MODELS` in `models/auto/configuration_auto.py` is up to date. | ||
|
@@ -1177,27 +1164,28 @@ def check_deprecated_constant_is_up_to_date(): | |
|
||
|
||
def check_repo_quality(): | ||
"""Check all models are properly tested and documented.""" | ||
print("Checking all models are included.") | ||
"""Check all models are tested and documented.""" | ||
print("Repository-wide checks:") | ||
print(" - checking all models are included.") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
check_model_list() | ||
print("Checking all models are public.") | ||
print(" - checking all models are public.") | ||
check_models_are_in_init() | ||
print("Checking all models are properly tested.") | ||
print(" - checking all models have tests.") | ||
check_all_decorator_order() | ||
check_all_models_are_tested() | ||
print("Checking all objects are properly documented.") | ||
print(" - checking all objects have documentation.") | ||
check_all_objects_are_documented() | ||
print("Checking all models are in at least one auto class.") | ||
print(" - checking all models are in at least one auto class.") | ||
check_all_models_are_auto_configured() | ||
print("Checking all names in auto name mappings are defined.") | ||
print(" - checking all names in auto name mappings are defined.") | ||
check_all_auto_object_names_being_defined() | ||
print("Checking all keys in auto name mappings are defined in `CONFIG_MAPPING_NAMES`.") | ||
print(" - checking all keys in auto name mappings are defined in `CONFIG_MAPPING_NAMES`.") | ||
check_all_auto_mapping_names_in_config_mapping_names() | ||
print("Checking all auto mappings could be imported.") | ||
print(" - checking all auto mappings could be imported.") | ||
check_all_auto_mappings_importable() | ||
print("Checking all objects are equally (across frameworks) in the main __init__.") | ||
print(" - checking all objects are equally (across frameworks) in the main __init__.") | ||
check_objects_being_equally_in_main_init() | ||
print("Checking the DEPRECATED_MODELS constant is up to date.") | ||
print(" - checking the DEPRECATED_MODELS constant is up to date.") | ||
check_deprecated_constant_is_up_to_date() | ||
|
||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we've changed the model structure since 4 years ago, when this list was first created:
transformers.models
exclusively contains folders with models.As such, we don't need to manually ignore these files.