Skip to content

Commit

Permalink
Remove mentions of models in the READMEs and link to the documentatio…
Browse files Browse the repository at this point in the history
…n page in which they are featured. (huggingface#30420)

* READMEs

* READMEs v2
  • Loading branch information
LysandreJik authored Apr 24, 2024
1 parent d4e92f1 commit c6bba94
Show file tree
Hide file tree
Showing 14 changed files with 14 additions and 3,625 deletions.
271 changes: 1 addition & 270 deletions README.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_de.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_es.md

Large diffs are not rendered by default.

270 changes: 1 addition & 269 deletions README_fr.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_hd.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_ja.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_ko.md

Large diffs are not rendered by default.

272 changes: 1 addition & 271 deletions README_pt-br.md

Large diffs are not rendered by default.

272 changes: 1 addition & 271 deletions README_ru.md

Large diffs are not rendered by default.

274 changes: 2 additions & 272 deletions README_te.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_vi.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_zh-hans.md

Large diffs are not rendered by default.

271 changes: 1 addition & 270 deletions README_zh-hant.md

Large diffs are not rendered by default.

112 changes: 0 additions & 112 deletions utils/check_copies.py
Original file line number Diff line number Diff line change
Expand Up @@ -858,7 +858,6 @@ def check_copies(overwrite: bool = False, file: str = None):
+ diff
+ "\nRun `make fix-copies` or `python utils/check_copies.py --fix_and_overwrite` to fix them."
)
check_model_list_copy(overwrite=overwrite)


def check_full_copies(overwrite: bool = False):
Expand Down Expand Up @@ -1055,68 +1054,6 @@ def _find_text_in_file(filename: str, start_prompt: str, end_prompt: str) -> Tup
return "".join(lines[start_index:end_index]), start_index, end_index, lines


def check_model_list_copy(overwrite: bool = False):
    """
    Make sure the documentation links of the main README are correct, then check that the model list of every
    localized README is in sync with the model list of the main README.

    Args:
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether or not to overwrite the copies when they don't match.

    Raises:
        ValueError: If `overwrite` is `False` and either the main README contains wrong documentation links or a
            localized README has a model list that does not match the main one.
    """
    main_readme_path = os.path.join(REPO_PATH, "README.md")

    # Step 1: normalize the documentation URLs used in the main README.
    with open(main_readme_path, "r", encoding="utf-8", newline="\n") as f:
        original_text = f.read()
    fixed_text = original_text.replace(
        "https://huggingface.co/transformers", "https://huggingface.co/docs/transformers"
    ).replace("https://huggingface.co/docs/main/transformers", "https://huggingface.co/docs/transformers/main")
    if fixed_text != original_text:
        if not overwrite:
            raise ValueError(
                "The main README contains wrong links to the documentation of Transformers. Run `make fix-copies` to "
                "automatically fix them."
            )
        with open(main_readme_path, "w", encoding="utf-8", newline="\n") as f:
            f.write(fixed_text)

    # Step 2: read the reference model list from the main README.
    main_info = LOCALIZED_READMES["README.md"]
    md_list = get_model_list(
        filename="README.md",
        start_prompt=main_info["start_prompt"],
        end_prompt=main_info["end_prompt"],
    )

    # Step 3: convert the reference list for every README before touching any file, so that a failure while reading
    # or converting one README happens before anything is written or reported.
    conversions = []
    for readme_name, readme_info in LOCALIZED_READMES.items():
        start_marker = readme_info["start_prompt"]
        end_marker = readme_info["end_prompt"]
        current_list = get_model_list(readme_name, start_marker, end_marker)
        is_in_sync, expected_list = convert_to_localized_md(md_list, current_list, readme_info["format_model_list"])
        conversions.append((readme_name, start_marker, end_marker, is_in_sync, expected_list))

    # Step 4: write back (when overwriting) or report the first localized README that is out of date.
    for filename, start_marker, end_marker, is_in_sync, expected_list in conversions:
        if filename == "README.md":
            # The main README is the reference, there is nothing to compare it against.
            continue

        if not overwrite:
            if is_in_sync:
                continue
            raise ValueError(
                f"The model list in the README changed and the list in `{filename}` has not been updated. Run "
                "`make fix-copies` to fix this."
            )

        localized_path = os.path.join(REPO_PATH, filename)
        _, start_index, end_index, lines = _find_text_in_file(
            filename=localized_path, start_prompt=start_marker, end_prompt=end_marker
        )
        with open(localized_path, "w", encoding="utf-8", newline="\n") as f:
            f.writelines(lines[:start_index] + [expected_list] + lines[end_index:])


# Map a model name with the name it has in the README for the check_readme check
SPECIAL_MODEL_NAMES = {
"Bert Generation": "BERT For Sequence Generation",
Expand Down Expand Up @@ -1160,60 +1097,11 @@ def check_model_list_copy(overwrite: bool = False):
)


def _readme_model_name(line: str) -> str:
    """
    Extract the model name from one entry of the README model list, i.e. the text that follows `**[` up to the
    closing bracket.

    Raises:
        ValueError: If the line does not contain a `**[...` link, instead of failing later with an opaque
            `AttributeError` on `None`.
    """
    match = re.search(r"\*\*\[([^\]]*)", line)
    if match is None:
        raise ValueError(
            f"Could not find a model name (expected a `**[Model name]` link) in this line of the main README model "
            f"list: {line!r}"
        )
    return match.groups()[0]


def check_readme(overwrite: bool = False):
    """
    Check if the main README contains all the models in the library or not, and that its model list is sorted.

    Args:
        overwrite (`bool`, *optional*, defaults to `False`):
            Whether or not to add an entry for the missing models using `README_TEMPLATE`.

    Raises:
        ValueError: If `overwrite` is `False` and models are missing from the main README or its model list is not
            properly sorted, or if an entry of the model list does not contain a `**[Model name]` link.
    """
    info = LOCALIZED_READMES["README.md"]
    readme_path = os.path.join(REPO_PATH, "README.md")
    models, start_index, end_index, lines = _find_text_in_file(
        readme_path,
        info["start_prompt"],
        info["end_prompt"],
    )

    # Blank lines carry no entry: skipping them lets an empty (or loosely spaced) list be handled instead of crashing
    # when the regex finds nothing to match.
    entries = [line for line in models.strip().split("\n") if line.strip()]
    models_in_readme = {_readme_model_name(line) for line in entries}

    model_names_mapping = transformers_module.models.auto.configuration_auto.MODEL_NAMES_MAPPING
    # Models of the library that have no entry in the README, exceptions excluded.
    absents = [
        (key, name)
        for key, name in model_names_mapping.items()
        if SPECIAL_MODEL_NAMES.get(name, name) not in models_in_readme and name not in MODELS_NOT_IN_README
    ]
    if len(absents) > 0 and not overwrite:
        print(absents)
        raise ValueError(
            "The main README doesn't contain all models, run `make fix-copies` to fill it with the missing model(s)"
            " then complete the generated entries.\nIf the model is not supposed to be in the main README, add it to"
            " the list `MODELS_NOT_IN_README` in utils/check_copies.py.\nIf it has a different name in the repo than"
            " in the README, map the correspondence in `SPECIAL_MODEL_NAMES` in utils/check_copies.py."
        )

    new_models = [README_TEMPLATE.format(model_name=name, model_type=key) for key, name in absents]

    # Entries are ordered case-insensitively by the model name displayed in the README.
    all_models = sorted(entries + new_models, key=lambda entry: _readme_model_name(entry).lower())
    all_models = "\n".join(all_models) + "\n"

    if all_models != models:
        if overwrite:
            print("Fixing the main README.")
            with open(readme_path, "w", encoding="utf-8", newline="\n") as f:
                f.writelines(lines[:start_index] + [all_models] + lines[end_index:])
        else:
            raise ValueError("The main README model list is not properly sorted. Run `make fix-copies` to fix this.")


if __name__ == "__main__":
    # Command-line entry point: run every consistency check, optionally fixing what can be fixed automatically.
    cli = argparse.ArgumentParser()
    cli.add_argument("--file", type=str, default=None, help="A specific file to check and/or fix")
    cli.add_argument("--fix_and_overwrite", action="store_true", help="Whether to fix inconsistencies.")
    options = cli.parse_args()

    # The same flag drives all three checks; only `check_copies` can be restricted to a single file.
    fix = options.fix_and_overwrite
    check_readme(fix)
    check_copies(fix, options.file)
    check_full_copies(fix)

0 comments on commit c6bba94

Please sign in to comment.