Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
155 changes: 155 additions & 0 deletions docs/cli/classifier.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
import os
import sys
import glob
import re
from pathlib import Path
from collections import defaultdict

# Absolute path of the directory that contains this script.
current_dir = os.path.dirname(os.path.abspath(__file__))
# Directory two levels above this script's directory; the functions below use
# it as the base for locating source files and for computing relative paths.
lib_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
# Put that directory first on the module search path.
sys.path.insert(0, lib_path)
Comment on lines +8 to +10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
current_dir = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.abspath(os.path.join(current_dir, "..", ".."))
sys.path.insert(0, lib_path)
sys.path.insert(0, os.getcwd())



def classify_file_category(path):
    """Assign a source file to the "linear", "nn" or "general" category.

    The decision is made on the file's path relative to ``lib_path``. When
    that relative path has more than one component, the first component is
    dropped before the prefix comparison.

    Args:
        path: Location of the file; it must be inside ``lib_path``
            (``Path.relative_to`` raises ``ValueError`` otherwise).

    Returns:
        A ``(category, relative_posix_path)`` tuple.
    """
    rel = Path(path).relative_to(lib_path)
    posix_rel = rel.as_posix()
    components = rel.parts

    if len(components) > 1:
        tail = Path(*components[1:]).as_posix()
    else:
        tail = posix_rel

    if tail.startswith("linear"):
        return "linear", posix_rel
    if tail.startswith(("torch", "nn")):
        return "nn", posix_rel
    return "general", posix_rel
Comment on lines +13 to +25
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After simplifying find_config_usages_in_file, we no longer need return_path here.
Let's discuss how this function can be simplified further.



def fetch_option_flags(flags):
    """Normalize raw flag records.

    Args:
        flags: Iterable of mappings with "name" and "description" entries.

    Returns:
        A list of dicts, one per input flag, holding:
        "name" (the flag name with every backslash removed),
        "instruction" (the text after the last "-" of the original name) and
        "description" (copied unchanged).
    """
    return [
        {
            "name": entry["name"].replace("\\", ""),
            "instruction": entry["name"].split("-")[-1],
            "description": entry["description"],
        }
        for entry in flags
    ]


def fetch_all_files():
    """Collect the Python files that are scanned for option usage.

    Returns:
        A set of absolute paths: three scripts directly under ``lib_path``
        plus every ``*.py`` file found recursively below its
        ``libmultilabel`` directory.
    """
    top_level_scripts = [
        os.path.join(lib_path, "main.py"),
        os.path.join(lib_path, "linear_trainer.py"),
        os.path.join(lib_path, "torch_trainer.py"),
    ]
    package_pattern = os.path.join(lib_path, "libmultilabel/**/*.py")
    package_sources = glob.glob(package_pattern, recursive=True)
    return {os.path.abspath(candidate) for candidate in top_level_scripts + package_sources}


def find_config_usages_in_file(file_path, allowed_keys):
    """Locate ``config.<key>`` references in one source file.

    For a file named ``main.py`` only the body of the top-level ``main``
    function is scanned: scanning starts at the ``def main(`` line and stops
    before the next non-blank line that starts in column 0. If no such
    ``def`` exists the whole file is scanned.

    Args:
        file_path: Path of the Python file to scan.
        allowed_keys: Container of option names; attributes of ``config``
            that are not in it are ignored.

    Returns:
        A dict mapping each key found to
        ``{"file": <relative path>, "lines": [<line number as str>, ...]}``
        where line numbers are 1-based positions in the original file.
        The dict is empty when the file cannot be read or decoded, or when
        nothing matches.
    """
    pattern = re.compile(r"\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError):
        # Best effort: an unreadable file contributes no usages. Return the
        # same type as the success path so callers can treat both alike.
        return {}

    _, path = classify_file_category(file_path)

    # Number of lines dropped from the top of the file; keeps reported line
    # numbers relative to the original file.
    offset = 0
    # Compare the base name so files such as "__main__.py" are not mistaken
    # for the entry script.
    if os.path.basename(file_path) == "main.py":
        start = next(
            (idx for idx, line in enumerate(lines) if line.startswith("def main(")),
            None,
        )
        if start is not None:
            offset = start
            lines = lines[start:]
            # Index 0 is the "def" line itself; the body ends right before
            # the first following non-blank line without leading whitespace.
            for end, line in enumerate(lines[1:], start=1):
                if line.strip() and line[0] not in (" ", "\t"):
                    lines = lines[:end]
                    break

    detailed_results = {}
    for lineno, line in enumerate(lines, start=offset + 1):
        for key in pattern.findall(line):
            if key in allowed_keys:
                entry = detailed_results.setdefault(key, {"file": path, "lines": []})
                entry["lines"].append(str(lineno))

    return detailed_results
Comment on lines +54 to +87
Copy link
Contributor

@Eleven1Liu Eleven1Liu Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similarly, for this function:

Suggested change
def find_config_usages_in_file(file_path, allowed_keys):
pattern = re.compile(r"\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)")
detailed_results = {}
try:
with open(file_path, "r", encoding="utf-8") as f:
lines = f.readlines()
except (IOError, UnicodeDecodeError):
return []
_, path = classify_file_category(file_path)
if file_path.endswith("main.py"):
for idx in range(len(lines)):
if lines[idx].startswith("def main("):
lines = lines[idx:]
main_start = idx
break
for i, line in enumerate(lines[1:]):
if line and line[0] not in (" ", "\t") and line.strip() != "":
lines = lines[:i]
break
for i, line in enumerate(lines, start=1):
matches = pattern.findall(line)
for key in matches:
if key in allowed_keys:
if key not in detailed_results:
detailed_results[key] = {"file": path, "lines": []}
if file_path.endswith("main.py"):
detailed_results[key]["lines"].append(str(i + main_start))
else:
detailed_results[key]["lines"].append(str(i))
return detailed_results
def find_config_usages_in_file(file_path, allowed_keys, category_set):
    """Record which allowed ``config.<key>`` names a file references.

    The keys found are added, in place, to the set stored in ``category_set``
    under the file's category (as computed by ``classify_file_category``).
    For a file named ``main.py`` only the text from the ``def main(`` line
    onwards is searched.

    Args:
        file_path: Path of the Python file to scan.
        allowed_keys: Iterable of option names that may be recorded.
        category_set: Mapping from category name to a set of keys; mutated.

    Returns:
        None. A file that cannot be read or decoded is skipped.
    """
    pattern = re.compile(r"\bconfig\.([a-zA-Z_][a-zA-Z0-9_]*)")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except (OSError, UnicodeDecodeError):
        # Best effort: nothing to record. Return None like the success path.
        return None

    # Compare the base name so files such as "__main__.py" are not mistaken
    # for the entry script.
    if os.path.basename(file_path) == "main.py":
        for idx, line in enumerate(lines):
            if line.startswith("def main("):
                lines = lines[idx:]
                break

    # set() lets callers pass any iterable of keys, not only a set.
    matches = set(pattern.findall(" ".join(lines))) & set(allowed_keys)
    category = classify_file_category(file_path)[0]
    if matches:
        category_set[category].update(matches)
    return None

Copy link
Contributor

@Eleven1Liu Eleven1Liu Oct 20, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBD: the try/except here can be removed, since we want to see errors immediately when someone changes the files (this is a docs build script, not a production setting).



def move_duplicates_together(data, keep="general"):
    """Move every element shared by two or more sets into a single set.

    Args:
        data: Mapping from a name to a set. The sets are modified in place.
        keep: Name of the set that receives the shared elements. It must be
            a key of ``data`` (``KeyError`` otherwise).

    Returns:
        The same ``data`` mapping. Afterwards each element that appeared in
        at least two sets is present only in ``data[keep]``.
    """
    seen = set()
    duplicates = set()
    # Single pass: an element is a duplicate as soon as it shows up in a
    # second set, so no pairwise comparison of all sets is needed.
    for members in data.values():
        duplicates |= seen & members
        seen |= members

    data[keep] |= duplicates

    for key, members in data.items():
        if key != keep:
            members -= duplicates

    return data
Comment on lines +90 to +104
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TBD: readability



def classify(raw_flags):
    """Group command-line flags by the part of the code base that uses them.

    Every file returned by ``fetch_all_files`` is scanned for
    ``config.<key>`` references. A key used only by files of one category
    belongs to that category; a key used by files of several categories, or
    by none, is reported under "general".

    Args:
        raw_flags: Iterable of mappings with "name" and "description".

    Returns:
        A dict with one entry per category that has at least one flag
        (a list of ``{"name", "description"}`` dicts, with "--" in the name
        written as ``\\-\\-``), plus a "details" list of
        ``{"name", "file", "location"}`` dicts. When a key is used in
        several files, only its first row carries the key name; the
        following rows have an empty "name".
    """
    flags = fetch_option_flags(raw_flags)
    allowed_keys = {flag["instruction"] for flag in flags}

    category_set = {"general": set(), "linear": set(), "nn": set()}
    collected = defaultdict(list)

    # fetch_all_files() returns a set; sort it so the generated output does
    # not change from run to run.
    for file_path in sorted(fetch_all_files()):
        detailed_results = find_config_usages_in_file(file_path, allowed_keys)
        if not detailed_results:
            continue
        category, _ = classify_file_category(file_path)
        category_set[category].update(detailed_results)
        for key, usage in detailed_results.items():
            collected[key].append(usage)

    category_set = move_duplicates_together(category_set, "general")

    # After de-duplication each key lives in exactly one category.
    key_to_category = {key: category for category, keys in category_set.items() for key in keys}

    result = {}
    for flag in flags:
        category = key_to_category.get(flag["instruction"], "general")
        result.setdefault(category, []).append(
            {"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]}
        )

    details = []
    for key, usages in collected.items():
        for position, usage in enumerate(usages):
            details.append(
                {
                    "name": key if position == 0 else "",
                    "file": usage["file"],
                    "location": ", ".join(usage["lines"]),
                }
            )
    result["details"] = details

    return result
Comment on lines +107 to +155
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about simplifying the data structure (e.g., dropping the unused detailed line numbers) once the spec is decided?

Suggested change
def classify(raw_flags):
category_set = {"general": set(), "linear": set(), "nn": set()}
flags = fetch_option_flags(raw_flags)
allowed_keys = set(flag["instruction"] for flag in flags)
file_set = fetch_all_files()
usage_map = defaultdict(list)
collected = {}
for file_path in file_set:
detailed_results = find_config_usages_in_file(file_path, allowed_keys)
if detailed_results:
usage_map[file_path] = set(detailed_results.keys())
for k, v in detailed_results.items():
if k not in collected:
collected[k] = []
collected[k].append(v)
for path, keys in usage_map.items():
category, path = classify_file_category(path)
category_set[category] = category_set[category].union(keys)
category_set = move_duplicates_together(category_set, "general")
for flag in flags:
for k, v in category_set.items():
for i in v:
if flag["instruction"] == i:
flag["category"] = k
if "category" not in flag:
flag["category"] = "general"
result = {}
for flag in flags:
if flag["category"] not in result:
result[flag["category"]] = []
result[flag["category"]].append(
{"name": flag["name"].replace("--", r"\-\-"), "description": flag["description"]}
)
result["details"] = []
for k, v in collected.items():
result["details"].append({"name": k, "file": v[0]["file"], "location": ", ".join(v[0]["lines"])})
if len(v) > 1:
for i in v[1:]:
result["details"].append({"name": "", "file": i["file"], "location": ", ".join(i["lines"])})
return result
def classify(raw_flags):
    """Group the raw flags by the part of the code base that uses them.

    Args:
        raw_flags: Re-iterable collection of mappings with "name" and
            "description".

    Returns:
        A ``defaultdict(list)`` mapping a category name to the raw flag
        mappings assigned to it. A flag whose key is used by files of several
        categories, or is not found in any scanned file, goes to "general".
    """
    category_set = {"general": set(), "linear": set(), "nn": set()}
    flags = fetch_option_flags(raw_flags)
    allowed_keys = {flag["instruction"] for flag in flags}

    # Sorted only to make the scan order reproducible.
    for file_path in sorted(fetch_all_files()):
        find_config_usages_in_file(file_path, allowed_keys, category_set)

    # Pass the target set explicitly: shared keys are collected in "general".
    category_set = move_duplicates_together(category_set, "general")
    key_to_category = {key: category for category, keys in category_set.items() for key in keys}

    result = defaultdict(list)
    # fetch_option_flags yields one record per raw flag, in order, so the two
    # sequences can be walked together. Matching on the exact key avoids
    # substring hits (a short key contained in a longer flag name).
    for raw_flag, flag in zip(raw_flags, flags):
        result[key_to_category.get(flag["instruction"], "general")].append(raw_flag)
    return result

53 changes: 40 additions & 13 deletions docs/cli/genflags.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
import os

sys.path.insert(1, os.path.join(sys.path[0], "..", ".."))

import main

from classifier import classify


class FakeParser(dict):
def __init__(self):
Expand All @@ -29,21 +32,45 @@ def add_argument(
parser.add_argument("-c", "--config", help="Path to configuration file")
main.add_all_arguments(parser)

classified = classify(parser.flags)


def width_title(key, title):
    """Return the length of the longest ``key`` value among the flags in ``classified[title]``."""
    lengths = (len(flag[key]) for flag in classified[title])
    return max(lengths)

def width(key):
return max(map(lambda f: len(f[key]), parser.flags))

def print_table(title, flags, intro):
print()
print(intro)
print()

wn = width("name")
wd = width("description")
wn = width_title("name", title)
wd = width_title("description", title)

print(
"""..
Do not modify this file. This file is generated by genflags.py.\n"""
print("=" * wn, "=" * wd)
print("Name".ljust(wn), "Description".ljust(wd))
print("=" * wn, "=" * wd)
for flag in flags:
print(flag["name"].ljust(wn), flag["description"].ljust(wd))
print("=" * wn, "=" * wd)
print()


print_table(
"general",
classified["general"],
intro="**General options**:\n\
Common configurations shared across both linear and neural network trainers.",
)
print_table(
"linear",
classified["linear"],
intro="**Linear options**:\n\
Configurations specific to linear trainer.",
)
print_table(
"nn",
classified["nn"],
intro="**Neural network options**:\n\
Configurations specific to torch (neural networks) trainer.",
)
print("=" * wn, "=" * wd)
print("Name".ljust(wn), "Description".ljust(wd))
print("=" * wn, "=" * wd)
for flag in parser.flags:
print(flag["name"].ljust(wn), flag["description"].ljust(wd))
print("=" * wn, "=" * wd)
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
"examples_dirs": "./examples", # path to your example scripts
"gallery_dirs": "auto_examples", # path to where to save gallery generated output
"plot_gallery": False,
"write_computation_times": False,
}

# bibtex files
Expand Down
Loading
Loading