Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit db42436

Browse files
committedFeb 12, 2024
Create finetuningresearch python package, allowing it to be pip installed
1 parent f5ac8fa commit db42436

File tree

8 files changed

+51
-11
lines changed

8 files changed

+51
-11
lines changed
 

‎Dockerfile

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,14 +16,14 @@ RUN apt-get update \
1616
&& rm -rf /var/lib/{apt,dpkg,cache,log}
1717

1818
# Copy the requirements file into the image so dependencies can be installed
19-
COPY app/requirements.txt requirements.txt
19+
COPY src/finetuningresearch/requirements.txt requirements.txt
2020

2121
# Install any needed packages specified in requirements.txt
2222
RUN pip install --upgrade pip && \
2323
pip install --no-cache-dir -r requirements.txt && \
2424
rm requirements.txt
2525

26-
COPY app /app
26+
COPY src/finetuningresearch /app
2727

2828
# Set the working directory in the container to /app
2929
WORKDIR /app

‎README.md

+18
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,21 @@ Use a tag versioning by date / user as needed. For example,
1919
docker build . -t rparundekar/fine_tune_research:20230110_01
2020
docker push rparundekar/fine_tune_research:20230110_01
2121
```
22+
23+
## Library
24+
To use this fine-tuning library as a Python package, install it with pip directly from GitHub. This also installs all of its dependencies.
25+
26+
```sh
27+
pip install -v git+https://github.com/shankarg87/training_research@main
28+
```
29+
30+
Then import `execute` and call it from your Python code:
31+
32+
```python
33+
from finetuningresearch import execute
34+
35+
config = """
36+
<config multiline yaml here>
37+
"""
38+
execute(config)
39+
```

‎pyproject.toml

+22-5
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,36 @@
11
[project]
2-
name = "Fine-Tuning Reserch"
2+
name = "finetuningresearch"
33
version = "0.1.0"
44
description = "Open source research on fine-tuning LLMs"
5-
authors = ["Rahul Parundekar <rahul@aihero.studio>", "Shankar Ganesan <gshankar.87@gmail.com>"]
6-
license = "MIT"
5+
authors = [
6+
{name = "Rahul Parundekar", email= "rahul@aihero.studio" },
7+
{name = "Shankar Ganesan", email = "gshankar.87@gmail.com" }
8+
]
79
readme = "README.md"
8-
homepage = "https://aihero.studio"
9-
repository = "https://github.com/ai-hero/fine_tune_research"
1010
classifiers = [
1111
"Programming Language :: Python",
1212
"Programming Language :: Python :: 3.9",
1313
"Programming Language :: Python :: 3.10",
1414
"Programming Language :: Python :: 3.11",
1515
"License :: OSI Approved :: MIT License",
1616
]
17+
dependencies = [
18+
"transformers>=4.35.0",
19+
"peft>=0.5.0",
20+
"bitsandbytes>=0.41.1",
21+
"accelerate>=0.25.0",
22+
"trl>=0.7.2",
23+
"pydantic-settings>=2.0.3",
24+
"scipy>=1.11.3",
25+
"PyYAML>=6.0.1",
26+
"datasets>=2.14.6",
27+
"einops>=0.7.0",
28+
"wandb>=0.15.12",
29+
"python-dotenv",
30+
"minio>=7.2.0",
31+
"fire",
32+
"types-PyYAML"
33+
]
1734

1835
[tool.pytest.ini_options]
1936
addopts = "-vvv"

‎src/finetuningresearch/__init__.py

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
"""Helps finetune models."""
2+
from .sft import execute
3+
4+
__all__ = ["execute"]
File renamed without changes.
File renamed without changes.

‎app/sft.py ‎src/finetuningresearch/sft.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from trl import SFTTrainer
1717
from wandb import Table, finish
1818

19-
from utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16
19+
from .utils import DatasetMover, dump_envs, load_config, peft_module_casting_to_bf16
2020

2121
CHECKPOINT_DIR = "/mnt/checkpoint"
2222
DATASET_DIR = "/mnt/dataset"
@@ -645,10 +645,11 @@ def save_model(model: Any, tokenizer: Any, config: dict[str, Any]) -> None:
645645
raise NotImplementedError("S3 support not implemented yet")
646646

647647

648-
def main() -> None:
648+
def execute(config: dict[str, Any] = {}) -> None:
649649
"""Execute the main training loop."""
650650
dump_envs()
651-
config = load_config()
651+
if not config:
652+
config = load_config()
652653

653654
# Check if "training" is in config or "batch_inference" is in config, but not both.
654655
if "training" not in config and "batch_inference" not in config:
@@ -692,4 +693,4 @@ def main() -> None:
692693

693694

694695
if __name__ == "__main__":
695-
Fire(main)
696+
Fire(execute)
File renamed without changes.

0 commit comments

Comments
 (0)
Please sign in to comment.