Skip to content

Commit

Permalink
fix bug
Browse files Browse the repository at this point in the history
  • Loading branch information
tmhoangt committed Oct 22, 2024
1 parent 977a067 commit 28672f7
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 5 deletions.
10 changes: 10 additions & 0 deletions copy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def upload_prompts_data(num_processes=100):
)



def download_prompts_data(num_processes=100):
"""
s3://cos-optimal-llm-pile/torchtitan/outputs/
Expand Down Expand Up @@ -56,6 +57,15 @@ def upload_prompts_data_output(num_processes=100):
num_processes=num_processes,
verbose=True,
)
base_folder = "/results/status/"
target_folder = "data/synthetic_using/Cosmopedia_Prompts/status/"
print(f"copying from {base_folder} to {target_folder}")
s3Client.upload_files(
base_folder,
prefix=target_folder,
num_processes=num_processes,
verbose=True,
)


def create_s3Client(environ):
Expand Down
18 changes: 16 additions & 2 deletions raga-gen/gen2
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ We launch N pods, each handles a portion of the same file


class StatusFile:
def __init__(self, id: str, num_workers: int, fname="/results/status.json"):
def __init__(self, id: str, num_workers: int, fname="/results/status/status.json"):
self.fname = fname
folder_path = os.path.dirname(fname)
os.makedirs(folder_path, exist_ok=True)
self.id = id # pod-id 1-based index
self.data = self.read()
# read worker-specific file
Expand Down Expand Up @@ -58,11 +60,23 @@ class StatusFile:
keep_processing = True
yield fname

def exclude_files(self, fname="/model/data/exclude_these_files.txt"):
    """Return the file names listed in the exclusion file.

    The exclusion file is plain text with one file name per line.
    Surrounding whitespace is stripped from each line, and lines that are
    empty after stripping are skipped so that a spacer or trailing blank
    line never produces an empty-string entry.

    Args:
        fname: Path of the exclusion list. Defaults to the location that
            was previously hard-coded in this method.

    Returns:
        A list of file names in the order they appear in the file, or an
        empty list when the exclusion file does not exist.
    """
    try:
        with open(fname, "r") as f:
            stripped = (line.strip() for line in f)
            return [name for name in stripped if name]
    except FileNotFoundError:
        # No exclusion file means nothing is excluded.
        return []

def files2process(self):
    """Yield the files that still need to be processed.

    A file is yielded when it is a key of ``self.data["metadata"]``, is not
    present in ``self.data["completed"]``, and is not named by
    ``self.exclude_files()``.

    Yields:
        Each qualifying key of the metadata mapping, in iteration order.
    """
    metadata = self.data["metadata"]
    # Build the exclusion set once so each membership test is O(1) rather
    # than a scan of the whole exclusion list for every file.
    excluded = set(self.exclude_files())
    for fname in metadata:
        if fname not in self.data["completed"] and fname not in excluded:
            yield fname

def update_metadata(self, mdict):
Expand Down
2 changes: 1 addition & 1 deletion run.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run_in_pod(id=1):
for fname in fnames:
tmux = "tmux send-keys -t 0 "
if fname.endswith(".sh"):
cmd = f""" bash -c "chmod +x {POD_WORKDIR}/{fname} && cd {POD_WORKDIR} && {tmux} 'sh ./{fname} 2>&1 | tee log_output.txt' C-m;" """
cmd = f""" bash -c "chmod +x {POD_WORKDIR}/{fname} && cd {POD_WORKDIR} && {tmux} 'cd {POD_WORKDIR}; sh ./{fname} 2>&1 | tee log_output.txt' C-m;" """
elif fname.endswith(".tar"):
cmd = f""" bash -c "cd {POD_WORKDIR} && tar -xvf {POD_WORKDIR}/{fname}" """
elif fname.startswith("datalake"):
Expand Down
5 changes: 3 additions & 2 deletions vllm-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,13 @@ spec:
imagePullSecrets:
- name: us-icr-pull
containers:
- image: us.icr.io/cil15-shared-registry/cmisale/pytorchbase:2.3cu121
- image: us.icr.io/cil15-shared-registry/cmisale/pytorchbase:cu124
command:
- bash
- -c
- |
pip install vllm;
pip install vllm --no-dependencies;
pip install psutil msgspec pydantic compressed-tensors gguf cloudpickle mistral-common py-cpuinfo aiohttp openai;
apt-get update; apt-get install tmux vim -y;
pip install -U "huggingface_hub[cli]";
sleep infinity
Expand Down

0 comments on commit 28672f7

Please sign in to comment.