Skip to content

Commit

Permalink
convert : skip unaccessible HF repos (ggerganov#7210)
Browse files Browse the repository at this point in the history
  • Loading branch information
CrispStrobe authored May 11, 2024
1 parent 9886313 commit 3292733
Showing 1 changed file with 20 additions and 2 deletions.
22 changes: 20 additions & 2 deletions convert-hf-to-gguf-update.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,17 @@ def download_file_with_auth(url, token, save_path):
if tokt == TOKENIZER_TYPE.SPM:
continue

# Skip if the tokenizer folder does not exist (e.g. because an earlier download failed)
if not os.path.exists(f"models/tokenizers/{name}"):
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
continue

# create the tokenizer
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
try:
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
except OSError as e:
logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
continue # Skip to the next model if the tokenizer can't be loaded

chktok = tokenizer.encode(chktxt)
chkhsh = sha256(str(chktok).encode()).hexdigest()
Expand Down Expand Up @@ -287,8 +296,17 @@ def get_vocab_base_pre(self, tokenizer) -> str:
name = model["name"]
tokt = model["tokt"]

# Skip if the tokenizer folder does not exist (e.g. because an earlier download failed)
if not os.path.exists(f"models/tokenizers/{name}"):
logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
continue

# create the tokenizer
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
try:
tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
except OSError as e:
logger.error(f"Failed to load tokenizer for model {name}. Error: {e}")
continue # Skip this model and continue with the next one in the loop

with open(f"models/ggml-vocab-{name}.gguf.inp", "w", encoding="utf-8") as f:
for text in tests:
Expand Down

0 comments on commit 3292733

Please sign in to comment.