Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Unicode model filename support for Windows #5927

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions common/train.cpp
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of copy-pasting this code across several locations, let's use a function. Maybe we could put a static function in a utility header and use it everywhere?

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can expose the function through the ggml API:

    FILE * ggml_fopen(const char * fname, const char * mode);

Unfortunately, we would have to include <stdio.h>, which is not great, but I guess it is the best option.

Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,28 @@ struct llama_file {
size_t size;

llama_file(const char * fname, const char * mode) {
#ifdef _WIN32
// temporarily change the locale to the system default to handle Unicode file names
std::string oldLocale = std::setlocale(LC_ALL, nullptr);
std::setlocale(LC_ALL, "");

// convert multi-byte string to wide-char string
int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
std::vector<wchar_t> wfname(wsize);
MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);

// determine the correct wide-character mode string
std::wstring wmode;
for(; *mode; ++mode) {
wmode += wchar_t(*mode);
}

fp = _wfopen(wfname.data(), wmode.c_str());

std::setlocale(LC_ALL, oldLocale.c_str());
#else
fp = std::fopen(fname, mode);
#endif
if (fp == NULL) {
size = 0;
} else {
Expand Down
21 changes: 21 additions & 0 deletions examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -464,7 +464,28 @@ struct llama_file {
size_t size;

llama_file(const char * fname, const char * mode) {
#ifdef _WIN32
// temporarily change the locale to the system default to handle Unicode file names
std::string oldLocale = std::setlocale(LC_ALL, nullptr);
std::setlocale(LC_ALL, "");

// convert multi-byte string to wide-char string
int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
std::vector<wchar_t> wfname(wsize);
MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);

// determine the correct wide-character mode string
std::wstring wmode;
for(; *mode; ++mode) {
wmode += wchar_t(*mode);
}

fp = _wfopen(wfname.data(), wmode.c_str());

std::setlocale(LC_ALL, oldLocale.c_str());
#else
fp = std::fopen(fname, mode);
#endif
if (fp == NULL) {
size = 0;
} else {
Expand Down
21 changes: 21 additions & 0 deletions examples/finetune/finetune.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,28 @@ struct llama_file {
size_t size;

llama_file(const char * fname, const char * mode) {
#ifdef _WIN32
// temporarily change the locale to the system default to handle Unicode file names
std::string oldLocale = std::setlocale(LC_ALL, nullptr);
std::setlocale(LC_ALL, "");

// convert multi-byte string to wide-char string
int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
std::vector<wchar_t> wfname(wsize);
MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);

// determine the correct wide-character mode string
std::wstring wmode;
for(; *mode; ++mode) {
wmode += wchar_t(*mode);
}

fp = _wfopen(wfname.data(), wmode.c_str());

std::setlocale(LC_ALL, oldLocale.c_str());
#else
fp = std::fopen(fname, mode);
#endif
if (fp == NULL) {
size = 0;
} else {
Expand Down
34 changes: 32 additions & 2 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include <stdio.h>
#include <float.h>
#include <limits.h>
#include <locale.h>
#include <stdarg.h>
#include <signal.h>
#if defined(__gnu_linux__)
Expand All @@ -42,7 +43,7 @@
#endif

#if defined(_WIN32)

#include <wchar.h>
#include <windows.h>

typedef volatile LONG atomic_int;
Expand Down Expand Up @@ -20282,8 +20283,37 @@ struct gguf_context * gguf_init_empty(void) {
return ctx;
}

#ifdef _WIN32
// Opens `fname` for binary reading ("rb") through the wide-character file API.
//
// LC_ALL is temporarily switched to the environment's default locale ("") so
// that mbstowcs() can convert the multibyte name to wide characters; the
// previous locale is restored before returning.
//
// Returns the opened stream, or NULL if the name cannot be converted, the
// conversion buffer cannot be allocated, or _wfopen() fails.
static FILE *open_file(const char *fname) {
    // setlocale() returns a pointer to internal storage that later setlocale()
    // calls may overwrite, so keep a private copy to restore from.
    char *old_locale = setlocale(LC_ALL, NULL);
    if (old_locale) {
        old_locale = strdup(old_locale);
    }
    setlocale(LC_ALL, "");

    FILE *file = NULL;

    // The first pass only measures. mbstowcs() reports (size_t)-1 for a name it
    // cannot convert; adding 1 to that would wrap to 0 and leave _wfopen()
    // reading an empty, unterminated buffer, so bail out before allocating.
    const size_t len = mbstowcs(NULL, fname, 0);
    if (len != (size_t)-1) {
        const size_t size = len + 1; // room for the terminating L'\0'
        wchar_t *wfname = malloc(size * sizeof(wchar_t));
        if (wfname && mbstowcs(wfname, fname, size) != (size_t)-1) {
            file = _wfopen(wfname, L"rb");
        }
        free(wfname);
    }

    if (old_locale) {
        setlocale(LC_ALL, old_locale);
        free(old_locale);
    }

    return file;
}
#else
// Fallback used when _WIN32 is not defined: opens `fname` for binary reading
// with plain fopen(). Returns the stream, or NULL when fopen() fails.
static FILE *open_file(const char *fname) {
    FILE *stream = fopen(fname, "rb");
    return stream;
}
#endif

struct gguf_context * gguf_init_from_file(const char * fname, struct gguf_init_params params) {
FILE * file = fopen(fname, "rb");
FILE *file = open_file(fname);
if (!file) {
return NULL;
}
Expand Down
21 changes: 21 additions & 0 deletions llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -988,7 +988,28 @@ struct llama_file {
size_t size;

llama_file(const char * fname, const char * mode) {
#ifdef _WIN32
// temporarily change the locale to the system default to handle Unicode file names
std::string oldLocale = std::setlocale(LC_ALL, nullptr);
std::setlocale(LC_ALL, "");

// convert multi-byte string to wide-char string
int wsize = MultiByteToWideChar(CP_UTF8, 0, fname, -1, nullptr, 0);
std::vector<wchar_t> wfname(wsize);
MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wsize);

// determine the correct wide-character mode string
std::wstring wmode;
for(; *mode; ++mode) {
wmode += wchar_t(*mode);
}

fp = _wfopen(wfname.data(), wmode.c_str());

std::setlocale(LC_ALL, oldLocale.c_str());
#else
fp = std::fopen(fname, mode);
#endif
if (fp == NULL) {
throw std::runtime_error(format("failed to open %s: %s", fname, strerror(errno)));
}
Expand Down
Loading