@@ -1312,6 +1312,40 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
13121312            else  { throw  std::invalid_argument (" invalid value"  ); }
13131313        }
13141314    ).set_env (" LLAMA_ARG_NUMA"  ));
1315+     add_opt (common_arg (
1316+         {" -dev"  , " --device"  }, " <dev1,dev2,..>"  ,
1317+         " comma-separated list of devices to use for offloading\n " 
1318+         " use --list-devices to see a list of available devices"  ,
1319+         [](common_params & params, const  std::string & value) {
1320+             auto  devices = string_split<std::string>(value, ' ,'  );
1321+             if  (devices.empty ()) {
1322+                 throw  std::invalid_argument (" no devices specified"  );
1323+             }
1324+             for  (const  auto  & device : devices) {
1325+                 auto  * dev = ggml_backend_dev_by_name (device.c_str ());
1326+                 if  (!dev || ggml_backend_dev_type (dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
1327+                     throw  std::invalid_argument (string_format (" invalid device: %s"  , device.c_str ()));
1328+                 }
1329+                 params.devices .push_back (dev);
1330+             }
1331+             params.devices .push_back (nullptr );
1332+         }
1333+     ).set_env (" LLAMA_ARG_DEVICES"  ));
1334+     add_opt (common_arg (
1335+         {" --list-devices"  },
1336+         " print list of available devices and exit"  ,
1337+         [](common_params &) {
1338+             for  (size_t  i = 0 ; i < ggml_backend_dev_count (); ++i) {
1339+                 auto  * dev = ggml_backend_dev_get (i);
1340+                 if  (ggml_backend_dev_type (dev) == GGML_BACKEND_DEVICE_TYPE_GPU) {
1341+                     size_t  free, total;
1342+                     ggml_backend_dev_memory (dev, &free, &total);
1343+                     printf (" %s: %s (%zu MiB, %zu MiB free)\n "  , ggml_backend_dev_name (dev), ggml_backend_dev_description (dev), total / 1024  / 1024 , free / 1024  / 1024 );
1344+                 }
1345+             }
1346+             exit (0 );
1347+         }
1348+     ));
13151349    add_opt (common_arg (
13161350        {" -ngl"  , " --gpu-layers"  , " --n-gpu-layers"  }, " N"  ,
13171351        " number of layers to store in VRAM"  ,
@@ -1336,10 +1370,6 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
13361370            } else  if  (arg_next == " layer"  ) {
13371371                params.split_mode  = LLAMA_SPLIT_MODE_LAYER;
13381372            } else  if  (arg_next == " row"  ) {
1339- #ifdef  GGML_USE_SYCL
1340-                 fprintf (stderr, " warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\n Exit!\n "  );
1341-                 exit (1 );
1342- #endif  //  GGML_USE_SYCL
13431373                params.split_mode  = LLAMA_SPLIT_MODE_ROW;
13441374            } else  {
13451375                throw  std::invalid_argument (" invalid value"  );
@@ -2042,6 +2072,25 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
20422072            params.speculative .n_ctx  = value;
20432073        }
20442074    ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
2075+     add_opt (common_arg (
2076+         {" -devd"  , " --device-draft"  }, " <dev1,dev2,..>"  ,
2077+         " comma-separated list of devices to use for offloading the draft model\n " 
2078+         " use --list-devices to see a list of available devices"  ,
2079+         [](common_params & params, const  std::string & value) {
2080+             auto  devices = string_split<std::string>(value, ' ,'  );
2081+             if  (devices.empty ()) {
2082+                 throw  std::invalid_argument (" no devices specified"  );
2083+             }
2084+             for  (const  auto  & device : devices) {
2085+                 auto  * dev = ggml_backend_dev_by_name (device.c_str ());
2086+                 if  (!dev || ggml_backend_dev_type (dev) != GGML_BACKEND_DEVICE_TYPE_GPU) {
2087+                     throw  std::invalid_argument (string_format (" invalid device: %s"  , device.c_str ()));
2088+                 }
2089+                 params.speculative .devices .push_back (dev);
2090+             }
2091+             params.speculative .devices .push_back (nullptr );
2092+         }
2093+     ).set_examples ({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
20452094    add_opt (common_arg (
20462095        {" -ngld"  , " --gpu-layers-draft"  , " --n-gpu-layers-draft"  }, " N"  ,
20472096        " number of layers to store in VRAM for the draft model"  ,
0 commit comments