Deprecate config in LLMPipeline, add kwargs to other pipelines. (#1042)
Returns the previously reverted changes from PR #1033.
popovaan authored Oct 23, 2024
1 parent 91c41cb commit 1729e78
Showing 9 changed files with 63 additions and 71 deletions.
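In short: pipeline properties move from the 'config' dict argument to plain Python keyword arguments. A minimal sketch of the call-site change (the model path and property values below are illustrative, not taken from this commit):

import openvino_genai as ov_genai

# Before: properties were passed through the 'config' dict argument.
pipe = ov_genai.LLMPipeline("./model", "CPU", config={"ENABLE_MMAP": False})

# After: the same properties are passed as keyword arguments. 'config' still
# works on LLMPipeline but now emits a DeprecationWarning; the other pipelines
# accept keyword arguments only.
pipe = ov_genai.LLMPipeline("./model", "CPU", ENABLE_MMAP=False)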
@@ -61,7 +61,7 @@ def main():
# Cache compiled models on disk for GPU to save time on the
# next run. It's not beneficial for CPU.
enable_compile_cache["CACHE_DIR"] = "vlm_cache"
pipe = openvino_genai.VLMPipeline(args.model_dir, device, enable_compile_cache)
pipe = openvino_genai.VLMPipeline(args.model_dir, device, **enable_compile_cache)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
73 changes: 29 additions & 44 deletions src/python/py_llm_pipeline.cpp
@@ -90,49 +90,25 @@ extern char generation_config_docstring[];

void init_llm_pipeline(py::module_& m) {
py::class_<LLMPipeline>(m, "LLMPipeline", "This class is used for generation with LLMs")
.def(py::init([](
const std::filesystem::path& models_path,
const std::string& device,
const std::map<std::string, py::object>& config
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, device, pyutils::properties_to_any_map(config));
}),
py::arg("models_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"), "device on which inference will be done",
py::arg("config") = ov::AnyMap({}), "openvino.properties map",
R"(
LLMPipeline class constructor.
models_path (str): Path to the model file.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
)")

.def(py::init([](
const std::filesystem::path& models_path,
const std::string& device,
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, device, pyutils::kwargs_to_any_map(kwargs));
}),
py::arg("models_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"), "device on which inference will be done",
R"(
LLMPipeline class constructor.
models_path (str): Path to the model file.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
)")

// init(model_path, tokenizer, device, config, kwargs) should be defined before init(model_path, device, config, kwargs)
// to prevent the tokenizer from being treated as a kwargs argument
.def(py::init([](
const std::filesystem::path& models_path,
const Tokenizer& tokenizer,
const std::string& device,
const std::map<std::string, py::object>& config
const std::map<std::string, py::object>& config,
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, tokenizer, device, pyutils::properties_to_any_map(config));
ov::AnyMap properties = pyutils::kwargs_to_any_map(kwargs);
if (config.size()) {
PyErr_WarnEx(PyExc_DeprecationWarning,
"'config' parameters is deprecated, please use kwargs to pass config properties instead.",
1);
auto config_properties = pyutils::properties_to_any_map(config);
properties.insert(config_properties.begin(), config_properties.end());
}
return std::make_unique<LLMPipeline>(models_path, tokenizer, device, properties);
}),
py::arg("models_path"),
py::arg("tokenizer"),
@@ -144,26 +120,35 @@ void init_llm_pipeline(py::module_& m) {
tokenizer (openvino_genai.Tokenizer): tokenizer object.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
kwargs: Device properties.
)")

.def(py::init([](
const std::filesystem::path& models_path,
const Tokenizer& tokenizer,
const std::string& device,
const std::map<std::string, py::object>& config,
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, tokenizer, device, pyutils::kwargs_to_any_map(kwargs));
ov::AnyMap properties = pyutils::kwargs_to_any_map(kwargs);
if (config.size()) {
PyErr_WarnEx(PyExc_DeprecationWarning,
"'config' parameters is deprecated, please use kwargs to pass config properties instead.",
1);
auto config_properties = pyutils::properties_to_any_map(config);
properties.insert(config_properties.begin(), config_properties.end());
}
return std::make_unique<LLMPipeline>(models_path, device, properties);
}),
py::arg("models_path"),
py::arg("tokenizer"),
py::arg("device"),
py::arg("models_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"), "device on which inference will be done",
py::arg("config") = ov::AnyMap({}), "openvino.properties map",
R"(
LLMPipeline class constructor for manually created openvino_genai.Tokenizer.
LLMPipeline class constructor.
models_path (str): Path to the model file.
tokenizer (openvino_genai.Tokenizer): tokenizer object.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
kwargs: Device properties.
)")

.def(
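To illustrate the deprecation path added above, a hedged sketch from the Python side (the property name is illustrative): a non-empty config is still accepted but raises DeprecationWarning, and since the kwargs map is filled first and std::map::insert keeps existing keys, kwargs values appear to win when the same key is passed both ways.

import warnings
import openvino_genai as ov_genai

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Deprecated style: still accepted, but warns.
    pipe = ov_genai.LLMPipeline("./model", "CPU", config={"ENABLE_MMAP": False})
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Preferred style going forward.
pipe = ov_genai.LLMPipeline("./model", "CPU", ENABLE_MMAP=False)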
2 changes: 0 additions & 2 deletions src/python/py_text2image_pipeline.cpp
@@ -148,8 +148,6 @@ ov::AnyMap text2image_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_com
"Use help(openvino_genai.Text2ImagePipeline.generate) to get list of acceptable parameters."));
}
}


}
return params;
}
42 changes: 26 additions & 16 deletions src/python/py_vlm_pipeline.cpp
@@ -72,53 +72,63 @@ py::object call_vlm_generate(
return py::cast(pipe.generate(prompt, images, updated_config, streamer));
}

py::object call_vlm_generate(
ov::genai::VLMPipeline& pipe,
const std::string& prompt,
const py::kwargs& kwargs
) {
ov::AnyMap vlm_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_properties=true) {
ov::AnyMap params = {};

for (const auto& item : kwargs) {
std::string key = py::cast<std::string>(item.first);
py::object value = py::cast<py::object>(item.second);

if (key == "images") {
params.insert({ov::genai::images(std::move(py::cast<std::vector<ov::Tensor>>(item.second)))});
params.insert({ov::genai::images(std::move(py::cast<std::vector<ov::Tensor>>(value)))});
} else if (key == "image") {
params.insert({ov::genai::image(std::move(py::cast<ov::Tensor>(item.second)))});
params.insert({ov::genai::image(std::move(py::cast<ov::Tensor>(value)))});
} else if (key == "generation_config") {
params.insert({ov::genai::generation_config(std::move(py::cast<ov::genai::GenerationConfig>(item.second)))});
params.insert({ov::genai::generation_config(std::move(py::cast<ov::genai::GenerationConfig>(value)))});
} else if (key == "streamer") {
auto py_streamer = py::cast<pyutils::PyBindStreamerVariant>(value);
params.insert({ov::genai::streamer(std::move(pyutils::pystreamer_to_streamer(py_streamer)))});

} else {
throw(std::invalid_argument("'" + key + "' is unexpected parameter name. "
}
else {
if (allow_compile_properties) {
// convert arbitrary objects to ov::Any
// not supported properties are not checked, as these properties are passed to compile(), which will throw exception in case of unsupported property
if (pyutils::py_object_is_any_map(value)) {
auto map = pyutils::py_object_to_any_map(value);
params.insert(map.begin(), map.end());
} else {
params[key] = pyutils::py_object_to_any(value);
}
}
else {
// generate doesn't run compile(), so only VLMPipeline specific properties are allowed
throw(std::invalid_argument("'" + key + "' is unexpected parameter name. "
"Use help(openvino_genai.VLMPipeline.generate) to get list of acceptable parameters."));
}
}
}

return py::cast(pipe.generate(prompt, params));
return params;
}

void init_vlm_pipeline(py::module_& m) {
py::class_<ov::genai::VLMPipeline>(m, "VLMPipeline", "This class is used for generation with VLMs")
.def(py::init([](
const std::filesystem::path& models_path,
const std::string& device,
const std::map<std::string, py::object>& config
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<ov::genai::VLMPipeline>(models_path, device, pyutils::properties_to_any_map(config));
return std::make_unique<ov::genai::VLMPipeline>(models_path, device, vlm_kwargs_to_any_map(kwargs, true));
}),
py::arg("models_path"), "folder with exported model files",
py::arg("device"), "device on which inference will be done",
py::arg("config") = ov::AnyMap({}), "openvino.properties map"
py::arg("device"), "device on which inference will be done"
R"(
VLMPipeline class constructor.
models_path (str): Path to the folder with exported model files.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
kwargs: Device properties
)")

.def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
@@ -149,7 +159,7 @@ void init_vlm_pipeline(py::module_& m) {
const std::string& prompt,
const py::kwargs& kwargs
) {
return call_vlm_generate(pipe, prompt, kwargs);
return py::cast(pipe.generate(prompt, vlm_kwargs_to_any_map(kwargs, false)));
},
py::arg("prompt"), "Input string",
(vlm_generate_kwargs_docstring + std::string(" \n ")).c_str()
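A hedged sketch of what the shared vlm_kwargs_to_any_map helper above enables on the Python side (model path, image shape and property values are illustrative): the constructor accepts arbitrary compile/device properties, while generate() only accepts the VLM-specific keys (image, images, generation_config, streamer) and rejects anything else.

import numpy as np
import openvino as ov
import openvino_genai as ov_genai

# Constructor: compile/device properties are allowed (allow_compile_properties=True).
pipe = ov_genai.VLMPipeline("./vlm_model", "GPU", CACHE_DIR="vlm_cache")

config = ov_genai.GenerationConfig()
config.max_new_tokens = 100

# generate(): only VLM-specific kwargs; an unknown key raises an error.
image = ov.Tensor(np.zeros((1, 448, 448, 3), dtype=np.uint8))  # placeholder image tensor
result = pipe.generate("Describe the image.", image=image, generation_config=config)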
5 changes: 2 additions & 3 deletions src/python/py_whisper_pipeline.cpp
@@ -251,15 +251,14 @@ void init_whisper_pipeline(py::module_& m) {
py::class_<WhisperPipeline>(m, "WhisperPipeline")
.def(py::init([](const std::filesystem::path& models_path,
const std::string& device,
const std::map<std::string, py::object>& config) {
const py::kwargs& kwargs) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<WhisperPipeline>(models_path, device, pyutils::properties_to_any_map(config));
return std::make_unique<WhisperPipeline>(models_path, device, pyutils::kwargs_to_any_map(kwargs));
}),
py::arg("models_path"),
"folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"),
"device on which inference will be done",
py::arg("config") = ov::AnyMap({}),
"openvino.properties map",
R"(
WhisperPipeline class constructor.
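Correspondingly for Whisper, properties that previously went through the removed 'config' argument are now plain keyword arguments (a sketch with an illustrative path and property):

import openvino_genai as ov_genai

pipe = ov_genai.WhisperPipeline("./whisper_model", "CPU", ENABLE_MMAP=False)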
4 changes: 2 additions & 2 deletions tests/python_tests/ov_genai_test_utils.py
@@ -197,7 +197,7 @@ def read_model(params, **tokenizer_kwargs):
path,
tokenizer,
opt_model,
ov_genai.LLMPipeline(path, 'CPU', config={'ENABLE_MMAP': False}),
ov_genai.LLMPipeline(path, 'CPU', **{'ENABLE_MMAP': False}),
)


@@ -252,4 +252,4 @@ def load_pipe(configs: List[Tuple], temp_path):
def get_continuous_batching(path):
scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.cache_size = 1
return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', config={"scheduler_config": scheduler_config})
return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', **{"scheduler_config": scheduler_config})
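The updated helper above also shows the continuous-batching path: a SchedulerConfig passed as a keyword argument, matching the note in the LLMPipeline constructor docstring (a sketch; the model path is illustrative):

import openvino_genai as ov_genai

scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.cache_size = 1

# Passing scheduler_config as a keyword switches LLMPipeline to continuous batching.
pipe = ov_genai.LLMPipeline("./model", ov_genai.Tokenizer("./model"), "CPU",
                            scheduler_config=scheduler_config)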
2 changes: 1 addition & 1 deletion tests/python_tests/test_chat_generate_api.py
@@ -118,7 +118,7 @@ def test_chat_compare_statefull_vs_text_history(model_descr, generation_config:
# HF in chat scenario does not add special tokens, but openvino tokenizer by default is converted with add_special_tokens=True.
# Need to regenerate openvino_tokenizer/detokenizer.
model_id, path, tokenizer, model_opt, pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'), add_special_tokens=False)
pipe_with_kv_cache = ov_genai.LLMPipeline(path, device, config={"ENABLE_MMAP": False})
pipe_with_kv_cache = ov_genai.LLMPipeline(path, device, **{"ENABLE_MMAP": False})

pipe_with_kv_cache.start_chat()
for question in quenstions:
2 changes: 1 addition & 1 deletion tests/python_tests/test_whisper_generate_api.py
@@ -68,7 +68,7 @@ def read_whisper_model(params, **tokenizer_kwargs):
path,
opt_pipe,
ov_genai.WhisperPipeline(
path, 'CPU', config={'ENABLE_MMAP': False}
path, 'CPU', **{'ENABLE_MMAP': False}
),
)

Expand Down
2 changes: 1 addition & 1 deletion tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -201,7 +201,7 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
setattr(scheduler_config, param, value)
ov_config["scheduler_config"] = scheduler_config
start = time.perf_counter()
llm_pipe = openvino_genai.LLMPipeline(model_path, device.upper(), ov_config)
llm_pipe = openvino_genai.LLMPipeline(model_path, device.upper(), **ov_config)
end = time.perf_counter()
log.info(f'Pipeline initialization time: {end - start:.2f}s')

