Deprecate config in LLMPipeline, add kwargs to other pipelines. (#1042)
Returns the previously reverted changes from PR #1033.
popovaan authored Oct 23, 2024
1 parent 91c41cb commit 1729e78
Showing 9 changed files with 63 additions and 71 deletions.
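In short: pipeline properties move from the 'config' dict argument to plain Python keyword arguments. A minimal sketch of the call-site change (the model path and property values below are illustrative, not taken from this commit):

import openvino_genai as ov_genai

# Before: properties were passed through the 'config' dict argument.
pipe = ov_genai.LLMPipeline("./model", "CPU", config={"ENABLE_MMAP": False})

# After: the same properties are passed as keyword arguments. 'config' still
# works on LLMPipeline but now emits a DeprecationWarning; the other pipelines
# accept keyword arguments only.
pipe = ov_genai.LLMPipeline("./model", "CPU", ENABLE_MMAP=False)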
@@ -61,7 +61,7 @@ def main():
# Cache compiled models on disk for GPU to save time on the
# next run. It's not beneficial for CPU.
enable_compile_cache["CACHE_DIR"] = "vlm_cache"
pipe = openvino_genai.VLMPipeline(args.model_dir, device, enable_compile_cache)
pipe = openvino_genai.VLMPipeline(args.model_dir, device, **enable_compile_cache)

config = openvino_genai.GenerationConfig()
config.max_new_tokens = 100
73 changes: 29 additions & 44 deletions src/python/py_llm_pipeline.cpp
@@ -90,49 +90,25 @@ extern char generation_config_docstring[];

void init_llm_pipeline(py::module_& m) {
py::class_<LLMPipeline>(m, "LLMPipeline", "This class is used for generation with LLMs")
.def(py::init([](
const std::filesystem::path& models_path,
const std::string& device,
const std::map<std::string, py::object>& config
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, device, pyutils::properties_to_any_map(config));
}),
py::arg("models_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"), "device on which inference will be done",
py::arg("config") = ov::AnyMap({}), "openvino.properties map",
R"(
LLMPipeline class constructor.
models_path (str): Path to the model file.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
)")

.def(py::init([](
const std::filesystem::path& models_path,
const std::string& device,
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, device, pyutils::kwargs_to_any_map(kwargs));
}),
py::arg("models_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"), "device on which inference will be done",
R"(
LLMPipeline class constructor.
models_path (str): Path to the model file.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
)")

// init(model_path, tokenizer, device, config, kwargs) should be defined before init(model_path, device, config, kwargs)
// to prevent the tokenizer from being treated as a kwargs argument
.def(py::init([](
const std::filesystem::path& models_path,
const Tokenizer& tokenizer,
const std::string& device,
const std::map<std::string, py::object>& config
const std::map<std::string, py::object>& config,
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, tokenizer, device, pyutils::properties_to_any_map(config));
ov::AnyMap properties = pyutils::kwargs_to_any_map(kwargs);
if (config.size()) {
PyErr_WarnEx(PyExc_DeprecationWarning,
"'config' parameters is deprecated, please use kwargs to pass config properties instead.",
1);
auto config_properties = pyutils::properties_to_any_map(config);
properties.insert(config_properties.begin(), config_properties.end());
}
return std::make_unique<LLMPipeline>(models_path, tokenizer, device, properties);
}),
py::arg("models_path"),
py::arg("tokenizer"),
@@ -144,26 +120,35 @@ void init_llm_pipeline(py::module_& m) {
tokenizer (openvino_genai.Tokenizer): tokenizer object.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
kwargs: Device properties.
)")

.def(py::init([](
const std::filesystem::path& models_path,
const Tokenizer& tokenizer,
const std::string& device,
const std::map<std::string, py::object>& config,
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<LLMPipeline>(models_path, tokenizer, device, pyutils::kwargs_to_any_map(kwargs));
ov::AnyMap properties = pyutils::kwargs_to_any_map(kwargs);
if (config.size()) {
PyErr_WarnEx(PyExc_DeprecationWarning,
"'config' parameters is deprecated, please use kwargs to pass config properties instead.",
1);
auto config_properties = pyutils::properties_to_any_map(config);
properties.insert(config_properties.begin(), config_properties.end());
}
return std::make_unique<LLMPipeline>(models_path, device, properties);
}),
py::arg("models_path"),
py::arg("tokenizer"),
py::arg("device"),
py::arg("models_path"), "folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"), "device on which inference will be done",
py::arg("config") = ov::AnyMap({}), "openvino.properties map",
R"(
LLMPipeline class constructor for manually created openvino_genai.Tokenizer.
LLMPipeline class constructor.
models_path (str): Path to the model file.
tokenizer (openvino_genai.Tokenizer): tokenizer object.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
Add {"scheduler_config": ov_genai.SchedulerConfig} to config properties to create continuous batching pipeline.
kwargs: Device properties.
)")

.def(
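To illustrate the deprecation path added above, a hedged sketch from the Python side (the property name is illustrative): a non-empty config is still accepted but raises DeprecationWarning, and since the kwargs map is filled first and std::map::insert keeps existing keys, kwargs values appear to win when the same key is passed both ways.

import warnings
import openvino_genai as ov_genai

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    # Deprecated style: still accepted, but warns.
    pipe = ov_genai.LLMPipeline("./model", "CPU", config={"ENABLE_MMAP": False})
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

# Preferred style going forward.
pipe = ov_genai.LLMPipeline("./model", "CPU", ENABLE_MMAP=False)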
2 changes: 0 additions & 2 deletions src/python/py_text2image_pipeline.cpp
@@ -148,8 +148,6 @@ ov::AnyMap text2image_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_com
"Use help(openvino_genai.Text2ImagePipeline.generate) to get list of acceptable parameters."));
}
}


}
return params;
}
42 changes: 26 additions & 16 deletions src/python/py_vlm_pipeline.cpp
@@ -72,53 +72,63 @@ py::object call_vlm_generate(
return py::cast(pipe.generate(prompt, images, updated_config, streamer));
}

py::object call_vlm_generate(
ov::genai::VLMPipeline& pipe,
const std::string& prompt,
const py::kwargs& kwargs
) {
ov::AnyMap vlm_kwargs_to_any_map(const py::kwargs& kwargs, bool allow_compile_properties=true) {
ov::AnyMap params = {};

for (const auto& item : kwargs) {
std::string key = py::cast<std::string>(item.first);
py::object value = py::cast<py::object>(item.second);

if (key == "images") {
params.insert({ov::genai::images(std::move(py::cast<std::vector<ov::Tensor>>(item.second)))});
params.insert({ov::genai::images(std::move(py::cast<std::vector<ov::Tensor>>(value)))});
} else if (key == "image") {
params.insert({ov::genai::image(std::move(py::cast<ov::Tensor>(item.second)))});
params.insert({ov::genai::image(std::move(py::cast<ov::Tensor>(value)))});
} else if (key == "generation_config") {
params.insert({ov::genai::generation_config(std::move(py::cast<ov::genai::GenerationConfig>(item.second)))});
params.insert({ov::genai::generation_config(std::move(py::cast<ov::genai::GenerationConfig>(value)))});
} else if (key == "streamer") {
auto py_streamer = py::cast<pyutils::PyBindStreamerVariant>(value);
params.insert({ov::genai::streamer(std::move(pyutils::pystreamer_to_streamer(py_streamer)))});

} else {
throw(std::invalid_argument("'" + key + "' is unexpected parameter name. "
}
else {
if (allow_compile_properties) {
// convert arbitrary objects to ov::Any
// not supported properties are not checked, as these properties are passed to compile(), which will throw exception in case of unsupported property
if (pyutils::py_object_is_any_map(value)) {
auto map = pyutils::py_object_to_any_map(value);
params.insert(map.begin(), map.end());
} else {
params[key] = pyutils::py_object_to_any(value);
}
}
else {
// generate doesn't run compile(), so only VLMPipeline specific properties are allowed
throw(std::invalid_argument("'" + key + "' is unexpected parameter name. "
"Use help(openvino_genai.VLMPipeline.generate) to get list of acceptable parameters."));
}
}
}

return py::cast(pipe.generate(prompt, params));
return params;
}

void init_vlm_pipeline(py::module_& m) {
py::class_<ov::genai::VLMPipeline>(m, "VLMPipeline", "This class is used for generation with VLMs")
.def(py::init([](
const std::filesystem::path& models_path,
const std::string& device,
const std::map<std::string, py::object>& config
const py::kwargs& kwargs
) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<ov::genai::VLMPipeline>(models_path, device, pyutils::properties_to_any_map(config));
return std::make_unique<ov::genai::VLMPipeline>(models_path, device, vlm_kwargs_to_any_map(kwargs, true));
}),
py::arg("models_path"), "folder with exported model files",
py::arg("device"), "device on which inference will be done",
py::arg("config") = ov::AnyMap({}), "openvino.properties map"
py::arg("device"), "device on which inference will be done"
R"(
VLMPipeline class constructor.
models_path (str): Path to the folder with exported model files.
device (str): Device to run the model on (e.g., CPU, GPU). Default is 'CPU'.
kwargs: Device properties
)")

.def("start_chat", &ov::genai::VLMPipeline::start_chat, py::arg("system_message") = "")
@@ -149,7 +159,7 @@ void init_vlm_pipeline(py::module_& m) {
const std::string& prompt,
const py::kwargs& kwargs
) {
return call_vlm_generate(pipe, prompt, kwargs);
return py::cast(pipe.generate(prompt, vlm_kwargs_to_any_map(kwargs, false)));
},
py::arg("prompt"), "Input string",
(vlm_generate_kwargs_docstring + std::string(" \n ")).c_str()
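A hedged sketch of what the shared vlm_kwargs_to_any_map helper above enables on the Python side (model path, image shape and property values are illustrative): the constructor accepts arbitrary compile/device properties, while generate() only accepts the VLM-specific keys (image, images, generation_config, streamer) and rejects anything else.

import numpy as np
import openvino as ov
import openvino_genai as ov_genai

# Constructor: compile/device properties are allowed (allow_compile_properties=True).
pipe = ov_genai.VLMPipeline("./vlm_model", "GPU", CACHE_DIR="vlm_cache")

config = ov_genai.GenerationConfig()
config.max_new_tokens = 100

# generate(): only VLM-specific kwargs; an unknown key raises an error.
image = ov.Tensor(np.zeros((1, 448, 448, 3), dtype=np.uint8))  # placeholder image tensor
result = pipe.generate("Describe the image.", image=image, generation_config=config)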
5 changes: 2 additions & 3 deletions src/python/py_whisper_pipeline.cpp
@@ -251,15 +251,14 @@ void init_whisper_pipeline(py::module_& m) {
py::class_<WhisperPipeline>(m, "WhisperPipeline")
.def(py::init([](const std::filesystem::path& models_path,
const std::string& device,
const std::map<std::string, py::object>& config) {
const py::kwargs& kwargs) {
ScopedVar env_manager(pyutils::ov_tokenizers_module_path());
return std::make_unique<WhisperPipeline>(models_path, device, pyutils::properties_to_any_map(config));
return std::make_unique<WhisperPipeline>(models_path, device, pyutils::kwargs_to_any_map(kwargs));
}),
py::arg("models_path"),
"folder with openvino_model.xml and openvino_tokenizer[detokenizer].xml files",
py::arg("device"),
"device on which inference will be done",
py::arg("config") = ov::AnyMap({}),
"openvino.properties map",
R"(
WhisperPipeline class constructor.
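Correspondingly for Whisper, properties that previously went through the removed 'config' argument are now plain keyword arguments (a sketch with an illustrative path and property):

import openvino_genai as ov_genai

pipe = ov_genai.WhisperPipeline("./whisper_model", "CPU", ENABLE_MMAP=False)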
4 changes: 2 additions & 2 deletions tests/python_tests/ov_genai_test_utils.py
@@ -197,7 +197,7 @@ def read_model(params, **tokenizer_kwargs):
path,
tokenizer,
opt_model,
ov_genai.LLMPipeline(path, 'CPU', config={'ENABLE_MMAP': False}),
ov_genai.LLMPipeline(path, 'CPU', **{'ENABLE_MMAP': False}),
)


@@ -252,4 +252,4 @@ def load_pipe(configs: List[Tuple], temp_path):
def get_continuous_batching(path):
scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.cache_size = 1
return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', config={"scheduler_config": scheduler_config})
return ov_genai.LLMPipeline(path, ov_genai.Tokenizer(path), 'CPU', **{"scheduler_config": scheduler_config})
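The updated helper above also shows the continuous-batching path: a SchedulerConfig passed as a keyword argument, matching the note in the LLMPipeline constructor docstring (a sketch; the model path is illustrative):

import openvino_genai as ov_genai

scheduler_config = ov_genai.SchedulerConfig()
scheduler_config.cache_size = 1

# Passing scheduler_config as a keyword switches LLMPipeline to continuous batching.
pipe = ov_genai.LLMPipeline("./model", ov_genai.Tokenizer("./model"), "CPU",
                            scheduler_config=scheduler_config)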
2 changes: 1 addition & 1 deletion tests/python_tests/test_chat_generate_api.py
@@ -118,7 +118,7 @@ def test_chat_compare_statefull_vs_text_history(model_descr, generation_config:
# HF in chat scenario does not add special tokens, but openvino tokenizer by default is converted with add_special_tokens=True.
# Need to regenerate openvino_tokenizer/detokenizer.
model_id, path, tokenizer, model_opt, pipe = read_model((model_descr[0], model_descr[1] / '_test_chat'), add_special_tokens=False)
pipe_with_kv_cache = ov_genai.LLMPipeline(path, device, config={"ENABLE_MMAP": False})
pipe_with_kv_cache = ov_genai.LLMPipeline(path, device, **{"ENABLE_MMAP": False})

pipe_with_kv_cache.start_chat()
for question in quenstions:
2 changes: 1 addition & 1 deletion tests/python_tests/test_whisper_generate_api.py
@@ -68,7 +68,7 @@ def read_whisper_model(params, **tokenizer_kwargs):
path,
opt_pipe,
ov_genai.WhisperPipeline(
path, 'CPU', config={'ENABLE_MMAP': False}
path, 'CPU', **{'ENABLE_MMAP': False}
),
)

Expand Down
2 changes: 1 addition & 1 deletion tools/llm_bench/llm_bench_utils/ov_utils.py
@@ -201,7 +201,7 @@ def create_genai_text_gen_model(model_path, device, ov_config, **kwargs):
setattr(scheduler_config, param, value)
ov_config["scheduler_config"] = scheduler_config
start = time.perf_counter()
llm_pipe = openvino_genai.LLMPipeline(model_path, device.upper(), ov_config)
llm_pipe = openvino_genai.LLMPipeline(model_path, device.upper(), **ov_config)
end = time.perf_counter()
log.info(f'Pipeline initialization time: {end - start:.2f}s')

