-
Notifications
You must be signed in to change notification settings - Fork 72
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Separate configuration from algorithms #200
base: main
Are you sure you want to change the base?
Changes from all commits
769c892
8c99ae8
5774e0f
ee9ee8b
65791dc
441b783
52ba872
d1fdf4b
ea3b4c2
6a61d4c
8284f45
8709c53
1a62c95
978f63d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -12,8 +12,25 @@ | |||||
|
||||||
namespace python_bindings { | ||||||
|
||||||
// Currently, only two scenarios are supported. Either | ||||||
// (scenario 1) | ||||||
// 1. create the algorithm | ||||||
// 2. set options using `set_option` | ||||||
// 3. call `load_data` with no arguments | ||||||
// 4. set options using `set_option` | ||||||
// 5. call `execute` with no arguments | ||||||
// or | ||||||
// (scenario 2) | ||||||
// 1. create the algorithm | ||||||
// 2. call `load_data` with option arguments | ||||||
// 3. call `execute` with option arguments | ||||||
// Using the class in any other way from Python is undefined behaviour. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. undefined behaviour для кода, который мы можем контроливать, это плохое дизайн решение. Особенно для кода, который является частью интерфейса. Стоит явное кидать исключение или еще как-то сообщать об ошибке There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ну либо сделать behaviour defined, чтобы, например, опции, указанные в |
||||||
// scenario 2 is intended for use from Python's interactive shell | ||||||
// scenario 1 is intended for use in scripts where parameters are set one-by-one | ||||||
class PyAlgorithmBase { | ||||||
private: | ||||||
bool set_option_used_on_stage_ = false; | ||||||
|
||||||
void LoadProvidedData(pybind11::kwargs const& kwargs, util::config::InputTable table); | ||||||
|
||||||
protected: | ||||||
|
@@ -24,24 +41,26 @@ class PyAlgorithmBase { | |||||
void Configure(pybind11::kwargs const& kwargs, util::config::InputTable table = nullptr); | ||||||
|
||||||
public: | ||||||
// (scenario 1) | ||||||
void SetOption(std::string_view option_name, pybind11::handle option_value); | ||||||
|
||||||
[[nodiscard]] std::unordered_set<std::string_view> GetNeededOptions() const; | ||||||
|
||||||
[[nodiscard]] std::unordered_set<std::string_view> GetPossibleOptions() const; | ||||||
|
||||||
[[nodiscard]] std::string_view GetDescription(std::string_view option_name) const; | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
|
||||||
[[nodiscard]] pybind11::frozenset GetOptionType(std::string_view option_name) const; | ||||||
|
||||||
// For pandas dataframes | ||||||
// (scenario 2) | ||||||
void LoadData(pybind11::handle dataframe, std::string name, pybind11::kwargs const& kwargs); | ||||||
|
||||||
// (scenario 2) | ||||||
void LoadData(std::string_view path, char separator, bool has_header, | ||||||
pybind11::kwargs const& kwargs); | ||||||
|
||||||
// For the case when the "data" option has been set by `set_option`. | ||||||
// Currently, there is no use case where we want to set other options in | ||||||
// bulk with kwargs when the "data" option is set by `set_option`, so this | ||||||
// method assumes that all the other data loading options have already been | ||||||
// set by `set_option` as well and doesn't accept any arguments. This only | ||||||
// moves the algorithm to the execution stage. | ||||||
// (scenario 1) | ||||||
void LoadData(); | ||||||
|
||||||
pybind11::int_ Execute(pybind11::kwargs const& kwargs); | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,131 +1,42 @@ | ||
#include "algorithms/algorithm.h" | ||
|
||
#include <cassert> | ||
#include <utility> | ||
|
||
namespace algos { | ||
|
||
bool Algorithm::SetExternalOption([[maybe_unused]] std::string_view option_name, | ||
[[maybe_unused]] boost::any const& value) { | ||
return false; | ||
} | ||
|
||
void Algorithm::AddSpecificNeededOptions( | ||
[[maybe_unused]] std::unordered_set<std::string_view>& previous_options) const {} | ||
|
||
void Algorithm::MakeExecuteOptsAvailable() {} | ||
|
||
void Algorithm::ClearOptions() noexcept { | ||
available_options_.clear(); | ||
opt_parents_.clear(); | ||
} | ||
|
||
void Algorithm::ExecutePrepare() { | ||
data_loaded_ = true; | ||
ClearOptions(); | ||
MakeExecuteOptsAvailable(); | ||
} | ||
|
||
Algorithm::Algorithm(std::vector<std::string_view> phase_names) | ||
: progress_(std::move(phase_names)) {} | ||
Algorithm::Algorithm(std::vector<std::string_view> phase_names, | ||
util::config::ConfigurationStage starting_stage) | ||
: progress_(std::move(phase_names)), configuration_(starting_stage) {} | ||
|
||
void Algorithm::ExcludeOptions(std::string_view parent_option) noexcept { | ||
auto it = opt_parents_.find(parent_option); | ||
if (it == opt_parents_.end()) return; | ||
Algorithm::Algorithm(std::vector<std::string_view> phase_names, | ||
util::config::Configuration::FuncTuple funcTuple) | ||
: progress_(std::move(phase_names)), configuration_(std::move(funcTuple)) {} | ||
|
||
for (auto const &option_name: it->second) { | ||
auto possible_opt_it = possible_options_.find(option_name); | ||
assert(possible_opt_it != possible_options_.end()); | ||
available_options_.erase(possible_opt_it->first); | ||
UnsetOption(possible_opt_it->first); | ||
void Algorithm::LoadData() { | ||
if (configuration_.GetCurrentStage() == +util::config::ConfigurationStage::execute) { | ||
throw std::logic_error("Data has already been processed."); | ||
} | ||
opt_parents_.erase(it); | ||
} | ||
|
||
void Algorithm::UnsetOption(std::string_view option_name) noexcept { | ||
auto it = possible_options_.find(option_name); | ||
if (it == possible_options_.end() | ||
|| available_options_.find(it->first) == available_options_.end()) | ||
return; | ||
it->second->Unset(); | ||
ExcludeOptions(it->first); | ||
} | ||
|
||
void Algorithm::MakeOptionsAvailable(std::vector<std::string_view> const& option_names) { | ||
for (std::string_view name: option_names) { | ||
auto it = possible_options_.find(name); | ||
assert(it != possible_options_.end()); | ||
available_options_.insert(it->first); | ||
if (!GetNeededOptions().empty()) { | ||
throw std::logic_error("All options need to be set before starting processing."); | ||
} | ||
} | ||
|
||
void Algorithm::LoadData() { | ||
if (!GetNeededOptions().empty()) throw std::logic_error( | ||
"All options need to be set before starting processing."); | ||
LoadDataInternal(); | ||
ExecutePrepare(); | ||
configuration_.StartStage(util::config::ConfigurationStage::execute); | ||
} | ||
|
||
unsigned long long Algorithm::Execute() { | ||
if (!data_loaded_) { | ||
if (configuration_.GetCurrentStage() != +util::config::ConfigurationStage::execute) { | ||
throw std::logic_error("Data must be processed before execution."); | ||
} | ||
if (!GetNeededOptions().empty()) | ||
if (!GetNeededOptions().empty()) { | ||
throw std::logic_error("All options need to be set before execution."); | ||
} | ||
progress_.ResetProgress(); | ||
ResetState(); | ||
auto time_ms = ExecuteInternal(); | ||
for (auto const& opt_name : available_options_) { | ||
possible_options_.at(opt_name)->Unset(); | ||
} | ||
ClearOptions(); | ||
MakeExecuteOptsAvailable(); | ||
ResetConfiguration(); | ||
return time_ms; | ||
} | ||
|
||
void Algorithm::SetOption(std::string_view option_name, boost::any const& value) { | ||
// Currently, it is assumed that if both the pipeline and its algorithms | ||
// have options with the same name, they should all be set to the same | ||
// value. | ||
bool ext_opt_set = SetExternalOption(option_name, value); | ||
auto it = possible_options_.find(option_name); | ||
if (it == possible_options_.end()) { | ||
if (ext_opt_set) return; | ||
throw std::invalid_argument("Unknown option \"" + std::string{option_name} + '"'); | ||
} | ||
std::string_view name = it->first; | ||
util::config::IOption& option = *it->second; | ||
if (available_options_.find(name) == available_options_.end()) { | ||
if (ext_opt_set) return; | ||
throw std::invalid_argument("Invalid option \"" + std::string{name} + '"'); | ||
} | ||
|
||
if (option.IsSet()) { | ||
UnsetOption(name); | ||
} | ||
|
||
std::vector<std::string_view> const& new_opts = option.Set(value); | ||
if (new_opts.empty()) return; | ||
MakeOptionsAvailable(new_opts); | ||
std::vector<std::string_view>& child_opts = opt_parents_[name]; | ||
child_opts.insert(child_opts.end(), new_opts.begin(), new_opts.end()); | ||
} | ||
|
||
std::unordered_set<std::string_view> Algorithm::GetNeededOptions() const { | ||
std::unordered_set<std::string_view> needed{}; | ||
for (std::string_view name : available_options_) { | ||
if (!possible_options_.at(name)->IsSet()) { | ||
needed.insert(name); | ||
} | ||
} | ||
AddSpecificNeededOptions(needed); | ||
return needed; | ||
} | ||
|
||
std::type_index Algorithm::GetTypeIndex(std::string_view option_name) const { | ||
auto it = possible_options_.find(option_name); | ||
if (it == possible_options_.end()) return typeid(void); | ||
return it->second->GetTypeIndex(); | ||
} | ||
|
||
} // namespace algos | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Не очень понимаю логику. Если нам передали непустой
kwargs
, то мы всегда хотим установить эти опции, зачем тут нужна проверкаset_option_used_on_stage_
?Как будто тут в любом случае стоит устанавливать опци из
kwargs