Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate configuration from algorithms #200

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
4 changes: 4 additions & 0 deletions python_bindings/bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ PYBIND11_MODULE(desbordante, module) {
"Load data after all options have been set by SetOption calls")
.def("get_needed_options", &PyAlgorithmBase::GetNeededOptions,
"Get names of options the algorithm needs")
.def("get_possible_options", &PyAlgorithmBase::GetPossibleOptions,
"Get names of options the algorithm may request")
.def("get_description", &PyAlgorithmBase::GetDescription,
"Get description of an option of this algorithm")
.def("set_option", &PyAlgorithmBase::SetOption, "option_name"_a,
"option_value"_a = pybind11::none(), "Set option value")
.def("get_option_type", &PyAlgorithmBase::GetOptionType, "option_name"_a,
Expand Down
21 changes: 18 additions & 3 deletions python_bindings/py_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ static const auto void_index = std::type_index{typeid(void)};
namespace py = pybind11;

void PyAlgorithmBase::Configure(py::kwargs const& kwargs, InputTable table) {
algorithm_->ResetConfiguration();
set_option_used_on_stage_ = false;
algos::ConfigureFromFunction(
*algorithm_,
[this, &kwargs, table = std::move(table)](std::string_view option_name) -> boost::any {
Expand All @@ -43,6 +45,7 @@ void PyAlgorithmBase::Configure(py::kwargs const& kwargs, InputTable table) {
}

void PyAlgorithmBase::SetOption(std::string_view option_name, py::handle option_value) {
set_option_used_on_stage_ = true;
if (option_value.is_none()) {
algorithm_->SetOption(option_name);
return;
Expand All @@ -55,6 +58,14 @@ std::unordered_set<std::string_view> PyAlgorithmBase::GetNeededOptions() const {
return algorithm_->GetNeededOptions();
}

std::unordered_set<std::string_view> PyAlgorithmBase::GetPossibleOptions() const {
return algorithm_->GetPossibleOptions();
}

std::string_view PyAlgorithmBase::GetDescription(std::string_view option_name) const {
return algorithm_->GetDescription(option_name);
}

py::frozenset PyAlgorithmBase::GetOptionType(std::string_view option_name) const {
auto type_index = algorithm_->GetTypeIndex(option_name);
if (type_index == void_index)
Expand All @@ -64,7 +75,6 @@ py::frozenset PyAlgorithmBase::GetOptionType(std::string_view option_name) const
}

void PyAlgorithmBase::LoadProvidedData(pybind11::kwargs const& kwargs, InputTable table) {
algorithm_->UnsetOption(kTable);
Configure(kwargs, std::move(table));
algorithm_->LoadData();
}
Expand All @@ -80,11 +90,16 @@ void PyAlgorithmBase::LoadData(py::handle dataframe, std::string name, py::kwarg

void PyAlgorithmBase::LoadData() {
algorithm_->LoadData();
set_option_used_on_stage_ = false;
}

py::int_ PyAlgorithmBase::Execute(py::kwargs const& kwargs) {
Configure(kwargs);
return algorithm_->Execute();
if (!(set_option_used_on_stage_ && kwargs.empty())) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Не очень понимаю логику. Если нам передали непустой kwargs, то мы всегда хотим установить эти опции, зачем тут нужна проверка set_option_used_on_stage_?
Как будто тут в любом случае стоит устанавливать опци из kwargs

Configure(kwargs);
}
py::int_ time = algorithm_->Execute();
set_option_used_on_stage_ = false;
return time;
}

} // namespace python_bindings
31 changes: 25 additions & 6 deletions python_bindings/py_algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,25 @@

namespace python_bindings {

// Currently, only two scenarios are supported. Either
// (scenario 1)
// 1. create the algorithm
// 2. set options using `set_option`
// 3. call `load_data` with no arguments
// 4. set options using `set_option`
// 5. call `execute` with no arguments
// or
// (scenario 2)
// 1. create the algorithm
// 2. call `load_data` with option arguments
// 3. call `execute` with option arguments
// Using the class in any other way from Python is undefined behaviour.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

undefined behaviour для кода, который мы можем контроливать, это плохое дизайн решение. Особенно для кода, который является частью интерфейса. Стоит явное кидать исключение или еще как-то сообщать об ошибке

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ну либо сделать behaviour defined, чтобы, например, опции, указанные в load_data, переписывали выставленные раньше

// scenario 2 is intended for use from Python's interactive shell
// scenario 1 is intended for use in scripts where parameters are set one-by-one
class PyAlgorithmBase {
private:
bool set_option_used_on_stage_ = false;

void LoadProvidedData(pybind11::kwargs const& kwargs, util::config::InputTable table);

protected:
Expand All @@ -24,24 +41,26 @@ class PyAlgorithmBase {
void Configure(pybind11::kwargs const& kwargs, util::config::InputTable table = nullptr);

public:
// (scenario 1)
void SetOption(std::string_view option_name, pybind11::handle option_value);

[[nodiscard]] std::unordered_set<std::string_view> GetNeededOptions() const;

[[nodiscard]] std::unordered_set<std::string_view> GetPossibleOptions() const;

[[nodiscard]] std::string_view GetDescription(std::string_view option_name) const;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
[[nodiscard]] std::string_view GetDescription(std::string_view option_name) const;
[[nodiscard]] std::string_view GetOptionDescription(std::string_view option_name) const;


[[nodiscard]] pybind11::frozenset GetOptionType(std::string_view option_name) const;

// For pandas dataframes
// (scenario 2)
void LoadData(pybind11::handle dataframe, std::string name, pybind11::kwargs const& kwargs);

// (scenario 2)
void LoadData(std::string_view path, char separator, bool has_header,
pybind11::kwargs const& kwargs);

// For the case when the "data" option has been set by `set_option`.
// Currently, there is no use case where we want to set other options in
// bulk with kwargs when the "data" option is set by `set_option`, so this
// method assumes that all the other data loading options have already been
// set by `set_option` as well and doesn't accept any arguments. This only
// moves the algorithm to the execution stage.
// (scenario 1)
void LoadData();

pybind11::int_ Execute(pybind11::kwargs const& kwargs);
Expand Down
39 changes: 18 additions & 21 deletions src/algorithms/algebraic_constraints/ac_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,18 @@

#include "types/create_type.h"
#include "util/config/names_and_descriptions.h"
#include "util/config/option_using.h"
#include "util/config/tabular_data/input_table/option.h"

namespace algos {

ACAlgorithm::ACAlgorithm() : Algorithm({}) {
RegisterOptions();
MakeOptionsAvailable({util::config::TableOpt.GetName()});
ac_exception_finder_ = std::make_unique<algebraic_constraints::ACExceptionFinder>();
}

void ACAlgorithm::RegisterOptions() {
using namespace util::config::names;
using namespace util::config::descriptions;
using util::config::Option;
DESBORDANTE_OPTION_USING;

auto check_and_set_binop = [this](Binop bin_operation) {
switch (bin_operation) {
Expand Down Expand Up @@ -56,31 +54,30 @@ void ACAlgorithm::RegisterOptions() {
if (parameter <= 0) throw std::invalid_argument("Parameter out of range");
};

RegisterOption(util::config::TableOpt(&input_table_));
RegisterOption(Option{&bin_operation_, kBinaryOperation, kDBinaryOperation}.SetValueCheck(
check_and_set_binop));
RegisterOption(Option{&fuzziness_, kFuzziness, kDFuzziness}.SetValueCheck(check_fuzziness));
RegisterOption(Option{&p_fuzz_, kFuzzinessProbability, kDFuzzinessProbability}.SetValueCheck(
check_double_parameter));
RegisterOption(Option{&weight_, kWeight, kDWeight}.SetValueCheck(check_double_parameter));
RegisterOption(
RegisterInitialLoadOption(util::config::TableOpt(&input_table_));
RegisterInitialExecOption(
Option{&bin_operation_, kBinaryOperation, kDBinaryOperation}.SetValueCheck(
check_and_set_binop));
RegisterInitialExecOption(
Option{&fuzziness_, kFuzziness, kDFuzziness}.SetValueCheck(check_fuzziness));
RegisterInitialExecOption(
Option{&p_fuzz_, kFuzzinessProbability, kDFuzzinessProbability}.SetValueCheck(
check_double_parameter));
RegisterInitialExecOption(
Option{&weight_, kWeight, kDWeight}.SetValueCheck(check_double_parameter));
RegisterInitialExecOption(
Option{&bumps_limit_, kBumpsLimit, kDBumpsLimit}.SetValueCheck(check_non_negative));
RegisterOption(Option{&iterations_limit_, kIterationsLimit, kDIterationsLimit}.SetValueCheck(
check_positive));
RegisterOption(Option{&seed_, kACSeed, kDACSeed});
RegisterInitialExecOption(
Option{&iterations_limit_, kIterationsLimit, kDIterationsLimit}.SetValueCheck(
check_positive));
RegisterInitialExecOption(Option{&seed_, kACSeed, kDACSeed});
}

void ACAlgorithm::LoadDataInternal() {
typed_relation_ = model::ColumnLayoutTypedRelationData::CreateFrom(*input_table_,
false); // nulls are ignored
}

void ACAlgorithm::MakeExecuteOptsAvailable() {
using namespace util::config::names;
MakeOptionsAvailable({kFuzziness, kFuzzinessProbability, kWeight, kBumpsLimit, kIterationsLimit,
kACSeed, kBinaryOperation});
}

void ACAlgorithm::ResetState() {
ac_pairs_.clear();
ranges_.clear();
Expand Down
1 change: 0 additions & 1 deletion src/algorithms/algebraic_constraints/ac_algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ class ACAlgorithm : public Algorithm {
void RestrictRangesAmount(std::vector<std::byte const*>& ranges) const;
void RegisterOptions();
void LoadDataInternal() override;
void MakeExecuteOptsAvailable() override;
void ResetState() override;

public:
Expand Down
123 changes: 17 additions & 106 deletions src/algorithms/algorithm.cpp
Original file line number Diff line number Diff line change
@@ -1,131 +1,42 @@
#include "algorithms/algorithm.h"

#include <cassert>
#include <utility>

namespace algos {

bool Algorithm::SetExternalOption([[maybe_unused]] std::string_view option_name,
[[maybe_unused]] boost::any const& value) {
return false;
}

void Algorithm::AddSpecificNeededOptions(
[[maybe_unused]] std::unordered_set<std::string_view>& previous_options) const {}

void Algorithm::MakeExecuteOptsAvailable() {}

void Algorithm::ClearOptions() noexcept {
available_options_.clear();
opt_parents_.clear();
}

void Algorithm::ExecutePrepare() {
data_loaded_ = true;
ClearOptions();
MakeExecuteOptsAvailable();
}

Algorithm::Algorithm(std::vector<std::string_view> phase_names)
: progress_(std::move(phase_names)) {}
Algorithm::Algorithm(std::vector<std::string_view> phase_names,
util::config::ConfigurationStage starting_stage)
: progress_(std::move(phase_names)), configuration_(starting_stage) {}

void Algorithm::ExcludeOptions(std::string_view parent_option) noexcept {
auto it = opt_parents_.find(parent_option);
if (it == opt_parents_.end()) return;
Algorithm::Algorithm(std::vector<std::string_view> phase_names,
util::config::Configuration::FuncTuple funcTuple)
: progress_(std::move(phase_names)), configuration_(std::move(funcTuple)) {}

for (auto const &option_name: it->second) {
auto possible_opt_it = possible_options_.find(option_name);
assert(possible_opt_it != possible_options_.end());
available_options_.erase(possible_opt_it->first);
UnsetOption(possible_opt_it->first);
void Algorithm::LoadData() {
if (configuration_.GetCurrentStage() == +util::config::ConfigurationStage::execute) {
throw std::logic_error("Data has already been processed.");
}
opt_parents_.erase(it);
}

void Algorithm::UnsetOption(std::string_view option_name) noexcept {
auto it = possible_options_.find(option_name);
if (it == possible_options_.end()
|| available_options_.find(it->first) == available_options_.end())
return;
it->second->Unset();
ExcludeOptions(it->first);
}

void Algorithm::MakeOptionsAvailable(std::vector<std::string_view> const& option_names) {
for (std::string_view name: option_names) {
auto it = possible_options_.find(name);
assert(it != possible_options_.end());
available_options_.insert(it->first);
if (!GetNeededOptions().empty()) {
throw std::logic_error("All options need to be set before starting processing.");
}
}

void Algorithm::LoadData() {
if (!GetNeededOptions().empty()) throw std::logic_error(
"All options need to be set before starting processing.");
LoadDataInternal();
ExecutePrepare();
configuration_.StartStage(util::config::ConfigurationStage::execute);
}

unsigned long long Algorithm::Execute() {
if (!data_loaded_) {
if (configuration_.GetCurrentStage() != +util::config::ConfigurationStage::execute) {
throw std::logic_error("Data must be processed before execution.");
}
if (!GetNeededOptions().empty())
if (!GetNeededOptions().empty()) {
throw std::logic_error("All options need to be set before execution.");
}
progress_.ResetProgress();
ResetState();
auto time_ms = ExecuteInternal();
for (auto const& opt_name : available_options_) {
possible_options_.at(opt_name)->Unset();
}
ClearOptions();
MakeExecuteOptsAvailable();
ResetConfiguration();
return time_ms;
}

void Algorithm::SetOption(std::string_view option_name, boost::any const& value) {
// Currently, it is assumed that if both the pipeline and its algorithms
// have options with the same name, they should all be set to the same
// value.
bool ext_opt_set = SetExternalOption(option_name, value);
auto it = possible_options_.find(option_name);
if (it == possible_options_.end()) {
if (ext_opt_set) return;
throw std::invalid_argument("Unknown option \"" + std::string{option_name} + '"');
}
std::string_view name = it->first;
util::config::IOption& option = *it->second;
if (available_options_.find(name) == available_options_.end()) {
if (ext_opt_set) return;
throw std::invalid_argument("Invalid option \"" + std::string{name} + '"');
}

if (option.IsSet()) {
UnsetOption(name);
}

std::vector<std::string_view> const& new_opts = option.Set(value);
if (new_opts.empty()) return;
MakeOptionsAvailable(new_opts);
std::vector<std::string_view>& child_opts = opt_parents_[name];
child_opts.insert(child_opts.end(), new_opts.begin(), new_opts.end());
}

std::unordered_set<std::string_view> Algorithm::GetNeededOptions() const {
std::unordered_set<std::string_view> needed{};
for (std::string_view name : available_options_) {
if (!possible_options_.at(name)->IsSet()) {
needed.insert(name);
}
}
AddSpecificNeededOptions(needed);
return needed;
}

std::type_index Algorithm::GetTypeIndex(std::string_view option_name) const {
auto it = possible_options_.find(option_name);
if (it == possible_options_.end()) return typeid(void);
return it->second->GetTypeIndex();
}

} // namespace algos

Loading