Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Miscellaneous config fixups #214

Merged
merged 6 commits into from
May 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ include_directories(
"src/algorithms/association_rules"
"src/algorithms/depminer"
"src/algorithms/statistics"
"src/algorithms/options"
"src/model"
"src/model/types"
"src/util"
"src/util/config"
"src/parser"
"src/parser/json"
"src/core"
Expand Down
14 changes: 6 additions & 8 deletions python_bindings/py_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,12 @@ namespace python_bindings {
namespace py = pybind11;

void PyAlgorithmBase::Configure(py::kwargs const& kwargs) {
auto params = kwargs.cast<std::unordered_map<std::string, py::object>>();
std::unordered_map<std::string, boost::any> any_map{};
for (auto const& [opt_name, obj] : params) {
std::type_index type_index = algorithm_->GetTypeIndex(opt_name);
if (type_index == void_index) continue;
any_map[opt_name] = PyToAny(type_index, obj);
}
algos::ConfigureFromMap(*algorithm_, any_map);
algos::ConfigureFromFunction(*algorithm_, [this, &kwargs](std::string_view option_name) {
std::type_index type_index = algorithm_->GetTypeIndex(option_name);
assert(type_index != void_index);
return kwargs.contains(option_name) ? PyToAny(type_index, kwargs[py::str{option_name}])
: boost::any{};
});
}

void PyAlgorithmBase::SetOption(std::string const& option_name, py::object const& option_value) {
Expand Down
2 changes: 1 addition & 1 deletion python_bindings/py_fd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ PyFD::PyFD(RawFD const& fd) : rhs_index_(fd.rhs_) {
std::string PyFD::ToString() const {
std::stringstream stream;
stream << "( ";
for (algos::config::IndexType index : lhs_indices_) {
for (util::config::IndexType index : lhs_indices_) {
stream << index << " ";
}
stream << ") -> ";
Expand Down
10 changes: 5 additions & 5 deletions python_bindings/py_fd.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,26 @@
#include <string>
#include <vector>

#include "algorithms/options/indices/type.h"
#include "model/raw_fd.h"
#include "util/config/indices/type.h"

namespace python_bindings {

class PyFD {
private:
algos::config::IndicesType lhs_indices_{};
algos::config::IndexType rhs_index_;
util::config::IndicesType lhs_indices_{};
util::config::IndexType rhs_index_;

public:
explicit PyFD(RawFD const& fd);

[[nodiscard]] std::string ToString() const;

[[nodiscard]] algos::config::IndexType GetRhs() const {
[[nodiscard]] util::config::IndexType GetRhs() const {
return rhs_index_;
}

[[nodiscard]] algos::config::IndicesType GetLhs() const {
[[nodiscard]] util::config::IndicesType GetLhs() const {
return lhs_indices_;
}
};
Expand Down
33 changes: 19 additions & 14 deletions src/algorithms/algo_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
#include "algorithms/algorithms.h"
#include "algorithms/create_algorithm.h"
#include "algorithms/legacy_algorithms.h"
#include "algorithms/options/names.h"
#include "algorithms/typo_miner.h"
#include "util/config/names.h"

namespace algos {

Expand All @@ -20,7 +20,7 @@ using StdParamsMap = std::unordered_map<std::string, boost::any>;
namespace details {

template <typename OptionMap>
boost::any ExtractAnyFromMap(OptionMap&& options, std::string_view option_name) {
boost::any ExtractAnyFromMap(OptionMap& options, std::string_view option_name) {
using std::is_same_v, std::decay, boost::program_options::variables_map;
const std::string string_opt{option_name};
auto it = options.find(string_opt);
Expand All @@ -41,7 +41,7 @@ T ExtractOptionValue(OptionMap&& options, std::string const& option_name) {

template <typename ParamsMap>
ACAlgorithm::Config CreateAcAlgorithmConfigFromMap(ParamsMap params) {
namespace onam = config::names;
namespace onam = util::config::names;
ACAlgorithm::Config c;

c.data = ExtractOptionValue<std::filesystem::path>(params, onam::kData);
Expand Down Expand Up @@ -82,27 +82,32 @@ std::unique_ptr<Algorithm> CreateAcAlgorithmInstance(ParamsMap&& params) {

} // namespace details

template <typename OptionMap>
void ConfigureFromMap(Algorithm& algorithm, OptionMap&& options) {
template <typename FuncType>
void ConfigureFromFunction(Algorithm& algorithm, FuncType get_opt_value_by_name) {
std::unordered_set<std::string_view> needed;
while (!(needed = algorithm.GetNeededOptions()).empty()) {
for (std::string_view option_name : needed) {
if (options.find(std::string{option_name}) == options.end()) {
algorithm.SetOption(option_name);
} else {
algorithm.SetOption(option_name, details::ExtractAnyFromMap(options, option_name));
}
algorithm.SetOption(option_name, get_opt_value_by_name(option_name));
}
}
}

// Configures `algorithm` from a map-like `options` container by delegating to
// ConfigureFromFunction with a lookup callback.
//
// For every option name it is asked about, the callback converts the name to
// std::string and searches `options` for it:
//   - name absent  -> returns an empty boost::any;
//   - name present -> returns the result of details::ExtractAnyFromMap.
template <typename OptionMap>
void ConfigureFromMap(Algorithm& algorithm, OptionMap&& options) {
// `options` is captured by reference, so it must stay alive while
// ConfigureFromFunction runs.
ConfigureFromFunction(algorithm, [&options](std::string_view option_name) -> boost::any {
auto it = options.find(std::string{option_name});
return it == options.end() ? boost::any{}
: details::ExtractAnyFromMap(options, option_name);
});
}

template <typename OptionMap>
void LoadAlgorithm(Algorithm& algorithm, OptionMap&& options) {
ConfigureFromMap(algorithm, options);
auto parser = CSVParser{
details::ExtractOptionValue<std::filesystem::path>(options, config::names::kData),
details::ExtractOptionValue<char>(options, config::names::kSeparator),
details::ExtractOptionValue<bool>(options, config::names::kHasHeader)};
details::ExtractOptionValue<std::filesystem::path>(options, util::config::names::kData),
details::ExtractOptionValue<char>(options, util::config::names::kSeparator),
details::ExtractOptionValue<bool>(options, util::config::names::kHasHeader)};
algorithm.LoadData(parser);
ConfigureFromMap(algorithm, options);
}
Expand All @@ -123,8 +128,8 @@ std::unique_ptr<Algorithm> CreateAlgorithm(AlgorithmType algorithm_enum, OptionM

template <typename OptionMap>
std::unique_ptr<Algorithm> CreateTypoMiner(OptionMap&& options) {
using config::names::kPreciseAlgorithm, config::names::kApproximateAlgorithm;
using details::ExtractOptionValue;
using util::config::names::kPreciseAlgorithm, util::config::names::kApproximateAlgorithm;
AlgorithmType precise_algo = ExtractOptionValue<AlgorithmType>(options, kPreciseAlgorithm);
AlgorithmType approx_algo = ExtractOptionValue<AlgorithmType>(options, kApproximateAlgorithm);
std::unique_ptr<TypoMiner> typo_miner = std::make_unique<TypoMiner>(precise_algo, approx_algo);
Expand Down
21 changes: 10 additions & 11 deletions src/algorithms/algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,11 @@

namespace algos {

bool Algorithm::HandleUnknownOption([[maybe_unused]] std::string_view option_name,
[[maybe_unused]] boost::any const& value) {
bool Algorithm::SetExternalOption([[maybe_unused]] std::string_view option_name,
[[maybe_unused]] boost::any const& value) {
return false;
}

bool Algorithm::DataLoaded() const {
return data_loaded_;
}

// Default implementation: the body is intentionally empty, so
// `previous_options` is left unmodified.
void Algorithm::AddSpecificNeededOptions(
[[maybe_unused]] std::unordered_set<std::string_view>& previous_options) const {}

Expand Down Expand Up @@ -88,16 +84,19 @@ unsigned long long Algorithm::Execute() {
}

void Algorithm::SetOption(std::string_view option_name, boost::any const& value) {
// Currently, it is assumed that if both the pipeline and its algorithms
// have options with the same name, they should all be set to the same
// value.
bool ext_opt_set = SetExternalOption(option_name, value);
auto it = possible_options_.find(option_name);
if (it == possible_options_.end()) {
if (!HandleUnknownOption(option_name, value)) {
throw std::invalid_argument("Unknown option \"" + std::string{option_name} + '"');
}
return;
if (ext_opt_set) return;
throw std::invalid_argument("Unknown option \"" + std::string{option_name} + '"');
}
std::string_view name = it->first;
config::IOption& option = *it->second;
util::config::IOption& option = *it->second;
if (available_options_.find(name) == available_options_.end()) {
if (ext_opt_set) return;
throw std::invalid_argument("Invalid option \"" + std::string{name} + '"');
}

Expand Down
15 changes: 7 additions & 8 deletions src/algorithms/algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@

#include <boost/any.hpp>

#include "algorithms/options/ioption.h"
#include "algorithms/options/option.h"
#include "model/idataset_stream.h"
#include "parser/csv_parser.h"
#include "util/config/ioption.h"
#include "util/config/option.h"
#include "util/progress.h"

namespace algos {
Expand All @@ -22,11 +22,11 @@ class Algorithm {
private:
util::Progress progress_;
// All options the algorithm may use
std::unordered_map<std::string_view, std::unique_ptr<config::IOption>> possible_options_{};
std::unordered_map<std::string_view, std::unique_ptr<util::config::IOption>> possible_options_;
// All options that can be set at the moment
std::unordered_set<std::string_view> available_options_;
// Maps a parameter that added other parameters to their names.
std::unordered_map<std::string_view, std::vector<std::string_view>> opt_parents_{};
std::unordered_map<std::string_view, std::vector<std::string_view>> opt_parents_;

bool data_loaded_ = false;

Expand All @@ -53,15 +53,15 @@ class Algorithm {
void MakeOptionsAvailable(std::vector<std::string_view> const& option_names);

template <typename T>
void RegisterOption(config::Option<T> option) {
void RegisterOption(util::config::Option<T> option) {
auto name = option.GetName();
assert(possible_options_.find(name) == possible_options_.end());
possible_options_[name] = std::make_unique<config::Option<T>>(std::move(option));
possible_options_[name] = std::make_unique<util::config::Option<T>>(std::move(option));
}

// Override this if you want to work with options outside of the
// possible_options_ map. Useful for pipelines.
virtual bool HandleUnknownOption(std::string_view option_name, boost::any const& value);
virtual bool SetExternalOption(std::string_view option_name, boost::any const& value);
virtual void AddSpecificNeededOptions(
std::unordered_set<std::string_view>& previous_options) const;
void ExecutePrepare();
Expand All @@ -84,7 +84,6 @@ class Algorithm {
explicit Algorithm(std::vector<std::string_view> phase_names);

void LoadData(model::IDatasetStream& data_stream);
bool DataLoaded() const;

unsigned long long Execute();

Expand Down
11 changes: 5 additions & 6 deletions src/algorithms/ar_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@

#include <easylogging++.h>

#include "algorithms/options/names_and_descriptions.h"
#include "util/config/names_and_descriptions.h"
#include "util/config/option_using.h"

namespace algos {

ARAlgorithm::ARAlgorithm(std::vector<std::string_view> phase_names)
: Algorithm(std::move(phase_names)) {
using namespace config::names;
using namespace util::config::names;
RegisterOptions();
MakeOptionsAvailable({kInputFormat});
}

void ARAlgorithm::RegisterOptions() {
using namespace config::names;
using namespace config::descriptions;
using config::Option;
DESBORDANTE_OPTION_USING;

auto sing_eq = [](InputFormat input_format) { return input_format == +InputFormat::singular; };
auto tab_eq = [](InputFormat input_format) { return input_format == +InputFormat::tabular; };
Expand All @@ -38,7 +37,7 @@ void ARAlgorithm::ResetState() {
}

void ARAlgorithm::MakeExecuteOptsAvailable() {
using namespace config::names;
using namespace util::config::names;
MakeOptionsAvailable({kMinimumSupport, kMinimumConfidence});
}

Expand Down
6 changes: 3 additions & 3 deletions src/algorithms/dfd/dfd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
#include <easylogging++.h>

#include "algorithms/dfd/lattice_traversal/lattice_traversal.h"
#include "algorithms/options/thread_number/option.h"
#include "model/column_layout_relation_data.h"
#include "model/relational_schema.h"
#include "util/config/thread_number/option.h"
#include "util/position_list_index.h"

namespace algos {
Expand All @@ -16,11 +16,11 @@ DFD::DFD() : PliBasedFDAlgorithm({kDefaultPhaseName}) {
}

void DFD::RegisterOptions() {
RegisterOption(config::ThreadNumberOpt(&number_of_threads_));
RegisterOption(util::config::ThreadNumberOpt(&number_of_threads_));
}

void DFD::MakeExecuteOptsAvailable() {
MakeOptionsAvailable({config::ThreadNumberOpt.GetName()});
MakeOptionsAvailable({util::config::ThreadNumberOpt.GetName()});
}

void DFD::ResetStateFd() {
Expand Down
4 changes: 2 additions & 2 deletions src/algorithms/dfd/dfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,17 @@
#include <stack>

#include "algorithms/dfd/partition_storage/partition_storage.h"
#include "algorithms/options/thread_number/type.h"
#include "algorithms/pli_based_fd_algorithm.h"
#include "model/vertical.h"
#include "util/config/thread_number/type.h"

namespace algos {

class DFD : public PliBasedFDAlgorithm {
private:
std::vector<Vertical> unique_columns_;

config::ThreadNumType number_of_threads_;
util::config::ThreadNumType number_of_threads_;

void MakeExecuteOptsAvailable() final;
void RegisterOptions();
Expand Down
11 changes: 6 additions & 5 deletions src/algorithms/fastfds.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@
#include <boost/thread.hpp>
#include <easylogging++.h>

#include "algorithms/options/max_lhs/option.h"
#include "algorithms/options/thread_number/option.h"
#include "util/agree_set_factory.h"
#include "util/config/max_lhs/option.h"
#include "util/config/thread_number/option.h"
#include "util/parallel_for.h"

namespace algos {
Expand All @@ -24,12 +24,13 @@ FastFDs::FastFDs() : PliBasedFDAlgorithm({"Agree sets generation", "Finding mini
}

void FastFDs::RegisterOptions() {
RegisterOption(config::MaxLhsOpt(&max_lhs_));
RegisterOption(config::ThreadNumberOpt(&threads_num_));
RegisterOption(util::config::MaxLhsOpt(&max_lhs_));
RegisterOption(util::config::ThreadNumberOpt(&threads_num_));
}

void FastFDs::MakeExecuteOptsAvailable() {
MakeOptionsAvailable({config::MaxLhsOpt.GetName(), config::ThreadNumberOpt.GetName()});
MakeOptionsAvailable(
{util::config::MaxLhsOpt.GetName(), util::config::ThreadNumberOpt.GetName()});
}

void FastFDs::ResetStateFd() {
Expand Down
8 changes: 4 additions & 4 deletions src/algorithms/fastfds.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@

#include <boost/thread/mutex.hpp>

#include "algorithms/options/max_lhs/type.h"
#include "algorithms/options/thread_number/type.h"
#include "algorithms/pli_based_fd_algorithm.h"
#include "model/column_layout_relation_data.h"
#include "model/vertical.h"
#include "util/config/max_lhs/type.h"
#include "util/config/thread_number/type.h"

namespace algos {

Expand Down Expand Up @@ -70,8 +70,8 @@ class FastFDs : public PliBasedFDAlgorithm {

RelationalSchema const* schema_;
std::vector<DiffSet> diff_sets_;
config::ThreadNumType threads_num_;
config::MaxLhsType max_lhs_;
util::config::ThreadNumType threads_num_;
util::config::MaxLhsType max_lhs_;
double percent_per_col_;
};

Expand Down
Loading