diff --git a/test/queries_generator/generate_aws_cmds.cpp b/test/queries_generator/generate_aws_cmds.cpp
new file mode 100644
index 00000000..b2cb69d0
--- /dev/null
+++ b/test/queries_generator/generate_aws_cmds.cpp
@@ -0,0 +1,87 @@
+#include <fstream>
+#include <iostream>
+#include <string>
+
+using namespace std;
+
+// Wraps text in single quotes for POSIX sh so that spaces, $, ", \ and
+// backticks reach the command unchanged; an embedded ' is written as '\''.
+static string shell_quote(const string& text)
+{
+    string quoted = "'";
+    for (char c : text)
+    {
+        if (c == '\'')
+            quoted += "'\\''";
+        else
+            quoted += c;
+    }
+    quoted += "'";
+    return quoted;
+}
+
+// Turns aws_queries.txt (one S3 Select query per line) into aws_cmds.sh, a
+// shell script that runs every query with "aws s3api select-object-content"
+// against the bucket and object key read from stdin. The command for line i
+// (1-based) stores its result in aws_results/output<i>.csv.
+// Returns 0 on success and 1 when a file cannot be opened, the bucket or key
+// is missing, or writing the script fails.
+int main()
+{
+    ifstream query_file("aws_queries.txt");
+    if (!query_file.is_open())
+    {
+        cerr << "cannot open aws_queries.txt for reading\n";
+        return 1;
+    }
+
+    string bucket, csv_file;
+    cout << "Enter bucket name: ";
+    cin >> bucket;
+    cout << "Enter file name: ";
+    cin >> csv_file;
+    if (!cin)
+    {
+        cerr << "bucket name and file name are required\n";
+        return 1;
+    }
+
+    // Opened only once the input is known to be usable, so a failed run does
+    // not truncate an existing script.
+    ofstream cmd_file("aws_cmds.sh");
+    if (!cmd_file.is_open())
+    {
+        cerr << "cannot open aws_cmds.sh for writing\n";
+        return 1;
+    }
+    cmd_file << "#!/bin/sh\nset -x\nset -e\n\n";
+    cmd_file << "mkdir -p aws_results\n";
+
+    // Everything in front of the query text is the same for every command.
+    const string cmd_prefix =
+        "aws s3api select-object-content --bucket " + shell_quote(bucket) +
+        " --key " + shell_quote(csv_file) +
+        " --expression-type 'SQL'"
+        " --input-serialization '{\"CSV\": {}, \"CompressionType\": \"NONE\"}'"
+        " --output-serialization '{\"CSV\": {}}'"
+        " --profile openshift-dev --expression ";
+
+    string query;
+    for (int i = 1; getline(query_file, query); i++)
+    {
+        // A blank line carries no query. It still uses up its number, so
+        // output<i>.csv keeps matching line i of aws_queries.txt.
+        if (query.empty())
+            continue;
+        cmd_file << cmd_prefix << shell_quote(query)
+                 << " \"aws_results/output" << i << ".csv\"\n";
+    }
+
+    cmd_file.flush();
+    if (!cmd_file)
+    {
+        cerr << "failed while writing aws_cmds.sh\n";
+        return 1;
+    }
+    return 0;
+}
diff --git a/test/queries_generator/queries_generator.cpp b/test/queries_generator/queries_generator.cpp
new file mode 100644
index 00000000..0e57ff24
--- /dev/null
+++ b/test/queries_generator/queries_generator.cpp
@@ -0,0 +1,325 @@
+#include <cstdlib>
+#include <ctime>
+#include <fstream>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+// Largest N used in generated "_N" column references (see random_col).
+#define NUM_COLUMN 3
+
+using namespace std;
+
+// Result type that random_query_expr is asked to produce.
+enum Return_type { INTEGER = 0,
+                   STRING = 1,
+                   TIMESTAMP = 2,
+                   MIX_COL_NUM = 3,
+                   COLUMN = 4,
+                   NUMBER = 5};
+
+// Returns one of the characters + - * / picked with rand().
+auto random_arth_op = [](){
+    static const std::string op = "+-*/";
+    return op[rand() % op.size()];
+};
+
+// Returns a comparison operator, as text, picked with rand().
+auto random_compare_op = []()
+{
+    static const vector<string> op = {">", "<", ">=", "<=", "==", "!="};
+    return op[ rand() % op.size() ];
+};
+
+// Returns the name of a date part, as text, picked with rand().
+auto random_date_part = []()
+{
+    static const vector<string> op = {"year", "month", "day", "hour", "minute", "second"};
+    return op[ rand() % op.size() ];
+};
+
+/*auto random_date_part_extract = []()
+{vector<string> op={"year", "month", "day", "hour", "minute", "second",
+                    "timezone_hour", "timezone_minute"};
+ return op[ rand() % op.size() ];
+};*/
+
+// Starts a random timestamp literal of the form YYYY-MM-DDT...: the year is
+// 1900-1999, month 1-12 and day 1-28 (month and day zero-padded to two digits).
+// NOTE(review): the text after `"T" <` is missing from this copy of the patch.
+// What was lost is the rest of this function (time-of-day part, the code that
+// fills aws_expr, the return) and the head of the following function. The
+// damaged line is left exactly as found; the remainder down to `return frmt;`
+// presumably belongs to random_tm_format_string(), which random_query_expr
+// calls - confirm against the original file.
+string random_timestamp_string(string& aws_expr)
+{
+    auto year = [](){return rand()%100 + 1900;};
+    auto month = [](){return 1 + rand()%12;};
+    auto day = [](){return 1 + rand()%28;};
+    auto hours = [](){return rand()%24;};
+    auto minutes = [](){return rand()%60;};
+    auto seconds = [](){return rand()%60;};
+    auto fraction_sec = [](){return rand()%1000000;};
+    stringstream timestamp_str;
+
+    timestamp_str << year() << "-" << std::setw(2) << std::setfill('0') << month() << "-" << std::setw(2) << std::setfill('0') << day() << "T" < op={"yyyyy ", "yyyy ", "yyy ", "yy ", "y ", "MMMMM ", "MMMM ", "MMM ", "MM ", "M ", "dd ", "d ", "a ", "hh ", "h ", "HH ", "H ", "mm ", "m ", "ss ", "s ", "SSSSSSSSS ", "SSSSSS ", "SSSSS ", "SSS ", "SS ", "S ", "n ", ": ", "- ", " "};
+        return op[ rand() % op.size() ];
+    };
+    // Concatenates between 0 and 9 tokens picked from the list above.
+    int loop = rand() % 10;
+    string frmt;
+    while(loop)
+    {
+        frmt += random_format();
+        loop--;
+    }
+    return frmt;
+}
+
+// Picks a column index N in 1..NUM_COLUMN with rand(). Returns "int(_N)";
+// aws_expr receives "cast(_N as int)".
+string random_col(string& aws_expr)
+{
+    int num = 1 + (rand() % NUM_COLUMN);
+    aws_expr = "cast(_" + to_string(num) + " as int)";
+    return "int(_" + to_string(num) + ")";
+}
+
+// Picks a constant N in 1..10 with rand(). Returns "int(N)"; aws_expr
+// receives the bare "N".
+string random_number(string& aws_expr)
+{
+    int num = rand() % 10 + 1;
+    aws_expr = to_string(num);
+    return "int(" + to_string(num) + ")";
+}
+
+// Builds a random arithmetic expression over constants and returns it;
+// aws_expr receives the same expression with the operands in their bare form.
+// depth is the number of operator levels: 0 yields one random_number(), each
+// further level joins two sub-expressions with one random operator, giving
+// 2^depth operands. No parentheses are emitted. depth must not be negative,
+// because only depth == 0 ends the recursion.
+string random_num_expr(int depth, string& aws_expr)
+{
+    string aws_expr1, aws_expr2, ceph_expr, op;
+    if (depth == 0)
+    {
+        ceph_expr = random_number(aws_expr1);
+        aws_expr = aws_expr1;
+        return ceph_expr;
+    }
+    op = random_arth_op();
+    ceph_expr = random_num_expr(depth-1, aws_expr1) + op +
+        random_num_expr(depth-1, aws_expr2);
+    aws_expr = aws_expr1 + op + aws_expr2;
+    return ceph_expr;
+}
+
+// Same construction as random_num_expr, except that every operand is, with
+// equal probability, a column reference (random_col) or a constant
+// (random_number).
+string random_num_col_expr(int depth, string& aws_expr)
+{
+    string aws_expr1, aws_expr2, ceph_expr, op;
+    if (depth == 0)
+    {
+        if ((rand() % 2) == 0)
+        {
+            ceph_expr = random_col(aws_expr1);
+            aws_expr = aws_expr1;
+            return ceph_expr;
+        }
+        else
+        {
+            ceph_expr = random_number(aws_expr1);
+            aws_expr = aws_expr1;
+            return ceph_expr;
+        }
+    }
+    // Inner node: the same operator joins two sub-expressions of depth-1 in
+    // both output strings.
+    op = random_arth_op();
+    ceph_expr = random_num_col_expr(depth-1, aws_expr1) + op +
+        random_num_col_expr(depth-1, aws_expr2);
+    aws_expr = aws_expr1 + op + aws_expr2;
+    return ceph_expr;
+}
+
+// Builds one random expression whose result type is `type` and returns its
+// text; aws_expr receives the same expression written the way the AWS query
+// needs it (for example "cast(_1 as int)" where the returned text has
+// "int(_1)").
+//
+// depth     - nesting levels still to generate; 0 produces a leaf. Negative
+//             values never reach the depth == 0 leaf case.
+// input_str - text that STRING leaves emit, wrapped in single quotes.
+// type      - a Return_type value selecting what the expression evaluates to.
+// aws_expr  - output argument, overwritten with the AWS form.
+//
+// At depth 0 only INTEGER, STRING, MIX_COL_NUM and TIMESTAMP are handled; any
+// other type returns an empty string and leaves aws_expr untouched. At
+// depth > 0 an unknown type yields "error" in both strings.
+//
+// Several statements below call more than one rand()-driven helper inside a
+// single `+` chain. C++ leaves the evaluation order of those operands
+// unspecified, so the random sequence that is consumed - and with it the
+// generated text - may differ between compilers for the same seed. The
+// returned string and aws_expr still describe the same expression.
+string random_query_expr(int depth, string& input_str, int type, string& aws_expr)
+{
+    string ceph_expr;
+    if (depth == 0)
+    {
+        // Leaf: emit a literal, a column or a constant without further nesting.
+        switch (type)
+        {
+            case INTEGER:
+                ceph_expr = random_number(aws_expr);
+                break;
+            case STRING:
+                ceph_expr = "\'" + input_str + "\'";
+                aws_expr = "\'" + input_str + "\'";
+                break;
+            case MIX_COL_NUM:
+                ceph_expr = random_num_col_expr(depth, aws_expr);
+                break;
+            case TIMESTAMP:
+                // aws_expr is first passed as the output argument of
+                // random_timestamp_string() and then wrapped in place.
+                ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr) + "\')";
+                aws_expr = "to_timestamp(\'" + aws_expr + "\')";
+                break;
+        }
+        return ceph_expr;
+    }
+
+    int option; // receives the switch selectors below; not read anywhere else
+    if (type == INTEGER) //return type is int
+    {
+        string ceph_col, aws_col, aws_expr1, aws_expr2, op1, op2;
+        switch (option = rand() % 9)
+        {
+            // Cases 0-4 share one shape:
+            //   aggregate(column op1 column/constant expression) op2 constant expression
+            case 0:
+                ceph_col = random_col(aws_col);
+                op1 = random_arth_op();
+                op2 = random_arth_op();
+                ceph_expr = "int(avg(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
+                    ") " + op2 + " " + random_num_expr(depth-1, aws_expr2) + ")";
+                aws_expr = "cast((avg(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2 +
+                    ") as int)";
+                break;
+            case 1:
+                ceph_col = random_col(aws_col);
+                op1 = random_arth_op();
+                op2 = random_arth_op();
+                ceph_expr = "count(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
+                    ") " + op2 + " " + random_num_expr(depth-1, aws_expr2);
+                aws_expr = "count(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
+                break;
+            case 2:
+                ceph_col = random_col(aws_col);
+                op1 = random_arth_op();
+                op2 = random_arth_op();
+                ceph_expr = "max(" + ceph_col + op1 + random_num_col_expr(depth-1,aws_expr1) + ") " +
+                    op2 + " " + random_num_expr(depth-1, aws_expr2);
+                aws_expr = "max(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
+                break;
+            case 3:
+                ceph_col = random_col(aws_col);
+                op1 = random_arth_op();
+                op2 = random_arth_op();
+                ceph_expr = "min(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) + ") " +
+                    op2 + " " + random_num_expr(depth-1, aws_expr2);
+                aws_expr = "min(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
+                break;
+            case 4:
+                ceph_col = random_col(aws_col);
+                op1 = random_arth_op();
+                op2 = random_arth_op();
+                ceph_expr = "sum(" + ceph_col + op1 + random_num_col_expr(depth-1, aws_expr1) +
+                    ") " + op2 + " " + random_num_expr(depth-1, aws_expr2);
+                aws_expr = "sum(" + aws_col + op1 + aws_expr1 + ") " + op2 + " " + aws_expr2;
+                break;
+            // Cases 5-6: length of a nested STRING expression.
+            case 5:
+                ceph_expr = "char_length(" + random_query_expr(depth-1, input_str, STRING,
+                    aws_expr1) + ")";
+                aws_expr = "char_length(" + aws_expr1 + ")";
+                break;
+            case 6:
+                ceph_expr = "character_length(" + random_query_expr(depth-1, input_str, STRING,
+                    aws_expr1) + ")";
+                aws_expr = "character_length(" + aws_expr1 + ")";
+                break;
+            // Case 7: one date part taken from a nested TIMESTAMP expression.
+            case 7:
+                op1 = random_date_part();
+                ceph_expr = "extract(" + op1 + " from " + random_query_expr(depth-1, input_str,
+                    TIMESTAMP, aws_expr1) + ")";
+                aws_expr = "extract(" + op1 + " from " + aws_expr1 + ")";
+                break;
+            // Case 8: difference, in one date part, of two nested TIMESTAMP expressions.
+            case 8:
+                op1 = random_date_part();
+                ceph_expr = "date_diff(" + op1 + ", " + random_query_expr(depth-1, input_str,
+                    TIMESTAMP, aws_expr1) + ", " + random_query_expr(depth-1, input_str,
+                    TIMESTAMP, aws_expr2) + ")";
+                aws_expr = "date_diff(" + op1 + ", " + aws_expr1 + ", " + aws_expr2 + ")";
+                break;
+        }
+    }
+    else if (type == STRING) // return type is string
+    {
+        string aws_expr1, aws_expr2, aws_expr3;
+        switch (option = rand() % 4)
+        {
+            case 0:
+                ceph_expr = "lower(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
+                    ")";
+                aws_expr = "lower(" + aws_expr1 + ")";
+                break;
+            case 1:
+                ceph_expr = "upper(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
+                    ")";
+                aws_expr = "upper(" + aws_expr1 + ")";
+                break;
+            // substring(<string expr>, <int expr>, <int expr>)
+            case 2:
+                ceph_expr = "substring(" + random_query_expr(depth-1, input_str, STRING, aws_expr1) +
+                    ", " + random_query_expr(depth-1, input_str, INTEGER, aws_expr2) + ", " +
+                    random_query_expr(depth-1, input_str, INTEGER, aws_expr3) + ")";
+                aws_expr = "substring(" + aws_expr1 + ", " + aws_expr2 + ", " + aws_expr3 + ")";
+                break;
+            // to_string(<timestamp expr>, '<format>'): the format text is
+            // generated once and used unchanged in both output strings.
+            case 3:
+                aws_expr2 = random_tm_format_string();
+                ceph_expr = "to_string(" + random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr1) +
+                    ", \'" + aws_expr2 + "\')";
+                aws_expr = "to_string(" + aws_expr1 + ", \'" + aws_expr2 + "\')";
+                break;
+        }
+    }
+    else if (type == TIMESTAMP) // return type is TIMESTAMP
+    {
+        string aws_expr1, aws_expr2, date_part;
+        switch (option = rand() % 2)
+        {
+            // date_add(<date part>, <constant>, <timestamp expr>)
+            case 0:
+                date_part = random_date_part();
+                ceph_expr = "date_add(" + date_part + ", " + random_number(aws_expr1) + ", " +
+                    random_query_expr(depth-1, input_str, TIMESTAMP, aws_expr2) + ")";
+                aws_expr = "date_add(" + date_part + ", " + aws_expr1 + ", " + aws_expr2 + ")";
+                break;
+            // A timestamp literal; ends the nesting regardless of the remaining depth.
+            case 1:
+                ceph_expr = "to_timestamp(\'" + random_timestamp_string(aws_expr1) + "\')";
+                aws_expr = "to_timestamp(\'" + aws_expr1 + "\')";
+                break;
+        }
+    }
+    else if (type == MIX_COL_NUM)
+    {
+        ceph_expr = random_num_col_expr(depth-1, aws_expr);
+    }
+    else if (type == COLUMN) // return type integer column number
+    {
+        ceph_expr = random_col(aws_expr);
+    }
+    else if (type == NUMBER) // return type random number
+    {
+        ceph_expr = random_number(aws_expr);
+    }
+    else
+    {
+        aws_expr = "error";
+        ceph_expr = "error";
+    }
+    return ceph_expr;
+}
+
+int main()
+{
+    srand(time(0));
+    int reps, depth;
+    fstream query_file, aws_query_file;
+    query_file.open("queries.txt", ios::out);
+    aws_query_file.open("aws_queries.txt", ios::out);
+    string input_str = " %%AbCdEfGhIjKlMnOpQrStUvWxYz## ";
+    cout << "Enter number of quries to be generated: ";
+    cin >> reps;
+    cout << "Enter depth of queries to be generated: ";
+    cin >> depth;
+    if(query_file.is_open() && aws_query_file.is_open()) //checking 
whether the file is open + { + while (reps) + { + string aws_expr; + int type; + string ceph_query = "select "; + string aws_query = "select "; + /*int projection = rand() % 4; + while (projection > 1) + { + type = rand() % 4; + ceph_query = ceph_query + random_query_expr(depth, input_str, + type, aws_expr) + ", "; + aws_query = aws_query + aws_expr + ", "; + projection--; + }*/ + type = rand() % 4; + ceph_query = ceph_query + random_query_expr(depth, input_str, type, + aws_expr)+ " from stdin;"; + aws_query = aws_query + aws_expr + " from s3object;"; + query_file << ceph_query << endl; + aws_query_file << aws_query <