Makefile: 2 changes (1 addition, 1 deletion)
@@ -108,7 +108,7 @@ export APP_VERSION = ${tag}-${version}
commit = ${APP_VERSION}
lastcommit := $(shell touch .lastcommit && cat .lastcommit)
date := $(shell date -I)
id := $(shell openssl rand -base64 8)
id := myid

vm_max_count := $(shell cat /etc/sysctl.conf | egrep vm.max_map_count\s*=\s*262144 && echo true)

code/api.py: 6 changes (3 additions, 3 deletions)
@@ -610,7 +610,7 @@ def post(self, dataset):
except:
return {"data": [{"error": "error: no such table {}".format(ds.table)}]}
df = df.head(n=size).reset_index(drop=True)
df = df.applymap(lambda x: unicode_safe(x))
df = df.apply(lambda col: col.map(unicode_safe))
if (format_type == 'json'):
return {"data": list(df.fillna("").T.to_dict().values()), "schema": schema}
elif (format_type == 'csv'):
@@ -957,7 +957,7 @@ def post(self, recipe, action):
try:
return jsonify({"data": list(df.T.to_dict().values()), "log": str(r.log.writer.getvalue())})
except:
df = df.applymap(lambda x: str(x))
df = df.astype(str)
return jsonify({"data": list(df.T.to_dict().values()), "log": str(r.log.writer.getvalue())})
else:
return {"log": r.log.writer.getvalue()}
@@ -993,7 +993,7 @@ def put(self, recipe, action):
try:
return jsonify({"data": list(df.T.to_dict().values()), "log": r.callback["log"]})
except:
df = df.applymap(lambda x: unicode_safe(x))
df = df.apply(lambda col: col.map(unicode_safe))
return jsonify({"data": list(df.T.to_dict().values()), "log": r.callback["log"]})
else:
return {"data": [{"result": "empty"}], "log": r.callback["log"]}
code/recipes.py: 48 changes (19 additions, 29 deletions)
@@ -1588,9 +1588,8 @@ def prepare_categorical(self, df=None):
return df

def prepare_numerical(self, df=None):
df = df[self.numerical].fillna("")
df = df.applymap(lambda x: 0 if (
(str(x) == "") | (x == None)) else float(x))
df = df[self.numerical].fillna("0")
df = df.apply(lambda col: pd.to_numeric(col, errors='coerce').fillna(0))
return df

def internal_fillna(self, df=None, desc=None):
@@ -1756,7 +1755,7 @@ def internal_build_model(self, df=None, desc=None):
# for debug: self.log.write("{} {} {} {}
# {}".format(X.shape,len(self.numerical),Xn.shape,len(self.categorical),Xc.shape))

Y = df[self.target].applymap(lambda x: 1 if x else 0)
Y = np.where(df[self.target], 1, 0)
# prep = DictVectorizer()
# X=X.to_dict().values()
# X = prep.fit_transform(X).toarray()
@@ -1891,8 +1890,8 @@ def internal_to_integer(self, df=None, desc=None):
# keep only selected columns
self.select_columns(df=df)
try:
df[self.cols] = df[self.cols].applymap(
lambda x: np.nan if (str(x) == "") else int(x))
# Convert selected columns to numeric; empty or invalid values become NaN
df[self.cols] = df[self.cols].apply(lambda col: pd.to_numeric(col, errors='coerce'))
return df
except SystemExit:
return df
@@ -1904,8 +1903,7 @@ def internal_list_to_tuple(self, df=None, desc=None):
# keep only selected columns
self.select_columns(df=df)
try:
df[self.cols] = df[self.cols].applymap(
lambda x: tuple(x) if (type(x) == list) else x)
df[self.cols] = df[self.cols].apply(lambda col: col.apply(lambda x: tuple(x) if isinstance(x, list) else x))
return df
except SystemExit:
return df
@@ -1917,8 +1915,7 @@ def internal_tuple_to_list(self, df=None, desc=None):
# keep only selected columns
self.select_columns(df=df)
try:
df[self.cols] = df[self.cols].applymap(
lambda x: list(x) if (type(x) == tuple) else x)
df[self.cols] = df[self.cols].apply(lambda col: col.apply(lambda x: list(x) if isinstance(x, tuple) else x))
return df
except SystemExit:
return df
@@ -1929,13 +1926,11 @@ def internal_tuple_to_list(self, df=None, desc=None):
def internal_to_float(self, df=None, desc=None):
# keep only selected columns
self.select_columns(df=df)
na_value = self.args.get("na_value", np.nan)

try:
na_value = self.args["na_value"]
except:
na_value = np.nan
try:
df[self.cols] = df[self.cols].applymap(
lambda x: na_value if (str(x) == "") else float(x))
# Convert columns to floats, setting non-convertible values to na_value
df[self.cols] = df[self.cols].apply(lambda col: pd.to_numeric(col, errors='coerce').fillna(na_value))
return df
except SystemExit:
return df
@@ -1946,13 +1941,13 @@ def internal_to_float(self, df=None, desc=None):
def internal_ngram(self, df=None, desc=None):
# keep only selected columns
self.select_columns(df=df)
if ("n" in list(self.args.keys())):
n = self.args['n']
else:
n = list([2, 3])
n = self.args.get('n', [2, 3])  # default n-gram sizes when not specified in args

try:
df[self.cols] = df[self.cols].applymap(
lambda x: ngrams(tokenize(normalize(x)), n))
# Apply n-gram generation to each column
df[self.cols] = df[self.cols].apply(
lambda col: col.apply(lambda x: ngrams(tokenize(normalize(x)), n))
)
return df
except SystemExit:
return df
@@ -2248,8 +2243,7 @@ def internal_join(self, df=None, desc=None):
if True:
m_res = []

rest = df.applymap(lambda x: "" if x is None else x)
rest.fillna("", inplace=True)
rest = df.fillna("")

# elasticsearch bulk search
while rest.shape[0] > 0:
@@ -2427,9 +2421,6 @@ def internal_parsedate(self, df=None, desc=None):
for col in self.cols:
df[col] = pd.to_datetime(
df[col], errors='coerce', format=self.args["format"])
# df[self.cols]=df[self.cols].applymap(lambda x:
# parsedate(x,self.args["format"]))

return df

def internal_replace(self, df=None, desc=None):
@@ -2441,8 +2432,7 @@ def internal_replace(self, df=None, desc=None):
for r in self.args["regex"]:
regex.append([re.compile(list(r.keys())[0]), r[list(r.keys())[0]]])
pd.options.mode.chained_assignment = None
df[self.cols] = df[self.cols].applymap(
lambda x: replace_regex(x, regex))
df[self.cols] = df[self.cols].apply(lambda col: col.apply(lambda x: replace_regex(x, regex)))
return df
else:
return df
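
Similarly, in code/recipes.py the manual int/float casts are replaced with pd.to_numeric(errors='coerce'), which turns empty or unparseable values into NaN rather than raising, after which NaN can be filled with the desired replacement. A minimal sketch of that behaviour (the "score" column and the fill value 0 are illustrative only):

import pandas as pd

df = pd.DataFrame({"score": ["1", "", "2.5", "oops"]})

# Old pattern: applymap(lambda x: na_value if str(x) == "" else float(x)),
# which would raise ValueError on a value such as "oops".
# New pattern: coerce to numeric, then fill the resulting NaN values.
df["score"] = pd.to_numeric(df["score"], errors="coerce").fillna(0)
# df["score"] is now [1.0, 0.0, 2.5, 0.0]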