You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
# Rewrite spambase/libsvm.data so that every line starts with its class label
# ("-1" or "1"), appending the result to spambase/libsvm2.data.
#
# The files are managed by a `with` block so both handles are closed even if
# an error is raised while processing a line.
# NOTE: the output is opened in append mode, so re-running the script adds to
# whatever libsvm2.data already contains.
with open("spambase/libsvm.data", "r") as data, \
        open("spambase/libsvm2.data", "a") as data2:
    for line_no, line in enumerate(data):
        # The class is detected from the last three characters of the line
        # (which include the trailing newline when one is present).
        tail = line[-3:]

        # Position, relative to offset 2, of the first space at or after
        # offset 2; str.find returns -1 when there is none.
        white_space_index = line[2:].find(" ")
        if line_no == 0:
            # The very first line is cut at a fixed offset of 2 instead.
            # NOTE(review): presumably its leading index entry is absent
            # from the input -- confirm against the data file.
            white_space_index = 0

        # drop last index (class) and first because it is the index nr.
        # NOTE(review): the fixed -6 assumes the trailing class entry plus
        # newline is six characters long -- confirm for "-1" rows.
        body = line[white_space_index + 2:-6]

        if "-1" in tail:
            line = "-1 " + body
        else:
            line = "1 " + body
        data2.write(line + "\n")
The text was updated successfully, but these errors were encountered:
When you know the highest occurring index, you can do something like this:
# Text-mode, read-only handle on the input file (presumably LIBSVM-formatted,
# judging by how read_libsvm consumes such a handle -- confirm).
data = open("iris.scale", mode="r")
def read_libsvm(data, col_length):
    """Parse LIBSVM-style lines into a dense 2-D NumPy array.

    Each non-blank line is expected to look like
    ``<label> <index>:<value> <index>:<value> ...``. The feature with
    1-based index ``k`` is stored in column ``k - 1`` and the integer label
    is stored in the last column (``col_length - 1``).

    Args:
        data: Iterable of text lines (an open text file works).
        col_length: Number of columns of the result. Because the label
            occupies the last column, this must be at least the highest
            feature index plus one, otherwise the label overwrites that
            feature.

    Returns:
        A float64 array of shape ``(n_lines, col_length)``; entries that do
        not appear in a line are 0. When ``data`` contains no non-blank
        line, a single all-zero row of shape ``(1, col_length)`` is
        returned (kept for backward compatibility).

    Raises:
        ValueError: If a label, index or value cannot be converted, or a
            feature token has no ``:`` separator.
        IndexError: If a feature index does not fit in ``col_length``
            columns.
    """
    rows = []
    for line in data:
        # split splits on whitespace by default
        tokens = line.split()
        if not tokens:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # sample.
            continue

        row = np.zeros(col_length)
        for token in tokens[1:]:
            index, value = token.split(":")[:2]
            # Values are rounded through float32 before being stored, which
            # keeps the numeric results of earlier versions.
            row[int(index) - 1] = np.float32(value)

        # The label is the whole first token, so signed ("-1") and
        # multi-digit ("10") labels are read correctly.
        row[col_length - 1] = int(tokens[0])
        rows.append(row)

    if not rows:
        return np.zeros((1, col_length))
    # Stack once at the end instead of growing the array row by row.
    return np.vstack(rows)
I did something like this:
#convert to libsvm format https://github.com/zygmuntz/phraug/blob/master/csv2libsvm.py
#python csv2libsvm.py spambase_renamed_class.csv libsvm.data -1 False
The text was updated successfully, but these errors were encountered: