I'm doing a project using the breast cancer dataset. I implemented the kNN model and the decision tree model, but when I try to find the best value for k, the result changes every time I run the code.
# Finding the optimum number of neighbors
#
# Fits one kNN classifier per candidate k, records its accuracy on the test
# set, and plots accuracy against k.
#
# NOTE(review): KNeighborsClassifier takes no random_state, so for fixed
# X_train/X_test this sweep should give the same numbers on every run. If the
# results still change between runs, the split that produces X_train/X_test is
# presumably unseeded -- confirm where the split is made and pass it a fixed
# random_state.
k_values = range(1, 20)
list1 = []
for k in k_values:
    kNN = KNeighborsClassifier(n_neighbors=k)
    kNN.fit(X_train, y_train)
    y_pred = kNN.predict(X_test)
    # compute Accuracy on test set (once; reused for both print and plot)
    a = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("k:", k, " | Accuracy:", a)
    list1.append(a)
plt.plot(list(k_values), list1)
# Display the figure plotted so far before starting the next experiment.
plt.show()

# Decision tree: sweep the maximum number of leaves and record train/test
# accuracy for each setting, then plot both curves on a new figure.
all_train_acc = []
all_test_acc = []
leaves_vals = range(2, 20)
for max_leaves in leaves_vals:
    # train and predict
    # A fixed integer random_state makes the fitted tree reproducible;
    # with random_state=None the tree can differ from run to run.
    dt = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaves, random_state=0)
    dt.fit(X_train, y_train)
    # compute Accuracy
    train_acc = accuracy_score(y_true=y_train, y_pred=dt.predict(X_train))
    test_acc = accuracy_score(y_true=y_test, y_pred=dt.predict(X_test))
    print ("Max leaves:", max_leaves, " - Train Accuracy:", train_acc, " - Test Accuracy:", test_acc)
    all_train_acc.append(train_acc)
    all_test_acc.append(test_acc)
plt.figure()
plt.plot(leaves_vals, all_train_acc)
plt.plot(leaves_vals, all_test_acc)
plt.legend(['train','test'])
plt.xlabel('max leaves')
# Show this figure too, matching how the previous plot is displayed.
plt.show()
I'd like to get the same result every time the code runs, because I have to write a report on the project and need to justify the conclusions I draw. A value that changes on every run makes that difficult. Thank you.
Aucun commentaire:
Enregistrer un commentaire