Machine Learning Algorithms: KNN, Decision Tree, SVM, Random Forest, and AdaBoost
KNN
# K-Nearest Neighbours classification on the Social Network Ads dataset.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the data once: columns 1-3 are the features, the last column is the label.
# NOTE(review): columns 1-3 are presumably Gender, Age, EstimatedSalary --
# confirm against the CSV header.
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [1, 2, 3]].values
y = dataset.iloc[:, -1].values
print(len(X))

# The first feature column is categorical; encode it as integers so the
# scaler and the distance metric can operate on it.
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

# 80/20 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fit the scaler on the training data only, then apply the same transform to
# the test data, so no test-set statistics leak into training.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Minkowski distance with p=2 is the Euclidean distance.
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Evaluate on the held-out test set and report the results.
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print(cm)
print("the accuracy is", ac)
Decision Tree
# Decision Tree classification on the Social Network Ads dataset.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn import tree
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Columns 1-3 are the features, the last column is the label.
# NOTE(review): columns 1-3 are presumably Gender, Age, EstimatedSalary --
# confirm against the CSV header.
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [1, 2, 3]].values
y = dataset.iloc[:, -1].values
print(len(X))

# The first feature column is categorical; encode it as integers.
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

# 80/20 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fit the scaler on the training data only and reuse it for the test data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train a decision tree with default hyper-parameters and predict the test set.
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

# Evaluate on the held-out test set.
cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print("the accuracy is", ac)

# Draw the fitted tree; plt.show() is needed for the figure to appear when
# this runs as a script rather than in a notebook.
tree.plot_tree(classifier)
plt.show()
SVM
# Linear-kernel SVM classification on scikit-learn's breast cancer dataset.
from sklearn import datasets
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import train_test_split

# Load the bundled dataset and inspect its features, labels and shape.
cancer = datasets.load_breast_cancer()
print("Features: ", cancer.feature_names)
print("Labels: ", cancer.target_names)
print(cancer.data.shape)
print(cancer.data[0:5])
print(cancer.target)

# 70% training and 30% test; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.3, random_state=109
)

# Train a support vector classifier with a linear kernel and predict the test set.
clf = svm.SVC(kernel='linear')
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Report accuracy, precision and recall on the held-out test set.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))
Random Forest
# Random Forest classification on the Social Network Ads dataset, comparing
# forests of 1, 10 and 100 trees.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Columns 1-3 are the features, the last column is the label.
# NOTE(review): columns 1-3 are presumably Gender, Age, EstimatedSalary --
# confirm against the CSV header.
dataset = pd.read_csv('Social_Network_Ads.csv')
X = dataset.iloc[:, [1, 2, 3]].values
y = dataset.iloc[:, -1].values
print(len(X))

# The first feature column is categorical; encode it as integers.
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

# 80/20 train/test split; the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0)

# Fit the scaler on the training data only and reuse it for the test data.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train one forest per size and report its test accuracy. After the loop,
# rfc and y_pred refer to the largest (100-tree) forest.
for n_trees in (1, 10, 100):
    rfc = RandomForestClassifier(random_state=0, n_estimators=n_trees)
    rfc.fit(X_train, y_train)
    y_pred = rfc.predict(X_test)
    print('Model accuracy score with {0} decision-tree : {1:0.4f}'.format(
        n_trees, accuracy_score(y_test, y_pred)))
AdaBoost
# AdaBoost classification on scikit-learn's iris dataset.
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split

# Load the bundled dataset: feature matrix and class labels.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# 70% training and 30% test. No random_state is set, so the split (and
# therefore the reported accuracy) varies from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Boosted ensemble of 50 estimators using the library's default base learner.
abc = AdaBoostClassifier(n_estimators=50,
                         learning_rate=1)
model = abc.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Report accuracy on the held-out test set.
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
