# Machine Learning Algorithms: KNN, Decision Tree, SVM, Random Forest, and AdaBoost

# KNN (K-Nearest Neighbors)

# K-nearest-neighbours classification on 'Social_Network_Ads.csv'.
#
# Steps: load the CSV, label-encode the first selected feature column, hold
# out 20% of the rows for testing, standardise the features, fit a
# 5-neighbour classifier and score it on the held-out rows.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler

# The path is relative, so the file must be in the current working directory.
dataset = pd.read_csv('Social_Network_Ads.csv')

# Columns 1-3 are used as features; the last column is the target.
X = dataset.iloc[:, [1, 2, 3]].values
y = dataset.iloc[:, -1].values
print(len(X))

# Replace the values of the first feature column with integer codes.
# NOTE(review): presumably this column holds text categories - confirm
# against the CSV.
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

# 80/20 split; the fixed seed keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Fit the scaler on the training rows only, then apply the same scaling to
# the test rows.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Minkowski distance with p=2 is the Euclidean distance.
classifier = KNeighborsClassifier(n_neighbors=5, metric='minkowski', p=2)
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
# Report the score, as the Decision Tree section does; previously the
# metrics were computed and then discarded.
print("the accuracy is", ac)

# Decision Tree

# Decision-tree classification on 'Social_Network_Ads.csv'.
#
# Steps: load the CSV, label-encode the first selected feature column, hold
# out 20% of the rows for testing, standardise the features, fit a decision
# tree, print its accuracy on the held-out rows and plot the fitted tree.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn import tree
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier

# The path is relative, so the file must be in the current working directory.
dataset = pd.read_csv('Social_Network_Ads.csv')

# Columns 1-3 are used as features; the last column is the target.
X = dataset.iloc[:, [1, 2, 3]].values
y = dataset.iloc[:, -1].values
print(len(X))

# Replace the values of the first feature column with integer codes.
# NOTE(review): presumably this column holds text categories - confirm
# against the CSV.
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

# 80/20 split; the fixed seed keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Fit the scaler on the training rows only, then apply the same scaling to
# the test rows.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# NOTE(review): no random_state is passed to the tree, so the fitted tree
# may differ between runs - confirm whether that is intended.
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)
y_pred = classifier.predict(X_test)

cm = confusion_matrix(y_test, y_pred)
ac = accuracy_score(y_test, y_pred)
print("the accuracy is", ac)

# Draw the fitted tree on the current matplotlib axes and display it; without
# the explicit show() nothing appears when this runs as a plain script.
tree.plot_tree(classifier)
plt.show()

# SVM (Support Vector Machine)

# Linear-kernel SVM on scikit-learn's built-in breast-cancer data set.
#
# Steps: load the data set, print a short summary of it, hold out 30% of the
# samples for testing, fit a linear SVC and print accuracy, precision and
# recall on the held-out samples.
from sklearn import datasets
from sklearn import metrics
from sklearn import svm
from sklearn.model_selection import train_test_split

cancer = datasets.load_breast_cancer()

# Quick look at the data: feature names, class names, array shape, the first
# five samples and the full target vector.
print("Features: ", cancer.feature_names)
print("Labels: ", cancer.target_names)
print(cancer.data.shape)  # was a bare expression, which prints nothing in a script
print(cancer.data[0:5])
print(cancer.target)

# 70% training and 30% test; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, test_size=0.3, random_state=109
)

clf = svm.SVC(kernel='linear')  # Linear Kernel
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

print("Accuracy:", metrics.accuracy_score(y_test, y_pred))
print("Precision:", metrics.precision_score(y_test, y_pred))
print("Recall:", metrics.recall_score(y_test, y_pred))

# Random Forest

# Random-forest classification on 'Social_Network_Ads.csv'.
#
# Steps: load the CSV, label-encode the first selected feature column, hold
# out 20% of the rows for testing, standardise the features, then fit forests
# of 1, 10 and 100 trees and print the held-out accuracy of each.
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# The path is relative, so the file must be in the current working directory.
dataset = pd.read_csv('Social_Network_Ads.csv')

# Columns 1-3 are used as features; the last column is the target.
X = dataset.iloc[:, [1, 2, 3]].values
y = dataset.iloc[:, -1].values
print(len(X))

# Replace the values of the first feature column with integer codes.
# NOTE(review): presumably this column holds text categories - confirm
# against the CSV.
le = LabelEncoder()
X[:, 0] = le.fit_transform(X[:, 0])

# 80/20 split; the fixed seed keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=0
)

# Fit the scaler on the training rows only, then apply the same scaling to
# the test rows.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Same data and seed each time; only the number of trees changes. After the
# loop, rfc and y_pred refer to the 100-tree model, as in the original
# sequence of statements.
for n_trees in (1, 10, 100):
    rfc = RandomForestClassifier(random_state=0, n_estimators=n_trees)
    rfc.fit(X_train, y_train)
    y_pred = rfc.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    # The label is built from n_trees so the 100-tree run is no longer
    # reported as "10 decision-tree".
    print(f'Model accuracy score with {n_trees} decision-tree : {score:0.4f}')

# AdaBoost

# AdaBoost classification on scikit-learn's built-in iris data set.
#
# Steps: load the data set, hold out 30% of the samples for testing, fit an
# AdaBoost ensemble of 50 estimators and print its held-out accuracy.
from sklearn.ensemble import AdaBoostClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics

iris = datasets.load_iris()
X = iris.data
y = iris.target

# 70% training and 30% test.
# NOTE(review): no random_state is passed here or to the classifier, so the
# split and the printed accuracy can differ between runs - the sections that
# use train_test_split elsewhere in this file pass a fixed seed; confirm
# whether this one should too.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

abc = AdaBoostClassifier(n_estimators=50,
                         learning_rate=1)

# fit() returns the fitted estimator, so model and abc are the same object.
model = abc.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))