Machine Learning Algorithms: KNN, LWR, Random Forest, SVM
K-Nearest Neighbors (KNN) Algorithm Implementation
This section demonstrates the implementation of the K-Nearest Neighbors (KNN) algorithm using the Iris dataset in Python with scikit-learn.
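As a quick reminder of what "nearest" means here: KNeighborsClassifier defaults to the Minkowski metric with p = 2, i.e. ordinary Euclidean distance, and predicts the majority class among the k closest training points:

d(x, x') = \sqrt{\sum_{j=1}^{n} (x_j - x'_j)^2}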
Python Code for KNN
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np
# Load the Iris dataset
dataset = load_iris()
# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2, random_state=0)
print("Training labels:", y_train)
# Initialize and fit the KNN classifier
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(X_train, y_train)
# Make predictions on the test set
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])  # Convert to a 2D array for prediction
    prediction = kn.predict(x_new)[0]  # Get the first (and only) element of the prediction array
    # Print the actual and predicted classes
    print(f"TARGET: {dataset.target_names[y_test[i]]}, PREDICTED: {dataset.target_names[prediction]}")
# Evaluate the model's accuracy on the test set
accuracy = kn.score(X_test, y_test)
print(f"Model Accuracy: {accuracy:.2f}")
Locally Weighted Regression (LWR) Example
This section illustrates the concept of Locally Weighted Regression (LWR), a non-parametric regression method that computes a regression line at each query point by weighting nearby data points more heavily.
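Concretely, for a query point x_q each training point x_i receives a Gaussian weight, a local line is fitted by weighted least squares, and the prediction is the value of that line at x_q; this is exactly what the code below computes (using a pseudo-inverse for numerical robustness):

w_i = \exp\!\left(-\frac{(x_i - x_q)^2}{2\tau^2}\right), \qquad \theta = (X^\top W X)^{-1} X^\top W y, \qquad \hat{y}(x_q) = [1,\ x_q]\,\theta

where X is the design matrix with an intercept column, W = diag(w_1, ..., w_m), and the bandwidth tau controls how quickly the weights decay with distance.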
Python Code for LWR
import numpy as np
import matplotlib.pyplot as plt
# Example dataset
X = np.array([1, 2, 3, 4, 5])
y = np.array([1, 2, 1.3, 3.75, 2.25])
def gaussian_weights(x_query, X, tau):
    # Compute weights using the Gaussian kernel centred on the query point
    return np.exp(-(X - x_query)**2 / (2 * tau**2))

def locally_weighted_regression(x_query, X, y, tau):
    # Weight each training point by its distance from the query point
    weights = gaussian_weights(x_query, X, tau)
    # Design matrix with an intercept column
    X_design = np.vstack([np.ones_like(X), X]).T
    W = np.diag(weights)
    # Solve the weighted normal equations for the local line
    theta = np.linalg.pinv(X_design.T @ W @ X_design) @ (X_design.T @ W @ y)
    # Evaluate the local linear model at the query point
    x_query_design = np.array([1, x_query])
    y_pred = x_query_design @ theta
    return y_pred, theta
tau = 1.0
x_query = 3
y_pred, theta = locally_weighted_regression(x_query, X, y, tau)
print(f"Observed y at x=3: {y[X==3][0]}")
print(f"LWR Predicted y at x=3: {y_pred:.3f}")
print(f"Locally weighted regression coefficients: Intercept={theta[0]:.3f}, Slope={theta[1]:.3f}")
x_test = np.linspace(1, 5, 100)
y_test = [locally_weighted_regression(x0, X, y, tau)[0] for x0 in x_test]
plt.scatter(X, y, color='red', label='Data')
plt.plot(x_test, y_test, color='blue', label='LWR Prediction')
plt.scatter([x_query], [y_pred], color='green', label=f'LWR at x={x_query}')
plt.scatter([x_query], [y[X==x_query][0]], color='orange', label=f'Observed at x={x_query}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Locally Weighted Regression Example')
plt.show()
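The bandwidth tau controls how local the fit is: a small tau produces a wiggly curve that chases individual points, while a large tau approaches an ordinary global least-squares line. A quick way to see this, reusing X, y, x_test, and locally_weighted_regression from the example above, is to overlay curves for a few bandwidths:

# Overlay LWR curves for several bandwidths (reuses variables defined above)
plt.scatter(X, y, color='red', label='Data')
for tau_val in [0.3, 1.0, 3.0]:
    curve = [locally_weighted_regression(x0, X, y, tau_val)[0] for x0 in x_test]
    plt.plot(x_test, curve, label=f'tau = {tau_val}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Effect of the bandwidth tau')
plt.show()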
Random Forest Classifier Implementation
This section demonstrates the implementation of a Random Forest Classifier in Python using scikit-learn, including computation of the confusion matrix, a classification report with per-class metrics, and overall accuracy.
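For reference, the overall accuracy reported at the end can be read directly off the confusion matrix C (rows are true classes, columns are predicted classes) as the fraction of predictions on the diagonal:

\text{accuracy} = \frac{\sum_i C_{ii}}{\sum_{i,j} C_{ij}}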
Python Code for Random Forest
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
# Load dataset
data = load_iris()
X = data.data
y = data.target
# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)
# Train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)
# Predict on test data
y_pred = rf_classifier.predict(X_test)
# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)
# Calculate evaluation metrics
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
Support Vector Machine (SVM) Classification
This section provides an implementation of a Support Vector Machine (SVM) for classification, demonstrating its use with the Iris dataset and visualizing the decision boundary and support vectors.
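For the linear kernel used here, the fitted model is a separating hyperplane; the decision boundary and margin underlying the plot are:

f(x) = w^\top x + b, \qquad \text{decision boundary: } f(x) = 0, \qquad \text{margin width} = \frac{2}{\lVert w \rVert}

and the support vectors are the training points lying on or inside the margin; they alone determine the boundary.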
Python Code for SVM
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data[:, :2] # Using first two features for visualization
y = iris.target
# For binary classification, use only two classes
X = X[y != 2]
y = y[y != 2]
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)
# Scale features for better performance
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
# Create and train SVM model with linear kernel
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_scaled, y_train)
# Make predictions
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
# Get support vectors
support_vectors = svm_model.support_vectors_
print(f"Number of support vectors: {len(support_vectors)}")
print("Support vectors:")
print(support_vectors)
# Create a mesh to plot decision boundary
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))
# Transform mesh for prediction
mesh_points = np.c_[xx.ravel(), yy.ravel()]
mesh_points_scaled = scaler.transform(mesh_points)
Z = svm_model.predict(mesh_points_scaled)
Z = Z.reshape(xx.shape)
# Plot the decision boundary and data points
plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.3)
# Plot original data points
plt.scatter(X[y == 0, 0], X[y == 0, 1], c='red', label='Class 0')
plt.scatter(X[y == 1, 0], X[y == 1, 1], c='yellow', label='Class 1')
# Highlight support vectors
original_support_vectors = scaler.inverse_transform(support_vectors)
plt.scatter(original_support_vectors[:, 0], original_support_vectors[:, 1],
            s=100, facecolors='none', edgecolors='blue', linewidth=1.5,
            label='Support Vectors')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Classification with Support Vectors')
plt.legend()
plt.tight_layout()
plt.show()
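The linear kernel separates these two Iris classes well. For data that is not linearly separable, a common variation, sketched below as an illustrative extension that reuses X_train_scaled, y_train, X_test_scaled, and y_test from above, is an RBF kernel with C and gamma chosen by cross-validation:

from sklearn.model_selection import GridSearchCV

# Search over regularization strength C and RBF kernel width gamma
param_grid = {"C": [0.1, 1, 10], "gamma": [0.01, 0.1, 1]}
grid = GridSearchCV(SVC(kernel="rbf"), param_grid, cv=5)
grid.fit(X_train_scaled, y_train)

print("Best parameters:", grid.best_params_)
print(f"Test accuracy with the tuned RBF model: {grid.score(X_test_scaled, y_test):.2f}")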