Machine Learning Algorithms: KNN, LWR, Random Forest, SVM

K-Nearest Neighbors (KNN) Algorithm Implementation

This section demonstrates an implementation of the K-Nearest Neighbors (KNN) algorithm, which classifies a sample by a majority vote among its k nearest training points, using the Iris dataset in Python with scikit-learn.

Python Code for KNN

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import numpy as np

# Load the Iris dataset
dataset = load_iris()

# Split the dataset into training and test sets
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2, random_state=0)
print("Training labels:", y_train)

# Initialize and fit the KNN classifier
kn = KNeighborsClassifier(n_neighbors=3)
kn.fit(X_train, y_train)

# Make predictions on the test set
for i in range(len(X_test)):
    x = X_test[i]
    x_new = np.array([x])  # Convert to a 2D array for prediction
    prediction = kn.predict(x_new)[0]  # Get the first (and only) element of the prediction array

    # Print the actual and predicted classes
    print(f"TARGET: {dataset.target_names[y_test[i]]}, PREDICTED: {dataset.target_names[prediction]}")

# Evaluate the model's accuracy on the test set
accuracy = kn.score(X_test, y_test)
print(f"Model Accuracy: {accuracy:.2f}")

Locally Weighted Regression (LWR) Example

This section illustrates Locally Weighted Regression (LWR), a non-parametric regression method that fits a separate linear regression for each query point, weighting training points more heavily the closer they are to the query.
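
Concretely, for a query point x, LWR assigns each training example x^{(i)} a weight from a Gaussian kernel with bandwidth \tau and then solves a weighted least-squares problem:

w^{(i)} = \exp\left( -\frac{(x^{(i)} - x)^2}{2\tau^2} \right), \qquad \theta = (X^\top W X)^{-1} X^\top W y

where W is the diagonal matrix of the weights w^{(i)} and X is the design matrix with a leading column of ones. The code below uses the pseudo-inverse in place of the plain inverse for numerical stability.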

Python Code for LWR

import numpy as np
import matplotlib.pyplot as plt

# Example dataset
X = np.array([1, 2, 3, 4, 5])
y = np.array([1, 2, 1.3, 3.75, 2.25])

def gaussian_weights(x_query, X, tau):
    # Compute weights using the Gaussian kernel
    return np.exp(-(X - x_query)**2 / (2 * tau**2))

def locally_weighted_regression(x_query, X, y, tau):
    weights = gaussian_weights(x_query, X, tau)
    
    X_design = np.vstack([np.ones_like(X), X]).T
    W = np.diag(weights)
    
    theta = np.linalg.pinv(X_design.T @ W @ X_design) @ (X_design.T @ W @ y)
    x_query_design = np.array([1, x_query])
    y_pred = x_query_design @ theta
    return y_pred, theta

tau = 1.0
x_query = 3
y_pred, theta = locally_weighted_regression(x_query, X, y, tau)

print(f"Observed y at x=3: {y[X==3][0]}")
print(f"LWR Predicted y at x=3: {y_pred:.3f}")
print(f"Locally weighted regression coefficients: Intercept={theta[0]:.3f}, Slope={theta[1]:.3f}")

# Evaluate the LWR fit on a dense grid of query points for plotting
x_grid = np.linspace(1, 5, 100)
y_grid = [locally_weighted_regression(x0, X, y, tau)[0] for x0 in x_grid]

plt.scatter(X, y, color='red', label='Data')
plt.plot(x_grid, y_grid, color='blue', label='LWR Prediction')
plt.scatter([x_query], [y_pred], color='green', label=f'LWR at x={x_query}')
plt.scatter([x_query], [y[X==x_query][0]], color='orange', label=f'Observed at x={x_query}')
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.title('Locally Weighted Regression Example')
plt.show()
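
The bandwidth tau controls how local the fit is: a small tau tracks nearby points closely (risking a noisy fit), while a large tau approaches an ordinary global linear regression. A small sketch reusing the function above; the tau values are illustrative assumptions.

# Compare predictions at the same query point under several bandwidths
for tau_candidate in (0.1, 0.5, 1.0, 5.0):
    pred, _ = locally_weighted_regression(x_query, X, y, tau_candidate)
    print(f"tau={tau_candidate}: predicted y at x={x_query} is {pred:.3f}")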

Random Forest Classifier Implementation

This section demonstrates the implementation of a Random Forest Classifier in Python using scikit-learn, including the confusion matrix, a per-class classification report, and overall accuracy.

Python Code for Random Forest

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

# Load dataset
data = load_iris()
X = data.data
y = data.target

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=42)

# Train Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict on test data
y_pred = rf_classifier.predict(X_test)

# Calculate confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate evaluation metrics
class_report = classification_report(y_test, y_pred)
print("Classification Report:")
print(class_report)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

Support Vector Machine (SVM) Classification

This section provides an implementation of a Support Vector Machine (SVM) for classification, demonstrating its use with the Iris dataset and visualizing the decision boundary and support vectors.
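
For reference, a linear SVM seeks the hyperplane that separates the two classes with maximum margin. In its hard-margin form:

\min_{w,\, b} \; \frac{1}{2}\lVert w \rVert^2 \quad \text{subject to} \quad y_i (w^\top x_i + b) \ge 1 \;\; \forall i

The margin width is 2 / \lVert w \rVert, and the training points that meet the constraint with equality are the support vectors highlighted in the plot below. Scikit-learn's SVC solves the soft-margin variant, which adds a penalty parameter C for misclassified points.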

Python Code for SVM

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data[:, :2]  # Using first two features for visualization
y = iris.target

# For binary classification, use only two classes
X = X[y != 2]
y = y[y != 2]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Standardize features; SVMs are sensitive to feature scales
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train SVM model with linear kernel
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred = svm_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Get support vectors
support_vectors = svm_model.support_vectors_
print(f"Number of support vectors: {len(support_vectors)}")
print("Support vectors:")
print(support_vectors)

# Create a mesh to plot decision boundary
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                     np.arange(y_min, y_max, 0.02))

# Transform mesh for prediction
mesh_points = np.c_[xx.ravel(), yy.ravel()]
mesh_points_scaled = scaler.transform(mesh_points)
Z = svm_model.predict(mesh_points_scaled)
Z = Z.reshape(xx.shape)

# Plot the decision boundary and data points
plt.figure(figsize=(10, 8))
plt.contourf(xx, yy, Z, alpha=0.3)

# Plot original data points
plt.scatter(X[y == 0, 0], X[y == 0, 1], c='red', label='Class 0')
plt.scatter(X[y == 1, 0], X[y == 1, 1], c='yellow', label='Class 1')

# Highlight support vectors
original_support_vectors = scaler.inverse_transform(support_vectors)
plt.scatter(original_support_vectors[:, 0], original_support_vectors[:, 1], 
            s=100, facecolors='none', edgecolors='blue', linewidth=1.5,
            label='Support Vectors')

plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('SVM Classification with Support Vectors')
plt.legend()
plt.tight_layout()
plt.show()
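
The linear kernel suffices here because these two Iris classes are (famously) linearly separable in the chosen features. For data that is not, an RBF kernel is a common alternative; a minimal sketch reusing the scaled data above (C=1.0 and gamma='scale' are scikit-learn's defaults, written out for clarity):

# Train an RBF-kernel SVM on the same scaled data for comparison
rbf_model = SVC(kernel='rbf', C=1.0, gamma='scale')
rbf_model.fit(X_train_scaled, y_train)
print(f"RBF kernel accuracy: {rbf_model.score(X_test_scaled, y_test):.2f}")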