Python Machine Learning Code Snippets Collection

Hierarchical Clustering Example

This snippet demonstrates hierarchical clustering: it builds a Ward-linkage dendrogram with SciPy, then fits scikit-learn's AgglomerativeClustering to assign cluster labels.

import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# Random 2-D data as a placeholder
X = np.random.rand(50, 2)

# Build the linkage matrix with Ward's method and plot the dendrogram
dendro = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.show()

# Fit agglomerative clustering with the same linkage criterion
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
print('Cluster assignments:', hc.fit_predict(X))
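
The dendrogram can also be cut programmatically rather than read by eye. A minimal sketch using SciPy's fcluster to extract flat cluster labels at a distance threshold (the value 1.0 here is an arbitrary illustration):

from scipy.cluster.hierarchy import fcluster

Z = sch.linkage(X, method='ward')
# Label points by cutting the tree where the merge distance exceeds 1.0
labels = fcluster(Z, t=1.0, criterion='distance')
print('Clusters at threshold 1.0:', len(set(labels)))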

K-Means Clustering and Elbow Method

Code to estimate a good number of clusters for K-Means with the Elbow Method: plot the within-cluster sum of squares (WCSS) against k and look for the "elbow" where adding clusters stops paying off.

import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Using random data as placeholder
X = np.random.rand(100, 2)
wcss = []  # within-cluster sum of squares for each k
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)  # inertia_ is the model's WCSS

plt.plot(range(1, 11), wcss)
plt.title('Elbow Method')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('WCSS')
plt.show()
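
The elbow can be ambiguous on real data, so a silhouette analysis is a common complementary check. A short sketch reusing the same X (the silhouette is undefined for a single cluster, so k starts at 2):

from sklearn.metrics import silhouette_score

for k in range(2, 11):
    labels = KMeans(n_clusters=k, init='k-means++', random_state=42).fit_predict(X)
    print(f'k={k}: silhouette={silhouette_score(X, labels):.3f}')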

Support Vector Machine (SVM) Implementation

Example of training an SVM classifier on the Wine dataset.

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine

data = load_wine()
X_tr, X_te, y_tr, y_te = train_test_split(data.data, data.target, test_size=0.2)
svm = SVC(kernel='linear')
svm.fit(X_tr, y_tr)
print('SVM Accuracy:', svm.score(X_te, y_te))
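
SVMs are sensitive to feature scale, and the Wine features span very different ranges, so standardizing typically improves accuracy. A minimal sketch wrapping a scaler and the classifier in a pipeline:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

scaled_svm = make_pipeline(StandardScaler(), SVC(kernel='linear'))
scaled_svm.fit(X_tr, y_tr)
print('Scaled SVM Accuracy:', scaled_svm.score(X_te, y_te))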

Decision Tree Classifier

Implementation of a Decision Tree Classifier using the Wine dataset.

from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

data = load_wine()
X_tr, X_te, y_tr, y_te = train_test_split(data.data, data.target, test_size=0.2)

dt = DecisionTreeClassifier()
dt.fit(X_tr, y_tr)
print('Decision Tree Accuracy:', dt.score(X_te, y_te))
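
A useful property of decision trees is that the fitted rules can be inspected directly; a sketch using scikit-learn's export_text:

from sklearn.tree import export_text

# Print the learned decision rules as indented if/else text
print(export_text(dt, feature_names=list(data.feature_names)))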

K-Nearest Neighbors (KNN) Classifier

Example of training a KNN classifier on the Iris dataset.

from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

iris = load_iris()
X_tr, X_te, y_tr, y_te = train_test_split(iris.data, iris.target, test_size=0.2)
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_tr, y_tr)
print('KNN Accuracy:', knn.score(X_te, y_te))
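
The choice n_neighbors=3 above is arbitrary; cross-validation is the usual way to pick k. A sketch using cross_val_score over a few candidate values:

from sklearn.model_selection import cross_val_score

for k in (1, 3, 5, 7, 9):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k), iris.data, iris.target, cv=5)
    print(f'k={k}: mean CV accuracy = {scores.mean():.3f}')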

Titanic Logistic Regression

Applying Logistic Regression to predict survival on the Titanic dataset.

import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

df = pd.read_csv('titanic.csv')  # assumes the Titanic CSV is available locally
# Crude baseline: keep only numeric columns and drop rows with missing values
df = df.select_dtypes(include=[float, int]).dropna()
X = df.drop('Survived', axis=1)
y = df['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)
print('Titanic Logistic Regression Accuracy:', accuracy_score(y_test, model.predict(X_test)))
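
Dropping every non-numeric column throws away predictive features such as Sex. A sketch that one-hot encodes categoricals instead; the column names assume the standard Kaggle Titanic schema, so adjust them to your CSV:

df = pd.read_csv('titanic.csv')
df['Age'] = df['Age'].fillna(df['Age'].median())  # impute missing ages with the median
# One-hot encode Sex; numeric columns pass through unchanged
features = pd.get_dummies(df[['Pclass', 'Sex', 'Age', 'Fare']], drop_first=True)
X_train, X_test, y_train, y_test = train_test_split(features, df['Survived'], test_size=0.2)
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print('Accuracy with encoded features:', model.score(X_test, y_test))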

Boston Linear Regression

Linear Regression applied to the Boston Housing dataset. The load_boston loader was removed from scikit-learn, so the data is fetched from OpenML (data_id=531).

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

boston = fetch_openml(data_id=531, parser='auto')  # Boston housing on OpenML
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = boston.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = LinearRegression()
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('Linear Regression MSE:', mean_squared_error(y_test, pred))
print('Linear Regression R2 Score:', r2_score(y_test, pred))
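
Since the model is linear, its coefficients are directly interpretable; a short sketch pairing each coefficient with its feature name:

for name, coef in zip(X.columns, model.coef_):
    print(f'{name}: {coef:.3f}')
print('Intercept:', model.intercept_)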

A* Search Algorithm

Implementation of the A* search algorithm using a priority queue (heapq).

import heapq

def a_star(start, goal, graph, h):
    """Return the cheapest path from start to goal, or None if unreachable."""
    open_l = [(h[start], start)]  # priority queue ordered by f = g + h
    came_f = {}                   # best-known predecessor of each node
    g_sc = {node: float('inf') for node in graph}  # cheapest known cost from start
    g_sc[start] = 0
    while open_l:
        _, curr = heapq.heappop(open_l)
        if curr == goal:
            # Walk the predecessor chain back to the start to rebuild the path
            path = []
            while curr in came_f:
                path.append(curr)
                curr = came_f[curr]
            return [start] + path[::-1]
        for n, w in graph[curr].items():
            t_g = g_sc[curr] + w
            if t_g < g_sc[n]:  # found a cheaper route to n
                came_f[n], g_sc[n] = curr, t_g
                heapq.heappush(open_l, (t_g + h[n], n))
    return None

# Toy weighted graph and an admissible heuristic (never overestimates the true cost to 'D')
G = {'A': {'B': 1, 'C': 3}, 'B': {'D': 1}, 'C': {'D': 1}, 'D': {}}
H = {'A': 2, 'B': 1, 'C': 1, 'D': 0}
print('A* Path:', a_star('A', 'D', G, H))
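
As a sanity check, an all-zero heuristic is trivially admissible and reduces A* to Dijkstra's algorithm; the path found should match:

zero_h = {node: 0 for node in G}
print('Dijkstra-equivalent path:', a_star('A', 'D', G, zero_h))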