Python Machine Learning Code Snippets Collection
A collection of self-contained Python snippets covering clustering, classification, regression, and the A* search algorithm.
Hierarchical Clustering Example
This snippet demonstrates hierarchical clustering using scipy and visualizes the result as a dendrogram, followed by applying Agglomerative Clustering.
# Hierarchical clustering demo: build a Ward-linkage dendrogram with scipy,
# then cut the hierarchy into flat clusters with sklearn's AgglomerativeClustering.
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as sch
import numpy as np
from sklearn.cluster import AgglomerativeClustering

np.random.seed(42)  # fix the random placeholder data so the output is reproducible
X = np.random.rand(50, 2)

# Ward linkage merges the pair of clusters that minimizes the increase
# in total within-cluster variance at each step.
dendrogram = sch.dendrogram(sch.linkage(X, method='ward'))
plt.title('Dendrogram')
plt.xlabel('Samples')
plt.ylabel('Euclidean distance')
plt.show()

# Cut the hierarchy into 5 flat clusters using the same linkage criterion.
hc = AgglomerativeClustering(n_clusters=5, linkage='ward')
print('Cluster assignments:', hc.fit_predict(X))
K-Means Clustering and Elbow Method
Code to determine the optimal number of clusters for K-Means using the Elbow Method.
# Elbow method: plot WCSS (K-Means inertia) for k = 1..10; the "elbow" of the
# curve suggests a reasonable number of clusters.
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Using random data as placeholder
np.random.seed(42)  # reproducible placeholder data
X = np.random.rand(100, 2)

wcss = []  # within-cluster sum of squares for each candidate k
for k in range(1, 11):
    # n_init=10 pins the historical default (sklearn >= 1.4 changed it to
    # 'auto'), keeping results comparable across versions.
    kmeans = KMeans(n_clusters=k, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

plt.plot(range(1, 11), wcss)
plt.title('Elbow Method')
plt.xlabel('Number of Clusters (k)')
plt.ylabel('WCSS')
plt.show()
Support Vector Machine (SVM) Implementation
Example of training an SVM classifier on the Wine dataset.
# Train a linear-kernel SVM on the Wine dataset and report held-out accuracy.
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine

data = load_wine()
# random_state fixes the 80/20 split so the printed accuracy is reproducible.
X_tr, X_te, y_tr, y_te = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42
)
svm = SVC(kernel='linear')
svm.fit(X_tr, y_tr)
print('SVM Accuracy:', svm.score(X_te, y_te))
Decision Tree Classifier
Implementation of a Decision Tree Classifier using the Wine dataset.
# Train a Decision Tree classifier on the Wine dataset and report held-out
# accuracy.
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split

data = load_wine()
# random_state fixes the 80/20 split so the printed accuracy is reproducible.
X_tr, X_te, y_tr, y_te = train_test_split(
    data.data, data.target, test_size=0.2, random_state=42
)
# Tree construction breaks feature ties randomly; seed it for repeatability.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_tr, y_tr)
print('Decision Tree Accuracy:', dt.score(X_te, y_te))
K-Nearest Neighbors (KNN) Classifier
Example of training a KNN classifier on the Iris dataset.
# K-Nearest Neighbors demo: fit a 3-neighbor classifier on the Iris dataset
# and report accuracy on a held-out 20% test split.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

iris = load_iris()
features, labels = iris.data, iris.target
X_tr, X_te, y_tr, y_te = train_test_split(features, labels, test_size=0.2)

model = KNeighborsClassifier(n_neighbors=3)
model.fit(X_tr, y_tr)
print('KNN Accuracy:', model.score(X_te, y_te))
Titanic Logistic Regression
Applying Logistic Regression to predict survival on the Titanic dataset.
# Logistic regression on the Titanic dataset: keep only numeric columns,
# drop rows with missing values, and predict the 'Survived' label.
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

df = pd.read_csv('titanic.csv')
# LogisticRegression needs purely numeric, complete input.
# NOTE(review): identifier-like numeric columns (e.g. 'PassengerId', if
# present) survive this filter and add noise — consider dropping explicitly.
df = df.select_dtypes(include=[float, int]).dropna()
X = df.drop('Survived', axis=1)
y = df['Survived']
# random_state fixes the split so the printed accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = LogisticRegression(max_iter=1000)  # raised max_iter so lbfgs converges
model.fit(X_train, y_train)
print('Titanic Logistic Regression Accuracy:', accuracy_score(y_test, model.predict(X_test)))
Boston Linear Regression
Linear Regression model applied to the Boston Housing dataset (using fetch_openml data).
# Linear regression on the Boston Housing data fetched from OpenML (data_id
# 531). Requires network access on first run; results are cached by sklearn.
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

boston = fetch_openml(data_id=531, parser='auto')
X = pd.DataFrame(boston.data, columns=boston.feature_names)
y = boston.target
# random_state fixes the split so the printed metrics are reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = LinearRegression()
model.fit(X_train, y_train)
pred = model.predict(X_test)
print('Linear Regression MSE:', mean_squared_error(y_test, pred))
print('Linear Regression R2 Score:', r2_score(y_test, pred))
A* Search Algorithm
Implementation of the A* search algorithm using a priority queue (heapq).
import heapq
def a_star(start, goal, graph, h):
    """Find a least-cost path from start to goal using the A* algorithm.

    graph maps each node to a {neighbor: edge_weight} dict; h maps each node
    to a heuristic estimate of the remaining cost to goal (must be admissible
    for the returned path to be cost-optimal). Returns the path as a list of
    nodes from start to goal, or None if goal is unreachable.
    """
    open_list = [(0, start)]   # min-heap of (f-score, node)
    came_from = {}             # best known predecessor of each node
    g_score = {node: float('inf') for node in graph}
    g_score[start] = 0
    while open_list:
        _, current = heapq.heappop(open_list)
        if current == goal:
            # Walk predecessors back toward start, then reverse.
            path = []
            while current in came_from:
                path.append(current)
                current = came_from[current]
            return [start] + path[::-1]
        for neighbor, weight in graph[current].items():
            tentative_g = g_score[current] + weight
            if tentative_g < g_score[neighbor]:
                # Cheaper route to neighbor found: record it and re-queue.
                came_from[neighbor] = current
                g_score[neighbor] = tentative_g
                heapq.heappush(open_list, (tentative_g + h[neighbor], neighbor))
    return None
# Worked example: four nodes; the cheapest route is A -> B -> D (total cost 2).
graph = {
    'A': {'B': 1, 'C': 3},
    'B': {'D': 1},
    'C': {'D': 1},
    'D': {},
}
heuristic = {'A': 3, 'B': 2, 'C': 2, 'D': 0}
print('A* Path:', a_star('A', 'D', graph, heuristic))
