# Machine Learning courses with 110+ real-time projects. Start now!
# Program 1
# Lifestyle Health Risk Dataset 1
import os # For creating the output folder before writing CSV files.

import matplotlib.pyplot as plt # For plotting graphs.
import numpy as np #For numerical operations and random data generation.
import pandas as pd # For handling tabular data.
from sklearn.cluster import KMeans # Unsupervised clustering algorithm from sklearn.
from sklearn.preprocessing import StandardScaler #Used to normalize features (mean = 0, std = 1).
# Build a reproducible synthetic dataset of lifestyle habits, one row per person.
#   Exercise:    normally distributed around 3 hrs/week, std 1.5 (clipped 0–10)
#   Sleep:       normally distributed around 7 hrs/day, std 1.0 (clipped 3–10)
#   Junk Food:   random integer from 0–7 times/week
#   Screen Time: normally distributed around 6 hrs/day, std 2.0 (clipped 2–14)
np.random.seed(42)  # Fixed seed so every run draws the same values.
n_people = 200  # Total number of synthetic individuals.

# The columns are filled in the same order the random draws are made
# (exercise, sleep, junk food, screen time); with a fixed seed, changing that
# order would change the generated values.
data = {}
data['PersonID'] = np.arange(1, n_people + 1)
data['Exercise (hrs/week)'] = np.clip(np.random.normal(3, 1.5, n_people), 0, 10)
data['Sleep (hrs/day)'] = np.clip(np.random.normal(7, 1.0, n_people), 3, 10)
data['Junk Food (times/week)'] = np.random.randint(0, 8, n_people)  # Upper bound 8 is exclusive.
data['Screen Time (hrs/day)'] = np.clip(np.random.normal(6, 2.0, n_people), 2, 14)

df = pd.DataFrame(data)
df  # Bare expression: shows the table in a notebook cell, does nothing in a script.
# Save the raw synthetic dataset (before clustering) as CSV, without the index column.
# The target folder is created first: to_csv does not create missing folders,
# so the script would otherwise stop here on a machine without this folder.
raw_csv_path = 'D://scikit_data/KMeans/lifestyle_health_risk.csv'
os.makedirs(os.path.dirname(raw_csv_path), exist_ok=True)
df.to_csv(raw_csv_path, index=False)
# Feature Selection & Normalization
# Only the four habit columns are used as clustering inputs; PersonID is an
# identifier and is left out.
features = [
    'Exercise (hrs/week)',
    'Sleep (hrs/day)',
    'Junk Food (times/week)',
    'Screen Time (hrs/day)',
]
X = df.loc[:, features]

# Standardize every feature (mean = 0, std = 1) so that all of them contribute
# equally during clustering, regardless of their original units.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)
X_scaled  # Bare expression: shows the scaled array in a notebook cell, does nothing in a script.
# Apply K-Means Clustering
# Split the standardized features into three clusters.
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 in older versions, 'auto' from 1.4 on); pinning it keeps the
# result independent of the installed version and avoids the FutureWarning
# emitted by 1.2/1.3. random_state fixes the random centroid initialization.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)

# fit_predict returns one cluster index (0, 1 or 2) per row. The numbers are
# cluster ids only; they do not rank the groups from low to high risk.
df['Health Risk Category'] = kmeans.fit_predict(X_scaled)

# Save the dataset again, now including the cluster label column.
# The target folder is created first: to_csv does not create missing folders.
clustered_csv_path = 'D://scikit_data/KMeans/lifestyle_health_risk1.csv'
os.makedirs(os.path.dirname(clustered_csv_path), exist_ok=True)
df.to_csv(clustered_csv_path, index=False)
# A scatter plot showing how people are grouped based on their sleep and diet habits.
# Points are colored by cluster label; the black X markers are the cluster centers.
# Intended to help compare the groups visually.
sleep_col = 'Sleep (hrs/day)'
junk_col = 'Junk Food (times/week)'

# The cluster centers are in standardized units; convert them back to the
# original units once and reuse the result for both axes.
centroids = scaler.inverse_transform(kmeans.cluster_centers_)

# Look up the centroid columns by feature name rather than by fixed position,
# so the markers stay on the right axes if the feature list is reordered.
sleep_idx = features.index(sleep_col)
junk_idx = features.index(junk_col)

plt.figure(figsize=(8, 6))
plt.scatter(df[sleep_col], df[junk_col],
            c=df['Health Risk Category'], cmap='coolwarm', s=60)
plt.scatter(
    centroids[:, sleep_idx],
    centroids[:, junk_idx],
    s=200, c='black', marker='X', label='Centroids')
plt.title('Lifestyle Clustering: Sleep vs. Junk Food')
plt.xlabel(sleep_col)
plt.ylabel(junk_col)
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()