Machine Learning courses with 110+ real-time projects. Start now!
Program 1
Insurance Claim Approval Dataset
# Step 1: Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt
# Step 2: Load dataset
# Read the claim-approval CSV from its fixed local path into a DataFrame.
csv_path = "D://scikit_data/insurancedata/insurance_claim_approval.csv"
df = pd.read_csv(csv_path)
# Quick inspection: first rows, missing-value count per column, and
# (rows, columns). These are bare expressions, so their values are only
# displayed in an interactive / notebook session.
df.head()
df.isna().sum()
df.shape
# Step 3: Encode categorical features
# Every categorical column is replaced in place by integer codes from its own
# LabelEncoder. The fitted encoders are kept, keyed by column name, so the
# same string -> code mapping can be applied to user input later on.
label_encoders = {}
for col in ["Gender", "Smoking", "PolicyType", "PreExistingCondition"]:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Save encoders for later use
# Bare expression: shows the encoded frame in an interactive session only.
df.head()
# Step 4: Split features and target
# "ClaimApproved" is the label to predict; every other column is a feature.
target_column = "ClaimApproved"
y = df[target_column]  # output
X = df.drop(columns=target_column)  # Input Data
# Step 5: Train-test split
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Bare expression: size of the held-out set (shown in interactive sessions).
len(X_test)
# Step 6: Initialize and train XGBoost model
# NOTE(review): `use_label_encoder` is reported as deprecated by recent
# XGBoost releases - confirm against the installed version before removing.
xgb_params = {
    "use_label_encoder": False,
    "eval_metric": 'logloss',
}
model = XGBClassifier(**xgb_params)
model.fit(X_train, y_train)
# Step 7: Predictions and Evaluation
# Score the model on the held-out rows: overall accuracy plus the
# confusion matrix of true vs. predicted labels.
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
accuracy_percent = round(acc * 100, 2)
print(" Model Accuracy:", accuracy_percent, "%")
print("Confusion Matrix:\n", cm)
# Step 9: User Input Prediction
# Collect one applicant's details interactively, encode the categorical
# answers with the encoders fitted on the training data, and predict.


def _prompt_number(prompt, cast):
    """Ask until the reply parses with *cast* (``int`` or ``float``).

    A typo re-prompts instead of aborting the script with ``ValueError``.
    """
    while True:
        reply = input(prompt).strip()
        try:
            return cast(reply)
        except ValueError:
            print(f"Invalid number: {reply!r}. Please try again.")


def _prompt_category(prompt, encoder):
    """Ask until the reply is a label known to *encoder*; return its code.

    ``LabelEncoder.transform`` raises ``ValueError`` for labels it did not
    see during fitting, so the reply is checked against ``encoder.classes_``
    first. An exact match wins; otherwise a case-insensitive match is
    accepted (e.g. "male" for "Male").
    """
    # Built once, outside the retry loop.
    by_text = {str(label): label for label in encoder.classes_}
    by_folded = {text.casefold(): label for text, label in by_text.items()}
    while True:
        reply = input(prompt).strip()
        if reply in by_text:
            label = by_text[reply]
        elif reply.casefold() in by_folded:
            label = by_folded[reply.casefold()]
        else:
            print(
                f"Unknown value {reply!r}. "
                f"Expected one of: {', '.join(by_text)}"
            )
            continue
        return encoder.transform([label])[0]


print("\n Enter details to predict claim approval:")
age = _prompt_number("Age: ", int)
gender = _prompt_category("Gender (Male/Female): ", label_encoders["Gender"])
bmi = _prompt_number("BMI: ", float)
smoking = _prompt_category("Smoking (Yes/No): ", label_encoders["Smoking"])
policy = _prompt_category(
    "Policy Type (Basic/Premium/Gold): ", label_encoders["PolicyType"]
)
amount = _prompt_number("Claim Amount: ", float)
# NOTE(review): pandas.read_csv may parse the literal text "None" as a
# missing value by default - confirm the encoder actually learned a "None"
# class; the validation above lists the labels it really knows.
condition = _prompt_category(
    "Pre-existing Condition (None/Diabetes/Heart Disease/Asthma): ",
    label_encoders["PreExistingCondition"],
)
stay = _prompt_number("Hospital Stay Days: ", int)
# Single-row frame; the keys are the feature column names the model is
# given at prediction time.
input_data = pd.DataFrame([{
    "Age": age,
    "Gender": gender,
    "BMI": bmi,
    "Smoking": smoking,
    "PolicyType": policy,
    "ClaimAmount": amount,
    "PreExistingCondition": condition,
    "HospitalStayDays": stay,
}])
# Predict
prediction = model.predict(input_data)[0]
print("\n Claim Status:", " Approved" if prediction == 1 else " Not Approved")
# Step 8: Feature Importance Plot
# Horizontal bar chart with one bar per feature column, sized by the trained
# model's importance score for that feature.
fig, ax = plt.subplots(figsize=(10, 6))
ax.barh(X.columns, model.feature_importances_, color='skyblue')
ax.set_title(" Feature Importance - Claim Approval")
ax.set_xlabel("Importance Score")
fig.tight_layout()
plt.show()