Machine Learning Project – Loan Prediction
Machine Learning courses with 100+ real-time projects. Start now!
Program 1
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sn
from sklearn.metrics import accuracy_score
#pandas 2.2.2
#numpy 1.24.3
#matplotlib 3.7.1
#scikit-learn 1.5.1
#Python 3.11.3
# Load the loan dataset into a DataFrame.
# The location is kept in one named constant so it is easy to change.
LOAN_CSV_PATH = "D://scikit_data/loan/loan_dataset.csv"
df_loan = pd.read_csv(LOAN_CSV_PATH)
df_loan.head()
type(df_loan)
df_loan.shape
# Count the missing values in each column before cleaning.
df_loan.isnull().sum()
# Drop every row that contains at least one missing value.
df_loan = df_loan.dropna()
# Confirm no missing values remain and check how many rows are left.
df_loan.isnull().sum()
df_loan.shape
df_loan.head(5)
# Inspect the categorical columns and the overall column dtypes.
df_loan['Education'].value_counts()
df_loan['Gender'].value_counts()
df_loan['Married'].value_counts()
df_loan.info()
df_loan['Dependents'].value_counts()
# Replace the open-ended '3+' bucket in Dependents with 4.
# The replacement is limited to the Dependents column (a frame-wide replace
# would also touch any other column that happened to contain '3+'), and the
# column is then converted to a numeric dtype so it does not end up holding a
# mix of strings and integers.
# NOTE(review): assumes every remaining Dependents value is a numeric string;
# pd.to_numeric raises a ValueError otherwise - confirm against the dataset.
df_loan = df_loan.assign(
    Dependents=pd.to_numeric(df_loan['Dependents'].replace('3+', '4'))
)
df_loan['Dependents'].value_counts()
df_loan.head()
# Data visualization: count of loans per Loan_Status, broken down by
# Education, Dependents, Married (marital status) and Property_Area.
# Every chart is drawn on its own figure and shown explicitly; without this,
# successive countplot calls in a plain script all draw onto the same current
# axes and nothing is ever displayed.
for feature in ('Education', 'Dependents', 'Married', 'Property_Area'):
    plt.figure()
    sn.countplot(x=feature, hue='Loan_Status', data=df_loan)
    plt.show()
# Label encoding: replace the text labels of each categorical column with
# integer codes.
df_loan.head()
# Nested mapping of {column: {label: code}}; each inner mapping is applied
# only to the column it is keyed by. The target key must be exactly
# 'Loan_Status': a key with trailing whitespace matches no column and the
# replacement is silently skipped.
label_codes = {
    'Loan_Status': {'Y': 1, 'N': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
    'Gender': {'Male': 0, 'Female': 1},
    'Self_Employed': {'Yes': 1, 'No': 0},
}
df_loan = df_loan.replace(label_codes)
df_loan.head()
# Feature matrix (independent variables): every column except the Loan_ID
# column and the Loan_Status target.
x = df_loan.drop(columns=['Loan_ID', 'Loan_Status'])
x
# Target vector (dependent variable).
y = df_loan['Loan_Status']
y.shape
# Split into training and testing data: 10% of the rows are held out for
# testing, and random_state is fixed so the split is reproducible.
#   x_train / y_train -> features and target used for training
#   x_test  / y_test  -> features and target used for testing
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=2,
)
len(x_train)
len(x_test)
# Model preparation: a logistic-regression classifier with default settings.
from sklearn.linear_model import LogisticRegression
model = LogisticRegression()
# Train the model on the training split.
model.fit(x_train, y_train)
# Predict on the training data to see how well the model fits what it saw.
y_pred_train = model.predict(x_train)
y_train.head(10)
y_pred_train
# Accuracy on the training dataset. accuracy_score expects the true labels
# first and the predictions second. The result gets its own name so it is not
# confused with the test-set accuracy computed below.
train_data_accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy Score ontraing dataset : ", train_data_accuracy)
# Predict on the held-out testing data.
y_pred_test = model.predict(x_test)
y_test
y_pred_test
# Accuracy on the testing dataset.
test_data_accuracy = accuracy_score(y_test, y_pred_test)
print("Accuracy Score on testing dataset : ", test_data_accuracy)
# model.score reports the same mean accuracy on the test split.
model.score(x_test, y_test)
df_loan.head(3)
# Check the model with a single hand-written applicant.
# The values must be given in the same order as the columns of x_train.
# Wrapping them in a one-row DataFrame with those column names keeps the
# input consistent with the DataFrame the model was fitted on, and fails
# loudly if the number of values does not match the number of features.
# Another sample to try: [0,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0]
sample = pd.DataFrame(
    [[0, 1, 0, 1, 1, 3000, 0.0, 66.0, 360.0, 1.0, 2]],
    columns=x_train.columns,
)
model.predict(sample)
Give me 15 seconds and I promise you the best tutorials.
Please share your happy experience on Google

