Machine Learning Project – Loan Prediction

Machine Learning courses with 100+ Real-time projects Start Now!!

Program 1

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import seaborn as sn
from sklearn.metrics import accuracy_score

#pandas                    2.2.2
#numpy                     1.24.3
#matplotlib                3.7.1
#scikit-learn              1.5.1
#Python                   3.11.3


# Load the loan dataset from CSV into a DataFrame and preview it.
df_loan = pd.read_csv("D://scikit_data/loan/loan_dataset.csv")
df_loan.head(5)

type(df_loan)

# (rows, columns) before cleaning.
df_loan.shape

# Count the missing values in each column.
df_loan.isna().sum()

# Discard every row that contains at least one missing value.
df_loan = df_loan.dropna(axis=0, how="any")

# Re-count the missing values; every column should now report zero.
df_loan.isna().sum()

# (rows, columns) after cleaning.
df_loan.shape

df_loan.head()



# Inspect how often each category occurs in the categorical columns.
df_loan['Education'].value_counts()

df_loan['Gender'].value_counts()

df_loan['Married'].value_counts()

df_loan.info()

df_loan['Dependents'].value_counts()

# Encode the open-ended "3+" Dependents category as 4 and make the column
# numeric. The replacement is limited to the Dependents column (a frame-wide
# replace would also rewrite any other cell that happens to hold "3+"), and
# the cast to int avoids leaving a column that mixes strings and integers.
df_loan['Dependents'] = df_loan['Dependents'].replace('3+', '4').astype(int)

df_loan['Dependents'].value_counts()

df_loan.head()


# Data visualization: count of each Loan_Status value within every category
# of Education, Dependents, Married and Property_Area.
# Each plot gets its own figure; without plt.figure() successive countplot
# calls in a plain script draw onto the same Axes and overwrite each other.
for column in ('Education', 'Dependents', 'Married', 'Property_Area'):
    plt.figure()
    sn.countplot(x=column, hue='Loan_Status', data=df_loan)
    plt.title(f'Loan_Status by {column}')

# Render all figures (required when running outside a notebook).
plt.show()


# Label encoding: convert each categorical text column to integer codes.
df_loan.head()

# One label -> code table per column. Keys must match the column names
# exactly; a misspelled key (e.g. with a trailing space) would be silently
# ignored by DataFrame.replace, so the lookup below is done per column and
# fails loudly instead.
label_encodings = {
    'Loan_Status': {'Y': 1, 'N': 0},
    'Married': {'Yes': 1, 'No': 0},
    'Education': {'Graduate': 1, 'Not Graduate': 0},
    'Property_Area': {'Rural': 0, 'Semiurban': 1, 'Urban': 2},
    'Gender': {'Male': 0, 'Female': 1},
    'Self_Employed': {'Yes': 1, 'No': 0},
}

for column, mapping in label_encodings.items():
    # Values that are not labels (e.g. already-encoded codes when this cell
    # is re-run) pass through unchanged, keeping the step idempotent.
    encoded = df_loan[column].map(lambda value: mapping.get(value, value))

    # Anything that is still not one of the expected codes is an unknown
    # label; report it here rather than letting model fitting fail later.
    unexpected = set(encoded.unique()) - set(mapping.values())
    if unexpected:
        raise ValueError(
            f"Column {column!r} contains values with no encoding: "
            f"{sorted(map(str, unexpected))}"
        )

    df_loan[column] = encoded.astype(int)

df_loan.head()


# Independent variables: every column except Loan_ID and the target
# Loan_Status.
x = df_loan.drop(columns=['Loan_ID', 'Loan_Status'])

x

# Dependent variable: the Loan_Status column.
y = df_loan['Loan_Status']

y.shape

# Split into training and testing sets, holding out 10% of the rows for
# testing; the fixed random_state makes the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=2,
)

# x_train -> independent variables used for training
# x_test  -> independent variables used for testing
# y_train -> dependent variable used for training
# y_test  -> dependent variable used for testing

# Number of rows in each split.
x_train.shape[0]

x_test.shape[0]


# Model preparation
from sklearn.linear_model import LogisticRegression

# NOTE(review): the features are passed in unscaled and the solver settings
# are the library defaults; if scikit-learn reports a ConvergenceWarning,
# consider scaling the features or raising max_iter.
model = LogisticRegression()

# Train the model on the training split.
model.fit(x_train, y_train)

# Predictions for the training data.
y_pred_train = model.predict(x_train)

y_train.head(10)

y_pred_train

# Accuracy on the training dataset. Kept in its own variable so it is not
# confused with the test accuracy computed below; accuracy_score takes the
# true labels first, then the predictions.
train_data_accuracy = accuracy_score(y_train, y_pred_train)
print("Accuracy Score on training dataset : ", train_data_accuracy)

# Predictions for the held-out testing data.
y_pred_test = model.predict(x_test)

y_test

y_pred_test

# Accuracy on the testing dataset.
test_data_accuracy = accuracy_score(y_test, y_pred_test)
print("Accuracy Score on testing dataset : ", test_data_accuracy)

# model.score on a classifier reports the same mean accuracy as above.
model.score(x_test, y_test)


df_loan.head(3)

# Check the model with an individual applicant.
# The values are listed in the same order as the training columns; wrapping
# them in a DataFrame labelled with model.feature_names_in_ keeps the input
# consistent with how the model was fitted (a bare nested list makes
# scikit-learn warn about missing feature names) and raises immediately if
# the number of values does not match the number of features.
# Alternative sample to try: [0,1,1,1,0,4583,1508.0,128.0,360.0,1.0,0]
sample = pd.DataFrame(
    [[0, 1, 0, 1, 1, 3000, 0.0, 66.0, 360.0, 1.0, 2]],
    columns=model.feature_names_in_,
)
model.predict(sample)

Did you like this article? If Yes, please give DataFlair 5 Stars on Google

Machine Learning Project – Loan Prediction

Program 1

Leave a Reply Cancel reply

About DataFlair

Trending Courses

Trending Data Science Courses

Free Big Data Courses

Trending Programming Courses

Trending Data Science Tutorials

Trending Projects

Trending Programming Tutorials

Trending Tutorials