Machine Learning Project – Car Price Prediction
Machine Learning courses with 100+ Real-time projects Start Now!!
Program 1
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
#pandas 2.2.2
#numpy 1.24.3
#matplotlib 3.7.1
#scikit-learn 1.5.1
# Read the data set
df = pd.read_csv("D://scikit_data/car/car_data.csv")
# Outside a notebook a bare expression's value is thrown away, so the
# inspection results are printed explicitly.
print(df.head())
print(type(df))
# Number of rows and columns
print(df.shape)
# Display information about the data set (info() writes its own output)
df.info()
# Number of missing values per column
print(df.isnull().sum())
# Check how many rows fall into each category of the categorical columns
for column in ('Fuel_Type', 'Seller_Type', 'Transmission'):
    print(df[column].value_counts())
# Replace the text categories with integer codes so the regression can use them
CATEGORY_CODES = {
    'Fuel_Type': {'Petrol': 0, 'Diesel': 1, 'CNG': 2},
    'Seller_Type': {'Dealer': 0, 'Individual': 1},
    'Transmission': {'Manual': 0, 'Automatic': 1},
}
for column, codes in CATEGORY_CODES.items():
    # DataFrame.replace on a text column relies on a silent object -> int
    # downcast that pandas 2.2 deprecates (FutureWarning); mapping each
    # value explicitly avoids it. Values without a code are passed through
    # unchanged, exactly as replace() left them, so re-running is harmless.
    df[column] = df[column].map(lambda value, codes=codes: codes.get(value, value))
# Show the first rows to confirm the encoding (a bare expression would be discarded)
print(df.head())
# Separate the independent variables (features) from the dependent variable (target)
x = df.drop(columns=['Car_Name', 'Selling_Price'])  # independent variables
y = df['Selling_Price']  # dependent variable
# Printed explicitly: a bare `x` / `y` shows nothing when run as a script
print(x)
print(y)
# Split the data into training and testing sets:
# 10% of the rows are held out for testing; random_state fixes the shuffle
# so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=2
)
# x_train ---> training independent variables
# y_train ---> training dependent variable
# x_test  ---> test independent variables
# y_test  ---> test dependent variable
print(len(x_train))
print(len(x_test))
# Model preparation: fit a linear regression on the training split
model = LinearRegression()
model.fit(x_train, y_train)
# Predict on the same rows the model was trained on
y_pred_train = model.predict(x_train)
# Printed explicitly: a bare `y_train` shows nothing when run as a script
print(y_train)
# Scatter of actual vs. predicted prices for the training rows
plt.scatter(y_train, y_pred_train, marker='.', color='blue')
plt.xlabel("Actual Price")
plt.ylabel("Predicated Price")
plt.title("Actual Price vs Predicated Price")
plt.show()
# NOTE: despite its name, error_score holds the R^2 score returned by
# metrics.r2_score, not an error measure.
error_score = metrics.r2_score(y_train, y_pred_train)
print("R squared Error for training data set ", error_score)
# Prediction on the testing data
y_pred_test = model.predict(x_test)
# Scatter of actual vs. predicted prices for the held-out test rows
plt.scatter(y_test, y_pred_test, marker='.', color='red')
plt.xlabel("Actual Price")
plt.ylabel("Predicated Price")
plt.title("Actual Price vs Predicated Price")
plt.show()
# NOTE: despite its name, error_score holds the R^2 score returned by
# metrics.r2_score, not an error measure.
error_score = metrics.r2_score(y_test, y_pred_test)
# The label previously said "training" although this is the test-set score
print("R squared Error for test data set ", error_score)
# The model's own score on the test split, printed so the value is not
# discarded when run as a script
print(model.score(x_test, y_test))
Did you like this article? If Yes, please give DataFlair 5 Stars on Google

