How to Split a Dataset into Train and Test Sets in Machine Learning
Machine Learning courses with 100+ real-time projects. Start now!
Program 1
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# Load the car-price data set.
# The path is a raw string: in a normal string literal the backslashes in
# "\scikit_data" and "\cardata" form invalid escape sequences ("\s", "\c").
df = pd.read_csv(r"D:\scikit_data\cardata\carprices.csv")
df.head(5)
# Rename the columns that contain brackets/spaces to plain identifiers so
# they are easier to reference below.
df = df.rename(columns={'Age(yrs)': 'Age', 'Sell Price($)': 'SellPrice'})
df.head(5)
# Exploratory plots: selling price against each candidate feature.
# pyplot draws onto the current figure, so each scatter is labelled and shown
# on its own; otherwise both point clouds (same colour, different x units)
# would share one set of axes together with whatever is plotted next.
plt.scatter(df['Mileage'], df['SellPrice'], color='red')
plt.xlabel("Mileage")
plt.ylabel("Sell Price")
plt.show()
plt.scatter(df['Age'], df['SellPrice'], color='red')
plt.xlabel("Age")
plt.ylabel("Sell Price")
plt.show()
from sklearn.model_selection import train_test_split
# Independent variables (features) and dependent variable (target).
feature_columns = ['Mileage', 'Age']
target_columns = ['SellPrice']
x = df[feature_columns]
y = df[target_columns]

# Split with test_size=0.3; train_test_split returns, in order:
#   x_train -> training data set for the independent variables
#   x_test  -> testing data set for the independent variables
#   y_train -> training data set for the dependent variable
#   y_test  -> testing data set for the dependent variable
split_parts = train_test_split(x, y, test_size=0.3)
x_train, x_test, y_train, y_test = split_parts

# Sizes of each part of the split (echoed only in an interactive session).
len(x_train)
len(y_train)
len(x_test)
len(y_test)

# The split frames themselves (echoed only in an interactive session).
x_train
y_train
x_test
y_test
from sklearn import linear_model
# Build a linear regression model and fit it on the training split
# (fit() returns the fitted estimator itself).
model = linear_model.LinearRegression().fit(x_train, y_train)

# Predictions and score on the held-out test split
# (echoed only in an interactive session).
model.predict(x_test)
model.score(x_test, y_test)
# Prediction on the training dataset: plot actual vs. predicted prices and
# report the R2 score on the rows the model was fitted on.
y_pred = model.predict(x_train)
plt.scatter(y_train, y_pred, color='red')
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.show()
from sklearn.metrics import r2_score
print("R2 Score for Training data:", r2_score(y_train, y_pred))
# Prediction on the test dataset: plot actual vs. predicted prices and
# report the R2 score on the held-out rows.
y_pred = model.predict(x_test)
plt.scatter(y_test, y_pred, color='blue', marker='+')
plt.xlabel("Actual Price")
plt.ylabel("Predicted Price")
plt.show()
print("R2 Score for Testing data:", r2_score(y_test, y_pred))
We work very hard to provide you with quality material.
Could you take 15 seconds and share your happy experience on Google?

