Site icon DataFlair

ML Project – Stock Price Prediction using Gradient Boosting

Machine Learning courses with 110+ Real-time projects Start Now!!

Program 1

Stock Market Dataset

# Import libraries

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Load the dataset and turn the Date column into real datetime values.
DATA_PATH = "D://scikit_data/stock/stock_market_dataset.csv"
df = pd.read_csv(DATA_PATH)
df["Date"] = pd.to_datetime(df["Date"])
df.info()

# Feature engineering: expose the calendar parts of each row's date
# (day of month, month, year) as numeric columns so the model can pick up
# seasonal or yearly patterns.
df = df.assign(
    Day=df["Date"].dt.day,
    Month=df["Date"].dt.month,
    Year=df["Date"].dt.year,
)
df.head()

# Independent variables (model inputs) and the dependent variable (target).
feature_columns = ["Open", "High", "Low", "Volume", "Day", "Month", "Year"]
X = df[feature_columns]
y = df["Close"]
y

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
# NOTE(review): train_test_split shuffles rows by default, so test rows are
# interleaved in time with training rows - confirm this is intended for
# time-ordered stock data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
len(X_test)

# Gradient boosting: 100 shallow trees fitted one after another, each one
# correcting the errors of the ensemble so far. max_depth=3 keeps the
# individual trees small to limit overfitting.
model = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
model.fit(X_train, y_train)

# Step 7: Predict and evaluate
import matplotlib.pyplot as plt
import pandas as pd

# Predict closing prices for the held-out rows.
y_pred = model.predict(X_test)

# X_test keeps the row labels of df, so they can be used to look up the
# date belonging to each test row.
date_test = df.loc[X_test.index, "Date"]

# Prepare results for plotting, sorted chronologically so the line plots
# run left-to-right in time.
results = pd.DataFrame({
    "Date": date_test.values,
    "Actual": y_test.values,
    "Predicted": y_pred
}).sort_values("Date")

# RMSE is taken as the square root of the MSE. The `squared=False` keyword
# of mean_squared_error was deprecated in scikit-learn 1.4 and removed in
# 1.6, so this form works on both old and new releases.
rmse = mean_squared_error(y_test, y_pred) ** 0.5
r2 = r2_score(y_test, y_pred)
print("Gradient Boosting Model Performance:")
print(f"RMSE: {rmse:.2f}")
print(f"R2 Score: {r2:.2f}")


# Plot the results
plt.figure(figsize=(12, 6))
plt.plot(results["Date"], results["Actual"], label="Actual Close Price", linewidth=2)
plt.plot(results["Date"], results["Predicted"], label="Predicted Close Price", linewidth=2)
plt.xlabel("Date")
plt.ylabel("Stock Close Price")
plt.title(" Actual vs Predicted Stock Close Prices")
plt.legend()
plt.grid(True)
# Rotate the tick labels first, then fit the layout, so the rotated labels
# are accounted for and not clipped at the bottom of the figure.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Step 8: Predict close price for new user input
print("\n Enter today's market data to predict closing price:")

# One entry per model feature: (column name, prompt text, converter).
# The order matches the feature columns the model was trained on, and the
# prompts are asked in exactly this order.
field_specs = [
    ("Open", "Open price: ", float),
    ("High", "High price: ", float),
    ("Low", "Low price: ", float),
    ("Volume", "Volume: ", int),
    ("Day", "Day of the month: ", int),
    ("Month", "Month: ", int),
    ("Year", "Year: ", int),
]
user_row = {
    column: convert(input(prompt))
    for column, prompt, convert in field_specs
}

# Single-row DataFrame with the same column names used during training.
input_data = pd.DataFrame([user_row])

# Predict and show result (predict returns an array with one value here).
predicted_close = model.predict(input_data)

print("Predict close price: ", predicted_close)

Exit mobile version