updating LSTMDQN.py to incorporate Optuna tuning into the DQN model training, and possibly rerunning if the DQN underperforms

This commit is contained in:
2025-01-29 18:32:56 +00:00
parent 2ca8890e5f
commit 78ef2965b8
3 changed files with 440 additions and 1046 deletions

View File

@@ -1 +1,2 @@
venv/
.python-version

View File

@@ -3,42 +3,44 @@ import sys
import argparse
import numpy as np
import pandas as pd
import logging
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns

# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.losses import Huber
from tensorflow.keras.regularizers import l2

# Sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib

# Optuna
import optuna
from optuna.integration import KerasPruningCallback

# RL stuff
import gym
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback

# Suppress TensorFlow logs beyond errors
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

######################################################
# 1. DATA LOADING & ADVANCED TECHNICAL INDICATORS
######################################################
def load_data(file_path):
    logging.info(f"Loading data from: {file_path}")
    try:
@@ -68,80 +70,70 @@ def load_data(file_path):
logging.info("Data loaded and sorted successfully.") logging.info("Data loaded and sorted successfully.")
return df return df
def compute_rsi(series, window=14): def compute_rsi(series, window=14):
delta = series.diff() delta = series.diff()
gain = delta.where(delta > 0, 0).rolling(window=window).mean() gain = delta.where(delta>0, 0).rolling(window=window).mean()
loss = -delta.where(delta < 0, 0).rolling(window=window).mean() loss = -delta.where(delta<0, 0).rolling(window=window).mean()
RS = gain / (loss + 1e-9) RS = gain / (loss+1e-9)
return 100 - (100 / (1 + RS)) return 100 - (100/(1+RS))
def compute_macd(series, span_short=12, span_long=26, span_signal=9): def compute_macd(series, span_short=12, span_long=26, span_signal=9):
ema_short = series.ewm(span=span_short, adjust=False).mean() ema_short = series.ewm(span=span_short, adjust=False).mean()
ema_long = series.ewm(span=span_long, adjust=False).mean() ema_long = series.ewm(span=span_long, adjust=False).mean()
macd_line = ema_short - ema_long macd_line = ema_short - ema_long
signal_line = macd_line.ewm(span=span_signal, adjust=False).mean() signal_line = macd_line.ewm(span=span_signal, adjust=False).mean()
return macd_line - signal_line # histogram return macd_line - signal_line # histogram
def compute_obv(df): def compute_obv(df):
signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0) signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0)
return signed_volume.cumsum() return signed_volume.cumsum()
def compute_adx(df, window=14): def compute_adx(df, window=14):
"""Pseudo-ADX approach using rolling True Range / Close.""" df['H-L'] = df['High'] - df['Low']
df['H-L'] = df['High'] - df['Low']
df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs() df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs()
df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs() df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs()
tr = df[['H-L','H-Cp','L-Cp']].max(axis=1) tr = df[['H-L','H-Cp','L-Cp']].max(axis=1)
tr_rolling = tr.rolling(window=window).mean() tr_rolling = tr.rolling(window=window).mean()
adx_placeholder = tr_rolling / (df['Close'] + 1e-9) adx_placeholder = tr_rolling/(df['Close']+1e-9)
df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True) df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True)
return adx_placeholder return adx_placeholder
def compute_bollinger_bands(series, window=20, num_std=2): def compute_bollinger_bands(series, window=20, num_std=2):
sma = series.rolling(window=window).mean() sma = series.rolling(window=window).mean()
std = series.rolling(window=window).std() std = series.rolling(window=window).std()
upper = sma + num_std * std upper = sma + num_std*std
lower = sma - num_std * std lower = sma - num_std*std
bandwidth = (upper - lower) / (sma + 1e-9) bandwidth = (upper - lower)/(sma + 1e-9)
return upper, lower, bandwidth return upper, lower, bandwidth
def compute_mfi(df, window=14): def compute_mfi(df, window=14):
typical_price = (df['High'] + df['Low'] + df['Close']) / 3 typical_price = (df['High']+ df['Low']+ df['Close'])/3
money_flow = typical_price * df['Volume'] money_flow = typical_price* df['Volume']
prev_tp = typical_price.shift(1) prev_tp = typical_price.shift(1)
flow_pos = money_flow.where(typical_price > prev_tp, 0) flow_pos = money_flow.where(typical_price>prev_tp, 0)
flow_neg = money_flow.where(typical_price < prev_tp, 0) flow_neg = money_flow.where(typical_price<prev_tp, 0)
pos_sum = flow_pos.rolling(window=window).sum() pos_sum = flow_pos.rolling(window=window).sum()
neg_sum = flow_neg.rolling(window=window).sum() neg_sum = flow_neg.rolling(window=window).sum()
mfi = 100 - (100 / (1 + pos_sum/(neg_sum+1e-9))) mfi= 100-(100/(1+ pos_sum/(neg_sum+1e-9)))
return mfi return mfi
def calculate_technical_indicators(df): def calculate_technical_indicators(df):
logging.info("Calculating technical indicators...") logging.info("Calculating technical indicators...")
df['RSI'] = compute_rsi(df['Close'], 14) df['RSI'] = compute_rsi(df['Close'], 14)
df['MACD'] = compute_macd(df['Close']) df['MACD']= compute_macd(df['Close'])
df['OBV'] = compute_obv(df) df['OBV'] = compute_obv(df)
df['ADX'] = compute_adx(df) df['ADX'] = compute_adx(df)
up, low, bw = compute_bollinger_bands(df['Close'], 20, 2) up, lo, bw = compute_bollinger_bands(df['Close'], 20, 2)
df['BB_Upper'] = up df['BB_Upper']= up
df['BB_Lower'] = low df['BB_Lower']= lo
df['BB_Width'] = bw df['BB_Width']= bw
df['MFI'] = compute_mfi(df, 14)
df['MFI'] = compute_mfi(df,14)
df['SMA_5'] = df['Close'].rolling(5).mean() df['SMA_5'] = df['Close'].rolling(5).mean()
df['SMA_10'] = df['Close'].rolling(10).mean() df['SMA_10']= df['Close'].rolling(10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean() df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean() df['EMA_10']= df['Close'].ewm(span=10, adjust=False).mean()
df['STDDEV_5'] = df['Close'].rolling(5).std() df['STDDEV_5'] = df['Close'].rolling(5).std()
df.dropna(inplace=True) df.dropna(inplace=True)
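A quick way to sanity-check compute_rsi on synthetic data (a sketch; the only claim is that the formula above stays inside the 0-100 band once the 14-bar window fills):

import numpy as np
import pandas as pd

prices = pd.Series(100 + np.cumsum(np.random.randn(60)))  # synthetic close prices
rsi = compute_rsi(prices, window=14)
print(rsi.dropna().between(0, 100).all())  # expected: True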
@@ -150,67 +142,123 @@ def calculate_technical_indicators(df):
###############################
# 2. ARG PARSING
###############################
def parse_arguments():
    parser = argparse.ArgumentParser(description='All-in-One: LSTM + DQN (with LSTM predictions) + Tuning.')
    parser.add_argument('csv_path', type=str,
                        help='Path to CSV data with columns [time, open, high, low, close, volume].')
    parser.add_argument('--lstm_window_size', type=int, default=15,
                        help='Sequence window size for LSTM. Default=15.')
    parser.add_argument('--dqn_total_timesteps', type=int, default=50000,
                        help='Total timesteps to train each DQN candidate. Default=50000.')
    parser.add_argument('--dqn_eval_episodes', type=int, default=1,
                        help='Number of episodes to evaluate DQN in the tuning step. Default=1 (entire dataset once).')
    parser.add_argument('--n_trials_lstm', type=int, default=30,
                        help='Number of Optuna trials for LSTM. Default=30.')
    parser.add_argument('--n_trials_dqn', type=int, default=20,
                        help='Number of Optuna trials for DQN. Default=20.')
    return parser.parse_args()
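With these flags, a typical invocation of the script (the script name comes from the commit message; the CSV path and values here are only illustrative) would be:

python LSTMDQN.py data/ohlcv.csv --lstm_window_size 15 --dqn_total_timesteps 50000 --n_trials_lstm 30 --n_trials_dqn 20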
###############################
# 3. CUSTOM DQN CALLBACK: LOG ACTIONS + REWARDS
###############################
class ActionLoggingCallback(BaseCallback):
    """
    Logs distribution of actions and average reward after each rollout.
    For off-policy (DQN), "rollout" can be a bit different than on-policy,
    but stable-baselines3 still calls `_on_rollout_end` periodically.
    """
    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.action_buffer = []
        self.reward_buffer = []

    def _on_training_start(self):
        self.action_buffer = []
        self.reward_buffer = []

    def _on_step(self):
        action = self.locals.get('action', None)
        reward = self.locals.get('reward', None)
        if action is not None:
            self.action_buffer.append(action)
        if reward is not None:
            self.reward_buffer.append(reward)
        return True

    def _on_rollout_end(self):
        actions = np.array(self.action_buffer)
        rewards = np.array(self.reward_buffer)
        if len(actions) > 0:
            unique, counts = np.unique(actions, return_counts=True)
            total = len(actions)
            distr_str = []
            for act, c in zip(unique, counts):
                distr_str.append(f"Action {act}: {c} times ({100*c/total:.2f}%)")
            logging.info(" -- DQN Rollout End -- ")
            logging.info("  " + ", ".join(distr_str))
            logging.info(f"  Avg Reward this rollout: {rewards.mean():.4f} "
                         f"(min={rewards.min():.4f}, max={rewards.max():.4f})")
        self.action_buffer = []
        self.reward_buffer = []

###############################
# 4. MAIN
###############################
def main():
    args = parse_arguments()
    csv_path = args.csv_path
    lstm_window_size = args.lstm_window_size
    dqn_total_timesteps = args.dqn_total_timesteps
    dqn_eval_episodes = args.dqn_eval_episodes
    n_trials_lstm = args.n_trials_lstm
    n_trials_dqn = args.n_trials_dqn

    ##########################################
    # A) LSTM PART: LOAD, PREPROCESS, TUNE
    ##########################################
    # 1) Load & preprocess
    df = load_data(csv_path)
    df = calculate_technical_indicators(df)

    # We'll exclude 'Close' from the feature set
    feature_columns = [
        'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5',
        'RSI', 'MACD', 'ADX', 'OBV', 'Volume', 'Open', 'High', 'Low',
        'BB_Upper', 'BB_Lower', 'BB_Width', 'MFI'
    ]
    target_column = 'Close'
    df = df[['Date'] + feature_columns + [target_column]].dropna()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    X_all = df[feature_columns].values
    y_all = df[[target_column]].values

    X_scaled = scaler_features.fit_transform(X_all)
    y_scaled = scaler_target.fit_transform(y_all).flatten()

    # 2) Create sequences
    def create_sequences(features, target, window_size):
        X_seq, y_seq = [], []
        for i in range(len(features) - window_size):
            X_seq.append(features[i:i+window_size])
            y_seq.append(target[i+window_size])
        return np.array(X_seq), np.array(y_seq)

    X, y = create_sequences(X_scaled, y_scaled, lstm_window_size)

    # 3) Split into train/val/test
    train_size = int(len(X) * 0.7)
    val_size = int(len(X) * 0.15)
    test_size = len(X) - train_size - val_size

    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
    X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

    logging.info(f"Scaled training features shape: {X_train.shape}")
    logging.info(f"Scaled validation features shape: {X_val.shape}")
@@ -219,14 +267,14 @@ def main():
logging.info(f"Scaled validation target shape: {y_val.shape}") logging.info(f"Scaled validation target shape: {y_val.shape}")
logging.info(f"Scaled testing target shape: {y_test.shape}") logging.info(f"Scaled testing target shape: {y_test.shape}")
# 5) GPU or CPU # 4) GPU config
def configure_device(): def configure_device():
gpus = tf.config.list_physical_devices('GPU') gpus = tf.config.list_physical_devices('GPU')
if gpus: if gpus:
try: try:
for gpu in gpus: for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True) tf.config.experimental.set_memory_growth(gpu, True)
logging.info(f"{len(gpus)} GPU(s) detected and configured.") logging.info(f"{len(gpus)} GPU(s) detected & configured.")
except RuntimeError as e: except RuntimeError as e:
logging.error(e) logging.error(e)
else: else:
@@ -234,39 +282,44 @@ def main():
    configure_device()

    # 5) Build LSTM function
    def build_lstm(input_shape, hyperparams):
        model = Sequential()
        num_layers = hyperparams['num_lstm_layers']
        units = hyperparams['lstm_units']
        drop = hyperparams['dropout_rate']

        for i in range(num_layers):
            return_seqs = (i < num_layers - 1)
            model.add(Bidirectional(
                LSTM(units, return_sequences=return_seqs, kernel_regularizer=l2(1e-4)),
                input_shape=input_shape if i == 0 else None
            ))
            model.add(Dropout(drop))

        model.add(Dense(1, activation='linear'))

        opt_name = hyperparams['optimizer']
        lr = hyperparams['learning_rate']
        decay = hyperparams['decay']
        if opt_name == 'Adam':
            opt = Adam(learning_rate=lr, decay=decay)
        elif opt_name == 'Nadam':
            opt = Nadam(learning_rate=lr)
        else:
            opt = Adam(learning_rate=lr)

        model.compile(loss=Huber(), optimizer=opt, metrics=['mae'])
        return model

    # 6) Optuna objective for LSTM
    def lstm_objective(trial):
        num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
        lstm_units = trial.suggest_categorical('lstm_units', [32, 64, 96, 128])
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
        optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'Nadam'])
        decay = trial.suggest_float('decay', 0.0, 1e-4)

        hyperparams = {
            'num_lstm_layers': num_lstm_layers,
@@ -277,295 +330,390 @@ def main():
            'decay': decay
        }

        model_ = build_lstm((X_train.shape[1], X_train.shape[2]), hyperparams)
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
        cb_prune = KerasPruningCallback(trial, 'val_loss')

        history = model_.fit(
            X_train, y_train,
            epochs=100,
            batch_size=16,
            validation_data=(X_val, y_val),
            callbacks=[early_stop, lr_reduce, cb_prune],
            verbose=0
        )
        val_mae = min(history.history['val_mae'])
        return val_mae

    logging.info("Starting LSTM hyperparam optimization with Optuna...")
    study_lstm = optuna.create_study(direction='minimize')
    study_lstm.optimize(lstm_objective, n_trials=n_trials_lstm)
    best_lstm_params = study_lstm.best_params
    logging.info(f"Best LSTM Hyperparams: {best_lstm_params}")

    # 7) Train final LSTM
    final_lstm = build_lstm((X_train.shape[1], X_train.shape[2]), best_lstm_params)
    early_stop_final = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    lr_reduce_final = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    logging.info("Training best LSTM model with found hyperparams...")
    hist = final_lstm.fit(
        X_train, y_train,
        epochs=300,
        batch_size=16,
        validation_data=(X_val, y_val),
        callbacks=[early_stop_final, lr_reduce_final],
        verbose=1
    )
    # Evaluate LSTM
    def evaluate_lstm(model, X_test, y_test):
        logging.info("Evaluating final LSTM...")
        y_pred_scaled = model.predict(X_test).flatten()
        y_pred_scaled = np.clip(y_pred_scaled, 0, 1)
        y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
        y_test_actual = scaler_target.inverse_transform(y_test.reshape(-1, 1)).flatten()

        mse_ = mean_squared_error(y_test_actual, y_pred)
        rmse_ = np.sqrt(mse_)
        mae_ = mean_absolute_error(y_test_actual, y_pred)
        r2_ = r2_score(y_test_actual, y_pred)

        direction_actual = np.sign(np.diff(y_test_actual))
        direction_pred = np.sign(np.diff(y_pred))
        directional_accuracy = np.mean(direction_actual == direction_pred)

        logging.info(f"Test MSE: {mse_}")
        logging.info(f"Test RMSE: {rmse_}")
        logging.info(f"Test MAE: {mae_}")
        logging.info(f"Test R2 Score: {r2_}")
        logging.info(f"Directional Accuracy: {directional_accuracy}")

        # Plot
        plt.figure(figsize=(14, 7))
        plt.plot(y_test_actual, label='Actual Price')
        plt.plot(y_pred, label='Predicted Price')
        plt.title('LSTM: Actual vs Predicted')
        plt.legend()
        plt.grid(True)
        plt.savefig('lstm_actual_vs_pred.png')
        plt.close()

        # Tabulate first 40
        table = []
        limit = min(40, len(y_test_actual))
        for i in range(limit):
            table.append([i, round(y_test_actual[i], 2), round(y_pred[i], 2)])
        headers = ["Index", "Actual Price", "Predicted Price"]
        print(tabulate(table, headers=headers, tablefmt="pretty"))

        return r2_, directional_accuracy

    _r2, _diracc = evaluate_lstm(final_lstm, X_test, y_test)

    # Save LSTM + scalers
    final_lstm.save('best_lstm_model.h5')
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')
    logging.info("Saved best LSTM model + scalers (best_lstm_model.h5, scaler_features.pkl, scaler_target.pkl).")
    ############################################################
    # B) DQN PART: BUILD ENV THAT USES THE LSTM + FORECAST
    ############################################################
    class StockTradingEnvWithLSTM(gym.Env):
        """
        An environment that uses the LSTM model's predicted next-day close
        as part of the observation:
        obs = [technical indicators, balance, shares, cost_basis, predicted_next_close].
        Reward => net_worth - initial_balance each step.
        """
        metadata = {'render.modes': ['human']}

        def __init__(self, df, feature_columns, lstm_model, scaler_features, scaler_target,
                     window_size=15, initial_balance=10000, transaction_cost=0.001):
            super().__init__()
            self.df = df.reset_index(drop=True)
            self.feature_columns = feature_columns
            self.lstm_model = lstm_model
            self.scaler_features = scaler_features
            self.scaler_target = scaler_target
            self.window_size = window_size

            self.initial_balance = initial_balance
            self.balance = initial_balance
            self.net_worth = initial_balance
            self.transaction_cost = transaction_cost

            self.max_steps = len(df)
            self.current_step = 0
            self.shares_held = 0
            self.cost_basis = 0

            # raw array of features
            self.raw_features = df[feature_columns].values

            # 0=Sell, 1=Hold, 2=Buy
            self.action_space = spaces.Discrete(3)

            # observation dimension = len(feature_columns)+3+1 => 17 + 3 + 1 = 21
            self.observation_space = spaces.Box(
                low=0, high=1,
                shape=(len(feature_columns) + 3 + 1,),
                dtype=np.float32
            )

        def reset(self):
            self.balance = self.initial_balance
            self.net_worth = self.initial_balance
            self.current_step = 0
            self.shares_held = 0
            self.cost_basis = 0
            return self._get_obs()

        def _get_obs(self):
            row = self.raw_features[self.current_step]
            row_max = np.max(row) if np.max(row) != 0 else 1.0
            row_norm = row / row_max

            # account info
            additional = np.array([
                self.balance / self.initial_balance,
                self.shares_held / 100.0,
                self.cost_basis / (self.initial_balance + 1e-9)
            ], dtype=np.float32)

            # LSTM prediction
            if self.current_step < self.window_size:
                # not enough history => no forecast
                predicted_close = 0.0
            else:
                seq = self.raw_features[self.current_step - self.window_size:self.current_step]
                seq_scaled = self.scaler_features.transform(seq)
                seq_scaled = np.expand_dims(seq_scaled, axis=0)  # shape (1, window_size, #features)
                pred_scaled = self.lstm_model.predict(seq_scaled, verbose=0).flatten()[0]
                pred_scaled = np.clip(pred_scaled, 0, 1)
                unscaled = self.scaler_target.inverse_transform([[pred_scaled]])[0, 0]
                # either keep it raw or scale it; we do a naive /1000 scale
                predicted_close = unscaled / 1000.0

            obs = np.concatenate([row_norm, additional, [predicted_close]]).astype(np.float32)
            return obs

        def step(self, action):
            prev_net_worth = self.net_worth
            current_price = self.df.loc[self.current_step, 'Close']

            if action == 2:  # BUY
                shares_bought = int(self.balance // current_price)
                if shares_bought > 0:
                    cost = shares_bought * current_price
                    fee = cost * self.transaction_cost
                    self.balance -= (cost + fee)
                    old_shares = self.shares_held
                    self.shares_held += shares_bought
                    # Weighted average cost basis
                    self.cost_basis = (
                        (self.cost_basis * old_shares) + (shares_bought * current_price)
                    ) / self.shares_held
            elif action == 0:  # SELL
                if self.shares_held > 0:
                    revenue = self.shares_held * current_price
                    fee = revenue * self.transaction_cost
                    self.balance += (revenue - fee)
                    self.shares_held = 0
                    self.cost_basis = 0

            self.net_worth = self.balance + self.shares_held * current_price
            self.current_step += 1
            done = (self.current_step >= self.max_steps - 1)

            # Reward: net_worth - initial_balance (like the original code)
            reward = self.net_worth - self.initial_balance
            obs = self._get_obs()
            return obs, reward, done, {}

        def render(self, mode='human'):
            profit = self.net_worth - self.initial_balance
            print(f"Step: {self.current_step}, "
                  f"Balance={self.balance:.2f}, "
                  f"Shares={self.shares_held}, "
                  f"NetWorth={self.net_worth:.2f}, "
                  f"Profit={profit:.2f}")
    ###################################
    # C) DQN HYPERPARAM TUNING W/ LSTM
    ###################################
    # We'll define a function that trains a DQN with trial hyperparams,
    # then evaluates the final net worth on one run.

    # Small helper to do the final net-worth check:
    def evaluate_dqn_networth(model, env, n_episodes=1):
        # Run the entire dataset (one episode) to see the final net worth.
        # For multiple episodes you could reset from random start positions, etc.
        final_net_worths = []
        for _ in range(n_episodes):
            obs = env.reset()
            done = False
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
            final_net_worths.append(env.net_worth)
        return np.mean(final_net_worths)

    # DQN objective for Optuna
    def dqn_objective(trial):
        # sample some DQN hyperparams
        lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
        gamma = trial.suggest_float("gamma", 0.8, 0.9999)
        exploration_fraction = trial.suggest_float("exploration_fraction", 0.01, 0.3)
        buffer_size = trial.suggest_categorical("buffer_size", [5000, 10000, 20000])
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

        # Build a fresh environment instance over the same data
        env = StockTradingEnvWithLSTM(
            df=df,
            feature_columns=feature_columns,
            lstm_model=final_lstm,  # use the best LSTM
            scaler_features=scaler_features,
            scaler_target=scaler_target,
            window_size=lstm_window_size
        )
        vec_env = DummyVecEnv([lambda: env])

        # Build DQN
        dqn_action_logger = ActionLoggingCallback(verbose=0)
        model = DQN(
            'MlpPolicy',
            vec_env,
            verbose=0,
            learning_rate=lr,
            gamma=gamma,
            exploration_fraction=exploration_fraction,
            buffer_size=buffer_size,
            batch_size=batch_size,
            train_freq=4,
            target_update_interval=1000
        )

        # Train some timesteps
        model.learn(total_timesteps=dqn_total_timesteps, callback=dqn_action_logger)

        # Evaluate final net worth; we want to maximize net worth => minimize its negative
        final_net_worth = evaluate_dqn_networth(model, env, n_episodes=dqn_eval_episodes)
        return -final_net_worth

    logging.info("Starting DQN hyperparam tuning with Optuna (using LSTM environment)...")
    study_dqn = optuna.create_study(direction='minimize')
    study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn)
    best_dqn_params = study_dqn.best_params
    logging.info(f"Best DQN hyperparams: {best_dqn_params}")
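The commit message anticipates possibly rerunning the DQN tuning if results are poor; one option (an assumption, not something this commit implements) is to back the study with a SQLite file so trials persist across runs, with the study name and path below chosen only for illustration:

study_dqn = optuna.create_study(
    direction='minimize',
    study_name='dqn_lstm_tuning',
    storage='sqlite:///dqn_lstm_tuning.db',
    load_if_exists=True
)
study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn)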
    ###################################
    # D) TRAIN FINAL DQN WITH BEST PARAMS
    ###################################
    logging.info("Training final DQN with best hyperparams & LSTM environment...")
    env_final = StockTradingEnvWithLSTM(
        df=df,
        feature_columns=feature_columns,
        lstm_model=final_lstm,
        scaler_features=scaler_features,
        scaler_target=scaler_target,
        window_size=lstm_window_size
    )
    vec_env_final = DummyVecEnv([lambda: env_final])

    # Build final model
    final_dqn_logger = ActionLoggingCallback(verbose=1)  # log action distribution each rollout
    final_model = DQN(
        'MlpPolicy',
        vec_env_final,
        verbose=1,
        learning_rate=best_dqn_params['lr'],
        gamma=best_dqn_params['gamma'],
        exploration_fraction=best_dqn_params['exploration_fraction'],
        buffer_size=best_dqn_params['buffer_size'],
        batch_size=best_dqn_params['batch_size'],
        train_freq=4,
        target_update_interval=1000
    )
    final_model.learn(total_timesteps=dqn_total_timesteps, callback=final_dqn_logger)
    final_model.save("best_dqn_model_lstm.zip")
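The saved agent can be reloaded later for inference without retraining; a minimal sketch using the filename saved above and an environment built the same way as env_final:

reloaded = DQN.load("best_dqn_model_lstm.zip")
obs = env_final.reset()
action, _ = reloaded.predict(obs, deterministic=True)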
    ###################################
    # E) FINAL INFERENCE & LOG RESULTS
    ###################################
    logging.info("Running final inference with best DQN...")
    env_test = StockTradingEnvWithLSTM(
        df=df,
        feature_columns=feature_columns,
        lstm_model=final_lstm,
        scaler_features=scaler_features,
        scaler_target=scaler_target,
        window_size=lstm_window_size
    )

    obs = env_test.reset()
    done = False
    total_reward = 0.0
    step_data = []
    step_count = 0

    while not done:
        step_count += 1
        action, _ = final_model.predict(obs, deterministic=True)
        obs, reward, done, info = env_test.step(action)
        total_reward += reward
        step_data.append({
            "Step": step_count,
            "Action": int(action),
            "Reward": reward,
            "Balance": env_test.balance,
            "Shares": env_test.shares_held,
            "NetWorth": env_test.net_worth
        })

    final_net_worth = env_test.net_worth
    final_profit = final_net_worth - env_test.initial_balance

    print("\n=== Final DQN Inference ===")
    print(f"Total Steps: {step_count}")
    print(f"Final Net Worth: {final_net_worth:.2f}")
    print(f"Final Profit: {final_profit:.2f}")
    print(f"Sum of Rewards: {total_reward:.2f}")

    buy_count = sum(1 for x in step_data if x["Action"] == 2)
    sell_count = sum(1 for x in step_data if x["Action"] == 0)
    hold_count = sum(1 for x in step_data if x["Action"] == 1)
    print(f"Actions Taken -> BUY:{buy_count}, SELL:{sell_count}, HOLD:{hold_count}")

    # Show last 15 steps
    last_n = step_data[-15:] if len(step_data) > 15 else step_data
    rows = []
    for d in last_n:
        rows.append([
            d["Step"],
            d["Action"],
            f"{d['Reward']:.2f}",
            f"{d['Balance']:.2f}",
            d["Shares"],
            f"{d['NetWorth']:.2f}"
        ])
    headers = ["Step", "Action", "Reward", "Balance", "Shares", "NetWorth"]
    print("\n== Last 15 Steps ==")
    print(tabulate(rows, headers=headers, tablefmt="pretty"))

    logging.info("All tasks complete. Exiting.")

if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long