updating LSTMDQN.py to incorporate Optuna tuning into the DQN model training, and possibly rerunning if the DQN underperforms

This commit is contained in:
2025-01-29 18:32:56 +00:00
parent 2ca8890e5f
commit 78ef2965b8
3 changed files with 440 additions and 1046 deletions

View File

@@ -1 +1,2 @@
venv/
.python-version

View File

@@ -3,42 +3,44 @@ import sys
import argparse
import numpy as np
import pandas as pd
import logging
from tabulate import tabulate
import matplotlib.pyplot as plt
import seaborn as sns

# TensorFlow / Keras
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.losses import Huber
from tensorflow.keras.regularizers import l2

# Sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

import joblib

# Optuna
import optuna
from optuna.integration import KerasPruningCallback

# RL stuff
import gym
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback

# Suppress TensorFlow logs beyond errors
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

######################################################
# 1. DATA LOADING & ADVANCED TECHNICAL INDICATORS
######################################################
def load_data(file_path):
    logging.info(f"Loading data from: {file_path}")
    try:
@@ -68,80 +70,70 @@ def load_data(file_path):
logging.info("Data loaded and sorted successfully.") logging.info("Data loaded and sorted successfully.")
return df return df
def compute_rsi(series, window=14): def compute_rsi(series, window=14):
delta = series.diff() delta = series.diff()
gain = delta.where(delta > 0, 0).rolling(window=window).mean() gain = delta.where(delta>0, 0).rolling(window=window).mean()
loss = -delta.where(delta < 0, 0).rolling(window=window).mean() loss = -delta.where(delta<0, 0).rolling(window=window).mean()
RS = gain / (loss + 1e-9) RS = gain / (loss+1e-9)
return 100 - (100 / (1 + RS)) return 100 - (100/(1+RS))
def compute_macd(series, span_short=12, span_long=26, span_signal=9): def compute_macd(series, span_short=12, span_long=26, span_signal=9):
ema_short = series.ewm(span=span_short, adjust=False).mean() ema_short = series.ewm(span=span_short, adjust=False).mean()
ema_long = series.ewm(span=span_long, adjust=False).mean() ema_long = series.ewm(span=span_long, adjust=False).mean()
macd_line = ema_short - ema_long macd_line = ema_short - ema_long
signal_line = macd_line.ewm(span=span_signal, adjust=False).mean() signal_line = macd_line.ewm(span=span_signal, adjust=False).mean()
return macd_line - signal_line # histogram return macd_line - signal_line # histogram
def compute_obv(df): def compute_obv(df):
signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0) signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0)
return signed_volume.cumsum() return signed_volume.cumsum()
def compute_adx(df, window=14): def compute_adx(df, window=14):
"""Pseudo-ADX approach using rolling True Range / Close.""" df['H-L'] = df['High'] - df['Low']
df['H-L'] = df['High'] - df['Low']
df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs() df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs()
df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs() df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs()
tr = df[['H-L','H-Cp','L-Cp']].max(axis=1) tr = df[['H-L','H-Cp','L-Cp']].max(axis=1)
tr_rolling = tr.rolling(window=window).mean() tr_rolling = tr.rolling(window=window).mean()
adx_placeholder = tr_rolling / (df['Close'] + 1e-9) adx_placeholder = tr_rolling/(df['Close']+1e-9)
df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True) df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True)
return adx_placeholder return adx_placeholder
def compute_bollinger_bands(series, window=20, num_std=2): def compute_bollinger_bands(series, window=20, num_std=2):
sma = series.rolling(window=window).mean() sma = series.rolling(window=window).mean()
std = series.rolling(window=window).std() std = series.rolling(window=window).std()
upper = sma + num_std * std upper = sma + num_std*std
lower = sma - num_std * std lower = sma - num_std*std
bandwidth = (upper - lower) / (sma + 1e-9) bandwidth = (upper - lower)/(sma + 1e-9)
return upper, lower, bandwidth return upper, lower, bandwidth
def compute_mfi(df, window=14): def compute_mfi(df, window=14):
typical_price = (df['High'] + df['Low'] + df['Close']) / 3 typical_price = (df['High']+ df['Low']+ df['Close'])/3
money_flow = typical_price * df['Volume'] money_flow = typical_price* df['Volume']
prev_tp = typical_price.shift(1) prev_tp = typical_price.shift(1)
flow_pos = money_flow.where(typical_price > prev_tp, 0) flow_pos = money_flow.where(typical_price>prev_tp, 0)
flow_neg = money_flow.where(typical_price < prev_tp, 0) flow_neg = money_flow.where(typical_price<prev_tp, 0)
pos_sum = flow_pos.rolling(window=window).sum() pos_sum = flow_pos.rolling(window=window).sum()
neg_sum = flow_neg.rolling(window=window).sum() neg_sum = flow_neg.rolling(window=window).sum()
mfi = 100 - (100 / (1 + pos_sum/(neg_sum+1e-9))) mfi= 100-(100/(1+ pos_sum/(neg_sum+1e-9)))
return mfi return mfi
def calculate_technical_indicators(df): def calculate_technical_indicators(df):
logging.info("Calculating technical indicators...") logging.info("Calculating technical indicators...")
df['RSI'] = compute_rsi(df['Close'], 14) df['RSI'] = compute_rsi(df['Close'], 14)
df['MACD'] = compute_macd(df['Close']) df['MACD']= compute_macd(df['Close'])
df['OBV'] = compute_obv(df) df['OBV'] = compute_obv(df)
df['ADX'] = compute_adx(df) df['ADX'] = compute_adx(df)
up, low, bw = compute_bollinger_bands(df['Close'], 20, 2) up, lo, bw = compute_bollinger_bands(df['Close'], 20, 2)
df['BB_Upper'] = up df['BB_Upper']= up
df['BB_Lower'] = low df['BB_Lower']= lo
df['BB_Width'] = bw df['BB_Width']= bw
df['MFI'] = compute_mfi(df, 14)
df['MFI'] = compute_mfi(df,14)
df['SMA_5'] = df['Close'].rolling(5).mean() df['SMA_5'] = df['Close'].rolling(5).mean()
df['SMA_10'] = df['Close'].rolling(10).mean() df['SMA_10']= df['Close'].rolling(10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean() df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean() df['EMA_10']= df['Close'].ewm(span=10, adjust=False).mean()
df['STDDEV_5'] = df['Close'].rolling(5).std() df['STDDEV_5'] = df['Close'].rolling(5).std()
df.dropna(inplace=True) df.dropna(inplace=True)
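A quick way to sanity-check compute_rsi on synthetic data (a sketch; the only claim is that the formula above stays inside the 0-100 band once the 14-bar window fills):

import numpy as np
import pandas as pd

prices = pd.Series(100 + np.cumsum(np.random.randn(60)))  # synthetic close prices
rsi = compute_rsi(prices, window=14)
print(rsi.dropna().between(0, 100).all())  # expected: True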
@@ -150,67 +142,123 @@ def calculate_technical_indicators(df):
###############################
# 2. ARG PARSING
###############################
def parse_arguments():
    parser = argparse.ArgumentParser(description='All-in-One: LSTM + DQN (with LSTM predictions) + Tuning.')
    parser.add_argument('csv_path', type=str,
                        help='Path to CSV data with columns [time, open, high, low, close, volume].')
    parser.add_argument('--lstm_window_size', type=int, default=15,
                        help='Sequence window size for LSTM. Default=15.')
    parser.add_argument('--dqn_total_timesteps', type=int, default=50000,
                        help='Total timesteps to train each DQN candidate. Default=50000.')
    parser.add_argument('--dqn_eval_episodes', type=int, default=1,
                        help='Number of episodes to evaluate DQN in the tuning step. Default=1 (entire dataset once).')
    parser.add_argument('--n_trials_lstm', type=int, default=30,
                        help='Number of Optuna trials for LSTM. Default=30.')
    parser.add_argument('--n_trials_dqn', type=int, default=20,
                        help='Number of Optuna trials for DQN. Default=20.')
    return parser.parse_args()
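With these flags, a typical invocation of the script (the script name comes from the commit message; the CSV path and values here are only illustrative) would be:

python LSTMDQN.py data/ohlcv.csv --lstm_window_size 15 --dqn_total_timesteps 50000 --n_trials_lstm 30 --n_trials_dqn 20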
###############################
# 3. CUSTOM DQN CALLBACK: LOG ACTIONS + REWARDS
###############################
class ActionLoggingCallback(BaseCallback):
    """
    Logs distribution of actions and average reward after each rollout.
    For off-policy (DQN), "rollout" can be a bit different than on-policy,
    but stable-baselines3 still calls `_on_rollout_end` periodically.
    """
    def __init__(self, verbose=0):
        super().__init__(verbose)
        self.action_buffer = []
        self.reward_buffer = []

    def _on_training_start(self):
        self.action_buffer = []
        self.reward_buffer = []

    def _on_step(self):
        action = self.locals.get('action', None)
        reward = self.locals.get('reward', None)
        if action is not None:
            self.action_buffer.append(action)
        if reward is not None:
            self.reward_buffer.append(reward)
        return True

    def _on_rollout_end(self):
        actions = np.array(self.action_buffer)
        rewards = np.array(self.reward_buffer)
        if len(actions) > 0:
            unique, counts = np.unique(actions, return_counts=True)
            total = len(actions)
            distr_str = []
            for act, c in zip(unique, counts):
                distr_str.append(f"Action {act}: {c} times ({100*c/total:.2f}%)")
            logging.info(" -- DQN Rollout End -- ")
            logging.info("  " + ", ".join(distr_str))
            logging.info(f"  Avg Reward this rollout: {rewards.mean():.4f} "
                         f"(min={rewards.min():.4f}, max={rewards.max():.4f})")
        self.action_buffer = []
        self.reward_buffer = []

###############################
# 4. MAIN
###############################
def main():
    args = parse_arguments()
    csv_path = args.csv_path
    lstm_window_size = args.lstm_window_size
    dqn_total_timesteps = args.dqn_total_timesteps
    dqn_eval_episodes = args.dqn_eval_episodes
    n_trials_lstm = args.n_trials_lstm
    n_trials_dqn = args.n_trials_dqn

    ##########################################
    # A) LSTM PART: LOAD, PREPROCESS, TUNE
    ##########################################
    # 1) Load & preprocess
    df = load_data(csv_path)
    df = calculate_technical_indicators(df)

    # We'll exclude 'Close' from the feature set
    feature_columns = [
        'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5',
        'RSI', 'MACD', 'ADX', 'OBV', 'Volume', 'Open', 'High', 'Low',
        'BB_Upper', 'BB_Lower', 'BB_Width', 'MFI'
    ]
    target_column = 'Close'
    df = df[['Date'] + feature_columns + [target_column]].dropna()

    scaler_features = MinMaxScaler()
    scaler_target = MinMaxScaler()

    X_all = df[feature_columns].values
    y_all = df[[target_column]].values

    X_scaled = scaler_features.fit_transform(X_all)
    y_scaled = scaler_target.fit_transform(y_all).flatten()

    # 2) Create sequences
    def create_sequences(features, target, window_size):
        X_seq, y_seq = [], []
        for i in range(len(features) - window_size):
            X_seq.append(features[i:i+window_size])
            y_seq.append(target[i+window_size])
        return np.array(X_seq), np.array(y_seq)

    X, y = create_sequences(X_scaled, y_scaled, lstm_window_size)

    # 3) Split into train/val/test
    train_size = int(len(X) * 0.7)
    val_size = int(len(X) * 0.15)
    test_size = len(X) - train_size - val_size

    X_train, y_train = X[:train_size], y[:train_size]
    X_val, y_val = X[train_size:train_size + val_size], y[train_size:train_size + val_size]
    X_test, y_test = X[train_size + val_size:], y[train_size + val_size:]

    logging.info(f"Scaled training features shape: {X_train.shape}")
    logging.info(f"Scaled validation features shape: {X_val.shape}")
@@ -219,14 +267,14 @@ def main():
logging.info(f"Scaled validation target shape: {y_val.shape}") logging.info(f"Scaled validation target shape: {y_val.shape}")
logging.info(f"Scaled testing target shape: {y_test.shape}") logging.info(f"Scaled testing target shape: {y_test.shape}")
# 5) GPU or CPU # 4) GPU config
def configure_device(): def configure_device():
gpus = tf.config.list_physical_devices('GPU') gpus = tf.config.list_physical_devices('GPU')
if gpus: if gpus:
try: try:
for gpu in gpus: for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True) tf.config.experimental.set_memory_growth(gpu, True)
logging.info(f"{len(gpus)} GPU(s) detected and configured.") logging.info(f"{len(gpus)} GPU(s) detected & configured.")
except RuntimeError as e: except RuntimeError as e:
logging.error(e) logging.error(e)
else: else:
@@ -234,39 +282,44 @@ def main():
    configure_device()

    # 5) Build LSTM function
    def build_lstm(input_shape, hyperparams):
        model = Sequential()
        num_layers = hyperparams['num_lstm_layers']
        units = hyperparams['lstm_units']
        drop = hyperparams['dropout_rate']

        for i in range(num_layers):
            return_seqs = (i < num_layers - 1)
            model.add(Bidirectional(
                LSTM(units, return_sequences=return_seqs, kernel_regularizer=l2(1e-4)),
                input_shape=input_shape if i == 0 else None
            ))
            model.add(Dropout(drop))

        model.add(Dense(1, activation='linear'))

        opt_name = hyperparams['optimizer']
        lr = hyperparams['learning_rate']
        decay = hyperparams['decay']
        if opt_name == 'Adam':
            opt = Adam(learning_rate=lr, decay=decay)
        elif opt_name == 'Nadam':
            opt = Nadam(learning_rate=lr)
        else:
            opt = Adam(learning_rate=lr)

        model.compile(loss=Huber(), optimizer=opt, metrics=['mae'])
        return model

    # 6) Optuna objective for LSTM
    def lstm_objective(trial):
        num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
        lstm_units = trial.suggest_categorical('lstm_units', [32, 64, 96, 128])
        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
        optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'Nadam'])
        decay = trial.suggest_float('decay', 0.0, 1e-4)

        hyperparams = {
            'num_lstm_layers': num_lstm_layers,
@@ -277,295 +330,390 @@ def main():
            'decay': decay
        }

        model_ = build_lstm((X_train.shape[1], X_train.shape[2]), hyperparams)
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
        cb_prune = KerasPruningCallback(trial, 'val_loss')

        history = model_.fit(
            X_train, y_train,
            epochs=100,
            batch_size=16,
            validation_data=(X_val, y_val),
            callbacks=[early_stop, lr_reduce, cb_prune],
            verbose=0
        )
        val_mae = min(history.history['val_mae'])
        return val_mae

    logging.info("Starting LSTM hyperparam optimization with Optuna...")
    study_lstm = optuna.create_study(direction='minimize')
    study_lstm.optimize(lstm_objective, n_trials=n_trials_lstm)
    best_lstm_params = study_lstm.best_params
    logging.info(f"Best LSTM Hyperparams: {best_lstm_params}")

    # 7) Train final LSTM
    final_lstm = build_lstm((X_train.shape[1], X_train.shape[2]), best_lstm_params)
    early_stop_final = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    lr_reduce_final = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)

    logging.info("Training best LSTM model with found hyperparams...")
    hist = final_lstm.fit(
        X_train, y_train,
        epochs=300,
        batch_size=16,
        validation_data=(X_val, y_val),
        callbacks=[early_stop_final, lr_reduce_final],
        verbose=1
    )
    # Evaluate LSTM
    def evaluate_lstm(model, X_test, y_test):
        logging.info("Evaluating final LSTM...")
        y_pred_scaled = model.predict(X_test).flatten()
        y_pred_scaled = np.clip(y_pred_scaled, 0, 1)
        y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
        y_test_actual = scaler_target.inverse_transform(y_test.reshape(-1, 1)).flatten()

        mse_ = mean_squared_error(y_test_actual, y_pred)
        rmse_ = np.sqrt(mse_)
        mae_ = mean_absolute_error(y_test_actual, y_pred)
        r2_ = r2_score(y_test_actual, y_pred)

        direction_actual = np.sign(np.diff(y_test_actual))
        direction_pred = np.sign(np.diff(y_pred))
        directional_accuracy = np.mean(direction_actual == direction_pred)

        logging.info(f"Test MSE: {mse_}")
        logging.info(f"Test RMSE: {rmse_}")
        logging.info(f"Test MAE: {mae_}")
        logging.info(f"Test R2 Score: {r2_}")
        logging.info(f"Directional Accuracy: {directional_accuracy}")

        # Plot
        plt.figure(figsize=(14, 7))
        plt.plot(y_test_actual, label='Actual Price')
        plt.plot(y_pred, label='Predicted Price')
        plt.title('LSTM: Actual vs Predicted')
        plt.legend()
        plt.grid(True)
        plt.savefig('lstm_actual_vs_pred.png')
        plt.close()

        # Tabulate first 40
        table = []
        limit = min(40, len(y_test_actual))
        for i in range(limit):
            table.append([i, round(y_test_actual[i], 2), round(y_pred[i], 2)])
        headers = ["Index", "Actual Price", "Predicted Price"]
        print(tabulate(table, headers=headers, tablefmt="pretty"))

        return r2_, directional_accuracy

    _r2, _diracc = evaluate_lstm(final_lstm, X_test, y_test)

    # Save LSTM + scalers
    final_lstm.save('best_lstm_model.h5')
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')
    logging.info("Saved best LSTM model + scalers (best_lstm_model.h5, scaler_features.pkl, scaler_target.pkl).")
    ############################################################
    # B) DQN PART: BUILD ENV THAT USES THE LSTM + FORECAST
    ############################################################
    class StockTradingEnvWithLSTM(gym.Env):
        """
        An environment that uses the LSTM model's predicted next-day close
        as part of the observation:
        obs = [technical indicators, balance, shares, cost_basis, predicted_next_close].
        Reward => net_worth - initial_balance each step.
        """
        metadata = {'render.modes': ['human']}

        def __init__(self, df, feature_columns, lstm_model, scaler_features, scaler_target,
                     window_size=15, initial_balance=10000, transaction_cost=0.001):
            super().__init__()
            self.df = df.reset_index(drop=True)
            self.feature_columns = feature_columns
            self.lstm_model = lstm_model
            self.scaler_features = scaler_features
            self.scaler_target = scaler_target
            self.window_size = window_size

            self.initial_balance = initial_balance
            self.balance = initial_balance
            self.net_worth = initial_balance
            self.transaction_cost = transaction_cost

            self.max_steps = len(df)
            self.current_step = 0
            self.shares_held = 0
            self.cost_basis = 0

            # raw array of features
            self.raw_features = df[feature_columns].values

            # 0=Sell, 1=Hold, 2=Buy
            self.action_space = spaces.Discrete(3)

            # observation dimension = len(feature_columns)+3+1 => 17 + 3 + 1 = 21
            self.observation_space = spaces.Box(
                low=0, high=1,
                shape=(len(feature_columns) + 3 + 1,),
                dtype=np.float32
            )

        def reset(self):
            self.balance = self.initial_balance
            self.net_worth = self.initial_balance
            self.current_step = 0
            self.shares_held = 0
            self.cost_basis = 0
            return self._get_obs()

        def _get_obs(self):
            row = self.raw_features[self.current_step]
            row_max = np.max(row) if np.max(row) != 0 else 1.0
            row_norm = row / row_max

            # account info
            additional = np.array([
                self.balance / self.initial_balance,
                self.shares_held / 100.0,
                self.cost_basis / (self.initial_balance + 1e-9)
            ], dtype=np.float32)

            # LSTM prediction
            if self.current_step < self.window_size:
                # not enough history => no forecast
                predicted_close = 0.0
            else:
                seq = self.raw_features[self.current_step - self.window_size:self.current_step]
                seq_scaled = self.scaler_features.transform(seq)
                seq_scaled = np.expand_dims(seq_scaled, axis=0)  # shape (1, window_size, #features)
                pred_scaled = self.lstm_model.predict(seq_scaled, verbose=0).flatten()[0]
                pred_scaled = np.clip(pred_scaled, 0, 1)
                unscaled = self.scaler_target.inverse_transform([[pred_scaled]])[0, 0]
                # either keep it raw or scale it; we do a naive /1000 scale
                predicted_close = unscaled / 1000.0

            obs = np.concatenate([row_norm, additional, [predicted_close]]).astype(np.float32)
            return obs

        def step(self, action):
            prev_net_worth = self.net_worth
            current_price = self.df.loc[self.current_step, 'Close']

            if action == 2:  # BUY
                shares_bought = int(self.balance // current_price)
                if shares_bought > 0:
                    cost = shares_bought * current_price
                    fee = cost * self.transaction_cost
                    self.balance -= (cost + fee)
                    old_shares = self.shares_held
                    self.shares_held += shares_bought
                    # Weighted average cost basis
                    self.cost_basis = (
                        (self.cost_basis * old_shares) + (shares_bought * current_price)
                    ) / self.shares_held
            elif action == 0:  # SELL
                if self.shares_held > 0:
                    revenue = self.shares_held * current_price
                    fee = revenue * self.transaction_cost
                    self.balance += (revenue - fee)
                    self.shares_held = 0
                    self.cost_basis = 0

            self.net_worth = self.balance + self.shares_held * current_price
            self.current_step += 1
            done = (self.current_step >= self.max_steps - 1)

            # Reward: net_worth - initial_balance (like the original code)
            reward = self.net_worth - self.initial_balance
            obs = self._get_obs()
            return obs, reward, done, {}

        def render(self, mode='human'):
            profit = self.net_worth - self.initial_balance
            print(f"Step: {self.current_step}, "
                  f"Balance={self.balance:.2f}, "
                  f"Shares={self.shares_held}, "
                  f"NetWorth={self.net_worth:.2f}, "
                  f"Profit={profit:.2f}")
    ###################################
    # C) DQN HYPERPARAM TUNING W/ LSTM
    ###################################
    # We'll define a function that trains a DQN with trial hyperparams,
    # then evaluates the final net worth on one run.

    # Small helper to do the final net-worth check:
    def evaluate_dqn_networth(model, env, n_episodes=1):
        # Run the entire dataset (one episode) to see the final net worth.
        # For multiple episodes you could reset from random start positions, etc.
        final_net_worths = []
        for _ in range(n_episodes):
            obs = env.reset()
            done = False
            while not done:
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, info = env.step(action)
            final_net_worths.append(env.net_worth)
        return np.mean(final_net_worths)

    # DQN objective for Optuna
    def dqn_objective(trial):
        # sample some DQN hyperparams
        lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
        gamma = trial.suggest_float("gamma", 0.8, 0.9999)
        exploration_fraction = trial.suggest_float("exploration_fraction", 0.01, 0.3)
        buffer_size = trial.suggest_categorical("buffer_size", [5000, 10000, 20000])
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

        # Build a fresh environment instance over the same data
        env = StockTradingEnvWithLSTM(
            df=df,
            feature_columns=feature_columns,
            lstm_model=final_lstm,  # use the best LSTM
            scaler_features=scaler_features,
            scaler_target=scaler_target,
            window_size=lstm_window_size
        )
        vec_env = DummyVecEnv([lambda: env])

        # Build DQN
        dqn_action_logger = ActionLoggingCallback(verbose=0)
        model = DQN(
            'MlpPolicy',
            vec_env,
            verbose=0,
            learning_rate=lr,
            gamma=gamma,
            exploration_fraction=exploration_fraction,
            buffer_size=buffer_size,
            batch_size=batch_size,
            train_freq=4,
            target_update_interval=1000
        )

        # Train some timesteps
        model.learn(total_timesteps=dqn_total_timesteps, callback=dqn_action_logger)

        # Evaluate final net worth; we want to maximize net worth => minimize its negative
        final_net_worth = evaluate_dqn_networth(model, env, n_episodes=dqn_eval_episodes)
        return -final_net_worth

    logging.info("Starting DQN hyperparam tuning with Optuna (using LSTM environment)...")
    study_dqn = optuna.create_study(direction='minimize')
    study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn)
    best_dqn_params = study_dqn.best_params
    logging.info(f"Best DQN hyperparams: {best_dqn_params}")
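The commit message anticipates possibly rerunning the DQN tuning if results are poor; one option (an assumption, not something this commit implements) is to back the study with a SQLite file so trials persist across runs, with the study name and path below chosen only for illustration:

study_dqn = optuna.create_study(
    direction='minimize',
    study_name='dqn_lstm_tuning',
    storage='sqlite:///dqn_lstm_tuning.db',
    load_if_exists=True
)
study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn)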
    ###################################
    # D) TRAIN FINAL DQN WITH BEST PARAMS
    ###################################
    logging.info("Training final DQN with best hyperparams & LSTM environment...")
    env_final = StockTradingEnvWithLSTM(
        df=df,
        feature_columns=feature_columns,
        lstm_model=final_lstm,
        scaler_features=scaler_features,
        scaler_target=scaler_target,
        window_size=lstm_window_size
    )
    vec_env_final = DummyVecEnv([lambda: env_final])

    # Build final model
    final_dqn_logger = ActionLoggingCallback(verbose=1)  # log action distribution each rollout
    final_model = DQN(
        'MlpPolicy',
        vec_env_final,
        verbose=1,
        learning_rate=best_dqn_params['lr'],
        gamma=best_dqn_params['gamma'],
        exploration_fraction=best_dqn_params['exploration_fraction'],
        buffer_size=best_dqn_params['buffer_size'],
        batch_size=best_dqn_params['batch_size'],
        train_freq=4,
        target_update_interval=1000
    )
    final_model.learn(total_timesteps=dqn_total_timesteps, callback=final_dqn_logger)
    final_model.save("best_dqn_model_lstm.zip")
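The saved agent can be reloaded later for inference without retraining; a minimal sketch using the filename saved above and an environment built the same way as env_final:

reloaded = DQN.load("best_dqn_model_lstm.zip")
obs = env_final.reset()
action, _ = reloaded.predict(obs, deterministic=True)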
    ###################################
    # E) FINAL INFERENCE & LOG RESULTS
    ###################################
    logging.info("Running final inference with best DQN...")
    env_test = StockTradingEnvWithLSTM(
        df=df,
        feature_columns=feature_columns,
        lstm_model=final_lstm,
        scaler_features=scaler_features,
        scaler_target=scaler_target,
        window_size=lstm_window_size
    )

    obs = env_test.reset()
    done = False
    total_reward = 0.0
    step_data = []
    step_count = 0

    while not done:
        step_count += 1
        action, _ = final_model.predict(obs, deterministic=True)
        obs, reward, done, info = env_test.step(action)
        total_reward += reward
        step_data.append({
            "Step": step_count,
            "Action": int(action),
            "Reward": reward,
            "Balance": env_test.balance,
            "Shares": env_test.shares_held,
            "NetWorth": env_test.net_worth
        })

    final_net_worth = env_test.net_worth
    final_profit = final_net_worth - env_test.initial_balance

    print("\n=== Final DQN Inference ===")
    print(f"Total Steps: {step_count}")
    print(f"Final Net Worth: {final_net_worth:.2f}")
    print(f"Final Profit: {final_profit:.2f}")
    print(f"Sum of Rewards: {total_reward:.2f}")

    buy_count = sum(1 for x in step_data if x["Action"] == 2)
    sell_count = sum(1 for x in step_data if x["Action"] == 0)
    hold_count = sum(1 for x in step_data if x["Action"] == 1)
    print(f"Actions Taken -> BUY:{buy_count}, SELL:{sell_count}, HOLD:{hold_count}")

    # Show last 15 steps
    last_n = step_data[-15:] if len(step_data) > 15 else step_data
    rows = []
    for d in last_n:
        rows.append([
            d["Step"],
            d["Action"],
            f"{d['Reward']:.2f}",
            f"{d['Balance']:.2f}",
            d["Shares"],
            f"{d['NetWorth']:.2f}"
        ])
    headers = ["Step", "Action", "Reward", "Balance", "Shares", "NetWorth"]
    print("\n== Last 15 Steps ==")
    print(tabulate(rows, headers=headers, tablefmt="pretty"))

    logging.info("All tasks complete. Exiting.")

if __name__ == "__main__":
    main()

File diff suppressed because one or more lines are too long