Idfk

2025-01-30 00:57:13 -05:00
parent 030529e6c7
commit 6cf5a47005
2 changed files with 558 additions and 293 deletions
--- a/src/Machine-Learning/LSTM-python/src/LSTMDQN.log
+++ b/src/Machine-Learning/LSTM-python/src/LSTMDQN.log
@@ -0,0 +1,42 @@
 2025-01-30 00:40:03,878 - INFO - ===== Resource Statistics =====
 2025-01-30 00:40:03,879 - INFO - Physical CPU Cores: 10
 2025-01-30 00:40:03,879 - INFO - Logical CPU Cores: 12
 2025-01-30 00:40:03,879 - INFO - CPU Usage per Core: [5.1, 2.0, 5.0, 2.0, 1.0, 2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 1.0]%
 2025-01-30 00:40:03,879 - INFO - No GPUs detected.
 2025-01-30 00:40:03,879 - INFO - =================================
 2025-01-30 00:40:03,880 - INFO - Configured TensorFlow to use CPU with optimized thread settings.
 2025-01-30 00:40:03,880 - INFO - Loading data from: BAT.csv
 2025-01-30 00:40:04,173 - INFO - Data columns after renaming: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
 2025-01-30 00:40:04,179 - INFO - Data loaded and sorted successfully.
 2025-01-30 00:40:04,179 - INFO - Calculating technical indicators...
 2025-01-30 00:40:04,193 - INFO - Technical indicators calculated successfully.
 2025-01-30 00:40:04,197 - INFO - Starting parallel feature engineering with 10 workers...
 2025-01-30 00:40:06,772 - INFO - Parallel feature engineering completed.
 2025-01-30 00:40:06,812 - INFO - Scaled training features shape: (14134, 15, 17)
 2025-01-30 00:40:06,813 - INFO - Scaled validation features shape: (3028, 15, 17)
 2025-01-30 00:40:06,813 - INFO - Scaled testing features shape: (3030, 15, 17)
 2025-01-30 00:40:06,813 - INFO - Scaled training target shape: (14134,)
 2025-01-30 00:40:06,813 - INFO - Scaled validation target shape: (3028,)
 2025-01-30 00:40:06,813 - INFO - Scaled testing target shape: (3030,)
 2025-01-30 00:40:06,813 - INFO - Starting LSTM hyperparameter optimization with Optuna using 10 parallel trials...
 2025-01-30 00:56:53,436 - INFO - ===== Resource Statistics =====
 2025-01-30 00:56:53,436 - INFO - Physical CPU Cores: 10
 2025-01-30 00:56:53,436 - INFO - Logical CPU Cores: 12
 2025-01-30 00:56:53,437 - INFO - CPU Usage per Core: [1.0, 1.0, 5.1, 1.0, 0.0, 0.0, 2.0, 1.0, 0.0, 0.0, 0.0, 0.0]%
 2025-01-30 00:56:53,437 - INFO - No GPUs detected.
 2025-01-30 00:56:53,437 - INFO - =================================
 2025-01-30 00:56:53,437 - INFO - Configured TensorFlow to use CPU with optimized thread settings.
 2025-01-30 00:56:53,437 - INFO - Loading data from: BAT.csv
 2025-01-30 00:56:53,730 - INFO - Data columns after renaming: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
 2025-01-30 00:56:53,736 - INFO - Data loaded and sorted successfully.
 2025-01-30 00:56:53,736 - INFO - Calculating technical indicators...
 2025-01-30 00:56:53,753 - INFO - Technical indicators calculated successfully.
 2025-01-30 00:56:53,758 - INFO - Starting parallel feature engineering with 10 workers...
 2025-01-30 00:56:56,241 - INFO - Parallel feature engineering completed.
 2025-01-30 00:56:56,285 - INFO - Scaled training features shape: (14134, 15, 17)
 2025-01-30 00:56:56,285 - INFO - Scaled validation features shape: (3028, 15, 17)
 2025-01-30 00:56:56,285 - INFO - Scaled testing features shape: (3030, 15, 17)
 2025-01-30 00:56:56,285 - INFO - Scaled training target shape: (14134,)
 2025-01-30 00:56:56,285 - INFO - Scaled validation target shape: (3028,)
 2025-01-30 00:56:56,285 - INFO - Scaled testing target shape: (3030,)
 2025-01-30 00:56:56,285 - INFO - Starting LSTM hyperparameter optimization with Optuna using 10 parallel trials...
--- a/src/Machine-Learning/LSTM-python/src/LSTMDQN.py
+++ b/src/Machine-Learning/LSTM-python/src/LSTMDQN.py
@@ -7,8 +7,8 @@ import logging
 from tabulate import tabulate
 import matplotlib.pyplot as plt
 import seaborn as sns
-
+import psutil
-# TensorFlow / Keras
+import GPUtil
 import tensorflow as tf
 from tensorflow.keras.models import Sequential, load_model
 from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
@@ -17,30 +17,120 @@ from tensorflow.keras.losses import Huber
 from tensorflow.keras.regularizers import l2
 from tensorflow.keras.optimizers import Adam, Nadam
 # Sklearn
 from sklearn.preprocessing import MinMaxScaler
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
 import joblib
 # Optuna
 import optuna
 from optuna.integration import KerasPruningCallback
 # RL stuff
 import gym
 from gym import spaces
 from stable_baselines3 import DQN
 from stable_baselines3.common.vec_env import DummyVecEnv
 from stable_baselines3.common.callbacks import BaseCallback
 from multiprocessing import Pool, cpu_count
 import threading
 import time
 # Suppress TensorFlow logs beyond errors
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# ============================
 # Resource Detection Functions
 # ============================
 def get_cpu_info():
    """
    Retrieves CPU information including physical and logical cores and current usage per core.
-######################################################
+    Returns:
-# 1. DATA LOADING & ADVANCED TECHNICAL INDICATORS
+        dict: Dictionary containing physical cores, logical cores, and CPU usage per core.
-######################################################
+    """
    cpu_count = psutil.cpu_count(logical=False)  # Physical cores
    cpu_count_logical = psutil.cpu_count(logical=True)  # Logical cores
    cpu_percent = psutil.cpu_percent(interval=1, percpu=True)
    return {
        'physical_cores': cpu_count,
        'logical_cores': cpu_count_logical,
        'cpu_percent': cpu_percent
    }
 def get_gpu_info():
    """
    Collects a snapshot of every GPU that GPUtil reports.
    Returns:
        list: One dictionary per GPU with the keys 'id', 'name', 'load',
              'memory_total', 'memory_used', 'memory_free' and 'temperature'.
              The list is empty when GPUtil reports no GPUs.
    """
    return [
        {
            'id': device.id,
            'name': device.name,
            'load': device.load * 100,  # Scale the reported load to a percentage
            'memory_total': device.memoryTotal,
            'memory_used': device.memoryUsed,
            'memory_free': device.memoryFree,
            'temperature': device.temperature
        }
        for device in GPUtil.getGPUs()
    ]
 def configure_tensorflow(cpu_stats, gpu_stats):
    """
    Configures TensorFlow threading and GPU memory behaviour for the detected hardware.

    Thread-count environment variables are always exported. If GPU statistics were
    supplied AND TensorFlow itself lists at least one GPU, memory growth is enabled
    on every such GPU. In every other case (no GPU statistics, or statistics present
    but TensorFlow sees no usable GPU) the CPU intra-/inter-op thread pools are sized
    to the logical core count.

    Configuration errors raised by TensorFlow (RuntimeError) are logged rather than
    propagated, on both the GPU and the CPU path.

    Args:
        cpu_stats (dict): Dictionary containing CPU statistics; must provide 'logical_cores'.
        gpu_stats (list): List of dictionaries containing GPU statistics (empty if none).
    """
    logical_cores = cpu_stats['logical_cores']
    os.environ["OMP_NUM_THREADS"] = str(logical_cores)
    os.environ["TF_NUM_INTRAOP_THREADS"] = str(logical_cores)
    os.environ["TF_NUM_INTEROP_THREADS"] = str(logical_cores)
    # The external GPU statistics can list a device that TensorFlow cannot use,
    # so TensorFlow's own device list decides which branch runs.
    gpus = tf.config.list_physical_devices('GPU') if gpu_stats else []
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logging.info(f"Enabled memory growth for {len(gpus)} GPU(s).")
        except RuntimeError as e:
            logging.error(f"TensorFlow GPU configuration error: {e}")
        return
    try:
        tf.config.threading.set_intra_op_parallelism_threads(logical_cores)
        tf.config.threading.set_inter_op_parallelism_threads(logical_cores)
    except RuntimeError as e:
        # Raised when the TensorFlow runtime has already been initialized.
        logging.error(f"TensorFlow CPU threading configuration error: {e}")
        return
    logging.info("Configured TensorFlow to use CPU with optimized thread settings.")
 # ============================
 # Resource Monitoring Function (Optional)
 # ============================
 def monitor_resources(interval=60):
    """
    Logs a CPU and GPU usage snapshot in an endless loop.

    This function never returns. Each cycle samples per-core CPU usage over one
    second, logs it together with the current GPU statistics, and then sleeps.
    Args:
        interval (int): Seconds to sleep after each snapshot has been logged.
    """
    separator = "-" * 50
    while True:
        per_core_usage = psutil.cpu_percent(interval=1, percpu=True)
        gpu_snapshot = get_gpu_info()
        logging.info(f"CPU Usage per Core: {per_core_usage}%")
        if not gpu_snapshot:
            logging.info("No GPUs detected.")
        for entry in gpu_snapshot:
            logging.info(f"GPU {entry['id']} - {entry['name']}: Load: {entry['load']}%, "
                         f"Memory Used: {entry['memory_used']}MB / {entry['memory_total']}MB, "
                         f"Temperature: {entry['temperature']}°C")
        logging.info(separator)
        time.sleep(interval)
 # ============================
 # Data Loading & Technical Indicators
 # ============================
 def load_data(file_path):
    logging.info(f"Loading data from: {file_path}")
    try:
@@ -140,10 +230,9 @@ def calculate_technical_indicators(df):
    logging.info("Technical indicators calculated successfully.")
    return df
-
+# ============================
-###############################
+# Argument Parsing
-# 2. ARG PARSING
+# ============================
 ###############################
 def parse_arguments():
    parser = argparse.ArgumentParser(description='All-in-One: LSTM + DQN (with LSTM predictions) + Tuning.')
    parser.add_argument('csv_path', type=str,
@@ -158,12 +247,17 @@ def parse_arguments():
                        help='Number of Optuna trials for LSTM. Default=30.')
    parser.add_argument('--n_trials_dqn', type=int, default=20,
                        help='Number of Optuna trials for DQN. Default=20.')
    parser.add_argument('--max_parallel_trials', type=int, default=None,
                        help='Maximum number of parallel Optuna trials. Defaults to (logical cores - 2).')
    parser.add_argument('--preprocess_workers', type=int, default=None,
                        help='Number of worker processes for data preprocessing. Defaults to (logical cores - 2).')
    parser.add_argument('--monitor_resources', action='store_true',
                        help='Enable real-time resource monitoring.')
    return parser.parse_args()
-
+# ============================
-###############################
+# Custom DQN Callback: Log Actions + Rewards
-# 3. CUSTOM DQN CALLBACK: LOG ACTIONS + REWARDS
+# ============================
 ###############################
 class ActionLoggingCallback(BaseCallback):
    """
    Logs distribution of actions and average reward after each rollout.
@@ -171,7 +265,7 @@ class ActionLoggingCallback(BaseCallback):
    but stable-baselines3 still calls `_on_rollout_end` periodically.
    """
    def __init__(self, verbose=0):
-        super().__init__(verbose)
+        super(ActionLoggingCallback, self).__init__(verbose)
        self.action_buffer = []
        self.reward_buffer = []
@@ -180,6 +274,7 @@ class ActionLoggingCallback(BaseCallback):
        self.reward_buffer = []
    def _on_step(self):
        # For Stable Baselines3, access actions and rewards via self.locals
        action = self.locals.get('action', None)
        reward = self.locals.get('reward', None)
        if action is not None:
@@ -204,9 +299,43 @@ class ActionLoggingCallback(BaseCallback):
        self.action_buffer = []
        self.reward_buffer = []
-###############################
+# ============================
-# 4. MAIN
+# Data Preprocessing with Controlled Parallelization
-###############################
+# ============================
 def parallel_feature_engineering(row):
    """
    Per-row feature engineering hook; currently a pass-through.
    Args:
        row (pd.Series): A single row of the DataFrame being processed.
    Returns:
        pd.Series: The same row object, unchanged.
    """
    # No extra features are derived yet; add per-row transformations here.
    processed = row
    return processed
 def feature_engineering_parallel(df, num_workers):
    """
    Applies feature engineering in parallel using multiprocessing.

    Every row of `df` is sent through `parallel_feature_engineering` in a worker
    pool and the results are reassembled into a new DataFrame.

    Edge cases handled here:
      * An empty DataFrame is returned as a copy without starting a pool, because
        rebuilding a frame from an empty list of rows would discard its columns.
      * A worker count below 1 is raised to 1, since Pool rejects such values.
        `None` is passed through so Pool picks its own default.
    Args:
        df (pd.DataFrame): DataFrame to process.
        num_workers (int): Number of worker processes.
    Returns:
        pd.DataFrame: Processed DataFrame.
    """
    logging.info(f"Starting parallel feature engineering with {num_workers} workers...")
    if df.empty:
        # Nothing to process; keep the column labels intact for downstream selection.
        logging.info("Parallel feature engineering completed.")
        return df.copy()
    worker_count = None if num_workers is None else max(1, int(num_workers))
    rows = [row for _, row in df.iterrows()]
    with Pool(processes=worker_count) as pool:
        processed_rows = pool.map(parallel_feature_engineering, rows)
    df_processed = pd.DataFrame(processed_rows)
    logging.info("Parallel feature engineering completed.")
    return df_processed
 # ============================
 # Main Function with Enhanced Optimizations
 # ============================
 def main():
    args = parse_arguments()
    csv_path = args.csv_path
@@ -215,6 +344,53 @@ def main():
    dqn_eval_episodes   = args.dqn_eval_episodes
    n_trials_lstm = args.n_trials_lstm
    n_trials_dqn  = args.n_trials_dqn
    max_parallel_trials = args.max_parallel_trials
    preprocess_workers = args.preprocess_workers
    enable_resource_monitor = args.monitor_resources
    # =============================
    # Setup Logging
    # =============================
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s - %(levelname)s - %(message)s',
                        handlers=[
                            logging.FileHandler("LSTMDQN.log"),
                            logging.StreamHandler(sys.stdout)
                        ])
    # =============================
    # Resource Detection & Logging
    # =============================
    cpu_stats = get_cpu_info()
    gpu_stats = get_gpu_info()
    logging.info("===== Resource Statistics =====")
    logging.info(f"Physical CPU Cores: {cpu_stats['physical_cores']}")
    logging.info(f"Logical CPU Cores: {cpu_stats['logical_cores']}")
    logging.info(f"CPU Usage per Core: {cpu_stats['cpu_percent']}%")
    if gpu_stats:
        logging.info("GPU Statistics:")
        for gpu in gpu_stats:
            logging.info(f"GPU {gpu['id']} - {gpu['name']}: Load: {gpu['load']}%, "
                         f"Memory Used: {gpu['memory_used']}MB / {gpu['memory_total']}MB, "
                         f"Temperature: {gpu['temperature']}°C")
    else:
        logging.info("No GPUs detected.")
    logging.info("=================================")
    # =============================
    # Configure TensorFlow
    # =============================
    configure_tensorflow(cpu_stats, gpu_stats)
    # =============================
    # Start Resource Monitoring (Optional)
    # =============================
    if enable_resource_monitor:
        logging.info("Starting real-time resource monitoring...")
        resource_monitor_thread = threading.Thread(target=monitor_resources, args=(60,), daemon=True)
        resource_monitor_thread.start()
    ##########################################
    # A) LSTM PART: LOAD, PREPROCESS, TUNE
@@ -231,7 +407,15 @@ def main():
    target_column = 'Close'
    df = df[['Date'] + feature_columns + [target_column]].dropna()
-    from sklearn.preprocessing import MinMaxScaler
+    # 2) Controlled Parallel Data Preprocessing
    if preprocess_workers is None:
        # Default to logical cores minus 2 to prevent overloading
        preprocess_workers = max(1, cpu_stats['logical_cores'] - 2)
    else:
        preprocess_workers = min(preprocess_workers, cpu_stats['logical_cores'])
    df = feature_engineering_parallel(df, num_workers=preprocess_workers)
    scaler_features = MinMaxScaler()
    scaler_target   = MinMaxScaler()
@@ -241,7 +425,7 @@ def main():
    X_scaled = scaler_features.fit_transform(X_all)
    y_scaled = scaler_target.fit_transform(y_all).flatten()
-    # 2) Create sequences
+    # 3) Create sequences
    def create_sequences(features, target, window_size):
        X_seq, y_seq = [], []
        for i in range(len(features) - window_size):
@@ -251,7 +435,7 @@ def main():
    X, y = create_sequences(X_scaled, y_scaled, lstm_window_size)
-    # 3) Split into train/val/test
+    # 4) Split into train/val/test
    train_size = int(len(X) * 0.7)
    val_size   = int(len(X) * 0.15)
    test_size  = len(X) - train_size - val_size
@@ -267,24 +451,8 @@ def main():
    logging.info(f"Scaled validation target shape: {y_val.shape}")
    logging.info(f"Scaled testing target shape: {y_test.shape}")
-    # 4) GPU config
+    # 5) Build and compile LSTM model
    def configure_device():
        gpus = tf.config.list_physical_devices('GPU')
        if gpus:
            try:
                for gpu in gpus:
                    tf.config.experimental.set_memory_growth(gpu, True)
                logging.info(f"{len(gpus)} GPU(s) detected & configured.")
            except RuntimeError as e:
                logging.error(e)
        else:
            logging.info("No GPU detected, using CPU.")
    configure_device()
    # 5) Build LSTM function
    def build_lstm(input_shape, hyperparams):
        from tensorflow.keras.regularizers import l2
        model = Sequential()
        num_layers = hyperparams['num_lstm_layers']
        units      = hyperparams['lstm_units']
@@ -313,7 +481,6 @@ def main():
    # 6) Optuna objective for LSTM
    def lstm_objective(trial):
        import tensorflow as tf
        num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
        lstm_units      = trial.suggest_categorical('lstm_units', [32, 64, 96, 128])
        dropout_rate    = trial.suggest_float('dropout_rate', 0.1, 0.5)
@@ -346,18 +513,25 @@ def main():
        val_mae = min(history.history['val_mae'])
        return val_mae
-    logging.info("Starting LSTM hyperparam optimization with Optuna...")
+    # 7) Hyperparameter Optimization with Optuna (Parallelized)
-    study_lstm= optuna.create_study(direction='minimize')
+    if max_parallel_trials is None:
-    study_lstm.optimize(lstm_objective, n_trials=n_trials_lstm)
+        # Default to logical cores minus 2 to prevent overloading
-    best_lstm_params = study_lstm.best_params
+        max_parallel_trials = max(1, cpu_stats['logical_cores'] - 2)
-    logging.info(f"Best LSTM Hyperparams: {best_lstm_params}")
+    else:
        max_parallel_trials = min(max_parallel_trials, cpu_stats['logical_cores'])
-    # 7) Train final LSTM
+    logging.info(f"Starting LSTM hyperparameter optimization with Optuna using {max_parallel_trials} parallel trials...")
    study_lstm = optuna.create_study(direction='minimize')
    study_lstm.optimize(lstm_objective, n_trials=n_trials_lstm, n_jobs=max_parallel_trials)
    best_lstm_params = study_lstm.best_params
    logging.info(f"Best LSTM Hyperparameters: {best_lstm_params}")
    # 8) Train final LSTM
    final_lstm = build_lstm((X_train.shape[1], X_train.shape[2]), best_lstm_params)
    early_stop_final = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
    lr_reduce_final  = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
-    logging.info("Training best LSTM model with found hyperparams...")
+    logging.info("Training best LSTM model with optimized hyperparameters...")
    hist = final_lstm.fit(
        X_train, y_train,
        epochs=300,
@@ -367,9 +541,9 @@ def main():
        verbose=1
    )
-    # Evaluate LSTM
+    # 9) Evaluate LSTM
    def evaluate_lstm(model, X_test, y_test):
-        logging.info("Evaluating final LSTM...")
+        logging.info("Evaluating final LSTM model...")
        y_pred_scaled = model.predict(X_test).flatten()
        y_pred_scaled = np.clip(y_pred_scaled, 0, 1)
        y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
@@ -384,54 +558,53 @@ def main():
        direction_pred  = np.sign(np.diff(y_pred))
        directional_accuracy = np.mean(direction_actual == direction_pred)
-        logging.info(f"Test MSE: {mse_}")
+        logging.info(f"Test MSE: {mse_:.4f}")
-        logging.info(f"Test RMSE: {rmse_}")
+        logging.info(f"Test RMSE: {rmse_:.4f}")
-        logging.info(f"Test MAE: {mae_}")
+        logging.info(f"Test MAE: {mae_:.4f}")
-        logging.info(f"Test R2 Score: {r2_}")
+        logging.info(f"Test R2 Score: {r2_:.4f}")
-        logging.info(f"Directional Accuracy: {directional_accuracy}")
+        logging.info(f"Directional Accuracy: {directional_accuracy:.4f}")
-        # Plot
+        # Plot Actual vs Predicted
        plt.figure(figsize=(14, 7))
        plt.plot(y_test_actual, label='Actual Price')
        plt.plot(y_pred, label='Predicted Price')
-        plt.title('LSTM: Actual vs Predicted')
+        plt.title('LSTM: Actual vs Predicted Closing Prices')
        plt.legend()
        plt.grid(True)
        plt.savefig('lstm_actual_vs_pred.png')
        plt.close()
-        # Tabulate first 40
+        # Tabulate first 40 results
        table = []
        limit = min(40, len(y_test_actual))
        for i in range(limit):
            table.append([i, round(y_test_actual[i], 2), round(y_pred[i], 2)])
        headers = ["Index", "Actual Price", "Predicted Price"]
        print("\nFirst 40 Actual vs. Predicted Prices:")
        print(tabulate(table, headers=headers, tablefmt="pretty"))
        return r2_, directional_accuracy
    _r2, _diracc = evaluate_lstm(final_lstm, X_test, y_test)
-    # Save LSTM + scalers
+    # 10) Save LSTM and Scalers
    final_lstm.save('best_lstm_model.h5')
    joblib.dump(scaler_features, 'scaler_features.pkl')
    joblib.dump(scaler_target, 'scaler_target.pkl')
-    logging.info("Saved best LSTM model + scalers (best_lstm_model.h5, scaler_features.pkl, scaler_target.pkl).")
+    logging.info("Saved best LSTM model and scaler objects (best_lstm_model.h5, scaler_features.pkl, scaler_target.pkl).")
    ############################################################
    # B) DQN PART: BUILD ENV THAT USES THE LSTM + FORECAST
    ############################################################
    class StockTradingEnvWithLSTM(gym.Env):
        """
-        An environment that uses the LSTM model's predicted next day close
+        A custom OpenAI Gym environment for stock trading that integrates LSTM model predictions.
-        as part of the observation:
+        Observation includes technical indicators, account information, and predicted next close price.
          obs = [technical indicators, balance, shares, cost_basis, predicted_next_close].
        Reward => net_worth - initial_balance each step. 
        """
        metadata = {'render.modes': ['human']}
        def __init__(self, df, feature_columns, lstm_model, scaler_features, scaler_target,
                     window_size=15, initial_balance=10000, transaction_cost=0.001):
-            super().__init__()
+            super(StockTradingEnvWithLSTM, self).__init__()
            self.df = df.reset_index(drop=True)
            self.feature_columns = feature_columns
            self.lstm_model = lstm_model
@@ -449,13 +622,13 @@ def main():
            self.shares_held = 0
            self.cost_basis = 0
-            # raw array of features
+            # Raw array of features
            self.raw_features = df[feature_columns].values
-            # 0=Sell,1=Hold,2=Buy
+            # Action space: 0=Sell, 1=Hold, 2=Buy
            self.action_space = spaces.Discrete(3)
-            # observation dimension = len(feature_columns)+3 +1 => 17 + 3 +1=21
+            # Observation space: [technical indicators, balance, shares, cost_basis, predicted_next_close]
            self.observation_space = spaces.Box(
                low=0, high=1,
                shape=(len(feature_columns) + 3 + 1,),
@@ -475,16 +648,16 @@ def main():
            row_max = np.max(row) if np.max(row) != 0 else 1.0
            row_norm = row / row_max
-            # account info
+            # Account info
            additional = np.array([
                self.balance / self.initial_balance,
-                self.shares_held/100.0,
+                self.shares_held / 100.0,  # Assuming max 100 shares for normalization
                self.cost_basis / (self.initial_balance + 1e-9)
            ], dtype=np.float32)
            # LSTM prediction
            if self.current_step < self.window_size:
-                # not enough history => no forecast
+                # Not enough history => no forecast
                predicted_close = 0.0
            else:
                seq = self.raw_features[self.current_step - self.window_size: self.current_step]
@@ -493,7 +666,7 @@ def main():
                pred_scaled = self.lstm_model.predict(seq_scaled, verbose=0).flatten()[0]
                pred_scaled = np.clip(pred_scaled, 0, 1)
                unscaled = self.scaler_target.inverse_transform([[pred_scaled]])[0, 0]
-                # either keep raw or scale it. We'll do a naive scale by /1000 if typical price is double digits
+                # Normalize predicted close price (assuming a typical price range)
                predicted_close = unscaled / 1000.0
            obs = np.concatenate([row_norm, additional, [predicted_close]]).astype(np.float32)
@@ -540,17 +713,22 @@ def main():
                  f"Profit={profit:.2f}")
    ###################################
-    # C) DQN HYPERPARAM TUNING W/ LSTM
+    # C) DQN HYPERPARAMETER TUNING WITH LSTM
    ###################################
    # We'll define a function that trains a DQN with trial hyperparams,
    # then evaluates final net worth on one run.
    from stable_baselines3.common.evaluation import evaluate_policy
    # We'll define a small function to do final net worth check:
    def evaluate_dqn_networth(model, env, n_episodes=1):
-        # We do a simple loop that runs the entire dataset (1 episode) 
+        """
-        # to see final net worth.
+        Evaluates the trained DQN model by simulating trading over a specified number of episodes.
-        # If you want multiple episodes, you can do multiple resets in random start positions, etc.
+        
        Args:
            model (stable_baselines3.DQN): Trained DQN model.
            env (gym.Env): Trading environment instance.
            n_episodes (int): Number of episodes to run for evaluation.
        Returns:
            float: Average final net worth across episodes.
        """
        final_net_worths = []
        for _ in range(n_episodes):
            obs = env.reset()
@@ -561,31 +739,35 @@ def main():
            final_net_worths.append(env.net_worth)
        return np.mean(final_net_worths)
    # We'll define the DQN objective with Optuna
    def dqn_objective(trial):
-        # we sample some DQN hyperparams
+        """
        Objective function for Optuna to optimize DQN hyperparameters.
        Minimizes the negative of the final net worth achieved by the DQN agent.
        Args:
            trial (optuna.trial.Trial): Optuna trial object.
        Returns:
            float: Negative of the final net worth.
        """
        lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
        gamma = trial.suggest_float("gamma", 0.8, 0.9999)
        exploration_fraction = trial.suggest_float("exploration_fraction", 0.01, 0.3)
        buffer_size = trial.suggest_categorical("buffer_size", [5000, 10000, 20000])
        batch_size  = trial.suggest_categorical("batch_size", [32, 64, 128])
-        # Build environment fresh each time or reuse:
+        # Initialize environment
        # We'll reuse the same data environment but new instance
        env = StockTradingEnvWithLSTM(
            df=df,
            feature_columns=feature_columns,
-            lstm_model= final_lstm,   # use the best LSTM
+            lstm_model=final_lstm,   # Use the trained LSTM model
            scaler_features=scaler_features,
            scaler_target=scaler_target,
            window_size=lstm_window_size
        )
        vec_env = DummyVecEnv([lambda: env])
-        # Build DQN
+        # Initialize DQN model
        from stable_baselines3 import DQN
        from stable_baselines3.common.callbacks import BaseCallback
        dqn_action_logger = ActionLoggingCallback(verbose=0)
        model = DQN(
@@ -598,28 +780,34 @@ def main():
            buffer_size=buffer_size,
            batch_size=batch_size,
            train_freq=4,
-            target_update_interval=1000,
+            target_update_interval=1000
            # etc
        )
-        # Train some timesteps
+
        # Train DQN model
        model.learn(total_timesteps=dqn_total_timesteps, callback=dqn_action_logger)
        # Evaluate final net worth
        final_net_worth = evaluate_dqn_networth(model, env, n_episodes=dqn_eval_episodes)
-        # we want to maximize net worth => minimize negative net worth
+        # Objective is to maximize net worth, so return negative
        return -final_net_worth
-    logging.info("Starting DQN hyperparam tuning with Optuna (using LSTM environment)...")
+    # 11) Hyperparameter Optimization with Optuna (Parallelized)
    if max_parallel_trials is None:
        # Default to logical cores minus 2 to prevent overloading
        max_parallel_trials = max(1, cpu_stats['logical_cores'] - 2)
    else:
        max_parallel_trials = min(max_parallel_trials, cpu_stats['logical_cores'])
    logging.info(f"Starting DQN hyperparameter tuning with Optuna using {max_parallel_trials} parallel trials...")
    study_dqn = optuna.create_study(direction='minimize')
-    study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn)
+    study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn, n_jobs=max_parallel_trials)
    best_dqn_params = study_dqn.best_params
-    logging.info(f"Best DQN hyperparams: {best_dqn_params}")
+    logging.info(f"Best DQN Hyperparameters: {best_dqn_params}")
    ###################################
-    # D) TRAIN FINAL DQN WITH BEST PARAMS
+    # D) TRAIN FINAL DQN WITH BEST PARAMETERS
    ###################################
-    logging.info("Training final DQN with best hyperparams & LSTM environment...")
+    logging.info("Training final DQN model with best hyperparameters...")
    env_final = StockTradingEnvWithLSTM(
        df=df,
        feature_columns=feature_columns,
@@ -630,8 +818,8 @@ def main():
    )
    vec_env_final = DummyVecEnv([lambda: env_final])
-    # Build final model
+    final_dqn_logger = ActionLoggingCallback(verbose=1)  # Enable detailed logging
-    final_dqn_logger = ActionLoggingCallback(verbose=1)  # We'll see logs each rollout
+
    final_model = DQN(
        'MlpPolicy',
        vec_env_final,
@@ -643,15 +831,15 @@ def main():
        batch_size=best_dqn_params['batch_size'],
        train_freq=4,
        target_update_interval=1000
        # etc if you want other params
    )
    final_model.learn(total_timesteps=dqn_total_timesteps, callback=final_dqn_logger)
    final_model.save("best_dqn_model_lstm.zip")
    logging.info("Final DQN model trained and saved as 'best_dqn_model_lstm.zip'.")
    ###################################
    # E) FINAL INFERENCE & LOG RESULTS
    ###################################
-    logging.info("Running final inference with best DQN...")
+    logging.info("Running final inference with the trained DQN model...")
    env_test = StockTradingEnvWithLSTM(
        df=df,
@@ -711,9 +899,44 @@ def main():
    print(f"\n== Last 15 Steps ==")
    print(tabulate(rows, headers=headers, tablefmt="pretty"))
-    logging.info("All tasks complete. Exiting.")
+    logging.info("Final inference completed. Results logged and displayed.")
    ###################################
    # F) OPTIONAL: RETRY LOOP IF NET WORTH < THRESHOLD
    ###################################
    NET_WORTH_THRESHOLD = 10500.0  # example threshold
    if final_net_worth < NET_WORTH_THRESHOLD:
        logging.warning(f"Final net worth (${final_net_worth:.2f}) is below ${NET_WORTH_THRESHOLD:.2f}. Retraining the same DQN model to learn from mistakes...")
        # We continue training the SAME final_model without resetting its replay buffer.
        # By setting `reset_num_timesteps=False`, we keep the replay buffer and learned weights.
        additional_timesteps = 50000
        logging.info(f"Retraining the existing DQN model for an additional {additional_timesteps} timesteps (keeping old experiences).")
        # If you want to see action distributions again, you can keep the same callback or define a new one:
        final_model.learn(
            total_timesteps=additional_timesteps, 
            reset_num_timesteps=False,       # Keep replay buffer + internal step counter
            callback=final_dqn_logger        # Optional: to log actions again
        )
        # Evaluate again
        obs = env_test.reset()
        done = False
        second_total_reward = 0.0
        while not done:
            action, _ = final_model.predict(obs, deterministic=True)
            obs, reward, done, info = env_test.step(action)
            second_total_reward += reward
        second_net_worth = env_test.net_worth
        second_profit = second_net_worth - env_test.initial_balance
        logging.info(f"After additional training, new final net worth=${second_net_worth:.2f}, profit=${second_profit:.2f}")
        if second_net_worth < NET_WORTH_THRESHOLD:
            logging.warning("Even after continued training, net worth is still below threshold. Consider a deeper hyperparameter search or analyzing the environment settings.")
 # Script entry point: run the full LSTM + DQN pipeline only when executed directly.
 if __name__ == "__main__":
    main()