updating git again

klein panic
2025-01-27 17:30:50 -05:00
parent 6cd015dda9
commit 1c92544eaa
8 changed files with 809 additions and 259 deletions

Binary image file changed (not shown): 91 KiB -> 70 KiB.

@@ -10,7 +10,6 @@ from tabulate import tabulate
 from sklearn.preprocessing import MinMaxScaler
 from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
-from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
@@ -18,30 +17,32 @@ from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
 from tensorflow.keras.optimizers import Adam, Nadam
 from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
 from tensorflow.keras.losses import Huber
+from tensorflow.keras.regularizers import l2
 import xgboost as xgb
 import optuna
 from optuna.integration import KerasPruningCallback
-# Reinforcement Learning
+# For Reinforcement Learning
 import gym
 from gym import spaces
 from stable_baselines3 import DQN
 from stable_baselines3.common.vec_env import DummyVecEnv
 # Suppress TensorFlow warnings
-os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
+os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Suppress INFO/WARNING
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-##############################
-# 1. Data Loading & Indicators
-##############################
+###############################
+# 1. Data Loading / Indicators
+###############################
 def load_data(file_path):
     logging.info(f"Loading data from: {file_path}")
     try:
-        data = pd.read_csv(file_path, parse_dates=['time'])
+        df = pd.read_csv(file_path, parse_dates=['time'])
     except FileNotFoundError:
         logging.error(f"File not found: {file_path}")
         sys.exit(1)
@@ -59,83 +60,71 @@ def load_data(file_path):
         'low': 'Low',
         'close': 'Close'
     }
-    data.rename(columns=rename_mapping, inplace=True)
-    # Sort by Date
-    data.sort_values('Date', inplace=True)
-    data.reset_index(drop=True, inplace=True)
-    logging.info(f"Data columns after renaming: {data.columns.tolist()}")
+    df.rename(columns=rename_mapping, inplace=True)
+    logging.info(f"Data columns after renaming: {df.columns.tolist()}")
+    df.sort_values('Date', inplace=True)
+    df.reset_index(drop=True, inplace=True)
     logging.info("Data loaded and sorted successfully.")
-    return data
+    return df
 def compute_rsi(series, window=14):
     delta = series.diff()
     gain = delta.where(delta > 0, 0).rolling(window=window).mean()
     loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
-    RS = gain / loss
+    RS = gain / (loss + 1e-9)
     return 100 - (100 / (1 + RS))
 def compute_macd(series, span_short=12, span_long=26, span_signal=9):
     ema_short = series.ewm(span=span_short, adjust=False).mean()
     ema_long = series.ewm(span=span_long, adjust=False).mean()
     macd_line = ema_short - ema_long
     signal_line = macd_line.ewm(span=span_signal, adjust=False).mean()
-    return macd_line - signal_line  # MACD histogram
+    return macd_line - signal_line  # histogram
-def compute_adx(df, window=14):
-    """
-    Example ADX calculation (pseudo-real):
-    You can implement a full ADX formula if youd like.
-    Here, we do a slightly more robust approach than rolling std.
-    """
-    # True range
-    df['H-L'] = df['High'] - df['Low']
-    df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs()
-    df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs()
-    tr = df[['H-L', 'H-Cp', 'L-Cp']].max(axis=1)
-    tr_rolling = tr.rolling(window=window).mean()
-    # Simplistic to replicate ADX-like effect
-    adx_placeholder = tr_rolling / df['Close']
-    df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True)
-    return adx_placeholder
 def compute_obv(df):
-    # On-Balance Volume
     signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0)
     return signed_volume.cumsum()
+def compute_adx(df, window=14):
+    """Pseudo-ADX approach using rolling True Range / Close."""
+    df['H-L'] = df['High'] - df['Low']
+    df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs()
+    df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs()
+    tr = df[['H-L','H-Cp','L-Cp']].max(axis=1)
+    tr_rolling = tr.rolling(window=window).mean()
+    adx_placeholder = tr_rolling / (df['Close'] + 1e-9)
+    df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True)
+    return adx_placeholder
 def compute_bollinger_bands(series, window=20, num_std=2):
-    """
-    Bollinger Bands: middle=MA, upper=MA+2*std, lower=MA-2*std
-    Return the band width or separate columns.
-    """
     sma = series.rolling(window=window).mean()
     std = series.rolling(window=window).std()
     upper = sma + num_std * std
     lower = sma - num_std * std
-    bandwidth = (upper - lower) / sma  # optional metric
+    bandwidth = (upper - lower) / (sma + 1e-9)
     return upper, lower, bandwidth
 def compute_mfi(df, window=14):
-    """
-    Money Flow Index: uses typical price, volume, direction.
-    For demonstration.
-    """
     typical_price = (df['High'] + df['Low'] + df['Close']) / 3
     money_flow = typical_price * df['Volume']
-    # Positive or negative
-    df_shift = typical_price.shift(1)
-    flow_positive = money_flow.where(typical_price > df_shift, 0)
-    flow_negative = money_flow.where(typical_price < df_shift, 0)
-    # Sum over window
-    pos_sum = flow_positive.rolling(window=window).sum()
-    neg_sum = flow_negative.rolling(window=window).sum()
-    # RSI-like formula
-    mfi = 100 - (100 / (1 + pos_sum / (neg_sum + 1e-9)))
+    prev_tp = typical_price.shift(1)
+    flow_pos = money_flow.where(typical_price > prev_tp, 0)
+    flow_neg = money_flow.where(typical_price < prev_tp, 0)
+    pos_sum = flow_pos.rolling(window=window).sum()
+    neg_sum = flow_neg.rolling(window=window).sum()
+    mfi = 100 - (100 / (1 + pos_sum/(neg_sum+1e-9)))
     return mfi
 def calculate_technical_indicators(df):
     logging.info("Calculating technical indicators...")
@@ -143,100 +132,97 @@ def calculate_technical_indicators(df):
     df['MACD'] = compute_macd(df['Close'])
     df['OBV'] = compute_obv(df)
     df['ADX'] = compute_adx(df)
-    # Bollinger
-    upper_bb, lower_bb, bb_width = compute_bollinger_bands(df['Close'], window=20)
+    upper_bb, lower_bb, bb_width = compute_bollinger_bands(df['Close'], window=20, num_std=2)
     df['BB_Upper'] = upper_bb
     df['BB_Lower'] = lower_bb
     df['BB_Width'] = bb_width
-    # MFI
-    df['MFI'] = compute_mfi(df)
-    # Simple/EMA
+    df['MFI'] = compute_mfi(df, window=14)
     df['SMA_5'] = df['Close'].rolling(window=5).mean()
     df['SMA_10'] = df['Close'].rolling(window=10).mean()
     df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
     df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
-    # STD
     df['STDDEV_5'] = df['Close'].rolling(window=5).std()
     df.dropna(inplace=True)
     logging.info("Technical indicators calculated successfully.")
     return df
-##############################
-# 2. Parse Arguments
-##############################
+###############################
+# 2. ARGUMENT PARSING
+###############################
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Train LSTM and DQN models for stock trading.')
-    parser.add_argument('csv_path', type=str, help='Path to the CSV data file.')
+    parser.add_argument('csv_path', type=str, help='Path to the CSV data file (with columns time,open,high,low,close,volume).')
     return parser.parse_args()
-##############################
-# 3. Main
-##############################
+###############################
+# 3. MAIN
+###############################
 def main():
+    # 1) Parse args
     args = parse_arguments()
     csv_path = args.csv_path
-    # 1) Load data
+    # 2) Load data & advanced indicators
     data = load_data(csv_path)
     data = calculate_technical_indicators(data)
-    # 2) Build feature set
-    # We deliberately EXCLUDE 'Close' from the features so the model doesn't trivially see it.
-    # Instead, rely on advanced indicators + OHLC + Volume.
+    # EXCLUDE 'Close' from feature inputs
     feature_columns = [
-        'Open', 'High', 'Low', 'Volume',
-        'RSI', 'MACD', 'OBV', 'ADX',
-        'BB_Upper', 'BB_Lower', 'BB_Width',
-        'MFI', 'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5'
+        'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5',
+        'RSI', 'MACD', 'ADX', 'OBV', 'Volume', 'Open', 'High', 'Low',
+        'BB_Upper','BB_Lower','BB_Width','MFI'
     ]
-    target_column = 'Close'  # still used for label/evaluation
-    # Keep only these columns + Date + target
-    data = data[['Date'] + feature_columns + [target_column]].dropna()
-    # 3) Scale data
-    from sklearn.preprocessing import MinMaxScaler
+    target_column = 'Close'
+    data = data[['Date'] + feature_columns + [target_column]]
+    data.dropna(inplace=True)
+    # 3) Scale
     scaler_features = MinMaxScaler()
     scaler_target = MinMaxScaler()
-    scaled_features = scaler_features.fit_transform(data[feature_columns])
-    scaled_target = scaler_target.fit_transform(data[[target_column]]).flatten()
-    # 4) Create sequences for LSTM
+    X_all = data[feature_columns].values
+    y_all = data[[target_column]].values
+    X_scaled = scaler_features.fit_transform(X_all)
+    y_scaled = scaler_target.fit_transform(y_all).flatten()
+    # 4) Create LSTM Sequences
     def create_sequences(features, target, window_size=15):
-        X, y = [], []
+        X_seq, y_seq = [], []
         for i in range(len(features) - window_size):
-            X.append(features[i:i+window_size])
-            y.append(target[i+window_size])
-        return np.array(X), np.array(y)
+            X_seq.append(features[i:i+window_size])
+            y_seq.append(target[i+window_size])
+        return np.array(X_seq), np.array(y_seq)
     window_size = 15
-    X, y = create_sequences(scaled_features, scaled_target, window_size)
+    X, y = create_sequences(X_scaled, y_scaled, window_size)
     # 5) Train/Val/Test Split
-    train_size = int(len(X) * 0.7)
-    val_size = int(len(X) * 0.15)
+    train_size = int(len(X)*0.7)
+    val_size = int(len(X)*0.15)
     test_size = len(X) - train_size - val_size
-    X_train, X_val, X_test = (
-        X[:train_size],
-        X[train_size:train_size+val_size],
-        X[train_size+val_size:]
-    )
-    y_train, y_val, y_test = (
-        y[:train_size],
-        y[train_size:train_size+val_size],
-        y[train_size+val_size:]
-    )
-    logging.info(f"X_train: {X_train.shape}, X_val: {X_val.shape}, X_test: {X_test.shape}")
-    logging.info(f"y_train: {y_train.shape}, y_val: {y_val.shape}, y_test: {y_test.shape}")
+    X_train = X[:train_size]
+    y_train = y[:train_size]
+    X_val = X[train_size:train_size+val_size]
+    y_val = y[train_size:train_size+val_size]
+    X_test = X[train_size+val_size:]
+    y_test = y[train_size+val_size:]
+    logging.info(f"Scaled training features shape: {X_train.shape}")
+    logging.info(f"Scaled validation features shape: {X_val.shape}")
+    logging.info(f"Scaled testing features shape: {X_test.shape}")
+    logging.info(f"Scaled training target shape: {y_train.shape}")
+    logging.info(f"Scaled validation target shape: {y_val.shape}")
+    logging.info(f"Scaled testing target shape: {y_test.shape}")
-    # 6) Device Config
+    # 6) GPU/CPU Config
     def configure_device():
         gpus = tf.config.list_physical_devices('GPU')
         if gpus:
@@ -248,224 +234,277 @@ def main():
                 logging.error(e)
         else:
             logging.info("No GPU detected, using CPU.")
     configure_device()
     # 7) Build LSTM
-    from tensorflow.keras.regularizers import l2
-    def build_lstm(input_shape, units=128, dropout=0.3, lr=1e-3):
+    def build_advanced_lstm(input_shape, hyperparams):
         model = Sequential()
-        # Example: 2 stacked LSTM layers
-        model.add(Bidirectional(LSTM(units, return_sequences=True, kernel_regularizer=l2(1e-4)), input_shape=input_shape))
-        model.add(Dropout(dropout))
-        model.add(Bidirectional(LSTM(units, return_sequences=False, kernel_regularizer=l2(1e-4))))
-        model.add(Dropout(dropout))
+        for i in range(hyperparams['num_lstm_layers']):
+            return_seqs = (i < hyperparams['num_lstm_layers'] - 1)
+            model.add(Bidirectional(
+                LSTM(hyperparams['lstm_units'],
+                     return_sequences=return_seqs,
+                     kernel_regularizer=tf.keras.regularizers.l2(0.001)
+                ), input_shape=input_shape if i==0 else None))
+            model.add(Dropout(hyperparams['dropout_rate']))
         model.add(Dense(1, activation='linear'))
-        optimizer = Adam(learning_rate=lr)
-        model.compile(loss=Huber(), optimizer=optimizer, metrics=['mae'])
+        # Optimizer
+        if hyperparams['optimizer'] == 'Adam':
+            opt = Adam(learning_rate=hyperparams['learning_rate'], decay=hyperparams['decay'])
+        elif hyperparams['optimizer'] == 'Nadam':
+            opt = Nadam(learning_rate=hyperparams['learning_rate'])
+        else:
+            opt = Adam(learning_rate=hyperparams['learning_rate'])
+        model.compile(optimizer=opt, loss=Huber(), metrics=['mae'])
         return model
-    # 8) Train LSTM (you can still do Optuna if you like, omitted here for brevity)
-    model_lstm = build_lstm((X_train.shape[1], X_train.shape[2]), units=128, dropout=0.3, lr=1e-3)
-    early_stop = EarlyStopping(patience=15, restore_best_weights=True)
-    reduce_lr = ReduceLROnPlateau(factor=0.5, patience=5, min_lr=1e-6)
-    model_lstm.fit(
-        X_train, y_train,
-        validation_data=(X_val, y_val),
-        epochs=100,
-        batch_size=32,
-        callbacks=[early_stop, reduce_lr],
-        verbose=1
-    )
+    # 8) Optuna Tuning
+    def objective(trial):
+        num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
+        lstm_units = trial.suggest_categorical('lstm_units', [32, 64, 96, 128])
+        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
+        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
+        optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'Nadam'])
+        decay = trial.suggest_float('decay', 0.0, 1e-4)
+        hyperparams = {
+            'num_lstm_layers': num_lstm_layers,
+            'lstm_units': lstm_units,
+            'dropout_rate': dropout_rate,
+            'learning_rate': learning_rate,
+            'optimizer': optimizer_name,
+            'decay': decay
+        }
+        model_ = build_advanced_lstm((X_train.shape[1], X_train.shape[2]), hyperparams)
+        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
+        lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
+        cb_prune = KerasPruningCallback(trial, 'val_loss')
+        history = model_.fit(
+            X_train, y_train,
+            epochs=100,
+            batch_size=16,
+            validation_data=(X_val, y_val),
+            callbacks=[early_stop, lr_reduce, cb_prune],
+            verbose=0
+        )
+        val_mae = min(history.history['val_mae'])
+        return val_mae
+    logging.info("Starting hyperparameter optimization with Optuna...")
+    study = optuna.create_study(direction='minimize')
+    study.optimize(objective, n_trials=50)  # might take a long time
+    best_params = study.best_params
+    logging.info(f"Best Hyperparameters from Optuna: {best_params}")
+    # 9) Train Best LSTM
+    best_model = build_advanced_lstm((X_train.shape[1], X_train.shape[2]), best_params)
+    early_stop2 = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
+    lr_reduce2 = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
+    logging.info("Training the best LSTM model with optimized hyperparameters...")
+    history = best_model.fit(
+        X_train, y_train,
+        epochs=300,
+        batch_size=16,
+        validation_data=(X_val, y_val),
+        callbacks=[early_stop2, lr_reduce2],
+        verbose=1
+    )
-    # 9) Evaluate
-    def evaluate_lstm(model, X_test, y_test):
-        # Predict scaled
-        y_pred_scaled = model.predict(X_test).flatten()
-        # If we forcibly clamp predictions to [0,1], do so, else skip:
-        y_pred_scaled = np.clip(y_pred_scaled, 0, 1)
-        y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1,1)).flatten()
-        y_true = scaler_target.inverse_transform(y_test.reshape(-1,1)).flatten()
-        mse = mean_squared_error(y_true, y_pred)
-        rmse = np.sqrt(mse)
-        mae = mean_absolute_error(y_true, y_pred)
-        r2 = r2_score(y_true, y_pred)
-        # Direction
-        direction_true = np.sign(np.diff(y_true))
-        direction_pred = np.sign(np.diff(y_pred))
-        directional_acc = np.mean(direction_true == direction_pred)
-        logging.info(f"LSTM Test -> MSE={mse:.4f}, RMSE={rmse:.4f}, MAE={mae:.4f}, R2={r2:.4f}, DirAcc={directional_acc:.4f}")
-        # Quick Plot
-        plt.figure(figsize=(12,6))
-        plt.plot(y_true[:100], label='Actual')
-        plt.plot(y_pred[:100], label='Predicted')
-        plt.title("LSTM: Actual vs Predicted (first 100 test points)")
-        plt.legend()
-        plt.savefig("lstm_actual_vs_pred.png")
-        plt.close()
-    evaluate_lstm(model_lstm, X_test, y_test)
-    # Save
-    model_lstm.save("improved_lstm_model.keras")
-    import joblib
-    joblib.dump(scaler_features, "improved_scaler_features.pkl")
-    joblib.dump(scaler_target, "improved_scaler_target.pkl")
+    # 10) Evaluate
+    def evaluate_model(model, X_test, y_test):
+        logging.info("Evaluating model...")
+        y_pred_scaled = model.predict(X_test).flatten()
+        y_pred_scaled = np.clip(y_pred_scaled, 0, 1)  # clamp if needed
+        # Inverse
+        y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1,1)).flatten()
+        y_test_actual = scaler_target.inverse_transform(y_test.reshape(-1,1)).flatten()
+        mse = mean_squared_error(y_test_actual, y_pred)
+        rmse = np.sqrt(mse)
+        mae = mean_absolute_error(y_test_actual, y_pred)
+        r2 = r2_score(y_test_actual, y_pred)
+        # Directional accuracy
+        direction_actual = np.sign(np.diff(y_test_actual))
+        direction_pred = np.sign(np.diff(y_pred))
+        directional_accuracy = np.mean(direction_actual == direction_pred)
+        logging.info(f"Test MSE: {mse}")
+        logging.info(f"Test RMSE: {rmse}")
+        logging.info(f"Test MAE: {mae}")
+        logging.info(f"Test R2 Score: {r2}")
+        logging.info(f"Directional Accuracy: {directional_accuracy}")
+        # Plot
+        plt.figure(figsize=(14, 7))
+        plt.plot(y_test_actual, label='Actual Price')
+        plt.plot(y_pred, label='Predicted Price')
+        plt.title('Actual vs Predicted Prices')
+        plt.xlabel('Time Step')
+        plt.ylabel('Price')
+        plt.legend()
+        plt.grid(True)
+        plt.savefig('actual_vs_predicted.png')
+        plt.close()
+        logging.info("Actual vs Predicted plot saved as 'actual_vs_predicted.png'")
+        # Tabulate first 40 predictions (like old code)
+        table_data = []
+        for i in range(min(40, len(y_test_actual))):
+            table_data.append([i, round(y_test_actual[i],2), round(y_pred[i],2)])
+        headers = ["Index", "Actual Price", "Predicted Price"]
+        print(tabulate(table_data, headers=headers, tablefmt="pretty"))
+        return mse, rmse, mae, r2, directional_accuracy
+    mse, rmse, mae, r2, directional_accuracy = evaluate_model(best_model, X_test, y_test)
+    # 11) Save
+    best_model.save('optimized_lstm_model.h5')
+    import joblib
+    joblib.dump(scaler_features, 'scaler_features.save')
+    joblib.dump(scaler_target, 'scaler_target.save')
+    logging.info("Model and scalers saved as 'optimized_lstm_model.h5', 'scaler_features.save', and 'scaler_target.save'.")
-    ##############################
-    # 10) Reinforcement Learning
-    ##############################
+    #########################################
+    # 12) Reinforcement Learning Environment
+    #########################################
     class StockTradingEnv(gym.Env):
         """
-        Improved RL Env that:
-          - excludes raw 'Close' from observation
-          - includes transaction cost (optional)
-          - uses step-based PnL as reward
+        A simple stock trading environment for OpenAI Gym
         """
         metadata = {'render.modes': ['human']}
-        def __init__(self, df, initial_balance=10000, transaction_cost=0.001):
+        def __init__(self, df, initial_balance=10000):
             super().__init__()
-            self.df = df.reset_index(drop=True)
+            self.df = df.reset_index()
             self.initial_balance = initial_balance
             self.balance = initial_balance
             self.net_worth = initial_balance
+            self.current_step = 0
             self.max_steps = len(df)
-            self.current_step = 0
-            # Add transaction cost in decimal form (0.001 => 0.1%)
-            self.transaction_cost = transaction_cost
             self.shares_held = 0
             self.cost_basis = 0
-            # Suppose we exclude 'Close' from features to remove direct see of final price
-            self.obs_columns = [
-                'Open', 'High', 'Low', 'Volume',
-                'RSI', 'MACD', 'OBV', 'ADX',
-                'BB_Upper', 'BB_Lower', 'BB_Width',
-                'MFI', 'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5'
-            ]
-            # We'll normalize features with the same scaler used for LSTM. If you want EXACT same scaling:
-            # you can pass the same 'scaler_features' object into this environment.
-            self.scaler = MinMaxScaler().fit(df[self.obs_columns])
-            # Or load from a pkl if you prefer: joblib.load("improved_scaler_features.pkl")
-            self.action_space = spaces.Discrete(3)  # 0=Sell, 1=Hold, 2=Buy
+            # We re-use feature_columns from above
+            # (Excluding 'Close' from the observation)
+            # Actions: 0=Sell, 1=Hold, 2=Buy
+            self.action_space = spaces.Discrete(3)
+            # Observations => advanced feature columns + 3 additional (balance, shares, cost_basis)
             self.observation_space = spaces.Box(
-                low=0.0, high=1.0,
-                shape=(len(self.obs_columns) + 3,),  # + balance, shares, cost_basis
+                low=0,
+                high=1,
+                shape=(len(feature_columns) + 3,),
                 dtype=np.float32
             )
         def reset(self):
             self.balance = self.initial_balance
             self.net_worth = self.initial_balance
+            self.current_step = 0
             self.shares_held = 0
             self.cost_basis = 0
-            self.current_step = 0
-            return self._get_obs()
-        def step(self, action):
-            # Current row
-            row = self.df.iloc[self.current_step]
-            current_price = row['Close']
-            prev_net_worth = self.net_worth
-            if action == 2:  # Buy
-                shares_bought = int(self.balance // current_price)
-                if shares_bought > 0:
-                    cost = shares_bought * current_price
-                    fee = cost * self.transaction_cost
-                    self.balance -= (cost + fee)
-                    # Weighted average cost basis
-                    prev_shares = self.shares_held
-                    self.shares_held += shares_bought
-                    self.cost_basis = (
-                        (self.cost_basis * prev_shares) + (shares_bought * current_price)
-                    ) / self.shares_held
-            elif action == 0:  # Sell
-                if self.shares_held > 0:
-                    revenue = self.shares_held * current_price
-                    fee = revenue * self.transaction_cost
-                    self.balance += (revenue - fee)
-                    self.shares_held = 0
-                    self.cost_basis = 0
-            # Recompute net worth
-            self.net_worth = self.balance + self.shares_held * current_price
-            self.current_step += 1
-            done = (self.current_step >= self.max_steps - 1)
-            # *Step-based* reward => daily PnL
-            reward = self.net_worth - prev_net_worth
-            obs = self._get_obs()
-            return obs, reward, done, {}
-        def _get_obs(self):
-            row = self.df.iloc[self.current_step][self.obs_columns]
-            # Scale
-            scaled = self.scaler.transform([row])[0]
-            additional = np.array([
-                self.balance / self.initial_balance,
-                self.shares_held / 100.0,
-                self.cost_basis / (self.initial_balance+1e-9)
-            ], dtype=np.float32)
-            obs = np.concatenate([scaled, additional]).astype(np.float32)
-            return obs
+            return self._next_observation()
+        def _next_observation(self):
+            # Use same approach as old code: we take the row from df
+            obs = self.df.loc[self.current_step, feature_columns].values
+            # Simple normalization by max
+            obs = obs / np.max(obs) if np.max(obs)!=0 else obs
+            additional = np.array([
+                self.balance / self.initial_balance,
+                self.shares_held / 100.0,
+                self.cost_basis / self.initial_balance
+            ])
+            return np.concatenate([obs, additional])
+        def step(self, action):
+            current_price = self.df.loc[self.current_step, 'Close']
+            if action == 2:  # Buy
+                total_possible = self.balance // current_price
+                shares_bought = total_possible
+                if shares_bought > 0:
+                    self.balance -= shares_bought * current_price
+                    self.shares_held += shares_bought
+                    self.cost_basis = (
+                        (self.cost_basis * (self.shares_held - shares_bought)) +
+                        (shares_bought * current_price)
+                    ) / self.shares_held
+            elif action == 0:  # Sell
+                if self.shares_held > 0:
+                    self.balance += self.shares_held * current_price
+                    self.shares_held = 0
+                    self.cost_basis = 0
+            self.net_worth = self.balance + self.shares_held * current_price
+            self.current_step += 1
+            done = (self.current_step >= self.max_steps - 1)
+            reward = self.net_worth - self.initial_balance
+            obs = self._next_observation()
+            return obs, reward, done, {}
         def render(self, mode='human'):
             profit = self.net_worth - self.initial_balance
-            print(f"Step: {self.current_step}, "
-                  f"Balance: {self.balance:.2f}, "
-                  f"Shares: {self.shares_held}, "
-                  f"NetWorth: {self.net_worth:.2f}, "
-                  f"Profit: {profit:.2f}")
+            print(f"Step: {self.current_step}")
+            print(f"Balance: {self.balance}")
+            print(f"Shares held: {self.shares_held} (Cost Basis: {self.cost_basis})")
+            print(f"Net worth: {self.net_worth}")
+            print(f"Profit: {profit}")
-    ##############################
-    # 11) Train DQN
-    ##############################
-    def train_dqn(env):
-        logging.info("Training DQN agent with improved environment...")
-        model = DQN(
-            'MlpPolicy',
-            env,
-            verbose=1,
-            learning_rate=1e-3,
-            buffer_size=50000,
-            learning_starts=1000,
-            batch_size=64,
-            tau=0.99,
-            gamma=0.99,
-            train_freq=4,
-            target_update_interval=1000,
-            exploration_fraction=0.1,
-            exploration_final_eps=0.02,
-            tensorboard_log="./dqn_enhanced_tensorboard/"
-        )
-        model.learn(total_timesteps=50000)
-        model.save("improved_dqn_agent")
-        return model
-    # Initialize environment with the same data
-    # *In a real scenario, you might feed a different dataset or do a train/test split
-    #  for the RL environment, too.
-    rl_env = StockTradingEnv(data, initial_balance=10000, transaction_cost=0.001)
-    vec_env = DummyVecEnv([lambda: rl_env])
-    dqn_model = train_dqn(vec_env)
-    logging.info("Finished DQN training. You can test with a script like 'use_dqn.py' or do an internal test here.")
+    def train_dqn_agent(env):
+        logging.info("Training DQN Agent...")
+        try:
+            model = DQN(
+                'MlpPolicy',
+                env,
+                verbose=1,
+                learning_rate=1e-3,
+                buffer_size=10000,
+                learning_starts=1000,
+                batch_size=64,
+                tau=1.0,
+                gamma=0.99,
+                train_freq=4,
+                target_update_interval=1000,
+                exploration_fraction=0.1,
+                exploration_final_eps=0.02,
+                tensorboard_log="./dqn_stock_tensorboard/"
+            )
+            model.learn(total_timesteps=100000)
+            model.save("dqn_stock_trading")
+            logging.info("DQN Agent trained and saved as 'dqn_stock_trading.zip'.")
+            return model
+        except Exception as e:
+            logging.error(f"Error training DQN Agent: {e}")
+            sys.exit(1)
+    # Initialize RL environment
+    logging.info("Initializing and training DQN environment...")
+    trading_env = StockTradingEnv(data)
+    trading_env = DummyVecEnv([lambda: trading_env])
+    # Train
+    dqn_model = train_dqn_agent(trading_env)
+    logging.info("All tasks complete. Exiting.")
 if __name__ == "__main__":
     main()

Binary file not shown.


@@ -0,0 +1,511 @@
import os
import sys
import argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from tabulate import tabulate
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.optimizers import Adam, Nadam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.losses import Huber
from tensorflow.keras.regularizers import l2
import xgboost as xgb
import optuna
from optuna.integration import KerasPruningCallback
# Reinforcement Learning
import gym
from gym import spaces
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import DummyVecEnv
# Suppress TensorFlow warnings
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
##############################
# 1. Data Loading & Indicators
##############################
def load_data(file_path):
logging.info(f"Loading data from: {file_path}")
try:
data = pd.read_csv(file_path, parse_dates=['time'])
except FileNotFoundError:
logging.error(f"File not found: {file_path}")
sys.exit(1)
except pd.errors.ParserError as e:
logging.error(f"Error parsing CSV file: {e}")
sys.exit(1)
except Exception as e:
logging.error(f"Unexpected error: {e}")
sys.exit(1)
rename_mapping = {
'time': 'Date',
'open': 'Open',
'high': 'High',
'low': 'Low',
'close': 'Close'
}
data.rename(columns=rename_mapping, inplace=True)
# Sort by Date
data.sort_values('Date', inplace=True)
data.reset_index(drop=True, inplace=True)
logging.info(f"Data columns after renaming: {data.columns.tolist()}")
logging.info("Data loaded and sorted successfully.")
return data
def compute_rsi(series, window=14):
delta = series.diff()
gain = delta.where(delta > 0, 0).rolling(window=window).mean()
loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
RS = gain / (loss + 1e-9) # to avoid zero division
return 100 - (100 / (1 + RS))
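# Note: RS is the ratio of average gain to average loss over `window` bars, so
# RSI = 100 - 100/(1 + RS); the 1e-9 term only guards against division by zero
# when the window contains no losses.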
def compute_macd(series, span_short=12, span_long=26, span_signal=9):
ema_short = series.ewm(span=span_short, adjust=False).mean()
ema_long = series.ewm(span=span_long, adjust=False).mean()
macd_line = ema_short - ema_long
signal_line = macd_line.ewm(span=span_signal, adjust=False).mean()
return macd_line - signal_line # MACD histogram
def compute_adx(df, window=14):
"""
Example ADX-style calculation (simplified):
You can implement the full ADX formula if you'd like.
"""
df['H-L'] = df['High'] - df['Low']
df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs()
df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs()
tr = df[['H-L', 'H-Cp', 'L-Cp']].max(axis=1)
tr_rolling = tr.rolling(window=window).mean()
# Simplistic replication
adx_placeholder = tr_rolling / (df['Close'] + 1e-9)
df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True)
return adx_placeholder
def compute_obv(df):
signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0)
return signed_volume.cumsum()
def compute_bollinger_bands(series, window=20, num_std=2):
sma = series.rolling(window=window).mean()
std = series.rolling(window=window).std()
upper = sma + num_std * std
lower = sma - num_std * std
bandwidth = (upper - lower) / (sma + 1e-9)
return upper, lower, bandwidth
def compute_mfi(df, window=14):
typical_price = (df['High'] + df['Low'] + df['Close']) / 3
money_flow = typical_price * df['Volume']
prev_tp = typical_price.shift(1)
flow_positive = money_flow.where(typical_price > prev_tp, 0)
flow_negative = money_flow.where(typical_price < prev_tp, 0)
pos_sum = flow_positive.rolling(window=window).sum()
neg_sum = flow_negative.rolling(window=window).sum()
mfi = 100 - (100 / (1 + pos_sum/(neg_sum+1e-9)))
return mfi
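# MFI mirrors the RSI construction: the money-flow ratio pos_sum/neg_sum takes the
# place of RS, so values near 100 mean inflows dominated the window and values
# near 0 mean outflows did.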
def calculate_technical_indicators(df):
logging.info("Calculating technical indicators...")
df['RSI'] = compute_rsi(df['Close'], window=14)
df['MACD'] = compute_macd(df['Close'])
df['OBV'] = compute_obv(df)
df['ADX'] = compute_adx(df)
# Bollinger
up, low, bw = compute_bollinger_bands(df['Close'], window=20)
df['BB_Upper'] = up
df['BB_Lower'] = low
df['BB_Width'] = bw
# MFI
df['MFI'] = compute_mfi(df)
# Simple/EMA
df['SMA_5'] = df['Close'].rolling(window=5).mean()
df['SMA_10'] = df['Close'].rolling(window=10).mean()
df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
# STD
df['STDDEV_5'] = df['Close'].rolling(window=5).std()
df.dropna(inplace=True)
logging.info("Technical indicators calculated successfully.")
return df
##############################
# 2. Parse Arguments
##############################
def parse_arguments():
parser = argparse.ArgumentParser(description='Train LSTM and DQN models for stock trading.')
parser.add_argument('csv_path', type=str, help='Path to the CSV data file.')
return parser.parse_args()
##############################
# 3. MAIN
##############################
def main():
args = parse_arguments()
csv_path = args.csv_path
# 1) Load data
data = load_data(csv_path)
data = calculate_technical_indicators(data)
# 2) Build feature set (EXCLUDING 'Close' from the features)
feature_columns = [
'Open', 'High', 'Low', 'Volume',
'RSI', 'MACD', 'OBV', 'ADX',
'BB_Upper', 'BB_Lower', 'BB_Width',
'MFI', 'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5'
]
target_column = 'Close' # used for label
data = data[['Date'] + feature_columns + [target_column]].dropna()
# 3) Scale data
scaler_features = MinMaxScaler()
scaler_target = MinMaxScaler()
X_all = data[feature_columns].values
y_all = data[[target_column]].values
X_scaled = scaler_features.fit_transform(X_all)
y_scaled = scaler_target.fit_transform(y_all).flatten()
# 4) Create sequences for LSTM
def create_sequences(features, target, window_size=15):
X_seq, y_seq = [], []
for i in range(len(features) - window_size):
X_seq.append(features[i:i+window_size])
y_seq.append(target[i+window_size])
return np.array(X_seq), np.array(y_seq)
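# Shape note: with window_size=15 and N scaled rows, X_seq has shape
# (N - 15, 15, n_features) and y_seq has shape (N - 15,): each sample pairs a
# 15-step feature window with the next step's scaled Close.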
window_size = 15
X, y = create_sequences(X_scaled, y_scaled, window_size)
# 5) Train/Val/Test split
train_size = int(len(X)*0.7)
val_size = int(len(X)*0.15)
test_size = len(X) - train_size - val_size
X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
logging.info(f"X_train: {X_train.shape}, X_val: {X_val.shape}, X_test: {X_test.shape}")
logging.info(f"y_train: {y_train.shape}, y_val: {y_val.shape}, y_test: {y_test.shape}")
# 6) GPU/CPU
def configure_device():
gpus = tf.config.list_physical_devices('GPU')
if gpus:
try:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
logging.info(f"{len(gpus)} GPU(s) detected and configured.")
except RuntimeError as e:
logging.error(e)
else:
logging.info("No GPU detected, using CPU.")
configure_device()
# 7) LSTM Builder
def build_lstm(input_shape, hyperparams):
model = Sequential()
num_layers = hyperparams['num_lstm_layers']
units = hyperparams['lstm_units']
dropout_rate = hyperparams['dropout_rate']
for i in range(num_layers):
return_sequences = (i < num_layers-1)
model.add(Bidirectional(LSTM(
units=units,
return_sequences=return_sequences,
kernel_regularizer=tf.keras.regularizers.l2(1e-4),
), input_shape=input_shape if i==0 else None))
model.add(Dropout(dropout_rate))
# Final output
model.add(Dense(1, activation='linear'))
if hyperparams['optimizer'] == 'Adam':
opt = Adam(learning_rate=hyperparams['learning_rate'])
else:
opt = Nadam(learning_rate=hyperparams['learning_rate'])
model.compile(loss=Huber(), optimizer=opt, metrics=['mae'])
return model
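# Hypothetical example of the expected hyperparams dict (values are illustrative only):
#   build_lstm((window_size, len(feature_columns)),
#              {'num_lstm_layers': 2, 'lstm_units': 64, 'dropout_rate': 0.3,
#               'learning_rate': 1e-3, 'optimizer': 'Adam'})
# would stack two Bidirectional LSTM layers; only the last layer has return_sequences=False.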
# 8) Optuna Objective
def objective(trial):
# define hyperparam search space
num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
lstm_units = trial.suggest_categorical('lstm_units', [32, 64, 128])
dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
optimizer_name = trial.suggest_categorical('optimizer', ['Adam', 'Nadam'])
hyperparams = {
'num_lstm_layers': num_lstm_layers,
'lstm_units': lstm_units,
'dropout_rate': dropout_rate,
'learning_rate': learning_rate,
'optimizer': optimizer_name
}
model_ = build_lstm((X_train.shape[1], X_train.shape[2]), hyperparams)
early_stop = EarlyStopping(monitor='val_loss', patience=8, restore_best_weights=True)
lr_reduce = ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.5, min_lr=1e-6)
pruning_cb = KerasPruningCallback(trial, 'val_loss')
history = model_.fit(
X_train, y_train,
validation_data=(X_val, y_val),
epochs=50,
batch_size=16,
callbacks=[early_stop, lr_reduce, pruning_cb],
verbose=0
)
val_mae = min(history.history['val_mae'])
return val_mae
logging.info("Starting hyperparameter optimization with Optuna...")
study = optuna.create_study(direction='minimize')
# Increase n_trials if you want a more thorough search (but it takes longer).
study.optimize(objective, n_trials=20)
best_params = study.best_params
logging.info(f"Best Hyperparameters: {best_params}")
# 9) Build final model with best hyperparams + Train
final_model = build_lstm((X_train.shape[1], X_train.shape[2]), best_params)
early_stop_final = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
reduce_lr_final = ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.5, min_lr=1e-6)
logging.info("Training final model with best hyperparams (up to 300 epochs)...")
final_model.fit(
X_train, y_train,
validation_data=(X_val, y_val),
epochs=300,
batch_size=16,
callbacks=[early_stop_final, reduce_lr_final],
verbose=1
)
# Evaluate
def evaluate_lstm(model, X_te, y_te):
logging.info("Evaluating final LSTM model on test set...")
y_pred_scaled = model.predict(X_te).flatten()
# Optionally clamp to [0,1]
y_pred_scaled = np.clip(y_pred_scaled, 0, 1)
y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1,1)).flatten()
y_true = scaler_target.inverse_transform(y_te.reshape(-1,1)).flatten()
mse_ = mean_squared_error(y_true, y_pred)
rmse_ = np.sqrt(mse_)
mae_ = mean_absolute_error(y_true, y_pred)
r2_ = r2_score(y_true, y_pred)
# Directional Accuracy
dir_true = np.sign(np.diff(y_true))
dir_pred = np.sign(np.diff(y_pred))
dir_acc = np.mean(dir_true == dir_pred)
logging.info(f"Test MSE={mse_:.4f}, RMSE={rmse_:.4f}, MAE={mae_:.4f}, R2={r2_:.4f}, DirAcc={dir_acc:.4f}")
# Sample plot
plt.figure(figsize=(10,6))
plt.plot(y_true[:100], label='Actual')
plt.plot(y_pred[:100], label='Predicted')
plt.title("Actual vs. Predicted (first 100 test points)")
plt.legend()
plt.savefig("actual_vs_predicted.png")
plt.close()
logging.info("Saved plot as actual_vs_predicted.png")
# Tabulate first 10 predictions
table_data = []
for i in range(min(10, len(y_pred))):
table_data.append([i, round(y_true[i],2), round(y_pred[i],2)])
headers = ["Index","Actual","Predicted"]
print(tabulate(table_data, headers=headers, tablefmt="pretty"))
evaluate_lstm(final_model, X_test, y_test)
# Save model + scalers
final_model.save("optuna_lstm_model.h5")
import joblib
joblib.dump(scaler_features, "scaler_features.pkl")
joblib.dump(scaler_target, "scaler_target.pkl")
logging.info("Saved final LSTM model + scalers.")
##############################
# 10) Reinforcement Learning
##############################
class StockTradingEnv(gym.Env):
"""
RL Env that:
- excludes 'Close' from observation
- includes transaction cost
- uses step-based PnL as reward
"""
metadata = {'render.modes': ['human']}
def __init__(self, df, initial_balance=10000, transaction_cost=0.001):
super().__init__()
self.df = df.reset_index(drop=True)
self.initial_balance = initial_balance
self.balance = initial_balance
self.net_worth = initial_balance
self.current_step = 0
self.max_steps = len(df)
self.transaction_cost = transaction_cost
self.shares_held = 0
self.cost_basis = 0
# Same columns as LSTM features (excluding 'Close'):
self.obs_columns = [
'Open', 'High', 'Low', 'Volume',
'RSI', 'MACD', 'OBV', 'ADX',
'BB_Upper', 'BB_Lower', 'BB_Width',
'MFI', 'SMA_5', 'SMA_10', 'EMA_5', 'EMA_10', 'STDDEV_5'
]
# Use same scaler if you want consistent normalization
self.scaler = MinMaxScaler().fit(df[self.obs_columns])
self.action_space = spaces.Discrete(3) # 0=Sell,1=Hold,2=Buy
self.observation_space = spaces.Box(
low=0.0, high=1.0,
shape=(len(self.obs_columns)+3,),
dtype=np.float32
)
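# The observation is the MinMax-scaled indicator row plus three account values that
# _get_obs() appends: balance / initial_balance, shares_held / 100, and
# cost_basis / initial_balance (with a 1e-9 guard) -- hence the "+3" in the shape.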
def reset(self):
self.balance = self.initial_balance
self.net_worth = self.initial_balance
self.shares_held = 0
self.cost_basis = 0
self.current_step = 0
return self._get_obs()
def step(self, action):
row = self.df.iloc[self.current_step]
current_price = row['Close']
prev_net_worth = self.net_worth
if action == 2: # Buy
shares_bought = int(self.balance // current_price)
if shares_bought > 0:
cost = shares_bought * current_price
fee = cost * self.transaction_cost
self.balance -= (cost + fee)
prev_shares = self.shares_held
self.shares_held += shares_bought
self.cost_basis = (
(self.cost_basis * prev_shares) + (shares_bought * current_price)
) / self.shares_held
elif action == 0: # Sell
if self.shares_held > 0:
revenue = self.shares_held * current_price
fee = revenue * self.transaction_cost
self.balance += (revenue - fee)
self.shares_held = 0
self.cost_basis = 0
# Recompute net worth
self.net_worth = self.balance + self.shares_held * current_price
self.current_step += 1
done = (self.current_step >= self.max_steps - 1)
# Step-based PnL
reward = self.net_worth - prev_net_worth
obs = self._get_obs()
return obs, reward, done, {}
def _get_obs(self):
row = self.df.iloc[self.current_step][self.obs_columns]
scaled_vals = self.scaler.transform([row])[0]
additional = np.array([
self.balance / self.initial_balance,
self.shares_held / 100.0,
self.cost_basis / (self.initial_balance+1e-9)
], dtype=np.float32)
obs = np.concatenate([scaled_vals, additional]).astype(np.float32)
return obs
def render(self, mode='human'):
profit = self.net_worth - self.initial_balance
print(f"Step: {self.current_step}, "
f"Balance: {self.balance:.2f}, "
f"Shares: {self.shares_held}, "
f"NetWorth: {self.net_worth:.2f}, "
f"Profit: {profit:.2f}")
# 11) Train DQN
def train_dqn(env):
logging.info("Training DQN agent with environment...")
model = DQN(
'MlpPolicy', env, verbose=1,
learning_rate=1e-3,
buffer_size=50000,
learning_starts=1000,
batch_size=64,
tau=0.99,
gamma=0.99,
train_freq=4,
target_update_interval=1000,
exploration_fraction=0.1,
exploration_final_eps=0.02,
tensorboard_log="./dqn_enhanced_tensorboard/"
)
model.learn(total_timesteps=50000)
model.save("improved_dqn_agent")
return model
rl_env = StockTradingEnv(data, initial_balance=10000, transaction_cost=0.001)
vec_env = DummyVecEnv([lambda: rl_env])
dqn_model = train_dqn(vec_env)
logging.info("DQN training complete. Saved as 'improved_dqn_agent'. Done!")
if __name__ == "__main__":
main()

Two additional binary files changed (not shown).
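For reference, a minimal way to run the new training script above (its filename is not shown in this view; assume it is saved as, say, train_lstm_dqn.py) would be:

python train_lstm_dqn.py path/to/ohlcv.csv

The CSV needs time, open, high, low and close columns (renamed internally) plus a Volume column, which the script uses as-is. A full run writes optuna_lstm_model.h5, scaler_features.pkl, scaler_target.pkl, actual_vs_predicted.png, the improved_dqn_agent DQN checkpoint, and TensorBoard logs under ./dqn_enhanced_tensorboard/.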