# NOTE: removed page-scrape artifacts ("416 lines", "13 KiB", "Python")
# that preceded the file and are not valid Python source.
# ===============================
|
|
# 2. IMPORTS & GLOBALS
|
|
# ===============================
|
|
import os
|
|
import sys
|
|
import math
|
|
import numpy as np
|
|
import pandas as pd
|
|
import matplotlib.pyplot as plt
|
|
import logging
|
|
from datetime import datetime
|
|
import pytz
|
|
|
|
# ---------------------------
|
|
# Force CPU usage:
|
|
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
|
|
import tensorflow as tf
|
|
tf.config.set_visible_devices([], 'GPU')
|
|
# ---------------------------
|
|
|
|
from sklearn.preprocessing import MinMaxScaler
|
|
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import LSTM, Dense, Dropout
|
|
from tensorflow.keras.optimizers import Adam
|
|
from tensorflow.keras.callbacks import EarlyStopping
|
|
from tensorflow.keras.losses import Huber
|
|
|
|
from hmmlearn import hmm
|
|
|
|
import gym
|
|
from gym import spaces
|
|
from stable_baselines3 import DQN
|
|
from stable_baselines3.common.vec_env import DummyVecEnv
|
|
|
|
import joblib
|
|
import optuna
|
|
from optuna.integration import TFKerasPruningCallback
|
|
|
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
EASTERN = pytz.timezone("US/Eastern")
|
|
|
|
|
|
# ===============================
|
|
# 3. DATA LOADING & INDICATORS
|
|
# ===============================
|
|
def load_and_preprocess_data(csv_path):
    """Load the bar CSV and derive all indicator columns.

    Reads `csv_path`, parses the 'Date' column as UTC, re-indexes the
    frame on Eastern time, sorts chronologically, and appends:
    log returns, 30-bar rolling volatility, RSI(14), SMA20/EMA20,
    MACD(12,26), Bollinger bands (20, 2 std), OBV, and a cyclical
    time-of-day encoding. Rows with any NaN (indicator warm-up) are
    dropped before returning.
    """
    df = pd.read_csv(csv_path)
    df['Date'] = pd.to_datetime(df['Date'], utc=True)
    df = df.set_index('Date')
    df.index = df.index.tz_convert(EASTERN)
    df = df.sort_index()

    close = df['Close']
    df['log_return'] = np.log(close).diff()
    df['rolling_vol'] = df['log_return'].rolling(window=30).std()

    # RSI(14) from simple rolling means of gains vs. losses.
    change = close.diff()
    avg_up = change.clip(lower=0).rolling(14).mean()
    avg_down = (-change.clip(upper=0)).rolling(14).mean()
    df['RSI'] = 100 - (100 / (1 + avg_up / avg_down))

    # Moving averages.
    df['SMA20'] = close.rolling(20).mean()
    df['EMA20'] = close.ewm(span=20, adjust=False).mean()

    # MACD line (fast EMA minus slow EMA; no signal line computed).
    fast = close.ewm(span=12, adjust=False).mean()
    slow = close.ewm(span=26, adjust=False).mean()
    df['MACD'] = fast - slow

    # Bollinger bands: 20-bar mean +/- 2 standard deviations.
    mid_band = close.rolling(window=20).mean()
    band_width = 2.0 * close.rolling(window=20).std()
    df['BB_upper'] = mid_band + band_width
    df['BB_lower'] = mid_band - band_width

    # On-balance volume: signed volume, cumulatively summed.
    df['OBV'] = (np.sign(close.diff()) * df['Volume']).fillna(0).cumsum()

    # Cyclical time-of-day features. 570 min = 09:30 ET, 955 = 15:55 ET,
    # so one full cycle spans the regular trading session of 5-min bars.
    df['minute_of_day'] = df.index.hour * 60 + df.index.minute
    phase = 2 * np.pi * (df['minute_of_day'] - 570) / (955 - 570)
    df['tod_sin'] = np.sin(phase)
    df['tod_cos'] = np.cos(phase)

    df.dropna(inplace=True)
    return df
|
|
|
|
|
|
# ===============================
|
|
# 4. ROLLING SLOPE & HMM
|
|
# ===============================
|
|
def add_rolling_slope(df, window=15):
    """Append a 'rolling_slope' column and drop the warm-up rows.

    For each row i >= window, the slope is the degree-1 least-squares
    fit over the previous `window` closes (rows [i-window, i); the
    current bar itself is excluded, so the feature is not look-ahead).
    The first `window` rows get NaN and are then removed.

    Parameters:
        df: DataFrame with a 'Close' column; mutated in place.
        window: number of trailing closes the line is fitted to.

    Returns:
        The same DataFrame with 'rolling_slope' added.
    """
    closes = df['Close'].values
    # Hoisted out of the loop: the x-axis is identical for every fit.
    x_idx = np.arange(window)
    # Warm-up prefix has no full window behind it.
    slopes = [np.nan] * min(window, len(closes))
    for i in range(window, len(closes)):
        seg = closes[i - window:i]
        # [0] of the degree-1 fit is the slope; the intercept is unused.
        slopes.append(np.polyfit(x_idx, seg, 1)[0])
    df['rolling_slope'] = slopes
    df.dropna(subset=['rolling_slope'], inplace=True)
    return df
|
|
|
|
def train_3state_hmm_on_slope(df):
    """Fit a 3-state Gaussian HMM on the rolling slope and tag each row.

    The raw state ids are ranked by their mean slope so that the
    lowest-mean state is labelled 'Bearish', the middle 'Sideways',
    and the highest 'Bullish'. Adds 'HMM_state' (raw id) and
    'HMM_label' columns to `df`.

    Returns:
        (fitted GaussianHMM, the same DataFrame with new columns)
    """
    X = df[['rolling_slope']].values.reshape(-1, 1)
    model = hmm.GaussianHMM(n_components=3, covariance_type='diag',
                            n_iter=1000, random_state=42)
    model.fit(X)
    states = model.predict(X)
    df['HMM_state'] = states

    # Order the state ids by average slope, then attach regime names.
    state_means = [X[states == s].mean() for s in range(3)]
    ranked = np.argsort(state_means)
    label_map = dict(zip(ranked, ('Bearish', 'Sideways', 'Bullish')))
    df['HMM_label'] = df['HMM_state'].map(label_map)
    return model, df
|
|
|
|
|
|
# ===============================
|
|
# 5. LSTM DATASET CREATION
|
|
# ===============================
|
|
def create_lstm_sequences(features, target, window_size=15):
    """Build overlapping (window, label) pairs for sequence training.

    Window i covers feature rows [i, i + window_size); its label is
    target[i + window_size], i.e. the value one step past the window.

    Returns:
        (X, y) numpy arrays with shapes
        (n - window_size, window_size, n_features) and (n - window_size,).
    """
    n_windows = len(features) - window_size
    X_seq = np.array([features[i:i + window_size] for i in range(n_windows)])
    y_seq = np.array([target[i + window_size] for i in range(n_windows)])
    return X_seq, y_seq
|
|
|
|
|
|
# ===============================
|
|
# 6. OPTUNA: CPU-FRIENDLY LSTM
|
|
# ===============================
|
|
def build_lstm_model_cpu(n_units1, n_units2, dropout1, dropout2, lr, input_shape):
    """
    Build a 2-layer LSTM regressor forced onto the non-CuDNN code path.

    `implementation=1` selects the generic kernel so the model runs the
    same way on CPU-only hosts. Compiled with Adam(lr) and Huber loss,
    tracking MAE; the head is a single linear unit (scalar regression).
    """
    first_lstm = LSTM(
        units=n_units1,
        return_sequences=True,      # feed full sequence to the 2nd LSTM
        activation='tanh',
        recurrent_activation='sigmoid',
        implementation=1,           # CPU-friendly (non-CuDNN) kernel
        input_shape=input_shape,
    )
    second_lstm = LSTM(
        units=n_units2,
        activation='tanh',
        recurrent_activation='sigmoid',
        implementation=1,           # CPU-friendly (non-CuDNN) kernel
    )
    model = Sequential([
        first_lstm,
        Dropout(dropout1),
        second_lstm,
        Dropout(dropout2),
        Dense(1, activation='linear'),
    ])
    model.compile(optimizer=Adam(learning_rate=lr), loss=Huber(), metrics=['mae'])
    return model
|
|
|
|
def optimize_lstm(trial, X_train, y_train, X_val, y_val, input_shape):
    """One Optuna objective evaluation.

    Samples hyper-parameters, trains a short-budget model with early
    stopping and Optuna pruning, and returns the best validation loss
    seen during training (lower is better).
    """
    cfg = {
        "n_units1": trial.suggest_categorical("n_units1", [32, 64, 128]),
        "n_units2": trial.suggest_categorical("n_units2", [16, 32, 64]),
        "dropout1": trial.suggest_float("dropout1", 0.1, 0.5, step=0.1),
        "dropout2": trial.suggest_float("dropout2", 0.1, 0.5, step=0.1),
        "lr": trial.suggest_float("lr", 1e-4, 1e-2, log=True),
    }

    model = build_lstm_model_cpu(
        cfg["n_units1"], cfg["n_units2"],
        cfg["dropout1"], cfg["dropout2"],
        cfg["lr"], input_shape,
    )

    history = model.fit(
        X_train, y_train,
        epochs=20,
        batch_size=16,
        validation_data=(X_val, y_val),
        callbacks=[
            EarlyStopping(monitor='val_loss', patience=3,
                          restore_best_weights=True),
            # Lets Optuna kill hopeless trials early.
            TFKerasPruningCallback(trial, 'val_loss'),
        ],
        verbose=0,
    )
    return min(history.history['val_loss'])
|
|
|
|
def train_lstm_with_optuna(X_train, y_train, X_val, y_val, input_shape, n_trials=5):
    """Search hyper-parameters with Optuna, then retrain the winner.

    Runs `n_trials` short trials via `optimize_lstm`, then builds a
    fresh model from the best parameters and trains it with a larger
    epoch budget and more patient early stopping.

    Returns:
        (trained Keras model, the Optuna study)
    """
    study = optuna.create_study(direction='minimize')
    study.optimize(
        lambda trial: optimize_lstm(
            trial, X_train, y_train, X_val, y_val, input_shape
        ),
        n_trials=n_trials,
    )
    best_params = study.best_params
    logging.info(f"Best Hyperparams from Optuna: {best_params}")

    # Final model, trained from scratch with the winning configuration.
    model = build_lstm_model_cpu(
        best_params['n_units1'],
        best_params['n_units2'],
        best_params['dropout1'],
        best_params['dropout2'],
        best_params['lr'],
        input_shape,
    )
    stopper = EarlyStopping(monitor='val_loss', patience=5,
                            restore_best_weights=True)
    model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=16,
        validation_data=(X_val, y_val),
        callbacks=[stopper],
        verbose=1,
    )
    return model, study
|
|
|
|
|
|
# ===============================
|
|
# 7. RL ENVIRONMENT (OPTIONS)
|
|
# ===============================
|
|
class IntradayOptionTradingEnv(gym.Env):
    """Minimal intraday trading environment over a feature DataFrame.

    One environment step per DataFrame row. Discrete actions:
        0 = SELL (liquidate the whole position),
        1 = HOLD,
        2 = BUY as many contracts as cash allows (blocked while the
            HMM regime label is 'Bearish').

    Observation = the feature columns for the current row plus three
    account scalars: balance / initial_balance, contracts / 10, and
    cost_basis / initial_balance. Reward each step is cumulative
    profit (net_worth - initial_balance), not the per-step change.

    NOTE(review): uses the classic (pre-0.26) gym API —
    reset() -> obs and step() -> (obs, reward, done, info) — which is
    what stable-baselines3's DummyVecEnv wrapping here expects.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, df, feature_cols, initial_balance=10000.0):
        super().__init__()
        self.df = df.reset_index()  # integer row index == step number
        self.feature_cols = feature_cols
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.net_worth = initial_balance
        self.current_step = 0
        self.max_steps = len(self.df)

        # Position state: contract count and weighted-average cost.
        self.option_contracts_held = 0
        self.cost_basis = 0.0

        self.action_space = spaces.Discrete(3)
        obs_len = len(feature_cols) + 3  # features + 3 account scalars
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(obs_len,), dtype=np.float32
        )

    def reset(self):
        """Restart the episode at row 0 with a flat position."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.current_step = 0
        self.option_contracts_held = 0
        self.cost_basis = 0.0
        return self._next_observation()

    def _next_observation(self):
        """Observation vector for the current row.

        FIX: cast to float32 so the dtype matches observation_space
        (np.concatenate with python-float scalars previously produced
        float64 here, while the terminal observation was float32).
        """
        row = self.df.loc[self.current_step]
        feats = row[self.feature_cols].values.astype(np.float32)
        bal_ratio = self.balance / self.initial_balance
        contracts = float(self.option_contracts_held) / 10.0
        cost_ratio = self.cost_basis / self.initial_balance
        return np.concatenate(
            [feats, [bal_ratio, contracts, cost_ratio]]
        ).astype(np.float32)

    def _get_option_price(self):
        # NOTE(review): the "option" price is simply the underlying
        # Close for this bar — placeholder pricing, no option model.
        return self.df.loc[self.current_step, 'Close']

    def _get_hmm_label(self):
        """Regime label ('Bearish'/'Sideways'/'Bullish') for this row."""
        return self.df.loc[self.current_step, 'HMM_label']

    def step(self, action):
        """Apply `action`, advance one bar.

        Returns (obs, reward, done, info); the terminal observation is
        a zero vector since there is no next row to observe.
        """
        price = self._get_option_price()
        hmm_label = self._get_hmm_label()

        # SELL: liquidate everything at the current price.
        if action == 0:
            if self.option_contracts_held > 0:
                proceeds = self.option_contracts_held * price
                self.balance += proceeds
                self.option_contracts_held = 0
                self.cost_basis = 0.0

        # BUY: all-in, unless the HMM regime is Bearish.
        elif action == 2:
            if hmm_label != 'Bearish':
                possible = int(self.balance // price)
                if possible > 0:
                    cost = possible * price
                    self.balance -= cost
                    prev_held = self.option_contracts_held
                    self.option_contracts_held += possible
                    if self.option_contracts_held > 0:
                        # Weighted-average cost basis over old + new lots.
                        self.cost_basis = (
                            (self.cost_basis * prev_held) + cost
                        ) / self.option_contracts_held

        # action == 1 (HOLD) changes nothing.

        self.net_worth = self.balance + (self.option_contracts_held * price)
        self.current_step += 1
        done = (self.current_step >= self.max_steps - 1)
        reward = self.net_worth - self.initial_balance
        if done:
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            obs = self._next_observation()
        return obs, reward, done, {}

    def render(self, mode='human'):
        """Print a one-line account summary for the current step."""
        profit = self.net_worth - self.initial_balance
        print(f"Step {self.current_step}, Bal={self.balance}, Held={self.option_contracts_held}, NW={self.net_worth}, Profit={profit}")
|
|
|
|
|
|
def train_dqn_agent(df, feature_cols, timesteps=2000):
    """Train a DQN on the intraday option environment and save it.

    Wraps a single IntradayOptionTradingEnv in a DummyVecEnv, runs
    `timesteps` learning steps, writes the weights to
    'dqn_option_trading_model_cpu', and returns the trained model.
    """
    env = IntradayOptionTradingEnv(df, feature_cols)
    vec_env = DummyVecEnv([lambda: env])

    dqn_config = dict(
        verbose=1,
        learning_rate=1e-3,
        buffer_size=5000,
        learning_starts=1000,
        batch_size=32,
        gamma=0.99,
        train_freq=4,
        target_update_interval=500,
        exploration_fraction=0.1,
        exploration_final_eps=0.02,
        tensorboard_log="./dqn_tensorboard/",
    )
    model = DQN('MlpPolicy', vec_env, **dqn_config)

    model.learn(total_timesteps=timesteps)
    model.save("dqn_option_trading_model_cpu")
    return model
|
|
|
|
|
|
# ===============================
|
|
# 8. MAIN
|
|
# ===============================
|
|
def main():
    """End-to-end pipeline.

    Load bar data -> label regimes (rolling slope + 3-state HMM) ->
    tune & train an LSTM price model with Optuna -> plot test-set
    predictions -> persist models/scalers -> train a DQN agent on the
    same feature set.
    """
    csv_path = "/content/drive/MyDrive/Data/GITHUB_5min_RTH_3years.csv"

    df = load_and_preprocess_data(csv_path)
    logging.info(f"Loaded data: shape={df.shape}")

    # Regime labelling: rolling slope -> HMM -> persisted model.
    df = add_rolling_slope(df, window=15)
    hmm_model, df = train_3state_hmm_on_slope(df)
    joblib.dump(hmm_model, "hmm_slope_model.pkl")

    label_to_num = {'Bearish': 0, 'Sideways': 1, 'Bullish': 2}
    df['HMM_label_num'] = df['HMM_label'].map(label_to_num).astype(float)
    df.dropna(subset=['RSI', 'MACD', 'OBV', 'rolling_slope',
                      'HMM_label_num', 'Close'], inplace=True)

    # Supervised dataset for the LSTM: scaled features -> scaled Close.
    lstm_feats = ['RSI', 'MACD', 'OBV', 'rolling_slope', 'HMM_label_num']
    scalerX = MinMaxScaler()
    scalerY = MinMaxScaler()
    X_scaled = scalerX.fit_transform(df[lstm_feats].values)
    y_scaled = scalerY.fit_transform(df[['Close']].values).flatten()

    window_size = 15
    X_seq, y_seq = create_lstm_sequences(X_scaled, y_scaled, window_size)

    # Chronological 70/15/15 split (no shuffling: this is a time series).
    n_total = len(X_seq)
    i_train = int(n_total * 0.7)
    i_val = i_train + int(n_total * 0.15)

    X_train, y_train = X_seq[:i_train], y_seq[:i_train]
    X_val, y_val = X_seq[i_train:i_val], y_seq[i_train:i_val]
    X_test, y_test = X_seq[i_val:], y_seq[i_val:]

    logging.info(f"LSTM: train={X_train.shape}, val={X_val.shape}, test={X_test.shape}")
    input_shape = (X_train.shape[1], X_train.shape[2])

    # Hyper-parameter search + final training.
    model, study = train_lstm_with_optuna(
        X_train, y_train,
        X_val, y_val,
        input_shape,
        n_trials=20,
    )

    # Out-of-sample evaluation, mapped back to price units.
    y_pred_scaled = model.predict(X_test).flatten()
    y_pred = scalerY.inverse_transform(y_pred_scaled.reshape(-1, 1)).flatten()
    y_test_ = scalerY.inverse_transform(y_test.reshape(-1, 1)).flatten()

    plt.figure(figsize=(12, 5))
    plt.plot(y_test_, label='Actual')
    plt.plot(y_pred, label='Predicted')
    plt.title("CPU-Only LSTM w/ Optuna - Actual vs Predicted")
    plt.legend()
    plt.show()

    # Persist the model and both scalers for later inference.
    model.save("optuna_lstm_model_cpu.h5")
    joblib.dump(scalerX, "lstm_feat_scaler_cpu.gz")
    joblib.dump(scalerY, "lstm_tgt_scaler_cpu.gz")

    # RL agent on the same feature set.
    dqn_feats = ['RSI', 'MACD', 'OBV', 'rolling_slope', 'HMM_label_num']
    dqn_model = train_dqn_agent(df, dqn_feats, timesteps=2000)

    logging.info("All tasks complete.")
|
|
|
|
|
|
# Script entry point (skipped when this file is imported as a module).
if __name__ == "__main__":
    main()
|