Updating LSTMDQN.py to incorporate Optuna tuning into the DQN model training, and possibly rerunning if the DQN underperforms
@@ -1 +1,2 @@
+venv/
 .python-version
LSTMDQN.py:
@@ -3,42 +3,44 @@ import sys
 import argparse
 import numpy as np
 import pandas as pd
-import matplotlib.pyplot as plt
-import seaborn as sns
 import logging
 from tabulate import tabulate
+import matplotlib.pyplot as plt
+import seaborn as sns
 
-from sklearn.preprocessing import MinMaxScaler
-from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+# TensorFlow / Keras
 
 import tensorflow as tf
-from tensorflow.keras.models import Sequential
+from tensorflow.keras.models import Sequential, load_model
 from tensorflow.keras.layers import LSTM, Dense, Dropout, Bidirectional
-from tensorflow.keras.optimizers import Adam, Nadam
 from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
 from tensorflow.keras.losses import Huber
 from tensorflow.keras.regularizers import l2
+from tensorflow.keras.optimizers import Adam, Nadam
 
-import xgboost as xgb
+# Sklearn
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
+import joblib
 
+# Optuna
 import optuna
 from optuna.integration import KerasPruningCallback
 
-# For Reinforcement Learning
+# RL stuff
 import gym
 from gym import spaces
 from stable_baselines3 import DQN
 from stable_baselines3.common.vec_env import DummyVecEnv
+from stable_baselines3.common.callbacks import BaseCallback
 
-# Suppress TensorFlow warnings beyond errors
+# Suppress TensorFlow logs beyond errors
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
-# Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
-###################################################
-# 1. Data Loading / Advanced Technical Indicators
-###################################################
+######################################################
+# 1. DATA LOADING & ADVANCED TECHNICAL INDICATORS
+######################################################
 def load_data(file_path):
     logging.info(f"Loading data from: {file_path}")
     try:
@@ -68,80 +70,70 @@ def load_data(file_path):
         logging.info("Data loaded and sorted successfully.")
         return df
 
 
 def compute_rsi(series, window=14):
     delta = series.diff()
-    gain = delta.where(delta > 0, 0).rolling(window=window).mean()
-    loss = -delta.where(delta < 0, 0).rolling(window=window).mean()
-    RS = gain / (loss + 1e-9)
-    return 100 - (100 / (1 + RS))
+    gain = delta.where(delta>0, 0).rolling(window=window).mean()
+    loss = -delta.where(delta<0, 0).rolling(window=window).mean()
+    RS = gain / (loss+1e-9)
+    return 100 - (100/(1+RS))
 
 
 def compute_macd(series, span_short=12, span_long=26, span_signal=9):
     ema_short = series.ewm(span=span_short, adjust=False).mean()
     ema_long = series.ewm(span=span_long, adjust=False).mean()
     macd_line = ema_short - ema_long
     signal_line = macd_line.ewm(span=span_signal, adjust=False).mean()
     return macd_line - signal_line  # histogram
 
 
 def compute_obv(df):
     signed_volume = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0)
     return signed_volume.cumsum()
 
 
 def compute_adx(df, window=14):
-    """Pseudo-ADX approach using rolling True Range / Close."""
     df['H-L'] = df['High'] - df['Low']
     df['H-Cp'] = (df['High'] - df['Close'].shift(1)).abs()
     df['L-Cp'] = (df['Low'] - df['Close'].shift(1)).abs()
     tr = df[['H-L','H-Cp','L-Cp']].max(axis=1)
     tr_rolling = tr.rolling(window=window).mean()
-    adx_placeholder = tr_rolling / (df['Close'] + 1e-9)
+    adx_placeholder = tr_rolling/(df['Close']+1e-9)
     df.drop(['H-L','H-Cp','L-Cp'], axis=1, inplace=True)
     return adx_placeholder
 
 
 def compute_bollinger_bands(series, window=20, num_std=2):
     sma = series.rolling(window=window).mean()
     std = series.rolling(window=window).std()
-    upper = sma + num_std * std
-    lower = sma - num_std * std
-    bandwidth = (upper - lower) / (sma + 1e-9)
+    upper = sma + num_std*std
+    lower = sma - num_std*std
+    bandwidth = (upper - lower)/(sma + 1e-9)
     return upper, lower, bandwidth
 
 
 def compute_mfi(df, window=14):
-    typical_price = (df['High'] + df['Low'] + df['Close']) / 3
-    money_flow = typical_price * df['Volume']
+    typical_price = (df['High']+ df['Low']+ df['Close'])/3
+    money_flow = typical_price* df['Volume']
     prev_tp = typical_price.shift(1)
-    flow_pos = money_flow.where(typical_price > prev_tp, 0)
-    flow_neg = money_flow.where(typical_price < prev_tp, 0)
+    flow_pos = money_flow.where(typical_price>prev_tp, 0)
+    flow_neg = money_flow.where(typical_price<prev_tp, 0)
     pos_sum = flow_pos.rolling(window=window).sum()
     neg_sum = flow_neg.rolling(window=window).sum()
-    mfi = 100 - (100 / (1 + pos_sum/(neg_sum+1e-9)))
+    mfi= 100-(100/(1+ pos_sum/(neg_sum+1e-9)))
     return mfi
 
 
 def calculate_technical_indicators(df):
     logging.info("Calculating technical indicators...")
 
     df['RSI'] = compute_rsi(df['Close'], 14)
-    df['MACD'] = compute_macd(df['Close'])
+    df['MACD']= compute_macd(df['Close'])
     df['OBV'] = compute_obv(df)
     df['ADX'] = compute_adx(df)
 
-    up, low, bw = compute_bollinger_bands(df['Close'], 20, 2)
-    df['BB_Upper'] = up
-    df['BB_Lower'] = low
-    df['BB_Width'] = bw
+    up, lo, bw = compute_bollinger_bands(df['Close'], 20, 2)
+    df['BB_Upper']= up
+    df['BB_Lower']= lo
+    df['BB_Width']= bw
 
-    df['MFI'] = compute_mfi(df, 14)
 
+    df['MFI'] = compute_mfi(df,14)
     df['SMA_5'] = df['Close'].rolling(5).mean()
-    df['SMA_10'] = df['Close'].rolling(10).mean()
+    df['SMA_10']= df['Close'].rolling(10).mean()
     df['EMA_5'] = df['Close'].ewm(span=5, adjust=False).mean()
-    df['EMA_10'] = df['Close'].ewm(span=10, adjust=False).mean()
+    df['EMA_10']= df['Close'].ewm(span=10, adjust=False).mean()
     df['STDDEV_5'] = df['Close'].rolling(5).std()
 
     df.dropna(inplace=True)
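Aside: the indicator helpers in the hunk above are plain pandas functions and can be tried in isolation. A minimal sketch, assuming the script is importable as a module named LSTMDQN (the file name comes from the commit title) and that its dependencies are installed; module-level code only defines functions and logging, so importing it does not start training:

    # Illustrative only; the module name and the toy series are assumptions.
    import numpy as np
    import pandas as pd
    from LSTMDQN import compute_rsi, compute_macd

    rng = np.random.default_rng(0)
    close = pd.Series(100 + np.cumsum(rng.normal(0, 1, 60)))  # toy close-price series

    rsi = compute_rsi(close, window=14)   # values fall in [0, 100] once the window is filled
    macd_hist = compute_macd(close)       # MACD line minus its signal line
    print(rsi.tail(3), macd_hist.tail(3))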
@@ -150,67 +142,123 @@ def calculate_technical_indicators(df):
 
 
 ###############################
-# 2. ARGUMENT PARSING
+# 2. ARG PARSING
 ###############################
 def parse_arguments():
-    parser = argparse.ArgumentParser(description='Train LSTM and DQN models for stock trading.')
-    parser.add_argument('csv_path', type=str, help='Path to the CSV data file (with columns time,open,high,low,close,volume).')
-    parser.add_argument('--do_dqn_inference', action='store_true',
-                        help='If set, will run the DQN inference after training the agent.')
+    parser = argparse.ArgumentParser(description='All-in-One: LSTM + DQN (with LSTM predictions) + Tuning.')
+    parser.add_argument('csv_path', type=str,
+                        help='Path to CSV data with columns [time, open, high, low, close, volume].')
+    parser.add_argument('--lstm_window_size', type=int, default=15,
+                        help='Sequence window size for LSTM. Default=15.')
+    parser.add_argument('--dqn_total_timesteps', type=int, default=50000,
+                        help='Total timesteps to train each DQN candidate. Default=50000.')
+    parser.add_argument('--dqn_eval_episodes', type=int, default=1,
+                        help='Number of episodes to evaluate DQN in the tuning step. Default=1 (entire dataset once).')
+    parser.add_argument('--n_trials_lstm', type=int, default=30,
+                        help='Number of Optuna trials for LSTM. Default=30.')
+    parser.add_argument('--n_trials_dqn', type=int, default=20,
+                        help='Number of Optuna trials for DQN. Default=20.')
     return parser.parse_args()
 
 
 ###############################
-# 3. MAIN
+# 3. CUSTOM DQN CALLBACK: LOG ACTIONS + REWARDS
+###############################
+class ActionLoggingCallback(BaseCallback):
+    """
+    Logs distribution of actions and average reward after each rollout.
+    For off-policy (DQN), "rollout" can be a bit different than on-policy,
+    but stable-baselines3 still calls `_on_rollout_end` periodically.
+    """
+    def __init__(self, verbose=0):
+        super().__init__(verbose)
+        self.action_buffer = []
+        self.reward_buffer = []
+
+    def _on_training_start(self):
+        self.action_buffer = []
+        self.reward_buffer = []
+
+    def _on_step(self):
+        action = self.locals.get('action', None)
+        reward = self.locals.get('reward', None)
+        if action is not None:
+            self.action_buffer.append(action)
+        if reward is not None:
+            self.reward_buffer.append(reward)
+        return True
+
+    def _on_rollout_end(self):
+        import numpy as np
+        actions = np.array(self.action_buffer)
+        rewards = np.array(self.reward_buffer)
+        if len(actions)>0:
+            unique, counts = np.unique(actions, return_counts=True)
+            total = len(actions)
+            distr_str = []
+            for act,c in zip(unique, counts):
+                distr_str.append(f"Action {act}: {c} times ({100*c/total:.2f}%)")
+            logging.info(" -- DQN Rollout End -- ")
+            logging.info(" " + ", ".join(distr_str))
+            logging.info(f" Avg Reward this rollout: {rewards.mean():.4f} (min={rewards.min():.4f}, max={rewards.max():.4f})")
+        self.action_buffer = []
+        self.reward_buffer = []
+
+
+###############################
+# 4. MAIN
 ###############################
 def main():
-    # Parse command-line arguments
     args = parse_arguments()
     csv_path = args.csv_path
-    do_dqn_inference = args.do_dqn_inference
+    lstm_window_size = args.lstm_window_size
+    dqn_total_timesteps = args.dqn_total_timesteps
+    dqn_eval_episodes = args.dqn_eval_episodes
+    n_trials_lstm = args.n_trials_lstm
+    n_trials_dqn = args.n_trials_dqn
 
-    # 1) Load Data
-    data = load_data(csv_path)
-    data = calculate_technical_indicators(data)
+    ##########################################
+    # A) LSTM PART: LOAD, PREPROCESS, TUNE
+    ##########################################
+    # 1) LOAD & preprocess
+    df = load_data(csv_path)
+    df = calculate_technical_indicators(df)
 
-    # We'll exclude 'Close' from the feature set
     feature_columns = [
         'SMA_5','SMA_10','EMA_5','EMA_10','STDDEV_5',
         'RSI','MACD','ADX','OBV','Volume','Open','High','Low',
         'BB_Upper','BB_Lower','BB_Width','MFI'
     ]
     target_column = 'Close'
-    data = data[['Date'] + feature_columns + [target_column]]
-    data.dropna(inplace=True)
+    df = df[['Date']+ feature_columns+[target_column]].dropna()
 
-    # 2) Scaling
+    from sklearn.preprocessing import MinMaxScaler
    scaler_features = MinMaxScaler()
     scaler_target = MinMaxScaler()
 
-    X_all = data[feature_columns].values
-    y_all = data[[target_column]].values
+    X_all = df[feature_columns].values
+    y_all = df[[target_column]].values
 
     X_scaled = scaler_features.fit_transform(X_all)
     y_scaled = scaler_target.fit_transform(y_all).flatten()
 
-    # 3) Create LSTM sequences
-    def create_sequences(features, target, window_size=15):
+    # 2) Create sequences
+    def create_sequences(features, target, window_size):
         X_seq, y_seq = [], []
-        for i in range(len(features)-window_size):
+        for i in range(len(features) - window_size):
             X_seq.append(features[i:i+window_size])
             y_seq.append(target[i+window_size])
         return np.array(X_seq), np.array(y_seq)
 
-    window_size = 15
-    X, y = create_sequences(X_scaled, y_scaled, window_size)
+    X, y = create_sequences(X_scaled, y_scaled, lstm_window_size)
 
-    # 4) Train/Val/Test Split
+    # 3) Split into train/val/test
     train_size = int(len(X)*0.7)
     val_size = int(len(X)*0.15)
-    test_size = len(X)-train_size-val_size
+    test_size = len(X)- train_size- val_size
 
-    X_train, X_val, X_test = X[:train_size], X[train_size:train_size+val_size], X[train_size+val_size:]
-    y_train, y_val, y_test = y[:train_size], y[train_size:train_size+val_size], y[train_size+val_size:]
+    X_train, y_train = X[:train_size], y[:train_size]
+    X_val, y_val = X[train_size: train_size+ val_size], y[train_size: train_size+ val_size]
+    X_test, y_test = X[train_size+ val_size:], y[train_size+ val_size:]
 
     logging.info(f"Scaled training features shape: {X_train.shape}")
     logging.info(f"Scaled validation features shape: {X_val.shape}")
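For reference, with the argument parser above a typical invocation of the updated script would look along the lines of `python LSTMDQN.py data.csv --lstm_window_size 15 --n_trials_lstm 30 --n_trials_dqn 20 --dqn_total_timesteps 50000` (illustrative: `data.csv` is a placeholder, the flag values shown are just the declared defaults, and the old `--do_dqn_inference` switch no longer exists in the new version).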
@@ -219,14 +267,14 @@ def main():
     logging.info(f"Scaled validation target shape: {y_val.shape}")
     logging.info(f"Scaled testing target shape: {y_test.shape}")
 
-    # 5) GPU or CPU
+    # 4) GPU config
     def configure_device():
         gpus = tf.config.list_physical_devices('GPU')
         if gpus:
             try:
                 for gpu in gpus:
                     tf.config.experimental.set_memory_growth(gpu, True)
-                logging.info(f"{len(gpus)} GPU(s) detected and configured.")
+                logging.info(f"{len(gpus)} GPU(s) detected & configured.")
             except RuntimeError as e:
                 logging.error(e)
         else:
@@ -234,39 +282,44 @@ def main():
 
     configure_device()
 
-    # 6) Build LSTM
-    def build_advanced_lstm(input_shape, hyperparams):
+    # 5) Build LSTM function
+    def build_lstm(input_shape, hyperparams):
+        from tensorflow.keras.regularizers import l2
         model = Sequential()
-        for i in range(hyperparams['num_lstm_layers']):
-            return_seqs = (i < hyperparams['num_lstm_layers'] - 1)
+        num_layers = hyperparams['num_lstm_layers']
+        units = hyperparams['lstm_units']
+        drop = hyperparams['dropout_rate']
+        for i in range(num_layers):
+            return_seqs = (i< num_layers-1)
             model.add(Bidirectional(
-                LSTM(hyperparams['lstm_units'],
-                     return_sequences=return_seqs,
-                     kernel_regularizer=tf.keras.regularizers.l2(0.001)),
+                LSTM(units, return_sequences=return_seqs, kernel_regularizer=l2(1e-4)),
                 input_shape=input_shape if i==0 else None
             ))
-            model.add(Dropout(hyperparams['dropout_rate']))
+            model.add(Dropout(drop))
 
         model.add(Dense(1, activation='linear'))
 
-        if hyperparams['optimizer'] == 'Adam':
-            opt = Adam(learning_rate=hyperparams['learning_rate'], decay=hyperparams['decay'])
-        elif hyperparams['optimizer'] == 'Nadam':
-            opt = Nadam(learning_rate=hyperparams['learning_rate'])
+        opt_name= hyperparams['optimizer']
+        lr = hyperparams['learning_rate']
+        decay = hyperparams['decay']
+        if opt_name=='Adam':
+            opt= Adam(learning_rate=lr, decay=decay)
+        elif opt_name=='Nadam':
+            opt= Nadam(learning_rate=lr)
         else:
-            opt = Adam(learning_rate=hyperparams['learning_rate'])
+            opt= Adam(learning_rate=lr)
 
-        model.compile(optimizer=opt, loss=Huber(), metrics=['mae'])
+        model.compile(loss=Huber(), optimizer=opt, metrics=['mae'])
         return model
 
-    # 7) Optuna Tuning
-    def objective(trial):
-        num_lstm_layers = trial.suggest_int('num_lstm_layers', 1, 3)
-        lstm_units = trial.suggest_categorical('lstm_units', [32,64,96,128])
-        dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)
-        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
-        optimizer_name = trial.suggest_categorical('optimizer', ['Adam','Nadam'])
-        decay = trial.suggest_float('decay', 0.0, 1e-4)
+    # 6) Optuna objective for LSTM
+    def lstm_objective(trial):
+        import tensorflow as tf
+        num_lstm_layers = trial.suggest_int('num_lstm_layers',1,3)
+        lstm_units = trial.suggest_categorical('lstm_units',[32,64,96,128])
+        dropout_rate = trial.suggest_float('dropout_rate',0.1,0.5)
+        learning_rate = trial.suggest_loguniform('learning_rate',1e-5,1e-2)
+        optimizer_name = trial.suggest_categorical('optimizer',['Adam','Nadam'])
+        decay = trial.suggest_float('decay',0.0,1e-4)
 
         hyperparams = {
             'num_lstm_layers': num_lstm_layers,
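To make the hyperparameter dictionary concrete: build_lstm expects the same keys that lstm_objective samples. A minimal sketch of a hand-written call, assuming it runs inside main() where build_lstm and the training arrays are in scope; the numeric values are placeholders, not tuned results:

    # Hypothetical values for the keys sampled by lstm_objective.
    sample_hyperparams = {
        'num_lstm_layers': 2,
        'lstm_units': 64,
        'dropout_rate': 0.3,
        'learning_rate': 1e-3,
        'optimizer': 'Adam',
        'decay': 0.0,
    }
    # input_shape is (window_size, n_features); with the default window of 15 and the
    # 17 feature columns listed above this is (15, 17).
    demo_model = build_lstm((X_train.shape[1], X_train.shape[2]), sample_hyperparams)
    demo_model.summary()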
@@ -277,295 +330,390 @@ def main():
             'decay': decay
         }
 
-        model_ = build_advanced_lstm((X_train.shape[1], X_train.shape[2]), hyperparams)
+        model_ = build_lstm((X_train.shape[1], X_train.shape[2]), hyperparams)
         early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
         lr_reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
         cb_prune = KerasPruningCallback(trial, 'val_loss')
 
-        history = model_.fit(
+        history= model_.fit(
             X_train, y_train,
             epochs=100,
             batch_size=16,
-            validation_data=(X_val, y_val),
+            validation_data=(X_val,y_val),
             callbacks=[early_stop, lr_reduce, cb_prune],
             verbose=0
         )
         val_mae = min(history.history['val_mae'])
         return val_mae
 
-    logging.info("Starting hyperparameter optimization with Optuna...")
-    study = optuna.create_study(direction='minimize')
-    study.optimize(objective, n_trials=50)
+    logging.info("Starting LSTM hyperparam optimization with Optuna...")
+    study_lstm= optuna.create_study(direction='minimize')
+    study_lstm.optimize(lstm_objective, n_trials=n_trials_lstm)
+    best_lstm_params = study_lstm.best_params
+    logging.info(f"Best LSTM Hyperparams: {best_lstm_params}")
 
-    best_params = study.best_params
-    logging.info(f"Best Hyperparameters from Optuna: {best_params}")
+    # 7) Train final LSTM
+    final_lstm = build_lstm((X_train.shape[1], X_train.shape[2]), best_lstm_params)
+    early_stop_final= EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
+    lr_reduce_final= ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
 
-    # 8) Train the Best LSTM
-    best_model = build_advanced_lstm((X_train.shape[1], X_train.shape[2]), best_params)
-
-    early_stop_final = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)
-    lr_reduce_final = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
-
-    logging.info("Training the best LSTM model with optimized hyperparameters...")
-    history = best_model.fit(
-        X_train, y_train,
+    logging.info("Training best LSTM model with found hyperparams...")
+    hist= final_lstm.fit(
+        X_train,y_train,
         epochs=300,
         batch_size=16,
-        validation_data=(X_val, y_val),
+        validation_data=(X_val,y_val),
         callbacks=[early_stop_final, lr_reduce_final],
         verbose=1
     )
 
-    # 9) Evaluate
-    def evaluate_model(model, X_test, y_test):
-        logging.info("Evaluating model (LSTM)...")
-        y_pred_scaled = model.predict(X_test).flatten()
-        y_pred_scaled = np.clip(y_pred_scaled, 0, 1)
+    # Evaluate LSTM
+    def evaluate_lstm(model, X_test, y_test):
+        logging.info("Evaluating final LSTM...")
+        y_pred_scaled= model.predict(X_test).flatten()
+        y_pred_scaled= np.clip(y_pred_scaled,0,1)
         y_pred = scaler_target.inverse_transform(y_pred_scaled.reshape(-1,1)).flatten()
-        y_test_actual = scaler_target.inverse_transform(y_test.reshape(-1,1)).flatten()
+        y_test_actual= scaler_target.inverse_transform(y_test.reshape(-1,1)).flatten()
 
-        mse = mean_squared_error(y_test_actual, y_pred)
-        rmse= np.sqrt(mse)
-        mae = mean_absolute_error(y_test_actual, y_pred)
-        r2 = r2_score(y_test_actual, y_pred)
+        mse_= mean_squared_error(y_test_actual,y_pred)
+        rmse_= np.sqrt(mse_)
+        mae_ = mean_absolute_error(y_test_actual,y_pred)
+        r2_ = r2_score(y_test_actual,y_pred)
 
-        direction_actual = np.sign(np.diff(y_test_actual))
+        direction_actual= np.sign(np.diff(y_test_actual))
         direction_pred = np.sign(np.diff(y_pred))
-        directional_accuracy = np.mean(direction_actual==direction_pred)
+        directional_accuracy= np.mean(direction_actual== direction_pred)
 
-        logging.info(f"Test MSE: {mse}")
-        logging.info(f"Test RMSE: {rmse}")
-        logging.info(f"Test MAE: {mae}")
-        logging.info(f"Test R2 Score: {r2}")
+        logging.info(f"Test MSE: {mse_}")
+        logging.info(f"Test RMSE: {rmse_}")
+        logging.info(f"Test MAE: {mae_}")
+        logging.info(f"Test R2 Score: {r2_}")
         logging.info(f"Directional Accuracy: {directional_accuracy}")
 
         # Plot
         plt.figure(figsize=(14,7))
         plt.plot(y_test_actual, label='Actual Price')
         plt.plot(y_pred, label='Predicted Price')
-        plt.title('Actual vs Predicted Prices')
-        plt.xlabel('Time Step')
-        plt.ylabel('Price')
+        plt.title('LSTM: Actual vs Predicted')
         plt.legend()
         plt.grid(True)
-        plt.savefig('actual_vs_predicted.png')
+        plt.savefig('lstm_actual_vs_pred.png')
         plt.close()
-        logging.info("Plot saved as 'actual_vs_predicted.png'")
 
         # Tabulate first 40
-        table_data = []
-        for i in range(min(40, len(y_test_actual))):
-            table_data.append([i, round(y_test_actual[i],2), round(y_pred[i],2)])
-        headers = ["Index", "Actual Price", "Predicted Price"]
-        print(tabulate(table_data, headers=headers, tablefmt="pretty"))
+        table=[]
+        limit= min(40,len(y_test_actual))
+        for i in range(limit):
+            table.append([i, round(y_test_actual[i],2), round(y_pred[i],2)])
+        headers= ["Index","Actual Price","Predicted Price"]
+        print(tabulate(table, headers=headers, tablefmt="pretty"))
+        return r2_, directional_accuracy
 
-        return mse, rmse, mae, r2, directional_accuracy
+    _r2, _diracc= evaluate_lstm(final_lstm, X_test, y_test)
 
-    mse, rmse, mae, r2, directional_accuracy = evaluate_model(best_model, X_test, y_test)
+    # Save LSTM + scalers
+    final_lstm.save('best_lstm_model.h5')
+    joblib.dump(scaler_features,'scaler_features.pkl')
+    joblib.dump(scaler_target, 'scaler_target.pkl')
+    logging.info("Saved best LSTM model + scalers (best_lstm_model.h5, scaler_features.pkl, scaler_target.pkl).")
 
-    # 10) Save
-    best_model.save('optimized_lstm_model.h5')
-    import joblib
-    joblib.dump(scaler_features, 'scaler_features.save')
-    joblib.dump(scaler_target, 'scaler_target.save')
-    logging.info("Model and scalers saved (optimized_lstm_model.h5, scaler_features.save, scaler_target.save).")
+    ############################################################
+    # B) DQN PART: BUILD ENV THAT USES THE LSTM + FORECAST
+    ############################################################
+    class StockTradingEnvWithLSTM(gym.Env):
+        """
+        An environment that uses the LSTM model's predicted next day close
+        as part of the observation:
+          obs = [technical indicators, balance, shares, cost_basis, predicted_next_close].
+        Reward => net_worth - initial_balance each step.
+        """
+        metadata = {'render.modes':['human']}
 
-    ##########################################################
-    # 11) Reinforcement Learning: StockTradingEnv + DQN
-    ##########################################################
-    class StockTradingEnv(gym.Env):
-        """
-        A simple stock trading environment for OpenAI Gym
-        with step-based reward = net_worth - initial_balance.
-        """
-        metadata = {'render.modes': ['human']}
-
-        def __init__(self, df, initial_balance=10000, transaction_cost=0.001):
+        def __init__(self, df, feature_columns, lstm_model, scaler_features, scaler_target,
+                     window_size=15, initial_balance=10000, transaction_cost=0.001):
             super().__init__()
-            self.df = df.reset_index()
-            self.initial_balance = initial_balance
-            self.balance = initial_balance
-            self.net_worth = initial_balance
-            self.max_steps = len(df)
-            self.current_step = 0
-            self.shares_held = 0
-            self.cost_basis = 0
-            self.transaction_cost = transaction_cost
+            self.df= df.reset_index(drop=True)
+            self.feature_columns= feature_columns
+            self.lstm_model= lstm_model
+            self.scaler_features= scaler_features
+            self.scaler_target= scaler_target
+            self.window_size= window_size
 
-            # Re-use feature_columns from above
-            self.feature_columns = feature_columns
+            self.initial_balance= initial_balance
+            self.balance= initial_balance
+            self.net_worth= initial_balance
+            self.transaction_cost= transaction_cost
 
-            # Action space: 0=Sell,1=Hold,2=Buy
-            self.action_space = spaces.Discrete(3)
+            self.max_steps= len(df)
+            self.current_step=0
+            self.shares_held=0
+            self.cost_basis=0
 
-            # Observation = [17 indicators + balance + shares + cost_basis] => total 20
-            self.observation_space = spaces.Box(
-                low=0,
-                high=1,
-                shape=(len(self.feature_columns)+3,),
+            # raw array of features
+            self.raw_features= df[feature_columns].values
+
+            # 0=Sell,1=Hold,2=Buy
+            self.action_space= spaces.Discrete(3)
+
+            # observation dimension = len(feature_columns)+3 +1 => 17 + 3 +1=21
+            self.observation_space= spaces.Box(
+                low=0, high=1,
+                shape=(len(feature_columns)+3+1,),
                 dtype=np.float32
             )
 
         def reset(self):
-            self.balance = self.initial_balance
-            self.net_worth = self.initial_balance
-            self.current_step = 0
-            self.shares_held = 0
-            self.cost_basis = 0
-            return self._next_observation()
+            self.balance= self.initial_balance
+            self.net_worth= self.initial_balance
+            self.current_step=0
+            self.shares_held=0
+            self.cost_basis=0
+            return self._get_obs()
 
-        def _next_observation(self):
-            obs_vals = self.df.loc[self.current_step, self.feature_columns].values
-            # simple normalization
-            if np.max(obs_vals)!=0:
-                obs_vals = obs_vals / np.max(obs_vals)
+        def _get_obs(self):
+            row= self.raw_features[self.current_step]
+            row_max= np.max(row) if np.max(row)!=0 else 1.0
+            row_norm= row/row_max
 
-            additional = np.array([
+            # account info
+            additional= np.array([
                 self.balance/self.initial_balance,
                 self.shares_held/100.0,
-                self.cost_basis/self.initial_balance
+                self.cost_basis/(self.initial_balance+1e-9)
             ], dtype=np.float32)
 
-            return np.concatenate([obs_vals, additional]).astype(np.float32)
+            # LSTM prediction
+            if self.current_step< self.window_size:
+                # not enough history => no forecast
+                predicted_close= 0.0
+            else:
+                seq= self.raw_features[self.current_step - self.window_size: self.current_step]
+                seq_scaled= self.scaler_features.transform(seq)
+                seq_scaled= np.expand_dims(seq_scaled, axis=0)  # shape (1, window_size, #features)
+                pred_scaled= self.lstm_model.predict(seq_scaled, verbose=0).flatten()[0]
+                pred_scaled= np.clip(pred_scaled,0,1)
+                unscaled= self.scaler_target.inverse_transform([[pred_scaled]])[0,0]
+                # either keep raw or scale it. We'll do a naive scale by /1000 if typical price is double digits
+                predicted_close= unscaled/1000.0
+
+            obs= np.concatenate([row_norm, additional, [predicted_close]]).astype(np.float32)
+            return obs
 
         def step(self, action):
-            current_price = self.df.loc[self.current_step, 'Close']
+            prev_net_worth= self.net_worth
+            current_price= self.df.loc[self.current_step,'Close']
 
-            if action==2:  # Buy
-                shares_bought = int(self.balance // current_price)
+            if action==2:  # BUY
+                shares_bought= int(self.balance// current_price)
                 if shares_bought>0:
                     cost= shares_bought* current_price
-                    fee = cost* self.transaction_cost
+                    fee= cost* self.transaction_cost
                     self.balance-= (cost+ fee)
                     old_shares= self.shares_held
                     self.shares_held+= shares_bought
-                    # Weighted average cost
                     self.cost_basis=(
-                        (self.cost_basis* old_shares)+(shares_bought* current_price)
+                        (self.cost_basis* old_shares)+ (shares_bought* current_price)
                     )/ self.shares_held
 
-            elif action==0:  # Sell
+            elif action==0:  # SELL
                 if self.shares_held>0:
                     revenue= self.shares_held* current_price
-                    fee = revenue*self.transaction_cost
+                    fee= revenue* self.transaction_cost
                     self.balance+= (revenue- fee)
                     self.shares_held=0
                     self.cost_basis=0
 
-            prev_net_worth= self.net_worth
             self.net_worth= self.balance+ self.shares_held* current_price
             self.current_step+=1
-            done= (self.current_step>= self.max_steps-1)
+            done= (self.current_step>= self.max_steps -1)
 
-            # Reward: net_worth - initial_balance (like original code)
             reward= self.net_worth- self.initial_balance
-            obs= self._next_observation()
+            obs= self._get_obs()
             return obs, reward, done, {}
 
         def render(self, mode='human'):
             profit= self.net_worth- self.initial_balance
             print(f"Step: {self.current_step}, "
-                  f"Balance: {self.balance:.2f}, "
-                  f"Shares: {self.shares_held}, "
-                  f"NetWorth: {self.net_worth:.2f}, "
-                  f"Profit: {profit:.2f}")
+                  f"Balance={self.balance:.2f}, "
+                  f"Shares={self.shares_held}, "
+                  f"NetWorth={self.net_worth:.2f}, "
+                  f"Profit={profit:.2f}")
 
+    ###################################
+    # C) DQN HYPERPARAM TUNING W/ LSTM
+    ###################################
+    # We'll define a function that trains a DQN with trial hyperparams,
+    # then evaluates final net worth on one run.
+    from stable_baselines3.common.evaluation import evaluate_policy
 
-    def train_dqn_agent(env):
-        logging.info("Training DQN Agent (step-based reward).")
-        try:
-            model = DQN(
-                'MlpPolicy',
-                env,
-                verbose=1,
-                learning_rate=1e-3,
-                buffer_size=10000,
-                learning_starts=1000,
-                batch_size=64,
-                tau=1.0,
-                gamma=0.99,
-                train_freq=4,
-                target_update_interval=1000,
-                exploration_fraction=0.1,
-                exploration_final_eps=0.02,
-                tensorboard_log="./dqn_stock_tensorboard/"
-            )
-            model.learn(total_timesteps=100000)
-            model.save("dqn_stock_trading")
-            logging.info("DQN Agent trained and saved as 'dqn_stock_trading.zip'.")
-            return model
-        except Exception as e:
-            logging.error(f"Error training DQN Agent: {e}")
-            sys.exit(1)
+    # We'll define a small function to do final net worth check:
+    def evaluate_dqn_networth(model, env, n_episodes=1):
+        # We do a simple loop that runs the entire dataset (1 episode)
+        # to see final net worth.
+        # If you want multiple episodes, you can do multiple resets in random start positions, etc.
+        final_net_worths = []
+        for _ in range(n_episodes):
+            obs= env.reset()
+            done= False
+            while not done:
+                action, _= model.predict(obs, deterministic=True)
+                obs, reward, done, info= env.step(action)
+            final_net_worths.append(env.net_worth)
+        return np.mean(final_net_worths)
 
+    # We'll define the DQN objective with Optuna
+    def dqn_objective(trial):
+        # we sample some DQN hyperparams
+        lr = trial.suggest_loguniform("lr", 1e-5, 1e-2)
+        gamma = trial.suggest_float("gamma", 0.8, 0.9999)
+        exploration_fraction= trial.suggest_float("exploration_fraction", 0.01, 0.3)
+        buffer_size = trial.suggest_categorical("buffer_size",[5000,10000,20000])
+        batch_size = trial.suggest_categorical("batch_size",[32,64,128])
 
-    # 12) Train the DQN environment
-    logging.info("Initializing trading environment for DQN training...")
-    trading_env = StockTradingEnv(data, initial_balance=10000, transaction_cost=0.001)
-    vec_env = DummyVecEnv([lambda: trading_env])
+        # Build environment fresh each time or reuse:
+        # We'll reuse the same data environment but new instance
+        env = StockTradingEnvWithLSTM(
+            df=df,
+            feature_columns= feature_columns,
+            lstm_model= final_lstm,   # use the best LSTM
+            scaler_features= scaler_features,
+            scaler_target= scaler_target,
+            window_size= lstm_window_size
+        )
+        vec_env = DummyVecEnv([lambda: env])
 
-    dqn_model = train_dqn_agent(vec_env)
-    logging.info("DQN training complete.")
+        # Build DQN
+        from stable_baselines3 import DQN
+        from stable_baselines3.common.callbacks import BaseCallback
 
-    # 13) Optional: run DQN inference right away (like use_dqn.py) if user wants
-    if do_dqn_inference:
-        logging.info("Running DQN inference (test) after training...")
-        obs = vec_env.reset()
-        done = [False]
-        total_reward = 0.0
-        step_data = []
-        step_count = 0
+        dqn_action_logger = ActionLoggingCallback(verbose=0)
 
-        # underlying env to access net worth, etc.
-        underlying_env = vec_env.envs[0]
+        model = DQN(
+            'MlpPolicy',
+            vec_env,
+            verbose=0,
+            learning_rate= lr,
+            gamma= gamma,
+            exploration_fraction= exploration_fraction,
+            buffer_size= buffer_size,
+            batch_size= batch_size,
+            train_freq=4,
+            target_update_interval=1000,
+            # etc
+        )
+        # Train some timesteps
+        model.learn(total_timesteps= dqn_total_timesteps, callback=dqn_action_logger)
 
-        while not done[0]:
-            step_count += 1
-            action, _ = dqn_model.predict(obs, deterministic=True)
-            obs, reward, done, info = vec_env.step(action)
-            reward_scalar = reward[0]
-            total_reward += reward_scalar
+        # Evaluate final net worth
+        final_net_worth= evaluate_dqn_networth(model, env, n_episodes=dqn_eval_episodes)
+        # we want to maximize net worth => minimize negative net worth
+        return -final_net_worth
 
-            step_data.append({
-                "Step": step_count,
-                "Action": int(action[0]),
-                "Reward": reward_scalar,
-                "Balance": underlying_env.balance,
-                "Shares": underlying_env.shares_held,
-                "NetWorth": underlying_env.net_worth
-            })
+    logging.info("Starting DQN hyperparam tuning with Optuna (using LSTM environment)...")
+    study_dqn = optuna.create_study(direction='minimize')
+    study_dqn.optimize(dqn_objective, n_trials=n_trials_dqn)
+    best_dqn_params = study_dqn.best_params
+    logging.info(f"Best DQN hyperparams: {best_dqn_params}")
 
-        final_net_worth = underlying_env.net_worth
-        final_profit = final_net_worth - underlying_env.initial_balance
+    ###################################
+    # D) TRAIN FINAL DQN WITH BEST PARAMS
+    ###################################
+    logging.info("Training final DQN with best hyperparams & LSTM environment...")
 
-        print("\n=== DQN Agent Finished ===")
-        print(f"Total Steps Taken: {step_count}")
-        print(f"Final Net Worth: {final_net_worth:.2f}")
-        print(f"Final Profit: {final_profit:.2f}")
-        print(f"Sum of Rewards: {total_reward:.2f}")
+    env_final = StockTradingEnvWithLSTM(
+        df=df,
+        feature_columns=feature_columns,
+        lstm_model= final_lstm,
+        scaler_features= scaler_features,
+        scaler_target= scaler_target,
+        window_size= lstm_window_size
+    )
+    vec_env_final = DummyVecEnv([lambda: env_final])
 
-        buy_count = sum(1 for x in step_data if x["Action"] == 2)
-        sell_count = sum(1 for x in step_data if x["Action"] == 0)
-        hold_count = sum(1 for x in step_data if x["Action"] == 1)
-        print(f"Actions Taken -> BUY: {buy_count}, SELL: {sell_count}, HOLD: {hold_count}")
+    # Build final model
+    final_dqn_logger = ActionLoggingCallback(verbose=1)  # We'll see logs each rollout
+    final_model= DQN(
+        'MlpPolicy',
+        vec_env_final,
+        verbose=1,
+        learning_rate= best_dqn_params['lr'],
+        gamma= best_dqn_params['gamma'],
+        exploration_fraction= best_dqn_params['exploration_fraction'],
+        buffer_size= best_dqn_params['buffer_size'],
+        batch_size= best_dqn_params['batch_size'],
+        train_freq=4,
+        target_update_interval=1000
+        # etc if you want other params
+    )
+    final_model.learn(total_timesteps= dqn_total_timesteps, callback= final_dqn_logger)
+    final_model.save("best_dqn_model_lstm.zip")
 
-        # Show last 15 steps (like use_dqn)
-        steps_to_display = 15
-        last_n = step_data[-steps_to_display:] if len(step_data)> steps_to_display else step_data
-        rows = []
-        for d in last_n:
-            rows.append([
-                d["Step"], d["Action"], f"{d['Reward']:.2f}",
-                f"{d['Balance']:.2f}", d["Shares"], f"{d['NetWorth']:.2f}"
-            ])
-        headers = ["Step","Action","Reward","Balance","Shares","NetWorth"]
-        print(f"\n== Last {steps_to_display} Steps ==")
-        print(tabulate(rows, headers=headers, tablefmt="pretty"))
+    ###################################
+    # E) FINAL INFERENCE & LOG RESULTS
+    ###################################
+    logging.info("Running final inference with best DQN...")
+
+    env_test = StockTradingEnvWithLSTM(
+        df=df,
+        feature_columns= feature_columns,
+        lstm_model= final_lstm,
+        scaler_features= scaler_features,
+        scaler_target= scaler_target,
+        window_size= lstm_window_size
+    )
+    obs = env_test.reset()
+    done=False
+    total_reward=0.0
+    step_data=[]
+    step_count=0
+
+    while not done:
+        step_count+=1
+        action, _= final_model.predict(obs, deterministic=True)
+        obs, reward, done, info= env_test.step(action)
+        total_reward+= reward
+        step_data.append({
+            "Step": step_count,
+            "Action": int(action),
+            "Reward": reward,
+            "Balance": env_test.balance,
+            "Shares": env_test.shares_held,
+            "NetWorth": env_test.net_worth
+        })
+
+    final_net_worth= env_test.net_worth
+    final_profit= final_net_worth - env_test.initial_balance
+
+    print("\n=== Final DQN Inference ===")
+    print(f"Total Steps: {step_count}")
+    print(f"Final Net Worth: {final_net_worth:.2f}")
+    print(f"Final Profit: {final_profit:.2f}")
+    print(f"Sum of Rewards: {total_reward:.2f}")
+
+    buy_count = sum(1 for x in step_data if x["Action"]==2)
+    sell_count = sum(1 for x in step_data if x["Action"]==0)
+    hold_count = sum(1 for x in step_data if x["Action"]==1)
+    print(f"Actions Taken -> BUY:{buy_count}, SELL:{sell_count}, HOLD:{hold_count}")
+
+    # Show last 15 steps
+    last_n= step_data[-15:] if len(step_data)>15 else step_data
+    rows=[]
+    for d in last_n:
+        rows.append([
+            d["Step"],
+            d["Action"],
+            f"{d['Reward']:.2f}",
+            f"{d['Balance']:.2f}",
+            d["Shares"],
+            f"{d['NetWorth']:.2f}"
+        ])
+    headers= ["Step","Action","Reward","Balance","Shares","NetWorth"]
+    print(f"\n== Last 15 Steps ==")
+    print(tabulate(rows, headers=headers, tablefmt="pretty"))
 
     logging.info("All tasks complete. Exiting.")
 
 
-if __name__ == "__main__":
+if __name__=="__main__":
     main()
File diff suppressed because one or more lines are too long
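Once the updated script has run, it leaves behind best_lstm_model.h5, scaler_features.pkl, scaler_target.pkl and best_dqn_model_lstm.zip. A minimal sketch of reloading them for later inference, assuming the files sit in the working directory:

    import joblib
    from tensorflow.keras.models import load_model
    from stable_baselines3 import DQN

    # compile=False is enough here because the LSTM is only used for prediction.
    lstm = load_model('best_lstm_model.h5', compile=False)
    scaler_features = joblib.load('scaler_features.pkl')
    scaler_target = joblib.load('scaler_target.pkl')

    # Stable-Baselines3 restores the tuned agent from the saved zip archive.
    dqn_agent = DQN.load('best_dqn_model_lstm.zip')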