added griffins shit
This commit is contained in:
BIN
src/griffin-stuff/backtester/.config.json.un~
Normal file
BIN
src/griffin-stuff/backtester/.config.json.un~
Normal file
Binary file not shown.
@@ -0,0 +1,129 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import datetime
|
||||||
|
from sklearn.linear_model import LogisticRegression
|
||||||
|
from sklearn.metrics import accuracy_score
|
||||||
|
|
||||||
|
from indicators import add_indicators
|
||||||
|
from strategy import generate_signals
|
||||||
|
from backtester import backtest
|
||||||
|
from optimizer import parameter_search
|
||||||
|
from indicator_sets import indicator_sets
|
||||||
|
|
||||||
|
def load_config(config_path="config.json"):
    """Read a JSON configuration file and return its parsed contents.

    Args:
        config_path: Path of the JSON file to read. Defaults to
            ``config.json`` relative to the current working directory.

    Returns:
        The object produced by ``json.load`` for the file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8 by specification; do not rely on the locale default.
    with open(config_path, 'r', encoding='utf-8') as f:
        return json.load(f)
|
||||||
|
|
||||||
|
def load_data(data_path):
    """Load OHLCV bars from a CSV file into a chronologically sorted DataFrame.

    The ``Date`` column is parsed as datetimes and used as the index.

    Args:
        data_path: Path of the CSV file to read.

    Returns:
        DataFrame indexed by ``Date`` and sorted by that index.

    Raises:
        ValueError: If any of Open/High/Low/Close/Volume is absent; the
            message lists the missing columns. (pandas raises its own error
            while reading if ``Date`` itself is absent.)
    """
    df = pd.read_csv(data_path, parse_dates=['Date'], index_col='Date')
    df = df.sort_index()

    required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        # Name the offending columns so a malformed file is quick to diagnose.
        raise ValueError(
            "Data file must contain Date,Open,High,Low,Close,Volume columns. "
            f"Missing: {', '.join(missing)}"
        )
    return df
|
||||||
|
|
||||||
|
def visualize_data_with_indicators(df):
    """Plot price/EMA, RSI, MACD and ADX as four stacked panels on a shared x-axis.

    Reads the 'Close', 'EMA', 'RSI', 'MACD' and 'ADX' columns of ``df`` and
    uses ``df.index`` for the x values, then calls ``plt.show()``.
    """
    fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
    price_ax, rsi_ax, macd_ax, adx_ax = axes
    x = df.index

    # Top panel: closing price with its EMA overlay.
    price_ax.plot(x, df['Close'], label='Close', color='black')
    price_ax.plot(x, df['EMA'], label='EMA', color='blue', alpha=0.7)
    price_ax.set_title('Price and EMA')
    price_ax.legend()

    # Remaining panels: (axis, column, line colour, dashed reference levels).
    panels = (
        (rsi_ax, 'RSI', 'green', ((70, 'red'), (30, 'green'))),
        (macd_ax, 'MACD', 'purple', ((0, 'red'),)),
        (adx_ax, 'ADX', 'brown', ((20, 'grey'), (25, 'grey'))),
    )
    for ax, column, line_colour, levels in panels:
        ax.plot(x, df[column], label=column, color=line_colour)
        for level, level_colour in levels:
            ax.axhline(level, color=level_colour, linestyle='--')
        ax.set_title(column)

    plt.tight_layout()
    plt.show()
|
||||||
|
|
||||||
|
def log_results(message, log_file="indicator_test_results.log"):
    """Append ``message`` to ``log_file`` as one line prefixed with the current time.

    The line has the form ``YYYY-MM-DD HH:MM:SS - <message>``; the timestamp
    comes from ``datetime.datetime.now()``.
    """
    stamp = f"{datetime.datetime.now():%Y-%m-%d %H:%M:%S}"
    line = f"{stamp} - {message}\n"
    with open(log_file, "a") as log:
        log.write(line)
|
||||||
|
|
||||||
|
def main():
    """Run the full demo pipeline on the 5-minute SPY data set.

    Steps, in order:
      1. Load ``config.json`` and ``data/SPY_5min_preprocessed.csv``.
      2. Add the core indicators and plot them.
      3. Generate signals, backtest them and print the results.
      4. Grid-search the two RSI thresholds and print the best combination.
      5. For every entry of ``indicator_sets``, fit a logistic regression that
         predicts whether the next bar closes higher and log its test accuracy
         to ``indicator_test_results.log`` (the log is truncated first).
    """
    config = load_config("config.json")
    data_path = os.path.join("data", "SPY_5min_preprocessed.csv")
    df = load_data(data_path)

    # Add core indicators and visualize
    df = add_indicators(df, config)
    visualize_data_with_indicators(df)

    # Generate signals and backtest
    df = generate_signals(df, config)
    results = backtest(df, config)
    print("Backtest Results:")
    print(results)

    # Parameter optimization example
    param_grid = {
        "rsi_threshold_bearish": [65, 70, 75],
        "rsi_threshold_bullish": [25, 30, 35]
    }
    best_params, best_performance = parameter_search(df, config, param_grid)
    print("Best Parameters Found:", best_params)
    print("Best Performance (Final Equity):", best_performance)

    # Now test multiple indicator sets for classification accuracy
    log_file = "indicator_test_results.log"
    with open(log_file, "w") as f:
        f.write("Indicator Test Results Log\n")

    # Create prediction target: next candle up or down
    df['Future_Close'] = df['Close'].shift(-1)
    df['Up_Indicator'] = (df['Future_Close'] > df['Close']).astype(int)
    # The last row has no following bar, so its target is undefined.
    df = df.dropna(subset=['Future_Close'])

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(df)*0.7)
    df_train = df.iloc[:train_size].copy()
    df_test = df.iloc[train_size:].copy()

    # Raw price columns and the target itself must not be used as features.
    base_cols = ['Open','High','Low','Close','Volume','Future_Close','Up_Indicator']

    for set_name, func in indicator_sets.items():
        # Apply the indicator set to train/test
        train = func(df_train.copy())
        test = func(df_test.copy())

        # Ensure columns align
        test = test.reindex(columns=train.columns)
        # Drop rows with missing values from BOTH splits: the estimator
        # rejects NaN input, and previously only the test split was cleaned.
        train = train.dropna()
        test = test.dropna()
        if len(test) == 0 or len(train) == 0:
            log_results(f"{set_name}: Not enough data after adding indicators.", log_file)
            continue

        feature_cols = [c for c in train.columns if c not in base_cols]

        X_train = train[feature_cols]
        y_train = train['Up_Indicator']
        X_test = test[feature_cols]
        y_test = test['Up_Indicator']

        # Train a simple logistic regression model
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        acc = accuracy_score(y_test, y_pred)
        result_message = f"{set_name}: Accuracy = {acc:.4f}"
        print(result_message)
        log_results(result_message, log_file)


if __name__ == "__main__":
    main()
|
||||||
Binary file not shown.
Binary file not shown.
32
src/griffin-stuff/backtester/config.json
Normal file
32
src/griffin-stuff/backtester/config.json
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"input_csv": "data/SPY_5min_preprocessed.csv",
|
||||||
|
"date_column": "Date",
|
||||||
|
"price_column": "Close",
|
||||||
|
"high_column": "High",
|
||||||
|
"low_column": "Low",
|
||||||
|
"volume_column": "Volume"
|
||||||
|
},
|
||||||
|
"indicators": {
|
||||||
|
"trend": ["SMA", "EMA", "ADX"],
|
||||||
|
"momentum": ["RSI", "MACD"],
|
||||||
|
"volatility": ["BollingerBands"],
|
||||||
|
"volume": ["OBV"],
|
||||||
|
"mean_reversion": ["MeanReversionSignal"]
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"SMA": {"window": 20},
|
||||||
|
"EMA": {"window": 20},
|
||||||
|
"ADX": {"window": 14},
|
||||||
|
"RSI": {"window": 14, "overbought": 70, "oversold": 30},
|
||||||
|
"MACD": {"fastperiod": 12, "slowperiod": 26, "signalperiod": 9},
|
||||||
|
"BollingerBands": {"window": 20, "std_dev": 2},
|
||||||
|
"OBV": {},
|
||||||
|
"MeanReversionSignal": {"window": 10}
|
||||||
|
},
|
||||||
|
"evaluation": {
|
||||||
|
"prediction_horizon": 1,
|
||||||
|
"log_file": "logs/results.log"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
32
src/griffin-stuff/backtester/config.json~
Normal file
32
src/griffin-stuff/backtester/config.json~
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"input_csv": "data/price_data.csv",
|
||||||
|
"date_column": "Date",
|
||||||
|
"price_column": "Close",
|
||||||
|
"high_column": "High",
|
||||||
|
"low_column": "Low",
|
||||||
|
"volume_column": "Volume"
|
||||||
|
},
|
||||||
|
"indicators": {
|
||||||
|
"trend": ["SMA", "EMA", "ADX"],
|
||||||
|
"momentum": ["RSI", "MACD"],
|
||||||
|
"volatility": ["BollingerBands"],
|
||||||
|
"volume": ["OBV"],
|
||||||
|
"mean_reversion": ["MeanReversionSignal"]
|
||||||
|
},
|
||||||
|
"parameters": {
|
||||||
|
"SMA": {"window": 20},
|
||||||
|
"EMA": {"window": 20},
|
||||||
|
"ADX": {"window": 14},
|
||||||
|
"RSI": {"window": 14, "overbought": 70, "oversold": 30},
|
||||||
|
"MACD": {"fastperiod": 12, "slowperiod": 26, "signalperiod": 9},
|
||||||
|
"BollingerBands": {"window": 20, "std_dev": 2},
|
||||||
|
"OBV": {},
|
||||||
|
"MeanReversionSignal": {"window": 10}
|
||||||
|
},
|
||||||
|
"evaluation": {
|
||||||
|
"prediction_horizon": 1,
|
||||||
|
"log_file": "logs/results.log"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
819391
src/griffin-stuff/backtester/data/SPY_3yr_1min_data.csv
Normal file
819391
src/griffin-stuff/backtester/data/SPY_3yr_1min_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
69901
src/griffin-stuff/backtester/data/SPY_3yr_5min_data.csv
Normal file
69901
src/griffin-stuff/backtester/data/SPY_3yr_5min_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
69901
src/griffin-stuff/backtester/data/SPY_5min_preprocessed.csv
Normal file
69901
src/griffin-stuff/backtester/data/SPY_5min_preprocessed.csv
Normal file
File diff suppressed because it is too large
Load Diff
819391
src/griffin-stuff/backtester/data/spy_1min_preprocessed.csv
Normal file
819391
src/griffin-stuff/backtester/data/spy_1min_preprocessed.csv
Normal file
File diff suppressed because it is too large
Load Diff
32
src/griffin-stuff/backtester/evaluation.py
Normal file
32
src/griffin-stuff/backtester/evaluation.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def evaluate_indicator_accuracy(df, price_col="Close", horizon=1):
    """
    Evaluate how often non-neutral signals predict the price direction
    ``horizon`` rows ahead.

    Logic (row ``i`` is scored against row ``i + horizon``):
    - If signal[i] = 1 (bullish), correct if price[i+horizon] > price[i].
    - If signal[i] = -1 (bearish), correct if price[i+horizon] < price[i].
    - If signal[i] = 0, skip.
    Any other signal value (including NaN) counts as an incorrect prediction.

    Args:
        df: DataFrame with a 'signal' column and the ``price_col`` column.
        price_col: Name of the price column to compare.
        horizon: Number of rows to look ahead; must not be negative.

    Returns:
        Fraction of scored signals that were correct, or ``np.nan`` when no
        row could be scored (no non-zero signal, or fewer rows than needed).

    Raises:
        ValueError: If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError("horizon must be a non-negative number of rows.")

    # The last `horizon` rows have no future price to compare against.
    usable = len(df) - horizon
    if usable <= 0:
        return np.nan

    signals = df['signal'].to_numpy()[:usable]
    prices = df[price_col].to_numpy()
    current_price = prices[:usable]
    future_price = prices[horizon:]

    total = int(np.count_nonzero(signals != 0))
    if total == 0:
        return np.nan  # No signals generated

    hits = (
        ((signals == 1) & (future_price > current_price))
        | ((signals == -1) & (future_price < current_price))
    )
    return int(np.count_nonzero(hits)) / total
|
||||||
96
src/griffin-stuff/backtester/indicators.py
Normal file
96
src/griffin-stuff/backtester/indicators.py
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import ta
|
||||||
|
|
||||||
|
def _side_of_reference(values, reference):
    """Return 1 where values > reference, -1 where values <= reference, 0 where either is NaN."""
    return np.select([values > reference, values <= reference], [1, -1], default=0)


def calculate_indicator_signals(df, indicator_name, params, price_col="Close", high_col="High", low_col="Low", volume_col="Volume"):
    """
    Calculates indicator values and generates signals:
    Signal Convention: 1 = Bullish Prediction, -1 = Bearish Prediction, 0 = Neutral

    The indicator columns and the 'signal' column are written onto ``df``
    itself (pass a copy to keep the original untouched) and ``df`` is returned.
    Rows where the indicator is not yet defined (NaN warm-up period) get a
    neutral signal of 0.

    Args:
        df: Price DataFrame.
        indicator_name: One of "SMA", "EMA", "ADX", "RSI", "MACD",
            "BollingerBands", "OBV", "MeanReversionSignal".
        params: Dict of indicator parameters; missing keys fall back to the
            defaults coded in each branch.
        price_col, high_col, low_col, volume_col: Column names to read.

    Raises:
        ValueError: If a required column is missing or the indicator name is
            unknown.
    """
    if price_col not in df.columns:
        raise ValueError(f"{price_col} column not found in the dataframe.")

    if indicator_name == "SMA":
        # Trend: price > SMA => bullish, else bearish
        window = params.get("window", 20)
        df['SMA'] = df[price_col].rolling(window).mean()
        # Neutral while the rolling window is still filling (SMA is NaN).
        df['signal'] = _side_of_reference(df[price_col], df['SMA'])

    elif indicator_name == "EMA":
        # Trend: price > EMA => bullish, else bearish
        window = params.get("window", 20)
        df['EMA'] = df[price_col].ewm(span=window, adjust=False).mean()
        df['signal'] = _side_of_reference(df[price_col], df['EMA'])

    elif indicator_name == "ADX":
        # Trend: use ADXIndicator
        if high_col not in df.columns or low_col not in df.columns:
            raise ValueError("ADX calculation requires 'High' and 'Low' columns.")
        window = params.get("window", 14)
        adx_indicator = ta.trend.ADXIndicator(high=df[high_col], low=df[low_col], close=df[price_col], window=window)
        df['ADX'] = adx_indicator.adx()
        df['DIP'] = adx_indicator.adx_pos()  # +DI
        df['DIN'] = adx_indicator.adx_neg()  # -DI

        # If ADX > 25 and DI+ > DI- => bullish
        # If ADX > 25 and DI- > DI+ => bearish
        # Otherwise => no strong signal
        df['signal'] = 0
        trending_up = (df['DIP'] > df['DIN']) & (df['ADX'] > 25)
        trending_down = (df['DIN'] > df['DIP']) & (df['ADX'] > 25)
        df.loc[trending_up, 'signal'] = 1
        df.loc[trending_down, 'signal'] = -1

    elif indicator_name == "RSI":
        # Momentum: RSI > overbought => bearish, RSI < oversold => bullish
        window = params.get("window", 14)
        overbought = params.get("overbought", 70)
        oversold = params.get("oversold", 30)
        df['RSI'] = ta.momentum.rsi(df[price_col], window=window)
        conditions = [
            (df['RSI'] > overbought),
            (df['RSI'] < oversold)
        ]
        values = [-1, 1]
        df['signal'] = np.select(conditions, values, default=0)

    elif indicator_name == "MACD":
        # Momentum: MACD line > Signal line => bullish, else bearish
        fastperiod = params.get("fastperiod", 12)
        slowperiod = params.get("slowperiod", 26)
        signalperiod = params.get("signalperiod", 9)
        macd = ta.trend.MACD(df[price_col], window_slow=slowperiod, window_fast=fastperiod, window_sign=signalperiod)
        df['MACD'] = macd.macd()
        df['MACD_Signal'] = macd.macd_signal()

        # Neutral until both the MACD and its signal line are defined.
        df['signal'] = _side_of_reference(df['MACD'], df['MACD_Signal'])

    elif indicator_name == "BollingerBands":
        # Volatility: price near upper band => bearish, near lower band => bullish
        window = params.get("window", 20)
        std_dev = params.get("std_dev", 2)
        bb = ta.volatility.BollingerBands(df[price_col], window=window, window_dev=std_dev)
        df['BB_High'] = bb.bollinger_hband()
        df['BB_Low'] = bb.bollinger_lband()
        df['signal'] = np.where(df[price_col] >= df['BB_High'], -1,
                                np.where(df[price_col] <= df['BB_Low'], 1, 0))

    elif indicator_name == "OBV":
        # Volume: Rising OBV => bullish, falling OBV => bearish
        if volume_col not in df.columns:
            raise ValueError(f"OBV calculation requires '{volume_col}' column.")
        df['OBV'] = ta.volume.on_balance_volume(df[price_col], df[volume_col])
        df['OBV_Change'] = df['OBV'].diff()
        df['signal'] = np.where(df['OBV_Change'] > 0, 1, np.where(df['OBV_Change'] < 0, -1, 0))

    elif indicator_name == "MeanReversionSignal":
        # Mean Reversion: price > mean => bearish, price < mean => bullish
        window = params.get("window", 10)
        df['mean'] = df[price_col].rolling(window).mean()
        df['signal'] = np.where(df[price_col] > df['mean'], -1,
                                np.where(df[price_col] < df['mean'], 1, 0))

    else:
        raise ValueError(f"Unknown indicator: {indicator_name}")

    return df
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
2024-12-13 22:07:39,152 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
|
||||||
|
2024-12-13 22:07:40,855 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
|
||||||
|
2024-12-13 22:10:04,274 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
|
||||||
|
2024-12-13 22:10:05,997 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
|
||||||
|
2024-12-13 22:10:07,745 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
|
||||||
|
2024-12-13 22:10:08,484 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
|
||||||
|
2024-12-13 22:10:09,096 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
|
||||||
|
2024-12-13 22:10:11,937 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
|
||||||
|
2024-12-13 22:10:15,386 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
|
||||||
|
2024-12-13 22:12:44,520 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
|
||||||
|
2024-12-13 22:12:45,874 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
|
||||||
|
2024-12-13 22:12:47,913 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
|
||||||
|
2024-12-13 22:12:48,530 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
|
||||||
|
2024-12-13 22:12:49,173 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
|
||||||
|
2024-12-13 22:12:51,230 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
|
||||||
|
2024-12-13 22:12:54,504 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
|
||||||
|
2024-12-13 22:23:17,293 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
|
||||||
|
2024-12-13 22:23:18,087 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
|
||||||
|
2024-12-13 22:23:19,409 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
|
||||||
|
2024-12-13 22:23:19,797 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
|
||||||
|
2024-12-13 22:23:20,669 - INFO - Category: momentum, Indicator: MACD, Accuracy: 0.3184
|
||||||
|
2024-12-13 22:23:20,993 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
|
||||||
|
2024-12-13 22:23:21,786 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
|
||||||
|
2024-12-13 22:23:22,678 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
|
||||||
8
src/griffin-stuff/backtester/logs/results.log
Normal file
8
src/griffin-stuff/backtester/logs/results.log
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
2024-12-13 22:23:17,293 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
|
||||||
|
2024-12-13 22:23:18,087 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
|
||||||
|
2024-12-13 22:23:19,409 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
|
||||||
|
2024-12-13 22:23:19,797 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
|
||||||
|
2024-12-13 22:23:20,669 - INFO - Category: momentum, Indicator: MACD, Accuracy: 0.3184
|
||||||
|
2024-12-13 22:23:20,993 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
|
||||||
|
2024-12-13 22:23:21,786 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
|
||||||
|
2024-12-13 22:23:22,678 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
|
||||||
65
src/griffin-stuff/backtester/main.py
Normal file
65
src/griffin-stuff/backtester/main.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import pandas as pd
|
||||||
|
import os
|
||||||
|
|
||||||
|
from indicators import calculate_indicator_signals
|
||||||
|
from evaluation import evaluate_indicator_accuracy
|
||||||
|
|
||||||
|
def setup_logging(log_path):
    """Configure root logging to write INFO-level records to ``log_path``.

    The parent directory of ``log_path`` is created when it does not exist.
    Configuration is done through ``logging.basicConfig``, which leaves the
    root logger untouched if it already has handlers.

    Args:
        log_path: File the log records are written to.
    """
    log_dir = os.path.dirname(log_path)
    # A bare file name has no directory part; os.makedirs("") would raise.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(
        filename=log_path,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
|
||||||
|
|
||||||
|
def load_config(config_path="config.json"):
    """Parse the JSON configuration file at ``config_path`` and return it.

    Args:
        config_path: Path of the JSON file; defaults to ``config.json`` in
            the current working directory.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Read as UTF-8 explicitly; JSON is UTF-8 and the locale default may differ.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    return config
|
||||||
|
|
||||||
|
def load_data(csv_path, date_col, price_col):
    """Load a price CSV, clean it and return it in chronological order.

    Args:
        csv_path: Path of the CSV file.
        date_col: Name of the column holding timestamps; converted with
            ``pd.to_datetime``.
        price_col: Name of the price column.

    Returns:
        DataFrame without rows that lack a date or a price, sorted by
        ``date_col`` and carrying a contiguous 0..n-1 index.
    """
    df = pd.read_csv(csv_path)
    df[date_col] = pd.to_datetime(df[date_col])
    # Drop incomplete rows BEFORE renumbering, otherwise the returned index
    # has gaps wherever a row was removed.
    df = df.dropna(subset=[date_col, price_col])
    # Stable sort keeps file order for rows that share a timestamp.
    df = df.sort_values(date_col, kind="mergesort").reset_index(drop=True)
    return df
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: score every configured indicator and report accuracy.
    config = load_config("config.json")
    eval_cfg = config["evaluation"]
    setup_logging(eval_cfg["log_file"])

    # Load data
    data_cfg = config["data"]
    df = load_data(data_cfg["input_csv"], data_cfg["date_column"], data_cfg["price_column"])

    # Walk the (category, indicator) pairs in config order.
    indicator_pairs = (
        (category, ind_name)
        for category, indicators in config["indicators"].items()
        for ind_name in indicators
    )

    # Calculate indicators and signals, evaluate accuracy
    all_results = []
    for category, ind_name in indicator_pairs:
        # Each indicator works on its own copy so columns do not accumulate.
        signals_df = calculate_indicator_signals(
            df.copy(),
            indicator_name=ind_name,
            params=config["parameters"].get(ind_name, {}),
            price_col=data_cfg["price_column"],
            high_col=data_cfg["high_column"],
            low_col=data_cfg["low_column"],
            volume_col=data_cfg["volume_column"],
        )
        accuracy = evaluate_indicator_accuracy(
            signals_df,
            price_col=data_cfg["price_column"],
            horizon=eval_cfg["prediction_horizon"],
        )

        logging.info(f"Category: {category}, Indicator: {ind_name}, Accuracy: {accuracy:.4f}")
        all_results.append((category, ind_name, accuracy))

    # Print results to console as well
    for category, ind_name, acc in all_results:
        print(f"Category: {category}, Indicator: {ind_name}, Accuracy: {acc:.4f}")
|
||||||
233
src/griffin-stuff/data_collection_daily.py
Normal file
233
src/griffin-stuff/data_collection_daily.py
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
import signal
|
||||||
|
from ibapi.client import EClient
|
||||||
|
from ibapi.wrapper import EWrapper
|
||||||
|
from ibapi.contract import Contract
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from tqdm import tqdm # For progress bar
|
||||||
|
import os
|
||||||
|
|
||||||
|
class IBApi(EWrapper, EClient):
    """IB API client/wrapper that collects historical bars into a DataFrame."""

    def __init__(self):
        EClient.__init__(self, self)
        self.data = []               # bars received for the request in progress
        self.df = pd.DataFrame()     # bars of all completed requests
        self.data_retrieved = False  # set to True when a request has finished

    def historicalData(self, reqId, bar):
        """Callback for a single bar: print it and buffer it in ``self.data``."""
        # Debug: Print each received bar
        print(f"Received bar: Date={bar.date}, Open={bar.open}, High={bar.high}, Low={bar.low}, Close={bar.close}, Volume={bar.volume}")
        row = {
            column: getattr(bar, column.lower())
            for column in ("Date", "Open", "High", "Low", "Close", "Volume")
        }
        self.data.append(row)

    def historicalDataEnd(self, reqId, start, end):
        """Callback at the end of a request: move buffered bars into ``self.df``."""
        # Debug: Indicate end of data reception
        print(f"HistoricalDataEnd received. Start: {start}, End: {end}. Number of bars fetched: {len(self.data)}")
        chunk_df = pd.DataFrame(self.data)
        if chunk_df.empty:
            print("No data received in this request.")
        else:
            self.df = pd.concat([self.df, chunk_df], ignore_index=True)
        self.data_retrieved = True
        self.data = []  # Reset data list for next request
|
||||||
|
|
||||||
|
class IBApp:
    """Drive an ``IBApi`` client to download daily historical bars.

    Bars of every completed request accumulate in ``self.app.df``.
    """

    def __init__(self):
        self.app = IBApi()

    def connect(self):
        """Connect to the IB API on 127.0.0.1:4002 and start its message loop in a daemon thread."""
        # Connect to IB API (ensure IB Gateway or TWS is running)
        print("Connecting to IB API...")
        self.app.connect("127.0.0.1", 4002, clientId=1)
        # Start the API thread
        thread = threading.Thread(target=self.run_app, daemon=True)
        thread.start()
        time.sleep(1)  # Allow time for connection
        # NOTE: printed unconditionally; the connection state is not checked here.
        print("Connected to IB API.")

    def run_app(self):
        """Run the client's message loop (thread target used by ``connect``)."""
        self.app.run()

    @staticmethod
    def _build_contract(symbol, sec_type, exchange, currency):
        """Create a Contract populated with the four given fields."""
        contract = Contract()
        contract.symbol = symbol
        contract.secType = sec_type
        contract.exchange = exchange
        contract.currency = currency
        return contract

    @staticmethod
    def _parse_bar_date(text):
        """Parse a bar date string as a UTC datetime; return None if it cannot be parsed."""
        for fmt in ("%Y%m%d %H:%M:%S", "%Y-%m-%d %H:%M:%S"):
            try:
                return datetime.strptime(text, fmt).replace(tzinfo=timezone.utc)
            except ValueError:
                continue
        # Fall back to pandas for other layouts (date-only, explicit offsets, ...).
        parsed = pd.to_datetime(text, errors='coerce', utc=True)
        return None if pd.isna(parsed) else parsed.to_pydatetime()

    def request_data(self, contract, end_date, duration, bar_size, timeout=120.0):
        """Request one chunk of historical bars and block until it has arrived.

        Args:
            contract: Contract to request data for.
            end_date: End of the requested window, in the string format the
                IB API expects for ``endDateTime``.
            duration: IB duration string, e.g. ``"1 Y"``.
            bar_size: IB bar size string, e.g. ``"1 day"``.
            timeout: Maximum number of seconds to wait for the end-of-data
                callback; ``None`` waits indefinitely.

        Raises:
            ConnectionError: If the client is no longer connected while waiting.
            TimeoutError: If no end-of-data callback arrives within ``timeout``.
        """
        req_id = 1
        print(f"Requesting data: endDateTime={end_date}, durationStr={duration}, barSizeSetting={bar_size}")
        # Clear a possibly stale flag so only this request's completion counts.
        self.app.data_retrieved = False
        self.app.reqHistoricalData(
            reqId=req_id,
            contract=contract,
            endDateTime=end_date,
            durationStr=duration,
            barSizeSetting=bar_size,
            whatToShow="TRADES",
            useRTH=1,  # Use regular trading hours
            formatDate=1,
            keepUpToDate=False,
            chartOptions=[]
        )
        # Wait until data is retrieved, but never forever: a rejected request
        # or a dropped connection produces no end-of-data callback.
        deadline = None if timeout is None else time.monotonic() + timeout
        while not self.app.data_retrieved:
            if not self.app.isConnected():
                raise ConnectionError("Not connected to IB API while waiting for historical data.")
            if deadline is not None and time.monotonic() >= deadline:
                # Cancel so a late answer cannot leak into the next request.
                self.app.cancelHistoricalData(req_id)
                raise TimeoutError(f"No historical data received within {timeout} seconds.")
            time.sleep(0.1)
        self.app.data_retrieved = False  # Reset flag for next request

    def fetch_historical_data_yearly(self, symbol, sec_type, exchange, currency, start_date, end_date, bar_size="1 day"):
        """
        Fetch historical data in yearly chunks to cover 3 years.

        Three "1 Y" requests are issued, walking backwards from ``end_date``
        in 365-day steps. ``start_date`` is accepted for interface
        compatibility but is not used. Errors are printed, not raised.
        """
        try:
            contract = self._build_contract(symbol, sec_type, exchange, currency)

            delta = timedelta(days=365)
            current_end_date = end_date

            total_years = 3  # Fetch 3 years of data
            with tqdm(total=total_years, desc="Fetching Data", unit="year") as pbar:
                for _ in range(total_years):
                    end_date_str = current_end_date.strftime("%Y%m%d %H:%M:%S UTC")
                    self.request_data(contract, end_date_str, "1 Y", bar_size)
                    pbar.update(1)
                    current_end_date = current_end_date - delta
                    time.sleep(1)  # Respect IB API pacing
        except Exception as e:
            print(f"Error fetching data: {e}")

    def fetch_historical_data(self, symbol, sec_type, exchange, currency, existing_df=None):
        """
        Fetch historical data for the given symbol.

        The full 3 years of daily bars are always fetched. When
        ``existing_df`` is provided, its last 'Date' value is validated and
        reported first; if that value cannot be parsed, an error is printed
        and nothing is fetched. Errors are printed, not raised.
        """
        try:
            bar_size = "1 day"  # Set bar size to 1 day for daily data

            if existing_df is not None and not existing_df.empty:
                # Collapse runs of whitespace in the last date to single spaces.
                last_date_str = " ".join(existing_df['Date'].iloc[-1].split())
                last_date = self._parse_bar_date(last_date_str)
                if last_date is None:
                    print(f"Error parsing last_date_str: {last_date_str}")
                    return

                # Remove any future dates if present. The column holds strings,
                # so compare parsed timestamps rather than the raw values.
                current_time = datetime.now(timezone.utc)
                parsed_dates = pd.to_datetime(existing_df['Date'], errors='coerce', utc=True)
                # NOTE: the cleaned frame is not merged with the new download
                # here; the whole 3-year window is fetched again below.
                existing_df = existing_df[parsed_dates <= current_time]
                print(f"Last valid date after cleaning: {last_date.strftime('%Y-%m-%d %H:%M:%S')}")

            # Fetch 3 years of data up to the current time.
            end_date = datetime.now(timezone.utc)
            start_date = end_date - timedelta(days=365 * 3)
            self.fetch_historical_data_yearly(symbol, sec_type, exchange, currency, start_date, end_date, bar_size)
        except Exception as e:
            print(f"Error fetching data: {e}")

    def disconnect(self):
        """Disconnect the client from the IB API."""
        self.app.disconnect()
        print("Disconnected from IB API.")
|
||||||
|
|
||||||
|
def get_user_input():
    """Prompt for a stock symbol and return the contract details to request.

    Returns:
        ``(symbol, "STK", "SMART", "USD")`` with the symbol stripped and
        upper-cased, or ``None`` (after printing the reason) when the symbol
        is empty or reading the input fails.
    """
    print("Provide the stock details for historical data retrieval.")
    try:
        symbol = input("Enter the stock symbol (e.g., 'AAPL'): ").strip().upper()
        if symbol:
            # Security type, exchange and currency are fixed: stock, SMART routing, USD.
            return symbol, "STK", "SMART", "USD"
        raise ValueError("Stock symbol is required. Please try again.")
    except Exception as e:
        print(f"Input Error: {e}")
    return None
|
||||||
|
|
||||||
|
def graceful_exit(signal_received, frame):
    """Signal handler: disconnect the module-level ``app`` and exit with status 0.

    Args:
        signal_received: Signal number passed in by the ``signal`` module.
        frame: Current stack frame passed in by the ``signal`` module.
    """
    print("\nTerminating program...")
    app.disconnect()
    # Raise SystemExit directly: the exit() builtin is installed by the
    # `site` module for interactive use and is not guaranteed to exist.
    raise SystemExit(0)
|
||||||
|
|
||||||
|
# Handle graceful exit on Ctrl+C
signal.signal(signal.SIGINT, graceful_exit)

# Initialize and connect the IBApp
app = IBApp()
app.connect()

try:
    user_input = get_user_input()
    if user_input:
        symbol, sec_type, exchange, currency = user_input

        # Define the filename (save directly in current directory)
        filename = f"{symbol}_3yr_daily_data.csv"

        # Fetch historical data
        app.fetch_historical_data(symbol, sec_type, exchange, currency)

        # Retrieve fetched data
        data = app.app.df
        if not data.empty:
            print(f"Number of data points fetched: {len(data)}")

            # Clean and parse the 'Date' column; unparseable values become NaT
            data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

            # Check if timezone is present; if not, localize to UTC
            if data['Date'].dt.tz is None:
                data['Date'] = data['Date'].dt.tz_localize(timezone.utc, ambiguous='NaT', nonexistent='NaT')

            # Remove any rows with NaT in 'Date'
            data.dropna(subset=['Date'], inplace=True)

            # The yearly requests step back by 365 days while each asks for
            # "1 Y", so neighbouring chunks can overlap; keep one row per date.
            data.drop_duplicates(subset='Date', inplace=True)

            # Sort by 'Date' ascending
            data.sort_values(by='Date', inplace=True)

            # Reset index
            data.reset_index(drop=True, inplace=True)

            # Save to CSV
            data.to_csv(filename, index=False)
            print(f"Data saved to {filename}.")
            print(data.head())
        else:
            print("No data retrieved.")
except Exception as e:
    print(f"Error: {e}")
finally:
    app.disconnect()
|
||||||
144
src/griffin-stuff/data_collection_min.py
Normal file
144
src/griffin-stuff/data_collection_min.py
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
import signal
|
||||||
|
from ibapi.client import EClient
|
||||||
|
from ibapi.wrapper import EWrapper
|
||||||
|
from ibapi.contract import Contract
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
from tqdm import tqdm # For progress bar
|
||||||
|
|
||||||
|
|
||||||
|
class IBApi(EWrapper, EClient):
    """IB API client/wrapper that collects historical bars into a DataFrame."""

    def __init__(self):
        EClient.__init__(self, self)
        self.data = []               # bars received for the request in progress
        self.df = pd.DataFrame()     # bars of all completed requests
        self.data_retrieved = False  # set to True when a request has finished

    def historicalData(self, reqId, bar):
        """Callback for a single bar: buffer it in ``self.data``."""
        self.data.append({
            "Date": bar.date,
            "Open": bar.open,
            "High": bar.high,
            "Low": bar.low,
            "Close": bar.close,
            "Volume": bar.volume
        })

    def historicalDataEnd(self, reqId, start, end):
        """Callback at the end of a request: move buffered bars into ``self.df``."""
        # A request may legitimately return no bars; an empty chunk adds
        # nothing, so skip the concat in that case.
        if self.data:
            chunk_df = pd.DataFrame(self.data)
            self.df = pd.concat([self.df, chunk_df], ignore_index=True)
        self.data_retrieved = True
        self.data = []
|
||||||
|
|
||||||
|
|
||||||
|
class IBApp:
    """Driver around ``IBApi``: connect, request historical bars, disconnect."""

    def __init__(self):
        self.app = IBApi()

    def connect(self):
        """Connect to 127.0.0.1:4002 and run the API loop on a daemon thread."""
        self.app.connect("127.0.0.1", 4002, clientId=1)
        thread = threading.Thread(target=self.run_app, daemon=True)
        thread.start()
        # NOTE(review): fixed pause, presumably to let the connection come up
        # before the first request is issued.
        time.sleep(1)

    def run_app(self):
        """Thread target: run the API's blocking ``run()`` loop."""
        self.app.run()

    def request_data(self, contract, end_date, duration, bar_size, timeout=60.0):
        """Request one chunk of historical bars and wait for it to complete.

        Args:
            contract: Contract to request bars for.
            end_date: End of the requested window, passed as ``endDateTime``.
            duration: Window length, passed as ``durationStr``.
            bar_size: Bar size, passed as ``barSizeSetting``.
            timeout: Seconds to wait for completion; ``None`` waits forever.

        Raises:
            TimeoutError: If completion is not signalled within ``timeout``.
        """
        # Clear the completion flag before every request.  It is only ever
        # set to True by the callback, so without this reset every request
        # after the first would return immediately without waiting.
        self.app.data_retrieved = False
        self.app.reqHistoricalData(
            reqId=1,
            contract=contract,
            endDateTime=end_date,
            durationStr=duration,
            barSizeSetting=bar_size,
            whatToShow="TRADES",
            useRTH=0,
            formatDate=1,
            keepUpToDate=False,
            chartOptions=[]
        )
        # Poll until the end-of-data callback fires.  The deadline keeps a
        # request that never completes (e.g. one answered only with an error)
        # from blocking the program forever.
        deadline = None if timeout is None else time.monotonic() + timeout
        while not self.app.data_retrieved:
            if deadline is not None and time.monotonic() >= deadline:
                self.app.cancelHistoricalData(1)
                raise TimeoutError(
                    f"No historical data received within {timeout} seconds "
                    f"for request ending {end_date}"
                )
            time.sleep(0.1)

    def fetch_historical_data(self, symbol, sec_type, exchange, currency):
        """Fetch 365 days of 5-minute bars, one "1 D" request per calendar day.

        Walks backwards from the current UTC time in one-day steps.  Errors
        are printed rather than raised; a failed day is skipped.

        Args:
            symbol: Contract symbol.
            sec_type: Contract security type.
            exchange: Contract exchange.
            currency: Contract currency.
        """
        try:
            contract = Contract()
            contract.symbol = symbol
            contract.secType = sec_type
            contract.exchange = exchange
            contract.currency = currency

            # Set duration and bar size
            duration = "1 D"  # 1 day chunks
            bar_size = "5 mins"  # 5-minute intervals

            end_date = datetime.now(timezone.utc)
            start_date = end_date - timedelta(days=365)  # Can multiply for more years

            total_days = (end_date - start_date).days
            with tqdm(total=total_days, desc="Fetching Data", unit="day") as pbar:
                current_date = end_date
                while current_date > start_date:
                    end_date_str = current_date.strftime("%Y%m%d %H:%M:%S UTC")
                    try:
                        self.request_data(contract, end_date_str, duration, bar_size)
                        pbar.update(1)
                        time.sleep(5)  # Sleep to avoid pacing violations
                    except Exception as e:
                        print(f"Error fetching data for {end_date_str}: {e}")
                    current_date -= timedelta(days=1)
        except Exception as e:
            print(f"Error fetching data: {e}")

    def disconnect(self):
        """Disconnect the underlying API client."""
        self.app.disconnect()
|
||||||
|
|
||||||
|
|
||||||
|
def get_user_input():
    """Prompt for a stock symbol and return the contract details.

    Returns:
        ``(symbol, "STK", "SMART", "USD")`` with the symbol stripped and
        upper-cased, or ``None`` when the symbol is empty or reading the
        input fails (the reason is printed).
    """
    print("Provide the stock details for historical data retrieval.")
    try:
        raw_symbol = input("Enter the stock symbol (e.g., 'AAPL'): ")
        ticker = raw_symbol.strip().upper()
        if ticker:
            # Security type, exchange and currency are fixed for stocks.
            return ticker, "STK", "SMART", "USD"
        raise ValueError("Stock symbol is required. Please try again.")
    except Exception as err:
        print(f"Input Error: {err}")
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def graceful_exit(signal_received, frame):
    """Signal handler: disconnect the module-level ``app`` and exit with 0.

    Args:
        signal_received: Number of the signal that triggered the handler.
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print("\nTerminating program...")
    app.disconnect()
    # The bare `exit` name is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly terminates the same way and is always available.
    raise SystemExit(0)
|
||||||
|
|
||||||
|
|
||||||
|
# Create the application before installing the SIGINT handler: the handler
# dereferences the module-level `app`, so it must exist by the time a Ctrl+C
# can be delivered.
app = IBApp()
signal.signal(signal.SIGINT, graceful_exit)
app.connect()

try:
    user_input = get_user_input()
    if user_input:
        symbol, sec_type, exchange, currency = user_input
        app.fetch_historical_data(symbol, sec_type, exchange, currency)
        data = app.app.df
        if not data.empty:
            # The fetch walks back one calendar day per request, so windows
            # ending on non-trading days may deliver a session that was
            # already fetched.  Keep a single row per bar timestamp.
            data = data.drop_duplicates(subset="Date").reset_index(drop=True)
            filename = f"{symbol}_1yr_5min_data.csv"
            data.to_csv(filename, index=False)
            print(f"Data saved to {filename}.")
            print(data.head())
        else:
            print("No data retrieved.")
except Exception as e:
    # Top-level boundary: report the failure, then fall through to cleanup.
    print(f"Error: {e}")
finally:
    app.disconnect()
|
||||||
24
src/griffin-stuff/data_preprocessing.py
Normal file
24
src/griffin-stuff/data_preprocessing.py
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Raw CSV to clean.
csv_file_path = 'C:/Users/gwitt/MidasTechnologies/API/SPY_3yr_5min_data.csv'  # Replace with your actual file path
df = pd.read_csv(csv_file_path)

# Parse the stripped 'Date' strings, index the frame by them, put the rows in
# chronological order and forward-fill any missing values.
parsed_dates = pd.to_datetime(df['Date'].str.strip(), format='%Y%m%d %H:%M:%S')
df = (
    df.assign(Date=parsed_dates)
    .set_index('Date')
    .sort_index()
    .ffill()
)

# Write the cleaned frame (index included) to a new CSV file.
preprocessed_file_path = 'SPY_5min_preprocessed.csv'  # Replace with your desired path
df.to_csv(preprocessed_file_path)

print(f"Preprocessed data saved to {preprocessed_file_path}")
|
||||||
|
|
||||||
|
|
||||||
145
src/griffin-stuff/options_data_collection.py
Normal file
145
src/griffin-stuff/options_data_collection.py
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
import signal
|
||||||
|
from ibapi.client import EClient
|
||||||
|
from ibapi.wrapper import EWrapper
|
||||||
|
from ibapi.contract import Contract
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from tqdm import tqdm # For progress bar
|
||||||
|
|
||||||
|
|
||||||
|
class IBApi(EWrapper, EClient):
    """IB client/wrapper that gathers historical bars into a DataFrame.

    Bars are buffered in ``self.data`` as they arrive; on
    ``historicalDataEnd`` the buffer is appended to ``self.df``,
    ``self.data_retrieved`` is set to True and the buffer is reset.
    """

    def __init__(self):
        EClient.__init__(self, self)
        self.data = []               # rows buffered for the current request
        self.df = pd.DataFrame()     # every completed chunk, concatenated
        self.data_retrieved = False  # becomes True in historicalDataEnd

    def historicalData(self, reqId, bar):
        """Buffer one bar as a Date/Open/High/Low/Close/Volume row."""
        columns = ("Date", "Open", "High", "Low", "Close", "Volume")
        # Each column holds the bar attribute with the lower-cased name.
        self.data.append({column: getattr(bar, column.lower()) for column in columns})

    def historicalDataEnd(self, reqId, start, end):
        """Append the buffered rows to ``self.df`` and flag completion."""
        completed = pd.DataFrame(self.data)
        self.df = pd.concat([self.df, completed], ignore_index=True)
        self.data_retrieved = True
        self.data = []
|
||||||
|
|
||||||
|
|
||||||
|
class IBApp:
    """Driver around ``IBApi``: connect, request option bars, disconnect."""

    def __init__(self):
        self.app = IBApi()

    def connect(self):
        """Connect to 127.0.0.1:4002 and run the API loop on a daemon thread."""
        self.app.connect("127.0.0.1", 4002, clientId=1)
        thread = threading.Thread(target=self.run_app, daemon=True)
        thread.start()
        # NOTE(review): fixed pause, presumably to let the connection come up
        # before the first request is issued.
        time.sleep(1)

    def run_app(self):
        """Thread target: run the API's blocking ``run()`` loop."""
        self.app.run()

    def request_data(self, contract, end_date, duration, bar_size, timeout=60.0):
        """Request one chunk of historical bars and wait for it to complete.

        Args:
            contract: Contract to request bars for.
            end_date: End of the requested window, passed as ``endDateTime``.
            duration: Window length, passed as ``durationStr``.
            bar_size: Bar size, passed as ``barSizeSetting``.
            timeout: Seconds to wait for completion; ``None`` waits forever.

        Raises:
            TimeoutError: If completion is not signalled within ``timeout``.
        """
        # Start from a cleared flag so a completion left over from an earlier
        # request cannot make this one return immediately.
        self.app.data_retrieved = False
        self.app.reqHistoricalData(
            reqId=1,
            contract=contract,
            endDateTime=end_date,
            durationStr=duration,
            barSizeSetting=bar_size,
            whatToShow="TRADES",
            useRTH=0,
            formatDate=1,
            keepUpToDate=False,
            chartOptions=[]
        )
        # Poll until the end-of-data callback fires.  The deadline keeps a
        # request that never completes (e.g. a contract that cannot be
        # resolved) from blocking the program forever.
        deadline = None if timeout is None else time.monotonic() + timeout
        while not self.app.data_retrieved:
            if deadline is not None and time.monotonic() >= deadline:
                self.app.cancelHistoricalData(1)
                raise TimeoutError(
                    f"No historical data received within {timeout} seconds "
                    f"for request ending {end_date}"
                )
            time.sleep(0.1)

    def fetch_options_data(self, symbol, exchange, currency, right, strike, expiry):
        """Fetch one "1 D" window of 1-minute bars for a single option contract.

        The window ends at the current UTC time.  Errors are printed rather
        than raised.

        Args:
            symbol: Underlying symbol.
            exchange: Contract exchange.
            currency: Contract currency.
            right: Option right ('C' for Call, 'P' for Put).
            strike: Strike price; converted with ``float``.
            expiry: Expiry date in YYYYMMDD format.
        """
        try:
            contract = Contract()
            contract.symbol = symbol
            contract.secType = "OPT"  # Set security type to options
            contract.exchange = exchange
            contract.currency = currency
            contract.right = right  # 'C' for Call, 'P' for Put
            contract.strike = float(strike)  # Strike price
            contract.lastTradeDateOrContractMonth = expiry  # Expiry date in YYYYMMDD format

            # Set duration and bar size for options data
            duration = "1 D"  # a single 1-day window
            bar_size = "1 min"  # 1-minute intervals

            end_date = datetime.now(timezone.utc)

            # Only one request is made: the 1-day window ending now.
            with tqdm(total=1, desc=f"Fetching {right} {strike} {expiry} data", unit="contract") as pbar:
                end_date_str = end_date.strftime("%Y%m%d %H:%M:%S UTC")
                try:
                    self.request_data(contract, end_date_str, duration, bar_size)
                    pbar.update(1)
                    time.sleep(15)  # Sleep to avoid pacing violations
                except Exception as e:
                    print(f"Error fetching data for contract {contract.symbol}: {e}")
        except Exception as e:
            print(f"Error fetching data: {e}")

    def disconnect(self):
        """Disconnect the underlying API client."""
        self.app.disconnect()
|
||||||
|
|
||||||
|
|
||||||
|
def get_user_input():
    """Prompt for an option contract and return its validated details.

    Returns:
        ``(symbol, exchange, currency, right, strike, expiry)`` where
        ``strike`` and ``expiry`` are the stripped strings the user typed,
        or ``None`` when a field is missing or invalid or reading the input
        fails (the reason is printed).
    """
    print("Provide the options contract details for data retrieval.")
    try:
        symbol = input("Enter the stock symbol (e.g., 'AAPL'): ").strip().upper()
        exchange = "SMART"  # Automatically set to SMART routing
        currency = "USD"  # Automatically set to USD
        right = input("Enter the option type ('C' for Call, 'P' for Put): ").strip().upper()
        strike = input("Enter the strike price (e.g., '150'): ").strip()
        expiry = input("Enter the expiry date (YYYYMMDD): ").strip()

        if not all([symbol, right, strike, expiry]):
            raise ValueError("All fields are required. Please try again.")

        # Reject malformed values here so the user gets a specific message
        # instead of a generic failure once the request is being built.
        if right not in ("C", "P", "CALL", "PUT"):
            raise ValueError(f"Option type must be 'C' or 'P', got '{right}'.")

        try:
            strike_value = float(strike)
        except ValueError:
            raise ValueError(f"Strike price must be a number, got '{strike}'.") from None
        # The chained comparison is False for NaN as well as for inf and <= 0.
        if not 0 < strike_value < float("inf"):
            raise ValueError(f"Strike price must be a positive number, got '{strike}'.")

        # strptime alone accepts unpadded fields, so check the shape first.
        if len(expiry) != 8 or not expiry.isdigit():
            raise ValueError(f"Expiry date must be in YYYYMMDD format, got '{expiry}'.")
        try:
            datetime.strptime(expiry, "%Y%m%d")
        except ValueError:
            raise ValueError(f"Expiry date is not a valid calendar date: '{expiry}'.") from None

        return symbol, exchange, currency, right, strike, expiry
    except Exception as e:
        print(f"Input Error: {e}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def graceful_exit(signal_received, frame):
    """Signal handler: disconnect the module-level ``app`` and exit with 0.

    Args:
        signal_received: Number of the signal that triggered the handler.
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print("\nTerminating program...")
    app.disconnect()
    # The bare `exit` name is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly terminates the same way and is always available.
    raise SystemExit(0)
|
||||||
|
|
||||||
|
|
||||||
|
# Install the Ctrl+C handler, then create the application and connect.
signal.signal(signal.SIGINT, graceful_exit)
app = IBApp()
app.connect()

try:
    user_input = get_user_input()
    if user_input:
        symbol, exchange, currency, right, strike, expiry = user_input
        app.fetch_options_data(symbol, exchange, currency, right, strike, expiry)
        data = app.app.df
        if data.empty:
            print("No options data retrieved.")
        else:
            # One CSV per contract, named after its defining fields.
            filename = f"{symbol}_{strike}_{right}_{expiry}_options_data.csv"
            data.to_csv(filename, index=False)
            print(f"Options data saved to {filename}.")
            print(data.head())
except Exception as exc:
    # Top-level boundary: report the failure, then fall through to cleanup.
    print(f"Error: {exc}")
finally:
    app.disconnect()
|
||||||
170
src/griffin-stuff/recent_data_pull.py
Normal file
170
src/griffin-stuff/recent_data_pull.py
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
import signal
|
||||||
|
from ibapi.client import EClient
|
||||||
|
from ibapi.wrapper import EWrapper
|
||||||
|
from ibapi.contract import Contract
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import datetime, timedelta, timezone
|
||||||
|
import os
|
||||||
|
|
||||||
|
class IBApi(EWrapper, EClient):
    """IB client/wrapper that logs and collects historical bars.

    Every bar is printed and buffered in ``self.data``.  On
    ``historicalDataEnd`` a non-empty buffer is appended to ``self.df``;
    either way ``self.data_retrieved`` is set to True and the buffer reset.
    """

    def __init__(self):
        EClient.__init__(self, self)
        self.data = []               # rows buffered for the current request
        self.df = pd.DataFrame()     # every completed chunk, concatenated
        self.data_retrieved = False  # becomes True in historicalDataEnd

    def historicalData(self, reqId, bar):
        """Print one bar and buffer it as a Date/Open/High/Low/Close/Volume row."""
        # Debug: Print each received bar
        print(f"Received bar: Date={bar.date}, Open={bar.open}, High={bar.high}, Low={bar.low}, Close={bar.close}, Volume={bar.volume}")
        bar_fields = ("date", "open", "high", "low", "close", "volume")
        # Each column is the capitalised name of the bar attribute it holds.
        row = {field.capitalize(): getattr(bar, field) for field in bar_fields}
        self.data.append(row)

    def historicalDataEnd(self, reqId, start, end):
        """Report the end of a request and fold its rows into ``self.df``."""
        # Debug: Indicate end of data reception
        print(f"HistoricalDataEnd received. Start: {start}, End: {end}. Number of bars fetched: {len(self.data)}")
        finished_chunk = pd.DataFrame(self.data)
        if finished_chunk.empty:
            print("No data received in this request.")
        else:
            self.df = pd.concat([self.df, finished_chunk], ignore_index=True)
        self.data_retrieved = True
        self.data = []  # Reset data list for next request
|
||||||
|
|
||||||
|
class IBApp:
    """Driver around ``IBApi``: connect, request recent bars, disconnect."""

    def __init__(self):
        self.app = IBApi()

    def connect(self):
        """Connect to 127.0.0.1:4002 and run the API loop on a daemon thread."""
        # Connect to IB API (ensure IB Gateway or TWS is running)
        print("Connecting to IB API...")
        self.app.connect("127.0.0.1", 4002, clientId=1)
        # Start the API thread
        thread = threading.Thread(target=self.run_app, daemon=True)
        thread.start()
        time.sleep(1)  # Allow time for connection
        print("Connected to IB API.")

    def run_app(self):
        """Thread target: run the API's blocking ``run()`` loop."""
        self.app.run()

    def request_data(self, contract, end_date, duration, bar_size, timeout=60.0):
        """Request one chunk of historical bars and wait for it to complete.

        Args:
            contract: Contract to request bars for.
            end_date: End of the requested window, passed as ``endDateTime``.
            duration: Window length, passed as ``durationStr``.
            bar_size: Bar size, passed as ``barSizeSetting``.
            timeout: Seconds to wait for completion; ``None`` waits forever.

        Raises:
            TimeoutError: If completion is not signalled within ``timeout``.
        """
        # Request historical data
        print(f"Requesting data: endDateTime={end_date}, durationStr={duration}, barSizeSetting={bar_size}")
        # Start from a cleared flag so a completion left over from an earlier
        # (e.g. timed-out) request cannot make this one return immediately.
        self.app.data_retrieved = False
        self.app.reqHistoricalData(
            reqId=1,
            contract=contract,
            endDateTime=end_date,
            durationStr=duration,
            barSizeSetting=bar_size,
            whatToShow="TRADES",
            useRTH=1,  # Use regular trading hours
            formatDate=1,
            keepUpToDate=False,
            chartOptions=[]
        )
        # Wait until data is retrieved.  The deadline keeps a request that
        # never completes (e.g. one answered only with an error) from
        # blocking the program forever.
        deadline = None if timeout is None else time.monotonic() + timeout
        while not self.app.data_retrieved:
            if deadline is not None and time.monotonic() >= deadline:
                self.app.cancelHistoricalData(1)
                raise TimeoutError(
                    f"No historical data received within {timeout} seconds "
                    f"for request ending {end_date}"
                )
            time.sleep(0.1)
        self.app.data_retrieved = False  # Reset flag for next request

    def fetch_recent_data(self, symbol, sec_type, exchange, currency):
        """Fetch the last "2 D" of 1-minute bars ending at the current UTC time.

        Errors are printed rather than raised.

        Args:
            symbol: Contract symbol.
            sec_type: Contract security type.
            exchange: Contract exchange.
            currency: Contract currency.
        """
        try:
            # Define the contract
            contract = Contract()
            contract.symbol = symbol
            contract.secType = sec_type
            contract.exchange = exchange
            contract.currency = currency

            # Set duration and bar size for last 2 days
            duration = "2 D"  # 2 days
            bar_size = "1 min"  # 1-minute intervals

            # Set end_date to now in UTC
            end_date = datetime.now(timezone.utc)
            end_date_str = end_date.strftime("%Y%m%d %H:%M:%S UTC")
            print(f"Fetching data up to {end_date_str} for the last {duration} with bar size {bar_size}")
            self.request_data(contract, end_date_str, duration, bar_size)

        except Exception as e:
            print(f"Error fetching data: {e}")

    def disconnect(self):
        """Disconnect the underlying API client and report it."""
        self.app.disconnect()
        print("Disconnected from IB API.")
|
||||||
|
|
||||||
|
def get_user_input():
    """Prompt for a stock symbol and return the contract details.

    Returns:
        ``(symbol, "STK", "SMART", "USD")`` with the symbol stripped and
        upper-cased, or ``None`` when the symbol is empty or reading the
        input fails (the reason is printed).
    """
    print("Provide the stock details for historical data retrieval.")
    try:
        typed = input("Enter the stock symbol (e.g., 'AAPL'): ")
        ticker = typed.strip().upper()
        if ticker:
            # Security type, exchange and currency are fixed for stocks.
            return ticker, "STK", "SMART", "USD"
        raise ValueError("Stock symbol is required. Please try again.")
    except Exception as err:
        print(f"Input Error: {err}")
    return None
|
||||||
|
|
||||||
|
def graceful_exit(signal_received, frame):
    """Signal handler: disconnect the module-level ``app`` and exit with 0.

    Args:
        signal_received: Number of the signal that triggered the handler.
        frame: Stack frame that was interrupted (unused).

    Raises:
        SystemExit: Always, with exit status 0.
    """
    print("\nTerminating program...")
    app.disconnect()
    # The bare `exit` name is injected by the `site` module for interactive
    # sessions and is missing under `python -S` or in frozen builds; raising
    # SystemExit directly terminates the same way and is always available.
    raise SystemExit(0)
|
||||||
|
|
||||||
|
# Initialize the IBApp first: the SIGINT handler dereferences the module-level
# `app`, so it must exist by the time a Ctrl+C can be delivered.
app = IBApp()

# Handle graceful exit on Ctrl+C
signal.signal(signal.SIGINT, graceful_exit)

app.connect()

try:
    user_input = get_user_input()
    if user_input:
        symbol, sec_type, exchange, currency = user_input

        # Define the filename (save directly in current directory)
        filename = f"{symbol}_recent_data.csv"

        # Fetch recent data (last 2 days)
        app.fetch_recent_data(symbol, sec_type, exchange, currency)

        # Retrieve fetched data
        data = app.app.df
        if not data.empty:
            fetched_rows = len(data)
            print(f"Number of data points fetched: {fetched_rows}")
            # Parse the 'Date' column; values that cannot be parsed become NaT.
            data['Date'] = pd.to_datetime(data['Date'], errors='coerce')

            # Check if timezone is present; if not, localize to UTC
            # NOTE(review): this labels naive timestamps as UTC without
            # converting them - confirm the API really returns UTC times.
            if data['Date'].dt.tz is None:
                data['Date'] = data['Date'].dt.tz_localize(timezone.utc, ambiguous='NaT', nonexistent='NaT')

            # Remove any rows with NaT in 'Date', and say so: silently losing
            # rows hides a date format the parser does not understand.
            data.dropna(subset=['Date'], inplace=True)
            dropped_rows = fetched_rows - len(data)
            if dropped_rows:
                print(f"Warning: dropped {dropped_rows} of {fetched_rows} rows with an unparseable Date.")

            if data.empty:
                # Do not write a header-only file and report it as saved.
                print("No rows left after cleaning; nothing was saved.")
            else:
                # Sort by 'Date' ascending
                data.sort_values(by='Date', inplace=True)

                # Save to CSV
                data.to_csv(filename, index=False)
                print(f"Data saved to {filename}.")
                print(data.tail())
        else:
            print("No new data fetched.")
except Exception as e:
    # Top-level boundary: report the failure, then fall through to cleanup.
    print(f"Error: {e}")
finally:
    app.disconnect()
|
||||||
119
src/griffin-stuff/technical_ind_calc.py
Normal file
119
src/griffin-stuff/technical_ind_calc.py
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
import ta
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Load the preprocessed bars, indexed by their parsed 'Date' column.
preprocessed_file_path = 'C:/Users/gwitt/MidasTechnologies/API/spy_1min_preprocessed.csv'  # Replace with your file path
df = pd.read_csv(preprocessed_file_path, index_col='Date', parse_dates=True)

# **Trend Indicators**
# Simple Moving Averages
df['SMA_20'] = ta.trend.sma_indicator(close=df['Close'], window=20)
df['SMA_50'] = ta.trend.sma_indicator(close=df['Close'], window=50)
df['SMA_200'] = ta.trend.sma_indicator(close=df['Close'], window=200)

# Exponential Moving Averages
df['EMA_20'] = ta.trend.ema_indicator(close=df['Close'], window=20)
df['EMA_50'] = ta.trend.ema_indicator(close=df['Close'], window=50)

# MACD
macd = ta.trend.MACD(close=df['Close'], window_slow=26, window_fast=12, window_sign=9)
df['MACD'] = macd.macd()
df['MACD_Signal'] = macd.macd_signal()
df['MACD_Hist'] = macd.macd_diff()

# ADX
df['ADX_14'] = ta.trend.adx(high=df['High'], low=df['Low'], close=df['Close'], window=14)

# **Momentum Indicators**
# RSI
df['RSI_14'] = ta.momentum.rsi(close=df['Close'], window=14)

# Stochastic Oscillator
stoch = ta.momentum.StochasticOscillator(high=df['High'], low=df['Low'], close=df['Close'], window=14, smooth_window=3)
df['Stoch_%K'] = stoch.stoch()
df['Stoch_%D'] = stoch.stoch_signal()

# Rate of Change
df['ROC_10'] = ta.momentum.roc(close=df['Close'], window=10)

# **Volatility Indicators**
# Bollinger Bands
bollinger = ta.volatility.BollingerBands(close=df['Close'], window=20, window_dev=2)
df['Bollinger_High'] = bollinger.bollinger_hband()
df['Bollinger_Low'] = bollinger.bollinger_lband()
df['Bollinger_Middle'] = bollinger.bollinger_mavg()

# Average True Range
df['ATR_14'] = ta.volatility.average_true_range(high=df['High'], low=df['Low'], close=df['Close'], window=14)

# **Volume Indicators**
# On-Balance Volume
df['OBV'] = ta.volume.on_balance_volume(close=df['Close'], volume=df['Volume'])

# Volume Weighted Average Price
df['VWAP'] = ta.volume.volume_weighted_average_price(high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'])

# Chaikin Money Flow
df['CMF_20'] = ta.volume.chaikin_money_flow(high=df['High'], low=df['Low'], close=df['Close'], volume=df['Volume'], window=20)

# **Composite Indicators**
# # Ichimoku Cloud
# ichimoku = ta.trend.IchimokuIndicator(high=df['High'], low=df['Low'], close=df['Close'], window1=9, window2=26, window3=52)
# df['Ichimoku_A'] = ichimoku.ichimoku_a()
# df['Ichimoku_B'] = ichimoku.ichimoku_b()
# df['Ichimoku_Base_Line'] = ichimoku.ichimoku_base_line()
# df['Ichimoku_Conversion_Line'] = ichimoku.ichimoku_conversion_line()

# Parabolic SAR
# Use the full PSAR series.  `psar_up` only carries values while the trend is
# up and is NaN otherwise, which would leave the 'PSAR' column empty for every
# down-trend bar.
psar = ta.trend.PSARIndicator(high=df['High'], low=df['Low'], close=df['Close'], step=0.02, max_step=0.2)
df['PSAR'] = psar.psar()

# **Classification Target:** 1 if next minute's close > current close, else 0
# NOTE(review): the final row has no next close, so its comparison is False
# and it is labelled 0; 'Target_Change' is NaN there.  Drop that row before
# training on 'Target_Class'.
df['Target_Class'] = (df['Close'].shift(-1) > df['Close']).astype(int)

# **Regression Target:** Percentage change in close price
df['Target_Change'] = ((df['Close'].shift(-1) - df['Close']) / df['Close']) * 100

# Display targets
print("\nTarget Variables:")
print(df[['Close', 'Target_Class', 'Target_Change']].head())

# Define lag periods
lag_periods = [1, 2, 3]

# Create lagged features for key indicators
key_indicators = ['RSI_14', 'MACD', 'ADX_14', 'ATR_14', 'OBV', 'CMF_20']

for indicator in key_indicators:
    for lag in lag_periods:
        df[f'{indicator}_lag{lag}'] = df[indicator].shift(lag)

# Display lagged features
print("\nLagged Features:")
print(df[[f'RSI_14_lag{lag}' for lag in lag_periods]].head())

# Rolling mean of RSI over the past 5 rows
df['RSI_14_roll_mean_5'] = df['RSI_14'].rolling(window=5).mean()

# Rolling standard deviation of ATR over the past 10 rows
df['ATR_14_roll_std_10'] = df['ATR_14'].rolling(window=10).std()

# Display rolling features
print("\nRolling Features:")
print(df[['RSI_14_roll_mean_5', 'ATR_14_roll_std_10']].head())

# Interaction between MACD and RSI
df['MACD_RSI'] = df['MACD'] * df['RSI_14']

# Interaction between ATR and ADX
df['ATR_ADX'] = df['ATR_14'] * df['ADX_14']

# Display interaction features
print("\nInteraction Features:")
print(df[['MACD_RSI', 'ATR_ADX']].head())

# Save dataset with technical indicators
indicators_file_path = 'C:/Users/gwitt/MidasTechnologies/API/spy_1min_with_indicators.csv'  # Replace with your desired path
df.to_csv(indicators_file_path)

print(f"Data with technical indicators saved to {indicators_file_path}")
|
||||||
Reference in New Issue
Block a user