added griffins shit

This commit is contained in:
klein panic
2024-12-14 12:57:00 -05:00
parent 9deab9e9ad
commit ffc8826580
23 changed files with 1779837 additions and 0 deletions

Binary file not shown.

View File

@@ -0,0 +1,129 @@
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from indicators import add_indicators
from strategy import generate_signals
from backtester import backtest
from optimizer import parameter_search
from indicator_sets import indicator_sets
def load_config(config_path="config.json"):
    """Read a JSON configuration file and return the parsed content.

    Args:
        config_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's top-level value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; be explicit so the result does not depend on the
    # platform's locale encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        return json.load(f)
def load_data(data_path):
    """Load an OHLCV CSV indexed by its ``Date`` column, sorted by date.

    Args:
        data_path: Path of the CSV file. It must provide the columns
            Date, Open, High, Low, Close and Volume.

    Returns:
        A DataFrame indexed by the parsed ``Date`` column in ascending order.

    Raises:
        ValueError: If any of the required price/volume columns is absent.
    """
    df = pd.read_csv(data_path, parse_dates=['Date'], index_col='Date')
    df = df.sort_index()
    required_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
    missing = [col for col in required_cols if col not in df.columns]
    if missing:
        # Name the absent columns so the caller can fix the file directly.
        raise ValueError(
            "Data file must contain Date,Open,High,Low,Close,Volume columns. "
            f"Missing: {', '.join(missing)}"
        )
    return df
def visualize_data_with_indicators(df):
    """Show a four-row figure: Close with EMA, then RSI, MACD and ADX.

    Args:
        df: DataFrame providing the columns 'Close', 'EMA', 'RSI', 'MACD'
            and 'ADX'; its index is used as the shared x axis.
    """
    _fig, axes = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
    price_ax, rsi_ax, macd_ax, adx_ax = axes
    x = df.index

    # Top panel: closing price with the EMA overlaid.
    price_ax.plot(x, df['Close'], label='Close', color='black')
    price_ax.plot(x, df['EMA'], label='EMA', color='blue', alpha=0.7)
    price_ax.set_title('Price and EMA')
    price_ax.legend()

    # Remaining panels: (axis, column, line colour, dashed reference levels).
    oscillator_panels = (
        (rsi_ax, 'RSI', 'green', ((70, 'red'), (30, 'green'))),
        (macd_ax, 'MACD', 'purple', ((0, 'red'),)),
        (adx_ax, 'ADX', 'brown', ((20, 'grey'), (25, 'grey'))),
    )
    for ax, column, line_color, levels in oscillator_panels:
        ax.plot(x, df[column], label=column, color=line_color)
        for level, level_color in levels:
            ax.axhline(level, color=level_color, linestyle='--')
        ax.set_title(column)

    plt.tight_layout()
    plt.show()
def log_results(message, log_file="indicator_test_results.log"):
    """Append one timestamped line to a log file.

    The line has the form ``YYYY-MM-DD HH:MM:SS - <message>``, using the
    local time at the moment of the call.

    Args:
        message: Text to record.
        log_file: Path of the file to append to.
    """
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    # Explicit UTF-8 so messages with non-ASCII characters are written the
    # same way regardless of the platform's locale encoding.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"{timestamp} - {message}\n")
def main():
    """Run the indicator pipeline end to end.

    Steps, in order:
      1. Load ``config.json`` and the SPY 5-minute CSV under ``data/``.
      2. Add the core indicators and plot them.
      3. Generate signals, backtest, and print the results.
      4. Run a small RSI-threshold parameter search and print the best result.
      5. For every entry in ``indicator_sets``, fit a logistic regression
         predicting whether the next row's Close is higher, and record the
         test accuracy in ``indicator_test_results.log`` (rewritten each run).
    """
    config = load_config("config.json")
    data_path = os.path.join("data", "SPY_5min_preprocessed.csv")
    df = load_data(data_path)

    # Add core indicators and visualize
    df = add_indicators(df, config)
    visualize_data_with_indicators(df)

    # Generate signals and backtest
    df = generate_signals(df, config)
    results = backtest(df, config)
    print("Backtest Results:")
    print(results)

    # Parameter optimization example
    param_grid = {
        "rsi_threshold_bearish": [65, 70, 75],
        "rsi_threshold_bullish": [25, 30, 35]
    }
    best_params, best_performance = parameter_search(df, config, param_grid)
    print("Best Parameters Found:", best_params)
    print("Best Performance (Final Equity):", best_performance)

    # Now test multiple indicator sets for classification accuracy.
    # Opening with "w" starts the log afresh on every run.
    log_file = "indicator_test_results.log"
    with open(log_file, "w") as f:
        f.write("Indicator Test Results Log\n")

    # Create prediction target: next candle up or down.
    # The last row has no following candle, so it is dropped.
    df['Future_Close'] = df['Close'].shift(-1)
    df['Up_Indicator'] = (df['Future_Close'] > df['Close']).astype(int)
    df = df.dropna(subset=['Future_Close'])

    # Chronological 70/30 split (no shuffling).
    train_size = int(len(df) * 0.7)
    df_train = df.iloc[:train_size].copy()
    df_test = df.iloc[train_size:].copy()

    # Columns that are raw inputs or the target, never model features.
    base_cols = ['Open', 'High', 'Low', 'Close', 'Volume', 'Future_Close', 'Up_Indicator']

    for set_name, func in indicator_sets.items():
        # Apply the indicator set to independent copies of train/test
        train = func(df_train.copy())
        test = func(df_test.copy())

        # Ensure columns align
        test = test.reindex(columns=train.columns)

        # Rows with undefined values must be removed from BOTH frames:
        # LogisticRegression.fit rejects NaN input, so leaving NaNs in the
        # training frame makes the fit fail.
        train = train.dropna()
        test = test.dropna()
        if len(test) == 0 or len(train) == 0:
            log_results(f"{set_name}: Not enough data after adding indicators.", log_file)
            continue

        feature_cols = [c for c in train.columns if c not in base_cols]
        X_train = train[feature_cols]
        y_train = train['Up_Indicator']
        X_test = test[feature_cols]
        y_test = test['Up_Indicator']

        # Train a simple logistic regression model
        model = LogisticRegression(max_iter=1000)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        acc = accuracy_score(y_test, y_pred)

        result_message = f"{set_name}: Accuracy = {acc:.4f}"
        print(result_message)
        log_results(result_message, log_file)


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,32 @@
{
"data": {
"input_csv": "data/SPY_5min_preprocessed.csv",
"date_column": "Date",
"price_column": "Close",
"high_column": "High",
"low_column": "Low",
"volume_column": "Volume"
},
"indicators": {
"trend": ["SMA", "EMA", "ADX"],
"momentum": ["RSI", "MACD"],
"volatility": ["BollingerBands"],
"volume": ["OBV"],
"mean_reversion": ["MeanReversionSignal"]
},
"parameters": {
"SMA": {"window": 20},
"EMA": {"window": 20},
"ADX": {"window": 14},
"RSI": {"window": 14, "overbought": 70, "oversold": 30},
"MACD": {"fastperiod": 12, "slowperiod": 26, "signalperiod": 9},
"BollingerBands": {"window": 20, "std_dev": 2},
"OBV": {},
"MeanReversionSignal": {"window": 10}
},
"evaluation": {
"prediction_horizon": 1,
"log_file": "logs/results.log"
}
}

View File

@@ -0,0 +1,32 @@
{
"data": {
"input_csv": "data/price_data.csv",
"date_column": "Date",
"price_column": "Close",
"high_column": "High",
"low_column": "Low",
"volume_column": "Volume"
},
"indicators": {
"trend": ["SMA", "EMA", "ADX"],
"momentum": ["RSI", "MACD"],
"volatility": ["BollingerBands"],
"volume": ["OBV"],
"mean_reversion": ["MeanReversionSignal"]
},
"parameters": {
"SMA": {"window": 20},
"EMA": {"window": 20},
"ADX": {"window": 14},
"RSI": {"window": 14, "overbought": 70, "oversold": 30},
"MACD": {"fastperiod": 12, "slowperiod": 26, "signalperiod": 9},
"BollingerBands": {"window": 20, "std_dev": 2},
"OBV": {},
"MeanReversionSignal": {"window": 10}
},
"evaluation": {
"prediction_horizon": 1,
"log_file": "logs/results.log"
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,32 @@
import numpy as np
def evaluate_indicator_accuracy(df, price_col="Close", horizon=1):
    """
    Evaluate how often indicator signals predict the correct price direction
    ``horizon`` rows ahead.

    Logic:
    - If signal[i] = 1 (bullish), correct if price[i+horizon] > price[i].
    - If signal[i] = -1 (bearish), correct if price[i+horizon] < price[i].
    - If signal[i] = 0, skip.
    - Any other non-zero signal value counts as an incorrect prediction.

    Only rows that have a row ``horizon`` positions later are scored. An
    unchanged price counts as incorrect for both bullish and bearish signals.

    Args:
        df: DataFrame with a 'signal' column and the ``price_col`` column.
        price_col: Name of the price column.
        horizon: Number of rows to look ahead; must be non-negative.

    Returns:
        Fraction of non-zero signals that were correct, or ``np.nan`` when
        no non-zero signal can be scored.

    Raises:
        ValueError: If ``horizon`` is negative.
    """
    if horizon < 0:
        raise ValueError("horizon must be non-negative.")

    scored_rows = len(df) - horizon
    if scored_rows <= 0:
        return np.nan  # No row has a future price to compare against

    signals = df['signal'].to_numpy()[:scored_rows]
    total = int(np.count_nonzero(signals != 0))
    if total == 0:
        return np.nan  # No signals generated

    # Align each scored row with the price `horizon` rows later.
    prices = df[price_col].to_numpy()
    current = prices[:scored_rows]
    future = prices[horizon:horizon + scored_rows]

    bullish_hit = (signals == 1) & (future > current)
    bearish_hit = (signals == -1) & (future < current)
    correct = int(np.count_nonzero(bullish_hit | bearish_hit))
    return correct / total

View File

@@ -0,0 +1,96 @@
import pandas as pd
import numpy as np
import ta
def _directional_signal(value, reference):
    """Return 1 where value > reference, -1 otherwise, 0 where either is NaN."""
    undefined = value.isna() | reference.isna()
    return np.where(undefined, 0, np.where(value > reference, 1, -1))


def calculate_indicator_signals(df, indicator_name, params, price_col="Close", high_col="High", low_col="Low", volume_col="Volume"):
    """
    Calculates indicator values and generates signals:
    Signal Convention: 1 = Bullish Prediction, -1 = Bearish Prediction, 0 = Neutral

    The indicator columns and the 'signal' column are written onto ``df``
    itself, which is also the return value; pass a copy to keep the original
    untouched. Rows where the indicator is still undefined (NaN, e.g. during
    a rolling-window warm-up) get a neutral signal.

    Args:
        df: Price DataFrame.
        indicator_name: One of "SMA", "EMA", "ADX", "RSI", "MACD",
            "BollingerBands", "OBV", "MeanReversionSignal".
        params: Dict of indicator parameters; missing keys fall back to the
            defaults used below.
        price_col: Name of the price (close) column.
        high_col: Name of the high column (ADX only).
        low_col: Name of the low column (ADX only).
        volume_col: Name of the volume column (OBV only).

    Returns:
        ``df`` with the indicator column(s) and 'signal' added.

    Raises:
        ValueError: If a required column is missing or the indicator name is
            not recognised.
    """
    if price_col not in df.columns:
        raise ValueError(f"{price_col} column not found in the dataframe.")

    price = df[price_col]

    if indicator_name == "SMA":
        # Trend: price > SMA => bullish, else bearish.
        # The first window-1 rows have no SMA yet and stay neutral.
        window = params.get("window", 20)
        df['SMA'] = price.rolling(window).mean()
        df['signal'] = _directional_signal(price, df['SMA'])

    elif indicator_name == "EMA":
        # Trend: price > EMA => bullish, else bearish
        window = params.get("window", 20)
        df['EMA'] = price.ewm(span=window, adjust=False).mean()
        df['signal'] = _directional_signal(price, df['EMA'])

    elif indicator_name == "ADX":
        # Trend: use ADXIndicator
        if high_col not in df.columns or low_col not in df.columns:
            raise ValueError("ADX calculation requires 'High' and 'Low' columns.")
        window = params.get("window", 14)
        adx_indicator = ta.trend.ADXIndicator(high=df[high_col], low=df[low_col], close=price, window=window)
        df['ADX'] = adx_indicator.adx()
        df['DIP'] = adx_indicator.adx_pos()  # +DI
        df['DIN'] = adx_indicator.adx_neg()  # -DI

        # If ADX > 25 and DI+ > DI- => bullish
        # If ADX > 25 and DI- > DI+ => bearish
        # Otherwise => no strong signal
        df['signal'] = 0
        trending_up = (df['DIP'] > df['DIN']) & (df['ADX'] > 25)
        trending_down = (df['DIN'] > df['DIP']) & (df['ADX'] > 25)
        df.loc[trending_up, 'signal'] = 1
        df.loc[trending_down, 'signal'] = -1

    elif indicator_name == "RSI":
        # Momentum: RSI > overbought => bearish, RSI < oversold => bullish
        window = params.get("window", 14)
        overbought = params.get("overbought", 70)
        oversold = params.get("oversold", 30)
        df['RSI'] = ta.momentum.rsi(price, window=window)
        conditions = [
            (df['RSI'] > overbought),
            (df['RSI'] < oversold)
        ]
        values = [-1, 1]
        df['signal'] = np.select(conditions, values, default=0)

    elif indicator_name == "MACD":
        # Momentum: MACD line > Signal line => bullish, else bearish.
        # Rows where either line is still undefined stay neutral.
        fastperiod = params.get("fastperiod", 12)
        slowperiod = params.get("slowperiod", 26)
        signalperiod = params.get("signalperiod", 9)
        macd = ta.trend.MACD(price, window_slow=slowperiod, window_fast=fastperiod, window_sign=signalperiod)
        df['MACD'] = macd.macd()
        df['MACD_Signal'] = macd.macd_signal()
        df['signal'] = _directional_signal(df['MACD'], df['MACD_Signal'])

    elif indicator_name == "BollingerBands":
        # Volatility: price at/above upper band => bearish, at/below lower band => bullish
        window = params.get("window", 20)
        std_dev = params.get("std_dev", 2)
        bb = ta.volatility.BollingerBands(price, window=window, window_dev=std_dev)
        df['BB_High'] = bb.bollinger_hband()
        df['BB_Low'] = bb.bollinger_lband()
        df['signal'] = np.where(price >= df['BB_High'], -1,
                                np.where(price <= df['BB_Low'], 1, 0))

    elif indicator_name == "OBV":
        # Volume: Rising OBV => bullish, falling OBV => bearish
        if volume_col not in df.columns:
            raise ValueError(f"OBV calculation requires '{volume_col}' column.")
        df['OBV'] = ta.volume.on_balance_volume(price, df[volume_col])
        df['OBV_Change'] = df['OBV'].diff()
        df['signal'] = np.where(df['OBV_Change'] > 0, 1, np.where(df['OBV_Change'] < 0, -1, 0))

    elif indicator_name == "MeanReversionSignal":
        # Mean Reversion: price > mean => bearish, price < mean => bullish
        window = params.get("window", 10)
        df['mean'] = price.rolling(window).mean()
        df['signal'] = np.where(price > df['mean'], -1,
                                np.where(price < df['mean'], 1, 0))

    else:
        raise ValueError(f"Unknown indicator: {indicator_name}")

    return df

View File

@@ -0,0 +1,24 @@
2024-12-13 22:07:39,152 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
2024-12-13 22:07:40,855 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
2024-12-13 22:10:04,274 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
2024-12-13 22:10:05,997 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
2024-12-13 22:10:07,745 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
2024-12-13 22:10:08,484 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
2024-12-13 22:10:09,096 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
2024-12-13 22:10:11,937 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
2024-12-13 22:10:15,386 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
2024-12-13 22:12:44,520 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
2024-12-13 22:12:45,874 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
2024-12-13 22:12:47,913 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
2024-12-13 22:12:48,530 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
2024-12-13 22:12:49,173 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
2024-12-13 22:12:51,230 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
2024-12-13 22:12:54,504 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
2024-12-13 22:23:17,293 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
2024-12-13 22:23:18,087 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
2024-12-13 22:23:19,409 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
2024-12-13 22:23:19,797 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
2024-12-13 22:23:20,669 - INFO - Category: momentum, Indicator: MACD, Accuracy: 0.3184
2024-12-13 22:23:20,993 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
2024-12-13 22:23:21,786 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
2024-12-13 22:23:22,678 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330

View File

@@ -0,0 +1,8 @@
2024-12-13 22:23:17,293 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
2024-12-13 22:23:18,087 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
2024-12-13 22:23:19,409 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
2024-12-13 22:23:19,797 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
2024-12-13 22:23:20,669 - INFO - Category: momentum, Indicator: MACD, Accuracy: 0.3184
2024-12-13 22:23:20,993 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
2024-12-13 22:23:21,786 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
2024-12-13 22:23:22,678 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330

View File

@@ -0,0 +1,65 @@
import json
import logging
import pandas as pd
import os
from indicators import calculate_indicator_signals
from evaluation import evaluate_indicator_accuracy
def setup_logging(log_path):
    """Configure root logging to write INFO-level records to ``log_path``.

    The parent directory of ``log_path`` is created when it does not exist.

    Args:
        log_path: Path of the log file; it may be a bare file name.
    """
    log_dir = os.path.dirname(log_path)
    # A bare file name has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create a directory when one is named.
    if log_dir:
        os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(
        filename=log_path,
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )
def load_config(config_path="config.json"):
    """Read a JSON configuration file and return the parsed content.

    Args:
        config_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's top-level value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; be explicit so the result does not depend on the
    # platform's locale encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    return config
def load_data(csv_path, date_col, price_col):
    """Load a price CSV, parse its dates, and return clean rows in date order.

    Rows with a missing date or price are removed. The result is sorted by
    ``date_col`` and carries a contiguous 0..n-1 index.

    Args:
        csv_path: Path of the CSV file.
        date_col: Name of the column holding the timestamps.
        price_col: Name of the column holding the price.

    Returns:
        The cleaned, date-sorted DataFrame.
    """
    df = pd.read_csv(csv_path)
    df[date_col] = pd.to_datetime(df[date_col])
    # Drop incomplete rows BEFORE re-indexing; resetting the index first
    # would leave gaps in it wherever a row is removed afterwards.
    df = df.dropna(subset=[date_col, price_col])
    df = df.sort_values(date_col).reset_index(drop=True)
    return df
if __name__ == "__main__":
    # Script entry point: score every configured indicator's directional
    # accuracy, writing each result to the log file and then to the console.
    config = load_config("config.json")
    eval_cfg = config["evaluation"]
    setup_logging(eval_cfg["log_file"])

    # Load data
    data_cfg = config["data"]
    df = load_data(
        data_cfg["input_csv"],
        data_cfg["date_column"],
        data_cfg["price_column"],
    )

    # Calculate indicators and signals, evaluate accuracy
    all_results = []
    for category, indicators in config["indicators"].items():
        for ind_name in indicators:
            # Each indicator works on its own copy so columns do not accumulate.
            scored = calculate_indicator_signals(
                df.copy(),
                indicator_name=ind_name,
                params=config["parameters"].get(ind_name, {}),
                price_col=data_cfg["price_column"],
                high_col=data_cfg["high_column"],
                low_col=data_cfg["low_column"],
                volume_col=data_cfg["volume_column"],
            )
            hit_rate = evaluate_indicator_accuracy(
                scored,
                price_col=data_cfg["price_column"],
                horizon=eval_cfg["prediction_horizon"],
            )
            logging.info(f"Category: {category}, Indicator: {ind_name}, Accuracy: {hit_rate:.4f}")
            all_results.append((category, ind_name, hit_rate))

    # Print results to console as well
    for category, ind_name, acc in all_results:
        print(f"Category: {category}, Indicator: {ind_name}, Accuracy: {acc:.4f}")