added griffins shit

2024-12-14 12:57:00 -05:00
parent 9deab9e9ad
commit ffc8826580
23 changed files with 1779837 additions and 0 deletions
--- a/src/griffin-stuff/backtester/.config.json.un~
+++ b/src/griffin-stuff/backtester/.config.json.un~
--- a/src/griffin-stuff/backtester/.ipynb_checkpoints/evaluation-checkpoint.py
+++ b/src/griffin-stuff/backtester/.ipynb_checkpoints/evaluation-checkpoint.py
--- a/src/griffin-stuff/backtester/.ipynb_checkpoints/indicators-checkpoint.py
+++ b/src/griffin-stuff/backtester/.ipynb_checkpoints/indicators-checkpoint.py
--- a/src/griffin-stuff/backtester/.ipynb_checkpoints/main-checkpoint.py
+++ b/src/griffin-stuff/backtester/.ipynb_checkpoints/main-checkpoint.py
@@ -0,0 +1,129 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+import json
+import os
+import datetime
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import accuracy_score
+
+from indicators import add_indicators
+from strategy import generate_signals
+from backtester import backtest
+from optimizer import parameter_search
+from indicator_sets import indicator_sets
+
+def load_config(config_path="config.json"):
+    """Read the JSON file at ``config_path`` and return the parsed object."""
+    with open(config_path, 'r') as handle:
+        settings = json.load(handle)
+    return settings
+
+def load_data(data_path):
+    """Load price bars from a CSV file, indexed and sorted by the ``Date`` column.
+
+    Args:
+        data_path: Path to a CSV file with a ``Date`` column plus ``Open``,
+            ``High``, ``Low``, ``Close`` and ``Volume``.
+
+    Returns:
+        A DataFrame whose index is the parsed ``Date`` column, sorted ascending.
+
+    Raises:
+        ValueError: If any of the five price/volume columns is absent; the
+            message lists the missing ones.
+    """
+    df = pd.read_csv(data_path, parse_dates=['Date'], index_col='Date')
+    df = df.sort_index()
+    required_cols = ['Open','High','Low','Close','Volume']
+    # Collect the absent columns so the error tells the user what to fix.
+    missing = [col for col in required_cols if col not in df.columns]
+    if missing:
+        raise ValueError(
+            "Data file must contain Date,Open,High,Low,Close,Volume columns. "
+            f"Missing: {', '.join(missing)}"
+        )
+    return df
+
+def visualize_data_with_indicators(df):
+    """Draw price/EMA, RSI, MACD and ADX as four stacked panels and show the figure.
+
+    ``df`` must provide the columns ``Close``, ``EMA``, ``RSI``, ``MACD`` and
+    ``ADX``; its index is used as the shared x-axis.
+    """
+    _, (price_ax, rsi_ax, macd_ax, adx_ax) = plt.subplots(4, 1, figsize=(12, 10), sharex=True)
+    x_values = df.index
+
+    # Panel 1: closing price with the EMA overlaid.
+    price_ax.plot(x_values, df['Close'], label='Close', color='black')
+    price_ax.plot(x_values, df['EMA'], label='EMA', color='blue', alpha=0.7)
+    price_ax.set_title('Price and EMA')
+    price_ax.legend()
+
+    # Panel 2: RSI with dashed reference lines at 70 and 30.
+    rsi_ax.plot(x_values, df['RSI'], label='RSI', color='green')
+    for level, line_color in ((70, 'red'), (30, 'green')):
+        rsi_ax.axhline(level, color=line_color, linestyle='--')
+    rsi_ax.set_title('RSI')
+
+    # Panel 3: MACD with a dashed zero line.
+    macd_ax.plot(x_values, df['MACD'], label='MACD', color='purple')
+    macd_ax.axhline(0, color='red', linestyle='--')
+    macd_ax.set_title('MACD')
+
+    # Panel 4: ADX with dashed reference lines at 20 and 25.
+    adx_ax.plot(x_values, df['ADX'], label='ADX', color='brown')
+    for level in (20, 25):
+        adx_ax.axhline(level, color='grey', linestyle='--')
+    adx_ax.set_title('ADX')
+
+    plt.tight_layout()
+    plt.show()
+
+def log_results(message, log_file="indicator_test_results.log"):
+    """Append ``message`` to ``log_file`` as one line prefixed with the local time."""
+    stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+    entry = f"{stamp} - {message}\n"
+    with open(log_file, "a") as sink:
+        sink.write(entry)
+
+def main():
+    """Run the backtest demo, then score each indicator set with a classifier.
+
+    Steps, in order:
+      1. Load ``config.json`` and ``data/SPY_5min_preprocessed.csv``.
+      2. Add the core indicators, plot them, generate signals and backtest.
+      3. Search the two RSI thresholds with ``parameter_search``.
+      4. For every entry in ``indicator_sets``, fit a logistic regression on
+         the first 70% of rows to predict whether the next close is higher,
+         and report accuracy on the remaining 30%.
+
+    Results of step 4 are printed and written to
+    ``indicator_test_results.log``, which is truncated at the start of the run.
+    """
+    config = load_config("config.json")
+    data_path = os.path.join("data", "SPY_5min_preprocessed.csv")
+    df = load_data(data_path)
+
+    # Add core indicators and visualize
+    df = add_indicators(df, config)
+    visualize_data_with_indicators(df)
+
+    # Generate signals and backtest
+    df = generate_signals(df, config)
+    results = backtest(df, config)
+    print("Backtest Results:")
+    print(results)
+
+    # Parameter optimization example
+    param_grid = {
+        "rsi_threshold_bearish": [65, 70, 75],
+        "rsi_threshold_bullish": [25, 30, 35]
+    }
+    best_params, best_performance = parameter_search(df, config, param_grid)
+    print("Best Parameters Found:", best_params)
+    print("Best Performance (Final Equity):", best_performance)
+
+    # Now test multiple indicator sets for classification accuracy.
+    # Opening with "w" starts a fresh log; log_results() appends afterwards.
+    log_file = "indicator_test_results.log"
+    with open(log_file, "w") as f:
+        f.write("Indicator Test Results Log\n")
+
+    # Create prediction target: next candle up or down.
+    # The last row has no successor, so it is dropped after labelling.
+    df['Future_Close'] = df['Close'].shift(-1)
+    df['Up_Indicator'] = (df['Future_Close'] > df['Close']).astype(int)
+    df = df.dropna(subset=['Future_Close'])
+
+    # Chronological split: earliest 70% for training, the rest for testing.
+    train_size = int(len(df)*0.7)
+    df_train = df.iloc[:train_size].copy()
+    df_test = df.iloc[train_size:].copy()
+
+    for set_name, func in indicator_sets.items():
+        # Apply the indicator set to train/test
+        train = df_train.copy()
+        test = df_test.copy()
+
+        train = func(train)
+        test = func(test)
+
+        # Indicator columns may contain NaN (e.g. rolling warm-up rows) and
+        # LogisticRegression rejects NaN input, so incomplete rows must be
+        # dropped from BOTH splits, not only from the test split.
+        train = train.dropna()
+
+        # Ensure columns align
+        test = test.reindex(columns=train.columns)
+        test = test.dropna()
+        if len(test) == 0 or len(train) == 0:
+            log_results(f"{set_name}: Not enough data after adding indicators.", log_file)
+            continue
+
+        # Everything that is not raw price data or the target is a feature.
+        base_cols = ['Open','High','Low','Close','Volume','Future_Close','Up_Indicator']
+        feature_cols = [c for c in train.columns if c not in base_cols]
+
+        X_train = train[feature_cols]
+        y_train = train['Up_Indicator']
+        X_test = test[feature_cols]
+        y_test = test['Up_Indicator']
+
+        # Train a simple logistic regression model
+        model = LogisticRegression(max_iter=1000)
+        model.fit(X_train, y_train)
+        y_pred = model.predict(X_test)
+
+        acc = accuracy_score(y_test, y_pred)
+        result_message = f"{set_name}: Accuracy = {acc:.4f}"
+        print(result_message)
+        log_results(result_message, log_file)
+
+# Run the pipeline only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
--- a/src/griffin-stuff/backtester/pycache/evaluation.cpython-313.pyc
+++ b/src/griffin-stuff/backtester/pycache/evaluation.cpython-313.pyc
--- a/src/griffin-stuff/backtester/pycache/indicators.cpython-313.pyc
+++ b/src/griffin-stuff/backtester/pycache/indicators.cpython-313.pyc
--- a/src/griffin-stuff/backtester/config.json
+++ b/src/griffin-stuff/backtester/config.json
@@ -0,0 +1,32 @@
+{
+  "data": {
+    "input_csv": "data/SPY_5min_preprocessed.csv",
+    "date_column": "Date",
+    "price_column": "Close",
+    "high_column": "High",
+    "low_column": "Low",
+    "volume_column": "Volume"
+  },
+  "indicators": {
+    "trend": ["SMA", "EMA", "ADX"],
+    "momentum": ["RSI", "MACD"],
+    "volatility": ["BollingerBands"],
+    "volume": ["OBV"],
+    "mean_reversion": ["MeanReversionSignal"]
+  },
+  "parameters": {
+    "SMA": {"window": 20},
+    "EMA": {"window": 20},
+    "ADX": {"window": 14},
+    "RSI": {"window": 14, "overbought": 70, "oversold": 30},
+    "MACD": {"fastperiod": 12, "slowperiod": 26, "signalperiod": 9},
+    "BollingerBands": {"window": 20, "std_dev": 2},
+    "OBV": {},
+    "MeanReversionSignal": {"window": 10}
+  },
+  "evaluation": {
+    "prediction_horizon": 1,
+    "log_file": "logs/results.log"
+  }
+}
+
--- a/src/griffin-stuff/backtester/config.json~
+++ b/src/griffin-stuff/backtester/config.json~
@@ -0,0 +1,32 @@
+{
+	  "data": {
+		      "input_csv": "data/price_data.csv",
+		          "date_column": "Date",
+			      "price_column": "Close",
+			          "high_column": "High",
+				      "low_column": "Low",
+				          "volume_column": "Volume"
+					    },
+					      "indicators": {
+						          "trend": ["SMA", "EMA", "ADX"],
+							      "momentum": ["RSI", "MACD"],
+							          "volatility": ["BollingerBands"],
+								      "volume": ["OBV"], 
+								          "mean_reversion": ["MeanReversionSignal"]
+									    },
+									      "parameters": {
+										          "SMA": {"window": 20},
+											      "EMA": {"window": 20},
+											          "ADX": {"window": 14},
+												      "RSI": {"window": 14, "overbought": 70, "oversold": 30},
+												          "MACD": {"fastperiod": 12, "slowperiod": 26, "signalperiod": 9},
+													      "BollingerBands": {"window": 20, "std_dev": 2},
+													          "OBV": {},
+														      "MeanReversionSignal": {"window": 10}
+														        },
+															  "evaluation": {
+																      "prediction_horizon": 1,
+																          "log_file": "logs/results.log"
+																	    }
+}
+
--- a/src/griffin-stuff/backtester/data/SPY_3yr_1min_data.csv
+++ b/src/griffin-stuff/backtester/data/SPY_3yr_1min_data.csv
--- a/src/griffin-stuff/backtester/data/SPY_3yr_5min_data.csv
+++ b/src/griffin-stuff/backtester/data/SPY_3yr_5min_data.csv
--- a/src/griffin-stuff/backtester/data/SPY_5min_preprocessed.csv
+++ b/src/griffin-stuff/backtester/data/SPY_5min_preprocessed.csv
--- a/src/griffin-stuff/backtester/data/spy_1min_preprocessed.csv
+++ b/src/griffin-stuff/backtester/data/spy_1min_preprocessed.csv
--- a/src/griffin-stuff/backtester/evaluation.py
+++ b/src/griffin-stuff/backtester/evaluation.py
@@ -0,0 +1,32 @@
+import numpy as np
+
+def evaluate_indicator_accuracy(df, price_col="Close", horizon=1):
+    """
+    Measure how often a directional signal matches the price move ``horizon`` rows later.
+
+    Logic, for every row i that has a row i+horizon:
+    - If signal[i] = 1 (bullish), correct if price[i+horizon] > price[i].
+    - If signal[i] = -1 (bearish), correct if price[i+horizon] < price[i].
+    - If signal[i] = 0 or is NaN, the row is skipped and not counted.
+    - Any other signal value is counted as an incorrect prediction.
+    An unchanged price never counts as correct.
+
+    Args:
+        df: DataFrame with a 'signal' column and the ``price_col`` column.
+        price_col: Name of the price column to compare.
+        horizon: Number of rows to look ahead; must be non-negative.
+
+    Returns:
+        correct / total as a float, or np.nan when no row carries a signal
+        (including when the frame has ``horizon`` rows or fewer).
+
+    Raises:
+        ValueError: If ``horizon`` is negative.
+    """
+    if horizon < 0:
+        raise ValueError("horizon must be a non-negative number of rows.")
+
+    # Only rows that have a counterpart ``horizon`` rows ahead can be scored.
+    n_eval = len(df) - horizon
+    if n_eval <= 0:
+        return np.nan
+
+    signals = df['signal'].to_numpy(dtype=float)[:n_eval]
+    prices = df[price_col].to_numpy()
+    current = prices[:n_eval]
+    future = prices[horizon:]
+
+    # A missing signal is "no prediction", not a wrong prediction.
+    active = (signals != 0) & ~np.isnan(signals)
+    total = int(np.count_nonzero(active))
+    if total == 0:
+        return np.nan  # No signals generated
+
+    hits = ((signals == 1) & (future > current)) | ((signals == -1) & (future < current))
+    return int(np.count_nonzero(hits)) / total
--- a/src/griffin-stuff/backtester/indicators.py
+++ b/src/griffin-stuff/backtester/indicators.py
@@ -0,0 +1,96 @@
+import pandas as pd
+import numpy as np
+import ta
+
+def calculate_indicator_signals(df, indicator_name, params, price_col="Close", high_col="High", low_col="Low", volume_col="Volume"):
+    """
+    Calculates indicator values and generates signals:
+    Signal Convention: 1 = Bullish Prediction, -1 = Bearish Prediction, 0 = Neutral
+
+    The indicator columns and a 'signal' column are written onto ``df`` itself
+    (the frame is modified in place) and the same frame is returned. Rows where
+    a comparison involves NaN (e.g. the warm-up window of a rolling average)
+    get signal 0 rather than a directional prediction.
+
+    Args:
+        df: Price DataFrame; must contain ``price_col``.
+        indicator_name: One of "SMA", "EMA", "ADX", "RSI", "MACD",
+            "BollingerBands", "OBV", "MeanReversionSignal".
+        params: Dict of indicator settings; missing keys use the defaults
+            shown in each branch.
+        price_col, high_col, low_col, volume_col: Column names to read.
+
+    Returns:
+        ``df`` with the added indicator and 'signal' columns.
+
+    Raises:
+        ValueError: If a required column is missing or the indicator name is
+            not recognised.
+    """
+    if price_col not in df.columns:
+        raise ValueError(f"{price_col} column not found in the dataframe.")
+
+    if indicator_name == "SMA":
+        # Trend: price > SMA => bullish, else bearish
+        window = params.get("window", 20)
+        df['SMA'] = df[price_col].rolling(window).mean()
+        # Two explicit conditions so NaN warm-up rows fall through to 0
+        # instead of being labelled bearish.
+        df['signal'] = np.select(
+            [df[price_col] > df['SMA'], df[price_col] <= df['SMA']],
+            [1, -1],
+            default=0,
+        )
+
+    elif indicator_name == "EMA":
+        # Trend: price > EMA => bullish, else bearish
+        window = params.get("window", 20)
+        df['EMA'] = df[price_col].ewm(span=window, adjust=False).mean()
+        df['signal'] = np.select(
+            [df[price_col] > df['EMA'], df[price_col] <= df['EMA']],
+            [1, -1],
+            default=0,
+        )
+
+    elif indicator_name == "ADX":
+        # Trend: use ADXIndicator
+        if high_col not in df.columns or low_col not in df.columns:
+            raise ValueError("ADX calculation requires 'High' and 'Low' columns.")
+        window = params.get("window", 14)
+        adx_indicator = ta.trend.ADXIndicator(high=df[high_col], low=df[low_col], close=df[price_col], window=window)
+        df['ADX'] = adx_indicator.adx()
+        df['DIP'] = adx_indicator.adx_pos()  # +DI
+        df['DIN'] = adx_indicator.adx_neg()  # -DI
+
+        # If ADX > 25 and DI+ > DI- => bullish
+        # If ADX > 25 and DI- > DI+ => bearish
+        # Otherwise => no strong signal
+        df['signal'] = 0
+        trending_up = (df['DIP'] > df['DIN']) & (df['ADX'] > 25)
+        trending_down = (df['DIN'] > df['DIP']) & (df['ADX'] > 25)
+        df.loc[trending_up, 'signal'] = 1
+        df.loc[trending_down, 'signal'] = -1
+
+    elif indicator_name == "RSI":
+        # Momentum: RSI > overbought => bearish, RSI < oversold => bullish
+        window = params.get("window", 14)
+        overbought = params.get("overbought", 70)
+        oversold = params.get("oversold", 30)
+        df['RSI'] = ta.momentum.rsi(df[price_col], window=window)
+        conditions = [
+            (df['RSI'] > overbought),
+            (df['RSI'] < oversold)
+        ]
+        values = [-1, 1]
+        df['signal'] = np.select(conditions, values, default=0)
+
+    elif indicator_name == "MACD":
+        # Momentum: MACD line > Signal line => bullish, else bearish
+        fastperiod = params.get("fastperiod", 12)
+        slowperiod = params.get("slowperiod", 26)
+        signalperiod = params.get("signalperiod", 9)
+        macd = ta.trend.MACD(df[price_col], window_slow=slowperiod, window_fast=fastperiod, window_sign=signalperiod)
+        df['MACD'] = macd.macd()
+        df['MACD_Signal'] = macd.macd_signal()
+
+        # NaN in either line (warm-up) yields 0, not a bearish call.
+        df['signal'] = np.select(
+            [df['MACD'] > df['MACD_Signal'], df['MACD'] <= df['MACD_Signal']],
+            [1, -1],
+            default=0,
+        )
+
+    elif indicator_name == "BollingerBands":
+        # Volatility: price at/above upper band => bearish, at/below lower band => bullish
+        window = params.get("window", 20)
+        std_dev = params.get("std_dev", 2)
+        bb = ta.volatility.BollingerBands(df[price_col], window=window, window_dev=std_dev)
+        df['BB_High'] = bb.bollinger_hband()
+        df['BB_Low'] = bb.bollinger_lband()
+        df['signal'] = np.where(df[price_col] >= df['BB_High'], -1,
+                        np.where(df[price_col] <= df['BB_Low'], 1, 0))
+
+    elif indicator_name == "OBV":
+        # Volume: Rising OBV => bullish, falling OBV => bearish
+        if volume_col not in df.columns:
+            raise ValueError(f"OBV calculation requires '{volume_col}' column.")
+        df['OBV'] = ta.volume.on_balance_volume(df[price_col], df[volume_col])
+        df['OBV_Change'] = df['OBV'].diff()
+        df['signal'] = np.where(df['OBV_Change'] > 0, 1, np.where(df['OBV_Change'] < 0, -1, 0))
+
+    elif indicator_name == "MeanReversionSignal":
+        # Mean Reversion: price > mean => bearish, price < mean => bullish
+        window = params.get("window", 10)
+        df['mean'] = df[price_col].rolling(window).mean()
+        df['signal'] = np.where(df[price_col] > df['mean'], -1,
+                        np.where(df[price_col] < df['mean'], 1, 0))
+
+    else:
+        raise ValueError(f"Unknown indicator: {indicator_name}")
+
+    return df
--- a/src/griffin-stuff/backtester/logs/.ipynb_checkpoints/results-checkpoint.log
+++ b/src/griffin-stuff/backtester/logs/.ipynb_checkpoints/results-checkpoint.log
@@ -0,0 +1,24 @@
+2024-12-13 22:07:39,152 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
+2024-12-13 22:07:40,855 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
+2024-12-13 22:10:04,274 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
+2024-12-13 22:10:05,997 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
+2024-12-13 22:10:07,745 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
+2024-12-13 22:10:08,484 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
+2024-12-13 22:10:09,096 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
+2024-12-13 22:10:11,937 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
+2024-12-13 22:10:15,386 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
+2024-12-13 22:12:44,520 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
+2024-12-13 22:12:45,874 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
+2024-12-13 22:12:47,913 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
+2024-12-13 22:12:48,530 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
+2024-12-13 22:12:49,173 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
+2024-12-13 22:12:51,230 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
+2024-12-13 22:12:54,504 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
+2024-12-13 22:23:17,293 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
+2024-12-13 22:23:18,087 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
+2024-12-13 22:23:19,409 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
+2024-12-13 22:23:19,797 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
+2024-12-13 22:23:20,669 - INFO - Category: momentum, Indicator: MACD, Accuracy: 0.3184
+2024-12-13 22:23:20,993 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
+2024-12-13 22:23:21,786 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
+2024-12-13 22:23:22,678 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
--- a/src/griffin-stuff/backtester/logs/results.log
+++ b/src/griffin-stuff/backtester/logs/results.log
@@ -0,0 +1,8 @@
+2024-12-13 22:23:17,293 - INFO - Category: trend, Indicator: SMA, Accuracy: 0.3166
+2024-12-13 22:23:18,087 - INFO - Category: trend, Indicator: EMA, Accuracy: 0.3160
+2024-12-13 22:23:19,409 - INFO - Category: trend, Indicator: ADX, Accuracy: 0.2696
+2024-12-13 22:23:19,797 - INFO - Category: momentum, Indicator: RSI, Accuracy: 0.2495
+2024-12-13 22:23:20,669 - INFO - Category: momentum, Indicator: MACD, Accuracy: 0.3184
+2024-12-13 22:23:20,993 - INFO - Category: volatility, Indicator: BollingerBands, Accuracy: 0.3114
+2024-12-13 22:23:21,786 - INFO - Category: volume, Indicator: OBV, Accuracy: 0.3167
+2024-12-13 22:23:22,678 - INFO - Category: mean_reversion, Indicator: MeanReversionSignal, Accuracy: 0.3330
--- a/src/griffin-stuff/backtester/main.py
+++ b/src/griffin-stuff/backtester/main.py
@@ -0,0 +1,65 @@
+import json
+import logging
+import pandas as pd
+import os
+
+from indicators import calculate_indicator_signals
+from evaluation import evaluate_indicator_accuracy
+
+def setup_logging(log_path):
+    """Configure the root logger to write INFO-level records to ``log_path``.
+
+    The parent directory of ``log_path`` is created if it does not exist.
+    Records are formatted as ``<asctime> - <levelname> - <message>``.
+
+    Args:
+        log_path: Destination log file; may be a bare filename.
+    """
+    log_dir = os.path.dirname(log_path)
+    # A bare filename has an empty dirname, and os.makedirs("") raises.
+    if log_dir:
+        os.makedirs(log_dir, exist_ok=True)
+    logging.basicConfig(
+        filename=log_path,
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s'
+    )
+
+def load_config(config_path="config.json"):
+    """Parse the JSON file at ``config_path`` and return the resulting object."""
+    with open(config_path, 'r') as handle:
+        return json.load(handle)
+
+def load_data(csv_path, date_col, price_col):
+    """Load a CSV, parse its date column, and return clean rows in date order.
+
+    Args:
+        csv_path: Path to the CSV file.
+        date_col: Name of the column to parse with ``pd.to_datetime``.
+        price_col: Name of the price column.
+
+    Returns:
+        A DataFrame sorted ascending by ``date_col`` with rows missing either
+        ``date_col`` or ``price_col`` removed, and a contiguous 0..n-1 index.
+    """
+    df = pd.read_csv(csv_path)
+    df[date_col] = pd.to_datetime(df[date_col])
+    # Drop incomplete rows BEFORE resetting the index; doing it afterwards
+    # would leave gaps in the index of the returned frame.
+    df = df.dropna(subset=[date_col, price_col])
+    df = df.sort_values(date_col).reset_index(drop=True)
+    return df
+
+if __name__ == "__main__":
+    # Script entry point: score every configured indicator and report accuracy
+    # both to the log file and to the console.
+    config = load_config("config.json")
+    eval_cfg = config["evaluation"]
+    setup_logging(eval_cfg["log_file"])
+
+    # Read the price data using the column names from the config.
+    data_cfg = config["data"]
+    df = load_data(data_cfg["input_csv"],
+                   data_cfg["date_column"],
+                   data_cfg["price_column"])
+
+    # One accuracy figure per (category, indicator) pair.
+    all_results = []
+    for category, indicators in config["indicators"].items():
+        for ind_name in indicators:
+            # Indicators without a "parameters" entry fall back to defaults.
+            params = config["parameters"].get(ind_name, {})
+            column_kwargs = {
+                "price_col": data_cfg["price_column"],
+                "high_col": data_cfg["high_column"],
+                "low_col": data_cfg["low_column"],
+                "volume_col": data_cfg["volume_column"],
+            }
+
+            # Work on a copy so each indicator starts from the raw data.
+            signals_df = calculate_indicator_signals(
+                df.copy(),
+                indicator_name=ind_name,
+                params=params,
+                **column_kwargs
+            )
+
+            accuracy = evaluate_indicator_accuracy(
+                signals_df,
+                price_col=data_cfg["price_column"],
+                horizon=eval_cfg["prediction_horizon"]
+            )
+
+            logging.info(f"Category: {category}, Indicator: {ind_name}, Accuracy: {accuracy:.4f}")
+            all_results.append((category, ind_name, accuracy))
+
+    # Print results to console as well
+    for row in all_results:
+        category, ind_name, acc = row
+        print(f"Category: {category}, Indicator: {ind_name}, Accuracy: {acc:.4f}")