# Source listing metadata: 73 lines, 3.0 KiB, Python
# midas/feature_engineer.py
|
|
import numpy as np
|
|
import pandas as pd
|
|
from sklearn.preprocessing import RobustScaler
|
|
from typing import Dict
|
|
|
|
class FeatureEngineer:
    """Build a scaled technical-indicator feature matrix from per-ticker price data.

    The ``config`` mapping is read for two keys:

    * ``config['features']`` -- a container of feature names. Recognised
      names are ``'returns'``, ``'volatility'``, ``'rsi'``, ``'macd'``,
      ``'atr'``, ``'volume_change'`` and ``'obv'``.
    * ``config['combine_tickers']`` -- when truthy, a ``ticker`` column is
      attached to every row and one-hot encoded before scaling.
    """

    def __init__(self, config: Dict):
        """Store the configuration and create the scaler used for the output."""
        self.config = config
        self.scaler = RobustScaler()  # Handles outliers

    def _calculate_obv(self, df: pd.DataFrame) -> pd.Series:
        """Return the 14-period percentage change of On-Balance Volume.

        OBV is the running sum of ``volume`` signed by the direction of the
        ``close`` change. The first OBV value is always 0 (there is no prior
        close), so the percentage change taken against it is a division by
        zero; such infinite results are returned as NaN so that a later
        ``dropna()`` removes them instead of letting them reach the scaler.

        Args:
            df: Frame with ``close`` and ``volume`` columns.

        Returns:
            Series aligned to ``df.index``; non-computable entries are NaN.
        """
        direction = np.sign(df['close'].diff())
        obv = (direction * df['volume']).fillna(0).cumsum()
        normalized = obv.pct_change(periods=14)  # Normalized OBV
        return normalized.replace([np.inf, -np.inf], np.nan)

    def _build_ticker_features(self, ticker: str, df: pd.DataFrame) -> pd.DataFrame:
        """Compute the configured, unscaled features for one ticker.

        Args:
            ticker: Identifier stored in the ``ticker`` column when
                ``config['combine_tickers']`` is truthy.
            df: Price frame. ``close`` is needed by most features; ``high``
                and ``low`` by ``'atr'``; ``volume`` by ``'volume_change'``
                and ``'obv'``.

        Returns:
            Frame indexed like ``df`` with one column per configured feature
            (plus ``ticker`` if combining). Rows containing NaN or infinite
            values are removed.
        """
        wanted = self.config['features']
        features = pd.DataFrame(index=df.index)
        close = df['close']

        # Price-based features
        # Log returns feed both 'returns' and 'volatility', so compute them
        # independently of whether the 'returns' column itself is requested.
        if 'returns' in wanted or 'volatility' in wanted:
            log_returns = np.log(close).diff()
            if 'returns' in wanted:
                features['returns'] = log_returns
            if 'volatility' in wanted:
                # 20-period rolling std scaled by sqrt(252); presumably an
                # annualisation over 252 trading days -- confirm with owner.
                features['volatility'] = log_returns.rolling(20).std() * np.sqrt(252)

        if 'rsi' in wanted:
            delta = close.diff()
            gain = delta.where(delta > 0, 0).rolling(14).mean()
            loss = -delta.where(delta < 0, 0).rolling(14).mean()
            # A window with no losses makes gain/loss infinite, which this
            # formula resolves to 100; a completely flat window yields NaN.
            features['rsi'] = 100 - (100 / (1 + (gain / loss)))

        if 'macd' in wanted:
            ema12 = close.ewm(span=12, adjust=False).mean()
            ema26 = close.ewm(span=26, adjust=False).mean()
            features['macd'] = ema12 - ema26

        if 'atr' in wanted:
            prev_close = close.shift()
            high_low = df['high'] - df['low']
            high_close = (df['high'] - prev_close).abs()
            low_close = (df['low'] - prev_close).abs()
            tr = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
            # Divided by close so the value is relative to the price level.
            features['atr'] = tr.rolling(14).mean() / close

        if 'volume_change' in wanted:
            features['volume_change'] = np.log(df['volume'] / df['volume'].shift(1))

        if 'obv' in wanted:
            features['obv'] = self._calculate_obv(df)

        # dropna() does not remove +/-inf (e.g. log of a zero-volume ratio),
        # and the scaler rejects infinite input, so map them to NaN first.
        features = features.replace([np.inf, -np.inf], np.nan)

        # Add ticker identifier if combining
        if self.config['combine_tickers']:
            features['ticker'] = ticker

        return features.dropna()

    def calculate_features(self, data: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        """Compute, combine and scale the configured features for all tickers.

        Args:
            data: Mapping of ticker name to its price DataFrame.

        Returns:
            DataFrame of scaled features, sorted by index. When
            ``config['combine_tickers']`` is truthy the ``ticker`` column is
            replaced by one indicator column per ticker (named after the
            ticker) before scaling.

        Raises:
            ValueError: If ``data`` is empty.
        """
        if not data:
            raise ValueError("No ticker data provided; cannot calculate features")

        all_features = [
            self._build_ticker_features(ticker, df) for ticker, df in data.items()
        ]
        combined = pd.concat(all_features).sort_index()

        # Encode tickers if combining
        if self.config['combine_tickers']:
            combined = pd.get_dummies(
                combined, columns=['ticker'], prefix='', prefix_sep=''
            )

        # Scale features (the scaler is re-fitted on every call)
        scaled = pd.DataFrame(
            self.scaler.fit_transform(combined),
            index=combined.index,
            columns=combined.columns,
        )
        return scaled
|