Starter Code
This commit is contained in:
72
MidasHMM/hmm/midas/feature_engineer.py
Normal file
72
MidasHMM/hmm/midas/feature_engineer.py
Normal file
@@ -0,0 +1,72 @@
|
||||
# midas/feature_engineer.py
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.preprocessing import RobustScaler
|
||||
from typing import Dict
|
||||
|
||||
class FeatureEngineer:
    """Build a scaled technical-indicator feature matrix from price/volume data.

    Two keys of ``config`` are read:

    * ``config['features']`` -- container of feature names to compute.
      Recognised names are ``returns``, ``volatility``, ``rsi``, ``macd``,
      ``atr``, ``volume_change`` and ``obv``; other names are ignored.
    * ``config['combine_tickers']`` -- when truthy, every row is tagged with
      its ticker and the tag is one-hot encoded before scaling.
    """

    def __init__(self, config: Dict):
        """Store ``config`` and create the scaler used by ``calculate_features``."""
        self.config = config
        self.scaler = RobustScaler()  # Handles outliers

    def _calculate_obv(self, df: pd.DataFrame) -> pd.Series:
        """Return the 14-period percentage change of On-Balance Volume.

        OBV is the running sum of ``volume`` signed by the direction of the
        change in ``close``. The first row has no previous close, so it
        contributes 0 and the running total always starts at 0.

        Note:
            The first 14 values are NaN. Whenever the OBV 14 periods earlier
            is exactly 0 (always the case at position 14) the result is
            +/-inf or NaN; callers must clean non-finite values.
        """
        obv = (np.sign(df['close'].diff()) * df['volume']).fillna(0).cumsum()
        return obv.pct_change(periods=14)  # Normalized OBV

    def _ticker_features(self, ticker: str, df: pd.DataFrame) -> pd.DataFrame:
        """Compute the configured, unscaled features for a single ticker.

        Args:
            ticker: Identifier stored in the ``ticker`` column when
                ``config['combine_tickers']`` is truthy.
            df: Price data indexed like the desired output. Depending on the
                configured features it must provide ``close``, ``high``,
                ``low`` and/or ``volume`` columns.

        Returns:
            A frame indexed like ``df`` holding only the rows where every
            requested feature is finite (NaN and +/-inf rows are dropped).
        """
        wanted = self.config['features']
        features = pd.DataFrame(index=df.index)

        # Price-based features.
        # Log returns feed both 'returns' and 'volatility', so compute them
        # once; 'volatility' must not depend on 'returns' being requested too.
        if 'returns' in wanted or 'volatility' in wanted:
            log_returns = np.log(df['close']).diff()

            if 'returns' in wanted:
                features['returns'] = log_returns

            if 'volatility' in wanted:
                # 20-period rolling std scaled by sqrt(252).
                features['volatility'] = log_returns.rolling(20).std() * np.sqrt(252)

        if 'rsi' in wanted:
            delta = df['close'].diff()
            gain = delta.where(delta > 0, 0).rolling(14).mean()
            loss = -delta.where(delta < 0, 0).rolling(14).mean()
            # loss == 0 with gain > 0 gives gain / loss == inf -> RSI of 100;
            # gain == loss == 0 gives NaN and the row is dropped below.
            features['rsi'] = 100 - (100 / (1 + (gain / loss)))

        if 'macd' in wanted:
            ema12 = df['close'].ewm(span=12, adjust=False).mean()
            ema26 = df['close'].ewm(span=26, adjust=False).mean()
            features['macd'] = ema12 - ema26

        if 'atr' in wanted:
            prev_close = df['close'].shift()
            high_low = df['high'] - df['low']
            high_close = (df['high'] - prev_close).abs()
            low_close = (df['low'] - prev_close).abs()
            # True range: row-wise maximum of the three candidate ranges.
            tr = pd.concat([high_low, high_close, low_close], axis=1).max(axis=1)
            # Divided by close so the value is relative to the price level.
            features['atr'] = tr.rolling(14).mean() / df['close']

        if 'volume_change' in wanted:
            features['volume_change'] = np.log(df['volume'] / df['volume'].shift(1))

        if 'obv' in wanted:
            features['obv'] = self._calculate_obv(df)

        # Ratios and logs above can produce +/-inf (zero volume, zero OBV
        # base). dropna() does not remove inf, so convert it to NaN first.
        features = features.replace([np.inf, -np.inf], np.nan)

        # Add ticker identifier if combining
        if self.config['combine_tickers']:
            features['ticker'] = ticker

        return features.dropna()

    def calculate_features(self, data: Dict[str, pd.DataFrame]) -> pd.DataFrame:
        """Compute, combine and scale the configured features for all tickers.

        Args:
            data: Mapping of ticker name to that ticker's price DataFrame.

        Returns:
            A DataFrame sorted by index with one column per configured
            feature (plus one indicator column per ticker when
            ``config['combine_tickers']`` is truthy), transformed with
            ``self.scaler.fit_transform``. The scaler is re-fitted on every
            call.

        Raises:
            ValueError: If ``data`` is empty or no complete feature row
                remains after dropping NaN/inf values.
            KeyError: If a required config key or price column is missing.
        """
        if not data:
            raise ValueError("data must contain at least one ticker DataFrame")

        all_features = [
            self._ticker_features(ticker, df) for ticker, df in data.items()
        ]
        combined = pd.concat(all_features).sort_index()

        if combined.empty:
            raise ValueError(
                "no complete feature rows to scale after dropping NaN/inf values"
            )

        # Encode tickers if combining
        if self.config['combine_tickers']:
            combined = pd.get_dummies(combined, columns=['ticker'], prefix='', prefix_sep='')

        # Scale features
        scaled = pd.DataFrame(
            self.scaler.fit_transform(combined),
            index=combined.index,
            columns=combined.columns,
        )
        return scaled
|
||||
Reference in New Issue
Block a user