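Fix strategy signal shape/NaN handling, return signals as a DataFrame, add scraper debug output and a Selenium WebDriver test, ignore venv/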

2024-10-27 18:03:01 -04:00
parent 8d9ed2e7ea
commit a81e38e21f
5 changed files with 103 additions and 28 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1 @@
+venv/
--- a/Data-Collection/WebScraper/scrapers/pycache/oil_news_scraper.cpython-311.pyc
+++ b/Data-Collection/WebScraper/scrapers/pycache/oil_news_scraper.cpython-311.pyc
--- a/Data-Collection/WebScraper/scrapers/oil_news_scraper.py
+++ b/Data-Collection/WebScraper/scrapers/oil_news_scraper.py
@@ -21,6 +21,9 @@ def scrape_oil_news():
    response = requests.get(OIL_NEWS_URL)
    response.raise_for_status()

+    # Print the HTML to see what we are working with
+    print(response.text[:1000])  # Print only the first 1000 characters for brevity
+
    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")

@@ -45,9 +48,7 @@ def scrape_oil_news():
                'date': date
            })

-    # Convert the list into a pandas DataFrame
    df = pd.DataFrame(news_data)
-
    return df

 # Function to run the scraper and save data
--- a/Data-Collection/WebScraper/scrapers/tests/selenium_webdriver_test.py
+++ b/Data-Collection/WebScraper/scrapers/tests/selenium_webdriver_test.py
@@ -0,0 +1,26 @@
+from selenium import webdriver
+from selenium.webdriver.firefox.service import Service
+from selenium.webdriver.common.by import By
+import time
+
+# Provide the path to your geckodriver executable using the Service class
+service = Service(executable_path='/usr/local/bin/geckodriver')
+driver = webdriver.Firefox(service=service)
+
+# Open a website (e.g., OilPrice.com)
+driver.get("https://oilprice.com/Latest-Energy-News/World-News/")
+
+# Wait for the page to load
+time.sleep(5)
+
+# Print the title of the page to verify that it's loaded
+print(driver.title)
+
+# Find and print some element on the page, e.g., all article titles
+articles = driver.find_elements(By.CSS_SELECTOR, "div.categoryArticle")
+for article in articles:
+    title = article.find_element(By.TAG_NAME, "a").text
+    print(f"Article title: {title}")
+
+# Close the browser
+driver.quit()
--- a/GUSHTradingBotV1.0.py
+++ b/GUSHTradingBotV1.0.py
@@ -8,6 +8,7 @@ def ticker_info():
    ticker = "gush"
    return ticker.upper()

+
 def fetch_expiration_dates(ticker):
    print(f"Fetching available expiration dates for {ticker}...")
    stock = yf.Ticker(ticker)
@@ -15,12 +16,14 @@ def fetch_expiration_dates(ticker):
    print(f"Available expiration dates: {expiration_dates}")
    return expiration_dates

+
 def select_expiration_date(expiration_dates):
    print("Selecting the first available expiration date...")
    expiration_date = expiration_dates[0]
    print(f"Selected expiration date: {expiration_date}")
    return expiration_date

+
 def fetch_option_chain(ticker, expiration_date):
    print(f"Fetching option chain for {ticker} with expiration date {expiration_date}...")
    stock = yf.Ticker(ticker)
@@ -28,11 +31,14 @@ def fetch_option_chain(ticker, expiration_date):
    print("Option chain fetched successfully!")
    return options_chain

+
 def get_price_data(ticker, start_date, end_date):
    print(f"Fetching price data for {ticker} from {start_date} to {end_date}...")
    data = yf.download(ticker, start=start_date, end=end_date)
    print(f"Price data fetched successfully for {ticker}!")
    return data
+
+
 def moving_average_strategy(data, short_window=20, long_window=50):
    data['Short_MA'] = data['Close'].rolling(window=short_window).mean()
    data['Long_MA'] = data['Close'].rolling(window=long_window).mean()
@@ -41,33 +47,70 @@ def moving_average_strategy(data, short_window=20, long_window=50):

 def rsi_strategy(data, window=14, overbought=70, oversold=30):
    delta = data['Close'].diff(1)
-    gain = np.where(delta > 0, delta, 0)
-    loss = np.where(delta < 0, abs(delta), 0)
+    gain = np.where(delta > 0, delta, 0).flatten()  # Flatten to 1D array
+    loss = np.where(delta < 0, abs(delta), 0).flatten()  # Flatten to 1D array
+    
    avg_gain = pd.Series(gain).rolling(window=window).mean()
    avg_loss = pd.Series(loss).rolling(window=window).mean()
-    rs = avg_gain / avg_loss
+    
+    # Avoid division by zero by using np.where to replace 0 with np.nan in avg_loss
+    rs = avg_gain / np.where(avg_loss == 0, np.nan, avg_loss)  
+    
    rsi = 100 - (100 / (1 + rs))
+    
    signal = np.where(rsi < oversold, 1, np.where(rsi > overbought, -1, 0))
    return pd.Series(signal, index=data.index)

 def bollinger_bands_strategy(data, window=20, num_std=2):
+    # Calculate moving average
    data['Moving_Avg'] = data['Close'].rolling(window=window).mean()
-    data['Band_Upper'] = data['Moving_Avg'] + num_std * data['Close'].rolling(window).std()
-    data['Band_Lower'] = data['Moving_Avg'] - num_std * data['Close'].rolling(window).std()
-    signal = np.where(data['Close'] < data['Band_Lower'], 1, np.where(data['Close'] > data['Band_Upper'], -1, 0))
+
+    # Calculate rolling standard deviation and force it to be a Series
+    rolling_std = data['Close'].rolling(window).std()
+    rolling_std = rolling_std.squeeze()  # Ensure rolling_std is a Series
+
+    # Print shapes for debugging
+    print(f"Shape of Moving_Avg: {data['Moving_Avg'].shape}")
+    print(f"Shape of Rolling Std: {rolling_std.shape}")
+
+    # Calculate upper and lower bands
+    data['Band_Upper'] = data['Moving_Avg'] + (num_std * rolling_std)
+    data['Band_Lower'] = data['Moving_Avg'] - (num_std * rolling_std)
+
+    # Print shapes after assignments for debugging
+    print(f"Shape of Band_Upper: {data['Band_Upper'].shape}")
+    print(f"Shape of Band_Lower: {data['Band_Lower'].shape}")
+
+    # Check for NaN values
+    print(f"NaNs in Close: {data['Close'].isna().sum()}")
+    print(f"NaNs in Band_Upper: {data['Band_Upper'].isna().sum()}")
+    print(f"NaNs in Band_Lower: {data['Band_Lower'].isna().sum()}")
+
+    # Print the columns of the DataFrame
+    print(f"Columns in data before dropping NaNs: {data.columns.tolist()}")
+
+    # Drop rows where Close, Band_Upper, or Band_Lower is NaN
+    data = data.dropna(subset=['Close', 'Band_Upper', 'Band_Lower'])
+
+    # Generate signals based on the bands
+    signal = np.where(data['Close'] < data['Band_Lower'], 1, 
+                      np.where(data['Close'] > data['Band_Upper'], -1, 0))
+    
    return pd.Series(signal, index=data.index)
+
 def generate_signals(data):
    ma_signal = moving_average_strategy(data)
    rsi_signal = rsi_strategy(data)
    bollinger_signal = bollinger_bands_strategy(data)
-    return [ma_signal, rsi_signal, bollinger_signal]
+    return pd.DataFrame({'MA': ma_signal, 'RSI': rsi_signal, 'Bollinger': bollinger_signal})
+
+
 def backtest_option_trades(option_chain, signals, stock_data):
    """
    Backtest option trades based on the given signals and stock data.
    """
    trades = []
    current_position = None
-    signals = pd.Series(signals)  # Convert signals to pandas Series

    # Ensure both stock_data and option_chain indices are sorted in ascending order
    stock_data = stock_data.sort_index()
@@ -76,7 +119,7 @@ def backtest_option_trades(option_chain, signals, stock_data):
    if 'lastTradeDate' in option_chain.columns:
        option_chain['lastTradeDate'] = pd.to_datetime(option_chain['lastTradeDate'])
        option_chain = option_chain.set_index('lastTradeDate')
-    
+
    # If option_chain index isn't datetime, convert it to datetime (ensuring compatibility)
    option_chain.index = pd.to_datetime(option_chain.index)

@@ -88,7 +131,7 @@ def backtest_option_trades(option_chain, signals, stock_data):
    option_chain = option_chain.reindex(stock_data.index, method='ffill')

    for i in range(len(signals)):
-        if signals.iloc[i] == 1 and current_position is None:
+        if signals.iloc[i]['MA'] == 1 and current_position is None:
            # BUY signal
            entry_price = option_chain['lastPrice'].iloc[i]
            if pd.isna(entry_price):  # If price is nan, log the error and continue
@@ -101,7 +144,7 @@ def backtest_option_trades(option_chain, signals, stock_data):
            }
            print(f"BUY signal on {entry_date}: Entry Price = {entry_price}")
        
-        elif signals.iloc[i] == -1 and current_position is not None:
+        elif signals.iloc[i]['MA'] == -1 and current_position is not None:
            # SELL signal
            exit_price = option_chain['lastPrice'].iloc[i]
            if pd.isna(exit_price):  # If price is nan, log the error and continue
@@ -126,33 +169,35 @@ def backtest_option_trades(option_chain, signals, stock_data):
    win_rate = total_wins / total_trades if total_trades > 0 else 0

    return cumulative_pnl, trades, win_rate
-    total_trades = len(trades)
-    cumulative_pnl, daily_pnls, win_rate, total_trades = backtest_option_trades(options_chain.calls, weighted_signals, test_data)

-    return cumulative_pnl, trades, win_rate, total_trades
+
 def objective_function_profit(weights, strategy_signals, data, option_chain):
    weights = np.array(weights)
    weights /= np.sum(weights)  # Normalize weights
-    weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals, weights)], axis=0)
-    
+    weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals.T.values, weights)], axis=0)
+
    # Since `backtest_option_trades` returns 3 values, we only unpack those
    cumulative_pnl, _, _ = backtest_option_trades(option_chain, weighted_signals, data)
-    
+
    # Return negative cumulative P&L to maximize profit
    return -cumulative_pnl

+
 def optimize_weights(strategy_signals, data, option_chain):
-    initial_weights = [1/len(strategy_signals)] * len(strategy_signals)
+    initial_weights = [1 / len(strategy_signals.columns)] * len(strategy_signals.columns)
    constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1})
-    bounds = [(0, 1)] * len(strategy_signals)
-    
+    bounds = [(0, 1)] * len(strategy_signals.columns)
+
    result = minimize(objective_function_profit, initial_weights, args=(strategy_signals, data, option_chain),
                      method='SLSQP', bounds=bounds, constraints=constraints)
    return result.x  # Optimal weights
+
+
 def weighted_signal_combination(strategy_signals, weights):
-    weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals, weights)], axis=0)
+    weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals.T.values, weights)], axis=0)
    return weighted_signals

+
 def main_decision(weighted_signals):
    last_signal = weighted_signals[-1]  # Latest signal
    if last_signal > 0:
@@ -161,6 +206,8 @@ def main_decision(weighted_signals):
        return "SELL"
    else:
        return "HOLD"
+
+
 def run_backtest():
    ticker = ticker_info()
    expiration_dates = fetch_expiration_dates(ticker)
@@ -169,22 +216,22 @@ def run_backtest():

    # Fetch training data
    train_data = get_price_data(ticker, '2010-01-01', '2022-01-01')
-    
+
    # Generate signals
    strategy_signals_train = generate_signals(train_data)
-    
+
    # Optimize weights
    optimal_weights = optimize_weights(strategy_signals_train, train_data, options_chain.calls)

    # Fetch test data
    test_data = get_price_data(ticker, '2022-01-02', '2024-01-01')
-    
+
    # Generate test signals
    strategy_signals_test = generate_signals(test_data)

    # Combine signals and backtest
    weighted_signals = weighted_signal_combination(strategy_signals_test, optimal_weights)
-    cumulative_pnl, daily_pnls, win_rate = backtest_option_trades(options_chain.calls, weighted_signals, test_data)
+    cumulative_pnl, trades, win_rate = backtest_option_trades(options_chain.calls, weighted_signals, test_data)

    # Make final decision
    decision = main_decision(weighted_signals)
@@ -193,7 +240,7 @@ def run_backtest():
    # Output results
    print(f"Cumulative P&L: {cumulative_pnl}")
    print(f"Win Rate: {win_rate * 100:.2f}%")
-    
+

 # Call the main function
 run_backtest()