diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f7275bb --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +venv/ diff --git a/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc b/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc index cfde48a..3639622 100644 Binary files a/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc and b/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc differ diff --git a/Data-Collection/WebScraper/scrapers/oil_news_scraper.py b/Data-Collection/WebScraper/scrapers/oil_news_scraper.py index e0f4f31..b0791d2 100644 --- a/Data-Collection/WebScraper/scrapers/oil_news_scraper.py +++ b/Data-Collection/WebScraper/scrapers/oil_news_scraper.py @@ -21,6 +21,9 @@ def scrape_oil_news(): response = requests.get(OIL_NEWS_URL) response.raise_for_status() + # Print the HTML to see what we are working with + print(response.text[:1000]) # Print only the first 1000 characters for brevity + # Parse the HTML using BeautifulSoup soup = BeautifulSoup(response.text, "html.parser") @@ -45,9 +48,7 @@ def scrape_oil_news(): 'date': date }) - # Convert the list into a pandas DataFrame df = pd.DataFrame(news_data) - return df # Function to run the scraper and save data diff --git a/Data-Collection/WebScraper/scrapers/tests/selenium_webdriver_test.py b/Data-Collection/WebScraper/scrapers/tests/selenium_webdriver_test.py new file mode 100644 index 0000000..d70c785 --- /dev/null +++ b/Data-Collection/WebScraper/scrapers/tests/selenium_webdriver_test.py @@ -0,0 +1,26 @@ +from selenium import webdriver +from selenium.webdriver.firefox.service import Service +from selenium.webdriver.common.by import By +import time + +# Provide the path to your geckodriver executable using the Service class +service = Service(executable_path='/usr/local/bin/geckodriver') +driver = webdriver.Firefox(service=service) + +# Open a website (e.g., OilPrice.com) +driver.get("https://oilprice.com/Latest-Energy-News/World-News/") + +# Wait for the page to load +time.sleep(5) + +# Print the title of the page to verify that it's loaded +print(driver.title) + +# Find and print some element on the page, e.g., all article titles +articles = driver.find_elements(By.CSS_SELECTOR, "div.categoryArticle") +for article in articles: + title = article.find_element(By.TAG_NAME, "a").text + print(f"Article title: {title}") + +# Close the browser +driver.quit() diff --git a/GUSHTradingBotV1.0 b/GUSHTradingBotV1.0.py similarity index 76% rename from GUSHTradingBotV1.0 rename to GUSHTradingBotV1.0.py index d5d227f..69fa81b 100644 --- a/GUSHTradingBotV1.0 +++ b/GUSHTradingBotV1.0.py @@ -8,6 +8,7 @@ def ticker_info(): ticker = "gush" return ticker.upper() + def fetch_expiration_dates(ticker): print(f"Fetching available expiration dates for {ticker}...") stock = yf.Ticker(ticker) @@ -15,12 +16,14 @@ def fetch_expiration_dates(ticker): print(f"Available expiration dates: {expiration_dates}") return expiration_dates + def select_expiration_date(expiration_dates): print("Selecting the first available expiration date...") expiration_date = expiration_dates[0] print(f"Selected expiration date: {expiration_date}") return expiration_date + def fetch_option_chain(ticker, expiration_date): print(f"Fetching option chain for {ticker} with expiration date {expiration_date}...") stock = yf.Ticker(ticker) @@ -28,11 +31,14 @@ def fetch_option_chain(ticker, expiration_date): print("Option chain fetched successfully!") return options_chain + def get_price_data(ticker, start_date, end_date): print(f"Fetching price data for {ticker} from {start_date} to {end_date}...") data = yf.download(ticker, start=start_date, end=end_date) print(f"Price data fetched successfully for {ticker}!") return data + + def moving_average_strategy(data, short_window=20, long_window=50): data['Short_MA'] = data['Close'].rolling(window=short_window).mean() data['Long_MA'] = data['Close'].rolling(window=long_window).mean() @@ -41,33 +47,70 @@ def moving_average_strategy(data, short_window=20, long_window=50): def rsi_strategy(data, window=14, overbought=70, oversold=30): delta = data['Close'].diff(1) - gain = np.where(delta > 0, delta, 0) - loss = np.where(delta < 0, abs(delta), 0) + gain = np.where(delta > 0, delta, 0).flatten() # Flatten to 1D array + loss = np.where(delta < 0, abs(delta), 0).flatten() # Flatten to 1D array + avg_gain = pd.Series(gain).rolling(window=window).mean() avg_loss = pd.Series(loss).rolling(window=window).mean() - rs = avg_gain / avg_loss + + # Avoid division by zero by using np.where to replace 0 with np.nan in avg_loss + rs = avg_gain / np.where(avg_loss == 0, np.nan, avg_loss) + rsi = 100 - (100 / (1 + rs)) + signal = np.where(rsi < oversold, 1, np.where(rsi > overbought, -1, 0)) return pd.Series(signal, index=data.index) def bollinger_bands_strategy(data, window=20, num_std=2): + # Calculate moving average data['Moving_Avg'] = data['Close'].rolling(window=window).mean() - data['Band_Upper'] = data['Moving_Avg'] + num_std * data['Close'].rolling(window).std() - data['Band_Lower'] = data['Moving_Avg'] - num_std * data['Close'].rolling(window).std() - signal = np.where(data['Close'] < data['Band_Lower'], 1, np.where(data['Close'] > data['Band_Upper'], -1, 0)) + + # Calculate rolling standard deviation and force it to be a Series + rolling_std = data['Close'].rolling(window).std() + rolling_std = rolling_std.squeeze() # Ensure rolling_std is a Series + + # Print shapes for debugging + print(f"Shape of Moving_Avg: {data['Moving_Avg'].shape}") + print(f"Shape of Rolling Std: {rolling_std.shape}") + + # Calculate upper and lower bands + data['Band_Upper'] = data['Moving_Avg'] + (num_std * rolling_std) + data['Band_Lower'] = data['Moving_Avg'] - (num_std * rolling_std) + + # Print shapes after assignments for debugging + print(f"Shape of Band_Upper: {data['Band_Upper'].shape}") + print(f"Shape of Band_Lower: {data['Band_Lower'].shape}") + + # Check for NaN values + print(f"NaNs in Close: {data['Close'].isna().sum()}") + print(f"NaNs in Band_Upper: {data['Band_Upper'].isna().sum()}") + print(f"NaNs in Band_Lower: {data['Band_Lower'].isna().sum()}") + + # Print the columns of the DataFrame + print(f"Columns in data before dropping NaNs: {data.columns.tolist()}") + + # Optionally drop rows with NaNs + data = data.dropna(subset=['Close', 'Band_Upper', 'Band_Lower']) + + # Generate signals based on the bands + signal = np.where(data['Close'] < data['Band_Lower'], 1, + np.where(data['Close'] > data['Band_Upper'], -1, 0)) + return pd.Series(signal, index=data.index) + def generate_signals(data): ma_signal = moving_average_strategy(data) rsi_signal = rsi_strategy(data) bollinger_signal = bollinger_bands_strategy(data) - return [ma_signal, rsi_signal, bollinger_signal] + return pd.DataFrame({'MA': ma_signal, 'RSI': rsi_signal, 'Bollinger': bollinger_signal}) + + def backtest_option_trades(option_chain, signals, stock_data): """ Backtest option trades based on the given signals and stock data. """ trades = [] current_position = None - signals = pd.Series(signals) # Convert signals to pandas Series # Ensure both stock_data and option_chain indices are sorted in ascending order stock_data = stock_data.sort_index() @@ -76,7 +119,7 @@ def backtest_option_trades(option_chain, signals, stock_data): if 'lastTradeDate' in option_chain.columns: option_chain['lastTradeDate'] = pd.to_datetime(option_chain['lastTradeDate']) option_chain = option_chain.set_index('lastTradeDate') - + # If option_chain index isn't datetime, convert it to datetime (ensuring compatibility) option_chain.index = pd.to_datetime(option_chain.index) @@ -88,7 +131,7 @@ def backtest_option_trades(option_chain, signals, stock_data): option_chain = option_chain.reindex(stock_data.index, method='ffill') for i in range(len(signals)): - if signals.iloc[i] == 1 and current_position is None: + if signals.iloc[i]['MA'] == 1 and current_position is None: # BUY signal entry_price = option_chain['lastPrice'].iloc[i] if pd.isna(entry_price): # If price is nan, log the error and continue @@ -101,7 +144,7 @@ def backtest_option_trades(option_chain, signals, stock_data): } print(f"BUY signal on {entry_date}: Entry Price = {entry_price}") - elif signals.iloc[i] == -1 and current_position is not None: + elif signals.iloc[i]['MA'] == -1 and current_position is not None: # SELL signal exit_price = option_chain['lastPrice'].iloc[i] if pd.isna(exit_price): # If price is nan, log the error and continue @@ -126,33 +169,35 @@ def backtest_option_trades(option_chain, signals, stock_data): win_rate = total_wins / total_trades if total_trades > 0 else 0 return cumulative_pnl, trades, win_rate - total_trades = len(trades) - cumulative_pnl, daily_pnls, win_rate, total_trades = backtest_option_trades(options_chain.calls, weighted_signals, test_data) - return cumulative_pnl, trades, win_rate, total_trades + def objective_function_profit(weights, strategy_signals, data, option_chain): weights = np.array(weights) weights /= np.sum(weights) # Normalize weights - weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals, weights)], axis=0) - + weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals.T.values, weights)], axis=0) + # Since `backtest_option_trades` returns 3 values, we only unpack those cumulative_pnl, _, _ = backtest_option_trades(option_chain, weighted_signals, data) - + # Return negative cumulative P&L to maximize profit return -cumulative_pnl + def optimize_weights(strategy_signals, data, option_chain): - initial_weights = [1/len(strategy_signals)] * len(strategy_signals) + initial_weights = [1 / len(strategy_signals.columns)] * len(strategy_signals.columns) constraints = ({'type': 'eq', 'fun': lambda weights: np.sum(weights) - 1}) - bounds = [(0, 1)] * len(strategy_signals) - + bounds = [(0, 1)] * len(strategy_signals.columns) + result = minimize(objective_function_profit, initial_weights, args=(strategy_signals, data, option_chain), method='SLSQP', bounds=bounds, constraints=constraints) return result.x # Optimal weights + + def weighted_signal_combination(strategy_signals, weights): - weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals, weights)], axis=0) + weighted_signals = np.sum([signal * weight for signal, weight in zip(strategy_signals.T.values, weights)], axis=0) return weighted_signals + def main_decision(weighted_signals): last_signal = weighted_signals[-1] # Latest signal if last_signal > 0: @@ -161,6 +206,8 @@ def main_decision(weighted_signals): return "SELL" else: return "HOLD" + + def run_backtest(): ticker = ticker_info() expiration_dates = fetch_expiration_dates(ticker) @@ -169,22 +216,22 @@ def run_backtest(): # Fetch training data train_data = get_price_data(ticker, '2010-01-01', '2022-01-01') - + # Generate signals strategy_signals_train = generate_signals(train_data) - + # Optimize weights optimal_weights = optimize_weights(strategy_signals_train, train_data, options_chain.calls) # Fetch test data test_data = get_price_data(ticker, '2022-01-02', '2024-01-01') - + # Generate test signals strategy_signals_test = generate_signals(test_data) # Combine signals and backtest weighted_signals = weighted_signal_combination(strategy_signals_test, optimal_weights) - cumulative_pnl, daily_pnls, win_rate = backtest_option_trades(options_chain.calls, weighted_signals, test_data) + cumulative_pnl, trades, win_rate = backtest_option_trades(options_chain.calls, weighted_signals, test_data) # Make final decision decision = main_decision(weighted_signals) @@ -193,7 +240,7 @@ def run_backtest(): # Output results print(f"Cumulative P&L: {cumulative_pnl}") print(f"Win Rate: {win_rate * 100:.2f}%") - + # Call the main function run_backtest()