Add response debugging and BeautifulSoup parsing to the oil news scraper; add a standalone Selenium (Firefox/geckodriver) smoke-test script
Binary file not shown.
@@ -21,6 +21,9 @@ def scrape_oil_news():
    response = requests.get(OIL_NEWS_URL)
    response.raise_for_status()

    # Print the HTML to see what we are working with
    print(response.text[:1000])  # Print only the first 1000 characters for brevity

    # Parse the HTML using BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")

@@ -45,9 +48,7 @@ def scrape_oil_news():
            'date': date
        })

    # Convert the list into a pandas DataFrame
    df = pd.DataFrame(news_data)

    return df

# Function to run the scraper and save data

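The loop that fills news_data is not shown in either hunk above. Purely as an illustrative sketch (not the commit's actual code), the parsed soup could be queried using the same div.categoryArticle containers the Selenium script below targets; the date element's class name and the field handling here are assumptions about OilPrice.com's markup:

    # Illustrative sketch only; the real extraction loop is elided from this diff.
    # "div.categoryArticle" mirrors the selector used in the Selenium script below,
    # and "categoryArticle__meta" is an assumed class name for the date element.
    news_data = []
    for article in soup.select("div.categoryArticle"):
        link_tag = article.find("a")
        date_tag = article.find("p", class_="categoryArticle__meta")  # assumed class
        news_data.append({
            'title': link_tag.get_text(strip=True) if link_tag else None,
            'link': link_tag.get("href") if link_tag else None,
            'date': date_tag.get_text(strip=True) if date_tag else None,
        })
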
@@ -0,0 +1,26 @@
from selenium import webdriver
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.common.by import By
import time

# Provide the path to your geckodriver executable using the Service class
service = Service(executable_path='/usr/local/bin/geckodriver')
driver = webdriver.Firefox(service=service)

# Open a website (e.g., OilPrice.com)
driver.get("https://oilprice.com/Latest-Energy-News/World-News/")

# Wait for the page to load
time.sleep(5)

# Print the title of the page to verify that it's loaded
print(driver.title)

# Find and print some element on the page, e.g., all article titles
articles = driver.find_elements(By.CSS_SELECTOR, "div.categoryArticle")
for article in articles:
    title = article.find_element(By.TAG_NAME, "a").text
    print(f"Article title: {title}")

# Close the browser
driver.quit()
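
The fixed time.sleep(5) is fine for a quick smoke test, but an explicit wait is usually more reliable. A minimal sketch using Selenium's WebDriverWait against the same div.categoryArticle selector (the 10-second timeout is an arbitrary choice, not something the commit specifies):

    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Block until at least one article container is present instead of sleeping blindly
    articles = WebDriverWait(driver, 10).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.categoryArticle"))
    )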