diff --git a/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc b/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc
index cdf3710..a61209e 100644
Binary files a/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc and b/Data-Collection/WebScraper/scrapers/__pycache__/oil_news_scraper.cpython-311.pyc differ
diff --git a/Data-Collection/WebScraper/scrapers/backups/oil_news_scraper.py.bak b/Data-Collection/WebScraper/scrapers/backups/oil_news_scraper.py.bak
index 344a3c9..d8577a8 100644
--- a/Data-Collection/WebScraper/scrapers/backups/oil_news_scraper.py.bak
+++ b/Data-Collection/WebScraper/scrapers/backups/oil_news_scraper.py.bak
@@ -1,76 +1,100 @@
+import json
 from selenium import webdriver
 from selenium.webdriver.firefox.options import Options
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from bs4 import BeautifulSoup
-import pandas as pd
 import os
+import time
+import re
 
-# URL for OilPrice.com homepage
 OIL_NEWS_URL = "https://oilprice.com/Latest-Energy-News/World-News/"
-
-# Set up the data directory
 DATA_DIR = os.path.join(os.getcwd(), "data")
 if not os.path.exists(DATA_DIR):
     os.makedirs(DATA_DIR)
 
+def load_existing_data(file_path):
+    """Load existing data from JSON file to avoid duplicates."""
+    if os.path.exists(file_path):
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    return []
+
+def save_to_json(data, file_path):
+    """Save scraped data to a JSON file, ensuring no duplicates."""
+    existing_data = load_existing_data(file_path)
+    existing_links = {article['link'] for article in existing_data}
+
+    new_data = [article for article in data if article['link'] not in existing_links]
+    combined_data = existing_data + new_data
+
+    with open(file_path, 'w', encoding='utf-8') as f:
+        json.dump(combined_data, f, ensure_ascii=False, indent=4)
+    print(f"Oil news data saved to {file_path}")
+
+def extract_keywords(text):
+    """Simple function to extract keywords from text."""
+    keywords = re.findall(r'\b\w+\b', text.lower())
+    return list(set(keywords))[:10]  # Return the first 10 unique keywords
+
 def scrape_oil_news():
     print("Scraping oil market news using Selenium...")
 
-    # Set up Selenium options
     options = Options()
     options.headless = True
     driver = webdriver.Firefox(options=options)
 
-    driver.get(OIL_NEWS_URL)
-
-    # Wait until 'categoryArticle' elements load
-    try:
-        WebDriverWait(driver, 20).until(
-            EC.presence_of_element_located((By.CLASS_NAME, "categoryArticle"))
-        )
-    except Exception as e:
-        print("Error: Content did not load properly.")
-        driver.quit()
-        return pd.DataFrame()
-
-    soup = BeautifulSoup(driver.page_source, "html.parser")
-    driver.quit()
-
-    # Parse the articles
-    articles = soup.find_all('div', class_='categoryArticle')
     news_data = []
+    page_number = 1
+    max_pages = 10  # Limit to 10 pages
 
-    print(f"Found {len(articles)} articles.")
+    while page_number <= max_pages:
+        # Load the page with pagination
+        driver.get(f"{OIL_NEWS_URL}Page-{page_number}.html")
+
+        try:
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "categoryArticle"))
+            )
+        except Exception as e:
+            print(f"Error: Content did not load properly on page {page_number}.")
+            break
 
-    for i, article in enumerate(articles):
-        # Extract the title, link, and date using the adjusted structure
-        headline = article.find('h2', class_='categoryArticle__title').get_text(strip=True) if article.find('h2', class_='categoryArticle__title') else None
-        link = article.find('a', href=True)['href'] if article.find('a', href=True) else None
-        date = article.find('p', class_='categoryArticle__meta').get_text(strip=True) if article.find('p', class_='categoryArticle__meta') else None
+        soup = BeautifulSoup(driver.page_source, "html.parser")
+
+        articles = soup.find_all('div', class_='categoryArticle')
+        if not articles:
+            print(f"No articles found on page {page_number}. Ending pagination.")
+            break
 
-        # Log each article's details for debugging
-        print(f"Article {i+1} - Headline: {headline}, Link: {link}, Date: {date}")
+        for article in articles:
+            headline = article.find('h2', class_='categoryArticle__title').get_text(strip=True) if article.find('h2', class_='categoryArticle__title') else None
+            link = article.find('a', href=True)['href'] if article.find('a', href=True) else None
+            date = article.find('p', class_='categoryArticle__meta').get_text(strip=True) if article.find('p', class_='categoryArticle__meta') else None
+            excerpt = article.find('p', class_='categoryArticle__excerpt').get_text(strip=True) if article.find('p', class_='categoryArticle__excerpt') else None
+            author = date.split('|')[-1].strip() if '|' in date else "Unknown Author"
+            timestamp = date.split('|')[0].strip() if '|' in date else date
+
+            if headline and link and date:
+                news_data.append({
+                    'headline': headline,
+                    'link': link,
+                    'date': timestamp,
+                    'author': author,
+                    'excerpt': excerpt,
+                    'keywords': extract_keywords(headline + " " + excerpt if excerpt else headline),
+                    'sentiment_analysis': None  # Placeholder for future sentiment analysis
+                })
 
-        # Only add valid entries
-        if headline and link and date:
-            news_data.append({
-                'headline': headline,
-                'link': link,  # Assuming the link is already a full URL
-                'date': date
-            })
+        page_number += 1
+        time.sleep(2)
 
-    df = pd.DataFrame(news_data)
-    return df
+    driver.quit()
+    return news_data
 
 def run_scraper():
-    news_df = scrape_oil_news()
-    file_path = os.path.join(DATA_DIR, 'oil_news.csv')
-
-    if not news_df.empty:
-        news_df.to_csv(file_path, index=False)
-        print(f"Oil news data saved to {file_path}")
-    else:
-        print("No data was scraped. The CSV file is empty.")
+    file_path = os.path.join(DATA_DIR, 'oil_news.json')
+    news_data = scrape_oil_news()
+    save_to_json(news_data, file_path)
diff --git a/Data-Collection/WebScraper/scrapers/oil_news_scraper.py b/Data-Collection/WebScraper/scrapers/oil_news_scraper.py
index 344a3c9..d8577a8 100644
--- a/Data-Collection/WebScraper/scrapers/oil_news_scraper.py
+++ b/Data-Collection/WebScraper/scrapers/oil_news_scraper.py
@@ -1,76 +1,100 @@
+import json
 from selenium import webdriver
 from selenium.webdriver.firefox.options import Options
 from selenium.webdriver.common.by import By
 from selenium.webdriver.support.ui import WebDriverWait
 from selenium.webdriver.support import expected_conditions as EC
 from bs4 import BeautifulSoup
-import pandas as pd
 import os
+import time
+import re
 
-# URL for OilPrice.com homepage
 OIL_NEWS_URL = "https://oilprice.com/Latest-Energy-News/World-News/"
-
-# Set up the data directory
 DATA_DIR = os.path.join(os.getcwd(), "data")
 if not os.path.exists(DATA_DIR):
     os.makedirs(DATA_DIR)
 
+def load_existing_data(file_path):
+    """Load existing data from JSON file to avoid duplicates."""
+    if os.path.exists(file_path):
+        with open(file_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+    return []
+
+def save_to_json(data, file_path):
+    """Save scraped data to a JSON file, ensuring no duplicates."""
+    existing_data = load_existing_data(file_path)
+    existing_links = {article['link'] for article in existing_data}
+
+    new_data = [article for article in data if article['link'] not in existing_links]
+    combined_data = existing_data + new_data
+
+    with open(file_path, 'w', encoding='utf-8') as f:
+        json.dump(combined_data, f, ensure_ascii=False, indent=4)
+    print(f"Oil news data saved to {file_path}")
+
+def extract_keywords(text):
+    """Simple function to extract keywords from text."""
+    keywords = re.findall(r'\b\w+\b', text.lower())
+    return list(set(keywords))[:10]  # Return the first 10 unique keywords
+
 def scrape_oil_news():
     print("Scraping oil market news using Selenium...")
 
-    # Set up Selenium options
     options = Options()
     options.headless = True
     driver = webdriver.Firefox(options=options)
 
-    driver.get(OIL_NEWS_URL)
-
-    # Wait until 'categoryArticle' elements load
-    try:
-        WebDriverWait(driver, 20).until(
-            EC.presence_of_element_located((By.CLASS_NAME, "categoryArticle"))
-        )
-    except Exception as e:
-        print("Error: Content did not load properly.")
-        driver.quit()
-        return pd.DataFrame()
-
-    soup = BeautifulSoup(driver.page_source, "html.parser")
-    driver.quit()
-
-    # Parse the articles
-    articles = soup.find_all('div', class_='categoryArticle')
     news_data = []
+    page_number = 1
+    max_pages = 10  # Limit to 10 pages
 
-    print(f"Found {len(articles)} articles.")
+    while page_number <= max_pages:
+        # Load the page with pagination
+        driver.get(f"{OIL_NEWS_URL}Page-{page_number}.html")
+
+        try:
+            WebDriverWait(driver, 20).until(
+                EC.presence_of_element_located((By.CLASS_NAME, "categoryArticle"))
+            )
+        except Exception as e:
+            print(f"Error: Content did not load properly on page {page_number}.")
+            break
 
-    for i, article in enumerate(articles):
-        # Extract the title, link, and date using the adjusted structure
-        headline = article.find('h2', class_='categoryArticle__title').get_text(strip=True) if article.find('h2', class_='categoryArticle__title') else None
-        link = article.find('a', href=True)['href'] if article.find('a', href=True) else None
-        date = article.find('p', class_='categoryArticle__meta').get_text(strip=True) if article.find('p', class_='categoryArticle__meta') else None
+        soup = BeautifulSoup(driver.page_source, "html.parser")
+
+        articles = soup.find_all('div', class_='categoryArticle')
+        if not articles:
+            print(f"No articles found on page {page_number}. Ending pagination.")
+            break
 
-        # Log each article's details for debugging
-        print(f"Article {i+1} - Headline: {headline}, Link: {link}, Date: {date}")
+        for article in articles:
+            headline = article.find('h2', class_='categoryArticle__title').get_text(strip=True) if article.find('h2', class_='categoryArticle__title') else None
+            link = article.find('a', href=True)['href'] if article.find('a', href=True) else None
+            date = article.find('p', class_='categoryArticle__meta').get_text(strip=True) if article.find('p', class_='categoryArticle__meta') else None
+            excerpt = article.find('p', class_='categoryArticle__excerpt').get_text(strip=True) if article.find('p', class_='categoryArticle__excerpt') else None
+            author = date.split('|')[-1].strip() if '|' in date else "Unknown Author"
+            timestamp = date.split('|')[0].strip() if '|' in date else date
+
+            if headline and link and date:
+                news_data.append({
+                    'headline': headline,
+                    'link': link,
+                    'date': timestamp,
+                    'author': author,
+                    'excerpt': excerpt,
+                    'keywords': extract_keywords(headline + " " + excerpt if excerpt else headline),
+                    'sentiment_analysis': None  # Placeholder for future sentiment analysis
+                })
 
-        # Only add valid entries
-        if headline and link and date:
-            news_data.append({
-                'headline': headline,
-                'link': link,  # Assuming the link is already a full URL
-                'date': date
-            })
+        page_number += 1
+        time.sleep(2)
 
-    df = pd.DataFrame(news_data)
-    return df
+    driver.quit()
+    return news_data
 
 def run_scraper():
-    news_df = scrape_oil_news()
-    file_path = os.path.join(DATA_DIR, 'oil_news.csv')
-
-    if not news_df.empty:
-        news_df.to_csv(file_path, index=False)
-        print(f"Oil news data saved to {file_path}")
-    else:
-        print("No data was scraped. The CSV file is empty.")
+    file_path = os.path.join(DATA_DIR, 'oil_news.json')
+    news_data = scrape_oil_news()
+    save_to_json(news_data, file_path)
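
Notes on the new helpers (suggestions only, not part of the committed change):

- list(set(keywords))[:10] returns an arbitrary ten keywords rather than the first ten, because set() does not preserve word order.
- extract_keywords(headline + " " + excerpt if excerpt else headline) parses as headline + ((" " + excerpt) if excerpt else headline), so a missing excerpt concatenates the headline with itself.
- options.headless = True is deprecated in recent Selenium 4 releases and removed in newer ones; options.add_argument("--headless") is the usual way to request headless Firefox if the project is on a current Selenium.

A minimal sketch of how the keyword pieces could look if the intent is "first ten unique words"; build_keyword_text and the limit parameter are illustrative names, not part of the diff:

    import re

    def extract_keywords(text, limit=10):
        """Return the first `limit` unique word tokens, preserving first-seen order."""
        words = re.findall(r'\b\w+\b', text.lower())
        # dict.fromkeys de-duplicates while keeping insertion order (Python 3.7+),
        # unlike set(), so "first N unique keywords" is actually what comes back.
        return list(dict.fromkeys(words))[:limit]

    def build_keyword_text(headline, excerpt):
        """Join headline and excerpt only when an excerpt exists, with explicit precedence."""
        return f"{headline} {excerpt}" if excerpt else headline

    if __name__ == "__main__":
        # With no excerpt, the headline is used once rather than doubled.
        print(extract_keywords(build_keyword_text("Oil prices climb on supply concerns", None)))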