Updating another attempt; there are currently errors.

2025-04-12 21:26:05 +00:00
parent 50a245056c
commit 916b8b4611
6 changed files with 65308 additions and 32 deletions


@@ -1,3 +1,18 @@
 2025-04-12 20:44:28,103 - __main__ - INFO - Starting FuturesTradingAI application...
 2025-04-12 20:44:28,104 - __main__ - INFO - Loading data from /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv...
 2025-04-12 20:44:28,107 - src.data.loader - ERROR - Data file not found: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
2025-04-12 20:59:23,967 - __main__ - INFO - Starting FuturesTradingAI application...
2025-04-12 20:59:23,968 - __main__ - INFO - Loading data from /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv...
2025-04-12 20:59:23,968 - root - INFO - Loading data from: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
2025-04-12 20:59:23,968 - root - ERROR - Data file not found: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
2025-04-12 21:23:12,967 - __main__ - INFO - Starting FuturesTradingAI application...
2025-04-12 21:23:12,968 - __main__ - INFO - Loading data from /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv...
2025-04-12 21:23:12,968 - root - INFO - Loading data from: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
2025-04-12 21:23:12,968 - root - ERROR - Data file not found: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
2025-04-12 21:25:01,971 - __main__ - INFO - Starting FuturesTradingAI application...
2025-04-12 21:25:01,972 - __main__ - INFO - Loading data from src/data/cleaned_MES_data.csv...
2025-04-12 21:25:01,972 - root - INFO - Loading data from: src/data/cleaned_MES_data.csv
2025-04-12 21:25:01,972 - root - INFO - Detected CSV file format.
2025-04-12 21:25:02,058 - root - INFO - Data columns after renaming: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
2025-04-12 21:25:02,063 - root - INFO - Data loaded and sorted successfully.
2025-04-12 21:25:02,063 - __main__ - INFO - Calculating technical indicators on dataset...
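
Every failed run above has the same cause: the absolute sample_data.csv path does not exist on disk, while the relative cleaned_MES_data.csv path used in the final run does. A minimal sketch to confirm which candidate paths resolve (paths copied from the log; the script itself is illustrative, not part of the repo):

import os

# Candidate data paths taken verbatim from the log output above.
candidates = [
    "/home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv",
    "src/data/cleaned_MES_data.csv",
]

# Report which paths actually exist relative to the current working directory.
for path in candidates:
    print(path, "->", "exists" if os.path.exists(path) else "missing")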

File diff suppressed because it is too large.

src/data/loader.py

@@ -1,59 +1,75 @@
""" """
src/data/loader.py src/data/loader.py
This module provides functions to load and clean CSV market data. This module provides functions to load and clean market data from various file types.
It now supports both CSV and JSON files. When a file path is passed (for example,
using the --mode testing flag), the function detects the file extension and loads
the data with the appropriate Pandas reader.
The expected columns are: time, open, high, low, close, volume.
After loading, columns are renamed for consistency and the data is sorted chronologically.
""" """
import pandas as pd import pandas as pd
import logging import logging
import sys
import os
def load_data(file_path): def load_data(file_path):
""" """
Load CSV data from the specified file path. Load market data from a specified file, supporting both CSV and JSON formats.
Parameters: Parameters:
- file_path (str): Path to the CSV file. - file_path (str): Path to the data file (CSV or JSON).
Returns: Returns:
- pandas.DataFrame: Loaded and cleaned data. - pandas.DataFrame: Loaded and cleaned data with standardized column names.
""" """
logger = logging.getLogger(__name__) logging.info(f"Loading data from: {file_path}")
# Check if the file exists
if not os.path.exists(file_path):
logging.error(f"Data file not found: {file_path}")
sys.exit(1)
# Determine file type based on extension (case-insensitive)
file_ext = os.path.splitext(file_path)[1].lower()
try: try:
# Attempt to read the CSV with proper parsing of date columns if file_ext == '.csv':
df = pd.read_csv(file_path, parse_dates=['time']) logging.info("Detected CSV file format.")
logger.info(f"Successfully loaded data from {file_path}") df = pd.read_csv(file_path, parse_dates=['time'])
elif file_ext == '.json':
logging.info("Detected JSON file format.")
# For JSON files, we assume a records-oriented format.
df = pd.read_json(file_path, convert_dates=['time'])
else:
logging.error("Unsupported file format. Only CSV and JSON are supported.")
sys.exit(1)
except FileNotFoundError: except FileNotFoundError:
logger.error(f"Data file not found: {file_path}") logging.error(f"File not found: {file_path}")
raise sys.exit(1)
except pd.errors.ParserError as e: except pd.errors.ParserError as e:
logger.error(f"Error parsing the CSV file: {e}") logging.error(f"Error parsing file: {e}")
raise sys.exit(1)
except Exception as e: except Exception as e:
logger.error(f"Unexpected error loading data: {e}") logging.error(f"Unexpected error: {e}")
raise sys.exit(1)
# Standardize column names (e.g., time, open, high, low, close, volume) # Standardize column names. Adjust this mapping if your JSON/CSV keys differ.
expected_cols = ['time', 'open', 'high', 'low', 'close', 'volume']
df.columns = [col.strip().lower() for col in df.columns]
if not all(col in df.columns for col in expected_cols):
logger.warning("Input data does not contain all expected columns. Attempting to map columns.")
# Rename columns if necessary (this can be extended based on the actual CSV structure)
rename_mapping = { rename_mapping = {
'time': 'time', 'time': 'Date',
'open': 'open', 'open': 'Open',
'high': 'high', 'high': 'High',
'low': 'low', 'low': 'Low',
'close': 'close', 'close': 'Close',
'volume': 'volume' 'volume': 'Volume'
} }
df = df.rename(columns=rename_mapping) df.rename(columns=rename_mapping, inplace=True)
# Sort data chronologically and reset index logging.info(f"Data columns after renaming: {df.columns.tolist()}")
df.sort_values(by='time', inplace=True) df.sort_values('Date', inplace=True)
df.reset_index(drop=True, inplace=True) df.reset_index(drop=True, inplace=True)
logging.info("Data loaded and sorted successfully.")
# Handle missing values by forward filling
df.fillna(method='ffill', inplace=True)
return df return df
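
A hedged usage sketch for the new loader: it writes a throwaway CSV with the lowercase columns the loader expects and round-trips it through load_data, reproducing the renamed columns seen in the log above. The temp file name and the import path are assumptions based on the module docstring, not confirmed by the diff:

import logging
import pandas as pd

from src.data.loader import load_data  # assumed import path; run from the repo root

logging.basicConfig(level=logging.INFO)

# Throwaway CSV with the expected lowercase columns (file name is illustrative).
csv_path = "tmp_sample.csv"
pd.DataFrame({
    "time": ["2025-04-11 09:30:00", "2025-04-11 09:31:00"],
    "open": [5170.00, 5171.50],
    "high": [5172.00, 5173.00],
    "low": [5169.50, 5170.50],
    "close": [5171.50, 5172.25],
    "volume": [1200, 950],
}).to_csv(csv_path, index=False)

df = load_data(csv_path)
print(df.columns.tolist())  # ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']

# The JSON branch assumes a records-oriented file, i.e. a top-level array like
# [{"time": "2025-04-11T09:30:00", "open": 5170.0, ...}, ...].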