Updating another attempt; there are currently errors.
@@ -1,3 +1,18 @@
 2025-04-12 20:44:28,103 - __main__ - INFO - Starting FuturesTradingAI application...
 2025-04-12 20:44:28,104 - __main__ - INFO - Loading data from /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv...
 2025-04-12 20:44:28,107 - src.data.loader - ERROR - Data file not found: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
+2025-04-12 20:59:23,967 - __main__ - INFO - Starting FuturesTradingAI application...
+2025-04-12 20:59:23,968 - __main__ - INFO - Loading data from /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv...
+2025-04-12 20:59:23,968 - root - INFO - Loading data from: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
+2025-04-12 20:59:23,968 - root - ERROR - Data file not found: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
+2025-04-12 21:23:12,967 - __main__ - INFO - Starting FuturesTradingAI application...
+2025-04-12 21:23:12,968 - __main__ - INFO - Loading data from /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv...
+2025-04-12 21:23:12,968 - root - INFO - Loading data from: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
+2025-04-12 21:23:12,968 - root - ERROR - Data file not found: /home/midas/codeWS/Projects/MidasTechnologiesINC/MidasEngine/src/MidasHL/data/sample_data.csv
+2025-04-12 21:25:01,971 - __main__ - INFO - Starting FuturesTradingAI application...
+2025-04-12 21:25:01,972 - __main__ - INFO - Loading data from src/data/cleaned_MES_data.csv...
+2025-04-12 21:25:01,972 - root - INFO - Loading data from: src/data/cleaned_MES_data.csv
+2025-04-12 21:25:01,972 - root - INFO - Detected CSV file format.
+2025-04-12 21:25:02,058 - root - INFO - Data columns after renaming: ['Date', 'Open', 'High', 'Low', 'Close', 'Volume']
+2025-04-12 21:25:02,063 - root - INFO - Data loaded and sorted successfully.
+2025-04-12 21:25:02,063 - __main__ - INFO - Calculating technical indicators on dataset...
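The log entries above follow the standard `%(asctime)s - %(name)s - %(levelname)s - %(message)s` layout. The commit does not show how logging is initialized in `__main__`, so the following is only a minimal sketch that would reproduce that format:

```python
# Minimal sketch of a logging setup matching the format seen in the log.
# This configuration is an assumption; the commit does not include the
# actual logging bootstrap used by FuturesTradingAI.
import logging

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)

logging.getLogger("__main__").info("Starting FuturesTradingAI application...")
```

Note the mixed logger names in the trace: the `__main__` lines come from a module-level logger, the old `src.data.loader` line came from the loader's `logging.getLogger(__name__)`, and the new `root` lines come from the bare `logging.info(...)` calls introduced in the loader hunk below.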
Binary file not shown.
Binary file not shown.
65245
src/MidasHL/src/data/cleaned_MES_data.csv
Normal file
File diff suppressed because it is too large
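The contents of `cleaned_MES_data.csv` are suppressed above, but given the raw columns the loader below expects, the file presumably begins with a header row along the lines of `time,open,high,low,close,volume`; this is an inference from the loader's docstring, not from the file itself.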
@@ -1,59 +1,75 @@
 """
 src/data/loader.py
 
-This module provides functions to load and clean CSV market data.
+This module provides functions to load and clean market data from various file types.
+It now supports both CSV and JSON files. When a file path is passed (for example,
+using the --mode testing flag), the function detects the file extension and loads
+the data with the appropriate Pandas reader.
+
+The expected columns are: time, open, high, low, close, volume.
+After loading, columns are renamed for consistency and the data is sorted chronologically.
 """
 
 import pandas as pd
 import logging
+import sys
+import os
 
 def load_data(file_path):
     """
-    Load CSV data from the specified file path.
+    Load market data from a specified file, supporting both CSV and JSON formats.
 
     Parameters:
-    - file_path (str): Path to the CSV file.
+    - file_path (str): Path to the data file (CSV or JSON).
 
     Returns:
-    - pandas.DataFrame: Loaded and cleaned data.
+    - pandas.DataFrame: Loaded and cleaned data with standardized column names.
     """
-    logger = logging.getLogger(__name__)
+    logging.info(f"Loading data from: {file_path}")
+
+    # Check if the file exists
+    if not os.path.exists(file_path):
+        logging.error(f"Data file not found: {file_path}")
+        sys.exit(1)
+
+    # Determine file type based on extension (case-insensitive)
+    file_ext = os.path.splitext(file_path)[1].lower()
     try:
-        # Attempt to read the CSV with proper parsing of date columns
-        df = pd.read_csv(file_path, parse_dates=['time'])
-        logger.info(f"Successfully loaded data from {file_path}")
+        if file_ext == '.csv':
+            logging.info("Detected CSV file format.")
+            df = pd.read_csv(file_path, parse_dates=['time'])
+        elif file_ext == '.json':
+            logging.info("Detected JSON file format.")
+            # For JSON files, we assume a records-oriented format.
+            df = pd.read_json(file_path, convert_dates=['time'])
+        else:
+            logging.error("Unsupported file format. Only CSV and JSON are supported.")
+            sys.exit(1)
     except FileNotFoundError:
-        logger.error(f"Data file not found: {file_path}")
-        raise
+        logging.error(f"File not found: {file_path}")
+        sys.exit(1)
     except pd.errors.ParserError as e:
-        logger.error(f"Error parsing the CSV file: {e}")
-        raise
+        logging.error(f"Error parsing file: {e}")
+        sys.exit(1)
     except Exception as e:
-        logger.error(f"Unexpected error loading data: {e}")
-        raise
+        logging.error(f"Unexpected error: {e}")
+        sys.exit(1)
 
-    # Standardize column names (e.g., time, open, high, low, close, volume)
-    expected_cols = ['time', 'open', 'high', 'low', 'close', 'volume']
-    df.columns = [col.strip().lower() for col in df.columns]
-    if not all(col in df.columns for col in expected_cols):
-        logger.warning("Input data does not contain all expected columns. Attempting to map columns.")
-        # Rename columns if necessary (this can be extended based on the actual CSV structure)
-        rename_mapping = {
-            'time': 'time',
-            'open': 'open',
-            'high': 'high',
-            'low': 'low',
-            'close': 'close',
-            'volume': 'volume'
-        }
-        df = df.rename(columns=rename_mapping)
+    # Standardize column names. Adjust this mapping if your JSON/CSV keys differ.
+    rename_mapping = {
+        'time': 'Date',
+        'open': 'Open',
+        'high': 'High',
+        'low': 'Low',
+        'close': 'Close',
+        'volume': 'Volume'
+    }
+    df.rename(columns=rename_mapping, inplace=True)
+    logging.info(f"Data columns after renaming: {df.columns.tolist()}")
 
     # Sort data chronologically and reset index
-    df.sort_values(by='time', inplace=True)
+    df.sort_values('Date', inplace=True)
     df.reset_index(drop=True, inplace=True)
 
     # Handle missing values by forward filling
     df.fillna(method='ffill', inplace=True)
+    logging.info("Data loaded and sorted successfully.")
 
     return df
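To sanity-check the new loader, here is a hypothetical smoke test; it is not part of this commit, and the import path assumes the `src/data/loader.py` layout named in the docstring:

```python
# Hypothetical smoke test for load_data; not part of this commit.
# Assumes the package layout src/data/loader.py from the docstring above.
import pandas as pd
from src.data.loader import load_data

# Build a tiny CSV fixture with the raw column names the loader expects.
fixture = pd.DataFrame({
    "time": pd.date_range("2025-04-12 09:30", periods=3, freq="min"),
    "open": [5170.25, 5171.00, 5170.50],
    "high": [5171.50, 5171.75, 5171.00],
    "low": [5169.75, 5170.25, 5169.50],
    "close": [5171.00, 5170.50, 5170.75],
    "volume": [120, 95, 143],
})
fixture.to_csv("tiny_sample.csv", index=False)

# load_data should rename the columns, sort by Date, and forward-fill gaps.
df = load_data("tiny_sample.csv")
print(df[["Date", "Open", "High", "Low", "Close", "Volume"]])
```

One follow-up worth a later commit: `df.fillna(method='ffill', inplace=True)` is deprecated in pandas 2.1+; `df.ffill(inplace=True)` is the forward-compatible spelling.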
Binary file not shown.