added some simple nns. working on rnns

This commit is contained in:
klein panic
2025-01-23 15:14:37 -05:00
parent b146bf84e3
commit 7d743e0527
16 changed files with 61425 additions and 0 deletions

File diff suppressed because it is too large

1
src/nn_tests/Makefile Normal file

@@ -0,0 +1 @@
stock_predictor: stock_predictor.c
	gcc -o stock_predictor stock_predictor.c -lm

1
src/nn_tests/README.md Normal file

@@ -0,0 +1 @@
This is a simple feed-forward MLP and I want to make it an LSTM/GRU in pure C, baby.

File diff suppressed because it is too large

20227
src/nn_tests/RNN-LSTM/GTLC.csv Normal file

File diff suppressed because it is too large


@@ -0,0 +1,436 @@
Below is a **minimalist** example of how one might **program an LSTM forward pass in C** (and do a very rough training loop) to handle a **sequence** of inputs, such as your macro/micro/technical factors plus past prices. This is **not production-ready**; it's a demonstration of the core ideas. In real practice, you'd want to handle:
1. **Memory management** carefully (possibly dynamic allocation).
2. **Vectorized operations** for speed (e.g., SSE, AVX, CUDA, etc.).
3. **Gradient computation** (backpropagation through time) if you actually want to *train* the LSTM in C.
4. **Data loading** from files or other sources.
Nevertheless, this skeleton code shows how to **structure** an LSTM in pure C, perform a **forward pass** over a time series, and do a *very basic* gradient step. You could adapt it for your multi-factor stock model by constructing the input vector \(\mathbf{x}(t)\) from your macro/micro/technical variables.
---
## Table of Contents
1. **Definitions and Structures**
2. **Helper Functions (Activation, Matrix Ops)**
3. **LSTM Forward Pass**
4. **Basic MSE Loss & Naive Training Loop**
5. **Example `main()`**
---
# 1. Definitions and Structures
### 1.1 LSTM Hyperparameters
```c
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#define INPUT_SIZE 8 // e.g., number of features: macro + micro + tech + maybe past price
#define HIDDEN_SIZE 4 // size of LSTM hidden state
#define OUTPUT_SIZE 1 // e.g. predict a single scalar: P(t)
// For toy example:
#define TIME_STEPS 5 // length of a single sequence
#define BATCH_SIZE 1 // for simplicity
// A small learning rate for naive training:
#define LEARNING_RATE 0.001
```
Here,
- `INPUT_SIZE` might be 8 if you combine, say, GDP, inflation, interest, earnings, sales, competition, momentum, etc.
- `HIDDEN_SIZE` is how many hidden units (cells) the LSTM has.
- `OUTPUT_SIZE` is the dimension of the output (predict 1D stock price).
- `TIME_STEPS` is the length of the sequence we unroll over.
- `LEARNING_RATE` is just for a naive gradient update.
### 1.2 LSTM Parameter Structure
We'll store weights and biases for **all gates** in one struct. Recall that each gate (input, forget, output, candidate) has its own weight matrices and biases. A naive layout:
- **W_x**: Weight matrix for input-to-hidden (dimensions \(\mathrm{HIDDEN\_SIZE} \times \mathrm{INPUT\_SIZE}\))
- **W_h**: Weight matrix for hidden-to-hidden (dimensions \(\mathrm{HIDDEN\_SIZE} \times \mathrm{HIDDEN\_SIZE}\))
- **b**: Bias vector (dimensions \(\mathrm{HIDDEN\_SIZE}\))
We do this for the input, forget, and output gates, and for the candidate cell state. That's four sets of parameters.
```c
typedef struct {
// Input gate
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
// Forget gate
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
// Output gate
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
// Candidate gate (cell update)
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
// Output layer (hidden->output)
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMParams;
```
We will also store the **hidden state** \(\mathbf{h}(t)\) and **cell state** \(\mathbf{c}(t)\) in a separate struct for clarity:
```c
typedef struct {
float h[HIDDEN_SIZE]; // hidden state
float c[HIDDEN_SIZE]; // cell state
} LSTMState;
```
---
# 2. Helper Functions (Activation, Matrix Ops)
We need a **sigmoid** function and possibly **tanh**. We also need some small matrix or vector routines.
```c
// Sigmoid activation
float sigmoid(float x) {
return 1.0f / (1.0f + expf(-x));
}
// Tanh activation
float tanh_approx(float x) {
// We can just use the standard tanh from math.h
return tanhf(x);
}
// Vector + Bias -> In-place addition
void add_bias(float *y, float *b, int size) {
for(int i = 0; i < size; i++) {
y[i] += b[i];
}
}
// y = W * x (simple matrix-vector multiply), W is [out_dim x in_dim], x is [in_dim]
void matvec(const float *W, const float *x, float *y, int out_dim, int in_dim) {
// We'll assume W is laid out row-major
for(int i = 0; i < out_dim; i++) {
float sum = 0.0f;
for(int j = 0; j < in_dim; j++) {
sum += W[i*in_dim + j] * x[j];
}
y[i] = sum;
}
}
// Elementwise multiply of two vectors
void vec_mul(float *out, const float *a, const float *b, int size) {
for(int i = 0; i < size; i++) {
out[i] = a[i] * b[i];
}
}
// Elementwise add
void vec_add(float *out, const float *a, const float *b, int size) {
for(int i=0; i<size; i++){
out[i] = a[i] + b[i];
}
}
```
> **Note**: In the code above, `matvec()` expects a flat array for `W`, but in our struct `W_ix` is a 2D array. Because C stores 2D arrays contiguously in row-major order, we can cast them to `float*` and pass them directly.
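For instance, a gate pre-activation can be assembled from the two weight matrices using the helpers above. This is a small sketch: `prev_h`, `from_x`, and `from_h` are local names introduced here, while `params` and `x_t` match the structures used in the next section.

```c
// The cast works because C stores a 2D array contiguously in row-major order.
// matvec() overwrites its output, so the input and recurrent contributions
// are computed into separate buffers and then summed.
float from_x[HIDDEN_SIZE], from_h[HIDDEN_SIZE];
matvec((float*)params->W_ix, x_t,   from_x, HIDDEN_SIZE, INPUT_SIZE);
matvec((float*)params->W_ih, prev_h, from_h, HIDDEN_SIZE, HIDDEN_SIZE);
vec_add(from_x, from_x, from_h, HIDDEN_SIZE);
add_bias(from_x, params->b_i, HIDDEN_SIZE); // from_x now holds W_ix*x + W_ih*h + b_i
```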
---
# 3. LSTM Forward Pass
We implement a **single time-step** of LSTM. The equations are:
\[
\begin{aligned}
\mathbf{i}(t) &= \sigma\bigl(W_{ix} \mathbf{x}(t) + W_{ih} \mathbf{h}(t-1) + \mathbf{b}_i\bigr),\\
\mathbf{f}(t) &= \sigma\bigl(W_{fx} \mathbf{x}(t) + W_{fh} \mathbf{h}(t-1) + \mathbf{b}_f\bigr),\\
\mathbf{o}(t) &= \sigma\bigl(W_{ox} \mathbf{x}(t) + W_{oh} \mathbf{h}(t-1) + \mathbf{b}_o\bigr),\\
\mathbf{\tilde{c}}(t) &= \tanh\bigl(W_{cx} \mathbf{x}(t) + W_{ch} \mathbf{h}(t-1) + \mathbf{b}_c\bigr),\\
\mathbf{c}(t) &= \mathbf{f}(t) \odot \mathbf{c}(t-1) \;+\; \mathbf{i}(t) \odot \mathbf{\tilde{c}}(t),\\
\mathbf{h}(t) &= \mathbf{o}(t) \odot \tanh(\mathbf{c}(t)).
\end{aligned}
\]
```c
// Forward pass for ONE time step
void lstm_step(LSTMParams *params, LSTMState *state, float *x_t) {
    float i_t[HIDDEN_SIZE];
    float f_t[HIDDEN_SIZE];
    float o_t[HIDDEN_SIZE];
    float c_hat_t[HIDDEN_SIZE];
    // Temporary vectors for mat-vec results. matvec() overwrites its output,
    // so the input and recurrent contributions are computed separately and
    // summed with vec_add() before the bias is added.
    float tmp_i[HIDDEN_SIZE], tmp_f[HIDDEN_SIZE], tmp_o[HIDDEN_SIZE], tmp_c[HIDDEN_SIZE];
    float rec[HIDDEN_SIZE]; // recurrent (hidden-to-hidden) contribution
    // 1. i(t) = sigmoid(W_ix*x_t + W_ih*h(t-1) + b_i)
    matvec((float*)params->W_ix, x_t, tmp_i, HIDDEN_SIZE, INPUT_SIZE);
    matvec((float*)params->W_ih, state->h, rec, HIDDEN_SIZE, HIDDEN_SIZE);
    vec_add(tmp_i, tmp_i, rec, HIDDEN_SIZE);
    add_bias(tmp_i, params->b_i, HIDDEN_SIZE);
    for(int i=0; i<HIDDEN_SIZE; i++) {
        i_t[i] = sigmoid(tmp_i[i]);
    }
    // 2. f(t) = sigmoid(W_fx*x_t + W_fh*h(t-1) + b_f)
    matvec((float*)params->W_fx, x_t, tmp_f, HIDDEN_SIZE, INPUT_SIZE);
    matvec((float*)params->W_fh, state->h, rec, HIDDEN_SIZE, HIDDEN_SIZE);
    vec_add(tmp_f, tmp_f, rec, HIDDEN_SIZE);
    add_bias(tmp_f, params->b_f, HIDDEN_SIZE);
    for(int i=0; i<HIDDEN_SIZE; i++) {
        f_t[i] = sigmoid(tmp_f[i]);
    }
    // 3. o(t) = sigmoid(W_ox*x_t + W_oh*h(t-1) + b_o)
    matvec((float*)params->W_ox, x_t, tmp_o, HIDDEN_SIZE, INPUT_SIZE);
    matvec((float*)params->W_oh, state->h, rec, HIDDEN_SIZE, HIDDEN_SIZE);
    vec_add(tmp_o, tmp_o, rec, HIDDEN_SIZE);
    add_bias(tmp_o, params->b_o, HIDDEN_SIZE);
    for(int i=0; i<HIDDEN_SIZE; i++) {
        o_t[i] = sigmoid(tmp_o[i]);
    }
    // 4. c_hat(t) = tanh(W_cx*x_t + W_ch*h(t-1) + b_c)
    matvec((float*)params->W_cx, x_t, tmp_c, HIDDEN_SIZE, INPUT_SIZE);
    matvec((float*)params->W_ch, state->h, rec, HIDDEN_SIZE, HIDDEN_SIZE);
    vec_add(tmp_c, tmp_c, rec, HIDDEN_SIZE);
    add_bias(tmp_c, params->b_c, HIDDEN_SIZE);
    for(int i=0; i<HIDDEN_SIZE; i++){
        c_hat_t[i] = tanh_approx(tmp_c[i]);
    }
    // 5. c(t) = f(t)*c(t-1) + i(t)*c_hat(t)
    for(int i=0; i<HIDDEN_SIZE; i++){
        state->c[i] = f_t[i]*state->c[i] + i_t[i]*c_hat_t[i];
    }
    // 6. h(t) = o(t)*tanh(c(t))
    for(int i=0; i<HIDDEN_SIZE; i++){
        state->h[i] = o_t[i] * tanh_approx(state->c[i]);
    }
}
```
### 3.1 LSTM Output Layer
After we get \(\mathbf{h}(t)\), we do a final linear layer:
\[
\hat{y}(t) = W_{hy} \, \mathbf{h}(t) + b_y.
\]
```c
float lstm_output(LSTMParams *params, float *h) {
// Single output dimension => do matvec of dimension [1 x HIDDEN_SIZE] * [HIDDEN_SIZE]
float y = 0.0f;
for(int i=0; i<HIDDEN_SIZE; i++){
y += params->W_hy[0][i] * h[i];
}
y += params->b_y[0];
return y;
}
```
---
# 4. Basic MSE Loss & Naive Training Loop
For a single sequence \(\{x(1), \dots, x(T)\}\) and targets \(\{y(1), \dots, y(T)\}\), we do:
1. Initialize LSTM state: \(\mathbf{h}(0) = 0, \mathbf{c}(0) = 0\).
2. For each \(t\) in \(\{1, \dots, T\}\):
- Compute one LSTM step using `lstm_step()`.
- Get prediction \(\hat{y}(t)\) via `lstm_output()`.
3. Compute MSE vs. target.
4. (Naive) Use partial derivatives w.r.t. each weight to do gradient updates.
- **Below** we only show a dummy gradient update to illustrate the approach. Doing real BPTT in C is quite involved (we need partial derivatives for each gate, accumulate them across timesteps, etc.).
```c
float forward_sequence(LSTMParams *params, float *input_seq, float *target_seq, int seq_len) {
// We'll store the LSTM state
LSTMState state;
// Initialize hidden and cell to zero
for(int i=0; i<HIDDEN_SIZE; i++){
state.h[i] = 0.0f;
state.c[i] = 0.0f;
}
float mse = 0.0f;
// For each timestep in the sequence
for(int t=0; t<seq_len; t++){
// input_seq has shape [seq_len * INPUT_SIZE]
float *x_t = &input_seq[t * INPUT_SIZE];
float y_true = target_seq[t];
// 1. LSTM step
lstm_step(params, &state, x_t);
// 2. Output
float y_pred = lstm_output(params, state.h);
// 3. Accumulate MSE
float err = (y_true - y_pred);
mse += err * err;
// 4. Naive gradient step (just as a placeholder)
// Real backprop requires partial derivatives w.r.t all gates, etc.
// This is just to show how you might do a simplistic update
float grad = -2.0f * err; // d/dy_pred of MSE
for(int i=0; i<HIDDEN_SIZE; i++){
params->W_hy[0][i] -= LEARNING_RATE * grad * state.h[i];
}
params->b_y[0] -= LEARNING_RATE * grad;
}
mse /= (float)seq_len;
return mse;
}
```
> **Important**: The code above is *not* a correct BPTT implementation. We're only applying a gradient step to the final linear layer for demonstration. Proper training would require storing the internal gate values and partial derivatives at each time step, then backpropagating.
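To make that last point concrete, here is a minimal sketch (illustrative, not part of the program above) of the kind of per-time-step cache a full BPTT implementation would keep; the BPTT programs later in this commit use a very similar layout:

```c
// Illustrative cache for BPTT: keep every intermediate value for each of the
// TIME_STEPS unrolled steps so the backward pass can revisit them.
typedef struct {
    float x[TIME_STEPS][INPUT_SIZE];       // inputs fed at each step
    float i[TIME_STEPS][HIDDEN_SIZE];      // input gate activations
    float f[TIME_STEPS][HIDDEN_SIZE];      // forget gate activations
    float o[TIME_STEPS][HIDDEN_SIZE];      // output gate activations
    float c_hat[TIME_STEPS][HIDDEN_SIZE];  // candidate cell values
    float c[TIME_STEPS][HIDDEN_SIZE];      // cell states
    float h[TIME_STEPS][HIDDEN_SIZE];      // hidden states
    float y[TIME_STEPS][OUTPUT_SIZE];      // predictions
} LSTMBPTTCache;
```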
---
# 5. Example `main()` Function
We can **initialize** the LSTM parameters randomly, create a **dummy** input sequence, a **dummy** target sequence, and run `forward_sequence()`.
```c
int main() {
srand((unsigned int)time(NULL));
// 1. Allocate parameters
LSTMParams params;
// 2. Randomly initialize
// For simplicity, small random floats in [-0.1, 0.1]
float range = 0.1f;
float scale = (float)RAND_MAX;
#define RAND_WEIGHT (range * ( (float)rand()/scale*2.0f - 1.0f ))
// Macro to fill an array with random numbers
#define FILL_MATRIX(mat) do { \
for (int i = 0; i < (int)(sizeof(mat)/sizeof(mat[0][0])); i++){ \
((float*)mat)[i] = RAND_WEIGHT; \
} \
} while(0)
FILL_MATRIX(params.W_ix); FILL_MATRIX(params.W_ih);
FILL_MATRIX(params.W_fx); FILL_MATRIX(params.W_fh);
FILL_MATRIX(params.W_ox); FILL_MATRIX(params.W_oh);
FILL_MATRIX(params.W_cx); FILL_MATRIX(params.W_ch);
FILL_MATRIX(params.W_hy);
// Bias arrays
for(int i=0; i<HIDDEN_SIZE; i++){
params.b_i[i] = RAND_WEIGHT;
params.b_f[i] = RAND_WEIGHT;
params.b_o[i] = RAND_WEIGHT;
params.b_c[i] = RAND_WEIGHT;
}
params.b_y[0] = RAND_WEIGHT;
// 3. Create a dummy input sequence:
// Suppose we have TIME_STEPS=5, each with INPUT_SIZE=8
float input_seq[TIME_STEPS * INPUT_SIZE];
float target_seq[TIME_STEPS];
for(int t=0; t<TIME_STEPS; t++){
for(int i=0; i<INPUT_SIZE; i++){
input_seq[t*INPUT_SIZE + i] = (float)(rand() % 100) / 100.0f;
// e.g., random macro/micro/tech features
}
// Suppose the "target" price is also random
target_seq[t] = (float)(rand() % 100) / 10.0f;
}
// 4. A very naive training loop
int epochs = 1000;
for(int e=0; e<epochs; e++){
float mse = forward_sequence(&params, input_seq, target_seq, TIME_STEPS);
if((e+1) % 100 == 0){
printf("Epoch %d, MSE = %f\n", e+1, mse);
}
}
// 5. Check final predictions
{
LSTMState s;
for(int i=0; i<HIDDEN_SIZE; i++){
s.h[i] = 0.0f; s.c[i] = 0.0f;
}
printf("\nFinal predictions:\n");
for(int t=0; t<TIME_STEPS; t++){
float *x_t = &input_seq[t*INPUT_SIZE];
lstm_step(&params, &s, x_t);
float y_pred = lstm_output(&params, s.h);
printf(" t=%d, target=%.3f, pred=%.3f\n", t, target_seq[t], y_pred);
}
}
return 0;
}
```
**Compile and run** (on a Unix-like system, for instance):
```bash
cc -o lstm_example lstm_example.c -lm
./lstm_example
```
You should see it print MSE decreasing over epochs (though with the naive partial update, it might not do much). Then it prints final predictions vs. targets.
---
# Extending to Your Multi-Factor Stock Model
1. **Input Construction**:
- Instead of random data, fill `input_seq` with your real macro/micro/technical variables for each time \(t\); a small sketch of this appears after this list. For instance:
\[
\mathbf{x}(t) = [\,G(t),\; I(t),\; R(t),\; E(t),\; S(t),\; C(t),\; M(t),\; V(t)\,].
\]
- Possibly also include the past price \(P(t-1)\) or log-return \(r(t-1)\) if you want the LSTM to learn from that.
2. **Targets**:
- If you want to predict next price \(P(t)\), store that in `target_seq[t]`.
- Or if you want to predict the change \(\Delta P(t)\) or log-return, store that. The code is essentially the same; you just feed different targets.
3. **Full BPTT**:
- Implement the backward pass for the LSTM, summing gradients from each time step, etc. This is non-trivial but well-documented in many references on building RNNs from scratch.
4. **Longer Sequences & Mini-Batches**:
- Increase `TIME_STEPS`.
- Use more advanced data handling for multiple sequences (batch size > 1).
5. **Production Considerations**:
- For real forecasting, you'd likely want to shift your target so that \(\hat{P}(t)\) uses data up to \(t-1\).
- Do train/test splitting, cross-validation, hyperparameter tuning, etc.
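As a rough illustration of item 1 (and the target choice in item 2), one might fill `input_seq` and `target_seq` as below. The factor arrays (`gdp`, `inflation`, `rates`, `earnings`, `sales`, `competition`, `momentum`, `volatility`, `price`) are hypothetical placeholders for series you would load yourself:

```c
// Hypothetical factor arrays of length TIME_STEPS, loaded elsewhere.
for (int t = 0; t < TIME_STEPS; t++) {
    input_seq[t*INPUT_SIZE + 0] = gdp[t];
    input_seq[t*INPUT_SIZE + 1] = inflation[t];
    input_seq[t*INPUT_SIZE + 2] = rates[t];
    input_seq[t*INPUT_SIZE + 3] = earnings[t];
    input_seq[t*INPUT_SIZE + 4] = sales[t];
    input_seq[t*INPUT_SIZE + 5] = competition[t];
    input_seq[t*INPUT_SIZE + 6] = momentum[t];
    input_seq[t*INPUT_SIZE + 7] = volatility[t];
    // Target: the next price, the price change, or a log-return, per item 2.
    target_seq[t] = price[t];
}
```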
---
## Final Thoughts
This code demonstrates the **core LSTM mechanics** in pure C:
- A **parameter struct** holding weight matrices for each gate.
- A **forward pass** function that calculates gate activations and updates hidden/cell states.
- A **rudimentary training loop** (with only partial gradient updates in the example).
To incorporate your **modular stock model** (macro, micro, technical factors), you'd:
1. **Generate or load** those time series for each time \(t\).
2. **Feed** them into the LSTM as the input vector.
3. **Train** the LSTM to predict \(\Delta P(t)\) or \(P(t)\).
A fully correct solution requires implementing **backpropagation through time** so that the network learns the best gating and transformations for your data. However, this skeleton shows that **yes, you can indeed** implement an LSTM in C to model a complex, factor-based stock-price time series—**it just takes a fair amount of low-level coding** compared to using high-level libraries (Python/TensorFlow/PyTorch, etc.).

Binary file not shown.


@@ -0,0 +1,558 @@
/*******************************************************************************
* gitlab_lstm_nextday.c (Revised)
*
* Key changes from previous version:
* 1. Improved CSV parsing & daily aggregation:
* - Skips malformed rows (missing columns or can't parse float).
* - Skips final daily bar if it has invalid data.
* 2. Basic data validation:
* - If a daily bar has open<=0, high<=0, low<=0, close<=0, or volume<0,
* we discard that day from the final dataset.
* 3. Prints a summary of final daily bars used.
* 4. (Optional) Simple min/max normalization for daily features to help
* prevent exploding values.
*
* Compile:
* cc gitlab_lstm_nextday.c -o gitlab_lstm_nextday -lm
* Run:
* ./gitlab_lstm_nextday path/to/gitlab_5min.csv
*******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <time.h>
/***************************************
* Hyperparams & Dimensions
***************************************/
#define MAX_DAYS 600 /* Safety upper limit, can handle ~2 years if needed */
#define INPUT_SIZE 6 /* daily (Open,High,Low,Close,Volume, e.g., Range) */
#define HIDDEN_SIZE 8
#define OUTPUT_SIZE 1 /* next-day close prediction */
#define EPOCHS 300
#define LEARNING_RATE 0.01f
/***************************************
* Data Structures
***************************************/
/* LSTM parameters */
typedef struct {
/* Input gate */
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
/* Forget gate */
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
/* Output gate */
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
/* Candidate gate */
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
/* Output layer */
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMParams;
/* Parameter gradients, same shape */
typedef struct {
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMGrads;
/* Forward-pass caches for each day t */
typedef struct {
float x[MAX_DAYS][INPUT_SIZE];
float i[MAX_DAYS][HIDDEN_SIZE];
float f[MAX_DAYS][HIDDEN_SIZE];
float o[MAX_DAYS][HIDDEN_SIZE];
float c_hat[MAX_DAYS][HIDDEN_SIZE];
float c[MAX_DAYS][HIDDEN_SIZE];
float h[MAX_DAYS][HIDDEN_SIZE];
float y[MAX_DAYS][OUTPUT_SIZE];
} LSTMCache;
/***************************************
* Utility Functions
***************************************/
static inline float randf(float range) {
float r = (float)rand() / (float)RAND_MAX;
return (r*2.0f - 1.0f)*range;
}
static inline float sigmoid(float x) {
return 1.0f/(1.0f + expf(-x));
}
static inline float dsigmoid_from_val(float s) {
return s*(1.0f - s); /* derivative if s = sigmoid(...) */
}
static inline float tanh_approx(float x) {
return tanhf(x);
}
static inline float dtanh_from_val(float tval) {
return 1.0f - tval*tval;
}
static void init_params(LSTMParams *p, float range) {
float *pf = (float*)p;
int count = sizeof(LSTMParams)/sizeof(float);
for(int i=0; i<count; i++){
pf[i] = randf(range);
}
}
static void zero_grads(LSTMGrads *g) {
memset(g, 0, sizeof(LSTMGrads));
}
static void update_params(LSTMParams *params, LSTMGrads *grads) {
float *p = (float*)params;
float *r = (float*)grads;
int count = sizeof(LSTMParams)/sizeof(float);
for(int i=0; i<count; i++){
p[i] -= LEARNING_RATE*r[i];
}
}
/***************************************
* Aggregated daily bars from intraday
***************************************/
typedef struct {
char date[11]; /* "YYYY-MM-DD" */
float open;
float high;
float low;
float close;
float volume;
} DailyBar;
/* Load intraday 5-min bars, group them by date, produce daily bars.
Skips malformed lines or days with obviously invalid data. */
static int load_and_aggregate_daily(
const char *csv_file,
DailyBar *daily,
int max_days
){
FILE *fp = fopen(csv_file, "r");
if(!fp){
fprintf(stderr, "Could not open file: %s\n", csv_file);
return -1;
}
char line[1024];
int firstRow = 1;
char currentDate[11] = {0};
DailyBar curDay;
int haveCurrent = 0;
int dayCount = 0;
while(fgets(line, sizeof(line), fp)){
if(firstRow){
/* skip header if present */
firstRow = 0;
if(strstr(line, "Timestamp")){
continue;
}
}
char *ts = strtok(line, ",");
char *oStr = strtok(NULL, ",");
char *hStr = strtok(NULL, ",");
char *lStr = strtok(NULL, ",");
char *cStr = strtok(NULL, ",");
char *vStr = strtok(NULL, ",");
if(!ts || !oStr || !hStr || !lStr || !cStr || !vStr) {
/* skip malformed line */
continue;
}
/* extract date (YYYY-MM-DD) from Timestamp */
char dateBuf[11];
strncpy(dateBuf, ts, 10);
dateBuf[10] = '\0';
char *endptr;
float o_val = strtof(oStr, &endptr);
if(endptr == oStr) continue; /* parse fail */
float h_val = strtof(hStr, &endptr);
if(endptr == hStr) continue;
float l_val = strtof(lStr, &endptr);
if(endptr == lStr) continue;
float c_val = strtof(cStr, &endptr);
if(endptr == cStr) continue;
float v_val = strtof(vStr, &endptr);
if(endptr == vStr) continue;
/* If it's a new date, finalize the old day (if any) */
if(!haveCurrent || strcmp(dateBuf, currentDate) != 0){
/* finalize the previous day */
if(haveCurrent){
/* check if curDay is valid: open>0, high>=open, ... etc.
You can customize. We'll do a minimal sanity check. */
if(curDay.open > 0 && curDay.high > 0 && curDay.low > 0
&& curDay.close > 0 && curDay.volume >= 0
&& dayCount < max_days)
{
daily[dayCount++] = curDay;
}
}
/* start new day */
strncpy(currentDate, dateBuf, 11);
curDay.open = o_val;
curDay.high = h_val;
curDay.low = l_val;
curDay.close = c_val;
curDay.volume = v_val;
strncpy(curDay.date, dateBuf, 11);
haveCurrent = 1;
} else {
/* same day => update H,L,C,Vol */
if(h_val > curDay.high) curDay.high = h_val;
if(l_val < curDay.low) curDay.low = l_val;
curDay.close = c_val;
curDay.volume += v_val;
}
}
/* finalize last day if valid */
if(haveCurrent && dayCount < max_days) {
if(curDay.open > 0 && curDay.high > 0 && curDay.low > 0
&& curDay.close > 0 && curDay.volume >= 0)
{
daily[dayCount++] = curDay;
}
}
fclose(fp);
return dayCount;
}
/***************************************
* LSTM Forward
***************************************/
static void lstm_forward(
LSTMParams *params,
float inputs[][INPUT_SIZE],
int seq_len,
LSTMCache *cache
){
float h_prev[HIDDEN_SIZE];
float c_prev[HIDDEN_SIZE];
memset(h_prev, 0, sizeof(h_prev));
memset(c_prev, 0, sizeof(c_prev));
for(int t=0; t<seq_len; t++){
/* store x in cache */
for(int i=0; i<INPUT_SIZE; i++){
cache->x[t][i] = inputs[t][i];
}
float i_in[HIDDEN_SIZE], f_in[HIDDEN_SIZE], o_in[HIDDEN_SIZE], c_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
i_in[i] = params->b_i[i];
f_in[i] = params->b_f[i];
o_in[i] = params->b_o[i];
c_in[i] = params->b_c[i];
}
/* Add input->gate and h_prev->gate */
for(int i=0; i<HIDDEN_SIZE; i++){
for(int j=0; j<INPUT_SIZE; j++){
float val = inputs[t][j];
i_in[i] += params->W_ix[i][j]*val;
f_in[i] += params->W_fx[i][j]*val;
o_in[i] += params->W_ox[i][j]*val;
c_in[i] += params->W_cx[i][j]*val;
}
for(int j=0; j<HIDDEN_SIZE; j++){
float hval = h_prev[j];
i_in[i] += params->W_ih[i][j]*hval;
f_in[i] += params->W_fh[i][j]*hval;
o_in[i] += params->W_oh[i][j]*hval;
c_in[i] += params->W_ch[i][j]*hval;
}
}
/* Activations */
for(int i=0; i<HIDDEN_SIZE; i++){
cache->i[t][i] = sigmoid(i_in[i]);
cache->f[t][i] = sigmoid(f_in[i]);
cache->o[t][i] = sigmoid(o_in[i]);
cache->c_hat[t][i] = tanh_approx(c_in[i]);
}
/* c(t), h(t) */
for(int i=0; i<HIDDEN_SIZE; i++){
cache->c[t][i] = cache->f[t][i]*c_prev[i] + cache->i[t][i]*cache->c_hat[t][i];
}
for(int i=0; i<HIDDEN_SIZE; i++){
cache->h[t][i] = cache->o[t][i]*tanh_approx(cache->c[t][i]);
}
/* output y(t) */
{
float sum = params->b_y[0];
for(int k=0; k<HIDDEN_SIZE; k++){
sum += params->W_hy[0][k]*cache->h[t][k];
}
cache->y[t][0] = sum;
}
/* update h_prev, c_prev */
for(int i=0; i<HIDDEN_SIZE; i++){
h_prev[i] = cache->h[t][i];
c_prev[i] = cache->c[t][i];
}
}
}
/***************************************
* BPTT (MSE on next-day close)
***************************************/
static float lstm_backward(
LSTMParams *params,
LSTMCache *cache,
float targets[][OUTPUT_SIZE],
int seq_len,
LSTMGrads *grads
){
float total_loss = 0.f;
float dh_next[HIDDEN_SIZE];
float dc_next[HIDDEN_SIZE];
memset(dh_next, 0, sizeof(dh_next));
memset(dc_next, 0, sizeof(dc_next));
for(int t=seq_len-1; t>=0; t--){
if(t == seq_len-1){
/* no next day for the final day -> skip */
continue;
}
float pred = cache->y[t][0];
float truth = targets[t+1][0];
float diff = (pred - truth);
float dy = diff;
total_loss += 0.5f*diff*diff;
/* Output layer grads => W_hy, b_y, dh(t) */
float dh[HIDDEN_SIZE];
memset(dh, 0, sizeof(dh));
for(int i=0; i<HIDDEN_SIZE; i++){
grads->W_hy[0][i] += dy*cache->h[t][i];
dh[i] = dy*params->W_hy[0][i];
}
grads->b_y[0] += dy;
/* add dh_next from future time step */
for(int i=0; i<HIDDEN_SIZE; i++){
dh[i] += dh_next[i];
}
/* h(t) = o(t)*tanh(c(t)) => do, dc */
float do_[HIDDEN_SIZE], dc[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float o_val = cache->o[t][i];
float c_val = cache->c[t][i];
float tanhc = tanh_approx(c_val);
do_[i] = tanhc*dh[i];
dc[i] = o_val*(1.f - tanhc*tanhc)*dh[i];
}
/* plus dc_next */
for(int i=0; i<HIDDEN_SIZE; i++){
dc[i] += dc_next[i];
}
/* c(t)=f(t)*c(t-1) + i(t)*c_hat(t) => di, df, dc_hat */
float di[HIDDEN_SIZE], df[HIDDEN_SIZE], dc_hat[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float c_prev = (t==0)? 0.f : cache->c[t-1][i];
di[i] = dc[i]*cache->c_hat[t][i];
df[i] = dc[i]*c_prev;
dc_hat[i] = dc[i]*cache->i[t][i];
}
/* gate pre-activation grads */
float do_in[HIDDEN_SIZE], di_in[HIDDEN_SIZE], df_in[HIDDEN_SIZE], dc_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
do_in[i] = do_[i]*dsigmoid_from_val(cache->o[t][i]);
di_in[i] = di[i]*dsigmoid_from_val(cache->i[t][i]);
df_in[i] = df[i]*dsigmoid_from_val(cache->f[t][i]);
float ch = cache->c_hat[t][i];
dc_in[i] = dc_hat[i]*(1.f - ch*ch);
}
float dh_prev[HIDDEN_SIZE];
memset(dh_prev, 0, sizeof(dh_prev));
/* Line continuations are required so the whole do/while body stays in the macro. */
#define ACCUM_GATE(Wx,Wh,bias,d_in) \
    do { \
        for(int i=0; i<HIDDEN_SIZE; i++){ \
            float dval = d_in[i]; \
            /* dW_x => x(t) */ \
            for(int j=0; j<INPUT_SIZE; j++){ \
                grads->Wx[i][j] += dval*cache->x[t][j]; \
            } \
            /* dW_h => h(t-1) if t>0 */ \
            if(t>0) { \
                for(int j=0; j<HIDDEN_SIZE; j++){ \
                    grads->Wh[i][j] += dval*cache->h[t-1][j]; \
                    dh_prev[j] += dval*params->Wh[i][j]; \
                } \
            } \
            grads->bias[i] += dval; \
        } \
    } while(0)
ACCUM_GATE(W_ix, W_ih, b_i, di_in);
ACCUM_GATE(W_fx, W_fh, b_f, df_in);
ACCUM_GATE(W_ox, W_oh, b_o, do_in);
ACCUM_GATE(W_cx, W_ch, b_c, dc_in);
float dc_prev[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
dc_prev[i] = dc[i]*cache->f[t][i];
}
memcpy(dh_next, dh_prev, sizeof(dh_prev));
memcpy(dc_next, dc_prev, sizeof(dc_prev));
}
return total_loss;
}
/***************************************
* Main
***************************************/
int main(int argc, char *argv[]){
if(argc < 2){
fprintf(stderr, "Usage: %s path/to/gitlab_5min.csv\n", argv[0]);
return 1;
}
srand((unsigned)time(NULL));
/* 1) Load intraday CSV, aggregate daily bars */
DailyBar dailyData[MAX_DAYS];
int rawCount = load_and_aggregate_daily(argv[1], dailyData, MAX_DAYS);
if(rawCount <= 1){
fprintf(stderr,"No valid daily bars found in CSV.\n");
return 1;
}
/* 2) Build final array of valid days only
- Possibly do more checks or normalizations
*/
float inputs[MAX_DAYS][INPUT_SIZE];
float targets[MAX_DAYS][OUTPUT_SIZE];
int dayCount = 0;
for(int i=0; i<rawCount; i++){
DailyBar *d = &dailyData[i];
/* Basic check again (some days might slip if partial): */
if(d->open <= 0.f || d->high <= 0.f || d->low <=0.f || d->close<=0.f || d->volume<0.f){
continue;
}
inputs[dayCount][0] = d->open;
inputs[dayCount][1] = d->high;
inputs[dayCount][2] = d->low;
inputs[dayCount][3] = d->close;
inputs[dayCount][4] = d->volume;
inputs[dayCount][5] = d->high - d->low; /* example range */
/* target = today's close, for reference. Next-day logic used in backprop. */
targets[dayCount][0] = d->close;
dayCount++;
}
if(dayCount<=1){
fprintf(stderr,"After validation, we have only %d daily bars.\n",dayCount);
return 1;
}
printf("Final daily bars used: %d\n", dayCount);
printf("First day: %s O=%.2f H=%.2f L=%.2f C=%.2f V=%.0f\n",
dailyData[0].date, dailyData[0].open, dailyData[0].high, dailyData[0].low,
dailyData[0].close, dailyData[0].volume);
printf("Last day: %s O=%.2f H=%.2f L=%.2f C=%.2f V=%.0f\n",
dailyData[dayCount-1].date, dailyData[dayCount-1].open,
dailyData[dayCount-1].high, dailyData[dayCount-1].low,
dailyData[dayCount-1].close, dailyData[dayCount-1].volume);
/* Optional: Basic min-max or standard normalization to avoid large volumes, etc. */
float minVal[INPUT_SIZE], maxVal[INPUT_SIZE];
for(int j=0; j<INPUT_SIZE; j++){
minVal[j] = 1e9f; maxVal[j] = -1e9f;
}
for(int i=0; i<dayCount; i++){
for(int j=0; j<INPUT_SIZE; j++){
float v = inputs[i][j];
if(v<minVal[j]) minVal[j]=v;
if(v>maxVal[j]) maxVal[j]=v;
}
}
for(int i=0; i<dayCount; i++){
for(int j=0; j<INPUT_SIZE; j++){
float denom = (maxVal[j]-minVal[j]);
if(denom<1e-9f) denom=1.f;
inputs[i][j] = (inputs[i][j] - minVal[j]) / denom;
}
}
/* 3) LSTM Setup */
LSTMParams params;
init_params(&params, 0.1f);
LSTMGrads grads;
zero_grads(&grads);
static LSTMCache cache;
/* 4) Train/Validation split: e.g. last 30 days as validation */
int trainLen = dayCount - 30;
if(trainLen < 2) {
trainLen = dayCount; /* fallback if not enough days */
}
/* 5) Training */
for(int e=1; e<=EPOCHS; e++){
zero_grads(&grads);
lstm_forward(&params, inputs, trainLen, &cache);
float loss = lstm_backward(&params, &cache, targets, trainLen, &grads);
update_params(&params, &grads);
if(e%50==0){
printf("Epoch %3d, TrainLoss=%.4f\n", e, loss);
if(isnan(loss)) {
fprintf(stderr, "Encountered NaN at epoch %d, aborting.\n", e);
break;
}
}
}
/* 6) Evaluate one-day-behind on entire dataset */
lstm_forward(&params, inputs, dayCount, &cache);
printf("\nValidation (last 30 days):\n");
int valStart = trainLen-1;
if(valStart<0) valStart=0;
for(int t=valStart; t<dayCount-1; t++){
float y_pred = cache.y[t][0];
float y_true = targets[t+1][0];
printf(" Day %3d => Predict=%.3f, Actual=%.3f [Day %s -> next day %s]\n",
t, y_pred, y_true,
dailyData[t].date, dailyData[t+1].date);
}
return 0;
}


@@ -0,0 +1,543 @@
/*******************************************************************************
* stock_lstm_extended.c
*
* Demonstrates:
* - Synthetic data for multiple sequences
* - Next-day price prediction (shifted targets)
* - Variable sequence lengths
* - A single-layer LSTM with BPTT
*
* Compile:
* cc -o stock_lstm_extended stock_lstm_extended.c -lm
*
* Run:
* ./stock_lstm_extended
*******************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
/***********************************
* Hyperparameters
***********************************/
#define MAX_SEQ_LEN 50 // max length of a sequence
#define BATCH_SIZE 3 // how many sequences in this batch
#define INPUT_SIZE 8 // e.g., G(t), I(t), R(t), E(t), S(t), C(t), M(t), V(t)
#define HIDDEN_SIZE 6 // number of LSTM hidden units
#define OUTPUT_SIZE 1 // predict next-day price as a single value
#define EPOCHS 200
#define LR 0.01f // learning rate
/***********************************
* Data Structures
***********************************/
// LSTM parameters
typedef struct {
// input gate
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
// forget gate
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
// output gate
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
// candidate gate
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
// output layer (h -> y)
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMParams;
// LSTM parameter gradients
typedef struct {
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMGrads;
/* We store intermediate values for each time step **per sequence**.
So dimension: [batch_size][max_seq_len].
*/
typedef struct {
// input (copied for convenience)
float x[BATCH_SIZE][MAX_SEQ_LEN][INPUT_SIZE];
// gates
float i[BATCH_SIZE][MAX_SEQ_LEN][HIDDEN_SIZE];
float f[BATCH_SIZE][MAX_SEQ_LEN][HIDDEN_SIZE];
float o[BATCH_SIZE][MAX_SEQ_LEN][HIDDEN_SIZE];
float c_hat[BATCH_SIZE][MAX_SEQ_LEN][HIDDEN_SIZE];
// states
float c[BATCH_SIZE][MAX_SEQ_LEN][HIDDEN_SIZE];
float h[BATCH_SIZE][MAX_SEQ_LEN][HIDDEN_SIZE];
// output
float y[BATCH_SIZE][MAX_SEQ_LEN][OUTPUT_SIZE];
} LSTMCache;
/***********************************
* Math helpers
***********************************/
static inline float randf(float range) {
// random in [-range, range]
float r = (float)rand() / (float)RAND_MAX;
return (r*2.0f - 1.0f)*range;
}
static inline float sigmoid(float x) {
return 1.0f / (1.0f + expf(-x));
}
static inline float dsigmoid_from_val(float s) {
// derivative given the *sigmoid value* s, i.e. s*(1-s)
return s*(1.0f - s);
}
static inline float tanh_approx(float x) {
return tanhf(x);
}
static inline float dtanh_from_val(float tval) {
// derivative given tanh(t) = tval => 1 - tval^2
return 1.0f - tval*tval;
}
/***********************************
* Parameter Initialization
***********************************/
void init_params(LSTMParams *p, float range) {
float *pf = (float*)p;
int count = sizeof(LSTMParams)/sizeof(float);
for(int i=0; i<count; i++){
pf[i] = randf(range);
}
}
void zero_grads(LSTMGrads *g) {
memset(g, 0, sizeof(LSTMGrads));
}
/***********************************
* Forward Pass (per sequence)
* We handle each sequence in the batch independently.
***********************************/
void lstm_forward(
LSTMParams *params,
LSTMCache *cache,
float **inputs, // inputs[s] is a pointer to array of (seq_len_s * INPUT_SIZE)
int *seq_len, // lengths of each sequence
float **h0, float **c0, // initial hidden and cell states, shape [batch_size][HIDDEN_SIZE]
int batch_size
)
{
// For each sequence in the batch
for(int s=0; s<batch_size; s++){
int L = seq_len[s];
// copy in h0, c0
float h_prev[HIDDEN_SIZE], c_prev[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
h_prev[i] = h0[s][i];
c_prev[i] = c0[s][i];
}
// Unroll for L time steps
for(int t=0; t<L; t++){
// 1. Copy input to cache
for(int i=0; i<INPUT_SIZE; i++){
cache->x[s][t][i] = inputs[s][t*INPUT_SIZE + i];
}
// 2. Compute gates
// We'll do i(t) = sigmoid( W_ix x(t) + W_ih h_prev + b_i ), etc.
float i_in[HIDDEN_SIZE], f_in[HIDDEN_SIZE], o_in[HIDDEN_SIZE], c_in[HIDDEN_SIZE];
// Zero them first
for(int i=0; i<HIDDEN_SIZE; i++){
i_in[i] = params->b_i[i];
f_in[i] = params->b_f[i];
o_in[i] = params->b_o[i];
c_in[i] = params->b_c[i];
}
// input->gate: i_in, f_in, o_in, c_in
// i_in += W_ix x(t)
for(int i=0; i<HIDDEN_SIZE; i++){
for(int j=0; j<INPUT_SIZE; j++){
float x_val = cache->x[s][t][j];
i_in[i] += params->W_ix[i][j]*x_val;
f_in[i] += params->W_fx[i][j]*x_val;
o_in[i] += params->W_ox[i][j]*x_val;
c_in[i] += params->W_cx[i][j]*x_val;
}
}
// i_in += W_ih h_prev
for(int i=0; i<HIDDEN_SIZE; i++){
for(int j=0; j<HIDDEN_SIZE; j++){
float h_val = h_prev[j];
i_in[i] += params->W_ih[i][j]*h_val;
f_in[i] += params->W_fh[i][j]*h_val;
o_in[i] += params->W_oh[i][j]*h_val;
c_in[i] += params->W_ch[i][j]*h_val;
}
}
// Activation
for(int i=0; i<HIDDEN_SIZE; i++){
cache->i[s][t][i] = sigmoid(i_in[i]);
cache->f[s][t][i] = sigmoid(f_in[i]);
cache->o[s][t][i] = sigmoid(o_in[i]);
cache->c_hat[s][t][i] = tanh_approx(c_in[i]);
}
// 3. c(t) = f(t)*c_prev + i(t)*c_hat(t)
for(int i=0; i<HIDDEN_SIZE; i++){
cache->c[s][t][i] = cache->f[s][t][i]*c_prev[i]
+ cache->i[s][t][i]*cache->c_hat[s][t][i];
}
// 4. h(t) = o(t)*tanh(c(t))
for(int i=0; i<HIDDEN_SIZE; i++){
cache->h[s][t][i] = cache->o[s][t][i] * tanh_approx(cache->c[s][t][i]);
}
// 5. output layer: y = W_hy h(t) + b_y
for(int out_i=0; out_i<OUTPUT_SIZE; out_i++){
float sum= params->b_y[out_i];
for(int j=0; j<HIDDEN_SIZE; j++){
sum += params->W_hy[out_i][j]*cache->h[s][t][j];
}
cache->y[s][t][out_i] = sum;
}
// 6. Update h_prev, c_prev
for(int i=0; i<HIDDEN_SIZE; i++){
h_prev[i] = cache->h[s][t][i];
c_prev[i] = cache->c[s][t][i];
}
} // end for t
} // end for s
}
/***********************************
* Backprop (BPTT) + MSE Loss
*
* We'll shift the target by +1 for "next-day" prediction:
* => The prediction for time t is used to match target[t+1].
* => There's no target for the last time step in each sequence.
***********************************/
float lstm_backward(
LSTMParams *params, LSTMCache *cache, LSTMGrads *grads,
float **targets, // targets[s][t*OUTPUT_SIZE + ?]
int *seq_len, // for each sequence
int batch_size
)
{
float total_loss = 0.0f;
// For storing d h(t-1), d c(t-1) across time
static float dh_next[BATCH_SIZE][HIDDEN_SIZE];
static float dc_next[BATCH_SIZE][HIDDEN_SIZE];
memset(dh_next, 0, sizeof(dh_next));
memset(dc_next, 0, sizeof(dc_next));
// Work backwards
for(int s=0; s<batch_size; s++){
int L = seq_len[s];
for(int t=L-1; t>=0; t--){
// Because we predict "next-day" price:
// The target for time t is actually the data at t+1 => if (t == L-1), no target
if(t == L-1){
// no next-day target for the very last step
continue;
}
// 1. MSE derivative wrt y(t)
float dy[OUTPUT_SIZE];
float y_pred = cache->y[s][t][0];
float y_true = targets[s][(t+1)*OUTPUT_SIZE + 0]; // next-day target
float diff = (y_pred - y_true);
dy[0] = diff; // dL/dy
total_loss += 0.5f * diff*diff;
// 2. Backprop to output layer => W_hy, b_y, and dh(t)
float dh[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
dh[i] = 0.0f;
}
// single output dimension => out_i=0
for(int i=0; i<HIDDEN_SIZE; i++){
grads->W_hy[0][i] += dy[0]*cache->h[s][t][i];
dh[i] += dy[0]*params->W_hy[0][i];
}
grads->b_y[0] += dy[0];
// add dh_next from future time step
for(int i=0; i<HIDDEN_SIZE; i++){
dh[i] += dh_next[s][i];
}
// 3. backprop through h(t) = o(t)*tanh(c(t))
float *o_t = cache->o[s][t];
float *c_t = cache->c[s][t];
float *i_t = cache->i[s][t];
float *f_t = cache->f[s][t];
float *c_hat_t = cache->c_hat[s][t];
float do_t[HIDDEN_SIZE], dc_t[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float tanhc = tanh_approx(c_t[i]);
do_t[i] = tanhc * dh[i]; // partial w.r.t. o_in
// partial w.r.t. c(t) from dh
dc_t[i] = o_t[i]*(1.0f - tanhc*tanhc)*dh[i];
}
// plus dc_next
for(int i=0; i<HIDDEN_SIZE; i++){
dc_t[i] += dc_next[s][i];
}
// 4. c(t) = f(t)*c(t-1) + i(t)*c_hat(t)
// => partial wrt f(t) is c(t-1), partial wrt i(t) is c_hat(t), partial wrt c_hat(t) is i(t)
// We need c(t-1). If t=0, c(t-1)=0. We'll handle that carefully:
float dc_prev_val[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float c_prev = (t==0) ? 0.0f : cache->c[s][t-1][i];
dc_prev_val[i] = dc_t[i]*f_t[i]; // for next iteration
}
float di_t[HIDDEN_SIZE], df_t[HIDDEN_SIZE], do_in[HIDDEN_SIZE], dc_hat_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
di_t[i] = dc_t[i]*c_hat_t[i]; // derivative wrt i(t)
df_t[i] = dc_t[i]*((t==0)?0.0f:cache->c[s][t-1][i]); // wrt f(t)
}
// c_hat(t) = tanh(c_in)
for(int i=0; i<HIDDEN_SIZE; i++){
dc_hat_in[i] = dc_t[i]*i_t[i]*(1.0f - c_hat_t[i]*c_hat_t[i]);
}
// do_in: do_t[i] => derivative wrt logistic pre-activation => multiply by dsigmoid(o_t[i])
for(int i=0; i<HIDDEN_SIZE; i++){
do_in[i] = do_t[i]*dsigmoid_from_val(o_t[i]);
}
// same for di_t, df_t => multiply by dsigmoid
float di_in[HIDDEN_SIZE], df_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
di_in[i] = di_t[i]*dsigmoid_from_val(i_t[i]);
df_in[i] = df_t[i]*dsigmoid_from_val(f_t[i]);
}
// 5. Now we accumulate parameter grads for (i_in, f_in, o_in, c_hat_in).
// We'll unify c_hat_in under "dc_in" as if it's a separate gate with tanh activation.
float dc_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
dc_in[i] = dc_hat_in[i]; // already includes tanh derivative
}
float dh_prev[HIDDEN_SIZE], dc_prev[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
dh_prev[i] = 0.f;
dc_prev[i] = dc_prev_val[i];
}
// x(t) and h(t-1)
float *x_t = cache->x[s][t];
float *h_prev;
if(t == 0) {
// no real "h(t-1)", assume 0
static float zero_h[HIDDEN_SIZE];
h_prev = zero_h;
} else {
h_prev = cache->h[s][t-1];
}
// For each gate: i_in, f_in, o_in, c_in
// i_in => W_ix, W_ih, b_i
for(int i=0; i<HIDDEN_SIZE; i++){
float dval = di_in[i];
// W_ix
for(int j=0; j<INPUT_SIZE; j++){
grads->W_ix[i][j] += dval*x_t[j];
}
// W_ih
for(int j=0; j<HIDDEN_SIZE; j++){
grads->W_ih[i][j] += dval*h_prev[j];
// accumulate dh_prev
dh_prev[j] += dval*params->W_ih[i][j];
}
// b_i
grads->b_i[i] += dval;
}
// f_in => W_fx, W_fh, b_f
for(int i=0; i<HIDDEN_SIZE; i++){
float dval = df_in[i];
for(int j=0; j<INPUT_SIZE; j++){
grads->W_fx[i][j] += dval*x_t[j];
}
for(int j=0; j<HIDDEN_SIZE; j++){
grads->W_fh[i][j] += dval*h_prev[j];
dh_prev[j] += dval*params->W_fh[i][j];
}
grads->b_f[i] += dval;
}
// o_in => W_ox, W_oh, b_o
for(int i=0; i<HIDDEN_SIZE; i++){
float dval = do_in[i];
for(int j=0; j<INPUT_SIZE; j++){
grads->W_ox[i][j] += dval*x_t[j];
}
for(int j=0; j<HIDDEN_SIZE; j++){
grads->W_oh[i][j] += dval*h_prev[j];
dh_prev[j] += dval*params->W_oh[i][j];
}
grads->b_o[i] += dval;
}
// c_in => W_cx, W_ch, b_c
for(int i=0; i<HIDDEN_SIZE; i++){
float dval = dc_in[i];
for(int j=0; j<INPUT_SIZE; j++){
grads->W_cx[i][j] += dval*x_t[j];
}
for(int j=0; j<HIDDEN_SIZE; j++){
grads->W_ch[i][j] += dval*h_prev[j];
dh_prev[j] += dval*params->W_ch[i][j];
}
grads->b_c[i] += dval;
}
// 6. set dh_next, dc_next for next iteration
for(int i=0; i<HIDDEN_SIZE; i++){
dh_next[s][i] = dh_prev[i];
dc_next[s][i] = dc_prev[i];
}
} // end for t
} // end for s
return total_loss;
}
/***********************************
* Grad Update (simple SGD)
***********************************/
void update_params(LSTMParams *params, LSTMGrads *grads) {
float *p = (float*)params;
float *g = (float*)grads;
int N = sizeof(LSTMParams)/sizeof(float);
for(int i=0; i<N; i++){
p[i] -= LR*g[i];
}
}
/***********************************
* Main: Demonstration
***********************************/
int main(){
srand((unsigned)time(NULL));
// We will have BATCH_SIZE sequences, each up to MAX_SEQ_LEN.
// Let's define variable lengths for each.
int seq_len[BATCH_SIZE];
seq_len[0] = 10; // shorter
seq_len[1] = 15; // medium
seq_len[2] = 20; // longer
// We store the "inputs" and "targets" in dynamic arrays:
// For each sequence s, we have seq_len[s]*INPUT_SIZE, and seq_len[s]*OUTPUT_SIZE (although the last step has no target).
float *inputs[BATCH_SIZE];
float *targets[BATCH_SIZE];
for(int s=0; s<BATCH_SIZE; s++){
inputs[s] = (float*)malloc(seq_len[s]*INPUT_SIZE*sizeof(float));
targets[s] = (float*)malloc(seq_len[s]*OUTPUT_SIZE*sizeof(float));
}
// Synthetic data:
// - We'll create "true price" P(t) that random-walks.
// - We'll create random macro factors in [0..1] and let the "price" evolve.
// - We'll store them in inputs[s], and the "price" in a separate array. Then the "price" at t will be an input. Next-day price is the target for time t.
for(int s=0; s<BATCH_SIZE; s++){
float cur_price = 10.0f + (rand()%100)/10.0f; // random start in [10..20]
for(int t=0; t<seq_len[s]; t++){
// macro/micro/tech
for(int i=0; i<INPUT_SIZE-1; i++){
inputs[s][t*INPUT_SIZE + i] = (float)(rand()%100)/100.0f;
}
// let's store current price as the last input factor
inputs[s][t*INPUT_SIZE + (INPUT_SIZE-1)] = cur_price;
// next day price (target) we do a random walk step
float noise = (float)(rand()%100 - 50)/100.0f; // in [-0.5..0.5]
float next_price = cur_price + noise;
if(next_price < 0.0f) next_price = 0.0f; // clamp
// The "target" for time t is next day's price
targets[s][t*OUTPUT_SIZE + 0] = next_price;
// Update cur_price for next iteration
cur_price = next_price;
}
}
// LSTM parameters & grads
LSTMParams params;
LSTMGrads grads;
init_params(&params, 0.1f); // random init
zero_grads(&grads);
// LSTM cache
static LSTMCache cache;
// initial states (h0, c0) for each sequence
float *h0[BATCH_SIZE], *c0[BATCH_SIZE];
for(int s=0; s<BATCH_SIZE; s++){
h0[s] = (float*)calloc(HIDDEN_SIZE, sizeof(float));
c0[s] = (float*)calloc(HIDDEN_SIZE, sizeof(float));
}
// Training
for(int e=0; e<EPOCHS; e++){
zero_grads(&grads);
// Forward
lstm_forward(&params, &cache, inputs, seq_len, h0, c0, BATCH_SIZE);
// Backward
float loss = lstm_backward(&params, &cache, &grads, targets, seq_len, BATCH_SIZE);
// Update
update_params(&params, &grads);
if((e+1)%20==0){
printf("Epoch %3d, Loss=%.4f\n", e+1, loss);
}
}
// Check final predictions
printf("\nFinal Predictions vs. Targets (showing last sequence):\n");
int s = BATCH_SIZE-1; // show the longest sequence
for(int t=0; t<seq_len[s]; t++){
float y_pred = cache.y[s][t][0];
// target is next-day => if t==seq_len[s]-1, no real next day
if(t < seq_len[s]-1){
float y_true = targets[s][(t+1)*OUTPUT_SIZE + 0];
printf(" t=%2d: pred=%.3f, next-day-true=%.3f\n", t, y_pred, y_true);
} else {
printf(" t=%2d: pred=%.3f, next-day-true=N/A (last step)\n", t, y_pred);
}
}
// Cleanup
for(int s=0; s<BATCH_SIZE; s++){
free(inputs[s]);
free(targets[s]);
free(h0[s]);
free(c0[s]);
}
return 0;
}


@@ -0,0 +1,406 @@
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <time.h>
/********************************************************
* Hyperparameters
********************************************************/
#define BATCH_SIZE 1 /* We'll do one sequence at a time for simplicity */
#define MAX_SEQ_LEN 50 /* Maximum length of the time series */
#define INPUT_SIZE 8 /* # of input features (macro/micro/technical + current price) */
#define HIDDEN_SIZE 6
#define OUTPUT_SIZE 1 /* Predict next-day price */
#define EPOCHS 200
#define LEARNING_RATE 0.01f
/********************************************************
* LSTM Parameter Structures
********************************************************/
typedef struct {
/* Input gate */
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
/* Forget gate */
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
/* Output gate */
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
/* Candidate gate */
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
/* Output layer (h->y) */
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMParams;
typedef struct {
float W_ix[HIDDEN_SIZE][INPUT_SIZE];
float W_ih[HIDDEN_SIZE][HIDDEN_SIZE];
float b_i[HIDDEN_SIZE];
float W_fx[HIDDEN_SIZE][INPUT_SIZE];
float W_fh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_f[HIDDEN_SIZE];
float W_ox[HIDDEN_SIZE][INPUT_SIZE];
float W_oh[HIDDEN_SIZE][HIDDEN_SIZE];
float b_o[HIDDEN_SIZE];
float W_cx[HIDDEN_SIZE][INPUT_SIZE];
float W_ch[HIDDEN_SIZE][HIDDEN_SIZE];
float b_c[HIDDEN_SIZE];
float W_hy[OUTPUT_SIZE][HIDDEN_SIZE];
float b_y[OUTPUT_SIZE];
} LSTMGrads;
typedef struct {
float x[MAX_SEQ_LEN][INPUT_SIZE];
float i[MAX_SEQ_LEN][HIDDEN_SIZE];
float f[MAX_SEQ_LEN][HIDDEN_SIZE];
float o[MAX_SEQ_LEN][HIDDEN_SIZE];
float c_hat[MAX_SEQ_LEN][HIDDEN_SIZE];
float c[MAX_SEQ_LEN][HIDDEN_SIZE];
float h[MAX_SEQ_LEN][HIDDEN_SIZE];
float y[MAX_SEQ_LEN][OUTPUT_SIZE];
} LSTMCache;
/********************************************************
* Utilities
********************************************************/
static inline float randf(float range) {
float r = (float)rand() / (float)RAND_MAX;
return (r*2.0f - 1.0f)*range;
}
static inline float sigmoid(float x) {
return 1.0f / (1.0f + expf(-x));
}
static inline float dsigmoid_from_val(float val) {
return val*(1.0f - val);
}
static inline float tanh_approx(float x) {
return tanhf(x);
}
static inline float dtanh_from_val(float val) {
return 1.0f - val*val;
}
static void init_params(LSTMParams *p, float range) {
float *pf = (float*)p;
int count = sizeof(LSTMParams)/sizeof(float);
for(int i=0; i<count; i++){
pf[i] = randf(range);
}
}
static void zero_grads(LSTMGrads *g) {
memset(g, 0, sizeof(LSTMGrads));
}
static void update_params(LSTMParams *params, LSTMGrads *grads) {
float *p = (float*)params;
float *r = (float*)grads;
int count = sizeof(LSTMParams)/sizeof(float);
for(int i=0; i<count; i++){
p[i] -= LEARNING_RATE * r[i];
}
}
/********************************************************
* Forward Pass for a single sequence
********************************************************/
static void lstm_forward(
LSTMParams *params,
float inputs[][INPUT_SIZE], /* shape [seq_len][INPUT_SIZE] */
int seq_len,
LSTMCache *cache,
float *h0, float *c0
){
float h_prev[HIDDEN_SIZE], c_prev[HIDDEN_SIZE];
memcpy(h_prev, h0, sizeof(float)*HIDDEN_SIZE);
memcpy(c_prev, c0, sizeof(float)*HIDDEN_SIZE);
for(int t=0; t<seq_len; t++){
memcpy(cache->x[t], inputs[t], INPUT_SIZE*sizeof(float));
/* Gate pre-activations */
float i_in[HIDDEN_SIZE], f_in[HIDDEN_SIZE], o_in[HIDDEN_SIZE], c_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
i_in[i] = params->b_i[i];
f_in[i] = params->b_f[i];
o_in[i] = params->b_o[i];
c_in[i] = params->b_c[i];
}
/* Add x->(gate) and h_prev->(gate) contributions */
for(int i=0; i<HIDDEN_SIZE; i++){
for(int j=0; j<INPUT_SIZE; j++){
float x_val = inputs[t][j];
i_in[i] += params->W_ix[i][j]*x_val;
f_in[i] += params->W_fx[i][j]*x_val;
o_in[i] += params->W_ox[i][j]*x_val;
c_in[i] += params->W_cx[i][j]*x_val;
}
for(int j=0; j<HIDDEN_SIZE; j++){
float h_val = h_prev[j];
i_in[i] += params->W_ih[i][j]*h_val;
f_in[i] += params->W_fh[i][j]*h_val;
o_in[i] += params->W_oh[i][j]*h_val;
c_in[i] += params->W_ch[i][j]*h_val;
}
}
/* Activations */
for(int i=0; i<HIDDEN_SIZE; i++){
cache->i[t][i] = sigmoid(i_in[i]);
cache->f[t][i] = sigmoid(f_in[i]);
cache->o[t][i] = sigmoid(o_in[i]);
cache->c_hat[t][i] = tanh_approx(c_in[i]);
}
/* Cell state */
for(int i=0; i<HIDDEN_SIZE; i++){
cache->c[t][i] = cache->f[t][i]*c_prev[i] + cache->i[t][i]*cache->c_hat[t][i];
}
/* Hidden state */
for(int i=0; i<HIDDEN_SIZE; i++){
cache->h[t][i] = cache->o[t][i]*tanh_approx(cache->c[t][i]);
}
/* Output layer */
for(int out_i=0; out_i<OUTPUT_SIZE; out_i++){
float sum = params->b_y[out_i];
for(int j=0; j<HIDDEN_SIZE; j++){
sum += params->W_hy[out_i][j]*cache->h[t][j];
}
cache->y[t][out_i] = sum;
}
/* Update h_prev, c_prev */
for(int i=0; i<HIDDEN_SIZE; i++){
h_prev[i] = cache->h[t][i];
c_prev[i] = cache->c[t][i];
}
}
}
/********************************************************
* Backprop (MSE loss)
* Next-day price => target[t] = actual price at t+1
********************************************************/
static float lstm_backward(
LSTMParams *params,
LSTMCache *cache,
float targets[][OUTPUT_SIZE],
int seq_len,
LSTMGrads *grads
){
float loss = 0.f;
/* We'll store dh_next, dc_next for each time */
float dh_next[HIDDEN_SIZE], dc_next[HIDDEN_SIZE];
memset(dh_next, 0, sizeof(dh_next));
memset(dc_next, 0, sizeof(dc_next));
for(int t=seq_len-1; t>=0; t--){
if(t == seq_len-1){
/* last step has no next-day target */
continue;
}
/* MSE derivative wrt output */
float y_pred = cache->y[t][0];
float y_true = targets[t+1][0]; /* next-day price */
float diff = (y_pred - y_true);
float dy = diff;
loss += 0.5f*diff*diff;
/* Output layer grads => dW_hy, db_y, dh(t) */
float dh[HIDDEN_SIZE];
memset(dh, 0, sizeof(dh));
for(int i=0; i<HIDDEN_SIZE; i++){
grads->W_hy[0][i] += dy*cache->h[t][i];
dh[i] = dy*params->W_hy[0][i];
}
grads->b_y[0] += dy;
/* Add dh_next from future time step */
for(int i=0; i<HIDDEN_SIZE; i++){
dh[i] += dh_next[i];
}
/* h(t) = o(t)*tanh(c(t)) => do(t), dc(t) */
float do_[HIDDEN_SIZE], dc[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float o_val = cache->o[t][i];
float c_val = cache->c[t][i];
float tanhc = tanh_approx(c_val);
do_[i] = tanhc*dh[i];
dc[i] = o_val*(1.0f - tanhc*tanhc)*dh[i];
}
/* plus dc_next */
for(int i=0; i<HIDDEN_SIZE; i++){
dc[i] += dc_next[i];
}
/* c(t)=f(t)*c(t-1)+i(t)*c_hat(t) => df, di, dc_hat */
float di[HIDDEN_SIZE], df[HIDDEN_SIZE], dc_hat[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float c_prev = (t==0)? 0.0f : cache->c[t-1][i];
di[i] = dc[i]*cache->c_hat[t][i];
df[i] = dc[i]*c_prev;
dc_hat[i] = dc[i]*cache->i[t][i];
}
/* convert to pre-activation space => multiply by derivative of sigmoid or tanh */
float do_in[HIDDEN_SIZE], di_in[HIDDEN_SIZE], df_in[HIDDEN_SIZE], dc_in[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
do_in[i] = do_[i]*dsigmoid_from_val(cache->o[t][i]);
di_in[i] = di[i]*dsigmoid_from_val(cache->i[t][i]);
df_in[i] = df[i]*dsigmoid_from_val(cache->f[t][i]);
float ch_val = cache->c_hat[t][i];
float dch = dc_hat[i]*(1.0f - ch_val*ch_val);
dc_in[i] = dch;
}
/* Accumulate gate weight grads => W_ix, W_ih, etc. */
float dh_prev[HIDDEN_SIZE];
memset(dh_prev, 0, sizeof(dh_prev));
float *x_t = cache->x[t];
float *h_t_1 = (t==0)? NULL : cache->h[t-1];
#define ACCUM_GATE(Wx,Wh,bias, d_in) \
do { \
for(int i=0; i<HIDDEN_SIZE; i++){ \
float dval = d_in[i]; \
for(int j=0; j<INPUT_SIZE; j++){ \
grads->Wx[i][j] += dval*x_t[j]; \
} \
if(h_t_1 != NULL){ \
for(int j=0; j<HIDDEN_SIZE; j++){ \
grads->Wh[i][j] += dval*h_t_1[j]; \
dh_prev[j] += dval*params->Wh[i][j]; \
} \
} \
grads->bias[i] += dval; \
} \
} while(0)
ACCUM_GATE(W_ix, W_ih, b_i, di_in);
ACCUM_GATE(W_fx, W_fh, b_f, df_in);
ACCUM_GATE(W_ox, W_oh, b_o, do_in);
ACCUM_GATE(W_cx, W_ch, b_c, dc_in);
/* handle dh_prev if no h(t-1). We skip for t=0 => no real backprop to h(-1). */
/* c(t-1) => pass to dc_next */
float dc_prev[HIDDEN_SIZE];
for(int i=0; i<HIDDEN_SIZE; i++){
float f_val = cache->f[t][i];
dc_prev[i] = dc[i]*f_val;
}
memcpy(dh_next, dh_prev, sizeof(dh_prev));
memcpy(dc_next, dc_prev, sizeof(dc_prev));
}
return loss;
}
/********************************************************
* Synthetic Data Generation
* - We add cyclical (daily) signals plus random noise
* - "Price" evolves with random walk
********************************************************/
static void generate_synthetic_data(
float inputs[][INPUT_SIZE],
float targets[][OUTPUT_SIZE],
int seq_len
){
float price = 10.0f + (rand()%100)/10.f;
for(int t=0; t<seq_len; t++){
/* Macro/Micro/Tech factors (7 of them) */
float dayCycle = sinf(2.0f*3.14159f*t/30.0f)*0.5f + 0.5f; // daily-ish cycle
for(int i=0; i<INPUT_SIZE-1; i++){
/* random in [0..1], modulated by dayCycle */
inputs[t][i] = ((float)rand()/RAND_MAX + dayCycle)/2.f;
}
/* Store current price as last feature */
inputs[t][INPUT_SIZE-1] = price;
/* Random walk step for next price */
float noise = (float)(rand()%100 - 50)/100.0f; // in [-0.5..0.5]
float next_price = price + noise;
if(next_price < 0) next_price = 0;
targets[t][0] = next_price; /* This is the "future" price, used at time t-1 -> t */
price = next_price;
}
}
/********************************************************
* Main
********************************************************/
int main(){
srand((unsigned)time(NULL));
LSTMParams params;
init_params(&params, 0.1f);
LSTMGrads grads;
zero_grads(&grads);
LSTMCache cache;
memset(&cache, 0, sizeof(cache));
/* We'll train on a single sequence (BATCH_SIZE=1), length=MAX_SEQ_LEN. */
int seq_len = MAX_SEQ_LEN;
float inputs[MAX_SEQ_LEN][INPUT_SIZE];
float targets[MAX_SEQ_LEN][OUTPUT_SIZE];
generate_synthetic_data(inputs, targets, seq_len);
/* Hidden, cell init */
float h0[HIDDEN_SIZE], c0[HIDDEN_SIZE];
memset(h0, 0, sizeof(h0));
memset(c0, 0, sizeof(c0));
/* Training */
for(int e=1; e<=EPOCHS; e++){
zero_grads(&grads);
/* Forward */
lstm_forward(&params, inputs, seq_len, &cache, h0, c0);
/* Backward (MSE) */
float loss = lstm_backward(&params, &cache, targets, seq_len, &grads);
/* Update */
update_params(&params, &grads);
if(e%20==0){
printf("Epoch %3d, Loss=%.4f\n", e, loss);
}
}
/* Final predictions */
printf("\nFinal Predictions:\n");
for(int t=0; t<seq_len; t++){
float y_pred = cache.y[t][0];
if(t < seq_len-1){
float y_true = targets[t+1][0];
printf(" t=%2d: pred=%.3f, next-day-true=%.3f\n", t, y_pred, y_true);
} else {
printf(" t=%2d: pred=%.3f, next-day-true=N/A (last step)\n", t, y_pred);
}
}
return 0;
}

Binary file not shown.


@@ -0,0 +1,859 @@
/*****************************************************************************
* simple_feed_forward_model_with_indicators.c
*
* A feed-forward neural network in C that:
* - Reads CSV with OHLCV data
* - Computes 7 indicators internally
* - Uses [Open,High,Low,Volume] + 7 indicators = 11 features
* - Trains to predict Close
* - Prints extended info: predictions, errors, feature importances
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
// ---------------------------------------------------------------------------
// CONFIG & GLOBALS
// ---------------------------------------------------------------------------
#define MAX_SAMPLES 100000
// 4 base + 7 technical = 11 inputs
#define INPUT_SIZE 11
#define OUTPUT_SIZE 1
static double openArr[MAX_SAMPLES];
static double highArr[MAX_SAMPLES];
static double lowArr[MAX_SAMPLES];
static double closeArr[MAX_SAMPLES];
static double volumeArr[MAX_SAMPLES];
static double X_raw[MAX_SAMPLES][INPUT_SIZE];
static double y_raw[MAX_SAMPLES];
static int NUM_SAMPLES = 0; // actual number of rows loaded
// 7 indicator arrays
static double obvArr[MAX_SAMPLES];
static double adArr[MAX_SAMPLES];
static double adxArr[MAX_SAMPLES];
static double aroonUpArr[MAX_SAMPLES];
static double aroonDownArr[MAX_SAMPLES];
static double macdArr[MAX_SAMPLES];
static double rsiArr[MAX_SAMPLES];
static double stochArr[MAX_SAMPLES]; // if you want to use it
// For normalization
static double X_mean[INPUT_SIZE];
static double X_std[INPUT_SIZE];
static double y_mean;
static double y_std;
// Architecture
#define NUM_HIDDEN_LAYERS 2
static const int HIDDEN_SIZES[NUM_HIDDEN_LAYERS] = {8, 6};
static int layer_sizes[NUM_HIDDEN_LAYERS + 2];
// Weights & Biases
static double **W[NUM_HIDDEN_LAYERS + 1];
static double *b[NUM_HIDDEN_LAYERS + 1];
// Adam buffers
static double **mW[NUM_HIDDEN_LAYERS + 1], **vW[NUM_HIDDEN_LAYERS + 1];
static double *mb[NUM_HIDDEN_LAYERS + 1], *vb[NUM_HIDDEN_LAYERS + 1];
// Adam hyperparams
#define ADAM_LEARNING_RATE 0.01
#define BETA1 0.9
#define BETA2 0.999
#define EPSILON 1e-8
// L2 regularization
#define L2_LAMBDA 0.001
// Training
#define MAX_EPOCHS 1000
#define BATCH_SIZE 32
#define PRINT_INTERVAL 100
// ---------------------------------------------------------------------------
// UTILS
// ---------------------------------------------------------------------------
static inline double relu(double x) {
return x > 0.0 ? x : 0.0;
}
static inline double relu_derivative(double x) {
return x > 0.0 ? 1.0 : 0.0;
}
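// Note: in backward_pass the derivative is evaluated on the stored activation
// (the post-ReLU value), not on the pre-activation z. For ReLU this is
// equivalent, since relu(z) > 0 exactly when z > 0.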
// 2D allocation
static double **alloc_2d(int rows, int cols) {
double **ptr = (double **)malloc(rows * sizeof(double *));
if(!ptr) {
fprintf(stderr, "Memory alloc error\n");
exit(1);
}
for(int i = 0; i < rows; i++) {
ptr[i] = (double *)calloc(cols, sizeof(double));
if(!ptr[i]) {
fprintf(stderr, "Memory alloc error\n");
exit(1);
}
}
return ptr;
}
static void free_2d(double **arr, int rows) {
if(!arr) return;
for(int i = 0; i < rows; i++){
free(arr[i]);
}
free(arr);
}
// ---------------------------------------------------------------------------
// 1) LOAD CSV
// ---------------------------------------------------------------------------
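// The parser below expects rows shaped like this (hypothetical, made-up sample
// values shown only to illustrate the format): an integer index, a date, then OHLCV.
//
//   Index,Date,Open,High,Low,Close,Volume
//   0,2024-10-01,12.34,12.80,12.10,12.55,1523400
//   1,2024-10-02,12.56,12.90,12.40,12.75,1377800
//
// Only Open/High/Low/Close/Volume are stored; the index and date tokens are skipped.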
static void load_csv_data(const char *filename)
{
FILE *fp = fopen(filename, "r");
if (!fp) {
perror("Error opening CSV file");
exit(1);
}
char line[1024];
if(!fgets(line, sizeof(line), fp)) {
fprintf(stderr, "CSV file seems empty\n");
fclose(fp);
exit(1);
}
int row = 0;
while (fgets(line, sizeof(line), fp)) {
// Avoid overflow
if (row >= MAX_SAMPLES) {
fprintf(stderr, "Reached MAX_SAMPLES limit.\n");
break;
}
// Expect: idx, date, open, high, low, close, volume
char *token = strtok(line, ","); // index
if(!token) continue;
token = strtok(NULL, ","); // date
if(!token) continue;
token = strtok(NULL, ","); // open
if(!token) continue;
double openVal = atof(token);
token = strtok(NULL, ","); // high
if(!token) continue;
double highVal = atof(token);
token = strtok(NULL, ","); // low
if(!token) continue;
double lowVal = atof(token);
token = strtok(NULL, ","); // close
if(!token) continue;
double closeVal = atof(token);
token = strtok(NULL, ","); // volume
if(!token) continue;
double volumeVal = atof(token);
openArr[row] = openVal;
highArr[row] = highVal;
lowArr[row] = lowVal;
closeArr[row] = closeVal;
volumeArr[row] = volumeVal;
row++;
}
fclose(fp);
NUM_SAMPLES = row;
printf("Loaded %d samples from CSV.\n", NUM_SAMPLES);
}
// ---------------------------------------------------------------------------
// 2A) TECHNICAL INDICATORS
// ---------------------------------------------------------------------------
static void compute_obv()
{
obvArr[0] = 0.0;
for(int i = 1; i < NUM_SAMPLES; i++){
if(closeArr[i] > closeArr[i-1]) {
obvArr[i] = obvArr[i-1] + volumeArr[i];
} else if(closeArr[i] < closeArr[i-1]) {
obvArr[i] = obvArr[i-1] - volumeArr[i];
} else {
obvArr[i] = obvArr[i-1];
}
}
}
static void compute_ad_line()
{
adArr[0] = 0.0;
for(int i=1; i<NUM_SAMPLES; i++){
double high = highArr[i];
double low = lowArr[i];
double close = closeArr[i];
double range = (high - low) + 1e-9;
double mfm = ((close - low) - (high - close)) / range;
double mfv = mfm * volumeArr[i];
adArr[i] = adArr[i-1] + mfv;
}
}
#define ADX_PERIOD 14
static void compute_adx()
{
double *posDM = calloc(NUM_SAMPLES, sizeof(double));
double *negDM = calloc(NUM_SAMPLES, sizeof(double));
double *tr = calloc(NUM_SAMPLES, sizeof(double));
// Compute +DM, -DM, TR
for(int i=1; i<NUM_SAMPLES; i++){
double upMove = highArr[i] - highArr[i-1];
double downMove = lowArr[i-1] - lowArr[i];
posDM[i] = (upMove > downMove && upMove > 0) ? upMove : 0.0;
negDM[i] = (downMove > upMove && downMove > 0) ? downMove : 0.0;
double highLow = highArr[i] - lowArr[i];
double highClose = fabs(highArr[i] - closeArr[i-1]);
double lowClose = fabs(lowArr[i] - closeArr[i-1]);
double trueRange = fmax(highLow, fmax(highClose, lowClose));
tr[i] = trueRange;
}
// Smooth them
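// (Wilder-style smoothing: each new value is the previous smoothed sum,
//  minus 1/ADX_PERIOD of it, plus the current raw value.)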
double *smoothPosDM = calloc(NUM_SAMPLES, sizeof(double));
double *smoothNegDM = calloc(NUM_SAMPLES, sizeof(double));
double *smoothTR = calloc(NUM_SAMPLES, sizeof(double));
double sumPos=0, sumNeg=0, sumTR_=0;
for(int i=1; i<=ADX_PERIOD && i<NUM_SAMPLES; i++){
sumPos += posDM[i];
sumNeg += negDM[i];
sumTR_ += tr[i];
}
if(ADX_PERIOD < NUM_SAMPLES){
smoothPosDM[ADX_PERIOD] = sumPos;
smoothNegDM[ADX_PERIOD] = sumNeg;
smoothTR[ADX_PERIOD] = sumTR_;
}
for(int i=ADX_PERIOD+1; i<NUM_SAMPLES; i++){
smoothPosDM[i] = smoothPosDM[i-1] - (smoothPosDM[i-1]/ADX_PERIOD) + posDM[i];
smoothNegDM[i] = smoothNegDM[i-1] - (smoothNegDM[i-1]/ADX_PERIOD) + negDM[i];
smoothTR[i] = smoothTR[i-1] - (smoothTR[i-1]/ADX_PERIOD) + tr[i];
}
double *pdi = calloc(NUM_SAMPLES, sizeof(double));
double *ndi = calloc(NUM_SAMPLES, sizeof(double));
for(int i=ADX_PERIOD; i<NUM_SAMPLES; i++){
if(smoothTR[i] < 1e-9){
pdi[i] = 0.0;
ndi[i] = 0.0;
} else {
pdi[i] = (smoothPosDM[i]/smoothTR[i])*100.0;
ndi[i] = (smoothNegDM[i]/smoothTR[i])*100.0;
}
}
double *dx = calloc(NUM_SAMPLES, sizeof(double));
for(int i=ADX_PERIOD; i<NUM_SAMPLES; i++){
double sum_ = pdi[i] + ndi[i];
if(sum_<1e-9) dx[i] = 0.0;
else dx[i] = (fabs(pdi[i]-ndi[i]) / sum_) * 100.0;
}
for(int i=0; i<ADX_PERIOD && i<NUM_SAMPLES; i++){
adxArr[i] = 0.0;
}
for(int i=ADX_PERIOD; i<NUM_SAMPLES; i++){
double sumDX=0.0; int count=0;
for(int k=i-ADX_PERIOD+1; k<=i; k++){
sumDX += dx[k];
count++;
}
adxArr[i] = sumDX / count;
}
free(posDM); free(negDM); free(tr);
free(smoothPosDM); free(smoothNegDM); free(smoothTR);
free(pdi); free(ndi); free(dx);
}
#define AROON_PERIOD 14
static void compute_aroon()
{
for(int i=0; i<NUM_SAMPLES; i++){
aroonUpArr[i]=0.0; aroonDownArr[i]=0.0;
}
for(int i=AROON_PERIOD; i<NUM_SAMPLES; i++){
double highestHigh = highArr[i];
double lowestLow = lowArr[i];
int highestIndex=i, lowestIndex=i;
for(int back=1; back<AROON_PERIOD; back++){
int idx = i-back;
if(highArr[idx]>highestHigh){
highestHigh=highArr[idx];
highestIndex=idx;
}
if(lowArr[idx]<lowestLow){
lowestLow=lowArr[idx];
lowestIndex=idx;
}
}
double up = ((AROON_PERIOD-(i-highestIndex))/(double)AROON_PERIOD)*100.0;
double down = ((AROON_PERIOD-(i-lowestIndex)) /(double)AROON_PERIOD)*100.0;
aroonUpArr[i] = up;
aroonDownArr[i] = down;
}
}
#define FAST_PERIOD 12
#define SLOW_PERIOD 26
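// MACD line = EMA(FAST_PERIOD) - EMA(SLOW_PERIOD) of the close. The usual
// 9-period "signal line" and histogram are not computed here.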
static void compute_macd()
{
double *emaFast = calloc(NUM_SAMPLES, sizeof(double));
double *emaSlow = calloc(NUM_SAMPLES, sizeof(double));
emaFast[0] = closeArr[0];
emaSlow[0] = closeArr[0];
double alphaFast = 2.0/(FAST_PERIOD+1.0);
double alphaSlow = 2.0/(SLOW_PERIOD+1.0);
for(int i=1; i<NUM_SAMPLES; i++){
emaFast[i] = alphaFast*closeArr[i] + (1-alphaFast)*emaFast[i-1];
emaSlow[i] = alphaSlow*closeArr[i] + (1-alphaSlow)*emaSlow[i-1];
}
for(int i=0; i<NUM_SAMPLES; i++){
macdArr[i] = emaFast[i] - emaSlow[i];
}
free(emaFast); free(emaSlow);
}
#define RSI_PERIOD 14
static void compute_rsi()
{
double *gain=calloc(NUM_SAMPLES,sizeof(double));
double *loss=calloc(NUM_SAMPLES,sizeof(double));
for(int i=1; i<NUM_SAMPLES; i++){
double change = closeArr[i] - closeArr[i-1];
gain[i] = (change>0)? change:0.0;
loss[i] = (change<0)? -change:0.0;
}
double avgGain=0, avgLoss=0;
for(int i=1; i<=RSI_PERIOD && i<NUM_SAMPLES; i++){
avgGain+=gain[i];
avgLoss+=loss[i];
}
avgGain/=RSI_PERIOD; avgLoss/=RSI_PERIOD;
if(RSI_PERIOD<NUM_SAMPLES){
double rs = (avgLoss<1e-9)?0.0:(avgGain/avgLoss);
rsiArr[RSI_PERIOD] = 100.0-(100.0/(1.0+rs));
}
for(int i=RSI_PERIOD+1; i<NUM_SAMPLES; i++){
avgGain=(avgGain*(RSI_PERIOD-1)+gain[i])/(double)RSI_PERIOD;
avgLoss=(avgLoss*(RSI_PERIOD-1)+loss[i])/(double)RSI_PERIOD;
double rs=(avgLoss<1e-9)?0.0: (avgGain/avgLoss);
rsiArr[i] = 100.0-(100.0/(1.0+rs));
}
for(int i=0; i<RSI_PERIOD && i<NUM_SAMPLES; i++){
rsiArr[i]=0.0;
}
free(gain); free(loss);
}
#define STOCH_PERIOD 14
static void compute_stochastic()
{
for(int i=0; i<NUM_SAMPLES; i++){
stochArr[i]=0.0;
}
for(int i=STOCH_PERIOD; i<NUM_SAMPLES; i++){
double highestHigh=highArr[i];
double lowestLow=lowArr[i];
for(int back=1; back<STOCH_PERIOD; back++){
int idx = i-back;
if(highArr[idx]>highestHigh) highestHigh=highArr[idx];
if(lowArr[idx]<lowestLow) lowestLow=lowArr[idx];
}
double denom=(highestHigh-lowestLow)+1e-9;
stochArr[i]=((closeArr[i]-lowestLow)/denom)*100.0;
}
}
static void compute_all_indicators()
{
compute_obv();
compute_ad_line();
compute_adx();
compute_aroon();
compute_macd();
compute_rsi();
compute_stochastic(); // if you want to use it
}
// ---------------------------------------------------------------------------
// 2B) ASSEMBLE FEATURES
// ---------------------------------------------------------------------------
static void assemble_features()
{
for(int i=0; i<NUM_SAMPLES; i++){
// 4 base
X_raw[i][0] = openArr[i];
X_raw[i][1] = highArr[i];
X_raw[i][2] = lowArr[i];
X_raw[i][3] = volumeArr[i];
// 7 indicators => total 11 features
X_raw[i][4] = obvArr[i];
X_raw[i][5] = adArr[i];
X_raw[i][6] = adxArr[i];
X_raw[i][7] = aroonUpArr[i];
X_raw[i][8] = aroonDownArr[i];
X_raw[i][9] = macdArr[i];
X_raw[i][10] = rsiArr[i];
// If you prefer to swap in stochArr, do it as needed
y_raw[i] = closeArr[i];
}
}
// ---------------------------------------------------------------------------
// 3) NORMALIZATION
// ---------------------------------------------------------------------------
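// Features and target are z-scored: x' = (x - mean) / std and y' = (y - y_mean) / y_std.
// Predictions are mapped back to price units in predict() via y = y' * y_std + y_mean.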
static void compute_mean_std()
{
for(int j=0; j<INPUT_SIZE; j++){
double sum=0, sum_sq=0;
for(int i=0; i<NUM_SAMPLES; i++){
sum += X_raw[i][j];
sum_sq += X_raw[i][j]*X_raw[i][j];
}
double mean = sum/NUM_SAMPLES;
double var = (sum_sq/NUM_SAMPLES)-(mean*mean);
double sd = sqrt(var+1e-9);
X_mean[j]=mean; X_std[j]=sd;
}
double sumY=0, sumYsq=0;
for(int i=0; i<NUM_SAMPLES; i++){
sumY += y_raw[i];
sumYsq+= y_raw[i]*y_raw[i];
}
double meanY = sumY/NUM_SAMPLES;
double varY = (sumYsq/NUM_SAMPLES) - (meanY*meanY);
double sdY = sqrt(varY+1e-9);
y_mean=meanY; y_std=sdY;
}
static void normalize_data(double X_norm[][INPUT_SIZE], double y_norm[])
{
for(int i=0; i<NUM_SAMPLES; i++){
for(int j=0; j<INPUT_SIZE; j++){
X_norm[i][j] = (X_raw[i][j] - X_mean[j]) / (X_std[j]);
}
y_norm[i] = (y_raw[i] - y_mean)/y_std;
}
}
// ---------------------------------------------------------------------------
// 4) INIT NETWORK
// ---------------------------------------------------------------------------
static void init_network()
{
layer_sizes[0] = INPUT_SIZE; // 11
for(int i=0; i<NUM_HIDDEN_LAYERS; i++){
layer_sizes[i+1]=HIDDEN_SIZES[i];
}
layer_sizes[NUM_HIDDEN_LAYERS+1]=OUTPUT_SIZE; // 1
srand((unsigned int)time(NULL));
for(int L=0; L<NUM_HIDDEN_LAYERS+1; L++){
int in_dim = layer_sizes[L];
int out_dim= layer_sizes[L+1];
W[L]=alloc_2d(out_dim, in_dim);
b[L]=(double*)calloc(out_dim,sizeof(double));
mW[L]=alloc_2d(out_dim, in_dim);
vW[L]=alloc_2d(out_dim, in_dim);
mb[L]=(double*)calloc(out_dim,sizeof(double));
vb[L]=(double*)calloc(out_dim,sizeof(double));
for(int i=0; i<out_dim; i++){
b[L][i]=0.0;
for(int j=0; j<in_dim; j++){
W[L][i][j] = ((double)rand()/RAND_MAX -0.5);
}
}
}
}
// ---------------------------------------------------------------------------
// 5) FORWARD/BACKWARD
// ---------------------------------------------------------------------------
static void forward_pass(const double *x, double **acts)
{
// input
for(int i=0; i<layer_sizes[0]; i++){
acts[0][i] = x[i];
}
// hidden -> out
for(int L=0; L<NUM_HIDDEN_LAYERS+1; L++){
int in_dim=layer_sizes[L];
int out_dim=layer_sizes[L+1];
for(int o=0; o<out_dim; o++){
double z = b[L][o];
for(int in_i=0; in_i<in_dim; in_i++){
z += W[L][o][in_i]*acts[L][in_i];
}
if(L<NUM_HIDDEN_LAYERS) {
// relu
acts[L+1][o] = relu(z);
} else {
// linear
acts[L+1][o] = z;
}
}
}
}
static void backward_pass(const double *y_true, double **acts, double **deltas)
{
int L_out=NUM_HIDDEN_LAYERS;
int out_dim=layer_sizes[L_out+1];
// output delta
for(int i=0; i<out_dim; i++){
double y_pred=acts[L_out+1][i];
double error=y_pred-y_true[i];
deltas[L_out+1][i]=error;
}
// hidden deltas
for(int L=L_out; L>=1; L--){
int out_dim_L=layer_sizes[L];
int out_dim_Lplus=layer_sizes[L+1];
for(int i=0; i<out_dim_L; i++){
double d_act=relu_derivative(acts[L][i]);
double sum_=0.0;
for(int k=0; k<out_dim_Lplus; k++){
sum_ += W[L][k][i]*deltas[L+1][k];
}
deltas[L][i]=sum_*d_act;
}
}
}
// ---------------------------------------------------------------------------
// 6) ADAM UPDATE
// ---------------------------------------------------------------------------
static double pow_beta1_t=1.0, pow_beta2_t=1.0;
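// Per parameter, Adam keeps running moments of the gradient g:
//   m <- B1*m + (1-B1)*g,   v <- B2*v + (1-B2)*g^2
// and applies the bias-corrected update
//   theta -= lr * (m / (1 - B1^t)) / (sqrt(v / (1 - B2^t)) + eps)
// pow_beta1_t / pow_beta2_t accumulate B1^t and B2^t across calls; the code below
// additionally divides by the batch size and adds L2 decay on the weights.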
static void adam_update_weights(double **acts, double **deltas,int batch_size,int t)
{
pow_beta1_t*=BETA1;
pow_beta2_t*=BETA2;
for(int L=0; L<NUM_HIDDEN_LAYERS+1; L++){
int in_dim=layer_sizes[L];
int out_dim=layer_sizes[L+1];
for(int o=0; o<out_dim; o++){
double grad_b=deltas[L+1][o];
mb[L][o] = BETA1*mb[L][o] + (1.0-BETA1)*grad_b;
vb[L][o] = BETA2*vb[L][o] + (1.0-BETA2)*(grad_b*grad_b);
double m_hat_b= mb[L][o]/(1.0-pow_beta1_t);
double v_hat_b= vb[L][o]/(1.0-pow_beta2_t);
b[L][o] -= ADAM_LEARNING_RATE*(m_hat_b/(sqrt(v_hat_b)+EPSILON)) / batch_size;
for(int in_i=0; in_i<in_dim; in_i++){
double grad_w = deltas[L+1][o]*acts[L][in_i];
grad_w += L2_LAMBDA*W[L][o][in_i];
mW[L][o][in_i] = BETA1*mW[L][o][in_i] + (1.0-BETA1)*grad_w;
vW[L][o][in_i] = BETA2*vW[L][o][in_i] + (1.0-BETA2)*(grad_w*grad_w);
double m_hat= mW[L][o][in_i]/(1.0-pow_beta1_t);
double v_hat= vW[L][o][in_i]/(1.0-pow_beta2_t);
W[L][o][in_i] -= ADAM_LEARNING_RATE*(m_hat/(sqrt(v_hat)+EPSILON))/batch_size;
}
}
}
}
// ---------------------------------------------------------------------------
// Shuffle
// ---------------------------------------------------------------------------
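// Fisher-Yates shuffle of the sample indices, used to randomize mini-batch
// composition at the start of every epoch.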
static void shuffle_indices(int *indices, int n)
{
for(int i=0; i<n; i++){
int r = i + rand()%(n-i);
int temp=indices[i];
indices[i]=indices[r];
indices[r]=temp;
}
}
// ---------------------------------------------------------------------------
// 7) TRAIN
// ---------------------------------------------------------------------------
static void train_network(double X_norm[][INPUT_SIZE], double y_norm[])
{
double **activations=malloc((NUM_HIDDEN_LAYERS+2)*sizeof(double*));
double **deltas= malloc((NUM_HIDDEN_LAYERS+2)*sizeof(double*));
for(int L=0; L<NUM_HIDDEN_LAYERS+2; L++){
activations[L]=calloc(layer_sizes[L],sizeof(double));
deltas[L]=calloc(layer_sizes[L],sizeof(double));
}
int *indices=malloc(NUM_SAMPLES*sizeof(int));
for(int i=0; i<NUM_SAMPLES; i++){
indices[i]=i;
}
int steps=0;
for(int epoch=0; epoch<MAX_EPOCHS; epoch++){
shuffle_indices(indices,NUM_SAMPLES);
double epoch_loss=0.0;
int num_batches=(NUM_SAMPLES+BATCH_SIZE-1)/BATCH_SIZE;
for(int b_i=0; b_i<num_batches; b_i++){
int start=b_i*BATCH_SIZE;
int end=start+BATCH_SIZE;
if(end>NUM_SAMPLES) end=NUM_SAMPLES;
for(int L=0; L<NUM_HIDDEN_LAYERS+2; L++){
memset(deltas[L],0, layer_sizes[L]*sizeof(double));
}
double batch_loss=0.0;
for(int m_i=start; m_i<end; m_i++){
int idx=indices[m_i];
// forward
forward_pass(X_norm[idx],activations);
// MSE
double y_pred=activations[NUM_HIDDEN_LAYERS+1][0];
double error=y_pred-y_norm[idx];
batch_loss += 0.5*error*error;
// backward
backward_pass(&y_norm[idx], activations, deltas);
}
steps++;
adam_update_weights(activations, deltas, (end-start), steps);
epoch_loss+=batch_loss;
}
if(epoch%PRINT_INTERVAL==0){
double avg_loss=epoch_loss/NUM_SAMPLES;
printf("Epoch %d, Loss = %f\n", epoch, avg_loss);
}
}
free(indices);
for(int L=0; L<NUM_HIDDEN_LAYERS+2; L++){
free(activations[L]);
free(deltas[L]);
}
free(activations);
free(deltas);
}
// ---------------------------------------------------------------------------
// 8) PREDICT
// ---------------------------------------------------------------------------
static double predict(const double *x_input)
{
double **acts=malloc((NUM_HIDDEN_LAYERS+2)*sizeof(double*));
for(int L=0; L<NUM_HIDDEN_LAYERS+2; L++){
acts[L]=calloc(layer_sizes[L],sizeof(double));
}
forward_pass(x_input, acts);
double y_pred_norm=acts[NUM_HIDDEN_LAYERS+1][0];
for(int L=0; L<NUM_HIDDEN_LAYERS+2; L++){
free(acts[L]);
}
free(acts);
double y_pred=(y_pred_norm*y_std)+y_mean;
return y_pred;
}
// ---------------------------------------------------------------------------
// EXTRA: PRINT PREDICTIONS & METRICS
// ---------------------------------------------------------------------------
static void print_predictions_and_metrics(double X_norm[][INPUT_SIZE], double y_norm[])
{
(void)y_norm; /* unused here: error metrics are computed against the raw (denormalized) targets */
// We'll predict the entire dataset, compute errors, and print some rows
double mse=0.0, mae=0.0;
double *predictions=malloc(NUM_SAMPLES*sizeof(double));
for(int i=0; i<NUM_SAMPLES; i++){
predictions[i] = predict(X_norm[i]);
}
// Let's print the first 20 or so:
int rows_to_print = (NUM_SAMPLES<20)? NUM_SAMPLES : 20;
printf("\n-----------------------------------------------\n");
printf(" Index | Actual | Predicted | Abs Error \n");
printf("-----------------------------------------------\n");
for(int i=0; i<rows_to_print; i++){
double actual = y_raw[i];
double pred = predictions[i];
double err = fabs(pred-actual);
printf(" %5d | %9.4f | %9.4f | %9.4f\n",
i, actual, pred, err);
}
printf("-----------------------------------------------\n");
if(NUM_SAMPLES>rows_to_print) {
printf(" (... omitted %d rows ...)\n", NUM_SAMPLES-rows_to_print);
}
// Compute MSE, MAE
for(int i=0; i<NUM_SAMPLES; i++){
double err=(predictions[i]-y_raw[i]);
mse += err*err;
mae += fabs(err);
}
mse /= NUM_SAMPLES;
mae /= NUM_SAMPLES;
double rmse=sqrt(mse);
printf("\nError Metrics (all %d samples):\n", NUM_SAMPLES);
printf(" - MSE : %f\n", mse);
printf(" - RMSE: %f\n", rmse);
printf(" - MAE : %f\n", mae);
free(predictions);
}
// ---------------------------------------------------------------------------
// EXTRA: FEATURE IMPORTANCE (NAIVE)
// ---------------------------------------------------------------------------
// We'll do a simple sum of absolute weights from the first layer
// i.e. feature_importance[j] = sum over hidden_neurons of |W[0][neuron][j]|
static void print_feature_importance()
{
int in_dim = layer_sizes[0]; // 11
int out_dim= layer_sizes[1]; // 8 if HIDDEN_SIZES[0]=8
double *importance=calloc(in_dim,sizeof(double));
// Sum abs value of weights for the FIRST layer only
for(int j=0; j<in_dim; j++){
double sum_=0.0;
for(int neuron=0; neuron<out_dim; neuron++){
double w = W[0][neuron][j];
sum_ += fabs(w);
}
importance[j]=sum_;
}
// We'll define some labels:
// 0:Open,1:High,2:Low,3:Volume,4:OBV,5:AD,6:ADX,7:AroonUp,8:AroonDown,9:MACD,10:RSI
const char* feature_names[INPUT_SIZE] = {
"Open", "High", "Low", "Volume","OBV","A/D","ADX","AroonUp","AroonDown","MACD","RSI"
// or rename if you put stoch in the array
};
// We want to print them sorted by importance
// Let's create an array of indices and sort
int *idxs=malloc(in_dim*sizeof(int));
for(int i=0; i<in_dim; i++){
idxs[i]=i;
}
// simple bubble sort, descending by importance
for(int i=0; i<in_dim-1; i++){
for(int j=0; j<in_dim-1-i; j++){
if(importance[idxs[j]]<importance[idxs[j+1]]) {
int tmp=idxs[j];
idxs[j]=idxs[j+1];
idxs[j+1]=tmp;
}
}
}
printf("\nNaive Feature Importance (sum of abs W in first layer):\n");
printf("-------------------------------------------------------\n");
for(int i=0; i<in_dim; i++){
int f=idxs[i];
printf(" %10s: %.6f\n", feature_names[f], importance[f]);
}
printf("-------------------------------------------------------\n");
free(importance);
free(idxs);
}
// ---------------------------------------------------------------------------
// MAIN
// ---------------------------------------------------------------------------
int main(void)
{
// 1) Load CSV
load_csv_data("3_month_testing_data.csv");
if(NUM_SAMPLES<2) {
fprintf(stderr,"Not enough data.\n");
return 1;
}
// 2) Compute indicators
compute_all_indicators();
// 3) Assemble features
assemble_features();
// 4) Normalize
compute_mean_std();
double (*X_norm)[INPUT_SIZE] = malloc(NUM_SAMPLES*sizeof(*X_norm));
double *y_norm=malloc(NUM_SAMPLES*sizeof(double));
normalize_data(X_norm,y_norm);
// 5) Init & Train
init_network();
printf("Starting training on %d samples...\n",NUM_SAMPLES);
train_network(X_norm,y_norm);
printf("Training complete.\n");
// 6) Print extended predictions & metrics
print_predictions_and_metrics(X_norm,y_norm);
// 7) Print naive feature importance
print_feature_importance();
// 8) Cleanup
for(int L=0; L<NUM_HIDDEN_LAYERS+1; L++){
int out_dim=layer_sizes[L+1];
free_2d(W[L],out_dim);
free_2d(mW[L],out_dim);
free_2d(vW[L],out_dim);
free(b[L]);
free(mb[L]);
free(vb[L]);
}
free(X_norm);
free(y_norm);
return 0;
}
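/* A minimal build sketch (assuming the source is saved under the name given in
 * the header comment above):
 *
 *     gcc -o simple_feed_forward_model_with_indicators \
 *         simple_feed_forward_model_with_indicators.c -lm
 *
 * It expects 3_month_testing_data.csv in the working directory. */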

BIN
src/nn_tests/stock_predictor Executable file

Binary file not shown.

View File

@@ -0,0 +1,631 @@
/*****************************************************************************
* stock_predictor_enhanced.c
*
* Updated to parse real CSV data:
* - Reads 3_month_testing_data.csv
* - Uses columns [Open, High, Low, Volume] as inputs
* - Uses column [Close] as target
*****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <string.h>
/***************************************
* Configuration and Globals *
***************************************/
// Adjust these if your CSV is large
#define MAX_SAMPLES 100000 // max number of rows to read from CSV
#define INPUT_SIZE 4 // [Open, High, Low, Volume]
#define OUTPUT_SIZE 1 // we're predicting [Close]
#define NUM_HIDDEN_LAYERS 2
static const int HIDDEN_SIZES[NUM_HIDDEN_LAYERS] = {8, 6};
// Adam hyperparams
#define ADAM_LEARNING_RATE 0.01
#define BETA1 0.9
#define BETA2 0.999
#define EPSILON 1e-8
// L2 regularization term
#define L2_LAMBDA 0.001
// Training settings
#define MAX_EPOCHS 1000
#define BATCH_SIZE 32 // you can change this
#define PRINT_INTERVAL 100 // print loss every 100 epochs
// We'll fill these from the CSV file
static int NUM_SAMPLES = 0;
// Global arrays to hold raw data from CSV
static double X_raw[MAX_SAMPLES][INPUT_SIZE];
static double y_raw[MAX_SAMPLES];
/***************************************
* Normalization Buffers (Global) *
***************************************/
static double X_mean[INPUT_SIZE];
static double X_std[INPUT_SIZE];
static double y_mean;
static double y_std;
/***************************************
* Weights & Biases Data Structures *
***************************************/
// layer_sizes[0] = INPUT_SIZE
// layer_sizes[1] = HIDDEN_SIZES[0]
// layer_sizes[2] = HIDDEN_SIZES[1]
// layer_sizes[3] = OUTPUT_SIZE
static int layer_sizes[NUM_HIDDEN_LAYERS + 2];
// Weight arrays: W[L] with shape [layer_sizes[L+1]][layer_sizes[L]]
// Bias arrays: b[L] with shape [layer_sizes[L+1]]
static double **W[NUM_HIDDEN_LAYERS + 1];
static double *b[NUM_HIDDEN_LAYERS + 1];
// For Adam, we store momentum (m) and velocity (v) for each weight & bias
static double **mW[NUM_HIDDEN_LAYERS + 1], **vW[NUM_HIDDEN_LAYERS + 1];
static double *mb[NUM_HIDDEN_LAYERS + 1], *vb[NUM_HIDDEN_LAYERS + 1];
/***************************************
* Utilities *
***************************************/
static inline double relu(double x) {
return x > 0.0 ? x : 0.0;
}
static inline double relu_derivative(double x) {
return x > 0.0 ? 1.0 : 0.0;
}
// Allocate a 2D array of size rows x cols
static double **alloc_2d(int rows, int cols)
{
double **ptr = (double **)malloc(rows * sizeof(double *));
if(!ptr) {
fprintf(stderr, "Memory alloc error\n");
exit(1);
}
for(int i = 0; i < rows; i++) {
ptr[i] = (double *)calloc(cols, sizeof(double));
if(!ptr[i]) {
fprintf(stderr, "Memory alloc error\n");
exit(1);
}
}
return ptr;
}
static void free_2d(double **arr, int rows)
{
if(!arr) return;
for(int i = 0; i < rows; i++){
free(arr[i]);
}
free(arr);
}
/***************************************
* Step 1: CSV Parsing *
***************************************/
// We expect CSV with a header like:
// Index,Date,Open,High,Low,Close,Volume
// We'll read up to MAX_SAMPLES lines
static void load_csv_data(const char *filename)
{
FILE *fp = fopen(filename, "r");
if (!fp) {
perror("Error opening CSV file");
exit(1);
}
// Read and discard the header line
char line[1024];
if(!fgets(line, sizeof(line), fp)) {
fprintf(stderr, "CSV file seems empty\n");
fclose(fp);
exit(1);
}
int row = 0;
while (fgets(line, sizeof(line), fp)) {
// Avoid overflow
if (row >= MAX_SAMPLES) {
fprintf(stderr, "Reached MAX_SAMPLES limit (%d), some data not read.\n", MAX_SAMPLES);
break;
}
// Tokenize by comma
char *token = strtok(line, ","); // index
if(!token) continue; // skip malformed line
// skip index (already in token)
token = strtok(NULL, ","); // Date (we skip or ignore for now)
if(!token) continue;
// next token = Open
token = strtok(NULL, ",");
if(!token) continue;
double openVal = atof(token);
// next token = High
token = strtok(NULL, ",");
if(!token) continue;
double highVal = atof(token);
// next token = Low
token = strtok(NULL, ",");
if(!token) continue;
double lowVal = atof(token);
// next token = Close
token = strtok(NULL, ",");
if(!token) continue;
double closeVal = atof(token);
// next token = Volume
token = strtok(NULL, ",");
if(!token) continue;
double volumeVal = atof(token);
// Store in global arrays
X_raw[row][0] = openVal;
X_raw[row][1] = highVal;
X_raw[row][2] = lowVal;
X_raw[row][3] = volumeVal; // 4 features: open, high, low, volume
y_raw[row] = closeVal; // target = close
row++;
}
fclose(fp);
// The actual number of samples read
NUM_SAMPLES = row;
printf("Loaded %d samples from CSV.\n", NUM_SAMPLES);
}
/***************************************
* Step 2: Data Normalization *
***************************************/
// We'll standardize inputs: x' = (x - mean) / std
// We'll also standardize targets: y' = (y - mean) / y_std
static void compute_mean_std()
{
// Compute mean & std for each input dimension
for(int j = 0; j < INPUT_SIZE; j++) {
double sum = 0.0, sum_sq = 0.0;
for(int i = 0; i < NUM_SAMPLES; i++) {
sum += X_raw[i][j];
sum_sq += X_raw[i][j] * X_raw[i][j];
}
double mean = sum / NUM_SAMPLES;
double var = (sum_sq / NUM_SAMPLES) - (mean * mean);
double std = sqrt(var + 1e-9); // add small epsilon
X_mean[j] = mean;
X_std[j] = std;
}
// Compute mean & std for target
{
double sum = 0.0, sum_sq = 0.0;
for(int i = 0; i < NUM_SAMPLES; i++) {
sum += y_raw[i];
sum_sq += y_raw[i] * y_raw[i];
}
double mean = sum / NUM_SAMPLES;
double var = (sum_sq / NUM_SAMPLES) - (mean * mean);
double std = sqrt(var + 1e-9);
y_mean = mean;
y_std = std;
}
}
static void normalize_data(double X_norm[][INPUT_SIZE], double y_norm[])
{
// Standardize X
for(int i = 0; i < NUM_SAMPLES; i++){
for(int j = 0; j < INPUT_SIZE; j++){
X_norm[i][j] = (X_raw[i][j] - X_mean[j]) / X_std[j];
}
}
// Standardize y
for(int i = 0; i < NUM_SAMPLES; i++){
y_norm[i] = (y_raw[i] - y_mean) / y_std;
}
}
/***************************************
* Step 3: Init the Network *
***************************************/
static void init_network()
{
// Fill layer_sizes array
layer_sizes[0] = INPUT_SIZE; // e.g. 4
for(int i = 0; i < NUM_HIDDEN_LAYERS; i++){
layer_sizes[i+1] = HIDDEN_SIZES[i]; // e.g. 8, 6
}
layer_sizes[NUM_HIDDEN_LAYERS + 1] = OUTPUT_SIZE; // 1
// Initialize random
srand((unsigned int)time(NULL));
// For each layer L
for(int L = 0; L < NUM_HIDDEN_LAYERS + 1; L++){
int in_dim = layer_sizes[L];
int out_dim = layer_sizes[L+1];
// Allocate W[L] & b[L]
W[L] = alloc_2d(out_dim, in_dim);
b[L] = (double *)calloc(out_dim, sizeof(double));
// Allocate Adam buffers
mW[L] = alloc_2d(out_dim, in_dim);
vW[L] = alloc_2d(out_dim, in_dim);
mb[L] = (double *)calloc(out_dim, sizeof(double));
vb[L] = (double *)calloc(out_dim, sizeof(double));
// Random initialization
for(int i = 0; i < out_dim; i++){
b[L][i] = 0.0; // or small random
for(int j = 0; j < in_dim; j++){
// He initialization example:
// double scale = sqrt(2.0 / in_dim);
// W[L][i][j] = scale * ((double)rand() / RAND_MAX - 0.5);
// Uniform in [-0.5, 0.5]
W[L][i][j] = ((double)rand() / RAND_MAX - 0.5);
}
}
}
}
/***************************************
* Forward Pass *
***************************************/
static void forward_pass(const double *x, double **activations)
{
// Copy x into activations[0]
for(int i = 0; i < layer_sizes[0]; i++){
activations[0][i] = x[i];
}
// For each layer L
for(int L = 0; L < NUM_HIDDEN_LAYERS + 1; L++){
int in_dim = layer_sizes[L];
int out_dim = layer_sizes[L+1];
for(int out_i = 0; out_i < out_dim; out_i++){
double z = b[L][out_i];
// Weighted sum
for(int in_i = 0; in_i < in_dim; in_i++){
z += W[L][out_i][in_i] * activations[L][in_i];
}
// Activation
if(L < NUM_HIDDEN_LAYERS){
// Hidden layer -> ReLU
activations[L+1][out_i] = relu(z);
} else {
// Output layer -> linear
activations[L+1][out_i] = z;
}
}
}
}
/***************************************
* Backward Pass (Compute Deltas) *
***************************************/
static void backward_pass(const double *y_true, double **activations, double **deltas)
{
// Output layer delta
// MSE: dL/dy_pred = (y_pred - y_true)
int L_out = NUM_HIDDEN_LAYERS; // index of last hidden layer
int out_dim = layer_sizes[L_out + 1];
for(int i = 0; i < out_dim; i++){
double y_pred = activations[L_out + 1][i];
double error = y_pred - y_true[i];
deltas[L_out + 1][i] = error; // linear activation => direct
}
// Hidden layers
for(int L = L_out; L >= 1; L--){
int out_dim_L = layer_sizes[L];
int out_dim_Lplus = layer_sizes[L+1];
for(int i = 0; i < out_dim_L; i++){
double d_act = relu_derivative(activations[L][i]);
// sum over next layer
double sum_ = 0.0;
for(int k = 0; k < out_dim_Lplus; k++){
sum_ += W[L][k][i] * deltas[L+1][k];
}
deltas[L][i] = sum_ * d_act;
}
}
}
/***************************************
* Step 4: Adam Update (Mini-Batch) *
***************************************/
static double pow_beta1_t = 1.0;
static double pow_beta2_t = 1.0;
static void adam_update_weights(
double **activations, double **deltas,
int batch_size, int t /*current step*/)
{
// Increment these for bias correction in Adam
pow_beta1_t *= BETA1;
pow_beta2_t *= BETA2;
// For each layer
for(int L = 0; L < NUM_HIDDEN_LAYERS + 1; L++){
int in_dim = layer_sizes[L];
int out_dim = layer_sizes[L+1];
for(int out_i = 0; out_i < out_dim; out_i++){
// Grad for bias
double grad_b = deltas[L+1][out_i];
// Accumulate in m, v
mb[L][out_i] = BETA1 * mb[L][out_i] + (1.0 - BETA1) * grad_b;
vb[L][out_i] = BETA2 * vb[L][out_i] + (1.0 - BETA2) * (grad_b * grad_b);
// Bias-corrected m, v
double m_hat_b = mb[L][out_i] / (1.0 - pow_beta1_t);
double v_hat_b = vb[L][out_i] / (1.0 - pow_beta2_t);
// Update bias
b[L][out_i] -= ADAM_LEARNING_RATE * (m_hat_b / (sqrt(v_hat_b) + EPSILON))
/ batch_size; // average over batch
for(int in_i = 0; in_i < in_dim; in_i++){
// Weight gradient
double grad_w = deltas[L+1][out_i] * activations[L][in_i];
// Add L2 regularization
grad_w += L2_LAMBDA * W[L][out_i][in_i];
// Accumulate in m, v
mW[L][out_i][in_i] = BETA1 * mW[L][out_i][in_i] + (1.0 - BETA1) * grad_w;
vW[L][out_i][in_i] = BETA2 * vW[L][out_i][in_i] + (1.0 - BETA2) * (grad_w * grad_w);
double m_hat = mW[L][out_i][in_i] / (1.0 - pow_beta1_t);
double v_hat = vW[L][out_i][in_i] / (1.0 - pow_beta2_t);
// Weight update
W[L][out_i][in_i] -= ADAM_LEARNING_RATE * (m_hat / (sqrt(v_hat) + EPSILON))
/ batch_size;
}
}
}
}
/***************************************
* Shuffling Utility *
***************************************/
static void shuffle_indices(int *indices, int n)
{
for(int i = 0; i < n; i++){
int r = i + rand() % (n - i);
int temp = indices[i];
indices[i] = indices[r];
indices[r] = temp;
}
}
/***************************************
* Step 5: Train Loop *
***************************************/
static void train_network(double X_norm[][INPUT_SIZE], double y_norm[])
{
// Prepare activation & delta arrays
// For each of (NUM_HIDDEN_LAYERS+2) layers, we store up to layer_sizes[L] values.
double **activations = (double **)malloc((NUM_HIDDEN_LAYERS+2) * sizeof(double *));
double **deltas = (double **)malloc((NUM_HIDDEN_LAYERS+2) * sizeof(double *));
for(int L = 0; L < NUM_HIDDEN_LAYERS+2; L++){
activations[L] = (double *)calloc(layer_sizes[L], sizeof(double));
deltas[L] = (double *)calloc(layer_sizes[L], sizeof(double));
}
// Indices for shuffling
int *indices = (int *)malloc(NUM_SAMPLES * sizeof(int));
for(int i = 0; i < NUM_SAMPLES; i++) indices[i] = i;
int steps = 0; // Adam steps
for(int epoch = 0; epoch < MAX_EPOCHS; epoch++){
// Shuffle each epoch
shuffle_indices(indices, NUM_SAMPLES);
double epoch_loss = 0.0;
// Process mini-batches
int num_batches = (NUM_SAMPLES + BATCH_SIZE - 1) / BATCH_SIZE;
for(int b_i = 0; b_i < num_batches; b_i++){
int start = b_i * BATCH_SIZE;
int end = start + BATCH_SIZE;
if(end > NUM_SAMPLES) end = NUM_SAMPLES;
// NOTE: backward_pass overwrites (rather than accumulates) the deltas for each
// sample, so the Adam step below effectively uses the gradients of the last
// sample in the batch. Zero the buffers first so nothing carries over from the
// previous batch.
for(int L = 0; L < NUM_HIDDEN_LAYERS+2; L++){
for(int k = 0; k < layer_sizes[L]; k++){
deltas[L][k] = 0.0;
}
}
double batch_loss = 0.0;
// Forward+Backward for each sample in this batch
for(int m_i = start; m_i < end; m_i++){
int idx = indices[m_i];
// 1) Forward
forward_pass(X_norm[idx], activations);
// 2) Compute loss (MSE = 0.5 * error^2)
double y_pred = activations[NUM_HIDDEN_LAYERS+1][0];
double error = y_pred - y_norm[idx];
double loss = 0.5 * error * error;
batch_loss += loss;
// 3) Backward
backward_pass(&y_norm[idx], activations, deltas);
}
// 4) One Adam step per batch (driven by the last sample's deltas; see note above)
steps++;
adam_update_weights(activations, deltas, (end - start), steps);
epoch_loss += batch_loss;
}
// Print progress
if(epoch % PRINT_INTERVAL == 0){
double avg_loss = epoch_loss / NUM_SAMPLES;
printf("Epoch %d, Loss = %f\n", epoch, avg_loss);
}
}
// Cleanup
free(indices);
for(int L = 0; L < NUM_HIDDEN_LAYERS+2; L++){
free(activations[L]);
free(deltas[L]);
}
free(activations);
free(deltas);
}
/***************************************
* Step 6: Inference (Predict) *
***************************************/
static double predict(const double *x_input)
{
// Create temporary activation array
double **activations = (double **)malloc((NUM_HIDDEN_LAYERS+2)*sizeof(double *));
for(int L=0; L < NUM_HIDDEN_LAYERS+2; L++){
activations[L] = (double *)calloc(layer_sizes[L], sizeof(double));
}
// Forward pass
forward_pass(x_input, activations);
// Grab output
double y_pred_norm = activations[NUM_HIDDEN_LAYERS+1][0];
// Free
for(int L=0; L < NUM_HIDDEN_LAYERS+2; L++){
free(activations[L]);
}
free(activations);
// "Denormalize" the prediction
double y_pred = (y_pred_norm * y_std) + y_mean;
return y_pred;
}
/***************************************
* Main *
***************************************/
int main(void)
{
// 1) Load CSV data
load_csv_data("3_month_testing_data.csv");
if (NUM_SAMPLES < 2) {
fprintf(stderr, "Not enough samples to train.\n");
return 1;
}
// 2) Compute data stats & normalize
compute_mean_std();
// We'll create arrays to hold the normalized data
double (*X_norm)[INPUT_SIZE] = malloc(NUM_SAMPLES * sizeof(*X_norm));
double *y_norm = malloc(NUM_SAMPLES * sizeof(double));
if(!X_norm || !y_norm){
fprintf(stderr, "Allocation error for normalized data\n");
return 1;
}
normalize_data(X_norm, y_norm);
// 3) Initialize network
init_network();
// 4) Train
printf("Starting training on %d samples...\n", NUM_SAMPLES);
train_network(X_norm, y_norm);
printf("Training complete.\n");
// 5) Predict on a couple of “test” points
// For example, let's pick row 0 and row NUM_SAMPLES-1 in raw scale
// (Or any custom data you'd like.)
// We'll do it manually here:
if(NUM_SAMPLES > 1) {
double sample_raw[INPUT_SIZE] = {
X_raw[0][0], // open
X_raw[0][1], // high
X_raw[0][2], // low
X_raw[0][3] // volume
};
// Normalize
double sample_norm[INPUT_SIZE];
for(int i=0; i<INPUT_SIZE; i++){
sample_norm[i] = (sample_raw[i] - X_mean[i]) / X_std[i];
}
double pred1 = predict(sample_norm);
printf("Prediction for row 0's close = %.4f (actual=%.4f)\n", pred1, y_raw[0]);
}
if(NUM_SAMPLES > 10) {
// arbitrary row e.g. 10
double sample_raw2[INPUT_SIZE] = {
X_raw[10][0],
X_raw[10][1],
X_raw[10][2],
X_raw[10][3]
};
double sample_norm2[INPUT_SIZE];
for(int i=0; i<INPUT_SIZE; i++){
sample_norm2[i] = (sample_raw2[i] - X_mean[i]) / X_std[i];
}
double pred2 = predict(sample_norm2);
printf("Prediction for row 10's close = %.4f (actual=%.4f)\n", pred2, y_raw[10]);
}
// 6) Cleanup all W & b
for(int L=0; L < NUM_HIDDEN_LAYERS + 1; L++){
int out_dim = layer_sizes[L+1];
free_2d(W[L], out_dim);
free_2d(mW[L], out_dim);
free_2d(vW[L], out_dim);
free(b[L]);
free(mb[L]);
free(vb[L]);
}
free(X_norm);
free(y_norm);
return 0;
}