jeudi 12 mai 2022

rand() % (int) in a loop causing seg fault [closed]

I'm trying to write a Q-learning function for a basic machine-learning project (pseudocode for the Q-learning function here). When I use rand() % 4; in the epsi_greed function (whose aim is to sometimes choose a random action and at other times choose the best action derived from Q), it causes a segmentation fault. When I replace it with action_choisie = 2;, for instance, I get no segmentation fault, so I guess the modulo operation is causing this error.

#include "functions.h"
#include <time.h>
#include <stdlib.h>


/* Q-learning hyper-parameters read by the update rule and the action selection in this file. */
double alpha      = 0.5;   /* scales the correction term added to a Q value on each update */
double gamma_temp = 1.0;   /* multiplies the best Q value of the next state in the update */
double epsilon    = 0.15;  /* threshold the random draw is compared against in epsi_greed */

/*
 * Returns the largest of the four action values stored for one state.
 *
 * Q       - table indexed as Q[ligne][colonne][action]
 * ligne   - first index of the state
 * colonne - second index of the state
 *
 * Q[ligne][colonne] must point to at least 4 doubles; no bounds checking
 * is performed on ligne or colonne.
 */
double max_Q(double*** Q, int ligne, int colonne){
    enum { NB_ACTIONS = 4 };
    const double *valeurs = Q[ligne][colonne];

    /* Seed with the first entry instead of a fixed sentinel (-100) so the
     * result is still correct when every action value is below that sentinel. */
    double max = valeurs[0];
    for (int i = 1; i < NB_ACTIONS; ++i) {
        if (valeurs[i] > max) {
            max = valeurs[i];
        }
    }
    return max;
}

action epsi_greed(double*** Q, int ligne, int colonne){
 float ran_nb = rand()/RAND_MAX;
enum action action_choisie;
if (ran_nb<epsilon) {
 action_choisie = rand() % 4; 
} 
else {
 double max=-100;
 int ind_max = 0;
   for (int i=0;i<4;++i){
       if (Q[ligne][colonne][i]>max){
           max = Q[ligne][colonne][i];
     ind_max = i;
       }
   }
action_choisie = ind_max;
} 
return(action_choisie);
}
                                
 

void Q() {
 
double*** Q = (double***)malloc(rows * sizeof(double**));

 if (Q == NULL)
 {
     fprintf(stderr, "Out of memory");
     exit(0);
 }

 for (int i = 0; i < rows; i++)
 {
     Q[i] = (double**)malloc(cols * sizeof(double*));

     if (Q[i] == NULL)
     {
         fprintf(stderr, "Out of memory");
         exit(0);
     }

     for (int j = 0; j < cols; j++)
     {
         Q[i][j] = (double*)malloc(4 * sizeof(double));
         if (Q[i][j] == NULL)
         {
             fprintf(stderr, "Out of memory");
             exit(0);
         }
     }
 }

 // assign values to the allocated memory
 for (int i = 0; i < rows; i++)
 {
     for (int j = 0; j < cols; j++)
     {
         for (int k = 0; k < 4; k++) {
             Q[i][j][k] = 0;
         }
     }
 }   


     int etat_ligne=start_row;
     int etat_col=start_col;
     int ancienne_ligne;
     int ancienne_col;
     action prochaine_action;
     envOutput nouvel_etat;
     srand(time(NULL));

 for (int u=0;u<30;++u) { 
     int i =0;
     nouvel_etat.done=0;
     while (nouvel_etat.done!=1 && i<1000) {
         

         prochaine_action=epsi_greed(Q,etat_ligne,etat_col); 
     
         nouvel_etat = maze_step(prochaine_action);
         
         /* on update les anciennes valeurs */
         ancienne_ligne=etat_ligne;
         ancienne_col=etat_col;
         
         /* on update les nouvelles valeurs */
         etat_ligne=nouvel_etat.new_row;
         etat_col=nouvel_etat.new_col;

         /* on update Q */
         Q[ancienne_ligne][ancienne_col][prochaine_action] = Q[ancienne_ligne][ancienne_col][prochaine_action] + alpha*(nouvel_etat.reward + gamma_temp*max_Q(Q,etat_ligne,etat_col) - Q[ancienne_ligne][ancienne_col][prochaine_action]);
         i=i+1;
     }
     if (nouvel_etat.done==1) {
         printf("success",i);
     }
     else {
         printf("échec");
     }
 }
 }

(The maze_step function was provided by the teacher, so it should not be the problem.)

One thing I found is that using very small bounds for i and u (100 and 2, for instance) in the loops of the Q() function avoids the segmentation fault, but the code will not be functional this way.

How can I solve or avoid this problem?




Aucun commentaire:

Enregistrer un commentaire