I'm trying to write a Q-learning function for a basic machine-learning project (pseudocode for the Q-learning function here). When I use rand() % 4; in the epsi_greed function (whose aim is to sometimes choose a random action and at other times choose the best action derived from Q), it causes a segmentation fault. When I replace it with action_choisie = 2;, for instance, I get no segmentation fault, so I guess the modulo operation is causing this error.
#include "functions.h"
#include <time.h>
#include <stdlib.h>
/* Q-learning hyper-parameters shared by epsi_greed() and Q(). */

/* Step size: scales the correction applied to a table entry on each update. */
double alpha      = 0.5;
/* Weight applied to the best value of the next state in the update target. */
double gamma_temp = 1.0;
/* Threshold the random draw is compared against to pick a random action. */
double epsilon    = 0.15;
/**
 * Return the largest action value stored for one cell of the Q-table.
 *
 * @param Q       Table indexed as Q[row][column][action]; the addressed cell
 *                must hold 4 action values.
 * @param ligne   Row index of the cell.
 * @param colonne Column index of the cell.
 * @return The maximum of the 4 values in Q[ligne][colonne].
 */
double max_Q(double*** Q, int ligne, int colonne){
    const double *valeurs = Q[ligne][colonne];

    /* Seed with the first entry instead of a fixed sentinel (-100): with a
     * sentinel, a cell whose values are all below it would report the
     * sentinel rather than its real maximum. */
    double max = valeurs[0];
    for (int i = 1; i < 4; ++i) {
        if (valeurs[i] > max) {
            max = valeurs[i];
        }
    }
    return max;
}
action epsi_greed(double*** Q, int ligne, int colonne){
float ran_nb = rand()/RAND_MAX;
enum action action_choisie;
if (ran_nb<epsilon) {
action_choisie = rand() % 4;
}
else {
double max=-100;
int ind_max = 0;
for (int i=0;i<4;++i){
if (Q[ligne][colonne][i]>max){
max = Q[ligne][colonne][i];
ind_max = i;
}
}
action_choisie = ind_max;
}
return(action_choisie);
}
void Q() {
double*** Q = (double***)malloc(rows * sizeof(double**));
if (Q == NULL)
{
fprintf(stderr, "Out of memory");
exit(0);
}
for (int i = 0; i < rows; i++)
{
Q[i] = (double**)malloc(cols * sizeof(double*));
if (Q[i] == NULL)
{
fprintf(stderr, "Out of memory");
exit(0);
}
for (int j = 0; j < cols; j++)
{
Q[i][j] = (double*)malloc(4 * sizeof(double));
if (Q[i][j] == NULL)
{
fprintf(stderr, "Out of memory");
exit(0);
}
}
}
// assign values to the allocated memory
for (int i = 0; i < rows; i++)
{
for (int j = 0; j < cols; j++)
{
for (int k = 0; k < 4; k++) {
Q[i][j][k] = 0;
}
}
}
int etat_ligne=start_row;
int etat_col=start_col;
int ancienne_ligne;
int ancienne_col;
action prochaine_action;
envOutput nouvel_etat;
srand(time(NULL));
for (int u=0;u<30;++u) {
int i =0;
nouvel_etat.done=0;
while (nouvel_etat.done!=1 && i<1000) {
prochaine_action=epsi_greed(Q,etat_ligne,etat_col);
nouvel_etat = maze_step(prochaine_action);
/* on update les anciennes valeurs */
ancienne_ligne=etat_ligne;
ancienne_col=etat_col;
/* on update les nouvelles valeurs */
etat_ligne=nouvel_etat.new_row;
etat_col=nouvel_etat.new_col;
/* on update Q */
Q[ancienne_ligne][ancienne_col][prochaine_action] = Q[ancienne_ligne][ancienne_col][prochaine_action] + alpha*(nouvel_etat.reward + gamma_temp*max_Q(Q,etat_ligne,etat_col) - Q[ancienne_ligne][ancienne_col][prochaine_action]);
i=i+1;
}
if (nouvel_etat.done==1) {
printf("success",i);
}
else {
printf("échec");
}
}
}
(The maze_step function was provided by the teacher, so it should not be the problem.)
One thing I found is that using very small bounds for i and u (100 and 2, for instance) in the loops of the Q() function avoids the segmentation fault, but the code is not functional this way.
How can I solve or avoid this problem?
Aucun commentaire:
Enregistrer un commentaire