It seems that when I call env.seed(x), the runs start out the same way, but after some episodes they start to differ. Why? This happens even if I call np.random.seed(0) together with env.seed(0).
import gym
def run_episode(env, seed):
    """Run one random-action episode and record the first observation component.

    Both the environment and its action space are seeded. ``env.seed`` alone
    does not make ``env.action_space.sample()`` reproducible: the action space
    draws from its own random generator, so without seeding it two runs with
    the same environment seed can pick different actions and diverge.

    Args:
        env: A Gym environment using the classic API, where ``reset()``
            returns the observation and ``step()`` returns a 4-tuple.
        seed: Integer seed applied to both the environment and its
            action space.

    Returns:
        A list holding element 0 of the initial observation followed by
        element 0 of every observation returned by ``step`` until ``done``.
    """
    env.seed(seed)
    # The fix: seed the action sampler too, otherwise sampled actions differ
    # between runs even though the initial state is identical.
    env.action_space.seed(seed)

    trace = [env.reset()[0]]
    done = False
    while not done:
        # `obs` instead of `next`, which would shadow the builtin.
        obs, _reward, done, _info = env.step(env.action_space.sample())
        trace.append(obs[0])
    return trace


env = gym.make("CartPole-v1")

# Two rollouts with identical seeds must now produce identical traces.
n1 = run_episode(env, 0)
n2 = run_episode(env, 0)

print(n1[0] == n2[0])  # True
print(n1[1] == n2[1])  # True
print(n1[2] == n2[2])  # True now that the action space is seeded as well
Aucun commentaire:
Enregistrer un commentaire