The code below works when the only two values of the data being sampled are 0 and 1. How do I change it so that it also works when each machine's rewards are drawn from a normal distribution with a different mean?
import random
import pandas as pd
import matplotlib.pyplot as plt
# Simulated reward table: one column per machine (B1..B5) and one row per
# round. Every cell is an independent random draw of 0 or 1.
observations = 200
data = pd.DataFrame(
    {
        name: [random.randint(0, 1) for _ in range(observations)]
        for name in ('B1', 'B2', 'B3', 'B4', 'B5')
    }
)
# The preview is neither assigned nor printed, so its value is discarded
# when this runs as a plain script.
data.head(10)
# Thompson sampling over the simulated machines.
#
# Each round draws one sample per machine from Beta(rewards + 1, penalties + 1),
# plays the machine with the largest sample, and updates that machine's
# counters from the outcome stored in `data` for that round.
machines = 5
machine_selected = []        # index of the machine played in each round
rewards = [0] * machines     # per-machine count of rounds whose outcome was 1
penalties = [0] * machines   # per-machine count of rounds with any other outcome
total_reward = 0
outcomes = data.values       # loop-invariant: build the array once, not per round
for n in range(observations):
    bandit = 0
    beta_max = 0
    for i in range(machines):
        # Sample this machine's success probability from its Beta posterior.
        beta_d = random.betavariate(rewards[i] + 1, penalties[i] + 1)
        if beta_d > beta_max:
            beta_max = beta_d
            bandit = i
    machine_selected.append(bandit)
    reward = outcomes[n, bandit]
    # Anything other than exactly 1 is counted as a penalty, so this update
    # is only meaningful for 0/1 outcomes.
    if reward == 1:
        rewards[bandit] += 1
    else:
        penalties[bandit] += 1
    total_reward += reward
print("Rewards By Machine = ", rewards)
print("Total Rewards = ", total_reward)
plt.bar(['B1','B2','B3','B4','B5'],rewards)
plt.show()
I tried changing the code segment
if reward == 1:
rewards[bandit] = rewards[bandit] + 1
else:
penalties[bandit] = penalties[bandit] + 1
to `rewards[bandit] = rewards[bandit] + reward`, but that did not work. Any help would be greatly appreciated.
Aucun commentaire:
Enregistrer un commentaire