I draw a normal random variable with mean 12 to generate the hour of order_date for my mock data. But when I check whether 12 is the most frequent hour, it turns out that it is not. Any idea why this is the case?
def random_date_generator(month):
    """Return a random 2020 order timestamp in *month* as "YYYY/MM/DD HH:MM".

    The day and the minute are drawn uniformly; the hour is drawn from a
    normal distribution centred on 12 with a standard deviation of 2.

    Args:
        month: Month number, 1-12.

    Returns:
        The formatted timestamp string.
    """
    day_range = calendar.monthrange(2020, month)[1]
    day = random.randint(1, day_range)
    # Round to the nearest hour. int() truncates toward zero, which maps
    # [12, 13) onto 12 and [11, 12) onto 11, so the histogram is shifted
    # about half an hour early (sample mean near 11.5, 11 as likely as 12).
    hour = int(round(float(np.random.normal(12, 2))))
    # The normal tail is unbounded; clamp so datetime() never gets an
    # out-of-range hour.
    hour = min(23, max(0, hour))
    # Minutes run 0-59; starting at 1 would never produce an "HH:00" time.
    minute = random.randint(0, 59)
    return dt.datetime(2020, month, day, hour, minute).strftime("%Y/%m/%d %H:%M")
# Generate mock order data month by month and write one CSV per month.
columns = ['Order ID', 'Product', 'Quantity Ordered', 'Price Each', 'Order Date',
           'Purchase Address', "Month"]
order_id = 123

# The product names and their sampling weights do not change between orders,
# so build them once instead of on every iteration.
products_list = list(products)
weights = [products[key][1] for key in products_list]

for month_int in range(1, 13):
    # Number of orders for the month: busier in November and December.
    if month_int == 12:
        order_amount = int(np.random.normal(100, 30))
    elif month_int == 11:
        order_amount = int(np.random.normal(90, 30))
    else:
        order_amount = int(np.random.normal(60, 10))

    # Resolve the month name up front so it is defined even when the month
    # ends up with zero orders.
    month = calendar.month_name[month_int]

    # Collect this month's rows in a fresh list so that rows from a previous
    # (longer) month can never leak into this month's file.
    rows = []
    for _ in range(order_amount):
        product = random.choices(products_list, weights=weights)[0]
        # NOTE(review): this stores the whole products[product] entry; the
        # weight is read from index 1, so the price is presumably another
        # element of that entry -- confirm against the products definition.
        price = products[product]
        date = random_date_generator(month_int)
        address = generate_random_addresses()
        rows.append([order_id, product, "NA", price, date, address, month_int])
        order_id += 1

    df = pd.DataFrame(rows, columns=columns)
    df.to_csv(f"{month}_data2.csv")
    print(f"{month}_data2.csv")
    # NOTE(review): stops after the first month, so only January is written;
    # remove this break to generate all twelve files.
    break
# Load the generated January orders and count the rows per order hour.
january = pd.read_csv("January_data2.csv")
# "Order Date" ends in "HH:MM", so the two characters before the colon are
# the hour.
january = january.assign(Hour=january["Order Date"].str.slice(-5, -3))
january.groupby("Hour").count()
This is the output; as you can see, the most frequently generated hour is not 12 but 10.
Unnamed: 0 Order ID Product Quantity Ordered Price Each Order Date Purchase Address Month
Hour
07 2 2 2 0 2 2 2 2
08 2 2 2 0 2 2 2 2
09 9 9 9 0 9 9 9 9
10 15 15 15 0 15 15 15 15
11 11 11 11 0 11 11 11 11
12 10 10 10 0 10 10 10 10
13 9 9 9 0 9 9 9 9
14 7 7 7 0 7 7 7 7
15 3 3 3 0 3 3 3 3
Aucun commentaire:
Enregistrer un commentaire