I am in the process of writing a fantasy novel but I've never been the best at making "natural" sounding names, so the other day I looked up how to program a random name generator using coding. I happened upon one on YouTube where the guy was using machine learning through examples of names in a CSV file (he was using Excel; I'm poor right now and using LibreOffice Calc).
I copied his code and listened to him explain it to better learn, as I'm new to programming, and I keep getting an "IndexError: list index out of range", even though I copied his code exactly the way he had it. I even downloaded the file he had placed on the YouTube videos and I couldn't even get his to work. Basically I want to use a list of names for the machine to figure out the probability of certain letter combinations in order to make a more "natural" sounding name.
I am using Sublime Text 3 as the editor and using the Build feature on that for Python 3. The error comes at the line "prob[num1][num2] += 1" (which is line 95 on Sublime Text 3). This is a link to the machine learning part of the series: https://www.youtube.com/watch?v=Zy6-ixrY3gs&list=LL&index=6 . The code as I copied it is as follows:
import csv
global letter_count
letter_count = 0
race_name = "human male"
class letter():
# Each letter has a lowercase character, and uppercase character, and
# identifiers as vowel or consonant.
def __init__(self, lowerchar, upperchar, is_vowel, is_consonant):
global letter_count
self.upperchar = upperchar
self.lowerchar = lowerchar
self.is_vowel = is_vowel
self.is_consonant = is_consonant
self.num = letter_count
letter_count += 1
def normalize(prob):
# Normalize the probability matrix so that the sum of each row is 1.
global alphabet
new_prob = prob
for i in range(0,len(alphabet)):
total = 0
for j in range(0,len(alphabet)):
total += prob[i][j]
if (total > 0):
for j in range(0,len(alphabet)):
new_prob[i][j] = prob[i][j]/total
else:
for j in range(0,len(alphabet)):
new_prob[i][j] = len(alphabet)**(-1)
return new_prob
# Define the alphabet
global alphabet
alphabet = [letter('a','A',True,False),
letter('b','B',False,True),
letter('c','C',False,True),
letter('d','D',False,True),
letter('e','E',True,False),
letter('f','F',False,True),
letter('g','G',False,True),
letter('h','H',False,True),
letter('i','I',True,False),
letter('j','J',False,True),
letter('k','K',False,True),
letter('l','L',False,True),
letter('m','M',False,True),
letter('n','N',False,True),
letter('o','O',True,False),
letter('p','P',False,True),
letter('q','Q',False,True),
letter('r','R',False,True),
letter('s','S',False,True),
letter('t','T',False,True),
letter('u','U',True,False),
letter('v','V',False,True),
letter('w','W',False,True),
letter('x','X',False,True),
letter('y','Y',True,True),
letter('z','Z',False,True)
]
# Initialize probability matrix.
# prob[i][j] = probability that letter j comes after letter i
global prob
file_name = 'default prob.csv' # Should initialize to all 0s.
prob = []
with open(file_name, newline='') as csvfile:
prob_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
for row in prob_reader:
prob.append([])
for num in row:
prob[len(prob)-1].append(float(num))
# Read list of pre-generated names. Names should be stored one per line in file.
file_name = race_name + ' names.csv' # Change to name of file with names you wish to use as seeds.
with open(file_name, newline='') as csvfile:
name_reader = csv.reader(csvfile, delimiter=',', quotechar='|') # Record file contents.
for names in name_reader: # Loop over names in list.
name = names[0]
# Loop over letters in the current name.
for i in range(0,len(name)-1):
letter1 = name[i]
letter2 = name[i+1]
num1 = 0
num2 = 0
for i in range(0,len(alphabet)):
if (letter1 == alphabet[i].lowerchar or letter1 == alphabet[i].upperchar):
num1 = alphabet[i].num
if (letter2 == alphabet[i].lowerchar or letter2 == alphabet[i].upperchar):
num2 = alphabet[i].num
# Add one to the number of times letter i is followed by letter number i
prob[num1][num2] += 1
# Normalize the probability matrix.
prob = normalize(prob)
# Write probability matrix to file. This file will be read by the name generator.
file_name = race_name + '.csv'
with open(file_name, 'w', newline='') as csvfile:
prob_writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
for i in range(0,len(alphabet)):
prob_writer.writerow(prob[i])