lundi 30 novembre 2020

Fantasy Name Generator using Machine Learning

I am in the process of writing a fantasy novel but I've never been the best at making "natural" sounding names, so the other day I looked up how to program a random name generator using coding. I happened upon one on YouTube where the guy was using machine learning through examples of names in a CSV file (he was using Excel; I'm poor right now and using LibreOffice Calc).

I copied his code and listened to him explain it to better learn, as I'm new to programming, and I keep getting an "IndexError: list index out of range", even though I copied his code exactly the way he had it. I even downloaded the file he had placed on the YouTube videos and I couldn't even get his to work. Basically I want to use a list of names for the machine to figure out the probability of certain letter combinations in order to make a more "natural" sounding name.

I am using Sublime Text 3 as the editor and using the Build feature on that for Python 3. The error comes at the line "prob[num1][num2] += 1" (which is line 95 on Sublime Text 3). This is a link to the machine learning part of the series: https://www.youtube.com/watch?v=Zy6-ixrY3gs&list=LL&index=6 . The code as I copied it is as follows:

import csv
global letter_count
letter_count = 0

race_name = "human male"

class letter():
    # Each letter has a lowercase character, and uppercase character, and
    # identifiers as vowel or consonant.
    def __init__(self, lowerchar, upperchar, is_vowel, is_consonant):
        global letter_count
        self.upperchar = upperchar
        self.lowerchar = lowerchar
        self.is_vowel = is_vowel
        self.is_consonant = is_consonant
        self.num = letter_count
        letter_count += 1

def normalize(prob):
    # Normalize the probability matrix so that the sum of each row is 1.
    global alphabet
    new_prob = prob
    for i in range(0,len(alphabet)):
        total = 0
        for j in range(0,len(alphabet)):
            total += prob[i][j]
        if (total > 0):
            for j in range(0,len(alphabet)):
                new_prob[i][j] = prob[i][j]/total
        else:
            for j in range(0,len(alphabet)):
                new_prob[i][j] = len(alphabet)**(-1)
    return new_prob

# Define the alphabet
global alphabet
alphabet = [letter('a','A',True,False),
            letter('b','B',False,True),
            letter('c','C',False,True),
            letter('d','D',False,True),
            letter('e','E',True,False),
            letter('f','F',False,True),
            letter('g','G',False,True),
            letter('h','H',False,True),
            letter('i','I',True,False),
            letter('j','J',False,True),
            letter('k','K',False,True),
            letter('l','L',False,True),
            letter('m','M',False,True),
            letter('n','N',False,True),
            letter('o','O',True,False),
            letter('p','P',False,True),
            letter('q','Q',False,True),
            letter('r','R',False,True),
            letter('s','S',False,True),
            letter('t','T',False,True),
            letter('u','U',True,False),
            letter('v','V',False,True),
            letter('w','W',False,True),
            letter('x','X',False,True),
            letter('y','Y',True,True),
            letter('z','Z',False,True)
            ]

# Initialize probability matrix.
# prob[i][j] = probability that letter j comes after letter i
global prob
file_name = 'default prob.csv' # Should initialize to all 0s.
prob = []
with open(file_name, newline='') as csvfile:
    prob_reader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in prob_reader:
        prob.append([])
        for num in row:
            prob[len(prob)-1].append(float(num))

# Read list of pre-generated names. Names should be stored one per line in file.
file_name = race_name + ' names.csv' # Change to name of file with names you wish to use as seeds.
with open(file_name, newline='') as csvfile:
    name_reader = csv.reader(csvfile, delimiter=',', quotechar='|') # Record file contents.
    for names in name_reader: # Loop over names in list.
        name = names[0]
        # Loop over letters in the current name.
        for i in range(0,len(name)-1):
            letter1 = name[i]
            letter2 = name[i+1]
            num1 = 0
            num2 = 0
            for i in range(0,len(alphabet)):
                if (letter1 == alphabet[i].lowerchar or letter1 == alphabet[i].upperchar):
                    num1 = alphabet[i].num
                if (letter2 == alphabet[i].lowerchar or letter2 == alphabet[i].upperchar):
                    num2 = alphabet[i].num
            # Add one to the number of times letter i is followed by letter number i
            prob[num1][num2] += 1

# Normalize the probability matrix.
prob = normalize(prob)

# Write probability matrix to file. This file will be read by the name generator.
file_name = race_name + '.csv'
with open(file_name, 'w', newline='') as csvfile:
    prob_writer = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    for i in range(0,len(alphabet)):
        prob_writer.writerow(prob[i])



Aucun commentaire:

Enregistrer un commentaire