jeudi 19 mai 2016

how to generate sentences as random sequences of words from the probability of the word that i've calculated

this is my coding to generate random sequences of word based on my unigrams model. can somebody show me how to get the probability of the word from my dictionary so i can generate the random word. Im just the beginner so i don't know much about python..

import nltk
from nltk.tokenize import word_tokenize
from nltk import ngrams
from nltk import FreqDist

Textfile = "TrainingSetLab2.txt"
Unigrams ={}

for line in open(Textfile):
    line = line.rstrip()
    tokens = word_tokenize(line)
    fdist = FreqDist(Unigrams)
    for word in tokens:     
        if word in Unigrams:
            Unigrams[word] = Unigrams[word]+1


        else:
            Unigrams[word] = 1

    for word in Unigrams:
         n = fdist.N()



for k,v in Unigrams.items():
    prob = (v/n)

    print(k,v,prob)
print("Total word: ", n)
#761
word = "put"
for index in range(8):
    word = prob[word].generate()
    print (word)




Aucun commentaire:

Enregistrer un commentaire