i'm doing an automatic language detection in python using stopwords
but i'm getting KeyError when trying to test the code. this is the code
import nltk
from nltk.corpus import stopwords
def scoreFunction(wholetext):
dictiolist={}
scorelist={}
NLTKlanguage = ["dutch","finnish","german","italian","portuguese","spanish","turkish","danish","english"," french","hungarian","norwegian","russian","swedish"]
FREElanguages = [""]
languages= NLTKlanguages + FREElanguages
for lang in NLTKlanguages:
dictiolist[lang]=stopwords.words(lang)
tokens=nltk.tokenize.word_tokenize(wholetext)
tokens=[t.lower() for t in tokens]
freq_dist=nltk.FreqDist(tokens)
for lang in languages:
scorelist[lang]=0
for word in freq_dist.keys()[0:20]:
if word in dictiolist[lang]:
scorelist[lang]+=1
return scorelist
def whichLanguage(scorelist):
maximum=0
for item in scorelist:
value = scorelist[item]
if maximum<value:
maximum = value
lang = item
return lang
whene i run it scoreFunction("hillo my name is osfar and i'm genius") i get the error Traceback (most recent call last): File "", line 1, in
scoreFunction("hello my name is osfar and i'm very genius")
File "C:/Users/osama1/Desktop
/fun-test", line 17, in scoreFunction
if word in dictiolist[lang]:
KeyError: ''