33
44# # Meet Robo: your friend
55
6- import nltk
6+ import io
7+ import random
8+ import string # to process standard python strings
79import warnings
8- warnings .filterwarnings ("ignore" )
910
10- # nltk.download() # for downloading packages
11+ from sklearn .feature_extraction .text import TfidfVectorizer
12+ from sklearn .metrics .pairwise import cosine_similarity
1113
1214import numpy as np
13- import random
14- import string # to process standard python strings
1515
16+ import nltk
17+ from nltk .stem import WordNetLemmatizer
18+
19+ warnings .filterwarnings ("ignore" )
1620
17- f = open ('chatbot.txt' ,'r' ,errors = 'ignore' )
18- raw = f .read ()
19- raw = raw .lower ()# converts to lowercase
21+ nltk .download ('popular' , quiet = True ) # for downloading packages
22+ # The 'popular' collection downloaded above already includes the packages listed below.
2023#nltk.download('punkt') # first-time use only
2124#nltk.download('wordnet') # first-time use only
25+
26+ with open ('chatbot.txt' ,'r' , encoding = 'utf8' , errors = 'ignore' ) as fin :
27+ raw = fin .read ().lower ()
28+
2229sent_tokens = nltk .sent_tokenize (raw )# converts to list of sentences
2330word_tokens = nltk .word_tokenize (raw )# converts to list of words
2431
2936word_tokens [:5 ]
3037
3138
32- lemmer = nltk . stem . WordNetLemmatizer ()
39+ lemmer = WordNetLemmatizer ()
# LemTokens: run every item of `tokens` through the module-level `lemmer`
# (a WordNetLemmatizer instance) and return the lemmatized results as a new list.
3340def LemTokens (tokens ):
3441 return [lemmer .lemmatize (token ) for token in tokens ]
# Maps the code point of every character in string.punctuation to None.
# NOTE(review): presumably used as a str.translate table to strip punctuation — confirm at the call site.
3542remove_punct_dict = dict ((ord (punct ), None ) for punct in string .punctuation )
@@ -50,8 +57,6 @@ def greeting(sentence):
5057 return random .choice (GREETING_RESPONSES )
5158
5259
53- from sklearn .feature_extraction .text import TfidfVectorizer
54- from sklearn .metrics .pairwise import cosine_similarity
5560
5661
5762# Generating response
0 commit comments