
I'm using an LSTM for multi-class text classification. I've tried a lot of solutions to the problem mentioned in the title, but I can't get it working. Code:

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))  # assumption: STOPWORDS is NLTK's English stopword list

vocab_size = 5000
embedding_dim = 200
max_length = 200  # try 150 next time
trunc_type = 'post'
padding_type = 'post'
oov_tok = '<OOV>'


my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\train.txt", "r", encoding="utf8")
content = my_file.read()
articles = content.split("\n") #articles is a list of training articles
my_file.close()

my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\train.labels", "r", encoding="utf8")
content = my_file.read()
labels = content.split("\n") #labels is a list of training labels
my_file.close()


train_articles=[]
for article in articles:
    for word in STOPWORDS:
        token = ' ' + word + ' '
        article = article.replace(token, ' ')
        article = article.replace('  ', ' ')  # collapse the double space left by removing a stopword
    train_articles.append(article)
train_labels=labels


#PREP VALIDATION SET
my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\val.txt", "r", encoding="utf8")
content = my_file.read()
val_articles = content.split("\n")
my_file.close()
validation_articles=[]
for article in val_articles:
    for word in STOPWORDS:
        token = ' ' + word + ' '
        article = article.replace(token, ' ')
        article = article.replace('  ', ' ')  # collapse the double space left by removing a stopword
    validation_articles.append(article)
my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\val.labels", "r", encoding="utf8")
content = my_file.read()
validation_labels = content.split("\n")
my_file.close()
labels=train_labels+validation_labels
articles=train_articles+validation_articles
####error free till here



#####train_sequences
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_articles)
word_index = tokenizer.word_index

train_sequences = tokenizer.texts_to_sequences(train_articles)
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
validation_sequences = tokenizer.texts_to_sequences(validation_articles)
validation_padded = pad_sequences(validation_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
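# both padded arrays are 2-D integer arrays of shape (num_samples, max_length)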


label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(labels)
training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels))
validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels))
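# note: texts_to_sequences returns one list of token ids per label string, so np.array() here wraps a list of lists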


model = tf.keras.Sequential([
    # Add an Embedding layer expecting an input vocab of size 5000, with the output embedding dimension of 200 set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh; the two work as close alternatives here
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Add a Dense layer with 12 units and softmax activation.
    # With multiple output classes, softmax turns the output layer into a probability distribution.
    tf.keras.layers.Dense(12, activation='softmax')
])
model.summary()
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
history = model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2)

The error is as follows:

File "aa.py", line 97, in <module>
    history = model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2)
File "C:\Users\Ashish\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
Comments:

  • Your array contains lists or arrays that differ in size. (May 18, 2021 at 23:40)
  • Hi @Ashish, is this still an issue? As mentioned here, it could be due to not giving the appropriate maximum sequence length. Thank you! (Oct 10, 2022 at 13:55)
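
Following the hint in the first comment, one quick check, sketched here against the variable names used in the question (label_tokenizer, train_labels) rather than offered as a verified fix, is to confirm whether every label sequence has length 1 before wrapping the result in np.array:

# diagnostic sketch, assuming the names from the question's code above
label_seqs = label_tokenizer.texts_to_sequences(train_labels)
print({len(s) for s in label_seqs})   # anything other than {1} means the lists are ragged

# if every label maps to exactly one token id, flattening gives integer targets of
# shape (num_samples,), which is what sparse_categorical_crossentropy expects
if all(len(s) == 1 for s in label_seqs):
    training_label_seq = np.array([s[0] for s in label_seqs])

An empty string left over from the trailing split("\n"), for example, would tokenize to an empty list and make the array ragged.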
