"""Train a small TF-IDF topic classifier and query it interactively.

Sentences are read from ``anime.txt``, ``basketball.txt`` and
``everything.txt``; each sentence is labelled with the file it came from.
A one-hidden-layer network is fitted on the TF-IDF document-term matrix,
after which the script prompts for sentences and prints the model's
three-way softmax output (columns: anime, basketball, everything) until
the user types ``done`` or closes stdin.
"""
import re

from spacy.lang.en import English
from spacy.pipeline import Sentencizer
from keras.preprocessing.text import Tokenizer
from keras.layers import Input, Dense
from keras.models import Model
import numpy as np

# Bracketed numeric citation markers such as "[3]" or "[12]".  The "+"
# matters: a single "\d" would leave multi-digit markers in the text.
CITATION_RE = re.compile(r'\[\d+\]')

nlp = English()
sentencizer = Sentencizer()

np.set_printoptions(precision=2)


def load_sentences(path):
    """Return the stripped sentences found in the UTF-8 text file *path*.

    Citation markers matching ``CITATION_RE`` are removed before the text
    is tokenized with ``nlp`` and split into sentences by ``sentencizer``.
    """
    with open(path, encoding="utf-8") as f:
        text = CITATION_RE.sub('', f.read())
    doc = sentencizer(nlp(text))
    return [span.text.strip() for span in doc.sents]


anime_sents = load_sentences("anime.txt")
basketball_sents = load_sentences("basketball.txt")
everything_sents = load_sentences("everything.txt")

# One-hot targets: row i of the 3x3 identity is repeated once per sentence
# of corpus i, so rows line up with the concatenated sentence list below.
labels = np.repeat(
    np.eye(3, dtype=int),
    [len(anime_sents), len(basketball_sents), len(everything_sents)],
    axis=0,
)

all_sents = anime_sents + basketball_sents + everything_sents

t = Tokenizer()
t.fit_on_texts(all_sents)
dtm = t.texts_to_matrix(all_sents, mode='tfidf')

# Input width equals the number of columns in the document-term matrix.
input_layer = Input(shape=(dtm.shape[1],))
hidden_layer = Dense(20, activation='relu')(input_layer)
output_layer = Dense(3, activation='softmax')(hidden_layer)

model = Model(inputs=input_layer, outputs=output_layer)
# No optimizer is passed, so Keras' default optimizer is used.
model.compile(loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(dtm, labels, epochs=20, verbose=0)

while True:
    try:
        sent = input("Enter a sentence: ")
    except EOFError:
        # stdin closed (Ctrl-D / end of piped input): exit without a traceback.
        break
    if sent == 'done':
        break
    # Encode with the tokenizer fitted on the training sentences so the
    # feature columns match what the model was trained on.
    coded = t.texts_to_matrix([sent], mode='tfidf')
    print(model.predict(coded))