"""Train a small anime-vs-basketball sentence classifier and query it interactively.

The script reads ``anime.txt`` and ``basketball.txt``, splits each into
sentences, turns the sentences into TF-IDF vectors, fits a one-hidden-layer
binary classifier on them, and then scores sentences typed at the prompt
until the user enters ``done`` (or closes stdin).
"""
import re

import numpy as np
from keras.layers import Dense, Input
from keras.models import Model
from keras.preprocessing.text import Tokenizer
from spacy.lang.en import English
from spacy.pipeline import Sentencizer

# Bracketed numeric citation markers such as "[3]" or "[12]".  The "+" matters:
# without it only single-digit markers would be removed from the corpus.
CITATION_RE = re.compile(r'\[\d+\]')

nlp = English()
sentencizer = Sentencizer()
np.set_printoptions(precision=2)


def load_sentences(path):
    """Return the whitespace-stripped sentences found in the text file at *path*.

    The file is read as UTF-8, citation markers matching ``CITATION_RE`` are
    removed, and the remaining text is tokenized with the module-level ``nlp``
    and split into sentences by the module-level ``sentencizer``.
    """
    with open(path, encoding="utf-8") as f:
        text = CITATION_RE.sub('', f.read())
    doc = sentencizer(nlp(text))
    return [span.text.strip() for span in doc.sents]


anime_sents = load_sentences("anime.txt")
basketball_sents = load_sentences("basketball.txt")
all_sents = anime_sents + basketball_sents

# Class labels in the same order as all_sents: 0 = anime, 1 = basketball.
labels = np.concatenate([
    np.repeat(0, len(anime_sents)),
    np.repeat(1, len(basketball_sents)),
])

# Fit the vocabulary on the full corpus and build the document-term matrix.
t = Tokenizer()
t.fit_on_texts(all_sents)
dtm = t.texts_to_matrix(all_sents, mode='tfidf')

# One input unit per matrix column, a 20-unit ReLU hidden layer, and a single
# sigmoid output unit trained against the 0/1 labels above.
input_layer = Input(shape=(dtm.shape[1],))
hidden_layer = Dense(20, activation='relu')(input_layer)
output_layer = Dense(1, activation='sigmoid')(hidden_layer)
model = Model(inputs=input_layer, outputs=output_layer)

# 'rmsprop' is the documented Keras default optimizer; it is spelled out so
# the choice is visible and so compile() also works where the argument is
# mandatory.
model.compile(
    optimizer='rmsprop',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.fit(dtm, labels, epochs=20, verbose=0)

# Interactive loop: print the model output for each sentence entered.  Given
# the labels used for training, outputs near 0 correspond to the anime class
# and outputs near 1 to the basketball class.
while True:
    try:
        sent = input("Enter a sentence: ")
    except EOFError:
        # stdin was closed (Ctrl-D / end of piped input): stop without a traceback.
        break
    if sent == 'done':
        break
    coded = t.texts_to_matrix([sent], mode='tfidf')
    print(model.predict(coded))