Deep learning model, heat map, data prepo

Overview

DEEP LEARNING ON USA DEMOCRATES DEBATE

By Pamela Dekas

import sys
import csv
import re 
import nltk
import string
import unicodedata
from textblob import TextBlob
from collections import Counter
import pandas as pd
import numpy as np
from wordcloud import WordCloud
from nltk.classify import * 
from nltk.corpus import stopwords
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import nltk.classify.util
import matplotlib.pyplot as plt
from string import punctuation 
from nltk.corpus import stopwords
from wordcloud import STOPWORDS
import os
from sklearn.model_selection import train_test_split
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence, text
from keras.callbacks import EarlyStopping
Using TensorFlow backend.



---------------------------------------------------------------------------

AttributeError                            Traceback (most recent call last)


   
     in 
    
     ()
     22 import os
     23 from sklearn.model_selection import train_test_split
---> 24 from keras.datasets import imdb
     25 from keras.models import Sequential
     26 from keras.layers import Dense


~\Anaconda3\lib\site-packages\keras\__init__.py in 
     
      ()
      1 from __future__ import absolute_import
      2 
----> 3 from . import utils
      4 from . import activations
      5 from . import applications


~\Anaconda3\lib\site-packages\keras\utils\__init__.py in 
      
       ()
      4 from . import data_utils
      5 from . import io_utils
----> 6 from . import conv_utils
      7 from . import losses_utils
      8 from . import metrics_utils


~\Anaconda3\lib\site-packages\keras\utils\conv_utils.py in 
       
        () 7 from six.moves import range 8 import numpy as np ----> 9 from .. import backend as K 10 11 ~\Anaconda3\lib\site-packages\keras\backend\__init__.py in 
        
         () ----> 1 from .load_backend import epsilon 2 from .load_backend import set_epsilon 3 from .load_backend import floatx 4 from .load_backend import set_floatx 5 from .load_backend import cast_to_floatx ~\Anaconda3\lib\site-packages\keras\backend\load_backend.py in 
         
          () 88 elif _BACKEND == 'tensorflow': 89 sys.stderr.write('Using TensorFlow backend.\n') ---> 90 from .tensorflow_backend import * 91 else: 92 # Try and load external backend. ~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in 
          
           () 52 53 # Private TF Keras utils ---> 54 get_graph = tf_keras_backend.get_graph 55 # learning_phase_scope = tf_keras_backend.learning_phase_scope # TODO 56 name_scope = tf.name_scope AttributeError: module 'tensorflow.python.keras.backend' has no attribute 'get_graph' 
          
         
        
       
      
     
    
   
speech = pd.read_csv('debate_transcripts_v3_2020-02-26.csv',encoding= 'unicode_escape')
df= pd.DataFrame(speech)
dem_speakers = df["speaker"]
number_of_speakers = len(set(dem_speakers))
print("Nombre de speakers:",number_of_speakers, "speakers")

# Mean duration of speech.
print("temps moyen de parole:",np.mean(df["speaking_time_seconds"]), "seconds")
print("Dataset size:", len(df))
Nombre de speakers: 106 speakers
temps moyen de parole: 16.49230769230769 seconds
Dataset size: 5911
df.info()

   
    
RangeIndex: 5911 entries, 0 to 5910
Data columns (total 6 columns):
date                     5911 non-null object
debate_name              5911 non-null object
debate_section           5911 non-null object
speaker                  5911 non-null object
speech                   5911 non-null object
speaking_time_seconds    5395 non-null float64
dtypes: float64(1), object(5)
memory usage: 277.2+ KB

   
df.groupby('speaker')['speaking_time_seconds'].sum(level=0).nlargest(10).plot.bar()
plt.title('Repartition par temps de parole')
plt.show()

png

debate_time = df.groupby(by=['speaker', 'date']).speaking_time_seconds.sum().nlargest(15)
debate_time.plot()

   

   

png

suppresion des colonnes qui ne seront pas utilisé dans la suite du projet et creation du dataset final###

df=df.drop(['date','debate_name','debate_section','speaking_time_seconds'],1)
df.head(5)
speaker speech
0 Norah O�Donnell Good evening and welcome, the Democratic presi...
1 Gayle King And Super Tuesday is just a week away and this...
2 Norah O�Donnell And CBS News is proud to bring you this debate...
3 Gayle King And we are partnering tonight also with Twitte...
4 Norah O�Donnell Now, here are the rules for the next two hours...

PREPROCESSING

import nltk 
nltk.download('punkt')
stopwords = nltk.corpus.stopwords.words('english')
Tailored_stopwords=('im','ive','mr','weve','dont','well','will','make','us','we',
                      'I','make','got','need','want','think',
                      'going','go','one','thank','going',
                      'way','say','every','re','us','first',
                     'now','said','know','look','done','take',
                     'number','two','three','s','m',"t",
                      'let','don','tell','ve','im','mr','put','maybe','whether','many', 'll','around','thing','Secondly','doesn','lot')
#stopwords = nltk.corpus.stopwords.words('english')
stopwords = set(STOPWORDS)
stopwords= stopwords.union(Tailored_stopwords)
[nltk_data] Downloading package punkt to C:\Users\pamel.DESKTOP-O19M7N
[nltk_data]     F\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
def Text_cleansing(speech):
    speech = re.sub('@[A-Za-z0–9]+', '', str(speech))
    speech = re.sub('#', '', speech) # Enlever les '#' hash tag
    speech = re.sub('rt', '', speech)
    speech=re.sub(',',' ', speech)
    speech=re.sub('!',' ',speech)
    speech=re.sub(':',' ',speech)
    speech=re.sub("'","",speech)
    speech=re.sub('"','',speech)
    speech=speech.lower()
    speech = word_tokenize(speech)
    return speech
def remove_stopwords(speech):
    speech_clean = [word for word in speech if word not in stopwords]
    return speech_clean
                         
df['speech_tokens']= df['speech'].apply(Text_cleansing)
df.head(5)
speaker speech speech_tokens
0 Norah O�Donnell Good evening and welcome, the Democratic presi... [good, evening, and, welcome, the, democratic,...
1 Gayle King And Super Tuesday is just a week away and this... [and, super, tuesday, is, just, a, week, away,...
2 Norah O�Donnell And CBS News is proud to bring you this debate... [and, cbs, news, is, proud, to, bring, you, th...
3 Gayle King And we are partnering tonight also with Twitte... [and, we, are, panering, tonight, also, with, ...
4 Norah O�Donnell Now, here are the rules for the next two hours... [now, here, are, the, rules, for, the, next, t...
df['speech_clean']=df['speech_tokens'].apply(remove_stopwords)
df.head(5)
speaker speech speech_tokens speech_clean
0 Norah O�Donnell Good evening and welcome, the Democratic presi... [good, evening, and, welcome, the, democratic,... [good, evening, welcome, democratic, president...
1 Gayle King And Super Tuesday is just a week away and this... [and, super, tuesday, is, just, a, week, away,... [super, tuesday, week, away, biggest, primary,...
2 Norah O�Donnell And CBS News is proud to bring you this debate... [and, cbs, news, is, proud, to, bring, you, th... [cbs, news, proud, bring, debate, along, co-sp...
3 Gayle King And we are partnering tonight also with Twitte... [and, we, are, panering, tonight, also, with, ... [panering, tonight, twitter, ., home, paicipat...
4 Norah O�Donnell Now, here are the rules for the next two hours... [now, here, are, the, rules, for, the, next, t... [rules, next, hours, ., asked, question, minut...
def wordcloud(dataframe):
    Aw= df['speech_clean']
    wordCloud = WordCloud(width=500, height=300,background_color='white', max_font_size=110).generate(str(Aw))
    plt.imshow(wordCloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("speech wordcloud")

wordcloud(df['speech_clean'])

png

Pour la suite du projet on reduira la liste des speakers aux candidats les plus notoires (top 7 speakers)###

df = df.loc[df.speaker.isin({'Joe Biden', 'Bernie Sanders', 'Elizabeth Warren', 'Michael Bloomberg', 'Pete Buttigieg', 'Amy Klobuchar',  'Tulsi Gabbard'})]
df.head()
df.shape
(2245, 4)

CountVectorizer et creation du dict des mots par candidat a utiliser sur les modeles ML qui seront en back-up###

Analyse Lexicale

cv = CountVectorizer(stop_words=stopwords)
df_cv = cv.fit_transform(df.speech)
df_words = pd.DataFrame(df_cv.toarray(), columns=cv.get_feature_names())
df_words.index = df.speaker
df_words = df_words.transpose()
df_words
speaker Bernie Sanders Michael Bloomberg Michael Bloomberg Bernie Sanders Pete Buttigieg Elizabeth Warren Elizabeth Warren Pete Buttigieg Joe Biden Bernie Sanders ... Amy Klobuchar Elizabeth Warren Amy Klobuchar Tulsi Gabbard Tulsi Gabbard Amy Klobuchar Amy Klobuchar Amy Klobuchar Elizabeth Warren Elizabeth Warren
00 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
000 2 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
001st 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
01 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
02 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
03 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
04 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
05 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
06 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
07 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
08 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
09 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
10 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
100 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
10000 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
100s 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
10th 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
11 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
12 0 0 0 0 0 0 0 0 1 0 ... 0 0 0 0 0 0 0 0 0 0
120 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
125 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
12th 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
130 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
135 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
137 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
13th 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
140 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
149 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
xinjiang 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yachts 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yale 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yang 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yanked 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
ye 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yeah 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 1 0 0
year 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yearly 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
years 2 0 0 0 0 0 0 1 0 0 ... 0 0 0 0 0 0 0 0 1 0
yellow 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yemen 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yemin 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yep 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yes 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yesterday 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yet 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yo 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
york 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
yorker 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
young 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
younger 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
youngest 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
youth 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
youtube 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
zealand 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
zero 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
zeroed 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
zip 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
zone 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

6385 rows × 2245 columns

top_dict = {}
for c in df_words.columns:
    top = df_words[c].sort_values(ascending=False).head(30)
    top_dict[c]= list(zip(top.index, top.values))
for speaker, top_words in top_dict.items():
    print(speaker)
    print(', '.join([word for word, count in top_words[0:9]]))
    print('---')
---------------------------------------------------------------------------

TypeError                                 Traceback (most recent call last)


   
     in 
    
     ()
      1 top_dict = {}
      2 for c in df_words.columns:
----> 3     top = df_words[c].sort_values(ascending=False).head(30)
      4     top_dict[c]= list(zip(top.index, top.values))
      5 for speaker, top_words in top_dict.items():


TypeError: sort_values() missing 1 required positional argument: 'by'

    
   
df2=pd.DataFrame(top_dict)
df2.head(15)
from collections import Counter
words = []
for speaker in df_words.columns:
    top = [word for (word, count) in top_dict[speaker]]
    for t in top:
        words.append(t)
Counter(words).most_common(15)
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)


   
     in 
    
     ()
      2 words = []
      3 for speaker in df_words.columns:
----> 4     top = [word for (word, count) in top_dict[speaker]]
      5     for t in top:
      6         words.append(t)


KeyError: 'Bernie Sanders'

    
   

Implemantation du modèle###

print(df.columns)
print(df.shape)
df['speaker'] = df['speaker'].astype(str)
Index(['speaker', 'speech', 'speech_tokens', 'speech_clean'], dtype='object')
(2245, 4)

Embedding

import gensim
RANDOM_STATE = 50
EPOCHS = 5
BATCH_SIZE = 256
EMB_DIM = 100
SAVE_MODEL = True

X = df['speech_clean']
print(X.head())
X.shape
5     [well, you�re, right, economy, really, great, ...
6                                            [senator-]
8     [think, donald, trump, thinks, would, better, ...
9     [oh, mr., bloomberg, ., let, tell, mr., putin,...
11     [know, president, russia, wants, it�s, chaos, .]
Name: speech_clean, dtype: object





(2245,)
emb_model = gensim.models.Word2Vec(sentences = X, size = EMB_DIM, window = 5, workers = 4, min_count = 1)
print('La taille du vocabulaire appris est de ',len(list(emb_model.wv.vocab)))
La taille du vocabulaire appris est de  7139
from keras.preprocessing.text import Tokenizer
import tokenize
max_length = max([len(s) for s in X])

tokenizer_new = Tokenizer()
tokenizer_new.fit_on_texts(X)

X_seq = tokenizer_new.texts_to_sequences(X)
X_fin = sequence.pad_sequences(X_seq, maxlen = max_length)
print(X_fin.shape)
(2245, 140)
emb_vec = emb_model.wv
MAX_NB_WORDS = len(list(emb_vec.vocab))
tokenizer_word_index = tokenizer_new.word_index
vocab_size = len(tokenizer_new.word_index) + 1
embedded_matrix = np.zeros((vocab_size, EMB_DIM))


for word, i in tokenizer_word_index.items():
    if i>= MAX_NB_WORDS:
        continue
    try:
        embedding_vector = emb_vec[word]
        wv_matrix[i] = embedding_vector
    except:
        pass      
embedded_matrix.shape
print(embedded_matrix)
[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]

Préparation des variables

from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
y = df.speaker
print(y.head(10))
y.shape
5     1
6     4
8     4
9     1
11    5
12    2
13    2
15    5
21    3
23    1
Name: speaker, dtype: int32





(2245,)
Counter(y)
Counter({'Bernie Sanders': 430,
         'Michael Bloomberg': 97,
         'Pete Buttigieg': 392,
         'Elizabeth Warren': 440,
         'Joe Biden': 456,
         'Amy Klobuchar': 353,
         'Tulsi Gabbard': 77})
le=LabelEncoder()
df['speaker'] = le.fit_transform(df['speaker'])
df.head()

y = df.speaker
y.head()
print(y.shape)
print(X_fin.shape)
(2245,)
(2245, 140)
X_train, X_test, y_train, y_test = train_test_split(X_fin , y, test_size = 0.2, random_state = 42)


print(X_train.shape)
print(y_train.shape)
(1796, 140)
(1796,)

Construction des NN

model_pre_trained = Sequential()

model_pre_trained.add(Embedding(vocab_size, EMB_DIM, weights = [embedded_matrix], 
                    input_length = max_length, trainable = False))
model_pre_trained.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model_pre_trained.add(Dense(1, activation='softmax'))

model_pre_trained.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model_pre_trained.summary()
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_11 (Embedding)     (None, 140, 100)          714000    
_________________________________________________________________
lstm_13 (LSTM)               (None, 128)               117248    
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 129       
=================================================================
Total params: 831,377
Trainable params: 117,377
Non-trainable params: 714,000
_________________________________________________________________

Fitting

history_pre_trained = model_pre_trained.fit(X_fin, y, batch_size = BATCH_SIZE, epochs =20, verbose =1, validation_split = 0.2)
Train on 1796 samples, validate on 449 samples
Epoch 1/20
1796/1796 [==============================] - 4s 2ms/step - loss: 0.5429 - accuracy: 0.1754 - val_loss: -0.4417 - val_accuracy: 0.2472
Epoch 2/20
1796/1796 [==============================] - 3s 2ms/step - loss: -6.7429 - accuracy: 0.1776 - val_loss: -14.1017 - val_accuracy: 0.2472
Epoch 3/20
1796/1796 [==============================] - 3s 2ms/step - loss: -15.8550 - accuracy: 0.1776 - val_loss: -19.5441 - val_accuracy: 0.2472
Epoch 4/20
1796/1796 [==============================] - 3s 2ms/step - loss: -20.7949 - accuracy: 0.1776 - val_loss: -23.4335 - val_accuracy: 0.2472
Epoch 5/20
1796/1796 [==============================] - 3s 2ms/step - loss: -24.1430 - accuracy: 0.1776 - val_loss: -25.9735 - val_accuracy: 0.2472
Epoch 6/20
1796/1796 [==============================] - 3s 2ms/step - loss: -26.4535 - accuracy: 0.1776 - val_loss: -28.0725 - val_accuracy: 0.2472
Epoch 7/20
1796/1796 [==============================] - 3s 2ms/step - loss: -28.4266 - accuracy: 0.1776 - val_loss: -29.9313 - val_accuracy: 0.2472
Epoch 8/20
1796/1796 [==============================] - 3s 2ms/step - loss: -30.1754 - accuracy: 0.1776 - val_loss: -31.6261 - val_accuracy: 0.2472
Epoch 9/20
1796/1796 [==============================] - 3s 2ms/step - loss: -31.8791 - accuracy: 0.1776 - val_loss: -33.3337 - val_accuracy: 0.2472
Epoch 10/20
1796/1796 [==============================] - 4s 2ms/step - loss: -33.5166 - accuracy: 0.1776 - val_loss: -34.9834 - val_accuracy: 0.2472
Epoch 11/20
1796/1796 [==============================] - 3s 2ms/step - loss: -35.1544 - accuracy: 0.1776 - val_loss: -36.5973 - val_accuracy: 0.2472
Epoch 12/20
1796/1796 [==============================] - 3s 2ms/step - loss: -36.7253 - accuracy: 0.1776 - val_loss: -38.2070 - val_accuracy: 0.2472
Epoch 13/20
1796/1796 [==============================] - 3s 2ms/step - loss: -38.3344 - accuracy: 0.1776 - val_loss: -39.8655 - val_accuracy: 0.2472
Epoch 14/20
1796/1796 [==============================] - 3s 2ms/step - loss: -39.9810 - accuracy: 0.1776 - val_loss: -41.5162 - val_accuracy: 0.2472
Epoch 15/20
1796/1796 [==============================] - 3s 1ms/step - loss: -41.6567 - accuracy: 0.1776 - val_loss: -43.2049 - val_accuracy: 0.2472
Epoch 16/20
1796/1796 [==============================] - 3s 1ms/step - loss: -43.2579 - accuracy: 0.1776 - val_loss: -44.8235 - val_accuracy: 0.2472
Epoch 17/20
1796/1796 [==============================] - 3s 1ms/step - loss: -44.9030 - accuracy: 0.1776 - val_loss: -46.4982 - val_accuracy: 0.2472
Epoch 18/20
1796/1796 [==============================] - 2s 1ms/step - loss: -46.5038 - accuracy: 0.1776 - val_loss: -48.0627 - val_accuracy: 0.2472
Epoch 19/20
1796/1796 [==============================] - 3s 1ms/step - loss: -48.0124 - accuracy: 0.1776 - val_loss: -49.5424 - val_accuracy: 0.2472
Epoch 20/20
1796/1796 [==============================] - 2s 1ms/step - loss: -49.5209 - accuracy: 0.1776 - val_loss: -51.1489 - val_accuracy: 0.2472

Evaluation du modèle

score = model_pre_trained.evaluate(X_test, y_test, verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: -51.148848297866785
Test accuracy: 0.18930958211421967

ptoblèmes: npmbre important de stopwords à rajouter au dictionnaire, doutes sur la fonction dactivation, stemming/lemmatization qui semble peu efficace; axes d'amélioration: explorer les N grammes pouir contextualiser les mots et creer u_n dictionnaire de stopwords customisé pour les deabts ( association d'idées)/


Owner
Pamela Dekas
Adepte de text mining, deep learning and data visualization
Pamela Dekas
TEA: A Sequential Recommendation Framework via Temporally Evolving Aggregations

TEA: A Sequential Recommendation Framework via Temporally Evolving Aggregations Requirements python 3.6 torch 1.9 numpy 1.19 Quick Start The experimen

DMIRLAB 4 Oct 16, 2022
天勤量化开发包, 期货量化, 实时行情/历史数据/实盘交易

TqSdk 天勤量化交易策略程序开发包 TqSdk 是一个由信易科技发起并贡献主要代码的开源 python 库. 依托快期多年积累成熟的交易及行情服务器体系, TqSdk 支持用户使用极少的代码量构建各种类型的量化交易策略程序, 并提供包含期货、期权、股票的 历史数据-实时数据-开发调试-策略回测-

信易科技 2.8k Dec 30, 2022
Action Recognition for Self-Driving Cars

Action Recognition for Self-Driving Cars This repo contains the codes for the 2021 Fall semester project "Action Recognition for Self-Driving Cars" at

VITA lab at EPFL 3 Apr 07, 2022
Code for CVPR2021 "Visualizing Adapted Knowledge in Domain Transfer". Visualization for domain adaptation. #explainable-ai

Visualizing Adapted Knowledge in Domain Transfer @inproceedings{hou2021visualizing, title={Visualizing Adapted Knowledge in Domain Transfer}, auth

Yunzhong Hou 80 Dec 25, 2022
Official TensorFlow code for the forthcoming paper

~ Efficient-CapsNet ~ Are you tired of over inflated and overused convolutional neural networks? You're right! It's time for CAPSULES :)

Vittorio Mazzia 203 Jan 08, 2023
ML for NLP and Computer Vision.

Sparrow is our open-source ML product. It runs on Skipper MLOps infrastructure.

Katana ML 2 Nov 28, 2021
Code for ICCV 2021 paper "Distilling Holistic Knowledge with Graph Neural Networks"

HKD Code for ICCV 2021 paper "Distilling Holistic Knowledge with Graph Neural Networks" cifia-100 result The implementation of compared methods are ba

Wang Yucheng 30 Dec 18, 2022
[NeurIPS 2021] Towards Better Understanding of Training Certifiably Robust Models against Adversarial Examples | ⛰️⚠️

Towards Better Understanding of Training Certifiably Robust Models against Adversarial Examples This repository is the official implementation of "Tow

Sungyoon Lee 4 Jul 12, 2022
The implementation of PEMP in paper "Prior-Enhanced Few-Shot Segmentation with Meta-Prototypes"

Prior-Enhanced network with Meta-Prototypes (PEMP) This is the PyTorch implementation of PEMP. Overview of PEMP Meta-Prototypes & Adaptive Prototypes

Jianwei ZHANG 8 Oct 14, 2021
diablo2 resurrected loot filter

Only For Chinese and Traditional Chinese The filter only for Chinese and Traditional Chinese, i didn't change it for other language.Maybe you could mo

elmagnifico 249 Dec 04, 2022
A collection of pre-trained StyleGAN2 models trained on different datasets at different resolution.

Awesome Pretrained StyleGAN2 A collection of pre-trained StyleGAN2 models trained on different datasets at different resolution. Note the readme is a

Justin 1.1k Dec 24, 2022
Conflict-aware Inference of Python Compatible Runtime Environments with Domain Knowledge Graph, ICSE 2022

PyCRE Conflict-aware Inference of Python Compatible Runtime Environments with Domain Knowledge Graph, ICSE 2022 Dependencies This project is developed

<a href=[email protected]"> 7 May 06, 2022
Machine Learning Time-Series Platform

cesium: Open-Source Platform for Time Series Inference Summary cesium is an open source library that allows users to: extract features from raw time s

632 Dec 26, 2022
As a part of the HAKE project, includes the reproduced SOTA models and the corresponding HAKE-enhanced versions (CVPR2020).

HAKE-Action HAKE-Action (TensorFlow) is a project to open the SOTA action understanding studies based on our Human Activity Knowledge Engine. It inclu

Yong-Lu Li 94 Nov 18, 2022
This repository for project that can Automate Number Plate Recognition (ANPR) in Morocco Licensed Vehicles. 💻 + 🚙 + 🇲🇦 = 🤖 🕵🏻‍♂️

MoroccoAI Data Challenge (Edition #001) This Reposotory is result of our work in the comepetiton organized by MoroccoAI in the context of the first Mo

SAFOINE EL KHABICH 14 Oct 31, 2022
Training neural models with structured signals.

Neural Structured Learning in TensorFlow Neural Structured Learning (NSL) is a new learning paradigm to train neural networks by leveraging structured

955 Jan 02, 2023
CT-Net: Channel Tensorization Network for Video Classification

[ICLR2021] CT-Net: Channel Tensorization Network for Video Classification @inproceedings{ li2021ctnet, title={{\{}CT{\}}-Net: Channel Tensorization Ne

33 Nov 15, 2022
Text-to-SQL in the Wild: A Naturally-Occurring Dataset Based on Stack Exchange Data

SEDE SEDE (Stack Exchange Data Explorer) is new dataset for Text-to-SQL tasks with more than 12,000 SQL queries and their natural language description

Rupert. 83 Nov 11, 2022
Official implementation of "StyleCariGAN: Caricature Generation via StyleGAN Feature Map Modulation" (SIGGRAPH 2021)

StyleCariGAN: Caricature Generation via StyleGAN Feature Map Modulation This repository contains the official PyTorch implementation of the following

Wonjong Jang 270 Dec 30, 2022
[ICCV 2021] Target Adaptive Context Aggregation for Video Scene Graph Generation

Target Adaptive Context Aggregation for Video Scene Graph Generation This is a PyTorch implementation for Target Adaptive Context Aggregation for Vide

Multimedia Computing Group, Nanjing University 44 Dec 14, 2022