Deep learning model, heat map, data preprocessing

Overview

DEEP LEARNING ON THE US DEMOCRATIC DEBATES

By Pamela Dekas

import sys
import csv
import re 
import nltk
import string
import unicodedata
from textblob import TextBlob
from collections import Counter
import pandas as pd
import numpy as np
from wordcloud import WordCloud
from nltk.classify import * 
from nltk.corpus import stopwords
from sklearn.metrics import f1_score, roc_auc_score
from sklearn.feature_extraction.text import CountVectorizer
from nltk.tokenize import word_tokenize
import nltk.classify.util
import matplotlib.pyplot as plt
from string import punctuation 
from wordcloud import STOPWORDS
import os
from sklearn.model_selection import train_test_split
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence, text
from keras.callbacks import EarlyStopping
Using TensorFlow backend.



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input> in <module>()
     22 import os
     23 from sklearn.model_selection import train_test_split
---> 24 from keras.datasets import imdb
     25 from keras.models import Sequential
     26 from keras.layers import Dense

~\Anaconda3\lib\site-packages\keras\__init__.py in <module>()
      1 from __future__ import absolute_import
      2
----> 3 from . import utils
      4 from . import activations
      5 from . import applications

~\Anaconda3\lib\site-packages\keras\utils\__init__.py in <module>()
      4 from . import data_utils
      5 from . import io_utils
----> 6 from . import conv_utils
      7 from . import losses_utils
      8 from . import metrics_utils

~\Anaconda3\lib\site-packages\keras\utils\conv_utils.py in <module>()
      7 from six.moves import range
      8 import numpy as np
----> 9 from .. import backend as K

~\Anaconda3\lib\site-packages\keras\backend\__init__.py in <module>()
----> 1 from .load_backend import epsilon
      2 from .load_backend import set_epsilon
      3 from .load_backend import floatx
      4 from .load_backend import set_floatx
      5 from .load_backend import cast_to_floatx

~\Anaconda3\lib\site-packages\keras\backend\load_backend.py in <module>()
     88 elif _BACKEND == 'tensorflow':
     89     sys.stderr.write('Using TensorFlow backend.\n')
---> 90     from .tensorflow_backend import *
     91 else:
     92     # Try and load external backend.

~\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py in <module>()
     52
     53 # Private TF Keras utils
---> 54 get_graph = tf_keras_backend.get_graph
     55 # learning_phase_scope = tf_keras_backend.learning_phase_scope # TODO
     56 name_scope = tf.name_scope

AttributeError: module 'tensorflow.python.keras.backend' has no attribute 'get_graph'
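This AttributeError is the classic symptom of a standalone Keras release paired with an incompatible TensorFlow build. A minimal diagnostic sketch (an assumption about the environment, not part of the original run):

import tensorflow as tf
print(tf.__version__)  # compare against the installed keras.__version__

Pinning a known-good pair (for instance keras==2.3.1 with tensorflow==2.0 via pip), or importing from tensorflow.keras instead of the standalone keras package, typically resolves it; the imports evidently succeeded on a later attempt, since the rest of the notebook runs.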
df = pd.read_csv('debate_transcripts_v3_2020-02-26.csv', encoding='unicode_escape')
dem_speakers = df["speaker"]
number_of_speakers = len(set(dem_speakers))
print("Number of speakers:", number_of_speakers, "speakers")

# Mean speaking time per turn.
print("Mean speaking time:", np.mean(df["speaking_time_seconds"]), "seconds")
print("Dataset size:", len(df))
Number of speakers: 106 speakers
Mean speaking time: 16.49230769230769 seconds
Dataset size: 5911
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5911 entries, 0 to 5910
Data columns (total 6 columns):
date                     5911 non-null object
debate_name              5911 non-null object
debate_section           5911 non-null object
speaker                  5911 non-null object
speech                   5911 non-null object
speaking_time_seconds    5395 non-null float64
dtypes: float64(1), object(5)
memory usage: 277.2+ KB
df.groupby('speaker')['speaking_time_seconds'].sum().nlargest(10).plot.bar()
plt.title('Speaking time by speaker (top 10)')
plt.show()

[figure: bar chart of the ten largest total speaking times, by speaker]

debate_time = df.groupby(by=['speaker', 'date']).speaking_time_seconds.sum().nlargest(15)
debate_time.plot()

[figure: line plot of the fifteen largest per-debate speaking times, by speaker and date]

Dropping the columns that will not be used in the rest of the project, and building the final dataset

df = df.drop(['date', 'debate_name', 'debate_section', 'speaking_time_seconds'], axis=1)
df.head(5)
speaker speech
0 Norah O’Donnell Good evening and welcome, the Democratic presi...
1 Gayle King And Super Tuesday is just a week away and this...
2 Norah O’Donnell And CBS News is proud to bring you this debate...
3 Gayle King And we are partnering tonight also with Twitte...
4 Norah O’Donnell Now, here are the rules for the next two hours...

PREPROCESSING

import nltk
nltk.download('punkt')
Tailored_stopwords = ('im', 'ive', 'mr', 'weve', 'dont', 'well', 'will', 'make', 'us', 'we',
                      'I', 'make', 'got', 'need', 'want', 'think',
                      'going', 'go', 'one', 'thank', 'going',
                      'way', 'say', 'every', 're', 'us', 'first',
                      'now', 'said', 'know', 'look', 'done', 'take',
                      'number', 'two', 'three', 's', 'm', 't',
                      'let', 'don', 'tell', 've', 'im', 'mr', 'put', 'maybe', 'whether', 'many',
                      'll', 'around', 'thing', 'Secondly', 'doesn', 'lot')
# Start from the wordcloud STOPWORDS set and extend it with the tailored list.
stopwords = set(STOPWORDS)
stopwords = stopwords.union(Tailored_stopwords)
[nltk_data] Downloading package punkt to C:\Users\pamel.DESKTOP-O19M7N
[nltk_data]     F\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
def Text_cleansing(speech):
    speech = re.sub('@[A-Za-z0-9]+', '', str(speech))  # strip @mentions
    speech = re.sub('#', '', speech)                   # strip '#' hashtag signs
    speech = re.sub(r'\brt\b', '', speech)             # strip 'rt' as a whole word only
    speech = re.sub(',', ' ', speech)
    speech = re.sub('!', ' ', speech)
    speech = re.sub(':', ' ', speech)
    speech = re.sub("'", "", speech)
    speech = re.sub('"', '', speech)
    speech = speech.lower()
    speech = word_tokenize(speech)
    return speech

def remove_stopwords(speech):
    speech_clean = [word for word in speech if word not in stopwords]
    return speech_clean
df['speech_tokens']= df['speech'].apply(Text_cleansing)
df.head(5)
speaker speech speech_tokens
0 Norah O’Donnell Good evening and welcome, the Democratic presi... [good, evening, and, welcome, the, democratic,...
1 Gayle King And Super Tuesday is just a week away and this... [and, super, tuesday, is, just, a, week, away,...
2 Norah O’Donnell And CBS News is proud to bring you this debate... [and, cbs, news, is, proud, to, bring, you, th...
3 Gayle King And we are partnering tonight also with Twitte... [and, we, are, partnering, tonight, also, with...
4 Norah O’Donnell Now, here are the rules for the next two hours... [now, here, are, the, rules, for, the, next, t...
df['speech_clean']=df['speech_tokens'].apply(remove_stopwords)
df.head(5)
speaker speech speech_tokens speech_clean
0 Norah O’Donnell Good evening and welcome, the Democratic presi... [good, evening, and, welcome, the, democratic,... [good, evening, welcome, democratic, president...
1 Gayle King And Super Tuesday is just a week away and this... [and, super, tuesday, is, just, a, week, away,... [super, tuesday, week, away, biggest, primary,...
2 Norah O’Donnell And CBS News is proud to bring you this debate... [and, cbs, news, is, proud, to, bring, you, th... [cbs, news, proud, bring, debate, along, co-sp...
3 Gayle King And we are partnering tonight also with Twitte... [and, we, are, partnering, tonight, also, with... [partnering, tonight, twitter, ., home, partic...
4 Norah O’Donnell Now, here are the rules for the next two hours... [now, here, are, the, rules, for, the, next, t... [rules, next, hours, ., asked, question, minut...
def wordcloud(series):
    # Draw a word cloud from the printable form of the cleaned-speech Series.
    wordCloud = WordCloud(width=500, height=300, background_color='white',
                          max_font_size=110).generate(str(series))
    plt.imshow(wordCloud, interpolation="bilinear")
    plt.axis("off")
    plt.title("speech wordcloud")

wordcloud(df['speech_clean'])

[figure: word cloud of the cleaned speeches]

For the rest of the project, the speaker list is reduced to the most prominent candidates (top 7 speakers)

df = df.loc[df.speaker.isin({'Joe Biden', 'Bernie Sanders', 'Elizabeth Warren', 'Michael Bloomberg', 'Pete Buttigieg', 'Amy Klobuchar',  'Tulsi Gabbard'})]
df.head()
df.shape
(2245, 4)

CountVectorizer and construction of the per-candidate word dictionary, to be used with the back-up ML models

Lexical analysis

cv = CountVectorizer(stop_words=list(stopwords))
df_cv = cv.fit_transform(df.speech)
df_words = pd.DataFrame(df_cv.toarray(), columns=cv.get_feature_names())
# Label each row with its speaker, then transpose: every *column* of df_words
# is one individual speech, so speaker names repeat across columns.
df_words.index = df.speaker
df_words = df_words.transpose()
df_words
speaker  Bernie Sanders  Michael Bloomberg  Michael Bloomberg  Bernie Sanders  ...  Amy Klobuchar  Elizabeth Warren  Elizabeth Warren
00                    0                  0                  0               0  ...              0                 0                 0
000                   2                  0                  0               0  ...              0                 0                 0
001st                 0                  0                  0               0  ...              0                 0                 0
...                 ...                ...                ...             ...  ...            ...               ...               ...
zeroed                0                  0                  0               0  ...              0                 0                 0
zip                   0                  0                  0               0  ...              0                 0                 0
zone                  0                  0                  0               0  ...              0                 0                 0

6385 rows × 2245 columns

top_dict = {}
for c in df_words.columns:
    top = df_words[c].sort_values(ascending=False).head(30)
    top_dict[c]= list(zip(top.index, top.values))
for speaker, top_words in top_dict.items():
    print(speaker)
    print(', '.join([word for word, count in top_words[0:9]]))
    print('---')
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input> in <module>()
      1 top_dict = {}
      2 for c in df_words.columns:
----> 3     top = df_words[c].sort_values(ascending=False).head(30)
      4     top_dict[c]= list(zip(top.index, top.values))
      5 for speaker, top_words in top_dict.items():

TypeError: sort_values() missing 1 required positional argument: 'by'
df2=pd.DataFrame(top_dict)
df2.head(15)
from collections import Counter
words = []
for speaker in df_words.columns:
    top = [word for (word, count) in top_dict[speaker]]
    for t in top:
        words.append(t)
Counter(words).most_common(15)
---------------------------------------------------------------------------

KeyError                                  Traceback (most recent call last)


   
     in 
    
     ()
      2 words = []
      3 for speaker in df_words.columns:
----> 4     top = [word for (word, count) in top_dict[speaker]]
      5     for t in top:
      6         words.append(t)


KeyError: 'Bernie Sanders'

    
   
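Both failures have a single cause: because each speech is its own column, speaker names repeat, so df_words[c] selects a DataFrame (whose sort_values requires a by= argument) rather than a Series, and top_dict is never filled, hence the KeyError. A possible fix, sketched on the assumption that per-candidate totals are what the dictionary should hold, is to sum the duplicated columns first:

# Collapse duplicate speaker columns into one column per candidate.
df_by_speaker = df_words.groupby(level=0, axis=1).sum()

top_dict = {}
for c in df_by_speaker.columns:
    top = df_by_speaker[c].sort_values(ascending=False).head(30)
    top_dict[c] = list(zip(top.index, top.values))

for speaker, top_words in top_dict.items():
    print(speaker)
    print(', '.join(word for word, count in top_words[:9]))
    print('---')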

Implementation of the model

print(df.columns)
print(df.shape)
df['speaker'] = df['speaker'].astype(str)
Index(['speaker', 'speech', 'speech_tokens', 'speech_clean'], dtype='object')
(2245, 4)

Embedding

import gensim
RANDOM_STATE = 50
EPOCHS = 5
BATCH_SIZE = 256
EMB_DIM = 100
SAVE_MODEL = True

X = df['speech_clean']
print(X.head())
X.shape
5     [well, you’re, right, economy, really, great, ...
6                                            [senator-]
8     [think, donald, trump, thinks, would, better, ...
9     [oh, mr., bloomberg, ., let, tell, mr., putin,...
11     [know, president, russia, wants, it’s, chaos, .]
Name: speech_clean, dtype: object

(2245,)
emb_model = gensim.models.Word2Vec(sentences=X, size=EMB_DIM, window=5, workers=4, min_count=1)
print('Learned vocabulary size:', len(list(emb_model.wv.vocab)))
Learned vocabulary size: 7139
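A quick, purely illustrative sanity check of the learned vectors (assuming 'healthcare' occurs in the corpus; the neighbours returned depend on this particular training run):

emb_model.wv.most_similar('healthcare', topn=5)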
from keras.preprocessing.text import Tokenizer
max_length = max([len(s) for s in X])

tokenizer_new = Tokenizer()
tokenizer_new.fit_on_texts(X)

X_seq = tokenizer_new.texts_to_sequences(X)
X_fin = sequence.pad_sequences(X_seq, maxlen=max_length)
print(X_fin.shape)
(2245, 140)
emb_vec = emb_model.wv
MAX_NB_WORDS = len(list(emb_vec.vocab))
tokenizer_word_index = tokenizer_new.word_index
vocab_size = len(tokenizer_new.word_index) + 1
embedded_matrix = np.zeros((vocab_size, EMB_DIM))

# Copy each learned Word2Vec vector into the row given by its tokenizer index;
# words missing from the Word2Vec vocabulary keep their all-zero row.
for word, i in tokenizer_word_index.items():
    if i >= MAX_NB_WORDS:
        continue
    try:
        embedded_matrix[i] = emb_vec[word]
    except KeyError:
        continue
embedded_matrix.shape
(7140, 100)

Preparing the variables

from keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
y = df.speaker
print(y.head(10))  # note: this output reflects a re-run after the label encoding further down
y.shape
5     1
6     4
8     4
9     1
11    5
12    2
13    2
15    5
21    3
23    1
Name: speaker, dtype: int32

(2245,)
Counter(y)  # run before the label encoding below, hence the string keys
Counter({'Bernie Sanders': 430,
         'Michael Bloomberg': 97,
         'Pete Buttigieg': 392,
         'Elizabeth Warren': 440,
         'Joe Biden': 456,
         'Amy Klobuchar': 353,
         'Tulsi Gabbard': 77})
le=LabelEncoder()
df['speaker'] = le.fit_transform(df['speaker'])
df.head()

y = df.speaker
y.head()
print(y.shape)
print(X_fin.shape)
(2245,)
(2245, 140)
X_train, X_test, y_train, y_test = train_test_split(X_fin , y, test_size = 0.2, random_state = 42)


print(X_train.shape)
print(y_train.shape)
(1796, 140)
(1796,)
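Given the imbalance shown by the Counter above (456 Joe Biden turns against 77 for Tulsi Gabbard), a stratified split would keep the per-candidate proportions identical in both sets; a minimal variant with the same seed:

X_train, X_test, y_train, y_test = train_test_split(
    X_fin, y, test_size=0.2, random_state=42, stratify=y)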

Building the neural networks

model_pre_trained = Sequential()

model_pre_trained.add(Embedding(vocab_size, EMB_DIM, weights = [embedded_matrix], 
                    input_length = max_length, trainable = False))
model_pre_trained.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model_pre_trained.add(Dense(1, activation='softmax'))

model_pre_trained.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model_pre_trained.summary()
Model: "sequential_11"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_11 (Embedding)     (None, 140, 100)          714000    
_________________________________________________________________
lstm_13 (LSTM)               (None, 128)               117248    
_________________________________________________________________
dense_9 (Dense)              (None, 1)                 129       
=================================================================
Total params: 831,377
Trainable params: 117,377
Non-trainable params: 714,000
_________________________________________________________________
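As a check on these numbers: the frozen embedding contributes vocab_size × EMB_DIM = 7,140 × 100 = 714,000 weights, the LSTM contributes 4 × (100 + 128 + 1) × 128 = 117,248, and the Dense(1) head contributes 128 + 1 = 129, matching the 831,377 total, of which only the 117,377 non-embedding weights are trainable.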

Fitting

history_pre_trained = model_pre_trained.fit(X_fin, y, batch_size = BATCH_SIZE, epochs =20, verbose =1, validation_split = 0.2)
Train on 1796 samples, validate on 449 samples
Epoch 1/20
1796/1796 [==============================] - 4s 2ms/step - loss: 0.5429 - accuracy: 0.1754 - val_loss: -0.4417 - val_accuracy: 0.2472
Epoch 2/20
1796/1796 [==============================] - 3s 2ms/step - loss: -6.7429 - accuracy: 0.1776 - val_loss: -14.1017 - val_accuracy: 0.2472
Epoch 3/20
1796/1796 [==============================] - 3s 2ms/step - loss: -15.8550 - accuracy: 0.1776 - val_loss: -19.5441 - val_accuracy: 0.2472
Epoch 4/20
1796/1796 [==============================] - 3s 2ms/step - loss: -20.7949 - accuracy: 0.1776 - val_loss: -23.4335 - val_accuracy: 0.2472
Epoch 5/20
1796/1796 [==============================] - 3s 2ms/step - loss: -24.1430 - accuracy: 0.1776 - val_loss: -25.9735 - val_accuracy: 0.2472
Epoch 6/20
1796/1796 [==============================] - 3s 2ms/step - loss: -26.4535 - accuracy: 0.1776 - val_loss: -28.0725 - val_accuracy: 0.2472
Epoch 7/20
1796/1796 [==============================] - 3s 2ms/step - loss: -28.4266 - accuracy: 0.1776 - val_loss: -29.9313 - val_accuracy: 0.2472
Epoch 8/20
1796/1796 [==============================] - 3s 2ms/step - loss: -30.1754 - accuracy: 0.1776 - val_loss: -31.6261 - val_accuracy: 0.2472
Epoch 9/20
1796/1796 [==============================] - 3s 2ms/step - loss: -31.8791 - accuracy: 0.1776 - val_loss: -33.3337 - val_accuracy: 0.2472
Epoch 10/20
1796/1796 [==============================] - 4s 2ms/step - loss: -33.5166 - accuracy: 0.1776 - val_loss: -34.9834 - val_accuracy: 0.2472
Epoch 11/20
1796/1796 [==============================] - 3s 2ms/step - loss: -35.1544 - accuracy: 0.1776 - val_loss: -36.5973 - val_accuracy: 0.2472
Epoch 12/20
1796/1796 [==============================] - 3s 2ms/step - loss: -36.7253 - accuracy: 0.1776 - val_loss: -38.2070 - val_accuracy: 0.2472
Epoch 13/20
1796/1796 [==============================] - 3s 2ms/step - loss: -38.3344 - accuracy: 0.1776 - val_loss: -39.8655 - val_accuracy: 0.2472
Epoch 14/20
1796/1796 [==============================] - 3s 2ms/step - loss: -39.9810 - accuracy: 0.1776 - val_loss: -41.5162 - val_accuracy: 0.2472
Epoch 15/20
1796/1796 [==============================] - 3s 1ms/step - loss: -41.6567 - accuracy: 0.1776 - val_loss: -43.2049 - val_accuracy: 0.2472
Epoch 16/20
1796/1796 [==============================] - 3s 1ms/step - loss: -43.2579 - accuracy: 0.1776 - val_loss: -44.8235 - val_accuracy: 0.2472
Epoch 17/20
1796/1796 [==============================] - 3s 1ms/step - loss: -44.9030 - accuracy: 0.1776 - val_loss: -46.4982 - val_accuracy: 0.2472
Epoch 18/20
1796/1796 [==============================] - 2s 1ms/step - loss: -46.5038 - accuracy: 0.1776 - val_loss: -48.0627 - val_accuracy: 0.2472
Epoch 19/20
1796/1796 [==============================] - 3s 1ms/step - loss: -48.0124 - accuracy: 0.1776 - val_loss: -49.5424 - val_accuracy: 0.2472
Epoch 20/20
1796/1796 [==============================] - 2s 1ms/step - loss: -49.5209 - accuracy: 0.1776 - val_loss: -51.1489 - val_accuracy: 0.2472

Model evaluation

score = model_pre_trained.evaluate(X_test, y_test, verbose = 0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
Test loss: -51.148848297866785
Test accuracy: 0.18930958211421967
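The flat 17.8% training accuracy and the ever more negative loss are the signature of a mis-specified head: a single softmax unit always outputs 1.0, while categorical_crossentropy is fed raw integer labels. A corrected sketch (an assumption about the intended architecture, not a re-run): with seven candidates the output layer needs seven units, and sparse_categorical_crossentropy accepts the integer-encoded y directly. It also uses the train/test split and the EarlyStopping callback that were prepared above but never wired in:

num_classes = len(le.classes_)  # 7 candidates

model_fixed = Sequential()
model_fixed.add(Embedding(vocab_size, EMB_DIM, weights=[embedded_matrix],
                          input_length=max_length, trainable=False))
model_fixed.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model_fixed.add(Dense(num_classes, activation='softmax'))  # one unit per candidate

model_fixed.compile(loss='sparse_categorical_crossentropy',  # integer labels
                    optimizer='adam',
                    metrics=['accuracy'])

history_fixed = model_fixed.fit(X_train, y_train,
                                batch_size=BATCH_SIZE, epochs=20, verbose=1,
                                validation_data=(X_test, y_test),
                                callbacks=[EarlyStopping(patience=2)])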

Problems: a large number of stopwords still needs to be added to the dictionary, doubts about the activation function, and stemming/lemmatization that seems to add little. Avenues for improvement: explore n-grams to put words in context, and build a stopword dictionary customized for the debates (association of ideas).
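A minimal sketch of those two avenues, assuming the same df and stopword set as above: ngram_range=(1, 2) adds bigrams for context, and the most frequent surviving terms are natural candidates for a debate-specific stopword list:

cv_ng = CountVectorizer(stop_words=list(stopwords), ngram_range=(1, 2), max_features=5000)
counts = cv_ng.fit_transform(df.speech)
freq = pd.Series(np.asarray(counts.sum(axis=0)).ravel(),
                 index=cv_ng.get_feature_names()).nlargest(25)
print(freq)  # reviewed by hand, recurring debate filler terms feed the custom list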

