
Commit e523840

committed Aug 21, 2022
♻️ Mega refactor
1 parent 88d4fe6 · commit e523840

15 files changed: +385 −197 lines changed
 

baseline.py

+32 −31

@@ -13,6 +13,7 @@
 import pandas as pd
 import itertools

+
 class BaselineExperiment:

     def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None):
@@ -22,6 +23,22 @@ def __init__(self, task="polarity", sjv_classifier=None, sjv_vectorizer=None):
         self.sjv_classifier = sjv_classifier
         self.sjv_vectorizer = sjv_vectorizer

+    @staticmethod
+    def removeObjectiveSents(docs_sents, mask):
+        i = 0
+        remaining_sents = 0
+        clean_docs = []
+        for doc in docs_sents:
+            clean_docs.append([])
+            for sent in doc:
+                if mask[i] == 1:
+                    clean_docs[-1] += sent
+                    remaining_sents += 1
+                i += 1
+        clean_docs = [" ".join(sents) for sents in clean_docs]
+        print(f"Remaining {remaining_sents} sentences from original {i} sentences count.")
+        return clean_docs
+
     def prepare_data(self):
         print("Loading data")
         if self.task == "polarity":
@@ -33,33 +50,19 @@ def prepare_data(self):
             self.data_Y = [0] * len(neg_raw) + [1] * len(pos_raw)

         elif self.task == "subjectivity":
-            obj_fileid = subjectivity.fileids()[0]   # plot.tok.gt9.5000
-            subj_fileid = subjectivity.fileids()[1]  # quote.tok.gt9.5000
+            obj_fileid = subjectivity.fileids()[0]   # plot.tok.gt9.5000
+            subj_fileid = subjectivity.fileids()[1]  # quote.tok.gt9.5000

             # this to avoid splitting words into lists
             obj_raw = subjectivity.raw(fileids=obj_fileid).split('\n')[:5000]
             subj_raw = subjectivity.raw(fileids=subj_fileid).split('\n')[:5000]
             self.data_raw = obj_raw + subj_raw
             self.data_Y = [0] * len(obj_raw) + [1] * len(subj_raw)
-        elif (self.task == "polarity-no-obj-sents"
-              and self.sjv_classifier is not None
-              and self.sjv_vectorizer is not None
-              ):
-            def removeObjectiveSents(docs_sents, mask):
-                i = 0
-                remaining_sents = 0
-                clean_docs = []
-                for doc in docs_sents:
-                    clean_docs.append([])
-                    for sent in doc:
-                        if mask[i] == 1:
-                            clean_docs[-1] += sent
-                            remaining_sents += 1
-                        i += 1
-                clean_docs = [" ".join(sents) for sents in clean_docs]
-                print(f"Remaining {remaining_sents} sentences from original {i} sentences count.")
-                return clean_docs
-
+        elif (self.task == "polarity-filter"
+              and self.sjv_classifier is not None
+              and self.sjv_vectorizer is not None
+              ):
+
             # get docs divided in sentences
             negative_fileids = movie_reviews.fileids('neg')
             positive_fileids = movie_reviews.fileids('pos')
@@ -70,8 +73,8 @@ def removeObjectiveSents(docs_sents, mask):

             mr_sjv_vectors = self.sjv_vectorizer.transform(mr_sents)
             pred = self.sjv_classifier.predict(mr_sjv_vectors)
-
-            self.data_raw = removeObjectiveSents(mr_corpus, pred)
+
+            self.data_raw = BaselineExperiment.removeObjectiveSents(mr_corpus, pred)
             self.data_Y = [0] * len(negative_fileids) + [1] * len(positive_fileids)
         else:
             print("Cannot prepare data. Wrong parameters.")
@@ -81,15 +84,13 @@ def run(self):
         print(f"Running experiment {self.task} classification.")
         self.prepare_data()
         vectorizer = CountVectorizer()
-        classifier = MultinomialNB()
+        classifier = MultinomialNB()
         vectors = vectorizer.fit_transform(self.data_raw)
-        scores = cross_validate(classifier, vectors, self.data_Y, cv=StratifiedKFold(n_splits=N_FOLDS_BASELINE) , scoring=['accuracy', 'f1'], return_estimator=True)
+        scores = cross_validate(classifier, vectors, self.data_Y, cv=StratifiedKFold(n_splits=N_FOLDS_BASELINE), scoring=['accuracy', 'f1'], return_estimator=True)
         best_model = scores["estimator"][np.argmax(scores["test_accuracy"])]

         metrics_df = pd.DataFrame.from_dict(scores)
         metrics_df.drop("estimator", axis='columns', inplace=True)
-        metrics_df.loc["max"] = metrics_df[:N_FOLDS_BASELINE].max()
-        metrics_df.loc["min"] = metrics_df[:N_FOLDS_BASELINE].min()
         metrics_df.loc["mean"] = metrics_df[:N_FOLDS_BASELINE].mean()
         metrics_df.loc["std"] = metrics_df[:N_FOLDS_BASELINE].std()
         print(metrics_df)
@@ -101,12 +102,12 @@ def run(self):
 if __name__ == "__main__":
     # Run polarity on whole movie review dataset
     exp_polarity = BaselineExperiment(task="polarity")
-    exp_polarity.run()
+    exp_polarity.run()

     # Run subjectivity
     exp_subjectivity = BaselineExperiment(task="subjectivity")
-    sjv_classifier, sjv_vectorizer = exp_subjectivity.run()
+    sjv_classifier, sjv_vectorizer = exp_subjectivity.run()

     # Run polarity on movie review dataset removing objective sentences
-    exp = BaselineExperiment(task="polarity-no-obj-sents", sjv_classifier=sjv_classifier, sjv_vectorizer=sjv_vectorizer)
-    exp.run()
+    exp = BaselineExperiment(task="polarity-filter", sjv_classifier=sjv_classifier, sjv_vectorizer=sjv_vectorizer)
+    exp.run()
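A minimal usage sketch of the helper promoted to a static method above (not part of the commit; it assumes baseline.py is importable and uses toy documents, where each document is a list of tokenized sentences and mask flags the subjective ones):

from baseline import BaselineExperiment

# Toy corpus: two documents, three sentences in total (tokenized).
docs = [
    [["the", "plot", "is", "summarized"], ["i", "loved", "it"]],
    [["a", "boring", "mess"]],
]
mask = [0, 1, 1]  # 0 = objective sentence (dropped), 1 = subjective (kept)

# No instance is needed any more, since the helper is now a @staticmethod.
clean = BaselineExperiment.removeObjectiveSents(docs, mask)
# Prints "Remaining 2 sentences from original 3 sentences count."
print(clean)  # ['i loved it', 'a boring mess']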

data_processing.py

+26

@@ -2,6 +2,7 @@
 from collections import Counter
 from torch.utils.data import Dataset
 import torch
+from transformers import AutoTokenizer


 class Lang():
@@ -81,3 +82,28 @@ def merge(sequences):
         label = torch.LongTensor(new_item["label"])
         text_lens = torch.LongTensor(lenghts)
         return ({"document": src_docs, "text_lens": text_lens}, label)
+
+
+class TransformerDataset(Dataset):
+
+    def __init__(self, documents, labels, config, task):
+        self.tokenizer = AutoTokenizer.from_pretrained(config["pretrained_model"])
+        self.documents = documents
+        self.labels = labels
+
+        self.docs_tensor = self.tokenizer(self.documents,
+                                          padding='max_length',
+                                          max_length=config["sequence_max_len"][task],
+                                          truncation=True,
+                                          return_tensors="pt")
+
+    def __len__(self):
+        return len(self.documents)
+
+    def __getitem__(self, idx):
+        label = torch.tensor(self.labels[idx])
+        sample = {'input_ids': self.docs_tensor["input_ids"][idx],
+                  'attention_mask': self.docs_tensor["attention_mask"][idx]}
+        if "token_type_ids" in self.docs_tensor.keys():
+            sample["token_type_ids"] = self.docs_tensor["token_type_ids"][idx]
+        return sample, label
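A hypothetical smoke test for the new TransformerDataset (toy documents and labels; the config dict mirrors the keys of Transformer_config in settings.py and assumes the distilbert-base-uncased checkpoint can be downloaded):

from torch.utils.data import DataLoader
from data_processing import TransformerDataset

config = {
    "pretrained_model": "distilbert-base-uncased",
    "sequence_max_len": {"polarity": 512, "subjectivity": 128, "polarity-filter": 512},
}
docs = ["a gripping , well acted thriller .", "the plot follows a young detective ."]
labels = [1, 0]

dataset = TransformerDataset(docs, labels, config, task="subjectivity")
sample, label = dataset[0]
print(sample["input_ids"].shape)  # torch.Size([128]): padded to the per-task max length
loader = DataLoader(dataset, batch_size=2, shuffle=True)  # ready for the training loop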

dataset_stats.py

+75

@@ -0,0 +1,75 @@
+import nltk
+from nltk.corpus import movie_reviews, subjectivity, stopwords
+from experiment import Experiment
+from settings import STATS_SAVE_PATH
+from baseline import BaselineExperiment
+
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+import string
+
+def compute_stats(data, name):
+    stats = {}
+    seq_lens = [len(sents) for sents in data]
+    stats["num_sequences"] = len(data)
+    stats["num_words"] = sum([len(sent) for sents in data for sent in sents])
+    stats["avg_seq_len"] = np.average(seq_lens)
+    stats["max_seq_len"] = np.max(seq_lens)
+    stats["min_seq_len"] = np.min(seq_lens)
+
+    NLTK_STOP_WORDS = set(stopwords.words('english')+list(string.punctuation))
+    lexicon = set([w for doc in data for w in doc])
+
+    filtered_mr_words = [word for word in lexicon if not word in NLTK_STOP_WORDS]
+    lexicon_filtered = set(filtered_mr_words)
+
+    stats["lexicon_size"] = len(lexicon)
+    stats["lexicon_size_no_stopwords"] = len(lexicon_filtered)
+    return stats
+
+
+if __name__ == "__main__":
+    stats = {}
+
+    # Movie review dataset
+    negative_fileids = movie_reviews.fileids('neg')
+    positive_fileids = movie_reviews.fileids('pos')
+
+    # each is a list of documents
+    mr_neg_words = [movie_reviews.words(fileids=fileid) for fileid in negative_fileids]
+    mr_pos_words = [movie_reviews.words(fileids=fileid) for fileid in positive_fileids]
+    mr_neg_sents = [movie_reviews.sents(fileids=fileid) for fileid in negative_fileids]
+    mr_pos_sents = [movie_reviews.sents(fileids=fileid) for fileid in positive_fileids]
+
+    mr_sents = mr_neg_sents + mr_pos_sents
+    mr_words = mr_neg_words + mr_pos_words
+
+    stats["MR"] = compute_stats(mr_words, "MR")
+
+    # Treating MR as subjectivity dataset (list of sentences)
+    mr_sjv = [sent for doc in mr_sents for sent in doc]
+    stats["MR_sjv"] = compute_stats(mr_sjv, "MR_SJV")
+
+    # Subjectivity dataset
+    obj_fileid = subjectivity.fileids()[0]   # plot.tok.gt9.5000
+    subj_fileid = subjectivity.fileids()[1]  # quote.tok.gt9.5000
+    obj_words = subjectivity.sents(fileids=obj_fileid)
+    subj_words = subjectivity.sents(fileids=subj_fileid)
+    sjv_words = obj_words + subj_words
+    stats["SJV"] = compute_stats(sjv_words, "SJV")
+
+    # Clean MR
+    # Train baseline subjectivity classifier
+    exp_subjectivity = BaselineExperiment(task="subjectivity")
+    sjv_classifier, sjv_vectorizer = exp_subjectivity.run()
+    mr_vectors = sjv_vectorizer.transform([" ".join(sent) for sent in mr_sjv])
+    preds = sjv_classifier.predict(mr_vectors)
+
+    # Remove objective sentences
+    mr_sents_filtered = Experiment.removeObjectiveSents(mr_sents, preds)
+    stats["MR_clean_baseline"] = compute_stats(mr_sents_filtered, "MR_clean_baseline")
+
+    stats_df = pd.DataFrame.from_dict(stats, orient="index")
+    stats_df.to_csv(f"{STATS_SAVE_PATH}/datasets.csv")
+    print(stats_df)
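A minimal check of compute_stats on made-up token lists (illustrative only; it assumes the repo modules import cleanly and that the NLTK stopwords corpus has been downloaded):

from dataset_stats import compute_stats

toy_corpus = [["a", "fine", "film", "."], ["boring", "and", "predictable", "."]]
stats = compute_stats(toy_corpus, "toy")
print(stats["num_sequences"])  # 2 sequences
print(stats["avg_seq_len"])    # 4.0 tokens per sequence
print(stats["lexicon_size"])   # 7 distinct tokens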

experiment.py

+109 −101

@@ -7,12 +7,10 @@
 import pandas as pd
 from baseline import BaselineExperiment

-from models.models import SentimentGRU, SentimentCNN
-from transformer.data_processing import TransformerDataset
-from transformer.models import TransformerClassifier
+from models import BiGRU, TextCNN, TransformerClassifier
 from utils import init_weights
 from settings import *
-from data_processing import Lang, CustomDataset
+from data_processing import Lang, CustomDataset, TransformerDataset

 from nltk.corpus import movie_reviews, subjectivity
 from sklearn.model_selection import train_test_split
@@ -34,10 +32,12 @@ def __init__(self, model_name, task="polarity", sjv_classifier=None, sjv_vectorizer=None):
         self.sjv_classifier = sjv_classifier
         self.sjv_vectorizer = sjv_vectorizer

-        if model_name == "SentimentGRU":
-            self.model_config = SentimentGRU_config
-        if model_name == "SentimentCNN":
-            self.model_config = SentimentCNN_config
+        if model_name == "BiGRU":
+            self.model_config = BiGRU_config
+        if model_name == "BiGRUAttention":
+            self.model_config = BiGRUAttention_config
+        if model_name == "TextCNN":
+            self.model_config = TextCNN_config

     def prepare_data(self):
         if self.task == "polarity":
@@ -69,20 +69,10 @@ def prepare_data(self):
             self.data_Y += [1]*len(subj_sents)
             print("Total samples: ", len(self.data_raw))

-        elif (self.task == "polarity-no-obj-sents"
+        elif (self.task == "polarity-filter"
              and self.sjv_classifier is not None
              and self.sjv_vectorizer is not None
              ):
-            def removeObjectiveSents(docs_sents, mask):
-                i = 0
-                clean_docs = []
-                for doc in docs_sents:
-                    clean_docs.append([])
-                    for sent in doc:
-                        if mask[i] == 1:
-                            clean_docs[-1] += sent
-                        i += 1
-                return clean_docs

             # get docs divided in sentences
             negative_fileids = movie_reviews.fileids('neg')
@@ -95,7 +85,7 @@ def removeObjectiveSents(docs_sents, mask):
             # shallow subjectivity classifier is used to allow comparisons
             movie_sjv_vectors = self.sjv_vectorizer.transform(mr_sents)
             pred = self.sjv_classifier.predict(movie_sjv_vectors)
-            clean_mr = removeObjectiveSents(mr_docs_sents, pred)
+            clean_mr = Experiment.removeObjectiveSents(mr_docs_sents, pred)

             mr_neg = [{"document": doc, "label": 0} for doc in clean_mr[:1000]]
             mr_Y_neg = [0]*len(mr_neg)
@@ -111,6 +101,21 @@ def removeObjectiveSents(docs_sents, mask):
             print("Cannot prepare data. Wrong parameters.")
             exit()

+    @staticmethod
+    def removeObjectiveSents(docs_sents, mask):
+        i = 0
+        remaining_sents = 0
+        clean_docs = []
+        for doc in docs_sents:
+            clean_docs.append([])
+            for sent in doc:
+                if mask[i] == 1:
+                    clean_docs[-1] += sent
+                    remaining_sents += 1
+                i += 1
+        print(f"Remaining {remaining_sents} sentences from original {i} sentences count.")
+        return clean_docs
+
     def create_fold(self):
         train, test, _, _ = train_test_split(self.data_raw, self.data_Y, test_size=TRAIN_TEST_SPLIT,
                                              random_state=RANDOM_SEED,
@@ -122,46 +127,49 @@ def create_fold(self):
         train_dataset = CustomDataset(train, self.lang)
         test_dataset = CustomDataset(test, self.lang)

-        self.train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, collate_fn=train_dataset.collate_fn, shuffle=True)
-        self.test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, collate_fn=test_dataset.collate_fn, drop_last=True)
+        self.train_loader = DataLoader(train_dataset, batch_size=self.model_config["batch_size"], collate_fn=train_dataset.collate_fn, shuffle=True)
+        self.test_loader = DataLoader(test_dataset, batch_size=self.model_config["batch_size"], collate_fn=test_dataset.collate_fn)

     def run(self):
         self.prepare_data()
         models = []
         metrics_list = []
-        for i_fold in range(N_FOLDS):
+        for fold_idx in range(N_FOLDS):
             self.create_fold()

-            if self.model_name == "SentimentGRU":
+            if self.model_name == "BiGRU":
+                vocab_size = len(self.lang.word2id)
+                model = BiGRU(vocab_size, self.model_config)
+            if self.model_name == "BiGRUAttention":
                 vocab_size = len(self.lang.word2id)
-                model = SentimentGRU(vocab_size, self.model_config)
-            elif self.model_name == "SentimentCNN":
+                model = BiGRU(vocab_size, self.model_config)
+            elif self.model_name == "TextCNN":
                 vocab_size = len(self.lang.word2id)
-                model = SentimentCNN(vocab_size, self.model_config)
+                model = TextCNN(vocab_size, self.model_config)
             elif self.model_name == "Transformer":
                 model = TransformerClassifier(self.model_config)
             else:
                 print("Model name does not exist")
                 return
+
+            print(model)
             model.to(DEVICE)

             run = wandb.init(
                 project="NLU_SA",
                 entity="filippomomesso",
                 group=f"{self.model_name}",
-                name=f"fold_{i_fold:02d}",
+                name=f"{self.task}_{self.model_name}_fold_{fold_idx:02d}",
                 config={
-                    "model": self.model_name,
-                    "epochs": EPOCHS,
-                    "batch_size": BATCH_SIZE,
-                    "lr": LR,
+                    "task": self.task,
+                    **self.model_config,
                     "loss": "BCELoss",
                     "optimizer": "Adam"
                 }
             )
-            #wandb.watch(model, "gradients", log_freq=5)
+            wandb.watch(model, "gradients", log_freq=5)
             self.optimizer = optim.Adam(model.parameters(), lr=run.config['lr'])
-            self.cost_fn = torch.nn.BCEWithLogitsLoss() # Because we do not have the pad token
+            self.cost_fn = torch.nn.BCEWithLogitsLoss()

             best_model, metrics = self.training_loop(model, self.train_loader, self.test_loader, run)
             models.append(best_model)
@@ -171,15 +179,68 @@ def run(self):
         metrics_df = pd.DataFrame.from_dict(metrics_list)
         metrics_df.loc["mean"] = metrics_df[:N_FOLDS].mean()
         metrics_df.loc["std"] = metrics_df[:N_FOLDS].std()
-        metrics_df.loc["max"] = metrics_df[:N_FOLDS].max()
-        metrics_df.loc["min"] = metrics_df[:N_FOLDS].min()
         print(metrics_df)
-        metrics_df.to_csv(f"{self.model_name}_stats.csv")
+        metrics_df.to_csv(f"{STATS_SAVE_PATH}/{self.model_name}_{self.task}.csv")

         best_model_overall_idx = metrics_df["acc"].idxmax()
         return models[best_model_overall_idx]

-    def training_step(self, model, data_loader, optimizer, cost_function, clip=CLIP_GRADIENTS, epoch=0):
+    def training_loop(self, model, tr_dl, ts_dl, wandb_run, save=True):
+        print(f"Runnig: {wandb_run.name}")
+
+        # Check if model is pretrained to avoid initializing weights
+        if not wandb_run.config.get("pretrained"):
+            print("Model is not pretrained: initializing weigths.")
+            model.apply(init_weights)
+
+        optimizer = self.optimizer
+        cost_fn = self.cost_fn
+
+        best_loss = 0.
+        best_acc = 0.
+
+        print("Start training")
+        for e in tqdm(range(wandb_run.config['epochs']), desc="Training Loop"):
+            train_metrics = self.training_step(model, tr_dl, optimizer, cost_fn, clip=wandb_run.config["clip_gradients"], epoch=e)
+            test_metrics = self.test_step(model, ts_dl, cost_fn, epoch=e)
+
+            metrics = {**train_metrics, **test_metrics}
+            wandb.log(metrics)
+
+            train_loss = train_metrics['train/train_loss']
+            train_acc = train_metrics['train/train_acc']
+
+            test_loss = test_metrics['test/test_loss']
+            test_acc = test_metrics['test/test_acc']
+            test_f1 = test_metrics['test/test_f1']
+
+            if best_acc < test_acc or e == 0:
+                best_acc = test_acc
+                best_loss = test_loss
+                best_f1 = test_f1
+                best_model = copy.deepcopy(model)
+                # Save new best weights
+                if save:
+                    self.save_weights(e, model, optimizer, test_loss, f"{WEIGHTS_SAVE_PATH}/{wandb_run.name}.pth")
+                    artifact = wandb.Artifact(f'{wandb_run.name}', type='model', metadata={**wandb_run.config, **metrics})
+                    artifact.add_file(f"{WEIGHTS_SAVE_PATH}/{wandb_run.name}.pth")
+                    wandb_run.log_artifact(artifact)
+
+            print('\n Epoch: {:d}'.format(e + 1))
+            print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_acc))
+            print('\t Test loss {:.5f}, Test accuracy {:.2f}, Test F1 {:.2f}'.format(test_loss, test_acc, test_f1))
+            print('-----------------------------------------------------')
+
+        #visualize(best_model, ts_dl, wandb_run)
+        print('\t BEST Test loss {:.5f}, Test accuracy {:.2f}, Test F1 {:.2f}'.format(best_loss, best_acc, best_f1))
+        wandb.summary["test_best_loss"] = best_loss
+        wandb.summary["test_best_accuracy"] = best_acc
+        wandb.summary["test_best_f1"] = best_f1
+        wandb.finish()
+        best_metrics = {"loss": best_loss, "acc": best_acc, "f1": best_f1}
+        return best_model, best_metrics
+
+    def training_step(self, model, data_loader, optimizer, cost_function, clip=0, epoch=0):
         n_samples = 0
         cumulative_loss = 0.
         cumulative_accuracy = 0.
@@ -188,7 +249,7 @@ def training_step(self, model, data_loader, optimizer, cost_function, clip=CLIP_GRADIENTS, epoch=0):

         for batch_idx, (inputs, targets) in enumerate(tqdm(data_loader, desc="Training Step", leave=False)):
             for k in inputs.keys():
-                inputs[k] = inputs[k].to(DEVICE)
+                inputs[k] = inputs[k].to(DEVICE)
             targets = targets.to(DEVICE)
             outputs = model(inputs)

@@ -232,7 +293,7 @@ def test_step(self, model, data_loader, cost_function, epoch=0):
         with torch.no_grad():
             for batch_idx, (inputs, targets) in enumerate(tqdm(data_loader, desc="Test Step", leave=False)):
                 for k in inputs.keys():
-                    inputs[k] = inputs[k].to(DEVICE)
+                    inputs[k] = inputs[k].to(DEVICE)
                 targets = targets.to(DEVICE)
                 outputs = model(inputs)
                 loss = cost_function(outputs, targets.unsqueeze(-1).float())
@@ -282,58 +343,6 @@ def load_weights(self, model, optimizer, weights_path, DEVICE, scheduler=None):

         return epoch, model, optimizer, scheduler

-    def training_loop(self, model, tr_dl, ts_dl, wandb_run, save=False):
-        print(wandb_run.name)
-        model.apply(init_weights)
-        experiment = wandb_run.name
-
-        optimizer = self.optimizer
-        cost_fn = self.cost_fn
-
-        best_loss = 0.
-        best_acc = 0.
-
-        print("Start training")
-        for e in tqdm(range(wandb_run.config['epochs']), desc="Training Loop"):
-            train_metrics = self.training_step(model, tr_dl, optimizer, cost_fn, epoch=e)
-            test_metrics = self.test_step(model, ts_dl, cost_fn, epoch=e)
-
-            metrics = {**train_metrics, **test_metrics}
-            wandb.log(metrics)
-
-            train_loss = train_metrics['train/train_loss']
-            train_acc = train_metrics['train/train_acc']
-
-            test_loss = test_metrics['test/test_loss']
-            test_acc = test_metrics['test/test_acc']
-            test_f1 = test_metrics['test/test_f1']
-
-            if best_acc < test_acc or e == 0:
-                best_acc = test_acc
-                best_loss = test_loss
-                best_f1 = test_f1
-                best_model = copy.deepcopy(model)
-                # Save new best weights
-                if save:
-                    self.save_weights(e, model, optimizer, test_loss, f"./weights/{wandb_run.group}_{wandb_run.name}")
-                    artifact = wandb.Artifact(f'ResNet18CAN_{experiment}', type='model', metadata={**wandb_run.config, **metrics})
-                    artifact.add_file(f"./weights/{wandb_run.group}_{wandb_run.name}")
-                    wandb_run.log_artifact(artifact)
-
-            print('\n Epoch: {:d}'.format(e + 1))
-            print('\t Training loss {:.5f}, Training accuracy {:.2f}'.format(train_loss, train_acc))
-            print('\t Test loss {:.5f}, Test accuracy {:.2f}, Test F1 {:.2f}'.format(test_loss, test_acc, test_f1))
-            print('-----------------------------------------------------')
-
-        #visualize(best_model, ts_dl, wandb_run)
-        print('\t BEST Test loss {:.5f}, Test accuracy {:.2f}, Test F1 {:.2f}'.format(best_loss, best_acc, best_f1))
-        wandb.summary["test_best_loss"] = best_loss
-        wandb.summary["test_best_accuracy"] = best_acc
-        wandb.summary["test_best_f1"] = best_f1
-        wandb.finish()
-        best_metrics = {"loss": best_loss, "acc": best_acc, "f1": best_f1}
-        return best_model, best_metrics
-

 class TransformerExperiment(Experiment):
     def __init__(self, model_name, task="polarity", sjv_classifier=None, sjv_vectorizer=None):
@@ -343,15 +352,14 @@ def __init__(self, model_name, task="polarity", sjv_classifier=None, sjv_vectorizer=None):

     def create_fold(self):
         train, test, train_y, test_y = train_test_split(self.data_raw, self.data_Y, test_size=TRAIN_TEST_SPLIT,
-                                                         random_state=RANDOM_SEED,
-                                                         shuffle=True,
-                                                         stratify=self.data_Y)
-
-        train_dataset = TransformerDataset(train, train_y)
-        test_dataset = TransformerDataset(test, test_y)
+                                                         random_state=RANDOM_SEED,
+                                                         shuffle=True,
+                                                         stratify=self.data_Y)
+        train_dataset = TransformerDataset(train, train_y, self.model_config, self.task)
+        test_dataset = TransformerDataset(test, test_y, self.model_config, self.task)

-        self.train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
-        self.test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
+        self.train_loader = DataLoader(train_dataset, batch_size=self.model_config["batch_size"], shuffle=True)
+        self.test_loader = DataLoader(test_dataset, batch_size=self.model_config["batch_size"])

     def prepare_data(self):
-        BaselineExperiment.prepare_data(self)
+        BaselineExperiment.prepare_data(self)
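For reference, the per-fold aggregation now used in Experiment.run() (mean and std rows appended under the fold rows, then idxmax on accuracy to pick the best fold) can be reproduced in isolation with toy numbers; this snippet is illustrative, not repository code:

import pandas as pd

N_FOLDS = 5
metrics_list = [{"loss": 0.42, "acc": 0.81, "f1": 0.80},
                {"loss": 0.39, "acc": 0.83, "f1": 0.82},
                {"loss": 0.45, "acc": 0.80, "f1": 0.79},
                {"loss": 0.41, "acc": 0.82, "f1": 0.81},
                {"loss": 0.40, "acc": 0.84, "f1": 0.83}]

metrics_df = pd.DataFrame(metrics_list)
metrics_df.loc["mean"] = metrics_df[:N_FOLDS].mean()  # summary rows, as in run()
metrics_df.loc["std"] = metrics_df[:N_FOLDS].std()
print(metrics_df)

best_fold = metrics_df["acc"].idxmax()  # 4 for these toy values: the fold whose model is kept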

main.py

+29 −2

@@ -3,11 +3,38 @@

 if __name__ == "__main__":
     filter = False
+    # if filter:
+    #     # Run subjectivity
+    #     exp_subjectivity = BaselineExperiment(task="subjectivity")
+    #     sjv_classifier, sjv_vectorizer = exp_subjectivity.run()
+    #     exp = TransformerExperiment("Transformer", "polarity-filter", sjv_classifier, sjv_vectorizer)
+    # else:
+    #     exp = TransformerExperiment("Transformer", "subjectivity")
+    # best_model = exp.run()
+
+    # if filter:
+    #     # Run subjectivity
+    #     exp_subjectivity = BaselineExperiment(task="subjectivity")
+    #     sjv_classifier, sjv_vectorizer = exp_subjectivity.run()
+    #     exp = Experiment("BiGRU", "polarity-filter", sjv_classifier, sjv_vectorizer)
+    # else:
+    #     exp = Experiment("BiGRU", "polarity")
+    # best_model = exp.run()
+
+    # if filter:
+    #     # Run subjectivity
+    #     exp_subjectivity = BaselineExperiment(task="subjectivity")
+    #     sjv_classifier, sjv_vectorizer = exp_subjectivity.run()
+    #     exp = Experiment("BiGRUAttention", "polarity-filter", sjv_classifier, sjv_vectorizer)
+    # else:
+    #     exp = Experiment("BiGRUAttention", "subjectivity")
+    # best_model = exp.run()
+
     if filter:
         # Run subjectivity
         exp_subjectivity = BaselineExperiment(task="subjectivity")
         sjv_classifier, sjv_vectorizer = exp_subjectivity.run()
-        exp = TransformerExperiment("Transformer", "polarity-no-obj-sents", sjv_classifier, sjv_vectorizer)
+        exp = Experiment("TextCNN", "polarity-filter", sjv_classifier, sjv_vectorizer)
     else:
-        exp = TransformerExperiment("Transformer", "polarity")
+        exp = Experiment("TextCNN", "subjectivity")
     best_model = exp.run()

models/models.py → models.py

+29 −6

@@ -2,9 +2,15 @@
 from torch import nn
 from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
 from settings import PAD_TOKEN
+from transformers import AutoTokenizer, AutoModelForSequenceClassification


 class SoftAttention(nn.Module):
+    '''
+    Multilayer perception to learn attention coefficients.
+    As described in https://arxiv.org/pdf/1409.0473.pdf, Bengio et al. ICLR 2015
+    '''
+
     def __init__(self, dim, hidden_dim, dropout_ratio=0.1):
         super(SoftAttention, self).__init__()
         self.attention = nn.Sequential(
@@ -18,13 +24,14 @@ def __init__(self, dim, hidden_dim, dropout_ratio=0.1):
     def forward(self, context_vector):
         return self.attention(context_vector)

-class SentimentGRU(nn.Module):
+
+class BiGRU(nn.Module):
     '''
-    Architecture based on the one seen during lab
+    Architecture based on the one seen during lab.
     '''

     def __init__(self, vocab_size, config, pad_index=0):
-        super(SentimentGRU, self).__init__()
+        super(BiGRU, self).__init__()
         self.emb_size = config["emb_size"]
         self.hidden_size = config["hidden_size"]
         self.out_size = config["out_size"]
@@ -37,6 +44,8 @@ def __init__(self, vocab_size, config, pad_index=0):
         if self.attention:
             self.att_hidden_size = config["att_hidden_size"]
             self.attention_module = SoftAttention(self.hidden_size*self.num_dir, self.att_hidden_size, dropout_ratio=self.dropout_ratio)
+        if self.num_layers == 1:
+            self.dropout_ratio = 0

         self.embedding = nn.Embedding(vocab_size, self.emb_size, padding_idx=pad_index)
         self.utt_encoder = nn.GRU(self.emb_size, self.hidden_size, self.num_layers, bidirectional=self.bidirectional, dropout=self.dropout_ratio)
@@ -63,7 +72,7 @@ def forward(self, inputs):
         # "A potential issue with this encoder–decoder approach is that a neural network
         # needs to be able to compress all the necessary information of a source sentence into a fixed-length vector.
         # This may make it difficult for the neural network to cope with long sentences,
-        # especially those that are longer than the sentences in the training corpus."
+        # especially those that are longer than the sentences in the training corpus."
         # https://arxiv.org/pdf/1409.0473.pdf, Bengio et al. ICLR 2015
         if not self.attention:
             hidden_view = hidden.view(self.num_layers, self.num_dir, batch_size, self.hidden_size)  # 2 for bidirectional
@@ -86,7 +95,7 @@ def forward(self, inputs):
         return out


-class SentimentCNN(nn.Module):
+class TextCNN(nn.Module):
     '''
     Architecture based on:
     Yoon Kim. 2014. Convolutional Neural Networks for Sentence Classification.
@@ -102,7 +111,7 @@ class SentimentCNN(nn.Module):
     '''

     def __init__(self, vocab_size, config):
-        super(SentimentCNN, self).__init__()
+        super(TextCNN, self).__init__()
         self.emb_size = config["emb_size"]
         self.num_filters = config["num_filters"]
         self.filter_sizes = config["filter_sizes"]
@@ -129,3 +138,17 @@ def forward(self, inputs):
         x_fc = torch.cat([x_pool.squeeze(dim=2) for x_pool in x_pool_list], dim=1)
         logits = self.fc(self.dropout(x_fc))
         return logits
+
+
+class TransformerClassifier(nn.Module):
+
+    def __init__(self, config):
+        super(TransformerClassifier, self).__init__()
+        self.out_size = config["out_size"]
+        self.transformer = AutoModelForSequenceClassification.from_pretrained(
+            config["pretrained_model"],
+            num_labels=self.out_size,
+            ignore_mismatched_sizes=True)
+
+    def forward(self, input):
+        return self.transformer(**input, return_dict=True).logits
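A hypothetical forward-pass sketch for the new TransformerClassifier (toy input; assumes models.py is importable and the distilbert-base-uncased weights are available):

import torch
from transformers import AutoTokenizer
from models import TransformerClassifier

config = {"out_size": 1, "pretrained_model": "distilbert-base-uncased"}
model = TransformerClassifier(config)  # one output logit, matching BCEWithLogitsLoss

tokenizer = AutoTokenizer.from_pretrained(config["pretrained_model"])
batch = tokenizer(["a gripping , well acted thriller ."],
                  padding=True, truncation=True, return_tensors="pt")

with torch.no_grad():
    logits = model(batch)  # forward() unpacks the dict into the underlying model
print(logits.shape)        # torch.Size([1, 1]): one logit per document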

settings.py

+56 −10

@@ -7,39 +7,85 @@
 # nltk.download("movie_reviews")
 # nltk.download("subjectivity")

-N_FOLDS = 1
-N_FOLDS_BASELINE = 5
 RANDOM_SEED = 42
-BATCH_SIZE = 512
-PAD_TOKEN = 0
+N_FOLDS = 5
+N_FOLDS_BASELINE = 5
 TRAIN_TEST_SPLIT = 0.2
-EPOCHS = 10
+
+PAD_TOKEN = 0
+
+EPOCHS = 50
+EPOCHS_PRETRAINED = 5
+
 LR = 0.001
+LR_PRETRAINED = 5e-5
+
+SEQUENCE_MAX_LENGTHS = {
+    "polarity": 512,
+    "subjectivity": 128,
+    "polarity-filter": 512
+}
+
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-SEQUENCE_MAX_LENGTH= 10
 PRETRAINED_MODEL_NAME = "distilbert-base-uncased"
 CLIP_GRADIENTS = 5

+WEIGHTS_SAVE_PATH = "./weights"
+STATS_SAVE_PATH = "./stats"
+
 # models config
-SentimentGRU_config = {
+BiGRUAttention_config = {
+    "model_name": "BiGRUAttention",
+    "epochs": EPOCHS,
+    "batch_size": 256,
+    "lr": LR,
     "emb_size": 300,
     "hidden_size": 128,
     "out_size": 1,
     "num_layers": 2,
     "dropout_ratio": 0.5,
     "bidirectional": True,
     "attention": True,
-    "att_hidden_size": 64
+    "att_hidden_size": 64,
+    "clip_gradients": CLIP_GRADIENTS
 }

-SentimentCNN_config = {
+BiGRU_config = {
+    "model_name": "BiGRU",
+    "epochs": EPOCHS,
+    "batch_size": 256,
+    "lr": LR,
+    "emb_size": 300,
+    "hidden_size": 128,
+    "out_size": 1,
+    "num_layers": 2,
+    "dropout_ratio": 0.5,
+    "bidirectional": True,
+    "attention": False,
+    "clip_gradients": CLIP_GRADIENTS
+}
+
+TextCNN_config = {
+    "model_name": "TextCNN",
+    "epochs": EPOCHS,
+    "batch_size": 256,
+    "lr": LR,
     "emb_size": 300,
     "out_size": 1,
     "filter_sizes": [3, 5, 7],
     "num_filters": [100, 100, 100],
     "dropout_ratio": 0.5,
+    "clip_gradients": 0
 }

 Transformer_config = {
+    "model_name": "Transfomer",
+    "pretrained_model": PRETRAINED_MODEL_NAME,
+    "epochs": EPOCHS_PRETRAINED,
+    "batch_size": 32,
+    "lr": LR_PRETRAINED,
+    "sequence_max_len": SEQUENCE_MAX_LENGTHS,
     "out_size": 1,
-}
+    "pretrained": True,
+    "clip_gradients": 0
+}
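An illustrative look at how these per-model config dicts are consumed downstream (mirrors experiment.py and data_processing.py; the printed values come from this settings.py):

from settings import TextCNN_config, Transformer_config

print(TextCNN_config["batch_size"])      # 256: replaces the old global BATCH_SIZE
print(TextCNN_config["clip_gradients"])  # 0: gradient clipping disabled for TextCNN

task = "subjectivity"
print(Transformer_config["sequence_max_len"][task])  # 128: per-task max length instead of one global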

+8

@@ -0,0 +1,8 @@
+,fit_time,score_time,test_accuracy,test_f1
+0,0.00449824333190918,0.0020368099212646484,0.8225,0.8174807197943446
+1,0.004383087158203125,0.001954793930053711,0.8625,0.8641975308641976
+2,0.004187107086181641,0.0021529197692871094,0.84,0.8375634517766498
+3,0.003898143768310547,0.0018398761749267578,0.87,0.8659793814432989
+4,0.004026889801025391,0.0017871856689453125,0.825,0.8214285714285714
+mean,0.004198694229125976,0.001954317092895508,0.844,0.8413299310614125
+std,0.0002468359881403142,0.00014771421203888375,0.021549361939509966,0.022964961813428303

stats/baseline_polarity_stats.csv

+8

@@ -0,0 +1,8 @@
+,fit_time,score_time,test_accuracy,test_f1
+0,0.007039070129394531,0.0022912025451660156,0.8125,0.810126582278481
+1,0.006242990493774414,0.002053976058959961,0.8275,0.8261964735516373
+2,0.005444049835205078,0.002074003219604492,0.8075,0.80306905370844
+3,0.0054531097412109375,0.0020837783813476562,0.8325,0.830379746835443
+4,0.005436897277832031,0.002056121826171875,0.7925,0.785529715762274
+mean,0.005923223495483398,0.00211181640625,0.8145,0.811060314427255
+std,0.0007131781285630203,0.00010104794256270079,0.01604680653588123,0.018158244363148276

stats/baseline_subjectivity_stats.csv

+8

@@ -0,0 +1,8 @@
+,fit_time,score_time,test_accuracy,test_f1
+0,0.004384040832519531,0.002396821975708008,0.9175,0.9183572488866898
+1,0.003802061080932617,0.0023458003997802734,0.918,0.9196865817825661
+2,0.0037550926208496094,0.0023450851440429688,0.9275,0.9288168875797741
+3,0.0037908554077148438,0.0023431777954101562,0.9275,0.9284657128761717
+4,0.00376129150390625,0.002343893051147461,0.9105,0.9113422486379396
+mean,0.0038986682891845705,0.0023549556732177734,0.9202,0.9213337359526284
+std,0.00027204293300494247,2.3426126839661284e-05,0.00729383301152419,0.007386898266289606

stats/datasets.csv

+5

@@ -0,0 +1,5 @@
+,num_sequences,num_words,avg_seq_len,max_seq_len,min_seq_len,lexicon_size,lexicon_size_no_stopwords
+MR,2000,6226700,791.91,2879,19,39768,39587
+MR_sjv,65258,6226700,24.270127800422937,187,1,39768,39587
+SJV,10000,1049750,24.0576,120,10,23906,23737
+MR_clean_baseline,2000,3937817,501.3715,2169,19,30266,30088

transformer/__init__.py

Whitespace-only changes.

transformer/data_processing.py

-32
This file was deleted.

transformer/models.py

-15
This file was deleted.
File renamed without changes.
