diff --git a/alpha_automl/pipeline_search/agent_environment.py b/alpha_automl/pipeline_search/agent_environment.py
index 8e008e66..25f3b4da 100644
--- a/alpha_automl/pipeline_search/agent_environment.py
+++ b/alpha_automl/pipeline_search/agent_environment.py
@@ -40,8 +40,6 @@ def __init__(self, config: EnvContext):
         self.action_offsets = self.generate_action_offsets()
         self.action_space = Discrete(self.max_actions)
-
-        self.cur_player = 1  # NEVER USED - ONLY ONE PLAYER
 
     def reset(self, *, seed=None, options=None):
         # init number of steps
@@ -59,7 +57,7 @@ def step(self, action):
         offseted_action = self.action_offsets[curr_step]+action
         valid_action_size = self.action_spaces[curr_step]
         # Check the action is illegal
-        valid_moves = self.game.getValidMoves(self.board, self.cur_player)
+        valid_moves = self.game.getValidMoves(self.board)
         if action >= valid_action_size or valid_moves[offseted_action-1] != 1:
             return (
                 {"board": np.array(self.board).astype(np.uint8)},
@@ -89,13 +87,13 @@ def step(self, action):
 
         # update board with new action
         # print(f"action: {action}\n board: {self.board}")
-        self.board, _ = self.game.getNextState(self.board, self.cur_player, offseted_action-1)
+        self.board = self.game.getNextState(self.board, offseted_action-1)
         if self.num_steps > 9:
-            logger.info(f"[YFW]================={self.board[self.game.m:]}")
+            logger.debug(f"[YFW]================={self.board[self.game.m:]}")
 
         # reward: win(1) - pipeline score, not end(0) - 0, bad(2) - 0
         reward = 0
-        game_end = self.game.getGameEnded(self.board, self.cur_player)
+        game_end = self.game.getGameEnded(self.board)
         if game_end == 1:  # pipeline score over threshold
             try:
                 if self.game.problem == "REGRESSION":
diff --git a/alpha_automl/pipeline_search/agent_lab.py b/alpha_automl/pipeline_search/agent_lab.py
index 11b2a7ee..16e34ccf 100644
--- a/alpha_automl/pipeline_search/agent_lab.py
+++ b/alpha_automl/pipeline_search/agent_lab.py
@@ -70,7 +70,7 @@ def load_rllib_checkpoint(game, num_rollout_workers):
 
     # Checking if the list is empty or not
     if [f for f in os.listdir(PATH_TO_CHECKPOINT) if not f.startswith(".")] == []:
-        logger.info("[RlLib] Cannot read RlLib checkpoint, create a new one.")
+        logger.debug("[RlLib] Cannot read RlLib checkpoint, create a new one.")
         return config.build()
     else:
         algo = config.build()
@@ -88,21 +88,21 @@ def train_rllib_model(algo, time_bound, save_checkpoint=False):
     result = algo.train()
     last_best = result["episode_reward_mean"]
     best_unchanged_iter = 1
-    logger.info(pretty_print(result))
+    logger.debug(pretty_print(result))
     while True:
         if (
             time.time() > timeout
             or (best_unchanged_iter >= 600 and result["episode_reward_mean"] >= 0)
             # or result["episode_reward_mean"] >= 70
         ):
-            logger.info(f"[RlLib] Train Timeout")
+            logger.debug(f"[RlLib] Train Timeout")
             break
         if save_checkpoint and [f for f in os.listdir(PATH_TO_CHECKPOINT) if not f.startswith(".")] != []:
             weights = load_rllib_policy_weights()
             algo.set_weights(weights)
         result = algo.train()
-        logger.info(pretty_print(result))
+        logger.debug(pretty_print(result))
         # stop training of the target train steps or reward are reached
         if result["episode_reward_mean"] > last_best:
             last_best = result["episode_reward_mean"]
@@ -115,7 +115,7 @@ def train_rllib_model(algo, time_bound, save_checkpoint=False):
 
 
 def load_rllib_policy_weights():
-    logger.info(f"[RlLib] Synchronizing model weights...")
+    logger.debug(f"[RlLib] Synchronizing model weights...")
     policy = Policy.from_checkpoint(PATH_TO_CHECKPOINT)
     policy = policy['default_policy']
     weights = policy.get_weights()
@@ -127,7 +127,7 @@ def save_rllib_checkpoint(algo):
     save_result = algo.save(checkpoint_dir=PATH_TO_CHECKPOINT)
     path_to_checkpoint = save_result.checkpoint.path
 
-    logger.info(
+    logger.debug(
        f"[RlLib] An Algorithm checkpoint has been created inside directory: '{path_to_checkpoint}'."
    )
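
Note: the two files above remove the vestigial two-player plumbing (`cur_player` was never meaningful; pipeline search has only one player) and demote routine progress logs from INFO to DEBUG. Below is a minimal, self-contained sketch of the single-player Gymnasium-style contract the environment now follows. The toy class is illustrative only, not the environment defined in agent_environment.py; the reward scheme mirrors the "win(1), not end(0)" comment in step(), and the exact return flags of the real early-return guard may differ.

    import numpy as np

    class ToySinglePlayerEnv:
        """Toy single-player env: no turn switching, the board is the whole state."""

        def __init__(self, board_size=5):
            self.board = np.zeros(board_size, dtype=np.uint8)

        def reset(self, *, seed=None, options=None):
            self.board[:] = 0
            return {"board": self.board.copy()}, {}

        def step(self, action):
            # Illegal move: return early without touching the board, zero reward
            # (the real step() has a similar early-return guard for invalid actions).
            if action >= len(self.board) or self.board[action] == 1:
                return {"board": self.board.copy()}, 0, False, False, {}
            self.board[action] = 1
            done = bool(self.board.all())
            reward = 1 if done else 0  # win(1) when finished, not end(0) otherwise
            return {"board": self.board.copy()}, reward, done, False, {}

    env = ToySinglePlayerEnv()
    obs, info = env.reset()
    obs, reward, terminated, truncated, info = env.step(2)
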
diff --git a/alpha_automl/pipeline_search/game.py b/alpha_automl/pipeline_search/game.py
index 8ef27d90..dc798d42 100644
--- a/alpha_automl/pipeline_search/game.py
+++ b/alpha_automl/pipeline_search/game.py
@@ -1,9 +1,6 @@
 from __future__ import print_function
-import os
-import pickle
 import math
 import logging
-from copy import deepcopy
 from alpha_automl.pipeline_search.game_logic import Board
 import numpy as np
 import traceback
@@ -14,7 +11,7 @@ class PipelineGame():
 
     # FIXEME: Maybe the input parameters can be in json
-    def __init__(self, input={}, eval_pipeline=None):
+    def __init__(self, input=None, eval_pipeline=None):
         self.steps = 0
         self.evaluations = {}
         self.eval_times = {}
@@ -53,18 +50,18 @@ def getActionSize(self):
         board = Board(self.m, self.grammar, self.pipeline_size, self.metric)
         return len(board.valid_moves)
 
-    def getNextState(self, board, player, action):
-        # if player takes action on board, return next (board,player)
+    def getNextState(self, board, action):
         # action must be a valid move
         b = Board(self.m, self.grammar, self.pipeline_size, self.metric)
         b.set_metafeatures(board)
         b.set_pipeline(board)
         # logger.debug('PREV STATE %s', b.pieces_p)
-        b.execute_move(action, player)
+        b.execute_move(action)
         # logger.debug('NEXT STATE %s', b.pieces_p)
-        return (b.pieces_m+b.pieces_p, -player)
-    def getValidMoves(self, board, player):
+        return b.pieces_m+b.pieces_p
+
+    def getValidMoves(self, board):
         # return a fixed size binary vector
         b = Board(self.m, self.grammar, self.pipeline_size, self.metric)
         b.set_metafeatures(board)
@@ -97,9 +94,8 @@ def getEvaluation(self, board):
 
         return eval_val
 
-    def getGameEnded(self, board, player, eval_val=None):
-        # return 0 if not ended, 1 if x won, -1 if x lost
-        # player = 1
+    def getGameEnded(self, board, eval_val=None):
+        # return 0 if not ended, 1 if x won, 2 if x lost
         b = Board(self.m, self.grammar, self.pipeline_size, self.metric)
         b.set_metafeatures(board)
@@ -117,21 +113,14 @@ def getGameEnded(self, board, player, eval_val=None):
         eval_val = self.getEvaluation(board)
 
-        if b.findWin(player, eval_val):
-            logger.debug('findwin %s', player)
+        if b.findWin(eval_val):
+            logger.debug('Win')
             return 1
-        if b.findWin(-player, eval_val):
-            logger.debug('findwin %', -player)
-            return -1
         if b.has_legal_moves():
             return 0
 
         return 2
 
-    def getCanonicalForm(self, board, player):
-        # return state if player==1, else return -state if player==-1
-        return deepcopy(board)
-
     def stringRepresentation(self, board):
         # 3x3 numpy array (canonical board)
         return np.asarray(board).tostring()
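
Note: with the player argument gone, getNextState returns just the successor board instead of a (board, -player) pair, and getGameEnded collapses to three codes: 0 (not ended), 1 (win), 2 (no legal moves left); the old -1 opponent-win path disappears along with the second player. A hedged sketch of driving the simplified interface; random_rollout is an illustrative helper, not part of this diff, and `game` is assumed to be an already-constructed PipelineGame with `board` an initial state.

    import random

    def random_rollout(game, board):
        # Play random valid moves until the game reports a win (1) or dead end (2).
        while game.getGameEnded(board) == 0:
            valid = game.getValidMoves(board)  # fixed-size binary vector
            moves = [i for i, is_valid in enumerate(valid) if is_valid == 1]
            board = game.getNextState(board, random.choice(moves))
        return game.getGameEnded(board)
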
diff --git a/alpha_automl/pipeline_search/game_logic.py b/alpha_automl/pipeline_search/game_logic.py
index 6608c0ca..21810cb1 100644
--- a/alpha_automl/pipeline_search/game_logic.py
+++ b/alpha_automl/pipeline_search/game_logic.py
@@ -16,7 +16,7 @@ class Board():
 
-    def __init__(self, m=30, grammar={}, pipeline_size=6, metric='f1macro', win_threshold=0.01):
+    def __init__(self, m=30, grammar=None, pipeline_size=6, metric='accuracy', win_threshold=0.01):
         "Set up initial board configuration."
 
         self.terminals = grammar['TERMINALS']
@@ -61,7 +61,7 @@ def is_terminal_pipeline(self):
                 return False
         return True
 
-    def findWin(self, player, eval_val=None):
+    def findWin(self, eval_val=None):
         """Find win of the given color in row, column, or diagonal
         (1 for x, -1 for o)"""
         if not any(self[0:]):
@@ -135,7 +135,7 @@ def get_train_board(self):
     def get_board_size(self):
         return self.m+(len(self.terminals)+len(self.non_terminals))
 
-    def execute_move(self, action, player):
+    def execute_move(self, action):
         """Perform the given move on the board; color gives the color of
         the piece to play (1=x,-1=o)
         """
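
Note: besides dropping `player` from findWin and execute_move, this hunk replaces the mutable default `grammar={}` with `grammar=None` (game.py makes the matching `input={}` to `input=None` change above). The pitfall being avoided: a default dict is created once, at function definition time, and is shared by every call that omits the argument. A short illustrative sketch of the pitfall and the usual idiom; this is not code from the diff, and Board.__init__ itself still expects callers to pass a real grammar:

    def remember(item, seen={}):  # BUG: one dict is shared across all calls
        seen[item] = True
        return seen

    remember('a')  # {'a': True}
    remember('b')  # {'a': True, 'b': True}  <- state leaked from the first call

    def remember_fixed(item, seen=None):
        seen = {} if seen is None else seen  # fresh dict per call
        seen[item] = True
        return seen
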
diff --git a/alpha_automl/pipeline_synthesis/setup_search.py b/alpha_automl/pipeline_synthesis/setup_search.py
index bcc0a9e5..4f092607 100644
--- a/alpha_automl/pipeline_synthesis/setup_search.py
+++ b/alpha_automl/pipeline_synthesis/setup_search.py
@@ -1,10 +1,7 @@
-import logging
 import sys
+import logging
 from datetime import datetime
-from os.path import join
-
-from alpha_automl.grammar_loader import (load_automatic_grammar,
-                                         load_manual_grammar)
+from alpha_automl.grammar_loader import load_manual_grammar
 from alpha_automl.pipeline_search.game import PipelineGame
 from alpha_automl.pipeline_search.agent_lab import pipeline_search_rllib, dump_result_to_json, read_result_to_pipeline
 from alpha_automl.pipeline_synthesis.pipeline_builder import BaseBuilder
@@ -27,7 +24,8 @@
         'TABULAR': 1,
         'TEXT': 2,
         'IMAGE': 3,
-        'VIDEO': 4
+        'VIDEO': 4,
+        'MULTIMODAL': 5
     },
     'PIPELINE_SIZE': 10
 }
@@ -40,56 +38,9 @@ def signal_handler(queue, signum):
     sys.exit(0)
 
 
-def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pipelines_hash):
-    # Verify if the classifiers are repeated in the ensembles (regardless of the order)
-    classifiers = []
-    pipeline_hash = ''
-    has_ensemble_primitive = False
-    has_repeated_classifiers = False
-
-    for primitive_name in pipeline_primitives:
-        primitive_type = all_primitives[primitive_name]['type']
-
-        if primitive_type == 'CLASSIFIER':
-            classifiers.append(primitive_name)
-        elif primitive_type == 'MULTI_ENSEMBLER':
-            has_ensemble_primitive = True
-            pipeline_hash += primitive_name
-            if len(classifiers) != len(set(classifiers)):  # All classifiers should be different
-                has_repeated_classifiers = True
-        else:
-            pipeline_hash += primitive_name
-
-    if not has_ensemble_primitive:
-        return False
-
-    if has_repeated_classifiers:
-        return True
-
-    pipeline_hash += ''.join(sorted(classifiers))
-
-    if pipeline_hash in ensemble_pipelines_hash:
-        return True
-    else:
-        ensemble_pipelines_hash.add(pipeline_hash)
-        return False
-
-def search_pipelines(
-    X,
-    y,
-    scoring,
-    splitting_strategy,
-    task_name,
-    time_bound,
-    automl_hyperparams,
-    metadata,
-    output_folder,
-    verbose,
-):
+def search_pipelines(X, y, scoring, splitting_strategy, task_name, time_bound, automl_hyperparams, metadata, output_folder, verbose):
     # signal.signal(signal.SIGTERM, lambda signum, frame: signal_handler(queue, signum))
-    hide_logs(
-        verbose
-    )  # Hide logs here too, since multiprocessing has some issues with loggers
+    hide_logs(verbose)  # Hide logs here too, since multiprocessing has some issues with loggers
     builder = BaseBuilder(metadata, automl_hyperparams)
 
     all_primitives = builder.all_primitives
@@ -159,6 +110,41 @@ def update_config(task_name, metric, grammar, metadata):
     return config
 
 
+def check_repeated_classifiers(pipeline_primitives, all_primitives, ensemble_pipelines_hash):
+    # Verify if the classifiers are repeated in the ensembles (regardless of the order)
+    classifiers = []
+    pipeline_hash = ''
+    has_ensemble_primitive = False
+    has_repeated_classifiers = False
+
+    for primitive_name in pipeline_primitives:
+        primitive_type = all_primitives[primitive_name]['type']
+
+        if primitive_type == 'CLASSIFIER':
+            classifiers.append(primitive_name)
+        elif primitive_type == 'MULTI_ENSEMBLER':
+            has_ensemble_primitive = True
+            pipeline_hash += primitive_name
+            if len(classifiers) != len(set(classifiers)):  # All classifiers should be different
+                has_repeated_classifiers = True
+        else:
+            pipeline_hash += primitive_name
+
+    if not has_ensemble_primitive:
+        return False
+
+    if has_repeated_classifiers:
+        return True
+
+    pipeline_hash += ''.join(sorted(classifiers))
+
+    if pipeline_hash in ensemble_pipelines_hash:
+        return True
+    else:
+        ensemble_pipelines_hash.add(pipeline_hash)
+        return False
+
+
 def compute_metafeatures(metadata):
     metafeatures = []
     # IMPUTE
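
Note: check_repeated_classifiers moves below update_config with its body unchanged. It returns True (reject) when an ensemble repeats a classifier, or when an ensemble with the same classifier set, in any order, was seen before; the order-insensitivity comes from appending the sorted classifier list to the pipeline hash. A hedged usage sketch follows; the primitive names and the 'FEATURE_SCALER' type string are invented for illustration, since only the 'CLASSIFIER' and 'MULTI_ENSEMBLER' types appear in this diff:

    all_primitives = {
        'sklearn.preprocessing.StandardScaler': {'type': 'FEATURE_SCALER'},
        'sklearn.svm.SVC': {'type': 'CLASSIFIER'},
        'sklearn.tree.DecisionTreeClassifier': {'type': 'CLASSIFIER'},
        'sklearn.ensemble.VotingClassifier': {'type': 'MULTI_ENSEMBLER'},
    }
    seen_hashes = set()
    pipeline = ['sklearn.preprocessing.StandardScaler',
                'sklearn.svm.SVC',
                'sklearn.tree.DecisionTreeClassifier',
                'sklearn.ensemble.VotingClassifier']

    check_repeated_classifiers(pipeline, all_primitives, seen_hashes)  # False: new ensemble, hash recorded
    check_repeated_classifiers(pipeline, all_primitives, seen_hashes)  # True: same classifier set already seen
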