Commit d47c297 (parent: ce1a7f8)

added code for easy training and evaluation.

10 files changed: +854 −626 lines

Quickstart.ipynb (+4 −3)

@@ -83,7 +83,8 @@
    "hash": "800ed241f7db2bd3aa6942aa3be6809cdb30ee6b0a9e773dfecfa9fef1f4c586"
   },
   "kernelspec": {
-   "display_name": "Python 3.8.8 64-bit ('env2': conda)",
+   "display_name": "Python 3",
+   "language": "python",
    "name": "python3"
   },
   "language_info": {
@@ -96,9 +97,9 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.8.10"
   }
  },
 "nbformat": 4,
-"nbformat_minor": 2
+"nbformat_minor": 4
 }

Readme.md (+7 −5)

@@ -1,5 +1,5 @@
 # Image Segmentation Using Text and Image Prompts
-This repository contains the code used in the paper "Image Segmentation Using Text and Image Prompts".
+This repository contains the code used in the paper ["Image Segmentation Using Text and Image Prompts"](https://arxiv.org/abs/2112.10003).
 
 <img src="overview.png" alt="drawing" height="200em"/>
 
@@ -44,22 +44,24 @@ git clone https://github.com/juhongm999/hsnet.git
 - [CLIPSeg-D64](https://github.com/timojl/clipseg/raw/master/weights/rd64-uni.pth) (4.1MB, without CLIP weights)
 - [CLIPSeg-D16](https://github.com/timojl/clipseg/raw/master/weights/rd16-uni.pth) (1.1MB, without CLIP weights)
 
-### Training
+### Training and Evaluation
+
+To train, use the `training.py` script with an experiment file and an experiment id as parameters. E.g. `python training.py phrasecut.yaml 0` will train the first phrasecut experiment, which is defined by `configuration` and the first entry of `individual_configurations`. Model weights will be written to `logs/`.
+
+For evaluation, use `score.py`. E.g. `python score.py phrasecut.yaml 0 0` will evaluate the first phrasecut experiment, using `test_configuration` and the first configuration in `individual_configurations`.
 
-See the experiment folder for yaml definitions of the training configurations. The training code is in `experiment_setup.py`.
 
 ### Usage of PFENet Wrappers
 
 In order to use the dataset and model wrappers for PFENet, the PFENet repository needs to be cloned to the root folder.
 `git clone https://github.com/Jia-Research-Lab/PFENet.git `
 
 ### Citation
-
 ```
 @article{lueddecke21
   title={Image Segmentation Using Text and Image Prompts},
   author={Timo Lüddecke and Alexander Ecker},
-  journal={...},
+  journal={arXiv preprint arXiv:2112.10003},
   year={2021}
 }
 ```
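The training and evaluation commands above take an experiment yaml file and an index into its configurations. As a rough, non-authoritative sketch of how such a call could be resolved, assuming only the keys named in the README (`configuration`, `individual_configurations`); the repository's actual loader lives in `experiment_setup.py` and may well differ:

```
# Sketch only: resolve one training run from an experiment yaml, given an
# experiment id. Assumes the keys named in the README; this is not the
# repository's actual loader (experiment_setup.py).
import sys

import yaml  # PyYAML


def resolve_experiment(yaml_path, experiment_id):
    with open(yaml_path) as f:
        experiment = yaml.safe_load(f)

    # Shared settings, overridden by the selected individual configuration.
    config = dict(experiment.get('configuration', {}))
    config.update(experiment['individual_configurations'][experiment_id])
    return config


if __name__ == '__main__':
    # Mirrors the argument order of `python training.py phrasecut.yaml 0`.
    print(resolve_experiment(sys.argv[1], int(sys.argv[2])))
```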

datasets/pascal_zeroshot.py (+3 −3)

@@ -7,9 +7,9 @@
 from general_utils import log
 from torchvision import transforms
 
-# PASCAL_VOC_CLASSES_ZS = [['cattle.n.01', 'motorcycle.n.01'], ['aeroplane.n.01', 'sofa.n.01'],
-#                          ['cat.n.01', 'television.n.03'], ['train.n.01', 'bottle.n.01'],
-#                          ['chair.n.01', 'pot_plant.n.01']]
+PASCAL_VOC_CLASSES_ZS = [['cattle.n.01', 'motorcycle.n.01'], ['aeroplane.n.01', 'sofa.n.01'],
+                         ['cat.n.01', 'television.n.03'], ['train.n.01', 'bottle.n.01'],
+                         ['chair.n.01', 'pot_plant.n.01']]
 
 
 class PascalZeroShot(object):
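These pairs are the unseen-class splits for zero-shot segmentation; the `datasets/phrasecut.py` change below flattens the first `stop` pairs into a list of classes to exclude. A tiny self-contained illustration of that pattern (the value of `stop` is hypothetical):

```
# Illustration only: flatten the first `stop` zero-shot class pairs into a
# list of classes to exclude, mirroring the comprehension in phrasecut.py.
PASCAL_VOC_CLASSES_ZS = [['cattle.n.01', 'motorcycle.n.01'], ['aeroplane.n.01', 'sofa.n.01'],
                         ['cat.n.01', 'television.n.03'], ['train.n.01', 'bottle.n.01'],
                         ['chair.n.01', 'pot_plant.n.01']]

stop = 2  # hypothetical: exclude the first two pairs
avoid = [c for class_set in PASCAL_VOC_CLASSES_ZS[:stop] for c in class_set]
print(avoid)  # ['cattle.n.01', 'motorcycle.n.01', 'aeroplane.n.01', 'sofa.n.01']
```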

datasets/phrasecut.py (+6)

@@ -66,6 +66,7 @@ def __init__(self, split, image_size=400, negative_prob=0, aug=None, aug_color=F
         self.image_size = image_size
         self.with_visual = with_visual
         self.only_visual = only_visual
+        self.phrase_form = '{}'
         self.mask = mask
         self.aug_crop = aug_crop
 
@@ -125,7 +126,9 @@ def __init__(self, split, image_size=400, negative_prob=0, aug=None, aug_color=F
 
         elif remove_classes[0] == 'zs':
             stop = remove_classes[1]
+
             from datasets.pascal_zeroshot import PASCAL_VOC_CLASSES_ZS
+
             avoid = [c for class_set in PASCAL_VOC_CLASSES_ZS[:stop] for c in class_set]
             print(avoid)
 
@@ -209,6 +212,7 @@ def load_sample(self, sample_i, j):
 
         polys_phrase0 = img_ref_data['gt_Polygons'][j]
         phrase = img_ref_data['phrases'][j]
+        phrase = self.phrase_form.format(phrase)
 
         masks = []
         for polys in polys_phrase0:
@@ -248,6 +252,8 @@ def load_sample(self, sample_i, j):
 
         img = self.normalize(img)
 
+
+
         return img, seg, phrase
 
     def __getitem__(self, i):
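The `phrase_form` attribute added above is a format template applied to each phrase before the sample is returned; the default `'{}'` leaves phrases unchanged. A minimal sketch of the mechanism (the non-trivial template below is a hypothetical example, not something set in this commit):

```
# Sketch of the phrase_form templating introduced above.
phrase = 'red car'

phrase_form = '{}'                   # the commit's default: identity template
print(phrase_form.format(phrase))    # red car

phrase_form = 'a photo of a {}.'     # hypothetical prompt template
print(phrase_form.format(phrase))    # a photo of a red car.
```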

evaluation_utils.py (+1)

@@ -29,6 +29,7 @@ def norm(img):
     std = torch.Tensor([0.229, 0.224, 0.225])
     return (img - mean[:,None,None]) / std[:,None,None]
 
+
 def compute_shift(name, w, datasets, size=1, seed=1):
 
     if type(name) == str:
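For context, `norm` standardizes an image channel-wise with ImageNet statistics. A self-contained sketch is below; the mean values are the standard ImageNet means and are an assumption here, since that line falls outside the hunk shown above.

```
import torch


def norm(img):
    # Channel-wise standardization of a CHW tensor. The std values match the
    # hunk above; the mean values are the standard ImageNet means (assumed).
    mean = torch.Tensor([0.485, 0.456, 0.406])
    std = torch.Tensor([0.229, 0.224, 0.225])
    return (img - mean[:, None, None]) / std[:, None, None]


img = torch.rand(3, 352, 352)   # dummy CHW image in [0, 1]
print(norm(img).shape)          # torch.Size([3, 352, 352])
```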
