<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <!-- Meta tags for social media banners; these should be filled in appropriately as they are your "business card" -->
  <!-- Replace the content attribute with appropriate information -->
  <meta name="description" content="DESCRIPTION META TAG">
  <meta property="og:title" content="SOCIAL MEDIA TITLE TAG"/>
  <meta property="og:description" content="SOCIAL MEDIA DESCRIPTION TAG"/>
  <meta property="og:url" content="URL OF THE WEBSITE"/>
  <!-- Path to banner image, should be in the path listed below. Optimal dimensions are 1200x630 -->
  <meta property="og:image" content="GenSAM_logo.png" />
  <meta property="og:image:width" content="1200"/>
  <meta property="og:image:height" content="630"/>

  <meta name="twitter:title" content="TWITTER BANNER TITLE META TAG">
  <meta name="twitter:description" content="TWITTER BANNER DESCRIPTION META TAG">
  <!-- Path to banner image, should be in the path listed below. Optimal dimensions are 1200x600 -->
  <meta name="twitter:image" content="GenSAM_logo.png">
  <meta name="twitter:card" content="summary_large_image">
  <!-- Keywords for your paper to be indexed by -->
  <meta name="keywords" content="KEYWORDS SHOULD BE PLACED HERE">
  <meta name="viewport" content="width=device-width, initial-scale=1">

  <title>Generalizable SAM</title>
  <link rel="icon" type="image/x-icon" href="GenSAM_logo.png">
  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro"
        rel="stylesheet">

  <link rel="stylesheet" href="static/css/bulma.min.css">
  <link rel="stylesheet" href="static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="static/css/fontawesome.all.min.css">
  <link rel="stylesheet"
        href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="static/css/index.css">

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script src="https://documentcloud.adobe.com/view-sdk/main.js"></script>
  <script defer src="static/js/fontawesome.all.min.js"></script>
  <script src="static/js/bulma-carousel.min.js"></script>
  <script src="static/js/bulma-slider.min.js"></script>
  <script src="static/js/index.js"></script>
</head>
<body>

<section class="hero">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">Relax Image-Specific Prompt Requirement in SAM: A Single Generic Prompt for Segmenting Camouflaged Objects</h1>
          <div class="is-size-5 publication-authors">
            <!-- Paper authors -->
            <span class="author-block">
              <a href="https://lwpyh.github.io/" target="_blank">Jian Hu</a><sup>*</sup>,</span>
            <span class="author-block">
              <a href="https://jylin8100.github.io/" target="_blank">Jiayi Lin</a><sup>*</sup>,</span>
            <span class="author-block">
              <a href="https://lvgd.github.io/" target="_blank">Weitong Cai</a>,</span>
            <span class="author-block">
              <a href="http://www.eecs.qmul.ac.uk/~sgg/" target="_blank">Shaogang Gong</a>
            </span>
          </div>

          <div class="is-size-5 publication-authors">
            <span class="author-block">Queen Mary University of London<br>AAAI 2024</span>
            <span class="eql-cntrb"><small><br><sup>*</sup>Equal Contribution</small></span>
          </div>

          <div class="column has-text-centered">
            <div class="publication-links">
              <!-- ArXiv abstract link -->
              <span class="link-block">
                <a href="https://arxiv.org/abs/2312.07374" target="_blank"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="ai ai-arxiv"></i>
                  </span>
                  <span>arXiv</span>
                </a>
              </span>

              <!-- Supplementary PDF link -->
              <span class="link-block">
                <a href="supplementary_material.pdf" target="_blank"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fas fa-file-pdf"></i>
                  </span>
                  <span>Supplementary</span>
                </a>
              </span>

              <!-- GitHub link -->
              <span class="link-block">
                <a href="https://github.com/jyLin8100/GenSAM" target="_blank"
                   class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
            </div>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>

<!-- Paper abstract -->
<section class="section hero is-light">
  <div class="container is-max-desktop">
    <div class="columns is-centered has-text-centered">
      <div class="column is-four-fifths">
        <h2 class="title is-3">Abstract</h2>
        <div class="content has-text-justified">
          <style>
            .content {
              font-family: "Times New Roman", Times, serif;
            }
          </style>
          <p>
            Camouflaged object detection (COD) approaches rely heavily on pixel-level annotated datasets. Weakly-supervised COD (WSCOD) approaches use sparse annotations such as scribbles or points to reduce annotation effort, but this can lead to decreased accuracy. The Segment Anything Model (SAM) shows remarkable segmentation ability with sparse prompts such as points. However, manual prompts are not always feasible, as they may not be accessible in real-world applications. Additionally, they provide only localization information rather than semantic information, which can intrinsically cause ambiguity in interpreting the targets. In this work, we aim to eliminate the need for manual prompts. The key idea is to employ Cross-modal Chains of Thought Prompting (CCTP) to reason visual prompts from the semantic information given by a generic text prompt. To that end, we introduce a per-instance test-time adaptation mechanism called Generalizable SAM (GenSAM) to automatically generate and optimize visual prompts from the generic task prompt for WSCOD. In particular, CCTP maps a single generic text prompt onto image-specific consensus foreground and background heatmaps using vision-language models, acquiring reliable visual prompts. Moreover, to adapt the visual prompts at test time, we further propose Progressive Mask Generation (PMG) to iteratively reweight the input image, guiding the model to focus on the targets in a coarse-to-fine manner. Crucially, all network parameters are fixed, avoiding the need for additional training. Experiments on three benchmarks demonstrate that GenSAM outperforms point-supervision approaches and achieves results comparable to scribble-supervision ones, relying solely on general task descriptions as prompts.
          </p>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End paper abstract -->
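
<!-- Illustrative pseudocode sketch -->
<section class="section" id="pseudocode">
  <div class="container is-max-desktop content">
    <h2 class="title">Method Sketch (Illustrative Pseudocode)</h2>
    <p>
      To make the abstract's description of CCTP and PMG concrete, below is a minimal,
      hypothetical Python sketch of a GenSAM-style test-time loop. It is <em>not</em> the
      released implementation (see the Code link above for that); the helpers
      <code>vlm_heatmaps</code> and <code>sam_segment</code> are stand-ins whose names and
      interfaces are assumptions made purely for illustration.
    </p>
    <pre><code>import numpy as np

# Hypothetical stand-in for CCTP: map a generic task prompt to consensus
# foreground/background heatmaps via vision-language reasoning.
def vlm_heatmaps(image, task_prompt):
    h, w = image.shape[:2]
    fg = np.random.rand(h, w)      # placeholder heatmap; a real system would query a VLM
    return fg, 1.0 - fg            # background map taken as the complement here

# Hypothetical stand-in for SAM prompted with one positive and one negative point.
def sam_segment(image, fg_point, bg_point):
    h, w = image.shape[:2]
    mask = np.zeros((h, w), dtype=np.float32)
    mask[fg_point] = 1.0           # trivially mark the prompted pixel
    return mask

def gensam_like_loop(image, task_prompt="the camouflaged animal", iters=3):
    """Iteratively derive point prompts from heatmaps and reweight the image
    (coarse-to-fine), with all network parameters kept frozen."""
    weighted = image.astype(np.float32)
    mask = np.zeros(image.shape[:2], dtype=np.float32)
    for _ in range(iters):
        fg_map, bg_map = vlm_heatmaps(weighted, task_prompt)
        fg_point = np.unravel_index(fg_map.argmax(), fg_map.shape)  # positive point
        bg_point = np.unravel_index(bg_map.argmax(), bg_map.shape)  # negative point
        mask = sam_segment(image, fg_point, bg_point)
        # PMG-style reweighting: emphasise the current mask region in the next round
        weighted = image * (0.5 + 0.5 * mask[..., None])
    return mask

if __name__ == "__main__":
    demo = np.random.rand(256, 256, 3)
    print(gensam_like_loop(demo).shape)  # (256, 256)
</code></pre>
  </div>
</section>
<!-- End illustrative pseudocode sketch -->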

<!-- Demo video -->
<div class="container is-max-desktop" style="width: 150%;">
  <div class="hero-body">
    <video width="100%" autoplay controls muted loop>
      <!-- Your video here -->
      <source src="demo_show.mp4" type="video/mp4">
    </video>
    <h2 class="subtitle has-text-centered">
      Demo of our proposed GenSAM.
    </h2>
  </div>
</div>
<!-- End demo video -->

<!-- Framework video -->
<div class="container is-max-desktop" style="width: 150%;">
  <div class="hero-body">
    <video width="100%" autoplay controls muted loop>
      <!-- Your video here -->
      <source src="framework.mp4" type="video/mp4">
    </video>
    <h2 class="subtitle has-text-centered">
      This video shows how our framework works.
    </h2>
  </div>
</div>
<!-- End framework video -->

<!-- Image carousel -->
<section class="hero is-small">
  <div class="hero-body">
    <div class="container">
      <div id="results-carousel" class="carousel results-carousel">
        <div class="item">
          <!-- Your image here -->
          <div style="display: flex; justify-content: center; align-items: center;">
            <img src="static/images/AIG_framework_v3.png" alt="MY ALT TEXT" style="width: 1000px; height: auto; margin-top: 30px;"/>
          </div>
          <h2 class="subtitle has-text-centered">
            Framework of our GenSAM.
          </h2>
        </div>
        <div class="item">
          <!-- Your image here -->
          <div style="display: flex; justify-content: center; align-items: center;">
            <img src="static/images/supp_cod.png" alt="MY ALT TEXT" style="width: 780px; height: auto; margin-top: -15px;"/>
          </div>
          <h2 class="subtitle has-text-centered">
            Example images on COD tasks.
          </h2>
        </div>
        <div class="item">
          <!-- Your image here -->
          <div style="display: flex; justify-content: center; align-items: center;">
            <img src="static/images/supp_other.png" alt="MY ALT TEXT" style="width: 800px; height: auto;"/>
          </div>
          <h2 class="subtitle has-text-centered">
            Example images on other tasks.
          </h2>
        </div>
        <div class="item">
          <!-- Your image here -->
          <div style="display: flex; justify-content: center; align-items: center;">
            <img src="static/images/result1.png" alt="MY ALT TEXT" style="width: 1000px; height: auto; margin-top: 60px;"/>
          </div>
          <h2 class="subtitle has-text-centered">
            Experiment results.
          </h2>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- End image carousel -->

<!-- Paper poster -->
<section class="hero is-small is-light">
  <div class="hero-body">
    <div class="container">
      <h2 class="title">Poster</h2>

      <iframe src="poster_GenSAM.pdf" width="100%" height="550">
      </iframe>
    </div>
  </div>
</section>
<!-- End paper poster -->

<!-- BibTeX citation -->
<section class="section" id="BibTeX">
  <div class="container is-max-desktop content">
    <h2 class="title">BibTeX</h2>
    <pre><code>@misc{hu2023relax,
      title={Relax Image-Specific Prompt Requirement in SAM: A Single Generic Prompt for Segmenting Camouflaged Objects},
      author={Jian Hu and Jiayi Lin and Weitong Cai and Shaogang Gong},
      year={2023},
      eprint={2312.07374},
      archivePrefix={arXiv},
      primaryClass={cs.CV}
}</code></pre>
  </div>
</section>
<!-- End BibTeX citation -->

<footer class="footer">
  <div class="container">
    <div class="columns is-centered">
      <div class="column is-8">
        <div class="content">

          <p>
            This page was built using the <a href="https://github.com/eliahuhorwitz/Academic-project-page-template" target="_blank">Academic Project Page Template</a>, which was adapted from the <a href="https://nerfies.github.io" target="_blank">Nerfies</a> project page.
            You are free to borrow the source code of this website; we just ask that you link back to this page in the footer. <br> This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/" target="_blank">Creative
            Commons Attribution-ShareAlike 4.0 International License</a>.
          </p>

        </div>
      </div>
    </div>
  </div>
</footer>

<!-- Statcounter tracking code -->

<!-- You can add a tracker to track page visits by creating an account at statcounter.com -->

<!-- End of Statcounter code -->

</body>
</html>