diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000000000000000000000000000000000..2c42816425db1f1263f95925124cda0a1ba95490
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,4 @@
+*.mp4 filter=lfs diff=lfs merge=lfs -text
+
+sam3/perflib/tests/assets/masks.tiff filter=lfs diff=lfs merge=lfs -text
+assets/** filter=lfs diff=lfs merge=lfs -text
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..f9ecf0b31915cfa2275069837b93e90c346eb1fd
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,54 @@
+__pycache__/
+.DS_Store
+example_sam3_project_local/
+*.py[cod]
+*$py.class
+*.so
+.Python
+env/
+venv/
+ENV/
+*.egg-info/
+.eggs/
+dist/
+build/
+
+# Source code protection
+# app.py is tracked in GitHub (private), but excluded from HuggingFace deployments
+# Use the 'push-hf.sh' script to deploy to HuggingFace without app.py
+encrypt_code.py
+setup_hooks.sh
+cookie_util.py
+
+# Gradio
+flagged/
+gradio_cached_examples/
+
+# Outputs
+*.mp4
+!bedroom.mp4
+!football.mp4
+!openfield_5mice_3s.mp4
+!openfield_5mice_5min.mp4
+!openfield_5mice_6min.mp4
+!openfield_5mice_7min.mp4
+!examples/*.mp4
+
+# Model checkpoints
+*.pth
+*.ckpt
+checkpoints/
+
+# Logs
+*.log
+
+# IDE/editor
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# OS
+.DS_Store
+Thumbs.db
+.env
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000000000000000000000000000000000000..19e546635e034bf5f20951b1e481a31912bb2c99
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,2 @@
+Unauthorized use of this code is prohibited.
+For inquiries regarding permissions, please send an email to: bellmake@naver.com
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..56ef86267932a5c1d5dbab3d35f5e7856aa5aa3a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,100 @@
+---
+title: SAM3 Video Segmentation and Tracking with Text Prompt
+emoji: 🎬
+colorFrom: blue
+colorTo: purple
+sdk: gradio
+sdk_version: 5.9.1
+app_file: app_loader.py
+pinned: false
+---
+
+# 🎬 SAM3 Video Segmentation and Tracking with Text Prompt
+
+A HuggingFace Spaces app that automatically segments and tracks objects in videos using text prompts.
+
+## ✨ Key Features
+
+- **Text-driven segmentation**: Describe the target object in natural language
+- **Multiple objects**: Segment several objects at once by separating them with commas
+- **Temporal consistency**: Tracks objects across frames for stable masks
+- **Distinct colors**: Assigns a distinct color to each object
+- **Live progress**: Watch processing updates in real time
+- **High-quality output**: Produces an MP4 encoded with H.264 for broad compatibility
+
+## 🚀 How to Use
+
+1. **Upload a video**: Provide an MP4 file
+2. **Enter prompts**: Type the objects to segment
+ - Single object: `bed`, `person`, `car`, `dog`, etc.
+ - Multiple objects: `bed, pillow, person` (comma-separated)
+3. **Run**: Click the "🚀 Run Segmentation" button
+4. **Review results**: Preview and download the segmented video
+
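+Instead of clicking through the UI, you can also call the Space programmatically. The snippet below is only a minimal sketch using `gradio_client`; the endpoint name and argument order are assumptions, so check the real signature with `client.view_api()` first.
+
+```python
+# Hypothetical programmatic call (endpoint name and argument order are
+# assumptions -- inspect the actual API with client.view_api()).
+from gradio_client import Client, handle_file
+
+client = Client("bellmake/SAM3-video-segmentation-tracking")
+result = client.predict(
+    handle_file("bedroom.mp4"),   # input video
+    "bed, pillow, person",        # comma-separated text prompts
+    api_name="/predict",          # assumed default endpoint name
+)
+print(result)  # expected: path to the segmented MP4
+```
+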
+## 📋 Examples
+
+### Single-object segmentation
+```
+Prompt: bed
+Result: Only the bed is segmented and highlighted
+```
+
+### Multi-object segmentation
+```
+Prompt: bed, pillow, person
+Result: Bed, pillow, and person are segmented with unique colors
+```
+
+## 🛠️ Tech Stack
+
+- **Model**: SAM3 (Segment Anything Model 3)
+- **Framework**: Gradio
+- **Video processing**: OpenCV, FFmpeg
+- **Deep learning**: PyTorch
+
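+As a rough illustration of how these pieces typically fit together (a sketch, not the app's actual code): decode frames with OpenCV, overlay the per-object masks produced by SAM3, then re-encode the result to H.264 with FFmpeg.
+
+```python
+# Illustrative sketch only: OpenCV for frame I/O, a placeholder where the
+# SAM3 masks would come from, and FFmpeg for the final H.264 encode.
+import subprocess
+import cv2
+import numpy as np
+
+def overlay_masks(frame, masks, colors):
+    """Blend each boolean mask onto the frame with its assigned BGR color."""
+    out = frame.copy()
+    for mask, color in zip(masks, colors):
+        out[mask] = (0.5 * out[mask] + 0.5 * np.array(color)).astype(np.uint8)
+    return out
+
+cap = cv2.VideoCapture("input.mp4")
+fps = cap.get(cv2.CAP_PROP_FPS)
+size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+writer = cv2.VideoWriter("raw.mp4", cv2.VideoWriter_fourcc(*"mp4v"), fps, size)
+
+while True:
+    ok, frame = cap.read()
+    if not ok:
+        break
+    masks, colors = [], []  # per-frame masks from SAM3 and one color per object
+    writer.write(overlay_masks(frame, masks, colors))
+
+cap.release()
+writer.release()
+
+# Re-encode with H.264 (libx264) for broad player/browser compatibility.
+subprocess.run(["ffmpeg", "-y", "-i", "raw.mp4", "-c:v", "libx264",
+                "-pix_fmt", "yuv420p", "output.mp4"], check=True)
+```
+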
+## 📊 Performance & Limitations
+
+- **Processing time**: Scales with video length and resolution
+- **Multiple objects**: More objects increase runtime (roughly N objects ≈ N× time)
+- **GPU requirement**: A GPU is needed to run SAM3 efficiently
+- **Memory**: High-resolution videos require more memory
+
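+Since SAM3 needs a GPU, it is worth confirming that PyTorch actually sees one before processing long videos, for example:
+
+```python
+import torch
+
+# Quick sanity check: SAM3 runs efficiently only on a CUDA-capable GPU.
+print("CUDA available:", torch.cuda.is_available())
+if torch.cuda.is_available():
+    print("Device:", torch.cuda.get_device_name(0))
+```
+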
+## 🔧 Run Locally
+
+```bash
+# Clone the space
+git clone https://huggingface.co/spaces/bellmake/SAM3-video-segmentation-tracking
+cd SAM3-video-segmentation-tracking
+
+# Install dependencies
+pip install -r requirements.txt
+
+# Start the app
+python app.py
+```
+
+## 📁 Project Structure
+
+```
+.
+├── app.py # Main Gradio app
+├── requirements.txt # Python dependencies
+├── packages.txt # System packages (FFmpeg)
+├── bedroom.mp4 # Sample video
+├── example_sam3_project_local/ # Local SAM3 project
+└── README.md # This file
+```
+
+## 🤝 Contributing
+
+Issues and pull requests are welcome!
+
+
+## 📄 License
+
+Unauthorized use of this code is prohibited.
+For inquiries regarding permissions, please send an email to: bellmake@naver.com
+
+---
+
+**Powered by SAM3 & HuggingFace Spaces**
diff --git a/app.py.encrypted b/app.py.encrypted
new file mode 100644
index 0000000000000000000000000000000000000000..8b361cef9cd6b75593c7f6611bde523f3180766a
--- /dev/null
+++ b/app.py.encrypted
@@ -0,0 +1 @@
+gAAAAABpQAOkwIIaR9EIc43PkLCBX7kvakkCrMiBG1HiyQ-E6BnxEQv6ZfuUmOXUdqzpDjvi46hivU6gQH3A_xan1i4BnqIbVJ3KQXcDwQ3hnDxIw_Bu2tS4IDXxRkGdf2x2ul4MSHwPQjxA50vomzoUspGusMvdajSTfQPxaHutBC4X4jOd70DYiJ_XsljNRyp5uxYSekvaNjc2Bbb47juhmuawgFKMqdpLxHgOzuzsbolBgXpmn_RJhhZesyO8-vepyzeqZcMKtbrwI2Wp2ff0LvzWc2h61DyEvDl1amnA94M8MPFkRdthMCczXe_fEVlNeiazKmsvEfnxUOvCx2yNizIxkWmNO_Tq8XYbcsrHI3gLzQRil3iP925hshQwcA27SSUQAu3J2W360Yhd0qE2SxcBKpiANWft-E7-Ds660D-zxod4dGkiSU5w-tsZ2g5VMNCQVqv-tlbDsREd5dn8WRsMkdV-bb5tVgVFXcx2jGiRtM6SOYLjx6sS7iRL03oPnOkum4MltQIcPU63fy_WDyIQu9n_lakuDJDPepvMw1jF4pD4KqfQIGBsphFjSnLZ-lDY8R5vNMm7beuzWN7ed2WfoId3-zI43BsXCdQ95BV16Vz5aycGAY1LVNuZfQYoWMHVTBrK7v8yeNexi5pIf4cTDR9gn29K8vXHB8CAaosfNlKj_2d1vXgm58bHKF9I8gE9Y_Zb9bh4W-LSdLAYKuMmtEUCEE9wEb0pnt9CKb7tNd2Un9patHuFe_xulF7NrwrVfKbKkx33BonoclvANMH6mACM1tsY_6D08hqQP_pmv4Blgf59MUmCWe4eq8vCPI4aYOjLEEDFE9ojvflxF-MycYk2fIiQNSCWvpNFR1FCHPjC-doFUjuspBkaO2O3zZAbky7-XuXafE7saF7I4Bcqa5uHZJbOE-BwYOzNjIMDIam8DPNLM8kokVWGXXZMwKVHVuIE1DJNCLvyenolQ55AT3ClqjDAMK4c68g_e5RMs47GF_3LViYIiZjTfMMOix2mguSRJVGr1Lm6tjlCHMBQMVEthpQRKt4lcYp59qyX6Q-EoHh7LF-b7ihW2ND9C5swCUgLoerugEoKH9yWQmfWGvxsPtyZ--xQvtLnHgNBkpmkorHvUnh_zuJWZZXT0xBPWu7NJrCdg9ZNlINnVJdMroMU1DbUspIxgLvXNfWAbZFmBuCu1eBSGpyHHcIwuTU-cq9zVr9VTDFlOTKzvVz9NyKigDToq3iJ2EM1X8oR7-OsVAy9_FvuGpH2i1InpP-aczUovEyATAx9H_nRyM_oFBRjlFW6A3xe4AdG8t752zuJ0ujcD3-fnCJpYXfA0FSIEDpVssQMtORMajVDdxYw9JNVfvVT1hC2nG2UHyVvuSBzPeMUPndwOmV6Faydjaj88fksJytAEG7veesLTMp9tUkMe_a6i56qdfkeuYReGRSZ_QDa8tE4KdRuG7vKVNWuY7Ekpt9BofMv07m6fBwqCQqbGXK49SjajkxKMbEdbct2b8kEz2gDELU2EHc3yre9rONho9wn_1-xPgBxPrpbc0eXlpoTGGizD7vdcaFdWT04vd9p6VTwdjMPxM4-bH3kzPnJXixKGCQ4ll8GelP2GewIpfYakDXeYQ6yeonBWGnGCCiClHhCtoS3nnbLjERo_z7A9nJVyKwFJ8VPNyCupiH0qTWmH3e9sDqEreSRRmILdzAbNDV0SX1_fMz9ePN2mDwjf_5eD3vctrKssryn1NkusPiM7cZXJLL4nXJmyJ8Prj0NjFnctKRGPVEJemMesx5K28HSB4myIv8vbt9UeeL9da-yNUYqN4ON4-Rh9r12vURRFFyi3U7aUcJ4GN7BRYt71GVFV1fnwCsOYzcFH50s-ubmt-JIt7tDpl344V97EcWUiQsmVP-JTbJb61e3YZSMeIjRZihBJGKdAz9nxmNH5Gw1J1NlBL265XrdO-UFl0sOVwAJ75JtIvH3peMsfW5nS2oQkVNRKYnwCkNO4stPOrYvawGdOI37SFn7RdQzwEu83_tXeo4DxhUhC1WtgJLOQlhadV9ikAq_13Ka43eYS2K5gPplfOZzo9BkkEGH4wjbQRWhWJAU7kwght6mAnAAwkg6F81DDPEm4e7SyKfeX5BGwYjI1A_3e-tPXCzeJTp6C084S1c1XyYp7K0W7_Wk10nqbskbpMgEckI5a8oa-YtjhSQTwFWeOJ2e0k3upfRntp_WJMqz2V0BFvJRMv3hxkof34Gn93GXZJijjXrpAxFEOrEBfyMd_LJeHKxXrYyanx9j5SvSDiBKXmjfyAqp1kS1lXoECYb0NAg2_X5CHKssBFMTJzNPdKnDseMaJg6I9B1oUXQ0dBTPk_HrtmFFV1QtJS-EfIXDWJXROoASvU_tpDdQq_U57PRJM_gCVRnp4TEAkv_XYaxP1S4MhuuP7-MZnyB25sC_xq9L9lLG8BtkkLcwymOG1Q053xzFp_GXAmKX0duEz4pu7Qe_m3Ki4yyzLOSCZyB1HofP23xM3pGDUbJwbLtUWUSnpP9wh0BaZmLKkbA07GuVp4xXdZxXhCSacxpNCJlVd0pzrrdPTHNZyzYa43RwfEbdRxj3yJIU4RAFL8NE78VKyG95eKeG8O86hSm4Uty2fvhn-iMMnHfEf8Fo_hgJ-IoQkbbKBXKD-z7b__uhDEIFsQPBVx_eTwHYIQDGFjP1qe5N5oCgY6scdM_Jy0pfa1NyIRPeBMLZk6yJaBlmdABPJdYfVHhrStdi1BsQvPdwXv-6L9-ehqYbmkTLcL4ORvZPaY5ttdACjuq182E8eegejCUfErGpu28oYjmQsjDLWjYyUFKFD90AjxZxlihNQ3x-o_MmdRbFOll1KD3jLqYx_PQ2AQ7rQR1-S749dTgjfYrl8zi0tr9X4ALOomYjOIusj-FdKLOYpwBtxCxNRT9rBROHt1WHhTA2dQT6RdJPBX1LPg37YnAaDuiuNtxdrrdJhN9K63FpbntG-1OWm7ODjuAXIISuafHIB6LvB9ow7W2vCYlcSYJTli4Zmpz3KDVzMAbm32bFTPcGRt5xSKC0Uhdlh-d8HHgpFgBgG958wKYz1XfW6jhwnsE581km6mbhvdPwVZS3mz34-E21nlwcl_E4qoKchy-pRhnH6f7LgCAXaKKdmYAK1L8Wfw35TW9ZWHds8i1q3JHIw14xAVlg2sNwyCsb0JeK2cFCnVt47ievFa3nFkgokMBRMc2q0ZgPv0ql_MPNYOK6tiw40yWXj0irU5ul0i9jEffohfaSnw8XG3yc_03ehfXZdY3yIDpmUzw_o0wqB2HXeNAyNkpf7pM3rQLY_bO7QgUhQw1xc4Y9clF5JKTFwg_IpPXSQo6sfHeOCDF8zwfnVGgvb7X5QDIkjIlQj0JPbZkS-ojqYssBDIQlvzayl-07o86MFnw6uxUhPjzKCJ8HGfHDB92dgPaYfqkBMOdsUtQOSCaMzds-KL5NNdTdIm1EiOIxd6I38FANMqlhE3Tcqvxhh2GASbWRXQfTxS2q9pnedqD6qzJcsGxwls
9Db4yCiIn741v2K0DMMN_LiqPhJObkpCFTBMx0UY_5h5MivRXIEHevU3Cvb4X_tN1vLqmmiXctlcF1-duNzYnoqu9jo2up7gzD9WqTi22KJTemXA3AG7eOBw8XIpTkCEkyF8GX2ig6yiWZiH686trfc7eiJ_FJP5FeCbi8PsEzunbJZUlF0m4Id5ZYZrxpOgxezNhpDozycTEnT9edX3WjveZSBKTJvF1Lj0E8QBG0tenhWrpBxrdTD2MyTw6PiiebXgqNSBs4mEzjtMKS2kemzYFn6tXtv2qoQ4HEGC4buet5ZYTBNcjS9YhUR5K2qTKZ-0VIhEFt593497beQF9SqPa8YhgWBpMlt3k6GprgQl-DkkpnVhwn3RvPyPIiPoXEU7GDo0DORbi2hSM4LMQCjgFyGVfuN4LjCnGJ4SoZNby7MQkhPdd8_Np5gGfn4pL2vNy63-9lx_z7KylR8R6qe7CE4r6MB_8_e9Tvi0fXvw8lzVDXuG6dTc4_CgBQREF8UinVO5gVUe5Pnpd-WZgE26jFaUMp4jjykRig8DOmuDl63XzsiILOkPJ7XB8XnabVVMruKQ1wO-aytI4QsdVnSU8oZ9h3cUZARyTxZHY5R80CATzuyrN-PU-nOQr64jUrWQdcYowuGqVu8bnMBSx93K4w_FjsbvPLsDILIi0H9l5bzs_Bz6sm8pZdqpYNrGdqzPafmDxB1OLeuUnQRvX71mZytVMlOwb_dwQD9jgNskoN71Irfl2koSaijIwLtISGdum1J2CpbxJld_w4rOFI6_hj_v4PP3JcFgbZXtm7FsiEREaRhlRlamZi8NaluKTl6B88byDRyR593GWiGMgZNlK7YpOCk6RcbGk7hULYvzkwnLIZKhN7YrSzM4BEMuWzbz3wqFramBzJgcG1eNqeZ89Uh8bIUvIrvBG0IiAw68Lu3Pd-kqSKYlWExd8tb6CZEDVghBZcz6zdwTyI5eSa7oh0noA33ODJNdJKTacTp7fEvDvM3c_8iSl3An0dO6BZM8G6gsPEoW-m5hvBDwR-SmqisGAHNMBf-hs_X5LGJUkaajrk_ek3YJyXMQ1KvOmu3I4E3H8odgwmBnlqq9cff4opvJTXQQVPPkFqClZvyAmhH3Q08v-6f2rBWlwjxchBWDESwSTtQk0x5mYZ96p4YltyPuXI3_BSqhJdVARC33upFk9RX0ZSWs6F3F1VwHRCef9vMap54_ihbhYbt6qgNSNjypX0IP0-bzNBMxemjpDjRRcP_gtJ2evAyjrt7vf6vuD53WGJ2dLCSfwEIy40Ze0hlwIDeewiHTlRrGm7N_Hmu9W3hmkRwQVKj8KPN7eCjJr7vztNd6NdjgpPncbwp6eCGCwNxx-m5el48gsrb634EogQrLWKQGyZzQKsNWr_yNsMgKdxyq7DbKRB8F0FBLPu8N3KkJjgKluxPAcJp3JweKON7Vr3wRgb8_mPvsVTsvBAHwL2JlKxXwWwyLBUqUS190m1QGVDY07vc5Nv0_jR3V5D7Ie2_zjt96sMLVxV_oZTcyYEkj6oJOLLgF-PP9eEOhTVqftOLortnfc6bVaBYnyV-h94fYjrQlR7vjYwSKb7RA_1Rzcsc9ElJyc8kwit-Nf9uOlwnTMj4NwPXJLkFHR0lKSyLIYbw4rMpKyQMZiJDrU6VrdgDMlYKZQTcpDHUFcK15BwAG0kBYTIzH68o_RnZYsN9P1rRXlm85Y_z0W2oQ4syvvBufkDBnIX8jn_kCpoCd_8kGtARvHCkzSNvx0VcG1ybrS5eN8LFeMjDM318T2v77f4bk29TSytPsqCwmZu5EDbxni7EBohLgDrEGze2uhRR5FlwDUo0pyUVwOLXVNbfCUuvTYg-k4P9fgHS79Dxaie4eAqWHKkwMAjqi4LFOnA310x3UQ16ouIUh-FfhhXDmaaU161ttLCXX4pELCq75FFLeLBfOyXbXUPBC-yGIT39Z5SefichVWzjvLJlXYLXxsumuM9uNQV2LBBnyDRvctUzmQP1esQhaNCAaf7zk17czhXVGj9OmI79jO87IsgGFSfj94wO-wSsduGnSFcFWN8Y-17aviwDNt-wHYnvkatDWhUmKCMsh2wTh_G2oNWTlOi2ugTIIC54_FnTWXSXtKbt3AEzGftgU1ZHc7q5WOxNnoHFBtdVpn8KlUuEkzXc9q1aRYogndvSBUvUCHEpSL36uiPfvDJPnEyMaGhprKz9WFoADbna-xYYgbA6TYiSZYOrQa_BAqR1PvDmwLRTCoq_HWV918xztF18Pa2X85xJZxWG4DqvEkZVFeuswifxM9L2UWXC1wSFYq_zxpxfiRNLZBtZjuur3xVOMQihjRcS4SgZxw7wHvO02LR0tZcbkdjIaZ36RmVE4Mvs4yQ8ETYIaN_UmNfMuYFbgiimY3xGOBviIM2uO4bXVbJmeXB7xtKOq9P5kMZuMRsHZxhrAxuQBZx0Zb1Swfry0S28HUlqURYfcY6SPjav9VlaI0uXjIMuPkiGdlYXb6zBjXpZr1Di55d8qcHvYlzA_jjwDaeg_d4N4Mt6wvY0P0dTsKonqOgz_4Qz1ojEJfOepfbst-yCmGz5-gcQMken8rOwOPE9ihWVYh17_YcHszKhLoLnl7Gmro9qOCTiIFQ5bjKZx0hEi500mLjhC9JzvYuldi76aHbPH6xhRukfBGUqbiknrhYlLH6mark3s40Xj2MULEU3TShpEB94NcV7jj97GMvyJO1lktPofzwdsH0doU3gJtJrWDERse8KUlwQrlMNesH8btn4D2g_6iEwSpymMPHqMrEbfxxasz4MQpMAFCMVWImAcdVlpNK-1gTzSf0YGAv_r6XT_CFvhRxh8WxpBy_NrjweUbRPa3xV1pcp3rZCAC9Gqu9RVeZ-ROnbjzml2St-EE6TJrkWpTlp-QBNF7CWQvNNSnnhDJv9BGNBCw_eL9lfRLz2ZF_5qj8DwOIqKunjsoEHpXu82cvJ0MjSZwuBmYbb0uuWC64qCy6OiI-IN6Q41_bqy84yCBasx_iqx_ag9TA2ltsRzjCtW0LanSFjxoulWTd0p5aknsZGhSP7vISDQS7IVVmcVpOHxgu-NsQkU_-rd_l0WzoQvvEYyYlNHhgmawnprZbm5pXZvXhKTDQgykA8Uk4H64W_Wcson9bNAig0J9_Ae71S3p4n_HDhmfAufMwX7YJ6ne8IQrjx0SW2n3LwBNAweZuP-8aj5hEDBRbcoCwO3p9tEs1TqFwIks3jTyjN3IEBVyWIawBFg6Ug13N4P2rnPk7d8oSsnSTADlDzt0NhOe7nKpX6WlYq4RTe5W_SpO8LPu3PfN8O2BOpHt7f6rf96_HHTSSddiSW666Whleq8Glsci95ojwpUOFFMvddUOGUGB6aVh-38FlDWiW3yO9Njxo07kjENdSWSu22nGGQ0Zl3KjaOy5v4pUuuomUUSEbewSvqd530A7eNcWqzLWhhWu-oRIMDEArYU2GYSHnkxJWdE0GoDrSO5EVPsgUI7S__nuespQaVajgYsGIgQ7uKNtrMb0z49l7w
rwf9Fs3JziOm91qsVdvkPZc914OHLLQ5eybCv3MTLowKkhcWV-sZHUf9-yC3Nrda0ask3Mfgx-n19PDkd8QDDrFvBT4t_TMXNfUdioidk2-1m8RxzEKryodl_tG_Ydpw8usLVAoFtBB1m-9w1Bzg4bUTDi1udDo2j7Z7B2FOpG76vYZ5CyYwe0r5gBzhH5Dwe7TMQdtOCQ6IK1CGsvAa06AdrpA83G-dHZ1gfOSB5VxQCsfGB_rLoPm2whlQ0ioupZ3OM8Qg1sAo-EHGupEC7UZqQxNJDSjWfzZ7jQ4aZm3nz2j8wvrWJZOB_rV1UEBJkR4nWqxUBcIsRHl8Puj-IQ3PkOy0AMKmNLRMc9YdxqfpUImK_mA8dA24-awTnuPzTeFEtuCCrJIUTHMPWcnLJomlnQrF380KyBKXVUQTAl3vEKydYMNoR1a3mtzY8rSlDkQ7fI0SBoJUPjsUnFYukoRJqvu_pHyWGLDMuZBQubTmq7QjvP9E7GeDUzu-E-CsMuFm0AUEzn0K2Bx3Pt3w-_JxUAbcrQngPqY58AxlTHD_1hJQX7bz05gCNikhcmMqUY8tWadYymhikdiHx4Ul97btCFa4kfZhTQW8kjKOGrBYLFIQ0WiKccXlYnSTzAEeNf9pVRmbYQdagyvThOoeACQ6BvOUM9skhAZwxSzKL1LliYZUEFhNOvUwZSmslTpqAMXHNQpNnE1QZOfZrCLvKOhv0McaNE0yzTJPYaeHzGqM8a1R7YGGVgsWaKbvw61ysHXfBwrntvY5jfzDrsCqfltO07KSb4flx9_4h0jiQGYsTFWd4uWrgcD41e9hGn7Rbfg_3nE9KmnDbzd_vJoZm_htHOqBK5GMVI4Xl9762MnEaNmJmQZNVd4O-46DrZS9I4lIoBtGuKTSV56vl62UE4BA2MqN63TUJWZVVF7bDdGDXET2KgGqb5ilUaYNjXZeKuOZmY9hJafjYfAPIrQao-YmS-iHnCVL2s7DOI_5XgrfXagBDX27E5eSo69AGr-8xhqGem2r9yGD43R6P_z2SZ0B4U-ilpJ7-JYybrrPTOxUB1ZxiQuddocRIm5n4XJPjyVFJeeUYS3_aT10Vf7QWtHXnogeexcWCv4DIzizE7U4EYOLH42Me7oRX_xCw9v-pFoeO_ps0mYRRSX1KOnn6EtoeH1UkbDwlmys3rmZa-ZS8WpKBOuQFDg_6-jmPX3nBzns1SdMqu-7hNt4EWMotfM5MTjosgu2UDv78lif_CxpDVvBGOEl68L8c6Y74be87XdfJAWcr1Na-owjAoTK7ia1tdGCKuwaXu1BReF3Q-Zq9BgD4f_K1KhYqIFy2ipZa9klYKvSkMKMxYnnHhvPNt3Q5l-lb1QRnR-zVbQBKhWJJHy7UQio4NBTkc3kMe8lZ7EBuXKLcHSUa5VN40D0i3PamhcmOjMUjwu1ov9kOe3CUigqn9HdcwfLojTNZW312VH7q-0K7jMFEuDHnn-rJQuhyguqbD0bqpckOCPYeCT7TNnNg5F_a7WNt7ESCfmlJpTGkP5b2bvPEcaA-Bk7bH691aIUWIxHFNxkC5lJCvfSXoUHfBLR5B7PCz0SLjW37CGWebvU3PfqIOkl9yIFojr0PUbuez8bYMVj8WGwt3CvNe11I2HT94VYeJP_xzENJsRTr6YxXVGz-qWNcXgbFIt3ANY8-wm8v1FQ9_xvla1g6mpsTWBt6bXWPntOq1jSSF8SaSEv4dOy-e6HqngZzer-wXOu8K9xJV54kFOcAoiMS1xNsvKgP_LoBTgxdlv9wFVwFmKg3E8sZlMXLF95AzUE4UrFXL6OY26qQBYBGLqfBaWim6cfP_CGkthP3hTjzAYqt2aDBF4mdmmL2IQiOFoXJGi0Q1W6EZQG-qyNoZcFB2z4iJ_xE1rzuQBXf8-7cjSi80S0ZCmwWBUtsKG0ycmp2f29wPptnPtrMdux_sjZ28UEcMnEfbf74YNQamWZ0s3gitGh23qwbCRT-C0CMNG1KgkwraSBjSF1EaiaHexcFdQ0ZE4TVXIT6GUzwmuwt6mfM0_6ok8z0ei2CSHyW2wbQP0GBGh5rq--zvoBW2K5NEOb4Cfgzygi-35B5lcYJ5Fh7UuJ5yKWLUOO4eLH25uHnbagSkvP5CNH2H_QmmcuMg10h1zhMSBI_eqwu43D5aNHQhox38cBblfwykv1cjn8cQdQ_u_e3el7f49AKWaTbZ6ogl-kS0M9jh72me_62rgblB6QZORH3_57UvWAGmnjjLaQ2bIKzxAIL-FmnSfYK1eJxWxBSK_14qQ-EtVk-DPROzIf_Zok70z8iaDAQHnFXArk9jHxfV7UiSkai5pKDkqZ4gNPAigJ80wqg9RZ7AwtHfJ1_vZBi5Qj0dFCKUNXl9CINobN9ZVI4tsAaI-caj0ikvCUN0FNmZFoJpfXcZMjdZlYWis1bi3HWIBw3FwBIMoZqA7-Yb3KzudsYSIjVzGzkc4Nh30bZbw6KXHflt1RMZ5K-g_v8_u39z1E-h2qJTB6ORj3px27lJ6UhwG_1uC1hMlPLJKNSvw9ogE0ZEbaMxf4EKw9Zhfk10KZ3xGtpSQxFVaokLbvSdCtD6MaWg_ARzhY4KO6xm6-NS8B4U4g8Sc4GBzi9ASJCOFkElr1niLSLIGq-8NVXYZY1Oxuv__yNPiJa9IVtXIQ5cjcpu1lK4rQQH29Eg-OPbbzDRVSOuYL7HH1IEu-imRvr2pqtzZWyyqq5aeBTLF0TGVM5KqgymKBlZPmppAN0TIZRbg0lXqpI0dYJAq5zLFpZIy17CO4dnbVq0toFQ2h25EiG8KhuqbAOb42QWeNjfcLxs41me15fhG9ZMn53FHekp1PYGzR3WPFaqKKU5mzAXQlaj2ap-AE7LxwRKlasDykPs6LOVWJFTiovq7uK1bxfRjKPUTvCWrPiY2ZcR1i70wnWTBz4Ex8Ag46pMJc87L7Ava2te0gMpg_gxaHxmT_uHB6aTNdTJ-ILmOAJQGYQHdSDDX35VM5FZYtxNoACHbkeQJi9LjaYxuV3hBc1NYTETYeF8VvSE3iGE02GFLI2hAC_UIR6TdmbbZiRcVMbv_iCLVIrDasrwmvoffo7eB9E9BaRb7Nj25REAlrHtUwcGDa3axndvMPgm6EWeV2_e4BU09WkaSSmUNMTfM7DPA6gkj6mwV3tqn6angDTuO8rHPuYwn553cN_R0C0CALyBSaadvIUrt_lHJiA_2nIoLi2hHs9A7Xl0S7al718eoahIbKirPPsGuPdUnZtKPU9r1uGk7IOz1R954Tt27mK0RYLUcQoK7a10tvhX-Ffeid9CXayRIUBR1_BXszlU9ypZjeaQMjA3B8dQxAbS1FocrFcxWkaUN9dHZBuPIuZlp6lulecoNBpu2otS7EBd9kLiENSjCixcu5doDBaZMc-nPTIA7b5ij6KF6CIZ0Kdkdb_fzJCzk6pm7hlM71RRINjLUFxcy5GSssy18Uq6O59A37xI3kdEf2layIwtl_wcMIh_YfdROVSK3ZjMIg4ttvyuHMlt9ZlKL0OzA_3bt
0bIxinWrI5KF68i_V8VagZb4-bh8lSxiJWwVOC2ljQ6OZH4vS10sUZcpb57LGaF2TSCewF3gJFu4SQieTt5smci6MS3u78pIR7oFSP49ornE1QuAwGFoyU5VI0Yc0fn4mdbhTAxbrNTxruxrX0-uydMN8FkEz8PvGZajzU8tl81Ix-gcL1g-a6tgeNgo3bs6gn1HH6Y2-uA8TSwfS-MW1w-fBQaDU0eEWLC53-cLENkeyFw9YkcUSqq3AZhicL27jqKoaiv3crHfS2dRYG5GVhNuqlpBBflmrp-DqIztinu6v3QmZpIfPvmMirEt4N4ocBihW9O_SUE2UW0O3KATcRYvUcfjl8Jfl-_EIUsTbpC5L0Z4JJbjcnJePBIyzqQ68EscPWSCK14jrqxnQsj-gHOgk3qz68lT-LZ4LJl36ZXrs41OUr8hwWfiBJ4T0Rg5BxX_Qq5uGY8Qmu-sujnkHzDcQczmjwqnUH7_eSNCmEZumJoZwI_Hoph1Tss_yb2Dt0N8k0oiuaalp9tT2OrZPrVHMghGJ4_qaSW7pneXCOeLMr1C9ImqD2SloCzHYG0CnoD1qhxLlTut3CbkFpN56-8B3YdVY3yF8Wt9AM63EpuD0J8866m2Oi2Ovrg4ECxqsgvoQK6wSm6ICOfZ5Jf42geZGrugY9fUVHMdCiZmRBcwZW_KE9uJlQBDQKD4FbHJkGHmekPF8dQbAkQejlowhVFYaB3zelIRoJ-P29qm5D4od3DW_0ovDu3FjqJZ1tfEnve_S23wOsV_hFaxi77tRkj2CRbRimAdaqoHsaXMQcNijSRR82XthSyPRfXYtFKuYgcQa80Jtg4YUZE9bMuJSVTgPdVdxP1RBFNW1O-tGrDSYY8DJsdWuXxf3Wl9JtcF_feNqLyB0tQ3D_UBw8IBBzmHmMnfCgHHee7aqSUL91Wtcb_Wc-K9v3RkKj4IrjrLZP0rSYNcEg25-oTVBfrpnHvgfCgmSTqYzDSqv_Ic5qcURWBkbx35ExGeNP4oRijw-eXWEO-m_Fy_op35yIrON1-iJ1UKH7PeIzV66AA0tVEIHDdOLn1jFWn1j7q_Y4r_FflVwcCbt67A49Ep5A3FigO6Hob8Ej03Z7kX2XQf7YueXuG3FZmzYkQooHFL-ZGK1mFJV2ptj1qMzs0kHyNq6QSxQsIjv14iNv3wHKZRecDEQpqbgJa7uz67eSeAKX1zCcoFbAEOyKkHk8US6Xwb5CgEcrMqVa36wQ87cMgl58IE3vd_jFHFDxTqNHJikFukuu0LMrUqBf8Mgsc71jMhfSe9SLbUoAx-GtmFbuSqm__dP8Dtgvrypn301uFCU9txu8vi-V5dA5YO2LQ7Gp0AU-Jiy8Z8u1X73HS73x3AgJjudoAlfVOUM9uIskpLjXnpCDBFiUIeH-PueWo3ukKNvvGyR-kx9S57gXxWZbT9wykX4Jns3aEXhwfXbhzCrB8UWO2-XYgVwICagHz9IxjZeXPdQ7LfS9Yn0oqGP3fMbnUWt05iraAqfTR1o2wvEmZU9sKBHmR6CVe5EuNU5dH9fQgBu1are2CD-X5xh4dqFqWNtP467FRnChLXPuWgpq8UEDEHR2R-QvPjhZwcOYreyKrhELXvtBw0ZtcnKCyVnb9h9waRSS5nRIhELeThm1UMauqJJYz4uq_3mzKH-l23alzZa3qaJKzvAAzneNFzHjMgD28A4tFNOrcmgtJCCXY_QULUjcZA9uMi4If5A4vH16KfwTAEsWbM3dHfYq8wS6miOu4onDtMQm_k20yUtyP_mQ7IqQtvxj-ZPmw3ZbPqbBfuW_B1TAoKmE6LXoTTCGTz_z5Z5EURJGAm6HDXCeJSZw5jnL5P6Fn5ByQPc4o-AKaEGuuJBBCYF8PstfZW_vNkDuhfz9P-vpWYWYFMdXxOnehVr8d6PHhcLsh4E0hc7YiMHz91NTXA-kz6vQNKXZwjjsqdSsophxCdJaWJvO87p3k6VQ2HfOm3RKIOh9pnVwl-TsSlsaB9eX3qyS35WR8KcH7kGw5dJzYymZ87BLiCXCnRjSisLfe0fm7Q249U0OGDmGPZuk5zGV738Tzzli7SqLoqS9q6jC2ZRwqvZHj6uHsu8xrPNljw0tOlBOkJoKmEHasus-VdQQw-tcToZk5Be-uZvAqTWNerKHkZy7xKabml8ZYbrvEeBaKTcCan9LFMcsbJZxwFU5RoPMQbLD2ZXI_mFFwKIo-rpfCi1yOmJYzZWMhwr9T83aRASnv0mfl6TshTRuPyu18rivYYdeU8bu99gf7PiMpNYcYXioTB-acdGA4kHEaCz02IFtZ4BA_Z4blXCcCELTnAS2WmcA4y6peLp8PrsWDlVyms03e2F0g7X8TbAhf2DI5xZthZl7aAuIBi6-W6ZjhLPGg6vPCWGSmTaJdpiABsDMGLP2csrPAjnMD8iEg4f31pv2_VN_Y7c2YtgiOkZqIlt5ih-ZUdm08ip4CFVyRLjLwQlKml2G4V3JYZyTm8ZKxkStCI7pAa0UuvqiUsagAoHY0pX909Xy3drgzsid39jW50-BqalAcknfkWPKz4XD5r0QbBMv8nWwmOoqWhx4qfAaxRSz4etHTygokM0FYN6VgiHKKzP957m2Xh2K0aunTs7IxliCzKC65Aj1gUORgWgW7d5Q0nC0v_pZ0ypYiINVkzN2-EoOhvRATCP9Gu5VL7k1-gyJBYKpgPMFQFsauHmQLCcweZ3idT-TIB0Wd9aj-LSJbdO6dgTe52Fc1tsgnWLccaKT-9x4Jgg4ftcWnU9_OHKGmfabDfx4OIvYqgNsyB8DI3r_cSF6I-1_XOpgouSamuNa-4RBPsE6yEv8C1cwsSpihQBDMBqh4ZegdY044C3Kv2mtvrmhGbcEtCH-qo7BbN41NrCV1FrzhmTCyiuQkr5xU_AUiW3YQpVBOaUKYrFhXnS85QMfmTwoMczvRdSuwveWqxhf36cubBm2uVQ90zBCcAklwxxV8Wh5Xny-TIXvz68F2Tydm2P5U18Rv7eety5ataq9Bp_ydl5982HOcxn8YLxrZAHigXUB1zfTghz_jB9TdMs6z6_wwPux0uA12MxZ3C5em7tllyyFmxOa4VK5FIMz12fdEsoPT4O8q9XYc_1hK6WlREZwcIQpOoxh7ijv1txktLpxIws0GUDlLrgxz6LR2cTB3DTvGnaym5Ih5sGbtx8UCpttSt0ERD0ow_awOt6H92P4bojKINHW-A6Wz_hsW1hrPaFjRzAYPbCi3G487UyZhiO_ZgxcouvNx3MGqaqVbmmJwbtTca1z-3IxPr5xPWIxEZtHyavRShgQa-AQMh6oSWFMRbVN2pPZbQKo_nRI3y5XQpxGTAAWNF1sg-G38JoQbIpq_6H9TRO680ROtnTKkEEyca084w7ql7Z9yXClFf4fFO6oAbyowKWI0mRW8UwO00I-bO-Y7JUKIqqaXa3LHKFUF6q2ZDv6Qf-d6MZIneg5JK1GBYuhytDBBoeKPV7FWROqQ75FiXwexgWYeNSXHCyP__Oc_zLmz9JSxKQlfkRgBX7uK
QI2lDMTDJq3cBLpngK156yTv_2cMd10AEG1f3ADIHXjm-o1uMrMbCN9kQL_oM5XMUYUSpQv66vMheUglk7C-4gK_C65xmOMrOq_uq3yBV1C3UuI2wtyGR-idFsp_58mdCgj7jbJdQ6txiMcBDNOgYWS3C8qF4gTktC8jZZI3gN16s1ywaa6_NWneQPOUqQsxAyMbnf-SBCrxGuWdmMCJwH1WxorA1XWxZr-bsRYUOLcBTSi3HHfV2nk5VlrPB1iHNH4GXdNNEce8sMMtso0HsvG291zjr-MugxSByPbahh4ybqUozMaD5uX_2ulirXx92l1EMX-k3lCVaUW5i9uWd2OhVM3vcOHoU4d6w_BFoY8W7g95vBcnz_f6CjA7zoHPyJ_55KrKyscHrFpSjS-Q9fNWrd7NKgbAhJIhxztBwi7vzjBckkuTZas9lrnagC-u3mR7-T4fKsTG3Ryq3AF5VQnPvaDTCstorF__ufMbQ-5ZksB1ZFnbpA29HL5juNpuitoFtzh6hYsW8o7CQWmaeRp2H_JRDNI72HqBDJT1tbD-J4mzjJl5rRYdGGRUuZm_zsSimDIfsZ8X671u78Lk3ZJIs7eLol81h1oVanBNL3PslnI5hvh65zaElfQXJyBzjECXun_GtKfbYi_7E2m9jcpry5jCtT1wbPYpxomU7Fm8h9l1cwthLVc3b_6s5haiEwQR0ZZQqfXOSulatd-zmgrO4psN5L5PlUh16978npiQA6AzdpoQMXzv3iWbNDed1MpvPb_jsPoHF1-0NZrZeCAQgl1qunFb2NZlPBiQ7vCqt6jX0cLf0qaK1GwLJGDjIyG4-R_ZqgTzRjn-Z_ODEIzJivPVJj0QHuRY1IIQ_6N9IWjYXiowb8VC45DbECUoUYwWD5dZdWL7oGPwVHt6SMcul9OaygAFwKilDqQPe2WezvYhVPWHZytL84SEZMMkcDS38bKX7m2iCRt4n8mm8OqroUNYfUT6NmNjbqhRJZ5CMMS_HCb6d6qiSl_j7ctNh8ZnaVVSHIAj-Ccd8Ih1xEceloExmEkikLatqrx-4Zt1DkAYwADJc3R1xxNhZVoHLZ3j7sPSLLbGRh6EtvrhXbqSHI5WwH2m6yVq-1HbRM0eGz4jN78YNZt-HqfXB4dOfuYP6LZufUOzVev4U-eBLzkocAKd_RU0H0KuG69CXcyWE01EJE20pI3f11y9MzbvsRZPJeP_aQCzuGY549v0LzVkBG29mu0RMRT6D9eAW_LTfw7p1CmcfZtGTLk9PHk--1roZ0RdbyXxmRz-3f41XKXCLi6HsQO6uG_bQDXMISMQDqC7HZBllJD2sBJNC-xPltgXJc8nuu9_mfc4-VIr2wz0DPERWOa7ct3Oxvic9b7CJlBoEjAjjcgZh8GTPi4HB9Pdkax3mQSVvIUZ0mSTNTPeIvsROeVoyTP9SxIaoy5T5wadnxeISKu5X0V_ScRipufsVyUPP0jLOwZZebSEOg_RR1M_zuA81Xti2DpEkonfGaKFPSU5FLMMODecYpqAY_xY_pRS5gl4asXWvEFJefgYES2hZI_A7y6clpBhII1NB0f91wjdqgN-NquRxPz7mFyquEVYwIztd6ukuKkkzzHiBN0u7a5ruGiGjBJgDSwpZJnuoRD6tFl6PbxbuaXqvNK6v-YinlFUe2tuNtqNCzo68AZ4UOh5a2f5YAWaucMwLwrfVdtdv6X2_A7ye0g_r4uSq5-lugktA9d5KryjT73vppX8qO7-zdLS63N0nrLFDiiudny9MjxoHIJ3d-vOZ9yhX5soRWfSP0XdNkBjGDoMb2EH_KVZv-DeHhr3TOOt35NBdbuSLFcdL5NQ5BXpfar26YGCyGje8H4ctl-rqfQc5H0o3Yf_hc9bshVPOZ1epUr2Tqkol9RmonSD0nkPK5Uqkg70sJUHE4kGQs6J5vCn3cqsazdNgLpDD-IjgRboLffMSe6asoLkzYJ30xT5a5OXJSVSB5A5BaVBa9BKcxP3whWtDvXwvaRVXSsA5PFZ2R2C2VSTYcuyaGfhHkHp6NR-d3Bed0o9ZWwjCFnvDchrgvhO0EdGiBADw4SF7uLBmmRId3jGg8BVYINEsbwvbLhDWhOyDFlQQEmVFx_M9FENZhCT1icRgNYJHfoorPV9H9eszTj-tiwKUa7Qzf3vA5X6gzF1fQtg72J6mOL0vkNT9Cfqh9J-8q7Ga45cJgN59jGLbgvWN2hZhM28dshlnbWMbeGnEsKpwJhFqz8auWpQhKqNYsi5rEbYIz7g7_m0wbigYGk9VLoBVw3j_ouMa2sCrk4zOxEBdx49LpCmbkjpkCxIPt2_Rs9QKZdBULWOQ-40vz8jYC5yQdjUpsx4oQUn1dL8XGl_9Osq8zlYgpXljqUbXwJId2w9fT0ecyVvDj9iZWo8Eb6NA5iE4Pua7KYUImBqDU9oRM8Ve2JHbXREnMEE9p02MfRBT0mW1xjwH2XyaEv0CD-PJlI8kvoJQSoj5sC3QVQx5su5YBe0vfe9A2JOPZdzXsH-ijYmvk8ku2T8eCC7Kaea25RZVuOxYEewvb3e2iJiPtrXbZ3RZaO45W8LvhfD1oqOaifwqU6dIVKa4TTfPvfpSDphlschskAxnzCR1vqFbZBP3ZkR-6nwJ5j_eXJH_m8KPqocUnyUS_jF0QZetCt8eB0FHhXTudAEh4HP70Lbe-1uR_chTTPNx4uXope67V_Wkh3Ij2Oe1acxpEVK0hoO8ohq4HAg46_7tdAWaBeJWjQRHLP9ccRbeK07RAQf5fDuXCnjGjkix1WrUk8inL_I01OMkvY_Gc7XSVERk4T5QUgoxWQNNu-oiYRZ9GCG8YJY_me2d6IICCrVnaeorpmnQvkljLHA6wXsvFcXX-N1Yum57C8zmXEO-Xir9h3Wyt5rAFQYxlG1nEgRvQr29BzdOeFs3T82eitWyrtmM9UoLTGraW8nuiZsf4Ov-98C4DQjQmIquSthFcl2dn3pa1sLj5T4OrOzeTthRvW9ti2zaSNqmihjcEsarHu9UQKg0PPdIaiEp73F5SIqRzzwqenzHonFFjwwNZm7ZfkYDigkZ_EqloDTLSZyc892la42W7tnZHzLjRbpIQb1EBYPsezAat1lI8hHEJKCDiGyF3mP9bI2vKJ3YXi-THpYWP9qAfRpp4dmzz4UMw-QX6-K4gRsydLljnSlZ0n3LTYyUpQw_5Wp7xRenfPfBXmdd0oNaQBpFBok8jVX7dqbkGAqp7ekLCI0cYUOn3UXQGsxRwd6EAcX_VWgBHTCNs3UM4lQno4G-GISagd6jdRKxtngYv0zUrc_Q8q2Nj9mpFNUmjiOyfqxJEsqZZdgsErGDFx8aohgwbAv5oITAaXTyENJL526wEK-NZhsNMwEFnYhomv85MyvqKWBjpWGcuI_4rAGapzymyuiHWwV3PxmmQebv8BtmEdiMPg3FvYqT7eDDHWQMs2OH-f7smNSzgjjmqBB6O4Qdb2V7O7P1Nud9PlSWWNqX0Fs9tN5UnNRlGvXdYWXoESvh-ROgx0Y-ratFzYkL_i
UI4KdmT57NysX5lRj4boQ-gyj6JFZAyu5O-yQB4tMkED9O1aG2Fml4fQMY-9J4tJRDlac_vQuVXCKkGHVUpPPDKMq5m6fgzoMX0TswRzzvE5zAk-yyY0oJ4ke-pTNnpMdnIIa28vNZnB53_XnI8nNFilvz34Bq-BSVmvi4cMyF2ZhsoK_SeFbx3SOyx7LUKI6DzVnriJBhdAJjxhx1sCyVdnwtj2AsRbQ-qVcl2B4e64UHqbjZBDwGD_kipexacqWi7CHwLV-hM_6j4xdLt6Bt2VkD0cmgq9_-REM3_5M4LHuSx8ck1TMmMZLgc7AUsybP4jujjow3fUix4f0BUpmAzQEiN1uvV4184RVxTaIj2eBBkjMaMlLl7XG4PxQCgzq8gsjEKSbcO-iLysjr0mSFQQURIB20Fxctw39JwwCWVWEXH4I8zRzps5o4JpMYfrZlOViBsL3ntWx9pGYpy0ImsbG3pnN_mNWwX6G7pIia2OrUfwlHDiz7F82Xr04cZk6sRH22BbRDhHzu_HYdly1256BMge76V0xAgtnec6Maaf2OwhfYKDVX2U8mh4-F7ZrSPrT9xWvS_RH082m_0ntTDnsk0wiN0ito-bmMTtObur84H1CokxJLoxgdrbz3uGBqWQxizaZ35jY_nwpaXvatTooz8O4zuPI6v5jcapdnFUUz-WNw3SpH5aV7rWES81NvDDFrnEv4MLNVpuhePI6dk4XcktDJb-xdEJznHS18oQyYOkwN9d7cNRxS5L2myRR-rWTBihcEpwALeZ56E82TSSiKTZDD_UqUfYNPDWt8bjilg_ekJuaFkzv_WR8jwK02nBQtHK-4bdCVLU87KZYh3Vl_8E7gLH95QRfUqS9JlKyiVQFvGW4DxDaxddBWqKct8hiqL72yo2H0zyFe_etbdNi-ZzDj5rUwNdeAOwoNJT1kk9Z6JktawIe4N7QSNNBrChkkUrqrA86kDqmIeARRYPRRrPSu3eDzOtSNyc4-AUyS8tdYQb9RmHYIxGesVWzkSAD4SH1BvXVFLzQxk5zQyx_KPKa9SL9KPSKzNv1UhR6NUYwn0bVg5AahSD7du-zzlwvHhyMJviovjGIJXuZBwe5rfkl-iSuo_87Dr6UKtxd7HYTYwwC8crzuOWmq75-3d3_i51M3PZ_xQdNR4cWzn3yi6bsp1wjfRi8T0p3NHCKUO_zMQ4rl_9w9WlMR_2-CyuxDxg1b_WRAlU1oab3E8iKXPdWlypLTFSdgX9IW_7WPX82olvCEbvZFEejxbDHK4FiKsc5JV3dknvQni-MNh1LWx2crXDkjeRzRNaJVJiOUUIVq4psGf6BJdAkhmeHL_MxT7vNe33idP7Fh28_vYvjYeundQp7O55oqPzGNXk-EOSENxyRmFEw6LwuRmI7ouRAEngL5kRCZpjHBtbTwsnA2CMrKfJRz6sIIMX2ZuTSo2_AXt8Xxw5f76MkwBk4wLqjg6LbztTTQy_6X6t16jGxyWRLd3D66wfmt8gBY5uoj5WWNkgCEM-mtwYlJ0j6Ery9-Qyo9zI9Lawnw5ZBOZvgDj59cIzqgO477B0MhSLOTsnq5CgYc3EoJlxJcTbxRyFkpbr9h9dnwu5lO7oUFcU8uNaNm0dOeZ3HZWv-x73NpFxOJwy_k1gXemGnxd1IVaJuP5WPQJ-t7ndDIvxdBL8tlUmVOjQQk61810QSzuO65lpiwVzARx8AiNiijoImX-xmtkJuvJjTMtNViRTKIPuvcHhGPb0bcYhA2mhNVntDlASUt4NGkYopv6XD3K7Y98eYxpUH5-z8UQT-xgui94Fm5Hbvf02B0g1BzKG8SV273NzepBy_SqPVVuCYxKryfBEOBksrunGqP7ePF3Bvzd5xKcHm9Rsd0Lq7rgfBv5Uu4urCLQpajApktd61iT7rQxi9BHmhIUbZ4Lm2mKD5PoA_jmivEzFtaBsVczjobB3NCgXDVqpZZPZAxZ2YDlWJvmMKUdKi2TObzR9ZAim0795UTHwoAz9T-gkI1OEN7sxNFBWrkhY9CalXE-csG3rBHtA2NeBvbrIuYSZW17YvFl4kutO5BM70hNzD0COTYB3MhWmHagWw5Y9nHIWjBZwW0POGonCJt2i3brjvkrsmqXRXl7Vdh-RUizfIjnPuTKDitT1PROKlvuo2f4U__dDBTl7LMPfpCyHAGbY48IliiCdLTwA0H8rmBji6-jS0Yl0yNPR3x6eA-9ksoKdxifZRqSiIVMvgiQ-aVB1lbW3gtWLx5PmGVSWiRguBfLo6Zu-taq0F7kKWOtaEFuF5MPnuqfTC98Mk9D53VFXX0RTXWNKpX2GQEg__Fd9w2yT6xth6yVwt5ZnSTv-_6StsijodNXerIDiyxcI6Fm9ATFxmzeMb_EBW3NwDN-qq6RKxbREnF4-XnqCYcvQl35WpCFY5l991m8j5bmK-JPF7Wiimqvaek9gqFTSJ966ucnuBeTgHGN2RFipd4_SMJQvdO6Gs736C3fROeosDCVU-b4_Wx2nLZK3NyHzQWW9dEJwtc0k9M-QkKwnL-ClKtg7-V78r1N7CIL5AXTQjvC4k2_dbE9PHnS9S9exM3gYGP1ujEQsmlyjm47N-Cw2S9vVbLpwvcj5VhksjPRNnSQ7csDwDo_q_LxwzQd9qYg-_ag0CfMmiguZhjxNrYTh2lvZRef8I0NDFZhGc1EwzyNcr-qSY7er-r1H5c2za7fZj03NaoxBA2xOxI_yCQ1sKtk1hlTd4Liu6zk4SErbj4iqntNYSczthSO5VoHj_RJhR7lQ51thHn9tUpYFQ2KCktMJfLypzIS8ESiSLDABhOA_6NlBmc9onT6lWGKRX0Pgn1Wmh_RZTZe9vxJ0J0jDyuDirLb_Mwa4yGh_4FdVWF98d8TL_NnjXr6gEsjYO8rEqPfKhPpFvzlVhtW4aROrHa2KF1e5epw-XsLl-Su9zqGzyilRvFf7veIhI5FFgBN7rCIRlgWxvL0aXKrULVAawbiODkld6E-9C1h7MP-xa9D0pYQ3ldt2AycNC-sl0YV0b3Hdqsb1bBMuhh_AgJDuHW_2Xuny-v9sHfj_5evrqjzETYD2WEnzC3-Ff7ateQztUKYDVtdsGZj0RX5e0aqpVLUp9CBy1ItYyrFSYY0FFFgAg4xA44bYCIHcbf6buxZmNsKv8pbOXD-mcZkwhahuWR-0Mp_O1chjXZgO5r_iEz5iZhrixS99tvcJDxHfmX9DESemCCqdOdEK88SsDotJxRXJa6JSCadTBEG-zlbzTm7rH0N5YqEGY0KflZIBbATK2qVG9HlINTYjXIIrQbAlqGt6ZZYHnSPg9qm1diwGkRc7KJ_njjbNn0DiLQmyeq-CCUs1a9A6XdNjl2uDkHb2_A3HrLjZmkP_l3x7GFfccSEaKqORLyjScqFcDf1PwRD0P-lCn7EEmahVMf1WaY7AjLuGDgIybt9UZF5Wu-_uF09NbjImNigI0f1b5o6umhhbda0m_Fb4hYsMRGflqUwkus6xWozFctF9S1VCOTSvQ2UkEWgykFuBDzl4x2-pzwU
Co0rW1t6ePqxPYaoW2GymTLorv8YYULfseVTwj81ng64qyU6XYWNZYI_xyYVlfjkpGTGNcEuOKNqZUnRH49KfX5RvPcxMJFaFWgLQRvdKx-QSSiNfsvgLZ13jaSdUDX1JJbtFy8_QFmM2sbhhFggjRq8n-c4V-NORGT036ye0sBuK352sWHTJykFA-fTw5iCgCL4AtBVosl07qKh4i1FhYdW7IFrZdX7FWr4oLMwNb5azViihZXbH1cE7v5HtZT-22dabeJbVELIzS6A-j0Ux5wCDYfaWgzqfDWZAh76tyi4WZ6tOi5UHW-MA6OU4BT4E7ws9WY1GjmVjkMYJM1r3F0h6JUvrPZBdFlHQUAEj3HhMjkTq6BoUbXZ33_Qum_hOONrr2KW8W2LElFd4Sxprp65f96cbp9TgPxyyArd4EHrQAxB7Sbe-0g1Wrg91GOFfjanG3DaNHu93kqije5b-J_POVYl2Ku-QjlV2ZmO9MBLvYu41UmcGgUJdBmh0-nUEpAdbzqkBRMCMtGh-ZBkkYpaRXFsar-ubdi9-7U4hRIb4uL912zfl5Tamu8tzNSGUh26Ek3QgUz3YA6f_ScntXlD94ngxCcD6CAm6H6Ubq8hM9vNpJarGdURXnj8P7-s8kioT5hWrP1RZ95CQrUuuKUjk4fYHiVM_hX7XEynjh639xHhiXgmRPT8V0QhoYEbNpQ5I9dtlnzVTro4qF9n82KNdfaJid3RQxwbxeJwMGfjz0WSQTzZpbwstWmhiot88TVWdkhLNY6mHHrnh9QcVNet4pM4qlYB6wODMU4dxrBqkQ2nN9pKf334wGCjEyEs1B1rlEmFEHzHgXOr3Bmft4YszCuXwVOucl6V8tSV-oeiVIX7fPNaf1OhvIRINB1WgAi0kQgX384xGm8xrt2QD5GDZQ_y1RSZ78K3WPSzGawVW3A-EfCCtUCT9MoIhadUkTkZrNnvluIGsD5-8aItn0gPhhkC0Uj_4h_24GpvxW8KXf1Vj4krzWvWGL3-HnN6oyPnKRNk8h5yMK0pMrDtBeO4TrVG4ILg-Smhu1ZuBx1ONg49WECFwkOvt0KFy3cHN4QPKRpHuioituY9iyXkutCiDOVj59deElKJfzBWKq7UWXeVlSGJUHNIFDaA5LAxCSmVPMNsLiTGmFChvYGzeIbQeDc4rdJhdSBvq3MH1ITkip3kT-nXQxqVFMHvXQF3KpkB6pnYJAUAJics-FfiEBWuNaqN2e9q0seuWPgx7NInoMIPbYJyjdkJb8qrbNLIa3NItPjoXnJk5IDBoUKG4l_zf2lPu41EsKcU_wCL34zu7vVdMCoRjWTl3IjOoVz8LcUG8ln78A1DOH4hsCMND-83k3Gi_s4yWgM8sSlVNAwF4opVoqNDlklZ1he_iVZubgRmdVnPLHFkfNU48hTV97IqDikKvfJgOlKP4QB8-_N4E5Pp9qV9BJD3qBd27EYsZBxHvS3BnLoEfYe_qAmmWwMbT39Sp1WXFc9K3imHS6u7BW9XShGdCnJaYYleQs3kI6XQXS_41YpGz5EZEDPyQxYZb947dKvoBxSQpvg-Z_si3nR8T4CjuAw9l42wfSfd1iFBCcrjXfV38MSo1RjG-o8fjkZ_k_4Z-VfYIyZcQcDO23oNsBI_29FKmcX_JBOMciSWalMrPZWv5aTNnfzdUuYt24sI5AQKCt0QDgN29nJaQwKYsw1kE_FlSgZq8JsqW8bcoAmSCSjn5E94SFMk7is2pz4j863YpweYwSUMKgckB23-ZEIbarfxhXO90QAj-3dXqYTc_mIOcVqysU9-_hcA5oq7swfuCOSvRzRsCmsRA13q87om8Cihw7nUoiZgRHFdJsnTr8dlGvHxYjxwHhITbAO__-K55BsFhIt1uos4g29D1V85Pg7W39_wtN9-pBuKsbbNuGMMCPwVs0MNQoHo8msfUtTOyuHNuQbOwl5iJMrgfnc4dXdXUAw0okA7Q8APfkq52p2BhBpxrIaQ9pVb_xG_wen7tcyiXT9AaainJxnutAzs_utPcygS9ewHWWluTbH07YyGHfM0xPMP9z9VvaDSGYCLNoScvPi8mP0KMCWa4Eg8D_tcGRGgJuAlTFrPO6WB7Cisa1RpSx__2r1dlHLzQjWLd8y36F-PwipwQFDOF8YXzFjJUadTu0gWQ6hCZY2rsyPQPUJ_EtRA1KiTUW3gdlXbAt-r6-7Iw7zPOUOBGH_q-8Ma-yUivqJieNfA5MgIYc5e8l35hC6WyYIfZ995IAeBUMXLN0iQ85yxYupwrKu3Ci4nrSyuZhrBI6Eykvvcuqw_6tufHcK6gqGxw3SbGS4za1fS9nEaGDvAu_ohNQdumENR6zq7IrV9Ypa0xsqghf0OeX3stQtgl4-LZnRaxwYLwuToEUqTFsqAckc_Epyj5Z3uCUMOMLg6fpLkT4Fyki0ILKusUUGb6lBdriwDnNNYR5p8smBUpoFsTfBvht016ixtgk5UMkaDmt9Ldn7Cd3uLy5QVp31w3nu9MG-wTJUvB8fd8CMkqGf6dgMgb42U91TvQ2u1IHz_lwbOdl1pp8euyAQfOL3eLZJqhwNF0SkLQUs7IX4zyDRrHC8TYHGE-sG2lW5hYrwVVDZfpajvs14Fgi5FCy8tu3J2EnbVdJehgZVy6tJ59PXnrqky3Be2zOKvjyzASeABPr61f24GY9u0-BYJsx2M0o2Y3DGDvbnhQklsdQ4eEY9M-n0An4C7JbVFYzYObdjy3u4Yzgc8Lp4ZmDDsP30KmWz_ItuFr7EVHiR-l4S6V09woAlZDfRUTN60Yyebrys1XP-TVeUzAClfPk6143goDYmrp4ZAYeYEV7FAYYPQcUUgddZBMQJY00rvbSj6zRKRjGkJiX85VfMjEIZYUX2x1MFM7OO0IYGQ8KCpFI0g9oT_pFMJXhlLwBUbXedzJFK8-nuuYIm56sc6mmdyWVWv41MM9nY9GGbL6T1XFYqt_mDnIwTnB_Sevhzh0RTo435VU1dJ0C_TqF5ggDU3R3wEDvnEtfVM4LyUrn7FEqtrng-4mSaPBPkOuw8653y0awi-vc0RmraJFKjHJOwinJ8w9tndokqt4Bm0VeJj3vquXMRiYFEW-ucUVjXJbtJnLujZWEu8dv3hBrREbNZc0FnG3kk-_jjpBY6j5gy3JmhlPD7q-gCFuHhgPmCQ7rPSWKeyIvP0VbfxRW6lePx9VjjQxdjeUuxPPlqphAHAP7EzrIixbseJjdKOUB2ZsN6s-Pvh-lObR8Dy9fHfZph6DRCwvAQWPC1wWaIXzik0Nf0Krj_XfNbwscUqtJOPU1TEYfl3WBOXq4SWHCByb-ZKrhi-ouJytqhd7Em2VDNZJPm8XkgPABNUUdUaJP26SWHg1LY9ygIslC2GsxBgFRzciVQHZNxsEPDfESPOITzyTtp3VMgOaNYgWRhzMIiw-tMY0AjO92JqzzvME_rRdRDqAsX9CeY0MXBl_nmDJn5dQ9f7gnnVzt_XZsbsxySZbxCoeNWiZIv-UYmHlLzB2bWsPwTlK6Xm4qDgbQgHFuH
EV1Z3LRfqCYWQfDmKqVzssZyoihwnqOIjwuDln0JuAkk1yXepqxR2_2SbP0tFIXOqtX-gPxztpkV3HFeNSSXVPbx-oTPQjMbyqZXCmNBVjU-GbInhwnP2cg1puYdHfMMOtyJyUfqsmUGK2C9JvohhRR7FjL7yMAC0yfcuYf4vyubLm_VM5XmyLGFXXxlAKGtP6gzKT2LVbQAWUsdxa4dNkM1OVzPQN2jwu3CeolaXfzKG7yyOcAsiNrNXK6ZInayNRFGOsJsM9XTHx1msYVY7a-87AI6bDiKdYuWegQWCi9Q5BjJOuk5R2y6RS2Al6yfqRHK_1trkas7Zj-sF-KKVTf68A7kiHBzq8aLUBkexo17N1sv4IPZ7shHocP1lMljrBHWM0Z-YhAK_g0y2aJ_5coAdc-4_l0_P41kSFNPC2DGJVQoHp5gx1B60PhClXZ3VouqFSJHm4NO2jMwq7UJiG-QFNBhFIgoXoS5eVeYxwbrP6_FnFhlpNCt57lSdAQbG7CrTrxpe6prC5nzrgFVMySuECDwJJ-5bAKQ1JlPlnL6BBB7chXMQ5L1v0lw7l-cJpnf8akuIseMRBtkkAyjbfXV63JKWioy_4vgtNodx8e21edNgdZSk53Yvk8MNj0vjfTqUHU0f4yGUZIvnVZ5fVo_4ADvR9eub9O49Fq8l7bXDVmcKzCt1obgxRN63J0IMCyxXpS55HB-Zu0s-HSJc-vw4iE2HYSJVCheciiMfG7BE77acjXdwhQIUpaapKvfDYdK2yOoUZBsZEn044WueG0YVNh5HbXQz4vedcBdKUjgFmXHn4AAG_oTkkTlHrPh2jfH83DoTvNI2-HuNHkWADbwqNQdYI0xE9zRWx_mYzCD3KdJbDM5I9fdGpilwBunV255MrBwbge4faor9_HRO6WYZX3e993MaajusT3G6ZrhMBVYwXQqk_0w9yx62cLSn0skshnQRUVpSzBhaHnSdMrjbbshisa5Nt26TPyx4ytSLt3pOsYNq-dWzb4fu2FhW3E9QGgWd1rO6TB28zN_YayqCohGBf7YRUo_kWQWsgPHwlGGoInH5qcMSiCwE3ppoo4R-ixlkpVJJXHSYpGKoX1LycD5aLrAn6k9AieJr5u4TWWFxEB5CMkooLIDRD_2XLQvk6eWnaiqR8rG3ZwV-y1PEGqaMkbu0M_YUy6drPoFJD7wr_fXAZ0-F-zL6tBORbCVCBWIIfa6bK_TLTxbEZHqmRBKl8s8Q9UsEzqvIr1LNOYgQ7eVO3r2D4LEbDeBmgwGvUlwDeBqJWX-BrTHjGC2AolzYvEABP-PSQzV8gTlRnY8Y5eCGAoOhXUnW7qwRvIRnAUuuWX9EJJ6kC3ovydyAagEnx7BdHq2Jz5HvPYqfKStWLiieaEzkSnzMjGXyzab8T2XofVo7RDcxC0hCi25pPyvdhVjuzpsNZj_g1ApgOtlDVyDj6VxVyLrROv7xgHLePf2YJCl6JYwn8zkgslGvjpit2POHVqUOL_5Zmo92yg8HWafA3rrXt3yU79b_fweUNCe_VdYR2-u9Rwv6K2CIP7mYmn1fwVk8DfptfNlh6BbAGRlRQzO5-Wz6AtRaK_bplazeLW4gnbNIkZApF9s5ko_nQpGODypA5bKYwyek0iNxVYuDQqqUogDaw4gKKk6VON8VNq2Ewpg0orFmzJ9eCXSRdgoFnuOh3RGP_p2gi7Vcnxwabm9TwkSZFGM35p3BtWhUbBDHBqTrGKVHVJNDIgQNwmUoy7B_V1lPLhGJfFQ1Wh54tgteQmea4qS7ComLBHOlEtzrysRUwDCUyv5YYni1movzHb7VnnfA9Pzc1R2QtaS2DuoyB3Tbn478XyTNiF2SZU2mK_q3p6mA2KNMTGgWy1Iu4goYYXAODK7e7iqXf26_L8fhqCuHq_IyJL9Ongpm5k5TtypuWRGezwHUzj0_63tpPPc5oRVsTH74syoUxdqfZ0MfvagEz3cSpa5WM2hAo66IYAy2Ma-FaCoWBZrzIQcHzuthZWG5c0WVxG9tzKW8i1uOCc9WAFuu2wmT6xX8heTTDO4rdmf72-hxfF7jlUrD805997DAu8K7ypY7CUWHiGX3WV8mHGRQAgG6gwsiuGDsnCheCnyQCRBtOihIa6TAk4CiYgsZlpBuTK4E6OiNCgSJqBDJc00EZ0usV56Nm9-b9mTVSlp3uNksBhMTIFzUDRe1mwLZaBBI2yIIwA8Dc_jUQMc8aNhzljD0h7PSBN2wk9gWXAIoz_OsyFScylztgZmgVfWaVnER4QEjEv03y6UfS_zmY-HmAjDBA43fbhEmmMzAtUkUedMtfm0x3bowmseVfhmdEut5YMuJTnfyJ45tGz-b7_mL1P4YpVduzMTN_C2whFy8pzDz7mcMtKd0gDY-Q_tuJLQIROaVfpm1YM2oRKOxoU8UhAPiw47GQb0vvSvVhSxevtd-wHV_-y3BVJOeP6A50FXm45ijYEUfGECqO3Z1N6CNXRArfiN3FPeN5ZHzV3tC-4HXb46_G-GINOd6ofrd6yJHQ1PtkaSm45VrwEhUmqgTpA8rGa___mVFn-HFks0q2XUqwUJMnUeWpADU9HXKPOnxTwmcw0DVLumvUlVGW-NscMIzQAel_fbaZ9W48OfC5-tfhZMY5a4hQDCiRnHyfXbXl71OC9Cwi3DQ-eM9pNKidHDwx6SMazoPbW3ENiAKbAH1-kDf-hOp6ewIEsH07kdvLHoG0CUKIWGrT7hMeb8g6Kylv65JNRMG7Yf7etvBhthVVj-8_ZaD5vBo4enD6VPMwc3w2M4tX2JvnJKweUMY-yVrsL4rqk6vjEryzG2Vfs6qraG3ORLNYFeNI5pOXsAVhNX7GmGJ-33iZAnD2tuSnriTjXbl-8omnNXESrEklty0zh6a8hu97pvk-605B79JgKZL3ksyLJ6AMm4L0UezOa3qqBeapPi1qkIiRWmFYb44B6utt3xj6K7XMClcFuh_E0U-NjkHc3A7rRzPvgqQE4PovxM4SwRhSCz02PmYxXDCisUSvZdq4j6xmoNBb0qgvrgJKWrbF6ZWjSKbmLbnuHC2H5TozUQPKOv9nY24wNtLsqsJ6uhYjW8rztXqbQhiAzr4DgmMf3bMaLqYthR7IxV0RzCpan4LBija8T1X5w1hKFe0yKsi2XM0zxcjg-svbSh4iX21US5vP4DdhL6ApO_GPwR5s5nrKv88MWbVz4F74_VG_qb2O3LcOA6oUEQUEnWHYm8UAfax58JnEZUf2PIOn8S47L4p7ahOMChehc7rMiojXLujYFO5H88IcWeA0CIuljWZdssl2y5O2ndZYhAdU9-UrEtTK2IHgn6qunRiEFwQXaSLBffUIHq17Kq1KFrnpY9RqGA9muqI1VJFMgFS-CK9KrBVMTwAPRMs-L-vpHwWotHHo_NuVZITQYCwV0zS1PWroQzZv6EKvp9K102u4L9Cb0yA0v2SIGNMHnYc8H1BWr-5jO5RYQ16X8FxHEn9RGvep86cHWd-H-qVxiH82vg9Eh-aygJs0Ih8xMEYyZMU0IOdNU
5n8iLr1z4FaGgbA7J1_Bn5wSDkzO8uA_HWAEFLohTXKCKNIINujKQvngBUb8oESNG_1xk-fULRENMGkN-YF6jMj4FGHXrUITO5LaMxMF-8yufjigtMVSqKCPV89bbFynOeUJxUCGxJfHHNht7R_jmjhnC6vP7RN07bIeY8F9Az5jBvv7N-TwOcvz4mzk51GYjh4LeBtLgAh014k55XBBgtmsVoRg4SlPDegYgQ2JzaZpCdwEknZDr61FO6KVTLK4WmcnH-GQnC4Nlwsokm5FfVw9q0DNKDg26fnNJuiudVpqmGI8wUlljaoxz03DYiRxPQ9NOI-qDCuBy4ji_zbQ9szsk30FL-BuZxKq8JHLqOZk_MoiaWAsf32FU3kv9S9wxVy6GV-xBIN9E1GWGbP9LxE-JZMXxSOeoiKRZT1RoWokntORCFyLhwZtfM4aAHL9gOMyAdBkvxE-BTzDZW57FHWgkCLVxQCra3vKxETAiAmw2ryE0bd_8WJFxuDo-Skh2lcL8LAhx_MCBgjX0nibj5B3dkd4mJTNiqXzGl9nFhn-12RZXZpA61I0muGr8E4wxfGBPU1oEQ0DSpaI9YVwayud1L21CY8pE-JGJ0sL5KosRzPMYiBXcNdyiOSl9n5gbyXJLNeSVbV32TsV5eL8X_D6JJdrstEAovbdBXEJs0LDr5IH2SKXRiHHi6fkZVCXM_GnMAudeZiuSR77jSSl9Roi808Hy3uGKBj5AtyfDBvtd-9ZOGu_qUKXJICa0F7RxueOSqDVU8ZAyy9DmfRaEs9ldpKG4VsdK35XW7oSe19scfKTpZ4NnzrMVNB6zAXbm9_b-YDUp1NsdM2-zcSvv0efwwHue9fHALW3Z9NIHVahGCLeCYNAvCSlsoVM27YCQEVYJ2IetLRpWYVngCU9Sc3xXtWCjcsQK7VdO_M2Cm9kAACGZzvNP4ebBjmDhZaYoa4op12iz4XmgtNLekdnpEFeEQvqWjgcRJNx3_HBFuW909QowG5ywbcX7OGcEILAz2QkoWZv4xZV1cO22m18eoRIkLJe4N4CsssNl6_vSPFkL6Wd_5lsOQKS2_3pNhNYsS2KQo7kk_qOpEKaK4RWdtp-gcPvSAXZkojEX_rbYKktJcvG8XbX9Vf5wU-qx--o2jz5fp_2JUbFlABa0-d9eac6hczOvr352aDLETptO4t07G_V8btIuTUdaOWLAJhABd-3Vq1jg6Wk_foFqFQ-qy6_qZJhiQStUTQWqea4kGSYrjZLrtZFsfSChL6yS3QxcKbJhpD7o_GEOZa6vbHoyyndyHiIow1O1XJ3BHsH06AupgAIXL3hAqogqU9kPeB5SMeBViDdB2tUipiALHZn66EwpPmuKcENhQ1k62wPZwo1XGuHesYxgLklVdsVSFa31nIq95jE_qmMInJiKjFsEDRIznwAK1fOCNpbWyzxr0U5_XyjdaWtV0U9nIJT9iEIPMjV3Lf8TyXmzgyHPVr_csSNzcHZ9AJJHfMlR2rZzTUlU7zC1D_WcucC8Q4KVf3BFLmjfNVy9avZAF3qrw_yCLef_Nk-7y4cnUp8jeLzu179UQ3IcjA-Epe8cDOpCBjHIM2-AWYBIa2Ic7MKS1HUheToWC5UGI-yQLgyzqeLST_gN4A5HjCmZGCsuHyiAUgW96Av7R5GtQgHtOGswuDgzyR8r-UWsNZG1f_LPhC-shvD42UrbvksRL-4GflQK0BEtvSnHVIX-GL0n1fIiMwDcBqL9_NxcMw1095nG1BT9cpaijFYpzGfLIRfinjMptgR7QjlEXuCYfXOz6bppsiA7o46Yo75WpNZktTQ2pgooYBZEaZGYBDF3U0TRtrzENnTj1k1A818Apgfb_j2Rhjf8noIfS4HwOZBEQhGgfVbst-RzExn-W_fFFt8I4U5bbE2fNTacy6SGmKMaTPRzn-cjiWc9JMkGtDLfZw_DunsWO7M-U0i1w-RYrqaRpt_kR8VOyMldGKxJOxcJGzCqs4hUtb28C2m0W9obJ44eHVrbGUWu9iaTFX4G3z521XT7tKuMnKz1pV40uF4aRxlOxTIy3si9NzePkjNB3NJmZgjTI7Hb_ISbKTl79azpWVJG3X9cTDxDWvDnM9cp1hrmVW7q5c8uHI79jPWauWAwNdwpt_s0OT17xMZnPNQ32UfL_nmhtj2LU7RV_CXEPBoQJUx11qGI6P9YTQ1fR9ZOzVquw_rBJlOykCAcBXYtBUbytkYxl1IQkSIjYvEp86jB0NaJPAvIVt6Ue4HizNOnuCOfweM24vx9-IAYhhXsiPrsbQ0v_-h15CywSgadeloEwCyVtpDBXG2LxVzISz17zmdXpw7f1wT0emxhBSrTCv7sacrg-vTp4Q0TPETn5VuKpNsU9ZcupUZTXPkyI9yR_t8NqmtJyQ9f5f3cMx32e7-nY-A32M5_rkKI9MwtWurNMNXRkd4iuALJCiwSqdb2A1dhqAN7Myp0_r6pLIuY_uks8TeoJFJ-e62cTLqMmxQInf7KL4Ikdi7q7KzPUcBVZl9ZUOqZ8Nq3Bj17y68naEjFerZzbdaVSkaHCOTRprltMZmOevEITIQL9fhzoHSQZPbGeNij250zU95OjXwjiY-9IMDHb9dy6ImAd59ZO7MlazwBb_YUo7j0-vmYkUG4YUeUFJsfSyiym99LxtXv7pmFXSJYaoMa2J1pIno93z9xTny-UWWCxKpJF6Zpobm2kagwaGRjxeb1RDAPxnLDZpqFoI8CGqBiLRXd8VDykwRAVHctL4_0amGjSWBkgvhA5mP3gR9l0FEjuVbZvhZ2cGelXpOgMHFkdxINhri07wLR4q9T_3SYqKevOJciSbHD3VEDyndI9NY7KMSDm2sAh7Q5WBVxndkEZDxs6vEH5Hy5pVXYLG44fhIhRkM3m6xCxExOwo-4IABsF-ySDNskTdkElVUQ8DKBCm0rEb3f1B_OSatQtxaqlb3pirpnvgxl6QlLzEsfrUCKvBNi9OJd_-7tFBcyHf8TzGvjf8-bYJHroTn9Dm491skPmeH8u1bqJA03SY7oQgiwbdmEtLajleURTz0f_LXCoke4BvMUkqIBVmD2KIdJY8W1crQkndH8eSszOz5pOyDIHm-3He0byI3ejGE5sAEyE0gn513ugmmjQwKGBrHxdxliiLW3sBw5klcjvoPQCvG15d3Xkrz28S1h-p7wUPJKmJgXca0J4RRwP_JQtgtgNP8rMhlUapMDwuo9HozbZ1y3F6nI07FuAtHBJTzFl02NBgeiTKr1pfa7sETm0yUIV6qqSJF-8BpV8OZYLSVh4_7P8_Q_WLAo1KSJO9uOSzHEI8phJryGbpVXIfCm0kLcRy9QJ5Gc8x8QagEXyRHW1FYaAsNUvvyhJgAW2YEbtGzp3rpsIPQIWvNW_3qogCYw3FEwF6sbU2b_1aIKce7UcI-f48zLGiwHuuioSeSQcJbytFMfDz_tg6kAQbAfAHoCn7Lr12IQ4mMr8mxTu6rnSzIW_yDDU3YyQto3q04X94EkBE5CaDtthwLfvxmAWTult7xp3Z2n_O
KD_mtBkiWK0ImgfF-zVDtjFNNgjZ-wvWctlM6BKlAihN9k-9h27IQhViXoRlNc1KEveHBQ1RNXVdSvjf3WQVukI2u9sS3zBW1G8yUejrXiKexgm-nnvrpHT1ZWicQYfajBohpnYnkRqf1HPuoKz5eokjCk9lPIXAc5IpHm5OGAPOTz9bYX0uVuQHAenIZwHS-kR3LHDKjlevUrBiH9ii9OUZ8GJYsWKOribzxEGcVHIykI1YBu6piqLBGbZwWqDbvZEXBgNm85AtEfOtIiZ_x4rdX9eyzfYUZu32djiExwj0CIGP2qGEiLb6yd90CnZs4cqlJjPTlTEdrV8hL14At2xUENrL2-8nlXtk5oCCHxBda8l2gTSacbLoVz0raBwWLPpUETr4dPuUtdhZWuXjYdZHqj5K4owO3JUj41EiJOTwg8VckY4JN9wpG4HojDvSy05G5vI4C86PxHZMue-hDbDAC_p2Sn6yKtxojbs1bTA_q9dy4KMiKdJGxyOV-THCg2oOMU91QnYRVIsdsCAKTiYT72v8zNalshruS2UxSm7u6M7GnIhzJmRWuOgDSmlozmEJktfiJQnGad5pBRB2iIfu4kXJ9aBRszyMbXLSJFJPyZkoz2U7JLtAqSVPDiS7gUMvUa43nlDO7guFrk50gbUxu-b8yA7weYClHu1ndUwdk6vOqC4U3wr2Cficau6w7ONV9gsHoOB_bwLlXiKmEa1IvcBIWSj3CqG4Mx-BsD6G-HDVw2-k5ea7gQK4WYzLnIVaB8hsIyvEPWPAEs0eF7WOrdRNRAlhCMqJ1Ugmjlte1agdw9deVCHLY6V0F4hNYTNUIVEFJHTGTRgBTy26PHpZOmPS-p_3g-_fKD_B_xaAwJvCL1KfjxrOryZKF-9hSIzloRjLNgUmwq0sfIcgdS76n5C1rN2SSEUVJr0fkf9NaLi5_d7Yp9fr549CTRRI5spjljt_3QpqYPldYEjSitLQ9nfrrNg0_8G0W_OBNPmRbsYyz0zFPnsHsVJ01K3tIp44KsPoD5-TkLVALyL63HzzDMO8KRKdqSa23ERdVZnHPu5vBf31Yz7Iy-P9t9bxgfy9JImZLJDiS5SI74JQAFdXSnu-fe-GazAMM8Td8pdESUcTUutLyc-ez63L6mev15t8S0kJ5scuitQeGSoA7EVxONUb0yOQWhAXg1b-B-miEHMPyPrZCGEKhE6PGqcat6vK9_WD6MZ2upcbT3VnDXAtPajGShcs8mzQZre1Vw7UamgO_69nMihFoAPL2Ll3PUxGQ4S1Ui_lJwjEBdCgoArI8bidpeuv_e9NuGtYTaLgjVrIvpt2F7EctDJ6yNk3xOV_a_4tF_5B7iTCa0eUYBCamOsOKjBNV64tnfiBOnZdESJ16B3T4REWmObiHBsv23Oax3B05SnSvSt44YltqTGJnXC0w4ETLqIHZuTrZulwIX9bkSmRc1LUcS2-S31-M88tWtgsz9ehKaUxA1bZ5obtjYtBnJebayAVB4d-bX2m6difjXegQmSkUxsXmFkl7EPL2Zv8wd9e-Jgqt-ZQEKPuq-eNlVl_l3v3-C0LsQUMzui_YocXvNuZLEdUAK11kwMEdjrXD3w_ARacq53wvNVcmm2BcykDU2_JAwBmIB5gEZq3CGjzNBklrFLkpW8tHaqjjfzzLOAJi_bmBI4tvimiReSz-RJkNpipSLWdY4IkbtgCKlRHf04d4EccqCYMvarQZfnwP45lB5w2n3ey8zwS8mr1XCTJONX1SKWwfSdUi_hEuEETt12u7sa2Mlx7HxiXbeNHQ3bOy27ny073WgTtSfh3tMbaTa1ttQTX5b8O0ND8GsGphdwpieugQh-JPeOvFFYVnfYfXQmgCu9MZwZ9kjhGrYCPSikTxDs1J3LP3r4Jk2VTpGWrVgjkJDEePNT_dTh7TfAvFLb-V-KcGObyVq2MUhp59_sYtgkhzEnwUSQnus9LMmpps9ZeKsLjPk0u1CDYJXvD7PqPvDIMCjbZ94r07pIhRFpEqT-PFxFwLLvbs6kthqbiBwSn1cC82zphzu-8Ht8JC0W1pO4EAU1_dtRmwSqK6a4g7wJk6CIkKns7Azk2p7sOgy7qTU-805-15bsqNdACUtDaTVuMVTRIOeHBeXXqLJ1Z7uowcgUJmYbRoirSjzgLGfibpVlaM4XE6lLI5DsUVIBegxzXdvHXQmqq_rqwdT6bWcQXDhuEdhKAmd5yuobH47062saoH4ap6L7jg-cyIX1dqhqMYZb1TlZN0HT6__VnXkVLADye8icHGXRXhuo2ALWdQy7H0tq8ADlX92u7X_0FradY_ymgtU8SafuuQGzG7rUafuCBtmFScGTZ65LKGIpD-IRKFk9W9hAPTFTj1W4slkuN1MomEgAsWxcIV6L_1KyaAh_OLebVlE9bI5h0IzqJtar7UfcM1rJ-szdZH3ZurbWAB70xKQ485CFaoMY-cpcmtyEtXphXoYctE6zlJYaMUZNFTHKVBtuIwgSvzSya-vdXeVgpqjpvr5YOUMVdEs6ZXFdhRanw4TpIwPLZH9P20Cy-kpCBFuw5V-uTzjFHGp1-knxFBaM2rwyCSppDEwq8uGHiB1Jww85GUXWTIoYsMVrvWO9u5thawPml_DbCHw-RL6wogZuPef9Qb_9iYx4R_bsPjirGhDVEkIRSGRX6uZQidhy0i2IoS5tWep7UWHU_5Yj6nwdNgyfA2TzBlsvZtHgw1PpljurZgdoOpo1-FT1bKBzlIgyKdH3HHxai_1m57Xj7WCvIop-ZCDhLf4AnIlYI3dUxspuzPBzpmgHwBHlSNqKT0oBuZpIOo5LilK7MX9mX0qYqVgvD-OMaF8nv1EOkpXEUpQnRNKZUhkMzKNxJomKHYxG0pdYb5PlS_zu79xKTC2r9qq0Q79PFXo_LLVnIi_6vcqgKRJkZY5F1gvNtmUHIuj7Chlgar9jk7T9Bq0Whcw0WFomAPqrUi7jNmE7sm8Vh26ujZinzq_H0iHWyt29JRUhEaqXM9DsQ1ChnDiNIDBQ8vxrt3LdRW8nPxlfDxL4Tk6bRkvY8FFmB_GAhcBCNIRjVoB6ETjMDL5bnytIhGKyj_VMzookU9mPoB_VXMdjY4yI7AzYxGiqCe-Y6w1p9MuNODgjmoZFeLEwrWtNPGfZyFMfZOMydZ3neVCHSy9EvO_7wBty6k_4AJfKomHg1dJkduAuq-Xb6Z4eK1TKNBpAKsU8moxS42b6j2fVsM80QF4Kb-5gWHp_-ylkrcvs7vkn9cyE2zygeX5J_iUgM-B0AwR4ITuP4urOtzFiJOB414yM-wVKA-JGLVlvmNutUpL2jHySYezSP1Co31WE7RNSKsqiHLAY-slkT3Hp77jctDxIDwOM7gFfphDN_t1cdwDjWghG0mL0hyDO5vnEvQ7DsIdTZj77OMpf6UBc0gS_RVzxQ_LLGkpHHwSOVgcKxyeVLLSX6qd8St_gxKx7uBcFzQs0eP63FkYQBt0drth4zP1MmimkgyZ_6ttNs22aKhxeCtXNfiycBv6gZN0V8tutfNQdx0iI
YgDBErqZtxBKKHk-rc9PNMEw_-_PmmDPusuuGrvx008rjOtWLAhmnFoqZQk8Yh66j31PYjw5-PeMuquZ5d4l1R1Dj_B57YhvJGlfFRt7unfMzK_TSpMCCzpcXQrGUBGpgpP6zBvArHS17BE6z4l83wFS3sFGbPZHb4r0geUDcW02q9Wq6kQmrRmZhosiQfNMRB8Ab6zS1IZLS_an1ZA3mzuNjO_RzaG6PYzebjRPiVpg6UVzl4kSxzsD_cRmNlzTrVjhnjQz83mnidE-FnivnsPTgq8MpUN1HIC5B6AJm3faPtDglAeWOudYKGKoUOrCZOwgRnfvFSd-gZKjGiKftSdC8an4FCEIwNlg_Z55q2m4EaBGsa4YocS-xInHdK0HTBEynxtqPYAsGSIFo4hfn1z6qs-WrA0zAm84N4897NfaR4t4jzcoDvSSQzkVJX-wQ-edcW0Jz3TNlKlys_o9mYO9nd1YuQ7B_XWvqA9h5KqELozIUhEUszPrF7pHFiljSCEL-PmDIZKUK8UkE5g5KrGOcTE1kJQuPckZbILqTYG_BW4_s9zEKUizFyy22-XX2c1YytOpLk_-uDsWav3djFCD0iI9ocm5ynWmZj8RpOCJr1nv_xlKi2tMahvx0hjvRyBrRJ5qXjKxGbkjAL3-6h6DfMakg92C2nUEckYfby0Z5_xqqrwYyRUE1hTj_TqYaRa39Kpu_4eTtdNFfHRyflx22CrVALCZAdTgBAEF4kYA5_UvhPSSNXDJs4pVZumFa1nmvrJDqxl2MLR15NoNeXkqlzyVJTM-azudMi_-4mYo1ESML9EBjdFkbousJha9bpyNaPtMg7rHGHxGG63pHZmUTeEjc6EUu15hSmXk3olQbfVL2Xd4w3FjRi6LghzrVdTHAuipmczs5qUNTbPpkYeHNVscJ_OGZevg-5vsJaTLdGp69-w03NLR7PBenrM_oPw94jo365JIqu4-B97sgUSib9el_BVvJZ_vl7kict7TkwzPs16xL8dhTtL75T6vpV7lU2CvJuHNB6DeYIr904HwLJgq1q_nHa_P0AW3VwRMu3l4wzw9RV6p_3NO9BJmYB9KEwIlhEXhLnr5Kj5o2YCGwxEJCvET4gYd2W-pof6BaeIcK6ikLbdLF_SF2N1OUX_h62UgeRwYXEd9dQU0C-G_0ywHEUvfTp6JFEfBlNThe_n2jC2wuiO-x4ZVVYhyjips3KsfUrLPgmO5yIsfJqyVxHP0cFD4F8oIdTndLrV48Niy7wJoa3_K0urQ4Cet0ckt7NXIEmA9WwQlEzfB-nknO40bUzlTm4dyjCHkNUrpro6RPkQu2iBkudMbkl_EmNHcG5eUROCO1tjjGocax-7pWNXtxJttaNr2W5hEVf9_uooXW-KrOfklYu5fVIca8O8lZBmIXLcSEvO5lT9JevG5RbJyzMNTnZSkTWbCCytoTOnpwr_p1UXini4gZFfqLaI_lFpycPfJQmrbNnzSuItxflYY1uhzI48x8UtvQEfW96CsTIsxdUTSPPAhD87FesRo7PV7PZ34c9kSQaPamED39etcQP_hWLBfMpCojFpPXsuOer1Gvi-7vxB9GDR-y_p0jssqgYgmF_l0p0tTsoB1zeXU7JVwe0VygaMDYbGMv4jdHHptAqsyLw3LdRH7lPGnklBdGpgewZQayCa6q0EaL5S5UPHDR5G9KxyltSZMz_NjqjmTwILHKTjgd9PFwNBIc9iiaVhSO3_Dy0Rz4vyNtS3BVorTjoSxSN881GYp8u2TCku0gwNeBwMspGzOdrLVcjUcRX3LAVgOsG7H2r4_e-tRBKQp4G28ZEY62PoSHNtavLlvTCgVGNvNgiO2DNJH2AK3r7s7ytOpviss653oPF7ysnCaShoj4ru-nNSOrdK7U0DM0qEDdmPBiTN-6YvJEJJQJMdW22YdkWAxtyobhz3x5Zpf5BHsLBiwzByEO6eHnRG9a0OK0MfiWJeKID_Cp4GiikjNpyzRPEBOHhLAVrPcak0z5Aw7cyZ3rhgCBe2ATjNx8WNtjx53ohI1JcP_ML8VsZW7WfaVUlwr0vfLvj0Q5hn29oRt-BPHr-NtPfnVYMcevBOvSvwijCbpIXIm4ElWf20xsPc7xQt4P2h411AavGOl58ncnUd3WjBY0juBmW7o061wgSbqusdoXlKCRhvZAa_iB_wnHt5_vwk5C8MNRzLMtGxhyhJEaUwjit5dYkUusKjG7DXFYbJed5CQxqXYFuk8ounRqqO695-mvfg8PYeqgy4H4tS__unCdoZz0cLRoXBXM4CM5ct_i-riieIWuX_OEhH7lkXZBk6Okii767BAdsMVLWQBa47XRRRMQSXyM4gu_EQIlMvCYKKM7TE0KGeI42r3kUDR23PVhjP6p309Fkeba-uY9VU02LyVvrRg7vyq6hNTjJICtL6GQd_wfH9b6j3vcbZaUgcmdinPzQZ0vXtkCKzzQuSaBiYGKjG3u8bOIRtZNIl7n_1hXNW5TPcojyd8wZEMdnkacQuhBhXegadwOiDr90pcTdAh9QYyYFhW-hNhw4Vukuf82CpySixeds8uWucHIE4ezCWEW093-m7wTX5Ub_-FV-j1BrbzrQdrKZfSWpcO9nD81OWM-8430UYTAck8BjrOQmlMd70rUWG7EsKSa6qTCcv7ZLYfv7rpH2mShmqtq8Zj24p1BPdp_cXlu8ZxxR1ne9uCwuy6RiLb5IcpultF73t_g8cr1tXAfqXSeUJDxui75VFSn28rvMPZP3f77Va8HuLI1AcKLb0kzCs_yqwfZ9ESCYTZAz_t75tbkyxpM_QISZHkowaOne3R-GO2qxxxAO4Kk5ehOFFoy_EujVhw7mjwftJqCaDpAovz0bZ4YtqUMSuIUjW3XdkRyHRPgJ6sFBWOiUz-I9FKUknF5LCgKP_bjQmljWa0mEjMPjj9qYPPgcwcy1ciRPX6dDseI-o0tMBah-ajKM7W75kbI9n5I39662ysxsxfcr774bugwek81auZ_bFXh55LXsmFBQaDRZn8bvKwlhJ4V6rIpLJR8JoU4RvnLm2PmqHnHgiIzSvIxcC0XN42voWHa9DRteXJFqn9XjQjtmuX8LG3iJSoyyPudM10cLxUyNT-MrORJiYJR1P2RtwDzBO4b6_IfYgwiFwWb_JEIGt5OS_J7Lr4-gh8Wjo1lKqa1Sg8LWGR2vX77KVfikaVBkVbcJLPVuQrK4PaLlg5KNgKQd9Pg24ca1n1zMWKPHSll_ctZSKKHE_bI9qQAoIZcl94JpopB86egaK_h8qekRbs8sgVhByjiBkbVwHmfBF6T2vg3Lb4T9ZZNiBe3dTiPYdayqECsLg7hnwbB7KHwzmeRHuLbsufhZqjkQB08OgzOMtySXWdne_fjsayiUVAyInjZYI8SSN6Bg38ZSICFO5sCA1OHehH92kzeTO_q40nGli4m7tFwCQZNx1SWug73imeElaOTyduBovG_CT-3bqrl3uLEqElHqkDyjj9J67got6GKmB7Ivna8Mk6aWxQw-_AXPbo2pR236toZJ9rHiS
kTUJypfkBqkivBFDmXLFJEBT992dyd1W9DdDE1JMuVrBIQ0MisfLVndkublcXDhMVM2VusVL8Zh4Dl80JjmY5EJAnoUF5t7QioGAImzuHgLFBYy5VoIcwb2gcPFphhCUmmDt8ixOY2QbfKqQBoRNIL-r57wKDlJfy557o_bJcYASJLxGk0quzh22lPgtF7eZefu1m0E_-6HjYys6U4DohCXGQTuKE-3wUY8Ke0yPF9faEJ-oyRZU4cxRWumjHWdXCFaZBM0Wz9R9czHwFSvzdO7HH29Cc2NW6j7VxaR3VGrfNEvstYQdcSVzC2KFUcEOceqtkGZjy3QQ7lKjOlkE2-TU24OA7IiY0C9_2VPtGiPOlp7MQRaiS_DZFXBzOx10Bar-clkNrzeXmMsLRhWo7rELZ6-0cDBXvxCyWw8wPjpaP8Kl3tVY_4yZB5OgzV7zdKDeCoX0DRN17IFje-g2pQ2U1MiQSHzHhGaPoxYzKb4056tneEZug0t1R8x_9_sb0SbzaTgtzjXB5u1TAfODBlEAGJUsxceIRg0lC-Jbfg4PtHupOR5WRIRwZr_Z7PaAyF2VzWYv3AYKc6BDr3ZKnhVNQEkiAjn8NnwBipBKdXt1Gs8l5CmW9b7iasN8sIviIB01WKb9jqS7yETrU1h2p-OLjEZcnbQV4Dq-9QLbsVcKOmJQrX6cwLog0sW7vCtBiZ3PMdf73pyuvnNajfCjPvbsp8JXKXjLsiCh9OTzkTzZelllMaG0UpsTSkVcZMeAs3acColJQPcmqk8hJKhcwUnIe38ZcTolBj6Dq64QaZlG0vPITcu7en5XS5Zx5sIEagjgBAwlmsA-VIDvGSrNoS4leWAMt8SA-jtJpOMQnDdRwWEVFWd02Nq6eNQjce2WkaUtY26uOvhgvVz4yFUHD8cGGWdNbe9HNlUFxQcY-LoPkufjTAyIUJF1QlD33mGRiO5cR71G9r291NwPBR5wiK3wSf-RLnYC1frbwIqmajp6EmMihzXxCYwvHpoBwAoICNDPdaYq3qJFnhlivgtBSSMthGiwfX8dyJ5mm6SkMxA5_6ti7PB9HuAlufa_5F3WtcgJvM5-iIsAhwTiFnP9FLibnqSFsaARWGFeOfsuy9G3-m6evrzZPtsdBnbxLm0aGrP4Qy_EWsSJ-7ehAZ_xtdNfyn0RVoxlJf4BuzkiY4XQAMDOY-et0dNFJ3iSam0ChSJBzTAJ-iAzFk-eyyovjTCSqivjc1805pBfMFsBWZLbefO7ihFCEpvK8P_RcZ2YKVGatV1WZUDL663FBgcKYvcMq6ojk-ENdoBwJ27jtS1TVGuy_9T5gDAAXag45VAwgAtIwMK_-KGfQNLoMPTczr7ePekkHG3W19xCzGcgUprnjYWXKRdVX9-M5eT2E1DZuzlZ7GkEQ-z8RgMW1oRF5bXVKcJOJpivl5eJE4tRaxZZwXytS5K_cadwQL9ESHmahDs054J14o9cEeq6OlPWxDeFTVMb31LsnrgufNXKDCJSv6KjI-M0dmpF4c_gRxw1u5FA6-V2aCGfmP5dKxK2aM8NRfaVjwiIxcA70F5e8Ymo-xADKZFX7ed11fjeAgBwNy9idhrsj2jSeZpsctvocWgYCmmHkyb6ApGNk-spSmz5Iw9d8Hz-3nNZGGHv2tzIn_GOkNYrjWIs964VeSs4AWi2zK7KtTMoby55rtbJ4lWZtSntQytw7X2yn7NQfrm0Im5oEQqOMA06GIWFOmCzaOBEehF9yqK1N0YbG2WN31RXB4wkT2Q_iNzYHjQZxhlWWynoQX65svYkrRm7h7QX8IahQXbHfZJm9RADVj8M7hi_80OZLu_Xe3N-14ljifrNfryU-zYCWMgCqnv6-5xXRtM7m1a_A8WnVcb1dSDtFUZd8pahNPLpR_GpSE2nXDkV2MCjyLSRfD8Sl_yjxgJB3eFkckvPSiXRNnOCqP7x_DJ7V5gJfIeG4sDUsEZVuopopdRja8Ed32Xty9pJYZ_G4ZB6Qt9yUKggg1Ej-ueBS1BXX27-QErdR8CPZEDbn7IknIcgs_uTuXEnBQq_0sH29NJ4jRQL6PnnTy5vNzZRysSKD7o67TUC89HqZToWLN49pfVR3_JhlUJM_dWcSgLx7cm0LpdQp36kjAyEgoOkKbxVgGSeatIBpMnp0idJMe446fXwswfPl8mzO4Go94OhMDyWGazE_2QRWe8pf6EaF6Zj5xoZzkOcyROYF5pQs0OnX4mETD5RaYd7HbYw8T3IhiGyE-TsiI_NIpvFhAMaofEbCe3kWwgbpETJpE1j3ZBWbobybKbVsT_FmvR2mOa-qLQAwj8myIs49YgjTlHXu7bBkr-5qCcfBiHEiahl9YvxTmeXrsqffCDCXNIU4z3gbyOrUTlBAuHOlGrgBkAn_5mmfLjVQQafi7gTr1BuDL4qOOJNA7gRmx-2VPJMEfmaw0ARBMaae-jMgh1ugRSEvzPseKi3iaQ8tB7xTZs8ctceIatDc8zkrxvTy7dORRRm8uG_9dpJ7cpF6dkXJujXyj1fCMIdMj_GY_1r9cEQIZrJwLpMwapw2cINMlGdAnEDN_tFB6U3pCLvAHl1osfxrrisGlQE6XqLiWMs-JHo5cPyGQJC170uBWIrLXnFrdaa4_kOJJJyOdZbMrXjnlGY1HYWw96eYx28331CJxRzbqpcENpC32cj6ge0s3K96BizS7kp3-3Z4TCV8dPAkWmmbgaeBj2_ZJU8JMsiM1idAkJwv0F8q9-Xq_O5oUv-eieFnAeRXYIsvIOekIu2X5z2lT8r0CXqFEPsxdIfEh89q3UhjlXjoUqyvJ9FEns9Qund6nN1yuNLe_uCZmEBzOo6jJm-S6oN0rUIQ4r4-kjiXtRqcfqqddvGVYTWYpmUoVR6J5WbaaudLYIaASX5nOEIOer_ZSt0gBAOMQ0niaU_CyUC6u9QieQJ1qMwwgpOAxHwm-uYsr0654bi73oNAXbo9aiYHre3g6ejFlMb2SWfaoqfhiAYxEY4rXoL5bqi71KAGO41BM29L-WtKVY0BfCNpCM24-cdL7dF024A7nJFLmKuHBW4gNRovb1Cvvngbju85bQgOZMHHW3cZWLHZaNewKO5zrQ2DPwE991g7SYh_aAisKrRg4IDKMCirw6sqpablmvkts9SOO6QLLygfGlreLe1pOLNSkAC8UL4l3TpdW9u8wSdSKkS0DJJsBgqceyAqQoLUqn3-_1VhAu_cAo9dL-9UqtsXxcSNwdQdpVXlOM0qiE3_zJruAgv8O9_rK75ON9IlVitO48Ty0y1bSmgJfsBNxCjJ3dmVgyJVsHfVLiU7yUvKKrjpeY9TpKkiCnueGS2s9kieQKePY4AYUlA-Xms0AvI3U5DXQxH8fqlNDn4WrkWeQCeLESfNfmNSPdJHTMJNhlIP7wR_kMKlljHey2g6-QQCk41skNn6aUWpuWUiRextb8iHYDkLecMONgFOV2Nz8OlrMfXtSDkyiXCHcX8lwytZFc8NxgvgOCNICXkJbBHBfpu3GQj0rZAgSKKrDUECz2JJCLQzv9
izPjaTRoiCyis2GUj-VefdjniOy9U5-6A-qIs9L1aWi-l0sR1aEvamaxGt3Ced6hj4BppCB6XebYXyh8Qq6nDVGd7092ciTqJz_ApAKQtfLYprEoix8WX4J7asP97X2z4n74ThDKl3SwTKJeqBBcxIKQXHjfsEE0TNwb3PhPW8Z3onb9o-s4Manz9gF7_HJXUV-plPtzOKH9VsUq83IjSSTQR5c_dg4ByLSeqL31vCRoa6j4KY1SQwF8YeyKGvz--dWsH1vyqMXFzSxgDpmYTVIll4tguUgsqb-PwBlQVK-4JIGponzvCSMdlp_-n13Qn-oxhCER8GxbZzNgJXqNhbwELdgRDZ4Kyi04QVUaYDUh9cZExiigVnNjOD62DSLJaT3s200_yPqgzp3XGhccsbOP6BW4GXiYReAqHVB5yMwcULVrkuQFZ6d1_bpdHZYUBYFp1USZ-xXDqFn832HEezbep6qhwBa3acuZihXXz8cGUDKBS2OFKT82zhEu69YmymEj9_HNM9mk_bWTtJUmJ8F18uiAUmhW1vYOGN7ZQlbflgxQb_jQXLWTYzpy4RPCo02TZ3dYEZITJPqcngnY9mpFp4P9GKKWrUvrb_jw1taJsVD7gmK2ZFofQCU3D4Hp51RRdUJCa9KCqGokyaPlU8NUrA46inJECE5qfm3zNLYhakw3pXF_hTFS7YQn45JDYIMR_Qzz-G1kfUCnytvopWz7bRfkLtB2AU16ksJ-1Zn_AfVY8O3m_L7nPEn54E3yToGYiAGg3CB2ORLn7nMAExfrQ99vZQ_JvOr0dOO5rSbLQIk3rg5phTeoHZnKuO48QeJD9OI7b5cd1s2RLqEr4LfcVXEL-e4jW1C6S50wWNgScirwcmZ5OjKGdPtzLTT_O5mDP96xLXDjvrZnZvhPvP2dzxUcEc7DTATd9KHwmgpesugx-wgF4Hzd2j4KZu-K6DSDiSgltzSbVfPuFVUqHbcbYn0vl1yNmDHbQCzDvy44pvny0fK4oYfl6xJAvpgl9h1i53HsRClwvQe-AyPDhKEJSuM2o2E6Me1tGx7InMEamC2LxP2lSUuE76cl4dXgkF-IdYSFnDtT_t4XTAMior9hrsv3ELCQHvdEDz-nkR8SLQYZHQ2Jzue5BqrNOqp96jaF3H4Jph73bqi2DG_olqyTwZm0mC-mLulvtbFSPiBfB6BCI_pcRRyQrc_IYlmd7dShzXYfWhT7Ma29VtGwhwzqsOJ5zG83qLQt2kqGvn-0DdhuB9oqtPaIMQjfC3aRWww9CsLBZ_4snUAhAECm-seCBhB1Iv3xhJDVWcohTrY6Zb3OXvqP5GmZ56m0DrkJIZ9wB0Z1mY0td4pDFmwopqYgqBbdlCUoXyOzTollLPFusl2oxjPrzqyUXWjJgC71n8k5pfdRudb26UtUAjgWXCmwpX5NUulExPQkJCuUUOOzwCscsv03tZ8CYOyNBhGYPbaITmrPENTD9Zt_MgrBQvqqQbHHqVofJKGAhb3lBh0ss52q-FSGgohOtSaNUxY3A-npXEQnvfWT6ojpqsvU0On9bNKuXR_JPYG2wqyDtNDOgzaopwqox2yXLv-WyNYH2wi8P6FbNPhw7kjJ3KOndAVP2ygzq6uqwuAtKzIX9iBGnz5Lv-CMJi-vniIghpfyOaWMxB0aOa3O0lx8ZIcBFF3WhpydVrA708DenltMzLhPxZvqbhaZmuGQO0D5txo7MaBkLPctTlYdznLNTPaeV8-bxgPBqcccNycirfLikgYzo3T0o7EovOTeNDMqx8TK5EVuTuIDKWanxQBp64gKMPC2YfZb6HlmYliz9kmQys2CyMagS1y5_N0KWKweJhOKrxt2O5w7VVZnuiyQlsUG_P4hVqMW38e62LLfxfoQZZRhxXt_8U_tZeAXG73GJo40ncc7NMPA62XRwGYZ0GaQ8DssJxb9_sQnALSqxtnnok-XPbbvXV1iHksaf97tnvmCj0jMJRBgUk3H-BvtgKq5BiZylkefaTbxQvO-o_fM_JWsuUAQtE6knyXEI_urERilwbvtWYD8vhI7VGZuqAmPzUlaFHIIET0z2WS3WhZ9G3iUrBQtwbMFhnsQd5vYvsX92YJNtjzjC44Fw6OE9TdF6GwkMBsBmR4ttSPlgJWPENUtn_vboDG-zs8MHA0kD47lyP4wXauOJgoL0VxZ9XlwQljJeC9rLhj_pmJJsAhkagnLBC4pindUNJZtlTcR4Om8gRYesx2a0ZRH6ALPVE6x1se63ksaS56muos2s4o61vQN-aXQDJX_4GUHv73fvKekvzWrhQf60xfoK9ztXCuzVTr7qlIro5iJ7JxoMAircj09iEe8ZY43MxkUFvvsdjIdfLedMYwvzHt-SSqyTeTM2cwYPP8DNEvMPNL4h2BJrnbCaOcXocjJnmHL71tVyVatyckvMW-TIxAVzgyQ4wdD9zR5MLY4TEttipBnPfZiG0YNnFeDkznRGf8Ax8m4DxIFV-NNnEouRwLifoQwWTrH9A0D2NXLdRjX2VeuaUAbvyNO8qXMVQPyUW_di-aXWygD5c469ubCxaKngMtPi1w_JyPmhjDWWyxa8A1wPDmT5ebGtcfcK74Pr4wydJYGDbCZmqMJjzfKI9pIuztBgNllLnXSmciOygoOYE7UBeRE953CoPR-uoAgmRP9FJ_KlumvNbMF_IgVlO3hvePq4hiKA5LhNQvNuiqlHbXRSImw1HuO8TXVU8Ni6legbw0UEQuvNg3RhpBty5CYHmlO6zAFYC9IOqt42iX3XtxSA93aYGPGlo8qHtJ0QeQlK8nK2R65e1WWIdcTQzSQWgVNpH0Hv3G6N8hIcQtBzVgiHn-UWgQzB2iWSkXkohhjNDsBNQyrvpEy_BAvna13TnfXZEuRxzeYIakd6sNhPBWO-BzJ6k12H2dw0NnweWO_4BnIcOentY1YflJ_Ryc8MidAoXdeyHko5fT_S71RwR-PVTdKt0SNXTOen19bu8ium0hQOsx8sKq6mRGPw6LsmAV3mk-4Wgqkq_fZMuLJYNnutu7WXwHEDfxfpuTm5_gqfdr82y02Td_fAmh1RTB5FI_wPOMkFqTUSeIHr-f99cx6DzPvM99W2AWY1TxDFL4BZplfOcxVIs3sq_Oxy8lyIC0pUacFpL1zaID75GmDQ7md4AMkvLTIfFZA0YxSAfVYls_jJGu6TblXq1kphDMhqOdpOVrgLPytWMoOc0tVYZAkCsEZy4bj3XSTy6YsSNsFLPjyct2bFPZCtfWo8Xx3vbh0F49UWYCd604nPYnoTpZJnjlEqB2P7TJMZk0CNJwplzZLxEcF2wYq_iJ3lGFv3H4YxJpCMhlDIcnCt20bfkpLXI7ZuiLAQYrK_XSGJ7RD-Jp-yhDydWjoIaIJLJDGNO7Ejni9V7q7Cso_bteQRG3QdKthr26WTq4F0hatDwfX1hJRQPcMSKAd5mHmtTTGHwX8k7zrOruMgTQF32-CtDda_f6OjNPObHxeR5VPrpcwvJ2cSvNbzizOvc8Cxeo1VPTMMRpsrOUJDJL5CrLhjO0
wUbB80jruTAeagFW1MMmB-v1T8pCLdZkIPdZOehsbaPMzaSV8OCnZ1y2LSUlQ4DJBuIX6YD7Jwz2qzQMa7scR2AFXIEjINEItB3xMqzUXVCbHkGFKT6spPDV7YJZ9UyxVkozEyq5lOv8FsY3tUfi4ZLieOIkp6th6gmVu4h_7pm5hkiBthF-ShFUHav7Dp9Scaw3rlaCqVvasQ4UhbvEzrVRlBodZW7POEKoN4jiQCV1W40_HD78jZOwnQnW83Hn-oXkoNyxIV-CcrSoIfQMIOl1-rSGcrD8MEkIk3mXGJNJiikJtvG3DBFm2eAiNF_o8lhoN8wr4ZeUOvR4OYOXozZ7bBACzkYmb8aogi9EPz5O7iM1BZMAyJy_8vkb3zWSxph3MiVyaVqowSmKBi9kZncmHy2UFG8yjOHYSuRjzHCCr5T26OD1XrpYUMG_2XPt6IFkKFWZmqICZL-e9QGxUf7U4d2FxzP4iH3kcg43SYCX0fWPS9-rqnIe9IaXPeCetQXMOPokFuvv83qr0EWifa104X4PecRgIfN1p8kpAhEJY5HQ1lG42rtEHI7p-UabXf13PlR2-btl-0YhrHwfJjmnmkNUv78Kq9ii42bovBjnZbPguYyf_D39nj7vYUPt3PEBOh_NS4axmPj_VqDEHjqUixjlfQmKas66qPz1hfnhA7gy7ltg03xLUD1GUi2IGnFi2OnvGiHz5hndvSTWRGiWzOykfQulHJJZoHO9hnjfH8lwrn_k5TQlro5_QSGx9cZbpXpWMfSsyrA0KYcdgh0LJcI5Eg9ZBacpwzhEkExP-nFhgQ6wDpnKyUmx_SaadBXOfP7WC_MxhXstWh50hJ2yhyPIgVjmYz1CuxILa2nVCujhu909wX9lqR703ubtIcgBTShXg5Gq0hKXiAIXlpUE-zReO_khcXFpqZvRd81kYn223ll8tIIliRpA3N0LnqFzRGbDQgN6zE8WWntvsg4vaFm1ouaIEJRfpRPxYnmEV6ftKLLLH89SBikMmtHDudc2_jw8kcZR3YRFhGPPSEsFLW2mdh1Nbv1G2OCwnZY2XzwmTdZVlWA1Bc3DcMzzhy00KD9MEfZgL7MevcsGqqNiTlKbXlY3vlzIV9Ee1gJFoccthaWbu2WrmrfpfEkNEsmN2vpXUPa6Rvm44k4PK-wjEYhOMq9PO8EFAwCv68MYi0eh4n56xHnodPCChNIRXJIdlvougMCdb4sVndLSzKnvyQpZogIj9vvKBhY-DcUEM15_Y5Nat9VNmNExgLQsimfshHjg2L9jqBNHhrYE0IBkdpiLMGLdss4IAS1UuN3GmWOVEtNKrb9PXNKLeh1PtVUQA2MQP7HLFfSrgyABIqDp1SG_UhVioc7i75dTtkOXLQTG1jLa0BOv-bFvDq8kF7jA7Z8Kgh-3w4_nQ07A4gXtOISPRzOJq2TsLwemdRR0wlwb2BwlHBZXCPFxs2IAvcQgGbNuj9bvRGCj-D7DeK4DSNtN7RCpeVMntbdawLpgOek5Q25KQVLLYogJDNJaVtKY5DpEa7y6ozrfFQzfiXiK7RpExTJ4MQ_GWfoZOUfhzX9-c3HPtdmGIsEzZhLfibApP2MtBWiPzJ79Hm_h6yBaybS9_w57Jj4QDdqSFWfV6V8oNZI39JKpLlfZuuQcCzJYO1qtPCZgPd8xl7XUfZrDpHW_JIjtNkg3ydhz0es6AvPfqswLWEvYxBmwQtQPXaJf0gV7gSs3VXKptYTL1HZo_kH8vVoZqEohOlcP1WoW6Cw5gBndgk5gdZS_ZSfRyI_wniKldv_Suzfy0q__sR2RB3lE8Z-vn6zwEiDFeFEqgDr68cVPMBNh4rbSXpjeBC9iOto58ImbjX_KetEvYdk10Oyc_dwGu0w77IrxojtOYe6hqSA9XuIehebg06ehE193PNm8kN2Mj7mWLzo80UAsFK6wS54egbCV7eb97tEx3nPyRKFRqZHmz6VybrC6X4ViyQGBpiw7LI40OpIhmol3hMnk0MPDn72t48KpreF7OXPl5nHmFiC8JwPikW-KJsfFlKqGuWC41U14zWbn0H4TVk0pX6Ir5g0nRWhr09jQ1yihq54WwJFPrNMCF8dDXEiKo9onA8SfdfKT40zc2VS3gDo5g9K06v2X_pNwFWqDWPNLO9Xe4bmZ7klnKa4oUay8nKhRCpJw6fYzwRoqfPJxBF5Lkw7-rrvLfvKgYU32wPNvQztcOr8rUoceZTn1NdnoHTm8h2W3cuUHZyvHnPvK_wmuyoz03AN3U68xPGzWMwk-kJAHMx1uPXdmut56IoYO72el_GaQ0eGOwyEW6t2BZJAGpD_9k_XjoAt6pwP6kyBgBKGY5rSOg0R2bvz9ZJxcgGEtPHj0IbcA_Y2m7Ribmkr0jESsq0e1nN_7OALGP-LQYaFGfGUBxdPDVdGpZU_BlcFwyvBm0FUWfP98UaaX8ZFT3PuSb0JFYDQcUnoFa1yiEWSmR3GKR7plOs9IJrWvGu-Kgcl6wC28gzZzT1nHFfR0JjJrmt-Ylk0Vkz7xkb2ff46h1avZ2fCQNs6RZ4JpJ3bEBQSEFCMgY-LJOc219ajdqsb6XKBUwvBVxWFECM0141zs9t2Od9IAZLuMgu20_UWyelfOwEFpI993Mw9YZqIY780ioroUZESqAKiA8w7fJ0K3XFhElZFQvua-MGIWxQxXM1vC2E9U4wxcivDwtbo0-vt16pJ_auMwzW9UM-GfzuHzOHmnQ1ZHVh3bGwh2_6OGCNf1VhbtHYQsJVi_866xc7O_IOCVlXJou-mpeee_-oAf_oACCzD10za12GsARBdCPPkq5VDcKJ124zX3Z03rA4LuhXPzy8PYnxlLvvHY2m0Kd580Iu6aZ4T7IuPtH0OpaUHJb-P2R1Mn0g3I2cC8e6__87pbfi3OoDMrge2HdjKMoeC_dsHdq5DLSpsa_Ah36P4P67ozpcNFMuvaIMF6ggswYPADKp98bF3coOkaDr0blVxYfDnfftRq9X6w3-4Jgo-dsmC3-J-ztu9hdaVbdzM08X_2ey_8Du5swjw56NA9-Hs-4D9wKHrDnkGbsz1FocipbqJB8QfpUBWOaVaDjvBKMMvQ5BWT2XcyyOn978L33nnCPEK5NyC8trmPvn7Hc1MlvdYYIwI_lYz2iQOgDUltUXNnTY8D1WIZkuAHf-1m8ZMpZUHyI3LtFWlu4Sx9g8pi-v5E7PMS1Lmj6XtyFSKEGXEZw7GPZAhq1rIUD6h5ARbEqTFN0dcPhyq_f_NvIiDCipbh5nPkAi0GGepkRTFgefuEPTfXbc4oY-PIqVSiyVGw0aFPWv8K6C8T_z0qrRuHZcc_igyfaHCMWTGvND1qIg2kmsCHGy5DGCECNXXRpQYHc_Gw7auwzJqP_P_fqjHnYR99kWe1ac2szel0wccRDvUwy23xEvBenjIfc4AiAlMNhh9ER-f8Db6F-FT1rvdmWQi3WxAn8Iyf0we85OsTbAA1os2gwNfZbHcIFszthhHY_5-8hUDo1QyJCJm9MgTp8IjGNBPzOH5HivtL4R32m3E
AQWXlYOCscqZk8BGsY8roZwi3II-FHrjFKruvhNIIq2ie5UBvxq8LiWbdAyvWddnCCSV0c98ngpqzPgrHbtgWPdgOFMFl2Wpm5aS4hKnP51_X8yC6tIF3M4owpAJpR-QI5MAjCZ4Bb-LjSR4TyA8n55femOrTc1ag0-5hCLu0wOGtXeY-orZamEo4FTuowzOIOefC-b68ct3I9d3rgW6rVzbVZbLwde44vVdhWdy-lTMN906Q16QM0g3hmENK2wf1XlZqgbiPSM9QAruhH1KoHMywPyM2H4--uyGytGhLm9vMHP-w-8DhpzjjUgnFUOirwHXHD5uF4xLwoVeBJXXwAGsIs2WYwRYaceMxivjYiUrpaOLGDTx7o1icGAc6OIxa_8api8C2pyjyu_ytlXjaeClNY6qP7DAEs5csQV8ocug0OwjsTCiFGHUoOSoeJ3BDDDilz94vXQeYbe9_wwzeZbOLnty7CyoqBJsqrAdWhR7Zooqh7T28ducKA2-Vt4YaPjZ3qeNS1f26Us5RHQBcX4j0zz6fXj0e7X-iNOM-0EVUwV15YvD5GhgeTfpO8dR1Wvxp38fZAQOZ7StRHi_cdgzIhcpJapiYGj5rqwtJGfVHRWpFn-PTBB7ywzSxf8-Ij1vxYXr0M3CGC25v9gXxCm1SApdMWFQwZwV_LDzctaTiSwuLtDd73y6TE0e4sIg6_B2xq2xoTTmL_UydrCVT4M-wydOGoQ1sBy4SHr6kejRaekvs3Doy3NcjpZgRu65YLL0uTdGDCJZ0jRdX6tKbMUZEa9r8wgbKCM_ZBeGd5a679H-KXfNzDPEEy1r-YN1WlhF3eLVnZST0MLiabPvK2Dba9ZG1K0QTladEDmJzJNEBUVK65EOgEcOqWAI923OkuUe1b3N0fyUU4kowGf4CHb3ZashmMdJWl95mMoZsWM5hxpAjrmPhaFymZtbL_xXuQ-RJhHVG-6bq41FPZWTcT5zCrmg1oAuG60gVjQyjLuuxrzWzU0pP7ruwCccScqOjw29A9WQseEzSP0PMYL2cd06eaJJjN00H092M2wxMIusLy0YPAI9-NHbiWpu_Xl5jUQcPYOJ3oUvy8geO4X6lPi9L0SxpTpn-EXzV9NWEsBlf2MTwxVmGy7dJI05WvKNANDa3RMdrBPAEBFP1iVvs18VyAgTZi7l1lOl_kasoWwbmXofInqxr0yt4onjq_FSSashbubUWcZXBw4IhNQI8Yrc_7eq0fNfX-5pIEuHVTlbC4bmY4BWSWdoerOywWULF3gsUtGSaMxCy2qMSN8t6oWOxB4TOPTjmv005DpZV3WpRh4CJ6Lbd_kKJ2KGHVGXRR63ZDfzZmBtS0G94rGoq4z6ygtcCefuCXLC9b5fX0BdCRiDcI61i9KPAQEWYKMUFK8GA4FN8zjo2xvLsBqLkNse50hqFwBnmemZ_KNGmz0SAYMa4JjzoFXaA6MhBugUv3aRvt9-3tyclbGgSlD3owbhWiGq-oQbzDgUlzpdFSqnykUAkECf07OaIdiTS6KfN4akALjdfFzQbwDlzoPqY4XMsrHNSQzkTtpw8P40ILy1Zdqd2x3VK2XOcSaZKxdnFkXBWxQLDfOv6i8NsapgytOQ19ovbFypzISxhjSGdFffjVCnmHhfcq1vX0YeaXXE-JWjAjRk-IJtA4PNpa7mNxK5c8RCu-3GFHSdyFtUey88gzSzWBFalNjBjQy9G__J_H0_mbWfGWWdSZUTeYjKoDwwEezANGN-1JgWRbb2xiudd-B2-xcKz0qK36w7k2vxQF5mnwkAKjI-PrrQdtZBQuclRP9NEvbSbPcMEATlcLN3Y2PsbIikGexlkSUl93WBl1xu8Vb5hz6r3IU8OcZLkyPnkPytuZXKLLiMkPTx31BFqMat5EfZxS58QScJiaGkN-Mo8uHDqTdAUWvKA4f8EI3DLZAucQKOQUYvpF3Qt0mqShnDgM1fdGlTgOkHpzUSh_e3PwRPQCcZzlW6WPsq0UuZfxGxj0NBCD2uRj9mmr9xsQW6SR08YKnPmVAlJrkK6cvOJCZXaP9HPj-dvc9LcDSMeI6MODIaE1-YiNkUMvH-Yke8Nb_03_WysK5Ymh7klDon7Y-RvM8HZd3XzDzPCpD0wSxTGRKLR5Oy4gZ_zU55EUrsBA26pCRVzYU7KJU7jJYWuTQkV8OYDvbKIKY7Zh_245O0ma6Z4iRLtaAwcIADBTbY9UrGOs0YHldRmav4HkNPxxuE0rlY0JqHpNZOCGM5RTJRyeZtjXXh-IKOW1opdyiTnfcAfeNaZfYrJNncN8Uc0M_p3s8eW_RCluE5kYjVP6S8cxys-EvGT56abu1ug4mOxvfB-wtm_APtFozfj5kthhUjJWZe7_lcxTRo-boeTcUw6SHgHfgy_o_x8LdMR1_t6lkXfJ27e3-yT_bkiLCR9hUXHtQLHfwMrr4x6_71mw4M2gUqFWBgZrhqKw0yHWWyk-RiSMR1jH2Ziwz3uJ8BMNA1KgTaNniQq3pbgGb-ODu82q9xNWcmDD5EA_KNpIT1sZWpUJJnzxvo_vHPKb8dzd8bs1ODpXnXsNIGyAtXw7rXfR5t0-5bKKHSbQqPTC1Z5axWdt6zGuui4I4YBk4Rez1yOavIfp__rtYlm4DP62u3mB_UzUjKB7xLaCebrB-pby8MKNjWjLSn2E5BgcF6cDmcbV5l2AkuVy_nmqoXVgYEnKH9Vx876YSYUr92n4nSE1vUxzW2ZZF9yz9kJQFO_ywY5gXKYo4Zu2eZTzZkIpLGvm8tdZRegQsm2ZPBNnjeW5G89IDuE8e7DUadzUXbeyq1YXlJNB0uGpPai_UrrvEJ2q376vr-WvRuAwAE6cxOVqxP-pJEMXjd2x7Yc2ttxQTwi39Czus4KFMwTLzHHiibOY6TAJEdJIvBafMn51FXkbMLx1Mb9Wy_A4arr34WWVMM1FqNP3qwjE21R530j-Pemw2yHT4zjRyLPPTBvldb1Zv1UEJv1s-beJIRCBhT3zQnyDA4clo5ONV92DvrQReTKUsMLp4znPqDQ0wuWaLW-4I6ZVQhAbHpNlxHxwne8UHTun3WFbd8AXQ7D5yHP4Co0y0BUrvw1ut5k3rahVzXJb8w7eDda4jIQC11_W-Nxj3xBboX_f_De691lVLvOOnByjJzgX_1aE1tB1JgSGElCsh_rrl9NLcp2kUb449KqWQvLOc967YhTWDCOcESAemZyFkH_DqgV8ksJk6i0HL8sEsUXMWS8Neb14swedoI5ZabWVwDv1XL9zkf9laC91xQX2ZKvXQbQ6rsNllcy_WAPH4RNm8jLVcdqkF2HsH_yxpmwA6MTefp0VLxlBUgZMjrcC4bejZoN0rqhSR4De7Dnt6mUdLHAsw4r2kImhZVlqDR88SpvFdo--YLsL6IPhUWCN3JYgf2wAs1FfX8IxLk9UaNRnFBNpkW-P8dIHCpCqAF4XwwUhOE15yJgI3US7japZggvaBAQOFQuM9ouWYTN4_N10LOWDmMnOJR9MnKq26J3DsbuKKlSwOshHwsn-R_b14hYnt
-14_9Tn_Y_qwMmpVENEgbwvN-MGlzEhXPQN8DoJfOq2cH1WUQf6I-BTdfy98nme0MRyQZIU13kb7SUE2tG0IFjG64FjC5ro-rUD-F9xhXg5UiAyYq7mK_b4ERNij2JwcHKzQMdMIvzo70E6JCFPwlCayW37BeydQPVXY-0qO8meC8s4dHfiaHBF3gJb3f5SZcyuRIpuLB4idjEVrtBSyogaZKgqaTV5DsBMHhyPHiLdOa9KHRYgWBYBcyBlVI-GJ7y8I4Ui5RD_HF1stsc60ffig6L5Hl0UJHksFdn0jcP55iWVmD1BZVn4jcnoTA8PmSHNi4A1CXF9cTPM4pw8kBOImWKbhsTrjQVrRslW2wctRN4Mfnf9nzoY8G4dmRyAQZ2Oa-j-9WmPQfI0oebWYgFhzzDgzq1q0ue_v-X0--VILGtfPqoXyXMpTY02Mw1NAF3Wh1t-PFXFtgqA8oyL_MBlK3eh72fPIfBwxMT60lrybekIkwUgN1vIXhjtyZSpABSIiAhF4Lwg5nWE-FNg6ywerRKWcoTJMtQAkWNSwAyNl2SWvWo6ybOHdPklhnfyRVC9Br7kh4ZRt-6E43RJ1w05vcLhELqYKd290UXfnfs2h-xRUyCk1Znz4pGgm2ScS_mvczHHvHNl-asnyNlvvDiwm6YLH4UsqwxJoIYTgsk5PmfneQDd17BTjlI2i6W-KWUEOgG5TqvOdgh6di9vwHZ2KR_8eOxVnmG8Gc7pngts_oHwohcTUUqBHVKEC-wLZu33cHesISwZpernaFciF81UeE3vNm1TT0S_0keEnK5GiOmG6-5NlMMFpfUwgQzdrImOKL4e_YIBe9ryBYTUvMvSQ0l9lmvf9L0EgiMshgqcvx10J6xm4_BjMbjfsKYIGg3-FgmrM9T79vKESYmRJBFB2H8MXzKv-P_CnLwVv_yDhIB4hfP5ictYf7XlybJaebr9kJn2m5jwkay43rET4fh_n7ue7IQVaUzUaXnxBCofG_RdbUDKYeg_OmVtJ6gmM3NZ8kuQvp3_RqKuW9dLwdCzPWTEjVyy2DKX2dt2_XlDD8EFlKkEERP5z0u8OYyYRCYFAYITvd5-8aHNy_kCmgZ6ps5EJ-evlRFDtg7q3Ddqa2Uok-uev6QRIx1yudtm2Su19u2bbuQvp2N-wJfVtTVT21S7Xk0m2aVd2zs4fzGU4gfMJNqiM86ZaCyuHAui8c3FLqfshtnY8TUdKWo78FKpeQZ7g0uNSRGkQRnz474djfjqW4dQZjuhnKNSI4QCXMY2Sa1WqiTXMSOLLzsPDAFuBktzR3W31OI32nA3-VePmADxxLQ172OiEQtD5WKWm35SOKah1Fe_UJRQQO4RE9daumw4HLX0sFWGAgb7g8dTHn4hDZ3kPNsM80ll6BpUJm6ZkXaGQFQY2nRBeSBzNhfiwFbaBSUBkOuTWJEJzRP-qem9VWaPodViMgzZTWSnexlwz00P12bN8YlDECkgSDtqZW9qvLwEESzEt8IA4q1wJLcwCeYBjPJprYHBZXMOVPP8lT_f3VL1LCjrI168Wx2Dv-SPuaChyuOpyqbTTDcT-TqAiE-pPeWVQDRE83KxMquDfU2aKjepbSttAC9h0F7MYTR6U0sBVu0nC7pnkmUvSM65yNoVFpvgiCF4DVMAu6D3RStDHLy7MYGuE983-G3kr18zQl7NCCe8fLH9b5yKzBGHh-evn6Iz9X61EJtTQc0lwuZ-oYMjOyEL_gtB0K-QjsdCJpOOQcoOyrEX_hChvg_pVyNrIo4Uq_s9gnykvCLGkQ0bjWNbbMRQvjsbd3EkShEqTvdoDfDyii5uOvk9TSOr-EuXtRvQCZiC0hmMJbQjbQaDCum8Aoa56WHSkfCQwg-cygt5yUWXpXC_V7sOkljbrJdqiP0pywxn2xQlyg58lkH_nYivHx2BblDsL_asLj4koHKPFa-He9YlVbP8EGzXgqks4EZek1II0HqDKp7yD-cszmRoLEKrfrbDcUdVHuUF2a9AYwWl89CLZK1-pnnwZCchtKmKwvfYUoYbbvQ5mtHaJ6NxopKGeiw_3qsPp40n32_0TKEfEUlUJXRIcGbK6Inw-xhEJ4R9kUquZ8VPg-8tVgVMnokcWQQgr6q9O8t52H8Ko4sfkf0bxTfBguJdkDsk0OOKETdluZOgFhUurs66T7-bwioxzIKuyRAQ8w4yKD9pt2rRsbE4tCfon7rgdRkfK4QSCU5hFVKBHfhHCUloTuHaRvK203p81hh2RyHhKBtVIaCLP5oczK7Rd6VXVNtf10QLTMnqRtiHvqn0RpnSxZZHKBmkD3aUe763MXKEgeG4oAnISJnTK6i6enS1R9_pM5EvrM_NwVpbfPDpHTsLSAM36RpOO_qYq4nv7rXrcw8wai3Yl43aZvOnYP-ig4gEh-9UWdf60nsCNAFeJ4Fyqw0CM9iSMYmgp526KsPcJLw5ZLpWN27c1VI1mcUjdkF8qQZAQ_NcJzR96K6a0JnuOGAHASUNg6U5V1mXeytr_r5ntJ9M2D6KsPRI72-f_Ry9UewgkFX_-O9jitRWx5spo-mJMPtmrg_-xsf9O_ySrranKm9PadAVRa_CKSY3la1ZUPCtFQulr9I2w8pknc6rgyndX1-EodRie4QhpJr4Xz1BL-AbTVAwCAbrezPq3Bvm7eSv-Qy7BmQxNB8r0M5sK9xJ7hSy7WnzulkKL0GmRpemd0LeWpALtNE5CgJpD-5CHUOmG8tdF3kvomvk_ULUJ7CJeXySEJNcoEUCoNdLkmdRmmpHtk4ORSk6yYKLXePGpfT4o3Zyn4YAWSGITcysEFIvBhdoxi_jHZ5y819DgDZ4OzNeBOgF2aQr12YevKcY6C8MzTA-pspRGqdJj9ujZvyC3FYlyzz7aFCe5jiL0vUevIAYSdDpFW-9tvM2EAdxqEjigfUiyJM_Gd6bi7sYsLQ1QOsTVCAnNeFlMIgvnp9LnjQFbfelk3jelAS6XUUhuglWvI5m4_130zja9tbs_9BEoHgEu9GqbpaBi7I3A7BytI1308Sqptm7jxSapMtnXeg3ZCFi1MvyE5172zp0fpmxPB-5kfHi5xadz-biXs8d_avFvL6xVuDumNBSkz2988H1fa40cn-zWFfzqSH2BI6UJskqfDyURI_TYs20lgHtYjfKPWUQ_QHZWCGbALmYHghrm_7VDR8qvJvs-Q09QN-zn8tTcqJhB_MSmoxiYFYqjY6r0Oi7_ey8oQH_KHUymL_OP9_eFllB_sZSlkJZ28eNC9Tgt_cDc1vtFWaKj8ehMtgJ7Y48_x0mMA_kNrL458DMa1LuapXgT14OXKe2-0XTZ_Jinys3sCSuRqwMBMm8XcQ6Iv-zISUpARHtxKUbO-PBlq3UWHqTUVpqWh9cI8FwYXVa-6w5OPU8ZdMaUGMsfLWx26BD1_zkJdtLST4Q8Q5rKULeaCb5OUzyo752rkwr3NM3sm40WZ253pMHCjc-cixyB6PhvNhBMBCRZMoYIx6eFj_tKL1smdXRcoMS4GfMWAPoutVv8OjLwQiIsKDMKSzZ
2AxVsJF4TiL1HQQUiR61ZqDV5gXZ9Q-qG-Xn6iKkpXABtE0GCJjrjj5ed0l9ToKJh4FNC7OCyxpLhKHYlBMt4ALZRK6AsOhq1XZd0cyx36pUhhpdnuBQqF5hO4a1zW12FzXRapmELg_RZeFOMYowkeNkIQzGrmV25nNIEd__c7t2e_sjTPJElWOVaDC3Lvlg2KtCGunEuqwIuJM7yLvbjI9XyHSK9vUWYb7-2wz6xUJkHXHwENp6_gRxHJVWRZ3A2At8BHFWd-RqwtVNlBry5dMix0M16NN775-AHnMdvKES-PeYTRp1LOGxdvsevewuorQJGN2cwzJTZ-qOr3iceIOKHAJCV_4b7aELF_F5_H5r4U5SP-Z3wabflIzvLKcT4cff1PUcU9gY72sVke-elEBBWYbZG3B3SwTJbKpTuG4OTxUKjrl-0x12lIyG2CmLaCnKQMNILE7KeBmU2x-2Nnzn80yGu0IlrPWq2CdERQP7nsDdnph_d8V_00elM0KgBGOUWrYye1s3TDD48Uc9Qtgnc5SHVNJZ3h6XSTgWxcEGVp5lPNLs1I3nlFeB3TyU4Ed0k19VVUYa1fgZ08i-rHSome5WDLvQ60BhWoLKCghKngLyAKNq5KpI2RS0-wr3HckOD3thBVo-ukqpbZ9v7Ukql2unPnjPVCnVe5kO8L0f50hQ2PtSt3oh3qcmPPDyUa6z11aICjJPeCnWxWBTR3tyr7j3rOxwgDFL5t8SSnpHj9sMocIlh3mpSneCnxyVBWhSUFgyrNleUvrPQNIdKBOvSWbM54iApFbV5EVlP1okr1wLBFx4c0G_cfGpPhbr0AF62M9oBdMrXKx3YtHMBdb7eybnTyRC9fR2oL0DjJrxBz5UdhBJRvo3EXdHSd10xS6ZImdiq0kbrqzurM3QFWKaeMN2W21iGGJYMuNohz8bhX2HVB1v6KXAmMo0SaPxslHCtSqBCDxepiGdkpsZ5NB_shaB90QeEIq56Qh5shGZHvwcpYVMj84qeg9gZYYmAC22BZIGTs1bg3EEiOO6fvFLV2nNpyaPYbUVpr-VmH4kTLsHyAXOfOZPSQQKAbQWr9e2f4KNI_JolSoS3dwcGS_uG4JHd5U69ysKSnzZj327QCNl41p2CBSbORgXaLhllEyJnXEOuHvKxGZ9PuKcG1lbgnldJaT0PvkOMVTy9OWTJJA5QnlyYITNCQcCuWLI6dTQknPE0xqTzxNXLUa-TdVh9pCllFteFS88wm6HcsQrF6w668ZLi7Xu-uzQyIpkhjLCuN6Km-ik4uvsrPvL8GDWVMgHbHtvIAvqcfx4cHL6ul6P3DyQa8E0Vj3qsJoVq--K5Y2PrxhVpPuefSmJ1tmri_ZxW6r72C-Rc6ZN71yxPIH2bFnmhNamkYXC33gYhSYmtX1rtelXuh1iEDgGZwRfjJUM4JP727XjSwh8xjvCtYZ_p0FhTLxTZMMOpWjZAZ5kJjxAfzrHEobhdQonGAf_f50HSjDENDaa4pzt2keSMJnyLeHqTjq3nshYDs7bc8lSbxgbPgOpYPsNhwFYVrtbbATO-SOWLDIzttnZikykZsxIPWocHYHrAcNXMOwYENW7TM1UJ4XMUGXIzMtbTCzHiMG2NtMxR2HjJRSwn2s9N5fs5yUorYybWk-tMQhnWd5JKGiO_iUu_O9Aqn39otMV0zRe0hyCcrAe8aHhJXMKB1Tx_mG8UHBIsXPwrm3pxKV32orOp-rdU_1FpM1Uyq6IkZav0VrkRTfq2EjL1XN0i1er7dNTQOf2LL-bIA4AVlyolfV8POIKZ83pNOCNDGvga2kJtf6KDuz2f35AFdNOgDfAu58qR3a4R1x0lRmxEC_6X4PzWPx9-Bwvagr2rPkU8zY7m2iYT3vf3EOfJLIlSZ6M_xHxH-l6kY4q2wnVC_S77142cR9UvLxVPpXvbeiv-VIkiQiQrV7oEXOIkcjxoca071xaEdy75xx9VN7GqE41inJX1yXW2yo4SdQ1h-_R7sxgc8m-sYfLOhB3NaI1HX2eO0hJNPs73U-W5t4HGf47XlQAl3SyAVG5H3ypyFNDfZfkf7CsYN8lBZSL6sdggD67HuPvn__KKBkhxZFwCB20kmfWE6Rm3v6YpoE4YdvMLw_Qpcr3VcvHBZ4YtDQkE3Hnz3KweETllXTS2falsoB5wR7YZLwOwyztkD9_mXZmYxaN7vU31b-_kCXmO3zJeSE5UYD-O0dwG-OuslJk973fIo5AemMh6J6yfkmIHLl-XvnRX_zbqTHFEDwlF3_Is39VIQ6PoFiGzLJMQrGeN6UA58f84KkbQVAmdS3Vlpml0l-azX5piudZ-wz_Jabfrbl-THi-9-G1nctjikIVLwZ5ywbL_Vw96oAjT1HF65QXUtTf5ZA_qQNrWNpMS_k83o6RsZXiXaTbqKoQ7dhBAwPD0bR_TmT2Z41TZgfxuTgN3OytU9WVFFhJ4hmoDU5F6By-DvNEpXlvWvYcQxpWc9CUZBESSraM8do65azf5M97JWLJ8HHHUFpEW40Az1CGjJdLaq5Cg7tyGSQyQmYi5jEynK_QkeTXCOEBUbWKvkxXli0DKBxCGQWImxtKOJ_AIkCPtJ2lzHwzlMG9CuOPEL9-udFo3EcTIIRFAUzS29A_ZoNmC50YgJRLPMUJYCxeb3nSi2FOX5y5TJNOIv-eH20fspriuSca0-Q_bWcZ7KjQOQID5Re5AMtg1g4XMncWaHu0rXb3IFrO3SZkzu11mE0oTA7XoO1sFWAtp-u6KGtiR5SL6CfnNpUHARv-jC_Vja5U9Q2Q68M7BjWHnUSbYA8vW14t0K8ntR-Zogjp83JkxiqenAKS8r7-fj9TioJe3OtINY4PURpFRSN_T7VV_0AaAVuqrpxxk_ctRxFNFO0Iqh5dPbwIqdDLY0Mds6cPGp5fR9lrQWJRzg-aDlecPxExYUf7FJr9M6AB8h6m8xj1DOg1bFLsKHrIP-1aNRFs2ezMgj18W3rK2sY8ZxmWM0emHWPb84ELEgNjXVAIFZSzRNxfCcWwEJ3Wg5N3O8AQBXd36dd7NCn1Akg8TZi6yO6ZkMSxHvSPa2wGG5sdfSf47gLkPWX0hS102oAoqDfQN-PjeV8yBzCRvXwX1o7GnrR49J41_2R0ArYT__2DlJmQGmZqGRJey-DlnYhUFcj0SDuq7rXgNoGEd1DxOC62pTgKNAM8ab39-CJ8KAY9WFEHsBOdz_uMgq5cG4dXxgnAfkUT_n2hDG7DqusEUdLBKVQqItu0bZV82nXVIXqnjpyLmqT_CBqCx0CS3caXwNPb3kbg9ItJrSLE0M-amdmf2-DsgxlE0Z2V-bgG677VlDyQ-1BaJ9MgkEmPWx24jOnq5d-umCLDOEpESVbMHVAD7PhLzZpXRYOhIz_UkAFDYMMr7RDJbYTqwCwmBigcJY3-AY_AN5czBariXvgg9plIWIefm0KhcrQSuFbaquTcKokIBhqXYCThklGLlixLppH0yGoi9aSwd3P2IIe6HErvFAxRPpo5NB6EiO5xODNnOxBRlketpCZovhA
G7KD06YPXpKOVxbDaNwxLTRRcRfC4Ftmc7IGI6luCIRhmGd3iG59Rz66Y4y0vkfpMN4DDBwJ5j119a06kbRQBIzIQy1uoNfr453z8_oowj8N9MzWAH-YAFDSR4lS3XIqJSnoMi3G7tR5rlVBl0hbGhQfC4kPnoQEpoETfuzd1jb6uD72po9rUQTXVT9IG0wHsb9zrRjNzodZAAHIdn5RSRpUA62wb275bwL-6iwk_Jc-pNffAlJcoc5eNIHRavJAuZPWdpqu4dUnvfFUkGGexjgfcGWOsXFWeWYFGKGR-ysk3yNerDbj3v6HPmBfMNOXcrrZnEhp_WyNFcd83ZDgICWbVcdDlg86YHq4Z-685jIgonIHjgbTNS9TawN5VXXs2K4WPbrE6FQy8EIp_UfH47OMhBI2lirGUzTp65hs77fxUDCpuXSuo9WIdbvtVyN45gqaCP0yxvrdcPkCG8Fd9VQZ4nq9Iycoo_FyEsVRve9DVm4Uhik1vkErIG-vun6GVJ-Dgsh-BzA0Ji2ECUSlh4fF2iXCMa_kK-oqwSAZqCuMHShz9Am3r8g4nf4oeGqg1RY16pMuwkE48oru-l5modX-Ov4FW0ziYlHEETHzFC6Omg_r07v4uxjTGgJHPLyJk0GC6KqjWJr9G8vt-9fJsV5L8Xol3LVc5KaKgxWz2oTuMFSTVtcWd7Lk6rP0i6B-ma3MOg3w-aPfslXL13MhgBM3c-T524VcG-H798vMcag7QINJUaEYBiMX1Dqymyg9HPU-z0VO3mneQr8EyUzf9Fx7ytjMU0Q056OP0KGjFfHtN48jVoFIo7woibOn3zA_DcD95BmYsEcD5NC8mln3XCGjqSD37tLzorQZABTQrEWYqVTyxVjpMIvtnVmrjP88AQGv0x0KeJqQqHbwGOfLx_0wGYrF7-1wEWHqVcaF1dE7UCa3M4eDvSVySJpL3fRjze5Kd4Qkp46AGK_X6fkB2BPjMx6p3o3gx94DhlMjwt0ZWvoZN8ktz7t9bh6CJ-_xdKUUrXgiRzBfjv0FMmxBgWgp4bAqFnkcZLo9GNDUCs22TJpbbXMl_K6_YgynFzuDuaLKDeVd3dWvhkDxK7r4yJNu7zoQaq2EjoWF00t3uGdEf4c3oXM6Ix4HX_VwYCNcHHRT-9sriMqe14GsgOKUm1ZIoL45A5U3ZwKB3XrTXxi1pIT36meHJYAtWACcLAr0Js5zxDjBKxU_vtIPm6YfIngPc7XoiSGY0aPbmhHcvbYOHvZM3YInr7WlQ-Bflr77Q7XgDtLXXnDXh2qd5wEIsE988MaWuGWb15M-ZYGOqBfzw9LmxngWnONuOlrvpV-soxgBr2vCbTBmw5hFK15_qu-fMUJ2cr12apqT1lxtIzDymNB_4sLDPKzqoxY5TS6GXdSFMs3QGU1TEMhzslY6k8MzYM-i_k21DAI71EL7ARxB51InyoK1GzbKaSzQneg_-sAFexCCXKBFlUk2AzeEyL1vb8uJdhVNDLxx4a2OIcv6KrU4ay2VggCsy8TwtamvsEdLDo6K3nMbJnsjAL3rGEJchXam6DfH-c5JmzBhMMgirWQyZJ6odqBQXibL-9W7ZA7aqVWAgTC5LrJWyIRPn2bMV-YdjqKGyaEdfGqiaTNPhE6i1YXb7zcIBwG_275HNr7PmxYLmwp71CJffrse5qytz42YsUZ9bKGL6aa_nsUuslpJCIe4QbzFgrcfXHdVhWprtxVPrUEvQLMyMdxd7FJasitjY4GyLwrr51_nYj53yMaNsJ4Qi2kX51dvpIgQEFh7ZUPiiHn4y0WLUmoaYAQz551TeeV6gVkRUyOJqPFV76tXgvlc6WLxrVoT5SeEsBw8sej5ZZz9QeTOjmX2hW5D4hPTZYwKhmzfbtXtEz9Ofy_WEFXutrBqrUM2QsficAL5ZjLD1tVAKjb2As_3PsxM4XLM1hgNMsLfFBepQR79u287l29XrvL0l3MyHGVPL2oQH4ddpVG3n8lzFbokK0j0h6fNvsWGQJY7gtSObaajd2ntnqciNI6IjIC1oN5MegGjscFcmYbfKUcEiHIHmWqHA5hd3Hj6CleCnZm4-Pb8gtgwoNrqfDD4G6LQWqRbhWIfbwc5pxcncw9rhq0MrlH3tbIlkAfAAX9PMCokcNsHbAUDcaw7TTjSjSphUfbyhmrxp3D0SyQ-yCOzJQ4VP_BHL6I_GdhXprSYae8AmTRorATbYQe35tkFbmLFif8r6NPMHwhaI-M_ApqJXd-KIy9i7_H4PnEHCABPTuA7RTivm88yplxJgtLw_MRQR3nGz5MoRcPp7QQg-cUGwbcJcVT727kunTBOPi0VuF0xs51efoVWNtIrXiVNyvtqJSpwYXMEGBSCoIpPKvIGd7FZofYN-bkOLtw2P_9NrtDJMsAV5qwFXmi0m4XaQ801xAyoQlTS0DOaBCQj-bH4-jjUFuOaaDYnfizIVgi3Gsqi5vywpee5mwfQ8_9kwvRoi3X-PlvFjM3K3I7DqG9aypxm7UYM7I0y4QCKp2mj4rUv6TnTABSUBWCYGvipLhnYMvUSg3pTXJe5xPTc4sQPwO0A3kpqONcOYCsgwTJ4etUWF7TUkdhDO1cs2ZlQ_bWs5W7N-qXWz6m8-VTkYqDGqCOl4mKrDb2C9B0aqsguQ40GoMlVtkaVULcCSrip0Lf0PLFVx2_1ce8caKEhMZbNySrnS5UU7VRI8_JFADmUNXoUgChqCXTPl7glgBwcbns4ycFIbrKQkoDRRIrdSPErbmQNoJJgxRbz0UiucnS1R3EQfJImYSo0dlWt9pydN4CWEJY1073KYnaB0gIMt3Xf8SG3Tj92c-i3G3csqLaj_mIm2uslxmssGTXnf7cOlfa-8ed41iCSdVjDytEhRWKif5HPUlffEryLaC5a3F5nt36gdx_5Mhs64OA0swLdQcDTPS0qHxsrQK8gySYERjqTsC9WjEhHiyz0ZygTvv4eufiU9oHtRMiLhbQEObHuz4f0yVvXWrNrktO4SSOLUXYTrbOjgOXtULRnFeXlykF9ySOvK0G8ffatczXNPptWCzcGueDC5lF6Eg-idOU05Or8z04qZ9A3xTpYOwSngb3Mw2FBzQNl0PNzM3EcRGMoshF3fYj4WbJsUvLr32wXSjY5TsvUxHBO2ERk9rYHgDtVanXp43vEQogDg9w2_0mNOpUqFB4V4OE-jnIZ5zsxrW06mRhOnDW5qHf-zYI_9CIEfSg9ggy419f3xTOb-Bcx5jreAh_u-g0W1AHDmZDh_UkUXnBV0hmlFYfYt-3Tz--QR009YzAVVsy4Klnu0tMzPpbWJujkzV-cRL52qect1gxg5votJaN3gMHXpKyLpWipr1ucnz6jZmDMsTzA9msCW-XwAmH8fV0fgq60zLr43dDJ05wsmw_acQi3JNL-dREXYFtHeY9A1I0j8ZTaQ9IW0xHIvGchMnmxkKPNN7KGz-veGQSGWpOjXwWddaMbjTI2eJKfP-Su-pgykStv1bPdWX2-JIDY329i6rqgcirnRwkpdcsRpF988-5Us7QJiSH
T4RwIxMgDbmRzaxhrmCcXUcUlqplh-yT7Uibu_sL8uyVtHvPo0Vunz57R2qmAKqueuzO6rSBF7-qp9lHK4jgDCaFA3caz0HxQWq4HBmKQgHOM82rrXT1mxdozaxbYelojUrPXgji2sKPkKaJv4Pa7aQErG1sDKnFDFxsVGCHT4JBl_fSPfsxk31bin8IX7G8lGVxhfEpHdSEjncqemm44QpSEu_6xM2Zyfg8hXemPBVnNObWvoy4yQm-d0cf-Kn8Zeux2Kbbv0UQ8fDeIvTjsz9oX1KmRWXpobLSyIhGj4kP07GR1oeDQcDQanJTq6ggjL1HdHmueUBL3q1Z7DJ8gmBoYjn-VHVmZ9lPmtZvI30gPBp3YNvKYQnQm1kGQY9DsQ7-4hD841uJdPAavTt2RQRRwh7URq5buF0B3HbifanT1ggWmh6kr5p79Rb2rlrQOU06vgAv_q_MmQwtDdEeh5CfEUFiI0e8utj4DmuPnJUHuKel8daELMc02_mHaNHdbuWcNQ8wSjP5JTTOhcYt3b4_iarhjMpmRG6SB8aJzn9mnFWS3H9LEC2IRFAAiDuVMlyMUUmRi7kvpqWqC_FlgFY_cW9LKfgJEatntboUrnNMapiAhVkzYOaA9r2oYupD8OETXHac-doND1qp3nyaOQkmKq4tdIyvM8KxBqNZdtpwmig1HuFUdgiC_y_rB5ADiHZdxsWCwZR068r-AwEqoouqQOEDJ_rxgrxhzdAY3pUx4dtgBClvXUQtzxMevfPfBi0GHFpdqUMenVAs6IABt9P6HrPfS_BmphIW1g-CnbI-o6wEjedDI4G3opGeJxKM3mkHIQ2ALQWuB_lSvEI0lIe6OnI3jP_ePj_jn2v1HEL3Dog3ObbVX2fmC16_GEkYWSYgOj56Yf_SJCaL5iBM_-mDrp2Q4SZXjv8w15jNRTlIWOZnB2DlHogtZCngxNpx54tF7Kau50BMMDf3fwF73rdkKU4SBaFpk1Zz7JuFxUhYGO3LCyACdWkHw0qwEROHRsfU_o6vZT0tAeMu9QL_SrtVrQYBNXrkeNss1LktLjG193fZZc0g-ynSwnQQ6C8ugsEvANDZPGIR6s9ZVCYd4RAD9VsPgRG36xci4A263bnggHXLAVkqC4QOEaQaXrIySFr1DeU697z_sj6w-MSTwEZtZe2vKD0YtTkjCuJs_ry5zeCwO22d5UiiR4jWGJhh-VPs4VTnnH7MZ-dugAAIwho0N5Z9I8YsKrHIz_m-I02ME-7lYA2leh2EcrgcDbrXlnBd3haWzMJQ7Mgvs61FjSgBkfGDABvXuwbO0vf1aXZvCxhdiSE1fLbJIxWzufpibuHTNUlrBMwbL0yU-D3_sn760gUkjiZ0UnTIKVJJe3cwp9WYDV4zmfdMCj8SI81sGgbSjO-BEulRh-09UFwQoj_wnjMjMUUng2u9hd4akyYK-H1saxLxy21ePT9i0oT26liifO-xMXrxwXIYi8wvOJi4PEEzZOa-jpVJenpgFOOrDvnopbiO1V3dRMuRsUV6etZFuo0wx7PR-SjPkjLQmxXXezmRQCOJWZQs9bNLWDdxSJo8m-OtTVSTUwdzZKGyGqkuekEhF-PNx5c99Lh70Trg0cWzL2gS9X2tlUS-uvEaxkGxKzYVRS2gyUTQxGA58QDL43rnBgTihYxtGNgHLOUotn4-cyU7S7cMTrcfpKVN-EqfZFUprQDgSZOPIj_rDGUsv1fkUNUS_rbG1ojAdiajbKZfmsDzI-pTeaG0HeSGggg5mm3kwce6imnOc72ZNTamG3NTMu1AsW1-YnhbybAglN2PRnRlqKBtWZY8jNPgj_gB34yNueMz_o_3owVEr9LigMGB54L-um0nSw0OPg8tMyYNfx8xJLEdO2BBlWJLjvNnqNQlouAEzmCWI2_CJLTbmOLHDAs1pt2gTSdwekB_FvMAPVhuTzXWa0bs1CV0tGbNwRdsA5DiwKR2RHFf-JnHYUTa1whTwLh29hgCSy_d5yFWl_6TtzKiONdzIiFlsDQbbZgoJtElAP5D_7qJZf9E_vfnrKY9yQJRME-dVevyxv2HIOJNlRwA0OyAzt0_zt3yQecAM7aFwzWwEllJnTJwYlZrOyGpDcnRMDVQPQFmeDlQrCkvYguc_BC5AU5B4MiqZw11HdjDlSbCEhKT7YEq_jfjmBqK9Hssyxh3TkT97NCpiyL7y4g0_XoUucQbCbqtitFxaJ1aC0K42TZA0K8gM4rvT-ztd2MfFJDGIxRml9tFiz3o_ELcmzj97qBie_N1u1DV9HO8bMkDStVa9R66nOOcxs22ffDRoXldn4dQ-wJxsIWLYcKwgfjYXRYnWl3H_oTR3_H0ml3A3TIs9W7CL9qIclRlo22BfEdXYownKbK4V2UmDXWZefMQQsFw4r9x0U0Qbqp9ky5Zu4UVXy_JiA0H8x_fZJVdUgWLUq_tDEyr-NFORbecu7Vdf-b208U3lC0mcM5epMtkzkPxHlYNDMAdTR1TqolYhQA4Kz6E2GSLSnCPRUMz1OQPsLJ-UUbckKButX4VqzFTkbKR7EF9_IFWgdE8hgIbqQlRrFxjjXUrj6iFstbyzekZducxG2ZnMY8up2hox-zHwSXXSmlGSFlmifESU9Aql9oNSUX6yNV7C_AXvwqYCEaT9TGGhBKKKjMVpGK9k28Xr8IAbsre4l4c0azS6Pl14KVbFIftrddsaZvMyt2C8881iSVhXL8h46t-6poLvaKV8Pi6Z0Arb27c870aVy6Mu2rqxiQj8VUU29FEqm4aRH2FCuOdxsbB5LoZlHNWovFmyedz3vXyGSM-zwBdCQKlfKT77hltCsSK-CqZGo-5LogXGHQ4hb8NX5pQwrDzLN6d3X9NImZ2RGUcc5vlnAW3m3gTTrmOtL_3x6D7sgo6oWBvvbZ_Jf9x9nG8p4Yo6XwdDzV9zIuE8YU-8d9-FcsQnvKBzLETZVQ-CqB-WiCIOzCnNtJ8p7F0eIQynWi2Cjn6IdxkKA9LrnH793yYi-6c00C126FPapLVf1vqUkvi1uzaAijV-ryqBbr6ECD8oVwYQ1_oi_pCIvU1Xq7J5dgX5uDn1Agej_DpaMjJoVApEL6IZsfOj30_ZPisSOayjywfw4VSdqEBBcSDUcJQ6FZinLNW_l-BxF8jhsx1qi_VfNVjubTp6zqFLSecouIaMzbMcv34kbUIoqm9ZOT7OhHoV1MeDQqA2Z46PQFUK-8fDYRQrAE0mK2jFmhWcPtqDe54xQ8oKt0wdj3NgJV5s7KeHv3w5veJyD9HmIK8jHh3XDzsgPvaLO77sfX0kcOAlSjgZgGvQ0-lf3Sec-tupRjAH0dBLevWaEZQLcctlnVZbHAsE_G2y_-UlFH_jNGqtfmzRvY-q70rmSSFgT5IuLikbK-SpIiccfclKZN84SQJ6HHwLb5Dpd0-RMgY-Hcv5l0nRd3tS9F3a4zY8Vhhxfb-mUIzdSUk-wWe_nYdzsbjcTrg7kxZx-guUt-xTlgNAZfOIHdkzHaN8qx-OBCi1iQP-lUT6hlJEwdF_g7jtws
lAUzbB0TyPVIc3p1lFKsPoTz-bpxkhHIggvFpVogatAl_0WewK1-iU7kFvIilgynxZ6VvvYxRkDzfzaV05Y2MmS7yKUO8qe9Y77Fch_ne3LdTNKrkh93dGEWmA-Xg_RTbLVfjwnwDqlNC-pEcbWvLlYxjxPVaWeoU-K1OdrsNV_3Fw-QEdORRuCX26VCuhTNVV_9pSQYlXxglovDsNlPG7yDrhLVMsf1RPMitfjtQzducuLl5SA0YbLOzB8cV_5klhZIpuZN8uZ8vTEMzvAmFDS2wVuMPA1lRg-dnS6Sl6xFZcFdb_58mDQRDmaZ3cv78dagfMByhGgzFU9h6Uf1KYdeoMpMysbZryJoclGTgXMliTTgQeQcci3i-oHK_fxwcl8c_VyL7e0Gy2JwbuRULWnDFkp-juIzm-4EN01TZcQhX2ihXAYtT9pUqa4frCceyNWwmdIdyh4eBQg4uKV9rKLDqNg6ONKhiTuwssIq8Pr2PY3Em3WtTqli8SS3i-GjTUe76W_AAdVRr8cWYJEZ7Lt4XSR0QXaEavo9Rkzc-Qph6-84XrTsxsxTER-RKKvr74H2DXZiPi2AMSmSi7rh0GSxVfzBS24qAfqKIZ_DciFKYhIOKqnWTbJVPozpBd863_O4F1yS9wWwKVizNKHtYfkgnERzWdLx9rRwzpjuSKbYXyckNvf7PNBILscn1M5_hJDEdkbhgWW3JMJQWa1W85XFiWTlgICOKaCn1avAnj7IFFgcR9G5cYT9MTFVLPehCP4h-NL3l4b_xthtdujSNETXqjHL7OdcnTAP5eNR_l-zOmER7kWoYeoSYhGPk1SrNOwCQNyLL9kCSXYSTzcXiFYJ2dXa200m2Ho8uEVqeoFZxoeOlRHoHLAmkq2zHJleUyUAXAC2SVD_LNbMY0BI3Tv99dsnj4Mqrxy95vlzmJKnmtbGD0BKEMrdmiWCIOkxvi4gIz1_naSad3Cd9fnzTJeabiBo07bLK7GtrkbpxDOuTK5cPt2SzJsl8cuFarKR04sB8B6eMb_csrrv5F0ynEsjtn6sSrG8Um7yfc7whhm5riXxWG-Q-BwB2vJLUodCXRTJ6foBOS3IokEInJ39SvuShpHttbWnzi-za6IvzuHoRhgkEjIkxD9HANUvA7xoPm2zUt12vnuj8dR_ZTFqN8SnqjzojFxDrHFUle54pxJAKwIxPUR4Y67SCp2BW-0yVqFbA0D3w_AYIqZXbbj8YrxcnYJg2jSDcr9Lb3tMLAB--BwMWM8Ty08JqPMRtGDQpBHLfYM9Y7CoHCJyGZOX-qGbqA5UaToY1Pe6H56SReWEOnyDLfkcBmKBHGNXti6DkRZuC9YlSlopdWmiwDPPU9wTippwXiv5yQfls_kmYVTORz6JrGuJcz9NvEp_lkhnhGftK1Jd_JuSOCjOJoRuCeV-KkgIihCRpt6XEO9e-sVW4ER2RJtfE2db_1cEFpKHit1sbaLM7uksmuPh3RPePAufVNDPi1p8ujcMdEHqFX4KNs3WQw_AO_HuiV4MtS4VURts9TfufNw4KVQOkb_Kppza9N2H8tnHP62p_oQUx_NXoe92XgIFVduLgppj7VJ0oiW7aiq4t0_UBJy_m0hBRVCYNXd7_T7sOmNrillTgP2rl_E8ffHZZ5tnAE9WNH2Bp5iJLgApnXx5cbhyXQ1uA3hDDcEZPDLZEe5wqRwW_CVztwrDr_Wkbr6A-dH4sbyHbKwB3wMtfR560Hcsgf1D4TMP3l_fuRPCRKzkat1TOBWk-iR-G75yAglkF67r0_TsyDepBCSB0sbDndyHqViezYXrKo0dJPq6yirrDKojPKtEIzpbYYsmdE6ZN9kqQnR2tLWaNNH_A34wAvvCT5PYyX6j4gaHepGK3DyHOwp0UKVn_f2ko9oCcv3_7xCbWfAbmYd2ldUXd80tx5leeaL_mvEJ_lyHLtrW4aqNBblTY3oCVvxBfFw4fZVFvdr-H1PtbaqCDaS20rbm6OGnQ5tqzeFJXS7GGr_7Xi-MIftasIgKmCSde7bupjAMSihGkkzYyAuA0Lt3O2qTDgjhsIVCpjnbaC7O_Zp51NKKD7azIx4R3NsnDQoKAKvfi2zAjBBBI8ovXOxwL5R7Wl4jvOH9JmVTsT6GVO4jc2G59LXCxq44l1sQoKPhZeCFgwb81GHFUapbbgocr4YNOdyxoiE5LLTJsgea31WECHreKybGL-jVWVoz-2M70BDdkubyKHfsI2TcoCooyP2j94z2v91a9ltLSQ73-9VLSFtAvcgzgMTJR3PwYCm38Xx5Pe8wYoLs7OXquSiTE3xI5DQTfHPc6gDwj2odgsiAizV2dOOb6VFgvjNJQTw6bOGNTFP98-mf3ouuOBrvsA8s59ItjzGQcg9jACs_hm8UCDfQ4EGtsHemFfWcZrxOk3-HmsYeHRvkGakqXbO1_N3gUo8qhLfoz9NTzHquXi8XGQPpWPwWNJ6H2zqwwDihdkr7LOx--bGhmW2-HeMnXWmhXQM72C001wbHOkZiuFgJYVwMP1d69-lIG6C271efIcxK6kZBlu9UrH4E4_kHt0d0chtRFm2PzGBNm4EIGua5F9PUq_s5MVCmj2-jzsSuFysHyEb9WzdKIetgi9XDkgfxWc_P6TjdFk3q0vVLbxGjpF2qTUe9A5lwj3hYrjuwjKHO3H1JrPCiTLmw39OP7pKQ1TdOYKLpzX5usXF6vyHhonohU_zfBrOx1hfRRY1hn_I8UXlHxgCcz9J7IlocvTkGKUI4LDDOYLvw0QTxu8d0QPZr8XDLs8zuO9xhV50RVIe8KZKGnn1w4WF9SUb4ojcTY0uXvmj51Lckf4uzZ50aeP6_MBMB_SJ7i3-JgI28WiOacxPRsEKzksAnTlMBI6VDPcqapceV79NkODMCiLccNb8znAvYczoLJrzr1N03-vYXDb_2qW3MBQoCB2PpiGrJ1oQzpWnREguL4xE2GMnmrSafGM0LaiOYsnG3IKAZ2ZAbR_PmNjisgED0sniSVDS3gWEYp1A0QKFqtkSKjIhBpADqQr9XROaVS_uXPlHioUFFI_PDU7825Qy7HHO9GLYeGV5UTfisuUS9_DaTniIAMFlTPi4FddIjScuRLKFyghUJs6un-lt1a_R1-42hVqSwbBu2eHCXHx0Jn3Eism1VYj7V9VSr9fyK1_4y0D1s8jI7J1dwHa8_Q5d753e_wqWyEHPRNXR7ZW8b7J3Vh_VubCo1EoJqMrRUqg7uHdot_hwDeZPea_qxLjJ5FEl1Q5uyL7bBW--MhNyoF4eidt5XU1j5OIlDLGyKbusblz4IpDHONSyhkL9lrk3CU1N4zJqluwOidAJBho2WfMttqnmCT_lCOHl1mZ6MMMUDjyhgQucvuMdkZ-RqeenP-4v3jqhpLkXcHxzKyhxOxkCMK694PNUmtN5jX-WwqkJ8jU4qbJEYQVqr7pMhpR-QffH7j8GhDUe9-zDyLFtxVTKjdmEQBUSKXUa9D8J6KuXoKs0P_qorUX2gFy7uMtBXfqbQbgGuqJwk8Wzo1jCmAU-dlfuiDRN
zSvinebs7FoNkUxNm60aL1z9Qaqm852bjFDHbgUSrp18sZ-W9ZLIas58-u_CU-aiSj3SyN5LnyNmF_zrq5UInNS0HuJCQC5YFs7o9kOMuXdXr17G5W8hG4dMB3q83t7OY4GZwOICAZUVPPS1tRikVZHROkLljCBKjBf7Ik7w9tZvR5JV2l9Uoyv1b-e3jQFqhzIAMyssoC9n7BQlbufVgfSM-pe5hNgZtyel10s7XcmiGBCiZShSL6w_MnuoCtd27EPf85fEa9qA3JroeTBSgmtwQFBV-5B_YNYuVmUWrvpgccCl_lOvaVKEtQV2rNMpg7Z-5OMkuzkEVVZioCsOl9ybw1zRW9EbPX1fqEBzhukuacXxypSjk8xlgauLRRYk1JS0OxVJfrWpQZhA8F4bKgmz7W-_ORmJ5ep1JXbNDteAyqQpXn8n85SFOtZwt33pLyur4dz1wKPcYy0I2IJa1V7p2fmBbHD9hB2DZrRMooJaihFab1BoyA2FBPsR_t0ltVhMET3jsQqyevvdNsZYDEq_cmothmYSMMkcCX7q3bVuTH1Td-iiHGVrJCQE8W9DvGNGPHre6OaVZvkiuRMo_BXcTpKv_68W3ebgRFKX072NfjG__a_p1x9rKb1wBYlqeARp_rz6f62J0Ogcjyjc6qSS2l0nfVSONXANTnocKhEWfMEPMDayTBaOnzMV646XrAOq4KISW_yGxF7YqLdBiOfBxwuUOopetw7etQAyUtseN4ERtXD0JeoqdYp5nqU6POhLEckUdPKleWDeRusOjtD52QPNrakZO0AU_0ZhH4r6h93n0uYVyro7BZztyvdd2nPYeVs-9hzQJg5S41QnMkNAceL0loJCXuzZJfVy3nixoTpGhgNAT4spA-RIqSqpxHjiF2sSdDKYdC6s47ykvDDtj34mpBVA0ZlOOk2jac9nAjHR4bN8iNyTkwDuPXlvlefwI9sSHFzRV7x49yZ1Lw4Sj82RgFPEZBtMsA3WYqJXg5y2n1Go7jTwREs8wyzIaNmav-60rXMpeS59UlVrSS4hE9X8tyFRC1eUykzdDfsDE2gaC_0RhphN4tG_bySNCh6pTKquoom4Dw-O3TmZhockwkIQ-gsJ2uFGe9mdDwfnnFbnR9ccXX5tpQrjjQ29HFtGUI_qzCcvmc2hG4W_ghvGXyqtDxCDp9ikXA4SPMTBq6xMGl4evNaWaQ3P9mnmzaD779y-R_en5CTIKHTsVWkuSkxyKUBxlHE20IavL44xyfKHGevPF0St4KaxgRwEO26xMpE_S1cNWMusi9EQvl5cp7QCDHj-LPxJGKgNGSFJfLUur3kPa4Q-tc4GXOw5hQN2ZOawCXK5h4KZsSXsu1EfVD8K0vMAoJIwJSuygiKfeyOYnmMEcRcJaEaUCf8hyySIW52B3Cd4bKRKPC1e1olLXc07BDaeDsldZCHV_Cl-Z2jejKOVwNuNYQvxdFYv2ULvBi0FBQPl0ECIg0r6GmwpALPGySSFlcQ4fNiW1nQiNseGeqYwagRdJd3BYxXFaNi0odKay1TVz6ckaWms3_FCCGlSm-ZLeGp-DZjEFphMNs6dnlMMJRNc0HcaodmmKBI6n1ISk-6psnXtXqEpCfHCIGhM-ZM5GTUohmgL_S81W263hRmF2BxZoPZb8KMxXhd9eBRrlI2umNJIJ224j7_Bzwf0boMAS8ZjCbRosuYUIGLZYNapem56HAvzW4EYYM60h1iwEzcJ2ASFTcPaMqhrEh3NQNoSdjm43Ol9-ewsJwejjHR2n6wyHkYDF2Ftjau0_pVPRS8UDnn_Lt_04a0oHKEuAG-F71JdtRrUzbUOSbNbcX0cF1VIqcJfAOWZc-dBNCnM6pXW3tRf0ow54Dxhtg4s7bOJwV6Xk8LnOY7ChEEFzPaUaY0KDiFEVolnjcc3D1r8lDONwtVOlKHLchqJbwuHZYv4_kvmYMIfJ68jU2aaFOdDHWPkxqKGSSC1vjATJV4In3aclW9IkwJ6mNzopqkCmehMOJJyncSBGi_iOlR3aKIN_JbErmUF7f1WHpqDXS6FP3JrM-QbLwKexQVUwDgpPD3aVzfA0k_x8kIREPJkQbvm7q8RllF8PXH13VSpvnFSNBGteC76TiRs6muXzKg999sa3J3O-ysFKIpdDsb5ExdDBSeODpH6rwFGQgG5nFnO0svqPgVLnymsnF1uYtdrhbH4MqVkAkNBa8Sko4oiMFzo95epQKK-Sin9bRQGSrUwTwFYj4cIHw2QwrLBHD0TpjoL_tOFRzrD_DCdTrzTppjI6pU65nIJU9vrc2L1Fxpzdwyrca3Jl9VdxfJoVhF8VFheuBrf4fGGa8O3uhQ5GaEVtWygkRI-QNyCyMfdkefC4bXigZy5mX6W-am4oSCjWW4VqLieQ20Kprym_sWVRddguyX7IACs8Qs1XV8kHlpxzgyD8E2t-ol5DjWLwnZbmjKcMGSfKHaBs7clLk_8eIyKEgKB8ryO2Zzlw3-dO4oOA3ry0JZ5-YfK5chheTih6tZkDZcnWVYNi8n87ejAQVg_fzDF0OVEulK_R6Rs1gUJmgiJTx5Bj4e9HAMVHL-k9EIGQiddkxVQ3OUCmVC7ENu1TY3reQilu--tTyw64Oj88okz2VC0F7p61ZzYegYnlRTb1JOltZrgPQjERLdYZA6jjY8upsBaOgkGt7FJpkZ0usEn8OArR9nagcpId9vCV6zeuls5tHOgKtW2eCLZQoFnhNYtRKKuFEKMNACLvu3jr4ORalgWImajMyeCwya3qR-Vn6YTDWShFYO3cCvDpRqTJEUCZBui0mRnWkfE1qV-PRZ4d2bwC5JCkubBub0hqhMJaYIPZ1yipIDevAstDwD_z0U_ia7w5Hl1putVNSRNFqxs4ssSGnyuBhLR_jMCKuE6WacdatFZ9y2WjHQ2of9UV2fbQLHtYxYqBfpQNUO46eFn-YgNOrGONB8_XLDV3g7GQMN-duvNPUyeHf-QNfDiEqKSjbCQqZnj1LKZ8FlaRhc2oIvTBoMrQ40p0ILsy-CO3mXc9_neY0G-EaglbDePQDRJ_CTBKP3_8sAdH1kla6Ixx5NppH3I33vz-jTa0P6Rd3NyvEZj_qCBPCGE_rdGr8IYj8-i2iECUkRBb2bkM6u-QKmZ8EA57KYqqKMm4D6uomjw09gIcFSJSGklzIV1RV8dhOMbXyxCKRo-LHpF6krdN5TS7IAKmcYjZtoM7TaKo5hkGzdmMawRHvbxYK5qEzwjjxaWQ1p247NQvLZ_2SsjOJ-ZXLiig3Sss7ejVfTrhddRA_2svUOwJrvAtyS_DgeIQu2MDCh3g4ZSzUJHGFWcnN75OMdk3kLe2N-N2spBN9bTedA8VKWlyQoXe0qxrf_-fr01EYIKdjgIb_bNHefgYrlsHojs4IUtePv1TbR5WORWpirYZh8OujfUaEsf25SPSlFSHTptVmc-UvCBqg4WVPsGeOAgZCVkCqDoZ0ToG9gH0g_PBjKDI_MgOWUvk36XlydcyVRypxQWLMSHeVuMQa3PmrdzFQ6p8j-S7XgKtrt
jRbl-P_qOu9lVa_NmTCiaBCzyrQY4RjEW3toZ7aSsmuPky2VvH8m1_2r5KzP3OyxkZMaCzSEbXpOu1WPRvNR87WQnw3B33X_BRrSXp0V8McOMdgY2aFewvGy5rx1R4IbQvYiQpsT-XVcQh_ytx_X-PmHIhbtGK7IgBlln-T3AbHuKEOv-zQKu8YIfbZdHvbzi7OE-biYCjXw1TWsKed4jgmdYtd_c-08L79oez0_jq_QaabEcBUGH5bdFUXfsuM8kFDQyNc1M4AmuKGO2fx5QJLPUFJgPJ4VOkze48ZOLyf_XCO4gt58OSFObLm44uf6MTZwDKOIA_A2D8pkgs1N1DImUm6EChD7j7AbWz33r6utktTIY3yCGNataBqosDSfWpTnyBLg8XiQm05pZdkgXE3BgbgwsKNQeHOSGApZXZLz7Ss3NTGyAUuJ25iVQT08-g2-lt458LaAGQSc5mM318KG6d0_yV1zYz-Q_z_c8wivd42uVZKu8iw5kJuisdKYXqllwQ4xIH4rXF74UCxTtVDYn9AKX7QmNwpHq3vzZocbH-dkpIjc-fzeAtEKPl6booAnhawDr8xr1VpLJmuAYRaO6YNU16r7Lm4sB_5uF2d6exNTuciHHISdASKrb7l96LV1_x2DC-nEXaJ0aLabVWenG6h_dj66hgNwnTK3MK929JjtEWmxsEZNX97PrH9-h8E6u2pOoOSz-fBDGAOi8Fn_G7z776qM4GinlYaxO18GQedEgAuwiTLmoOcm7GbLtq6t-s7NjR4X-FriLJOESDuLPFyYTE4SWESsn4pByO3dFW85eiOA5EmIlTGuE8cfZDN0Gl0su_qTt_VFt5hyWHyHT8CAxGVljPmxoHSrE9YdiFcQALUPHIUrPFvurGcAHSAn3ctZRNqH6mSYTivg6Tgim-KIp1EKsxWCz509bJc5O5YQhGRzKXWrr4S0eUbnF1oOy7-SGK7CiiXt-wMavy-p4-Drvd80ogb8jmS_bQmj-pNA6tTgeUJajrNApViIwBOKm8CUu8DpP6FXx7-xr2pgxlnfzXMgJCDpA9Ijt2aPlCxiKV3jwsrL7YXnxD0qHHjjNDn3mDYv39S2XM3NnbavOSiuz_o-Q6JRKLI3sM6Y2HixiPdzao230TeCRnbhWomALaxbpbjxJHM9nZeRtJrFIlczT_w_ZPYqzVmwISCX9oCni1oB8xBILZDGzT67UF3UduyzGpIoH54z88Hg2d4WFc2QMSnDzakVNFRSHjgxnSumd5lycv_Y9PHnhvEjV50kmFofl-FlwgYRTYy8djxY3Y88ki1ZcpMJFIoehdmb6iIEnqlyaydCrOV6GZJgV_g2prHnCJBPOCpDTwWMl2T0niC2qFePSsEqxbzGsjtN_G2sJF6u3nD-Kifud4Awx0CzDudhBie0FNltF-Wl-WHw5RTzMpfYMuZCHO5f1IE7P03AE7CRNu1xOrRDFl5H_FIl6e84Ag8J9Tesl7FXyhIF_9TTVAzuvIPWzh9yv-QzIndjl0AnadYXVA99-lqmNKPL93FW8LHYDIVwMDn5B3HkiUzbJ4GbHqgDazojfV8DRrHCSQmN80Ki0DR2vl4D_HDZ6kW9ubQsAK3MDEKUDoMz4xbfvy9UWbprixb9UBjcz1OjVsH9mkq5oEWqsCJIxCuhvUkfsZ6Xt_8wt9N-NPwZDBj6wcgBn7XWkMhaYOJsfuQ2kAqEdsbv0Rnfh22ZCUMk-_Iqily2FqUmZX2FqIIStVpVT5W3a5HYqNolnkZC1XIrNP9oGT7CL7LJI6Uhygiev90lR0Tu40ampfYahcZuo3Y-sWzH2bJ0GEXrsQff9kQDYCeaNzQTyrOorflpC5Z5b7j_Un55Ej0C9O8z7Dx20NpGAD0SLfQ8my8gHhFJ7fWdzhFWwsfMghTdYNuilIcGGiUniK-UgnNCG5VYFKnct8eVkTfiQEMC4TYggTzFRyf4ZzPt9GkwanCriTRmB0N9fFfzrjNXBKg7rosNoF-XMaNb7AjPCrRLavAfZVJEVG1WJhVAshTG8SxvoFZHwzk3CEtJO4ANFH7WNCDm2Dpr2jyE8dws00SzemJH35JWnXXWiLk9XSGo536q-jWjk7mGge80VWmUf0ZOwTZcnM3xludvlcl1c2Pzke2z0SEPQLhl96EBXSU76u9qJBYWgPdrTOSx74hrz7VJO4T-2M192THS--nDtwK-w16SrLedm18-iw0Ji9KFSnvthJRAq9fCPEit-MkBI1rXURZ5F0CyrCv0ha7iSAPfNteMWoyys12XK0b6XdWpAL7A-T-CLad1M_uUIe87koYrY5MUMVH0M_oWXcV9dFjvgRnitoB5vEXfCTQ5GKzCZsbIxII0MKdJp0F0bGXZfvqT3qbZVEhEUrx-eFMZJ_nxOEjTldPnHOQWij60NDu0mKr1GB7tix4CNXoft29Ln-w-0ltFfB8uOssT00TyaxIMzmeykdI6OU8B-aLyvK2d4VJVaxRprcLLVw8HQ2-49fOw6O2m1kchknHZdVO4fUrL6kyb3uBmVqk_otF3Zj1LlEPU1EWLhkMyYpwkpG_c2R_3zQ0AA5TohUpzeK5Dz10Het6l-33uWTG2s7kRHHZ1-aq-R4ggMBIpjrVwZxVdSd4PG0Tl2WmNM4dkb2tUbVNStq96oqWQZZlLEr-IsgO612ZfgZT1Ve9tA2Y0ZVm3X3mtDOwgAArcVv4jJHjZ0yJG_4ZQdgiw93L21zr-mk6FC8iA1sJKpgUa5bABjypz6uVGWqAmG3oeZMcPW-8ey7dlmcvVZ7l8fbyM7yPn92EXYRMaJAaryF0KKu7Ou9t1W640r36IJG-BTSM0MGzSHLjH8KQNXumYLqkNFccQ9zaJsKxS8H8k0QenWkhRKuuiToiPxIelwIk_JQJKQZL-FVrlW5qfVMjTZ8mVwg_QHTONMmUtFCghX9qNR6xhDCdUhle8TW3pjekloctQQsTZe1cKL-BP79KdylqAt19XoqDNjsNJvcqqVgyPJb4zVN4Uq2VJOrwESZOqLiGzRqZ2unl2-cX-8Cd10Pf5L4FuBZ0pciwQcLLlAoB8SlMjN0u3PHXHrJY_vbOt-xqQ5tnGnwQFdhSSd2f-fLOtFiQK16pGYpBKxziZ4fUXThOWP1NMBvmPvmw-tYltf-RxT_Z5ClncNcWSmtDEbYBSc4j5C8tgzBDxtsNYafZovB8t9uO5ErnbeOUX2WWzZOartv6ydQfUcuN4rsf4lxnS2SkOVGeA1e-A5qco7gBOOJC4szpuIVKQamELWtz6ExHYkbYZfoBhVqpCJ-UmQvCqKiZWCCIJDtCT6f2pLaM5FeT4uAXeU3bY6TiiRbgqftqp5bnL-8Ncwmo7vMcC7an-ek_IPICDeqSQh0eXY0qMSLriwj1YCmMQpTBwwwtg6O8FvuqT6DXhvukWkav3FDk-7lAa2C9SJ-dchUbdQEANyakVTQ_C9GAxX0RjqvO7nhuO2AGhbLxpHPeFPe01N_QUlH8D8MVobig3eRmmF5fF0eRGWdwgBMVimIEs5CEv9-dO3JWaWjV
_DfxYPorbN-wbe7pVFxmrHmUcsgvKa64cUnuboEPX22JYtuvzGyOc8I0L_mzu8a_BxTv6CacI_AEAMxNVHexplvOuqnNEBXGr6LIQMci4q3tcT6Z64bti6Yys1piFhQLeD-LMt17_ba--rnUocoTy08EFWJPWl4uSyMXn3e21miLzUzOKBAM6QpVMTEAMTJDkgsb4iLEVPm6LeF4WFv55zIZdYeyOOa6sR2COv1yuXijoOkmH3IUU5t1ZVc5Z3kPzQhq8rNSIiRZropri07OqAGY1YJJPUfa1lOeaAxoU0UX6YazimcWpLlbRDoGpJ8dCU6DbUnO5TDVmso64Qh1CmMWH-6Bz8cNr6ea8P9RMgOWncQTKPoPBpWRySST-PMeOkCk12EmTSKvYFG00i27hbf57ZO1VyW6AnpYtONiiBnKXTLo51Rm5UQS8_hv2FwgVOSgmxRvSDIAe0hoQ8NbHzSE7b-YxlgUhJ-WpOphXFARK9TRcandrQYh-D_tE7vO19PDJp0xE5jy2VTNvSRN4ePDAECePPD9MQ5MjQugH2EqMBOS3jKfZFn4LRFuUcO_nJ40XW5YgZNDqxuP6Pa553F5CewJEk-uig8W4iDeHza7cpcSYBnfMiHDY-fFwttctFV406Z0Bd-gUjMb_2JuiHw5eiu9dV08IuwLhyPBAjw-AWqr27zJ-062c-qQn2ICQVnUivndXU8p0EZdaYhyszcIQZt1yIpntHWbvKLZym3rxbyW59228LoWdMBB9mveZ0mmw-s3gt6EhQiNrIa-RWvgR5e9BwI9hqfOEtsemAGdsxJwQaaCMCYe41-Qp1dy9RgDYjlgMklzKNrxJ17tet2uyVGd4vKsTQW65iAaBdh5ixUxqQb9ZAzdw5pFup3cboiqg5NQSsL54vWYu-tUs7jHDWmoka4g3ydDCjJ0Y0mwo8YKYf_JtizmI7dmudDl9R9Un9qtYK82rF0p8iJ6TIr3D_kZqSx7Rdn6EaM8N3Jc8UJsz4fr2gYOSv-qbSCehMves3RYppKl_L6YJ1hdbWGS1kIMrS3DE6zqw5bMfAwo-o_BtPCnacJVf07n_LgiJQBOGllDJoNqfJiU4XDWF1OeU6yTjfz9debftoJpgnZt3-k11vAfTBqn8JYDac7Nkj01eW_RMkxX86MqcnkkJYIODvAllNjRhRS8ERvcEpcSdXF4gjdmIZ8WD6SO4b6UaomTjIixqKGAY_zIjZG1i23-NKW1z3CYqCjbCVC1AJSgp53Y-GupXjm29kkBvn_WZwdSbjivrOBFdpTVhp8fnt9jl7H_bTZUYLMzDHBT0ddGCBzZSclNdNp_LotlO2KFM0TPMHjMzke8x1jlJ0B77vg40wS_KzNhI3V0t6mMfoUk-mbSbB3oGhzlcIFK-yWOldJWTh6z40cmKiAFbNduZDm13oDBqIlzdY4kLhetorsTe3qUsszuVTfdoONHGu10_kZOvSQrKS5kCxQFVPIG-7C_aZQ183Xc9r5s5lI1RMmOz0JKfHxuqoFpDvKD7BO_zMwAy2t9TjLbjvqMmrfITaH01KUG3ASJi3C_Jh_xlcLlUkUq0ixZpXEqD64YgIXztJw1Hqo144rsODZHeRtLZoc1LRD8WEX9P5Su6QeRp9gJs6xbIlC2NMHF4su09pcMb4HbsOga1nakmOhH6E5VjPvwFIumMh2N0D93-RSry_8XdWRTgj2Q6DFuUamX7odPtLiPaWQC05CV43PMKX1QtnSSQNIlzZqZMhPdgLznID6_kDhhLe3NAL_JV-cKk-sP2mMgqvTLMiHKyZLI_-f68w7Yrs_MW-HcdlDR0UuHbtaVhySVe-0e-moDcZ5ceovKBbkbQYsuph5eGzPQb3-wQZU_fl8PGyXc4CgonXFRPQKn2w7Grj_5WIbQOLyZ8RXw760TEHYaBDsRsHQGojcfV2lSiOMr-0XSJDtN7TwN6uQOYE-wR4iu9QUuE0DdTSt8dfd1J6dcFoxoAlelwlvrYpi9z30a6qNdu_OGwMlKVEcJUxNnovYU5VQnTX9lgz8gXV2oKsku_i8Jq_HRW7-gG37-P1lTeTpirsufOXa2l7s73Uc9AuipzelhXLUPGRdNvB1TBd6ntF22D2ApYTqsaar-PHWNxDCtCuGkkwNr1hasZZeQVJHQi977G072-EGI9N7HdE6OLLnrcc7blH-tL8HIwzH9B0yblVxW6hry4vpBGd7Ms9K6rYFGXBrszR9WcdZN8gxpoMcgQ8L-hvWR7NaHkbTLBmgksJy-QotZzgHbd4lal_27ZK7Y7H14HWUElbn8HkpX2xt78SXh_-5fXnq_k_e6npUWQPNHFnqQUC7fcoDn_e2Y-3VzlA7Ct2_rG2eaO6gyIO68pnUgEGYmAHhF8dMRyOtvJeIsjXn-HX0Qkz56_6Du7AWLt4u9qCcjWHFCXfq_jTQX4SQ2yjeBr5ax9HcQVHv8wScWF8-aVsm8679gShEo6wSqqpzlWNqspwei_GgaNxTkRCIboZaCToIgJrV9BSUXXX9LvWM-lznY1sTdvo_bOJ_ByMtofM7G8CeMbaCw0gnDx_E2grh4rNbY7Um4uMLALlB4X-jQFaCNLeunIbhXAzKG4tPAJ83_Vk0qQ3iD3uCWF5lYWTrdyz4wagfHcJubXVhpzp2YWNnEX-Q-sEV8Ql_lcF_8_PCSoYWhJimhvt8yteDpAzR8zdzPDSJjPwGGtMdlWSFO-zJfW7U19IE2L6wF5xcZq7u_VTE0kKAGIyW5JZVCexxwQ1xfcp2iHkuzw0vBLBY-sgfjsuob8yDyt_uIG6sOzKV9touQ__gfGQLke1QFDus25s0dhY-pmXu9Q_WX63X2d9-8FKV0A-vJc_yvsFk0kETFzjADAkiVg2iOKeXnuSKUpY0i2Q_9n2-A7d6xgDUYSRZeHP_Z0Bno0dGHdEscBzuEXtZqakkX_o1j6Kf2OHYd5-CwoQ_RUj93t5yyod7GMYN6QTNi1QtBKP44sIPBBigPQilz-1YAJh0vZ3ZcNZLjWXkg2sZyHC2Sqm-vVgHGBmBGbmmPaaCrJLPCdZak3dddC0yYnij2KmVakwK-OQMZBe-Yqx8u25aHCXPB8HhoupAD5-IBNtD7zE2JzkAktzr_TM9fmz75XH6yIxHheISAiCUUpIJTBKwB4QBj92VNbquPqD7hAUDxHmv2jDzpWnIVYE0iyTNH0ESqqrVRCqtszNnlx_YhUnOkt1UT4Lq8XIIYrWA35soAahV3JZt0cyLH1WYoPrM3KvlXyHsDACPPJI4eg5ug4rg6arqMkqkD6K0Jwt8JJ3I8f67-uj5KLRwb6ZEJkUqIig1-bv0oydjs4N_nAET5_ZN8ts-a0f9l3tVuFuaEveMOue_UYE5VZOReDwrEd7STVckvXJ-9rYb33VYjQByr8a_hNpokpjDWUpcWMFGi33woqyritkBXedj4PTB3POMRpEQoonNm-IODV_gzHDAi-0SwQ9Xms17c-1pYSHiPmtkQUf55YCaU0WVASFEYMaQeeeloJdpVCBX1Q-Jvr4gjWK3T48p
Q9UwsozfClv43eoC4_WQpGdpgRUD6tfl-lqxhTGKntrZun_HuLKcqT2rq1XvfZejApZuLklsoqH66T1N6edo9rYjMZ-W2OYqmao4mJVHQSYq0ibZQQzDpDAfBFKt1f2XfRItQ4qohGOCbEycRvsT83GsFALBhNVR2DjrUjIkLRBfwSxt0bxzl8B0Cuns-UprtKsNwE8F2vNiFSntdEIwN_UNYENz95J5VTlvkQGJaULurMLSvTxuSD-nYRgO-6EuwDXRZjrmiQ-EPmn0gDiaQcWSjsqGQNEdF2TmHi9YvE8AxlM_lbmG0yi3Wr7zu0OgbdvULKiCzo02olXXPLjUTKXqNwgX_y4DssjM81Y1Sh9Z7HUZ8kpz_WBUwfg9Djcjuuq865q_u945O4wek0giVVp-TFooBd1z8uwIpeYyDwDw1zRW8IlwjldunxEOoZCGEqa4k-nlbHFLaPOiHNEIQ-VilcsVNIpR76evARcn7bsYNNIDrxfm5cwLR-KdFxZtCw5421nWyw3U3XhqVLYuuxD6nk6PZHYgxZyxeE00h0uDZtoedEGkvWwfFKxb5y44w2S8r710qIeK3axMsWnHcIEMQU2nHFRBm34PAC1pJsyOm7cm-sgmrfKBKrqYcwJpLRGyCUekvz2aDGz5CUxjEIa-UMPhHvyIBkzXfofUGrTwgH6CgnwZ4au7XgR5iuyo4kCB0BGA2mWhhYI_wZksP7_0waNj1wEq7iPAs5aB5OLePg4Sk8wXCFmspkqRnFZJSbrhmuIwBVjl2KdfyjQVGVzecykaPH_KgpbAwHFOpxxFJeIVe0pyuJrBB7FsMor-oLO3sA4rwB1Iq5b774EX9MXIZr1hjn3oG0-XiiBgkjNtRJs3tC9aeQxFHbAkVprbJUx5i60WDp5DI0HNAuYOW45kQpufYHDNjB2v760ad-UeZJOCmeqb8bJkSre2kaZ9jfNDyJIT7PThwilQwqQg1OdHYmbTJq9EVbMeGZnfd-_BAy_megrTy2BotASiN8j3zgzExBWWhvus8e2qxBKcegFGIg6ggF4Cx-Jui0WRdR53nS6oQly3zr-VsuC7aefmC5bZfJwCTk0WzdBMJOQgyQy3y9WMIxwidG3JRLdqE5e9jYyGtlShnzDw6bjJtWwUaSytoemraUUeNOY9PkbeQh1zQHLg9LvP0Fl6ZvilKMq7LUS_DJj_XE-lMdopTiW8R6KBzrd61NIQh8188UztNm0O5E6afQkRBJDqY-iT32VMov7x5I86Y50IdkDbIA3bq06MKZeyqmIzpfcWjFPU43bfQyWDU4kVXFhr2BWhVheT5J0XLiwBN_s1XsCHeXvFf2sFXG15KhKLGfRlLBdh-db_uvdQoXuMjVN8pRgrAvOQKGQwdygt_yivmXW-Ajb7FoE7HRcYw8n5ZKXjDeHuzCPbG7JaG_N4eklqrOdY-eXn5eekayRSxmziKaYxpsypEKC3j8IlUf5PqLgDo0pS76kt_5INNQ06i4xYkmSCwFxt-boxUq_wdZXQp6rUbwTKdNqt_L581j5qUwgYYjI-urMKTMqY_AJK5vc-R0JLjn_MAg3EsooAPWDNjne1ZkxijoeA4kPVAn0IRxeBFe2kShRkFrf_nyB2Ans3hkLKgzoasNp2CkPo6dkLmtBTPsPFevwVJ2wAVTZnsPQAIeYFTEuGwpqqO-9p3xwOEfxFJk1ebLzTpN1CBjBuRdfNN9CKmSim8ECpjZDFRK95ymnvBAhKTXpIIjYyAOw8U8H5tjwSqDHRn8tqu6X-5IjHnnI0hvudP4A-VAX6Y1ak44Hatgf9VzeiVcn9L3Xn9F8dCNka5otCmZ181U65vFyf_7kdEIymfMebP2osWDWjkrnnMpmTI2TSq2iY7_x5A2x2dhKi9VWWLXpspZLH5z9vd2hyzPdsnT9FBZ59gMp1ETdPKDImw3rTLFMZXQ-s4Qo-v8T8my-WisZ29xjZgMfI5v-BNA6uvL1PPNb_68wK8-ylASr0x5SJXQfKm1uVw9JTNXWo8cXGQf0l0utrDUJ2r1LUVzqN2BlcgIurxB9aoPrIIsqi7jwQZp0njyYR23x2o8oD8Fp6JGN4j9e6xnW4xUa-MGHPMkqYnRBF44p0Ku_sRFlSTUsB1TSRPS-VwGSayeutD4JkKAWLAq66ekAO_gQzXi-UxzXhOGO5kouQL4xQsaFPFgMdGF65HUiBRJkhPqW3iRc1OxP3jeO4g0ceXBN0AMisXASswQ3lMaccXNVEt1XORrhOTrkMnZNWwt1SoyWUC3Y15UsjwYxUQykgjkwEPH7PR9z4ppoZmcJjeNOxnIbLSp1krMMdrRi1Fe21PmPOIOoy5H02qTKei6CtYubgv5GpVezSjekcJIJEa8SgbmgSOj68k77NMQErL16280Fn8Gbd7w7YZC-3GNbeE5dJ1NiPOwM-XeGegfgE1B2UPzk36sjBPNOFl_T0TxwikAYqpjcDEDRGj1aToOC-5gNfgEk5UEQGknc52cAabgjBWKU9iJCv5hrkYTqEUQmut3h4l7SaxF5neX8WiZbqbFnthcDxai4nALzTTLvXnvP9rv4MoiUxxiyReYJ-ZzNgHxGtBg9JRocXAPUeN0JjY3q7QuKL3_MyDOVZ-tNyHJvpMPFXqlNzi40Hsm6c_Xc6hkCX2EPNBHro-5HyfIpE3Rr8KJUGo2_-dA5ppCrpfhe_vpRoG5ZCtUech8WqnSPVtQzCv0FxEjQNcJKE2dWyd3nG2QVbn9XOmzKEM1FoCuVYp82NOkrq3cPJ4iCtS8jIiO2wxdQ051gJUMl_2scyBlN94GDHMtZzy-T6BBj5E4RcmopCDZTslBKQZtzDXVwdWLo0lGJj22mGbAcOfhv48I0ynflpLLvmmkq1xiS__cTZRNjDj9GXS1WQwqP7d8yeJqzqxoezDm1PbqYXVcP012IclXcJwipttduABs1dL5cyOQZKm_6w_kCm0ZgdN2exZiH9mWUZj86LTmHJWrVenDvBFnWM2oMoMQoyG3g5BehSLwETKAn3psLsXXy-zZqyd2GJMKM8LThzBfRXpFr_zYazOoBYBOMRstTBHlTlWe1pVMY94G2RAJbbAET96vnMwXTnV0dG1nsgIP6xYtt-x2edPjD-v6ddyePtilWlLI12-VFxh1eGOBHbjlB3MtTD8inHdqa3T-8mi97ilKshD1FQEZvYyvNp_xjUbKq9PJL2upe2zPfZHnvWGbjeUk4fRygzhVrhFCeJb1iwljrBSGQopmsvOKi9tdoicRgZIzo1Rg7Ris2Bxi5wZP5prPw8593kvk8V8oUSoMhpwgFNAjzpfaDyw5_MxqLJUAdbVL7JKAWaDhidhXcdLBBJ-LTPIc7Y_kd8nmsh0IhqztkccfdlBUylJIuv3zzycir-pvLsloZyUaWuG_TexgXZqm1cW30Go2lgYMdkCXSUHGuo4hXhRCvD2XDFvIXWUM85zeDNDCkp5DdLmPs3mrj6Sr_O2J3POGjlelbmRtLEH1gFs7SoYfCEv-XwCa2CGkl24ZEUoRBwacBmMzm9hZI
YOn1a8TV00Uf6tNGt5wU8t9FtTFwwM2VK1goPJZCtEkS4tSfRbVc-4mGTHpM2cpyxeeki65zlR62CSLbOXmtzXmcn-EAMtIn47iGmcA8ia48bdLegM1NkDkSNr4kCX6w_XrZ0lqpwuUCa9mbB5P9WJ3ZMHvGB34p0k8kqjOdNS947Z-sMrazUIUMSC_KdoeoRjg05U34zjC3HvdEEmjO5vpj11_AabnWrsmIcYYaJu0PMaB14aPPAPBTX_4_v8unYo6HGv27DXz6oguSlP6dee2iH8tJ2T8Ou_TCR6m9WwlLlRH3PCMywBnXa6x8r-h2MTk4e8X3lNWAaRWYjB3Sl-Yrg9h_Uft2aWbbEgmfYVJ8ibPpLGvWkCJxtum_8-9EAo1fjsOc0vFwe2tvAir4aSIPCxb0Xi8HopXZ6y5K9A77sTw70GXOITcmyoG-cMfOQsWCTf5CIJ2mG8lZi67cGEotjE7CTu2KRxR05b3Zfe1d__6XvpEDWfEnaUQsfxzkvpImuIk9oGBK6OWt1hJGxsy0JoAujsKZGKepd5xZe1GxXeSr4QxiMzLFOswK65A6WA5ofRlsnjoWGKUV6ctsU93iXaSiPiZAAtpyjMar5fhf4gO2dC2F5u5mIfGpdqthqlaxcEm2kbogDPZIuoYcjXOIZQf3ZFWHm3g54dnD9aE3GfKP9Zzdpzhmz985iK5CTU_9Kbl1N_JUGdCfs6JumBI9UIEI1fOkFN-wsYQl4Z72xfRJ1-0QamN55pcMzfnzJNAU9OHIaIzYxNcxeiZR-BmIJfKbJS61zxCs1gYeY3Dg1ro4qP4y8VKUM8Zwkz5jaiHAX40kGfFrFbJg4jb3VofvTwm1PzpmHIAoC5bzgbMqSfcWh9Af2TL10Pzny4szs4Nv6L7wdITFMUtF8_MkNu2lXA8LwaBw_kjpogD1JwoZSFm1QgIjGFT4m12kCxa4LVEtQRSb2RM3hoscKZI88ewlNnR833QxBT7pkqZ_2vjjq4fI6XWBYaUOgBnbtgOw02W-j0iP_Zzfzm0k1U2HfkxY8_SNPjnTUSW9M8SYCFt6psqVh58-6h0Ic6M373hzzxr2oQrBTw5jQpVb_oJ69v2r6gTgirNUkISAUKgN92j-VUkgTW6cehzeCWbk3BMujbltyYk9F1jkUE_QS4Gwqa9vJ1KVWVo-tM9JCmQ4qleSeb5WhpBGL6rpOYJ2SNAsC77OXmd0PkV_oV2iYjD5unHUls3hNEikchlCcIXw32PDdHnlI3h6KTzE3EObeFlE48B9cBTsWT_5sVdwV8PM2CiqPBujLxMcbG4waV6_yPIs2U5MQyFclTfRZzBH27HCjn3NeIwPsYGNYdhrnPrIkZTlXqAHo_g0UxJgeL-0KficnVa-Qk7oLIGFZ9YPoRrgN7xiLE0HI26OovyORoZEHt1Pka70jy55z0QvXlHohNNdbrLMsu4eabSFsAmsDRj5lSvahpP7T24eYzdlyIZ4I7nBAHhT3c04zsDxUV_BwSmxq6BnwUnz-j8hne_LbKvdXyQInaZxBszdiW6V-QoLhQ2O__HB5hJdWZcA4lH5kiVSzGvi7pmDFlGy1Ca2snZESUFY8JsEd0k88HiY3NUmjeJ9r-ty1ozZv_5KJ6vgwPl-_BuzX4-yzmKvIoOR6PNWdUkyJC-yATP9YFm-e52BuogeYDIIFF59-puooXGFO7IdhYtjWuoiamKqKVGr62xaxpUr5xmirRe_2qcDtBaxxL3GLPoFowbKDDfKH406fU-KRKP0ZyXA29N3TW0qhfj8Hu-RfpptnrGC1VgUaQNo9agJJZ53BiosJvuu5ruMEAqdTOFkd9-Okchlb2xmL64L4Jfbl_5E4pEQpRjyaWC-TXKq8MclPLTUN6JGd86Xeg5_Iawg-Pl7uvBs7MNLJs7OfITIwVcDoRnXlXVM3R63tAAAQqB67oN15rIeXrZAFEcKm0Vb5nLCW2zCC3hqygdFpzNljJ5Vf1AKwIJejWZw2rbdxdHiIvbj-CFc-dw15JuUjgx73zg_AegfwU-WcWUaVxxQzs-UHPbSlUCPYkZRPI0xldhpxmQGOok_kFv7kefRcKS2lGYKGo2yOEGFiof3PSAI1CSQXS1DbVoqCxw9t02jmGp0Jsqw4NkVsW2zpoorFga1BOFr8FeuV-CKIDxiH2KZj6vu8b-uikA9iKedIPBEVJQ5RtV5juJwgyXN6g3NtAKWkbyU3IOQsxtiwnOMAyJMLiJ6Md30XNxXLWv9XylKtcLEfqiPVD9pFoExFWbFgJEvbYCF5cGxzv2EBvLwaWvhGo8Q1UnthRb_5DwHNnm40D9OTr0y6aKxHEC_OcqMGDbOZiirxtjyuVwSsu7dHmLY5_9SgMyWrJYzgaOPWmsYhq5M_K4pv7qCu5vg4TRrnTO2lP73mGuH54CElz5eBKfgQyqsSn0LM_y7RnZwY2CrmypHjtr4Eamr-bf4RaOfBW7GsNAR8DIy0kXV8bxETw110RdyqJwbGSMokEeRUxz0Uxk7yAAWFkqBoHKn_IOxQBWLHSqZ76TceRgH58irSJsErrxx-QMS_oAAFjH1SDyZvETpleQUlhj8YQa-1DGJNNID-H0CWjhxoxcMwUAURYB-hFocBWrpK1tsnG6A86FfJspS9c1kKBa_PzBaH2-cPyjOb9s-T-eKzvZ5Pvb-ThHD1ZByPxnVzfstIeUnDO8OiuMBGdareZZddebSUNwewdvWJfKsa20FjZFXGvUke71YQcCH2zIpuB1mxt6xFEWOC069yka7aqlmv1ZBdfMajIm_9ySejv0BsOuUxXXTxcb9a5BssyTNVuTqSOuUOS7WX_BNrC-n2pELt0IbL2MIGR3nHK2W9A6IUsf6ass_Y_jZqocChs8GmOzMOt7EoCNiqT8_ic0aRKUn3ssp2FnvZxvDWaZQaAY9jFyvNkel1GG1YEQyooU5rtmQ9LIZHN3bhxMT-Asu-KH955cIQDFNkPkzAMwMiDB17D8_a5eXgBHc4IA02WELc4lN1IDcKmUaRmV5qrCGcgATzwjAhxA0GL09w6FFbd2L-5gxWwkVv7z3hTnZaQBFQzJlmUcAS77M8kX5Jfu7h1by1YPTbcELBmQDIHsXhwoG97MxPLLWnC-ziwV9GyONvtB1iIrPGu7dft6Lswejh6KqomfY97bmRfH4E0Kc2eABWT9EvV80aSPq7NeIeuGE4I22xzb2WykxL2WHSxj9maI4b0kXB4Gi65U_ru5bMgkzh3Iq6CSqB740cahqenaKQN3WgNuudI8pnd-6-n8ppaRp9p7tK0Zc3T0kntbWWeMTrWjmKsXr-xk9e1zdJrI24PxSaNxmSCsfORrlVrfc3gnQ8ie10Bv3_3xcnbIYtlWAwIAwgk4BOJr0i7l7Nr9n-tc5LssVPUOhDNf17HYh4995VItcmhYinctR96-uXdbeudrFRc-0ErrEsFvtwxQcXoMyYIulYpIzeoW9_SLcY2eA_ess9ICxiwb6vesQMi076FrUwUMxkW1OZEAtPciI1qDXwq_8cC03Z76tKfY97JO
75040fREOywsohrr8YFWFQ8oROzpZ_t0ykkWXfvy_ZZRwFaF7FTK5gW4j6CF380jN4C_4Qq95c_lhLBIA4SrJfBV2vSL0_ODEy6JdXvie22WBd5C97Gs6VbmxyPQnPAuDZseGE559TIPgTK2IEAoHu6QCRD5rnviQkNfMgeZfBeFdM4JsDqz4iUAvKq46almkmHI9rDpJYXUzPdyr1aIF1ItMwcWRRPUlDq7mPUQaL8z233jb1VTpkmGP0tw_3x_m8NhHNlepmgLD3z2awevdAgSmj8cVYhuSggfWPLawavNQh2Uyj-BLwFshkS-0xTL_FUMwhF8sXUkBrrOP18GLROU2mdHyiV4rR8jrqHoPF1fgT6fRbdIMPFOj3pdPiF_3T4864m69GfXALz1JZt1t8Ju8upVbkygqsLQ5bXr-rjvQ1IuAVma_4er9RWgnx6iadz9T7X5WE13PgtsroRpDphWxCw-e5HjNQgChEVVkDWyjB1V9pJRaOlQB9ltD07ZIEjHMouPa2XivXSKcewXwyvCWvfKJqHYsjO3njTq1khnt3Nliqw6goduz_KRDWa-52RMI0-8fFSRPX5DZ7zBCfXDEG4Layfec11w16qPvxrBN2sl7MBNUk0OLURSWaUtSNO_CtQtRokiYjynaKqeOiIE2wk6jRcMV0kSRtoiFPtDcj7wzpIFLIjTnYEewiPYNw23u1GW9HVBXr6FZQ4T0GpSuVR1H-MtmC7f8OuTkPsg2Wr6UZZyHvAxmXmBHQtr3RjcVdIaiKPzFrMS0Mk6h4BBDyg8bLtgze8QsmDgoaeGI6Dli8C8Vj3d0-qWj-vKO9ImBbTVjp5n-m1K5DExH4TQVeNji7JWurW_PIeqEnd-JYdcktYZKQbbs1-mfUSMP2lHUBKxG4OyLniDjjR2NrAvb7zW1-DEagQ9rfBpsh9-mohHK_dGWdIiypzUGfohdF-fPZZR3gicQaoX58TwUzcW6ULlp0wBmkXonMgE4Rcls57d7tdwBe_MVmKIoQkNZWzIF-or7VzNcRNYdHGa5n9hjJYhh4ovsHSWywHsGEFiRLhDsjyteQQAnFqU0GzoRAnv5F0MRuJoOZdWiCBEKu8UkFaPZ-LOCV-54eBlWKHwnDEreFzya2_8Eeyo40aro_xbgPWJUUD7r2FdmvjHWQFWxfZSNvcCo2CfDR_YtSE6U21wO8dcTzrfUwLqf98nt4jfUWbbYvCoFKZdOBnYi0jHx90psGjUbi6DQMHavyyCOPGXmnLlRmlY13oZ6QERHlWfWMw6pjcF-YenotrRGS0VRnnw8W4kNnZyy3K-_tECds8FqgISTAsylol7fjmcaZbPTJ2kYHt225OMoQ2GLzfrNIeoIQCho68D3eWVDRZgkBjIcrrLpw34Nh_lrypwRfUYNUOTjn_I7WQbB1orNpQDeuSUDe_IPHiFXmAj7I5FnlLjJEIU8EWyacMI4P2sh69XcEhuTX5zzqoFK1x5lc_RlMaYlykLjeXfCoZM6idUvdZWK02iURQ0E47OQIUoaJrr3rV0avlFds4KP18ZJlKffmgFb0_O80ctMiPjT4Zn_kaVxg-p7thZ95Oy3y12RpC4B11eKXrpSmd2Ejj24Ls9wTrcsSyddy4QcC-W-NikR6PQPg3NGcFikuPm21_srAMae5zMNhwgSERFT3DCfP0rJwl7bkPzCRnZ2wbZZPkB7Q_mXHZpe9dgJTFMG6CY-5S_9R6C2M0NFBHBo9XxZOwyDSY-4DnIKE5o2cNp6EuBoybfTBBv3PEIzN25JS6AL7vV3n15b5IGW5iaW5-Q2a3tC8pZRCxfaXwjmXqGTlmZLODfkZyy6WkvemaKkmGyXJY_fdJ8e1-VL7lvNVj5HNezA8LMkv87Y_EVv60vsnyB7TvrewuQ3zRCAv7nZRB0NwMoAXzoqepverjmxygbr2Ljbmnjt6S3esylXQPsf2e3ZXlT0PBEEYz7Rpqz8EWO_pb-SkYvBj8u8TlOVLKYkjx4vGeoEbgd6QGn5A0LSC9LRe1x__Sgj2U9qMBprWHx40IiX71p_G6hiU3l965kJxd2upb8b-pSCtnnnB_5uOgg99k3d9rGM0qFb4od5GxoQpfO4f5wwXceZXQqd33jLcv_HljlKI837yWYSvPevPtq4BBzjgXaUInsmdxfleThaV7DNkQFG9GhVNfY6bF1iS_wgvbsAXmMKAcsbIxBWEY1vurMStASdnGR39ssMQn80TIDjJf9eZCVNj2BMQuoBrBKTNrIZH90i5oynU0T2XQsBgYVOyTf6H5K0FaDsDQ6yYFNjDSRZMkzEUIVtWQtdz7AzkgilrZ5s2ZKCkRQ4j_8OmQzccOuRJx6FtZE0iz3Mu64I2eBpHmWk47d3_S3tVwR0UXoKu3-Uu6C-18Ko7ChsMNDQz5J_YZ2JGBMvhzpipV4pMN5ahyAL6l_WHsa0f-5kJKrxn1HkwMWsmZEhuvP6NhY6F0FiNHrktZ8zg2QmvHvWdoserGDLyl4559zRw4l6LPKN1Fw4eQhPlb0Ah1UxxWECRGVs4nQdFWhyHQK2NulV-xJxNhDMix850Wz64W7HhcyOQ-BP1KgNQ4Lc4gANlCRTKW5-XjKbYVLPyrQokUolIPc-xyYVHy0Z1ESPnTjRNEkAVRc798CZgCHeOw51mcRPYl3C_-z56yWpSChP_rfMPB3GXx2GqZDMUVfI5fx3e54tJ--1e4SEfVCZVHNYiGO-oo3NQE5b1vL8hBIzThAu4hpGSmN63PDBPORSDolYY1HLQfXXkQ80qHUZ1WtZub_Hk3wdgQKZ8mx48sQfQUzT3UFF5BTPF-mPXRT4HAXmEBxFt248r-kH3YagN-RXgJOc_c9BuiBqjnICCvcRvUXythv9VjOQOcvW0ukO3IEIKF5SjyzlfJ02mZBPeDNu1_vh2BZxvqDH8CjoLR8wvkazzVX-J_yZMixQp8wddevth-FEeC-UA4x06vGVMsXfiKjagK2Aea55ytXQpqrJeugI-86CnO1m7-U3QM2LHb34pPOaA2izKuEWuKnIhfKjfsff0sC6NcCDG60PGC83J-2XEi3AZw03QWOSRY0vX8ooia3G6_7-nZv5EhPkVP4HDXKNwiQkhir7kd8uFf8hbrF-FqIzAmYIQIhRTbxoHgRR4pzz2lARB83vOoJrvDim9zzymcHd-T3PnXRpyCSGtSbuY8Elh4Lj0FIR02fpS3q3S-6ZwFUnugMF1mbn4pqf_NiCQG5HcsXwm1qVMb8x-p4H7uaXXcv_IgcHYPG6g6fKw4ALCKxkEd0DkkVhRH0oyk0VplnKQP_zE4MKkmF9-F9zoi-KJGgcEC2Xuu-H01V4ZvGXC2WIVIozD5Ix93ibIZLzPAfhdFBMZZTJnTEx8xLMXWflqsglsbxlOQAI80WbijiHvDWMkM35ngbm1JiIRnA9-4-NMecUV_QSyuDV6DzA71VoIZEQ5VJ3tGbEQVog1dPT_zrVRZ5yPxhhKrq3D6DyNDH9k-ClXyDYGeTsSYGDRNvQs7bkGitYsQwVuIEsI-q9u_piNL
uh1lbbDDVVy7Hi9Sr5udQo1of5MntmlE1SrUOddZGYs9KygoaMhvu2HbHoih_xatY9nKQiKjFIl-Ci06btAg1efLO-7PSdKmQJR_T4ak0MiLs0y0GF-nxYT7zWNs1Nv5Nc96onRumkvWwSV7vKccvjuWiJ1IWrcqlJvAcQ7XleaMfotIn3uLxdBTuztICz4eguKzN3TA8dANkgBulFZw3PifdnmIJlj7SpIVcAlShXdnxcHyrtEMMd9X6gnCKxA82xZ9MCkMWSv_evMmKnGpvztE-r7Ge80L2dMa_rLkoPVYaE-KdbTxglJUfnZIX__XKVdNYjwXjc8x9Tv2362NbyGpNixAYlV32UTQVW9FHfSr9PXnxs3UFr19O2qTx0MfjEiXnwQcEkSI7qtpjU7fP47TABlw2VVtd6QwDV-bzDOZw68XnTc9EqU59b8Zn0KYnxz_dundShLR_Q8Gw3vPrGne3f8fx4939-bfE2kjXlaIVZFmUJFw-7WpP7tYNfFSAPj9HTbTgVrWbuLNiO7ANAtDGUk7g2dhRvLOFw-bjHgP7neJUx6FuzWlr14w_wYiHdZKfo-Ziee-TdQ7iaOS-eFD7gjI3AAr2orAiqQq2HGXNisX1gl85vs5SLwN96G8DhUrzzw9grjifQetZ3T_4G_vK8SCN6j2GXp542Fa8skLswU-xVUEMaKdmZV18M53fnHvLkn0R4LPJTTYGv5sfMcXzawrU8QzvQr9I9x5CxzDWMhcdd70qRlKltu9Y4botNkjCNItSgX4Idnvg1FqGSg1671mhduHmrkA2E9Ad8Vwwyhs2uIBYsfWu1E7KIb_OpV52bTEyi3o363kMjQ9MODEpPxvnrqHdWSpBF0akTfFhASL_SCZFKQP5dq6Tr4Mf4vA0WZQCFNb7uXq8YjP74zpWMdpjk_9f0HJtk91vINJNKsYpudECkJZI78utjQu8z4aAZYgovimZVxaYt5-YIeahFuqr__1achuNrlPeirRbJu5kbOcZrjspz1vM1gcDagYJzO1MhK-aFmCmPhUwciac1zjH58L5nyKnb_T5xZyWQ0B698Xf80akMfrEDF8AayiI2BKOZONQxf28T4tk6LK8bQSOH89yd8DiIdFF6Ls--RdxFjw-aY4T-LXMOuVWpA-749rkwajnwDPa1NyygQPImUgDQVKTduhKl6H5ta4NRPhUCxOSoR9u44uP0kF-Pdp6Uhaomf83X1kBi9JjjiC4R3MVLLC9979xAC7CfFsmbpeneYGXfJv7DcUSg1lEDuDsmyLw4v4NIlM2NJp6O6JPBoeUaxPmyvti-43jQHEKeLyxgp3as4vLcS96CdKRQ18pONLbLXOgMx80jhG1B8TMwE7YGGHSmDdkudsacAhnZs-btdx6gy7REFv8OSgQMUFXw7UV77ykF0kBr7JhrdNR9_C3mfnm-d20NTtcsLbnmk4jndCtrCRTuWsO-FLIPCWsFzSsGShdN4SdG0eICZyP3b1BokVZ6Pxqr7BxtNuhuD2vi-fQubuFld1CfzOTxQgyVMcIKVJ5UOlHNFhTw5rM8OolnIR-yZAoWhRpv2V5F2UXp0nqkNaSeLNDHKZaueT7p94sZezfFIqbWoB4cgIvXyzqKW-sK4hJKaVOgL6V8HkRINHn1lRDhB3k0TYebugAoRNWxWbvqUjG9Jy5_hYqA1Wsld_ZQK4-HQLUmLy4cB0riH0j_uglYNWBKw2vMUo0ckVpazIdUfMUT9ADbfxhgcNNDKZ7OZk4LEbflUWz4VVONiXYgtm1D5wtAwwH1OW_c0QZkx68T-dVceh4eJGwdG9dlTvbFIigAM2r5MKOJgeXQv6oOz7TJ5o-0mWgNZv5bN4-_vpBq3zmYLiRf0_E1J7byq7MLb-4Lhqay6GXcBl_uNkbjIbPnU1glDQN-8IgZh9c46qSizDCzRJZqiVz8AVyL1dbz-izPi9B7kSvO7PhB44p4sXpVPJ4VUMJduvR6x6KPi9SBCbwj2r-sbfrXJC3rQNEEItwysAQWSd6YfRnrQZNg5LRdLFujyR1az1zTUwQRsY5AnDJoaNT1peVEG4r0E06RMB_JIcwcToKMBGSBagPlmlRoyuBL5vOgz0EjzWu9pMcy4PcZ8tDmCBlfx09zciP2BoCjhz1s3XbCCvQgMDsGhKG-h51hzSF8ryAh0nNhR4UdrmSFUp6arRjPt5q-Bvhcb2xNPJLK9Ajr9duOhKfh3Q3mviBb2Hc-KOJQJwmtCRsZDjKZ89tzTMk_NtHk1497EI_oDt4b39lUu8oV1WasjOBZl48A3krvpGh_nyUqU8d_vlRuqoU5SN7-PUEep7xz-hP8PaL-6WYpJPCs792WG_4XTFfAXgMZFE41BQnry6PQK3tI5isdIppXbq52lpicqzdxjTWoqXj50bGUGRo8QKskl7Qky2ecEbIJMPK3Xt3YmSvDOMB4-LATM2SwFLmMn4bzIKwr4YEjYheN8JuVpo0bUZCJhd4Y0Y4NGCmkilJRnWbqZee7ERuBHhOiY9KqlC6lV9FcR6myIaVxr5P2mJ-RuTleehezp89MFT5EBIsorm9eoaemFbMbUFvO-Isl9OzaD3y1-PoXsmTfhmDPvjC4_aWC-WyScKib2glzEVkyiJl1cFeSV7TZqry_mMATeNJPgaQYQXxtvd10CdDTgR7zS5oWZafFeoWjWT_hCYgPGxc9Ra5t4AweWBjIn7Uw7pIJLnaxGqeIc31BhNeLv6Q5imV5K9QmwrJRXrR8g3D7IQmvSeeyOE1M0dv4CfspRk3uPB22LLGD13WN_OMGtcHhSHZU_lkyRsMW-GdsFLMg4XWwg6DA--MqvEoh-wLcmvzMJrAccHkjiAP16aUxt32C2MaD_qmtDsC6daelqs5WUe1hjBMNBpQGro16oW_wPu2RLundT0SKR9Oyx19wzbeBsQX1Q9DJVVfjJvdas15VsYOY1KDb5w4NpvY7KsNa-aJ4as1fB28DV105HNjM8oDVl-AORt0U_pIfOdN5vZ3ILhhMIlS8uVoGWaupwlVQtaByagEiCHuM_4QLnJa8HVABjvGZlmhXTn1oerh5B4Oefb4RBs-JFuznL3UucQRdfdE6bNKZjnh8pHcwKkLMY6E9MxTuEWs6vvK2VWM-Hu2nANlgdxlKRXNRPPMtrgbvT4ufVEPjXbOKyzR6sU0xtzXhnuBJBbwqc35MyDKcnIYnyuJrGvrn01dufG-LNLXA8NIqvQ6S-DoTyvP0wPS7XuW979QNCKon75h0JKlOLK4qP9xjvk8xYN2gOkrOaCAjQSHirKCNehPJgXw-Tzb7uLa6lLBCvYgCsxXL_DbBb6hE1g1sr2cVvhrXCg6b0BmNwcrvU6zjFfuiXp-ZBshwMq52tFc1NPnahqeCJF9NKYErDEpcxUsrsyBlT3X93MO8I9qR_AQPxWG4UALPJ1AhalMayIo4JNSxPhu-tjlEhtyHjYxupTR3MPhhYoLOQPvsvCcNfeztekG3xPnxfy-_egFpBDEPHrUHMYbCB2bExNGiRaNU4y-jpb5yTk7Uu03sX
zO4NOrsVkqNtJc42lyuYa0ZR3RiIBRGQWvmTdc_EX-lDt71VeW-Wn9UzNvTGUN_AINHFHoF3i5f7yjFZzf3PrZHKyP0UehByj1-SM5RCMFHagaT5_iw0K0XnTWMJLTeaEfCxALpNYg7JCkA3NSs5cOvqsZqygstCL7MW2TNCflO0CTjCXNyCL__989-7DOwKs8KO2eQri_exyR6ElkMq9N6e9sc46FxQHt48GuhVDNsR-heBLzI3GtKeCXxfWLgEt0YYx0f1kA12BtPGLu3hUiBLux344xaYf6Aw6ZYQQev0SejhUx4sUl-tVvSV9WirP-nQ_F6-FecJQ2XmDAPHv16X_iwSTWGnkj80zTdP6bTO1JY2chiG9zVREB1S8PtwH7hIWVTY9FqxDZlRykFyBH7vYXYd9Np9M-6pNtKSOHUUy3C_RWG04QhsS0IZPPZD99p6kAVKGM8HSBNBlbNn7UlOxhxiPvTG5H21e-z-kvhyPEBryLkxhREi9OV2D0wyJvw_LM_V_5TZedPhi434Y402pQRyHcBydjpL9leNQr3bQ5ETXrE5P3djgyR7MF3veJDqPWWzlCbB08FHI2LMEzMD1HURhaepubxaWO9FQaF7_cLE1Tef05XYupreZogybJWzoClSl19qoQwgqLwi0QsfzssgSabaAzKHI0Hrurz4k_PW_U4Eugg_obKbGn1ClBzgw2IBi4I39UCRjUaZcG19we_vQKLHQuPeW2J7vZ7MCo9OAANehSGiL0G07JfIATzgxpduvzscyOicnarxg3hZmBDCRo1K4Kt_H_Kc2JRhyQe9qr-9Gb7632_PzHB6ZScfLBoGG3FgH0Aem3opAJ6yQOEcl3He_1HR6pVZ9GARzaYCfBWxnHxuEQl-uG-Ev6FMOBmhfW7ieCCKBMTantG2y3vkVUnkzqhrrPdl4qqyXQ2PD40Qqw7dz6xQWGCDp4qO7ho76rEB-PqjF1FIree5OdpkpTnhY0v-fLHeXjQECdC8ZsfDUS203z9KjVSouP9X9S1nrJzwZLmJ7GJMTuyiDLJ2MoReKCB7hGwMQMWGAr3UCasrr5-4jkmSlkSP5M-_os5T6hntVnF2G9xYZDL87FzIoMen1RLbmqd2nW3uUjSjGSTyuaQ-ev_v6uggJRmuiWEflsoY2tS3T8ylnGLh5_lr0xp0mhV5zDu0iKtoIoefEhAzGquFc2fNXFEJdTKodazmFsuhNEHIzRq2Vj26KvpjLYtbBCvI9QB3KyhaSw3__e3CHESJ9LlawyiyuhRacBmT8DkwXearhvpMm1b75xG7QO96UyRELFU-2ZVWp5kyq2AH18PQJJmuGJvoqJLZljUQqtxOareIUFvsF-zdNqPdrwrQwFZTzxr8-NbgyWfdJRVegSaxety1bvVys4Mc88HKmspuasA1GYPTweWE0xLXBW2fWtND1zX10_5m5UpM3hcMVxl_yZfgGhB3SSeYMX8BJ0vJWHbsKKSWTnRfnw5MWNpx4AK_MnqBrXRHnb4y_8RqG1GBweA8aaR42CXqZ4cgs_7TkIsuAdsOmxvw6zHE_m-ZqAMt0zbjMTT1VjUuLWd5ZEDIzzDid7jmx5mUnWGK-LDuCV2hKUqTen277UBuwHo2g7GCl46o_Zu1k6lhgRt12islnS8G7nMkGuMoGorRgyM67TDdhtgDHnny3OnL05svieFnCPcw2K2gdZWfHm_p6myZ4KjCuN7XZlPNFcLSlihiZDBlHSkht8-VAHFlUZBkKAfNpx4NeDBvJVTI9C_dYHkYAozIY6tC9YBVl2h4583Ve208YPbM03EEIqs61eIMvH78B18TEb38N0EbJZGiaq8kbDk0TXhrVLOyYFDzHC_V0_zhCSvghlTRADWrBMXz9PH0R6IV9hMQja9kEY5sDxJA1fpPMuDsYwF48JsyxMCOI6KGDRCcu9zqLnIKl8UKGisfpxYfbq2_dvVEKxZWkT7MXSI-9uJLpzAwTK0O7XKEWjM7YsnoZ8WElciSmGPlRkSjC5XWkijIuiEiVTc_ILo9l_2NVgKnInqsspnVj_6VeUImqeVc0LiXrON7L5ffeU-HWVJgKP_TB6TfyMIXsFxfU_zFfeLOQ2RJdE-ga4JzJIzf6F0l0cO1dQl6nr9U44FRiPOJBttKJJrsJA9F0_2BKfPG2GXtaqQapJdnxlEpCQ8QSNtDDau7HykCuzdF2LPveAZawriSzj8cm41Ip1ZK4J8qKpn5GBxqmcKof0qQ0YUghAjhxQAkJjs324NdlYQWGCbhb66Wnxw8gTax-orahTpd3SNYpTUTUodlujDNLaJTCVwA08vO1hOwSrIaibroNRR39dlvZoIOJltrvJn1vK8VMQrdmOP1yaU3pDfmByorWwahMa6fUVN2YJ9rBVJKTyikXJAdtFz56DCkwxNoIw7D_twrTDH7PqSPTTJ6ftIwp_WgfgG-FDG-gxTvdhyCKJ8wLvH-JQbXq_jjYDy7qaFF_2HER3nDiqM8CvX4zPDJaOeBEhdjD33_RyiPtRHRjqakL5Pwzh7BYVYqzXd71M7xJa0TPFyk0jLCbT9S_CHkp6JzEogj0s2rqJnGSlLJfoxCM30ZACb-D2XfTIyakPUYUqn0z22TH7pxJnuentThJY-6FN6ocVPa85rBp9vFYZv3BblYVaSXuG4gjXS30qshiVy9C0ZFwXxlBnOchBDKjg8g_8muQxEfPwKP-lQc25M3DmfX9jV4PYd3sRA8SOdBKRZpMePYl_9tNAMPzAvd6rrRfl_cBudMEbsaV5BahPPBZLMLJkJPm5aBYr8sPRa0FRQ9_kl2zMKLBuKYzCEmcvDGh2v1vs-vHPXIbjqLZwmoiuV55vg4K7edUYeV7rFaTJ2z3p-V-sgIpCM9wAuoqq5g2sstnOL-J8kMKjeQjk_r8aMzu5jPLTEah-sKtFQgIawx4awmqqIrmXawaitoXFQ2rL6qbWmomV1qF6CiXOufSXNgob6CZ-DpwZQuq27AUqCLArkPumlCtboEXrmJAhuSZ36mAdVaZL6FNRElgN5hV49zzmPWhDY7-tsoa9wdgKiBUybU_TlXHUYcSFyvCHca7s_3WT934zg-UmyIQnkEdxhs1tTtUb2R08OvTLcq_2htZWYB8ODVZm4lWGZZRKDVnmO4gYeF9WqjEwe0YSA2STb9n6dSMR15po6c1uhAvtBZK4VYNf9kHI5aYt7Ma6kLNVENmKWZJnkGwXeSlzZdf14AzHhelr0sSFMT0A0sPs_fO_Uk3O59eMss5kcZIFzHtvXtNUyxROxG6yiskXoD9Wz9_biYZqOXCIDtauBMDxZkWPahVue-JCEuIYtEFKVic1efWLpapNJHY1-22UitYbMrPGibS_8owUUE8utKtkNoX47LJynrSaMw7kkS2Y3OVhr87l88mIzCLkDZKjgF33QYMDk7SJHPdZIjm4qyV90OpSeRw8gEJqsStbx8boiFT-i0LLvlOv9tLiwrkujtDZ-VM69Vrl1svPZ3ny91g8SvzETQ40jaNHlv82PICaRIhNHve9M2VilRpmd
ncZDWocXvCk-io0LpsTBB2ucoB3Nk_GufcezqGp1_xvvqVbgUp_gToh07D2FTt_DslgPeVVLsb-ALH1BNRTLbaNy9w4H8f9UsXOfRN5qPVftvaikziZ8q_wWJVBmO1vMDfB4SMgbVtXOKne0ZAJwF_EpsjEHXsL8s7RjY6bgrm40mEObmJnol4n8mKkP8P1qc-xXsDB62IRrBLqea8v8gJFrCKkkEoRDitJw2FWGgP-XYZuYFdSsTDEFGKZtY8H6xulBApTSqM0S3bc65LKAWBepO2HvViGYvsnWlL9qdW_AJzokj8CXNOsQJjVX5-f6VbPzfiZsZvd41z66KUnIhrmhaZZzhssZ3Q5YKnL0LuEqzURBEvQ-pD_iYl0fKdrffr67ecYPQYPm89ZMgoJ1sPE512yIe93MupDVkFxmyRcqRJGd43Oz2Qm7eANUb_MgSMNkKyVjHYuUX219k1U0g-MH-RVLiw2uNf8pP7i1TTA5c-Kl2tp1lS2EbepGg86hFXltZT8yivWxmXU2DLQm2RCinnEy0S5m6ilvOEogtqn14ye83PJCoV7mSnXxvq5iWz71CSnsUdjjjch5mkRM8AgsiVy3aCb4XClQk1D6vhA3iLuch4t8IJSc2wwN797uJ00T89ynWUPvQSFrq2KLoYt6mV3MqZnLC_3GPFHCJ-19oIFkV7qjQ7epFqcVEdB62jWLcLEstBWXh4ZFaJLbzJ7dlb0aoa8NsxKctgsmMh4GLvmwCf_ajSoJOVCBjx4S8JAGO_lIelFRUMQuQ2m5ShNr8__CvyR-dZ1_GNPDCGk-tfaNoEr-9MIBqhTDEeBnx4qTUnBKXl-YDz3QrxrllFxD9Llsq-3byzwnqZj-bKXLwqOp0uCd6_BrgrGFkjFVj1ztSMxaivjeXIqDFTC6ugxmir8Kedn1NQeXpQQ3smF9ILt5Q0NR6ltlrWlhSPzbcCQ2TTn_-xKSMWEDHfrD3cb3hTksGrvw-iHAluXDwDhQxo_BxM1fNgvxL6VXOjN7MRiTkWaL1CYZo99OqJqk9D2WPiFyd1gNTtOGxO1scSdTwy4rPHARpXHLO89oDwkP85l3X0rHGw0ypnsV2iu9a7zR8k-B5q-acCx0NDmHdE7zaJO2212cmvebnIX5_RjUODwYdyD5mrT9JsfXXvU0jShxSPJT2ns9Mt2Yvg7zckIYYwekpl1fvzuxyGYrTGIqzqGeQfZzuwKXpRUUN0M2LtdnOV4mxMwZGxMIuYhqc_wuwi9I4HILQGUtfaDF0e8_HAoxr1TDq9Sbw7LFqN5KSpWewlUommY7ENIS7WyG_lLCFIDV93sj8g6WFQlB3ElBJdKd6gIqaKm51Bwd27Q7Chh_2kfDzlCHHTAg78bcpbEYazIXlBjz8uf5KiZBYmjuB8o6RyUHIekDFLRsmx-1N-s-RuPy6exKyV746BPoZM8nxyUS2lfYiT1vL5Mw_K_y8yBXX5mdO_04SNmgMkb8ht28puFMLqPeTLoRSIk1inG-eFNi5LFMmF3zH7hrcrt4KoOqXUJquTTTmFkUDMmL-3OB5jMtK39OVFxAfkwRxzgGTlAOzn6UXoaHAPA-9M_Z2f_Y-wraBvGsnpC4nF8o2Q8lcI6DKtpdKmOMZY1-5Jbl7EBQeT-d1wL0zVgv30osMKoXd0XH53F_si32ROtogLWzupQz97k84aMLiECH0HlX4dMQ1BDgzYts1PBwzY1CmzPSU9dhPiEa5tS19SMWSXaBk7PgRy5qcUbjWdKb_pj5gwzwsjbnj81yGOfHkUZNzMHBBT9TsGQ4A7wTL65XZP_jLsLgRMNnM8Mkl9W387eoUgEXU-En9XeAN9kN4JsRUTP3jZ-mpSjwqX3PvzAfHZ4cmk3gYH7nq0SBKoaRM1n-FPU5A2GCIzw-OXKjMT3hdsb3HHbHHDBxXionnH5vVBHee5Z_1fTSFYalw4SLpF5dLwBuTkS42GHLIMM1y4ZkZdGaZR5ynWZkrJNRr9gzHHTsT4ZotRC7KNDgjVVxJKkrUZrJEL7QD71rftZNQR78HCS5QSoHhsvVum8S6GW-qitojaV0nJO_RVlIEn964B6HYb7fAXHj2yyPYPY2Z_E3OUUB1yjovuzc5hDrqznRd39RDLJMjj-g34xhVTnp1gq9jShMbp21pM9iVidOjecnoEtA2nma1JB4-gZHZGb6kzc9scsJtGINmkGy0He6Y2b8QwzDvgfdrNsrXPyAyxLHc6HItjXjucfzPcpPSj_KDNTqMOlq0oMVKysEWYFatmy3jluRd3QWhupZs-Ivb5poRmSjf4Jjl84c0sd4ZPzK2h52DXqXGTglYtWMYXAOMiLu8SYAvsllcFz0ezPJyfmUYnCVFpic-UwOPFtB1UTkibFCWSUtg9kUZ69pT9PCIoj50oPMCEnAi0HqBJnoFSnhrFDWUmLP6UluTQScyKTnsN3LRr1hGilimpCWyh-6BVx3OhTfiXXIUL6ZLGskGIurLlzabApirjNSAKHYwpkOZx6t47a7mHA4inEw9L_TEb7eersun5lu6gTw72oFzmQwCx7aOv794tvlcC9A8EsPSnPi6bn_Z0dGNXNwUEk27fD42Xn-AzNy_UUuUDsXR5kuvV2q1uX8F7Ngu1kKI-2S4KoTH5epGDgiLw4pzfrlQMVnlOgPq7XYKut4PPd5pR45-qGqLxHpXO-6IgfxN5H-rGjo0o6-0fGszI3AaR6QqKfBdbfWI9ykZ-kNgL9OeA7ZfCkhMCJEOyHTvGh76YFNqi7sXRVBTaLwr1CdoswxGZg59G-l-kji6fYcHR4K_1hPYLLtrr7t1IhoJbXc8doIFpYROu8wcXD0KSyr6Tw4zkRHQVmaU9m18Eb02E5LwgAtk8nXXCcP-aA0JqrUQjf9udKGQf-EOK8ehW32tKlkD08_8NthkimKLKWVWSjMwcr6cvkyLePeBGN9s9kwk17i1Ku-nK6cVhaPnfqKRSZ0Gd7yNXo2gf750uwtGv4xjoYxhWd0xomjH4I0ABHgQaGg445b3GJosK0O-DF3LRfiKjbbb1ojHCoo2HuIbzLo2LjaP_xZTocLnHcwMjLXZNXuytk5M9XZv642xREqeBY0d1sdKlWzxmxeZqccotB99BeMEUkI7VQOjOGA8HeQp2SNQ6MaBhDLesU_tYtrNpSK8N77M4km9WiRg67qYUPgr3sMfIAuQeSspla_lUWchbM3NExJIpjPVYxSE5G9xrLJF6_o08r_RyxOdwOIfmXk2JD9x1TtL53iVhnHo58AjXj9yV1tdCtfrwDs_IxT_p8vzqHXQvRRr-eL3BjxMVgTJMJwULxdihhugx7huxBi2fhNp79BuZX3e4iYGxAxyZx_OJnh0jcmf3_7UOlNXlnrTbqSbBOEacQaV0_R5kQWkuFklOpVV1c82uvZ3HUmOaZjPEexK4-iFdpYFehdtp9xAIGclCk2Dn_h3ni28FarbPEVVrfZPEFxI9g7CoYfUr7NdfDQN0m2k76Pj5icbq3gqDJXBkVVdNv2ihYNpzeaDiE5bNc-lMilv1xW_o2
0f53wfdl8fyZt_CZDXZ678XfwCYJVOWiUT77SOIFYYAYvmP6m15UjW0hBBK9BS3dxxlw_xfjchF9_mEbjNR7OS5-bttB62owBKsaL1cth2KgpDCFY_9zGrsZ-yfssxYbz6UXIWQKDFrXzpuULa7o1CuWGUuNVloTvXr9oLDJsW3lOEWUr8rAa3vWee-OEI2UDNlwM9Gvz6Mo2OFB2tk_Epz3kLGVIlMYHM9PnM48hemkmebuBJI-mU8sXBvJC_vOGB6rui0wXZZil_8YVOEWY1AqhqpPAcw3sas0ggHLey_ksJLTPaN2BeVdSUBz91O0ZEs3LQUINhzXPfQ-q9QTJzKQEvN7sW9MZFCs_hGP-3RMnLqO-kV_qN9NH1y3SP98gtsqxoNuq6ktQ3EVBsRO8zzpyayQVCIg_DKBq60NweUrVbxxIK980orKbmwZD7UZTUf4RtAonb9HTNcoXNq6eDwjpaycyy4UNhBlFrNURsPNmRPmVVddLKqGgzc6T0TyUoelQahmZP7uFyH76rrPwU_zHcMEhJpVJS_q0FuT8TRVr_ZhNkMOMZyXTV8TjQm9ADlTtldVxI4PrIJKZt-KO2amUXSVl9yhVIu_c4ePf1sxL_OC4xSgsTqFySEf2aUzvFnpv5ys7YD_HA5pJ7QyQ3WOOGzswGUE5Y08sk3HZHQsYOIOP_OwYaYwFwVphWINTDRwv6RKFFHrgYkTUseTu8lt-CKbbIoOT62ZI3aUM8URc2gPZa5MfMpgia7qFWX_e3H9rLlNOMhGuzbvWW15FVvVVzWjPXRWWkg3aaaYcgmRSfIBuPYIKN6pZdWSYo3bGZIc3hOLzR2wjQjwcRqklXdjlrZaOmSXDTsQirJXw3foMyB0zinL-Kk5Z8eO6wLhY46x7RILHnlIS1i2HNpN-fLUEslWvXU8cM108czTP96DDTtysWj33Zc7JvAdAFEPaCxJdqLw0u2lFXs9Wre00IhLwJKMcYxFqwV8mZyQ3zt4yQe2hbdoXxJgJvmJR-hjOfrSXajjL6fSBqkVR50xlL5z71PepBaTI5Dlppz1JgtS5s1RYKWYeIKZnzqK5uNs_Y3JnC9Yf-RrTzAAYKh3YVCQhrHkWRvIeDGx0IBZzMZkhoQGrrG-hAwMt9V_q-QbOMMS2Zta35yzo8pbTR1V-ADVtAW7zhalO_POjXx_-rC9EKTMRrShYC1C4qh4OVX2JJMUncVpnzDGtdNvKD7BmOeohkbvV80ZaMcF49VdKffpy_TF7ZCaKNQlJFYVpakjyi1hQelE7Z_vNKW3eSp6usLTTt4ikAgGdg86aHChhTScoEJohuLDcQwrz6ThaKUZR9hl7SyK1wPtFY317irhAqxPdPj389Jv8SRiZsIwLNNI9hBBYG1srNvmwWxcIwITqRwTRzmKWLpsZ8MshbEQw3wrlD5fxkhidgp0fttm1HcWsDOBaiovOX2zf9GYVF_k2sGfeeyIoxTTFfqoYRQnoR2vgkcJiwWHcY5QopQvg92Zzo7PyGPHI7ANPm3gNxI-pz4MVO2vcqPZAtijVZQczFTe3xFZP4ORER8_VKp70lOZxpUaeHOc5CoEFo8QzubIwX5yQvsTVvSZ9XFBrDCNU5ZF6dIaIbaE_NqjM9RPDnlAeUniEIWwWew8fEHfGTAKCP5lfgLrrJfYW2zoSJIeUjHqbkTzaBYE9Q0_L6VH2UlPItR0wbnWklPInel5lS4IRi_dB4JG81FVs3xFOyb07d73w8Jw8tK8HsNp8c3M75RkXcqChJkhNRPo5tdgQt5XfFVZhgfsmXlCD_Lo0Ukc3WjHA_H7wqm_NfpOlgscLJYvJTCsyn5Gc0gQEbcMPrRvITYcurmOw2x1zOHcKAaEu9momxQ4ty1gvnRsQGBzOS1W1EswvtYALewGn9U7DE5dVZJ5IJs9Q4eDs5i7XRzWKvtxrZ8DFpW-zLftUKi2O1ScagMbfnBHDXISCB4q69IsXKo3euQTndBau2AcaCJKWG8Gc7JHxlax5akS4bp3q_7zo_bwVVD5rIMdnMNHz-1Z2DHzPEmM1q_DWnlOUlq0lXSu2yxLCWASt4enhyU9odJzBvkJhT_mN3K4Mol6lbYMDLpeEeB-BHRO86aHVUIcZIIKU36orN0rdRr-ECtoRQY07ppQr_WougbdWU1CeEay51TlPyJ-vlRNQiUTZtbzysFds8TQFsCGJwKNqpxzjt3Wbp1WAmRjLGTCwQ0FC-G00nKM3Q7dmVKvonkqV3FmfXfN48vR-pz8JLf4l_kafxSQAbV-6kr_neMzwM7CFeO4030jknh4oKTbrPCOeW55-Px5CH1K5eZH3A4fPfk-HEszOPjtLGOdUBkwFWiUjQg1-sgRJxn1kx27QaQsbBeb8Huz3ZwuupwTcaqzP5xtfrd9RcFNTkUXFZfv3wmXcO-RGbwJjp08BrMV2clDCA5Qzlk1DGHxIawlIVVpt4hcVUAXp_yS6w8MmMGJtsdiXkUNABOTgbKe-3FHUy9-GBH-2Thjm6wuBCxG5wTrX6jzSQ7eru4RQ8KY6VbPSavr5s1OpKzFwCdoNYsCKI6Z_F6Ar2Q5cFU8dYsNXbSJ2O7lEzrvafURpdMJgYq9dMEdxpdsVrwwEi-tRYNLtF21Rnjt0q2149SQuepPQj3NpVVgJi_rXCEcAFUYXStCwOFHjjSLs5_M83ghlSBGaahW2CRK64QNlOLs8CAAVYVI1Z5k_Ahu0zun--NisUfQlln8l_Egblg-fxOm5WvrJcs5OTxC8nJcZ9yLlKFEcQq1Q2f8Z_fVh_G2o6IiYDCk1MgdTp4TRNeo8YfkeYdwc2FhyennCVMoeDsINR3lEV8p8CJR5W7-Q9HTLiWTRF8WTCWtPleLP0d205Rj_wfeKI1n_HczQHqiyM-YY9F7kRMnhz-MI5swvglVbM58_Xo90WoaMlMH2DibzPStuEy3xNk_54yi1pXeFSfoA5XvtQ72EtvlHIiz1B8n-nu-ZFWGNzUmGlt1nakYAIaJhFD_vh6Fk5f1sJdKVmOPyfnvN4lWdsl3fOqeIrd_CqBkdiMwaZFdn8WZxeTDBisBb2ZRy5_mBHxVLNiMDg_W3fjn6VBipY2qdQFeVf4ZuD37rXtD2cvxytEV4i9DYdwxrvlTryblRRvl6zCAVqsAvBb59VN5Jhx5os2vVPv4FUDV4hiUk1smQTfZfWMBb6Ok80t7OzDRwGzgdWZkSvOVI8RQCZgadQGWHjPKfQ7dzJlf4sOrbHiAGGvfHzEnhqXH4sjV9laGvma1uV5ddL7T0TTIp4egdKLdpdpN2A2yIwPcOAJIwVPwdPXB65a8lJfYrwyJrvBJKsXXl4nTKGlldPdKkA6LDxhilCocPzTXaDIaIft3rfmo8A6jLJ1ENKPXeLY0r4DaxRstQYLFFYr5VjoIbnHT1NIdxQ-fx-QP46Hgz0uXaOI3-CsbdUJ3QzwAEO51_6WL6fdRjmTRlkvDrBEThCtU9Nb2A2x-5sLDe_WB05fbHpi_llW6MwZUoGbYpD-IKMmpiXljf9Rg0jH4pR4v5BP
2GTkq-pR8X-c-is2VfsH9RF3w-tdtEFdqSGaMAWTKPO9oaMgXhq_LHzZJAj4CZuayjHIQJnAh3WadRSmMeNonmwp61WWc15O1YdI1dIVjm5t89nspc7827t2sZ3AKUPBnSPJvWhw4RdnIax8pmsJn52E07xwhdwmiLXpbKY7ZQDSgpyL7y6r6uXWwxcHyL_jBcGdIXkXRXXPey1kPjKN4Hrni3lzNNdJuHfTfZwiySwQzGfytLrN0MbxO2Q_hqzKLCgJKFzpDc-zbiugqY2WcNB2huHQzBcXikoFG6ztg6b0D1Bax8Ryd4m4zlI4bdyLQFnsMXDON4sOY_gLw0iaI-Yw5w-cz-o52pavYAy6EaNxuEOLl7pX-Cp3M4PMsAi01DGFXieltYcdCYDIy3OrkXe3gJOw6iDdMbUdYh4PJWJ_cABDg-vNxryJBrXqL38KzMmHMxclwhivoqivp9EaBHvbcOdpA74gR7KLUp4RcCxU7S3xXby3uQoyRaLCAI4VK-CRdH_hWA2mRGQEyqrorg4XOE3mfmj3W6OgzZnFdM5Zq7TP2TBDzKvt5-HdpLjyWq8dFdhP2H82xN_8yS0QkMC5drgAdAJ8NTojZDjAoh2MO324ZWXoBn533lLhxgn0Iu_MzhkQte0ZllTx3_TfzjV6Xlwo5XfFf0a4D4_mOa__x0sqJ5LkF8aLr2popRFUy6DOlSWYG1fM12_dVHPO4SkURlqlSKgiqOpEAmbe1lOaVJRQ2lbOIKcYJJQ9DlBmtj6tp6FdjQUHTDtFRsYshDrdnlt78QIRtwhtXmYOhQtmP1yQvjJtpLeH6k5NPD1oR-HCBL7XxtkbIN7GsvXCwWlRAegV2XKP--VI7VPJNAJtHkpa2vg_Dp9a0TZrbq7WwTPVNhAhs_GF2AyN1l8Sq5eJATtrO1x8JzXL_1qK5jglfTFcRVnEWYTv-kvBlzc99quPQUlDXRyR1XkARhgccWQhy9UMlRVMmp2o50Nyp2f9Zgm5baBD4F7wv7tn4SgfO-eoVWzcgApUQIsNez2_-q4C38Bl04SHaZDLXjG4fVkMADJwbm5V8XjUI7B3J4ed-pxSTKIjX_2mxs6ILTqWQUYKqJ-PQ7WovZBuLFd21dF-IkfWx4_k3M8hVjInvljK-7FPYdyX-R2MCHC3XytUqXXmaJgtGWBAxTEf0_EgNf_rjzIiUoyaIlD9D4tUpgE2Ln0kQWmdYj3EHm74_KATQbwElL8EUP34E6yOnvM1oNvocZg4Vpen3cDiD0uacnHDFFmvoAPlXiCliXRp2NZFeG2QVr-mkdP35AlaPJhL7nV5yX3NefWK3O6Xs1IBjMotFSxvZoEGmfArzE_R_jiSYPkdVaXUhmIxQVuChqk21uOcwvhzcH7DOusrPtAK5JZJTrC0Y302EMRtfnNcBVGFCDHypPLEtGiUurqsTzeFUHkSOiSeQpv9GbpR-3tlVmKXyrhzWfEfcSg2ANG3UDxQcqJSAekrO1eC7kizAsgyitbkYhDRlTf1ROmOJ3q8CXrag_0CoZhvwrTKQZLtnwRXeBa3XADuwNabvMtlWSqOMMkTcgG61HnsvdgGKc84lAJEfen2vdZhXkZj9DjuOA4WntJ2KE0d4BsRsaqzvjNx2VSPKIeK7bC8hgbrP43ZJVdxGQd8VOfJrRT0jTdlFIRSEkNzrfua3hZUXF-PU1EtEtrmDZrSR3FK1Kl224pW1Yvvi8UJ4TOriUf3YN-8pqn0cEnAezsYcRMG3gPeZ7ydG13R4GvVTUmFECH9OaQ1bz9trZZkmKHlt-rMtSFnKyqty7ysTXoFJe1wszMVcN4jachCCq0iKUT-BHUtPFztGg9LH1xqNcoBHylZlmZyh20N_a6lSkG5HoyqZQh1mFWqhonU62z5zsd8bH_tnrRyg2NOmXqAM9YEczPVgCJBwUBM-ED2FtX6pDJbJ3hBQuohUutXSr48EFRQxaMxJ4Oohu_ULUytVe9VEQQztEJiDKjc3sB7wKJQDrtx2z8QrJiD0vUQtyf4PLyTPEGoJ4WbO3JqT0dogdsJ3WZsyIJW03r55EIgOz9S-7fPM5Iuc6bj1yWxsSfOfYB9-jg_d4O5Wzx_cC4YxwhyOMP2NfnsbzXfTz0GJtQ4bILyXdrCTcgHhDklztxX-3xXS4niMti_wNcAFbCD6xS7Nju1esjbS2_Vrv6BRdBfI-z0mJQcCzM7iHQc1rYJzvSssshhZF9s0qXJLi6XHBDjSDDstDZ9S4un2GlLddilPfAUP_lD_rQNvBBHeCuJPJ35Q4g5O02EB2alYBf_CVcv5jthg6LwnnzEJHQEAWYYCS1USxaRlNCsf9UzrqCBJaLG1TLQOb6iRT2djWTj0s9PxxTAY_Ccggf3pPVuyuSUWw54Dndq5e1xY6hf9MZk51B6OORzELK43GgziKuA8jXBa_Cygh4UUKyLGq9vxlC3pVcvGZ6OXUGOe8PtrCQtiyrhoiS_d8BlDf2ArEJ61pq7vwrFHctW1Q0NbWNhHn5ehp6Wu-apuvL_bhW74JV6jvi4T2YECuKFgAMyRpGQBtfrDIXwH0CQ7a3-bDyGxXJiPsl9b6w2lW52-Dt1VWxtMWMP9u2iv9t6HBdaJTXcfV8hT_E-eQFOXirHBhdNi3leXbeyP95AqEQa9AUz4aaOPqNklK1UVRkSEAkG6Dg2db8Dhoh7kS_ZkmSe120bYyOM9CeM0YwvQrhajoE16XnqxbekTG9MuUxn3B3fQg6JG1mwV4z8fpmgtyKWnEM_0EPNgHSsfATrGgRgaX_BYOYobDvUWH_oCW2m-_6ebaxXsRa_y81OLTpLWhR0mNYm4JKDvfQDvRrW5mT3xVBBxjzTfpaJWlzTQ7lcSOJBiAkDXOtcvqqeKwIi7jDvEHR0wN_yd11GkNdsBkoMp13-xrsSIhXO-0FzWIa5kD53fc3YrgQcGXVXjPiV_vE4vzmOkAxgDzY_Ok-9Ch0WmuXA-CA-X2b-vDdNco_36ZkGH5qmchWfM7SSkwWlttAE7Wazx6AxBXgV5cNoOf4FPW4dLTd-Oz6Jy1l1z8dEFWQSeuao9Zk6CmPQubUrfQ4Fag56ixPewXus6ZiRN2HOSxrnJCL9JQaILstseL4C6xpMLdfbiWPRYfX07zyXs9q2HKx0GvLIZGg7eHoVs2yhJ7rpXe7yAfWcZJl5hA5dLZ81Xb3DLeZlK0BzBcCVaLakKkfwcKUvhti4Aiyl88hmwlN9xCoShrDUHCNN5ORt7kYDhNJB3JI987-NgOeMKqQ__KziAmWNAikiQrHqUOjxyKR4CDfRUMRlAM3qH9wD8wpXg74_4WfE2rARtIyf6N-VEWyOgHBFdEWv3MtqnUWc_Ge6j4vrOdkdrRcT1RnUA6nA1Jf5gTHkB6Cu9s5zrzVAkKS5Pdz8XAxOo_wCubi0c8SCKrU5-PxWJlu2-spBR1hbmrv8kmWyzZ61xC8w1h8wD7IApgth8vy_84yRfgNJhSoFKVQ7MVmF65_ztxIpYR8V6YCo2obtVx2OT_xK8Olxr0gTIv3qzqcZ48S
DBsUhsv0aJjWlNoh4tzmHZTVBWKvm3ifBd8EIxLmnI-QPrHy2XafvET7RaQXlcU0PLzUpmYWrv2YHCyeE_iYxxXuXjqJ4BDqwoVxtpdsN-v1OU2RlDsGS6CEwGNUYatMDZGd89RbeggZZ-q6v2mwv1NEPYlOt7Zya_E8gfiY7rrIbAAH2gSKDY_XwAt4Ybvj9kFAhntwM9cfujmqpPsHXrdlCCDNMRf12H663Zerl7QcHkcHf_JQNdtinSNjL8bM9NpG0R3upMDrb69mYlqkc4zuKuudhdJZgFvTLTkFMrWBg8mZ9zBNlSmDO_8yhwZnvdtLPD2t-v82SyJrabS_lLtcVnmfWA6FtqsactBCkTG5Yn4W4snMTtCxz2KoiJM1A7r9bc06p2nTlHKvl8wRCfDKkXae2w2td55C-ghR16U8CVP4tMSEQ4Y-CUehWA4koPGqeXDWoHAerH1tOSJ4o5MFx6M5nZK9Ay502PxQOl73AM3-E72EWgv4ILG97j2yQNyxvswAj6XMfaAW2p5JUeeF_dXp5NCo2wMS4MzueRVRTnugaHQ853cIgTdz-HNblmC8i-uefY2GXC7Yd3cNM55UzzOvDa0QiT0bQ5UUqTCG9GF43J0KTFsGSi0g8tmJ33CBa-cJB6rJD_pp2Gb8Q6dsSK5_qGezi7Xp5ZnPy7MTM0bWBq48apCAtg5U6F6bCAACowKUdBq1jpWS6fXnF7K-M1l6m3BrJxa2m4IV5wnEppOdQU97HzdtkfazKj25UUAuGuXZFmcnwXqxMK0YAJkxQVI04HvzGhGmHWWKGCOOFVPm-fNjVznxGWNBQcAl1MEpLx0cg1CR_oYXFufiFZ66C0TkfA4FrjhjhYj_RlWKQ4Gp-yx8Nzq7Vry3S_Sdexm08D244nWKJbyU42Dq6Gn3nAk_YMHAB2TvLz-1DzULBKM9DUQXOpnPT6B07EPts-JwAecVpUVEiMY_b53C2T6upmQJiNAR_UmUX5QV7me9jU7EB2n0Yp1-7Q-Zx4Rsi_BsW_Z3KoACo-vCrDcVJ52esuV9C5b2ARt_evckyuY93Pb5wQRxoMgXln4jGQuAw3Ungrs-jPWCOan-UL00vYLlKk6WeRJISANSNM18Io7qioAdWwpBWEUkThd5sgobHtYezZfXIeAhN__zUe32n170irh8tJ_3akkrle960SN7Yix2E32XMoEPvoyzbDcp77b3odjprN69PHIhqVhwCak_6Zc35zi2ndg3riTGER2BdwtOyZhyqctTnszGPnI2pZ59qOXI0b1pkLoT35fBjPgn4vmTpbWU7jkfm_A5oOit3YV99fcGoKtCTvxT7IQ1-7PIK_8KXdk-fYjlelrwxmjB9H4maA_RiEshDJkBd6-oReYxcQONOipSSoCKjnaq6_-lBmH6LK4Vki9piLg82d-rhmQLxKjSglmH1nf2Hs5VfE90cyNJ3ZrzDR428L9vYZSW4jRiq7srH03vs-7UD1gWv3H8EPtEdHcklwYSA8AwHnvWdfA904JetbjqDlXAxalZ7Hzo2feU9ux8jArrfvndrI4iU7NIiDLk7ssDcjGaGo_nzNKlb5xv2SbZsFrgrW2jIFHiPt0kGnTB54xXV_zqOwQH2iLwajYbCJQdUkQYtmEaTzMo4uU73sx9ET2nr3ov5QNJQZo5OCMghbzWAmJRHtBznrxn2ckCZs9Gj5E6uMnUKHVVjZymbgWAMuj5CgBZ0PWXGZfNiqmOx6RuIjab76KMg5h9eIV7mq2rU-3n-8EikhCo4hZs9A8PgIrlJBjWiU-SyEkzR19cBAlZyXjcA7sMV5xUWjfjQ-Y0-b9E1bZipv2kaGUOeh_s3uYllfyC-akzrGb0DRTpht5WjRn1SpzW0BlQiqKO0KmIIhnJaRQBI53nBHW--G7X1XmcgYHKw0KLwWn6Fhheu1mXJDdBIh5lx4iltd2t5Z0Zq6joSy2Z-SDfv1XFRQK6P3OLopNFrsa1Eiklx8fDWOJAwU93BlCEXIN-bg_HjaaK1Y21vjx-0xY_9ruq6mJDwV_rWftKTuEqHlNDcUG4Xrb9szdrAf2DcO_4EgiIih0_cs4mxX66h3VtPRQWuBM4OrLXIt-sAPVl6AmjuC9uhPIBPYeKKSM47hYZHsgc3MdVBAFQM-XJ-BrLFTe9B_jiau0GTjuK36jK8by0WTvi_WZ_A3G6WIWiK636Ki1MFLLMTD8cwbxcfM-f1kpNj1Hoh8uBqoip59KP9JTJQOXZL927YETCwjIK02MgnKbp-DOxgd-QcDDodEsAWjfh5t-prEc2xHkl-KBJ16jN4cwCvyycxDG8cRzuNS1_sQaX19pXV_Mr3YeiFAmSJ8ZYoY2KZFFNceW6SlQmViSZPA83PU63JgmaGiiUP7kW3GDnmdQXinCy2tlt62mxiKbSn8KkHtZB87PsXWnPpPrOpzvhq_fdsiyAGqVXVn1I5eEjHLBHjQkO40Ptss8bsUUuRkxqg3Tci10iE_JKMFhijrpTuHYoaFWBXw3-ijXqbGP13wHm5vATUbvFzqbLO-jK5U4zm18WgzOtLVnEVcCq4evzRX4N2h26ru1dcLvj4xzUAZ5tuVyvirKPcYFSbt-NeGiaJM9uXlksN77ny7YQ7Hi1IsTGwBRbxBMSA_DvoXi7Hyto18EWXWuWiMIcT18YsUTJ8uY-Zi7ONoGEif-YAL_xDX7IY2i8kD-zpGW7MwsHFhJlSPvv8NWZ_7Cv-dOSMHRGq_RuCsxEmrU161RyVWQNapJQl0bz5858BepFqQ-WNdOMl3vsduiFaw4Zos55ZHlAA4dntvtlRcss06a-9_fSPOHP6Ub0ILis3huDvLaqrAhcuc1Uyjs3M3OjJVaSzcfYxAMZY-qJo4edYbAlXRhVEV0xbULglVttAYCqtSczFbpOeUrA6EkgvxTZvUsdclE4Gj5Mfo0o4MEjl3ZQYWb4mv18Q3cNn3WnGR-8OLHuBHIPLd7ZYW34-Y_UTZaGF5Ftw01fHNZDivcp3380koPeuJh7m69UOcff7exIDk0vUb8pYd8uXA5Bpvm-zHCR0r0QhhgSFTeP8O-m3crya4nwtwdonz5QEdh0mqZrC5ilk66vSuOj5bPOgCex7cmD5ZKVBv8IeyEgiBtTnx2suRKu9qfew0FW8m1V0079OtB5MXpJqrXki6OIu__VJOutLm_68HTKwWdlUpadv82OaaZEBmOmlsiLzfmIdWZgeo-6w44G-7mziRo9G8ftX9Uuk_a9QMIhapOnKSsxfO36xNuIA5DfLUNG31xLzoOTsf9XWZGeV7jCEGPhrSYjqua9X3Hnaidu0esC-Z358gF90dIH2AzMBGJmJ1H8V83u-WGmzQPlRmbA34VZo91n4y2KhEUYXQ6mBo8e9iDLl52EKwQCgOQagdJ_wv5kUCtSYpGweYOOXgW6n6UGpEiSSmTGOBhuVdcwmoacg_1oNUFBjxzS2UYrEMiTU5zrS2Q3Mgl3MWLzOWRjDzvkf-Zni8CKxkwHaUc099H8RmT8_By1cqC8Ag6pIFM15lMYd-2GmAc
lTvOR8L1MWNidnB8-2ll5eGqDLv6qOAA0qo_t9uzX3JVYnfUzqjK5eKV8dnAN32RsKXCa-zvDV4S5xXOl47QGpZvTujSiMWch0h1zqzn0ZbnXCHE6Nrs31ebzd9t_wz_ShXqjkh_gUEDN_vLttJF5DQ0zwFBw0hLtRpl5jFh6mm7wSzOBdtPIJ1c9vAWYstA2YrPHy0fXbeylWqYILW3FskGcfRfqr4KFWg-c5AjspMsyKDRIjDD12j3YK9bDfmX9SVxTLgJFX_D-wOuQUEJe0yRYFMk939NJdZS8k5WlHy2f4r0X2x846vqqok0A9kAdZY2IDbgNqmhuDKcmS_GmOn87-b0aHQE_fAvzpzm-Yq2PCrMXYhExdTlUl_KEm_1Xd-AFGDDnNCpQySIZ-k8fY-MB8YtJ8V_cMfHaqTwfdUk4kUbj3_HGjD2Z8nMIHWiuE3h8zgHG6SCKwj0bg8OjFiDpfAH7_51F53QOFVLKicR6oJ4Z_Lappg3FCHkWmXyWMMhTDloKHWeAVMBrEDLaunuuuCtl6-9ND_xDK0kNJC03Q1ZGW-bDN68RpLfCq0rdOGQ-EGr_ylU25-wJ28Bj-QRx-WhTozn6AsyV7nH_AXoRQw2B_ayO5aW6TpW6j5KHdaHs7D-7inSZnj4VKiH0mychJsGBSaKDn_r5ZrcinPNgizqR3HAI_naYjA1_qhmowRIQ8yi9jXmdy5Vk6zRD5Kzk0T07AnB0trJJzD0vlJgq5J3p5PQAmsJ8dHpUiJLWsvWZ2Zw_xdo7ipCBql26SPKKdFTOIB6Vht7Bwim9G_UYMtgKX3mBg85VFaUnAeg9-y72g0iceeP5qhDWKSnB9hOQVgKFaVIpyYv6g5cU-CQ6VfTuhubix2ytqRJiFxn_89pYno4_9p5hlMqG725b5cQTTzWN4yV6wCQAkobuYWTUz_f5qCusEI1kkgw8-_Kux6cWMH0xTMdRuhABxAhreZ7XOy8vmDtT6UseIznwfQXYAjVx9sH2VqCTCavX2y8FAInbRBIHQZ5JwmO9PKFCs8YcoPBKzvo3UhvJxnA9P2zp2iAnpMK8_0BLCW_vV37RZWoXnbp14YrezPQ-CtYmttWS4VKBQKCOnDeW14Axu692BCiM--Wr0mYZKRavkkkrn9ZazsVwAyiWoqEwF4URQNpm2odo5MbtjuM_4Im3ggh7ePoTPCEkEbDlC6g5y0j7PAhB_ldaDm2JMtFcJEviQqKh9rH97t6epegmVt7j6hZ8BcyoC3aWPPYzwgc66JxfDoJOjtbMfg8Xgf13a0W0-BbmomzasrRgwRF8v6SBk7w16DEbqhSphniOVPY0ilvo8YYYvfDy7ri4PPXRriagOZkwNrVIdb-ATJ8NDewF370cj9miSynIGECYb1sUz2fecEP2Rf3dgwmj4uftQdQxojtHzHqbz_4M3-Ir6maUomA3OXAVyz0Pzak_nOOcRHLisJz1gqFfwDkk2-xNlBqOtAmyQEvpkVexLkFxIjCWVcXgQ6IV1YJuYj3FN5w7PSnPRPh-Ydczv9N7ZQr9jDi_YaWSTvXmAR0g0BLXDt5ixwv0cRkm7DCatkH9jyZ0f22crI-tdMgFC_3NH3VFdxDcPN310NXATrb4b2IMjeUt944TlT-tmpVVnW84XNdoQA1LycobO2KdgMZZ4aCS8-IzPCmx_kaZTsrBGj299rwLZjXhOxTL-WrkpW5SlNzMHOe5dDfqKkNryinKxnx6OPHpqAcSKyeesZjk0QeuFCG8JIVOuEpqxWuPZrgZnQkRPwH1J49Y7gtYpGslBgYNRMm0eAJalHMqAPLkuEsSav8mQQU8Lafcn_RfuYfodwKQfOUE-3sD1muFVTSiet69N0P8_7yvhJ9h0HBu2GipbfKRNMdypqOypv65PcNqnlx1_eGixajqqnBSLqFLoNTcrcazb8InI3ZagKKta16EcWmAi90WMPalU_9DiZ5Rx8NFvFnLP3Id_gSt0w36UitmiA__Q-iTfirDd9BNCw66jKImXkAM9zMQeZzDa3u7pP8Gv8SB1YQtVCiE0R89atr80WhP4sLQfJo_NOJX0kwNzA13Id2QDrL80DtCxj-mq-31ynVFBz6BAB2LRkE1BkwxsCo4LayVIpDoXhpHbkrN7ZLmo6dLAJ2zol1bPbjgJficT79eBtpku2D6BZgg8pY_O6OHPC-M2wder4tG32E26Oec5WfMJPes7-inEabXL_XP1MtZfE2lgOP3go0N5WEyKdL-EpJHYzrkhSB2Qy36PMlwbm5thVE2j1LdQVfV6igFvuSTWK9qAzQ8WiCLXTRsl27MepkF-2d3kbB9RJTawGkgqf5ST3epJzc2RPg9A3FHbujJHaCNdxPH8MICqmr_PKbe_ERkbXweZAxeb43_IR896mLmUgoQ90xntBVksx0AIwwFkY8chbaQnaxWKOX1-mc7jMOb6iZpWljFj-fZZkXqwVQQGVJ2ffm5dkOFcXedQOAQeAQ_aPxeFdcsx-5efSHDK8Xn7EH08oFEwlS2MMiv1wkkT4Gjf2qqncJS498VtUojLAFnXYjyzEI23cBwMP42IJVF1OI_EpieyhFun-LCw5VHK0kovZPpgyWtP0N34p_kndpnexQeuTdnXLqx-fiKzrUnnNHkhpGUo2qNTnxtsdOCM8fLl8Gm6I_IpSUkJAvGeaVJVqHBMTIlcGRZu9SOgwqBzaOAv3hjs55YIHeOuDKBOsldoXbbQkudOclKaUqyodCKbrT0lamMvXfjw0Oamyj-jebOufJ4f5NxmaZ_1IhQ2v9th50w8QknJ4BL5VbvpNTwVdKawTiFTYFOu7pbRhZ0V6hBYfLjFPkj3kgNR9jEMGLgSS169Dr8gMr3Ep7nRPes3H_6QM72zLcz32o-YwZUAB8H-9r0W4C50LtH99Kz_FIPaFZF_wtBkLO1d4EAs1WqT56Xj9mg0hjj8VdL7A16lQ2Mgmu3B7SnxrCKMwzMV_sgaENtJQb6VvfQmkEl9L6-csc65SUL8t_DW1fUe1ewU7aE-sjR0hkHwaQVcAwi4xfdpj8g0L_VnCsGTNQ8f4vuqZWEf7cspjNqQWQnKeT6JN5EWmz0KjwSCmGVQ-tNAprZMOQtdabYD1xYhXGVyymZfiBTXnjpyGeVBt9FXJn5nwo6b11QY9AS-afGNJZUF8eanCRjsgw4gKmm8gXNVmbEoMtsZrixr3lxPN1Wj-BPpfTwz6O3bGouDzrvlRs9JkAAlUZEYcxzpwgCYKq7Q2DNs3j3q_fKYhNdJ9zwgqXsCRiW6M59UTpdSaLnIwJnP3Ll9Y2GeWZasRpmnX6_pWcYdsladhvnQHna_AwS48nqFb5CDsjCazfLdxyblPn8_3KEk68saoA34JfUCi6Q79D1YtFot2BwP8J26GPfa3FoN53gWtVnmmgWkRn25jH3DjpCLq-r2uIN0rvoVPKYSgENO0BkvTrgzSJdnSinwHW6ICd9o6FVXvyU3cqhg_fSEcxXRlK6012F3ahA91pcQPh2sVbR59qn62P1Ls45L28c0GVEgoXBwu
iwbwGy8zR8JKu4BsV2l3tF6mQ6rU91prK5viEZSpq8ep2gNCSSsXbt_iKaMPUWlLqT-u3mnlqSlP6TbUuOWIpbVIWnZH5qoDTniBVa4ZbOlqR85Gdw4xxNEDgo3R4CNxoCyP6eeZtEjU7KoQiNcNTe7AUcy1_BMUFOUbfGwAQQKCl5erv365k2lFBcCP89XgM4YKmLPcmp7r6mG1Z9123L0cGrNJbxN4sgZDud7gYaX86ff4nERvNuy6gG464G8uwPzfAaIQxx29G8K-gMfmv58V-vtUiDEzT8WhaFty-nG34Ne_mERo0uQIA-NMsd_joJgvcddbzXUUsZAcnVZqShtFGYC-7Ujg0qmKBJwnzU3_zyVS0ZyRlnZ5BI9yqK2-zNKOdGwus0Mr8y8jEurmRTkhbNYFF8yYxpacOPCzr6Kjr1zSvKSqaPKE8CyL8pOuR_yz2Lmy-VHA86ZnLQmg3jEeUzDqoYmEzP3EHnySdhPPA-N66lAUo4IpGC2GJPIVhW9J0KrKmAu0b4RVasQLNSHNvchovZJY8Qq7L20pnTKagOv2gG9M1sCBXGVA7wTtKaSNTOof0b4ncp-cqi1OChoyHLbRw_purELGZLBpzlDoWYpLfVC7_aLu37kQASnN--geEpS-ZSOFvLL08fzNY8wudJx1FsuMuRTHuFcjh2kL9sy4KuLWC3ZWiXKJq4BvITcYLkYLAsopi5Ejudj1xqv_e6Fk1a6gOmJ8XTIArH4lOZSzanYCYuEZIGWOmdtwjNBtTq8Ra42rVRs-hwcleDyf3WcxYc-lc444JHsW_VC9mS_Pswet3ktyokqmr2FmYopHbUt8gag1f5Z3OUn_AixyxArMIJNnMWEPZncX_beu5KkzEyD06fQeWIhWBkn91icpaWDjQ4UmpY1RS45WE1eSLdMIbMHiL-R8_8_sQ9BRv4C0SO8zJa1yPjBaKke2lps-PM4u8k1hUxWFcHQnMemayLSPcyXvgLjwrP6uEfU9I-hoCbWsfYfhITj1SlBmVWttx2cvcwhRM5ek019ONjev61AyK_J92Wy29OvTgB4EccLgZWqsWuqOJ0kC8c7Wuwrz4M_aFy2RDq922NiHAHp_yiQ1yVrPZAWx0BBKSGO5MTW1HKBvxwIXTw_uZ3j09RWzB0snlHeg0KcD4TOxjfTI7Df73D961opj9Dgv7jWykJ2t-qBdvbvPWubkUcm_M3y804A766fJHdATm5dXY82LYiq5YARGRooDasNYc0rO8Ji6w5mYNrpJU-omYaW0HNwswRcVNNfnjzYNrDHx9GSXwpQyJZXN4dEiIZieLUg-crjycOYTxRlS_dEuPZzANwtqGFvx53NjaLCAc1Yvxs8ir8cmFDqOGtqK4O8WU5GoACviuUU4k3Webvp4R2GUeLX2cW41EX0NnxzSvpV_ndTykMWGJNebrcgMOs-mhpQFoS9UKvJaI1pWA3wmDc0ZrTwg2Vca4k03Sgx3iVeZQgPDLGdHr4PnG0Io3ng390JwxqE0ZG1L6Ng413GyRK5tlkPhdyhTfvsjj9vVstIfV572sG1HEJRtZ4jM9j8heOkvW4IyKMJVRWwQ6hJ1T00Tx64-0FzRe3yFn_Q-ooOCBhwSSQphzspDFqSsdyIi7FoGryHoby67J_zrUEepEyXumefshKQ5j0BDNwCIhoJDdl8RXH6tYL_R7iDByABSPWUkEm0FZSu-ZB6RYcC5y6ulT7WgRylQ6GKqKePe50xeb9fHjlXUjdVEuTjUY8OIBnJUiUCaSNc562eUiGjF_DSb7kPHyONjIeG3fm3xEcn9Ej5okHFcfnti0Q1pM2zfyoXW1Ciz_ua1p1Cvr2ahsLI3Pd1aCvxwJC9iiIlOk8GxWLNmTBeJUGmob6e6lFBaQnPNSlSu4DWMJrgJRYyd6UfmpgQEncr7SAASgYNzEo7cGQiBnQLiPuxMBCnDJSEvU5AVlJI1A_VpwpqA61S9b2valiek7t_-rXn8p_oOiUkpirn2LlxSAsgp5zsB1TuK0XvEVW9drJGIx45kNOUSl77J8UfktOkgRIraReQaR2WRJntbOCBIEDquOVCtqolXI0fD5jmxKxh2o-1l7z7eHv9F3FjrXWwPfQeF4uUqnTi4NOwryp_cMJfPIhs8LnoVxSqUrm4TkifgrdfccGg6cZEDEBtJcL0K7PSojGtx5Jnq4FGMcJiJWAjX0X0I-YO5Q4QiMgjPLqsKE0r3wOx5SmLmwqjPyHzWA898NiggbaFE1Iy-aHfGGszyytA8hgQa38rX1P1QQWH6FEQK4japTznz-y5wPjeGkMbbYz9ej3wjj_32IoCxAXlC_3Vm5xMyx1Nb80xORWAiIsVT-EfmXPt1myt120VWHSl7p-5UzBh654iFq3HPiyl1aiqH-7vM4b1ylbSEAze2z1Hen-M1bamNsfSgvxu9Gp3E6XjPD-FchdZoQYRe6HG0q4RuUqbx0nGW4QKW7Knsr2jWNohXZOPr2ASeMJvOzV1cnPYiL57kdAEZb6ck6tBzpagHSWKB2L-ngD4-jPAbTSTFuLx9eLtKeFZqxMZGqOik-43uaO8iVYWla_ZPxXdsnVAloHbYiUr3t5jzkMFyZJ8E0VZsAAuOgrReus3-1fgFi67h2MMeuQHZZb1eOM_eFTsLe6A1RhBfUyKFDWAY7OWMzbF66Yw33PKN-8tK77eXf2B-SI2aogW-Xgnedppz0ia8vvZkY9CnuZxnnDISz_thpJIxBmmFnszjwxnz6DjeAPVWIq4D4PvsNmcMEZpkTLE91kJioIITgbGx4UMpfjR4mU_QgmChLc2jwc9DB5MiOniCEXGEa6yuM5HwtaB6S6Y4AmKxblDUb7C-Ho2rhB0_-KAsjX66MFNw_dt0AxyvJ04eQhSfB2HWwtt3AymW2VMs8y9YKTm9RybRlFbRps7D2SvK24ZzV0VrWeDfGGb9ikyPuK-TWp6OEGXalj8I7wiMgjZCAa2ZNZ316eNVqaJojmOSvhZKD6roWcoY-4eOcbO2ICwAha7z3kEDZSnNwXy-s53oflsjJYoMxwW8QkEjT3NCcGFlPPWeHy8iRYwzK-WzXjw0BxEcKXi7V_8uQ_QZT9OHMvYVWJLY6YakxOceDA5nuusAU5Gqd761BcQgfywNMdnQC9s0mrIeQVvWdJEgE_DafG-doEwgNLnjgT8rPnT6StvU6aAZBi6JRXgoJXz6IW8DZc7FM7dWx-lIz9UsPRvbxzybHiOcGMXZjOvR67mrpBIVJ8ZGe9EIx1ik_8Dcbr4U85sDtgwvETXkSa9suYhkkr0FOeu3xQv4Z24LTRBB2MDgOcPPYxMjGVSVwEhBI09TWbrMqauIWPYS1dhErGLn7ii1S5LB6j2DyohpguJqlxb1rbV9cOfWqapIAzXVdAmlYPfcWRTrqvTS0AGPdhZ5YXHubYB2fHKMr4-8GN6DxBKIzYYUh10yQwKlamw6bzwnWwgG1mqkCWmOCR0AHhrr7EfNsr4tcFjQ6BL8hStymw6jr6bLnvikGJ1eRPY7G3ebL_jdxcRWaY_1QXd
mPfd194T0djLvKKgcdJnmJKLGHM_iCcK8txH44UKA9CoQoGXsuxyPF_HXobbfqcn8vtMqzZHW35yYjRGNlwYuG5NeJXcFts7A4tjmqm2LirJwlm3WwAm7DF5veV08XaX328CzxuMk_01MjtlgZoUvrVGZIq3JDKjs4QIAw_3LopnvgX_CGs3kbpnIBPOTgS0I8PB2UUOj6lcssLywq1zIcntr-htT3ThtMviLzO6ccRo5IF-wuud1gm9VNAVVJhfrxPa97HOODFAAELVDE4a9F7sGLdOjHxEut6PdQDwNIh0SyVyh088odf-ChC0uGYJbfYucx_of091sDERLDoWHZu0_0ri8RlECNAAIQx7tFjr9R9MxpzqVpXKPZ1hbRSBTKGjgRdzYF_Wq3cssZN2RTkOo-yQq1IHx8WF6fuC1U_QymI5eBdML2kDzVRAUpqaLIL-aNzzdJBz-yFnDBP_75SQBUYi35gdedaL51DApkngvc9EZ_OLswBDu9gT1WgXUNkEcjoBtbmUFmds8gOy8ml119xz40FzABjXtoXE1hxy6lSYwHSirD8JmrlNWbObVvuw3SX3aJVta7LCn50rInfcu_SxmCkx9NU5hSRx3q8qFDyX2UX9FplGR-jARCAE04cBgkKUi7yykP5Jd7XG41NJMH0o_f9g1Kpcxb12-QGSJwAcRSra8iuIj1dQz9QVGw6pS01R86zs0yi530bdZ0Djpm0djOqZ-mSiiS9xMM3AnV00oaZrTDhA9ZFM1gGmEGbuOcfrK73H09f30xp9vdn1yT-ocot5GE0mX0ZcyBkjf6Sau7a4rNWnOv2ui6qD-0bD_4qGKRhu3eXtHQdBmzmHKyOGzG5m4B6W4Ipkd9V3Wa7w8_dYeCR5n31dtQPHlc5OG4Q19q0rCftyQ0G1lcwZWUQh0T-1Q7AzVa5FCO7ET94SZbTjrP8zo2xspmgAeX1-aNMw_S4s94JPnlGsCoNigVAV1eXpzZm8HYhPv56AH_r1RHFT6CGYRDnled0mm7T_LzHGJPchmDEVIGbAQPesEp-vY-KgZiA64Y4Dw8iwJCnBtCj95uFaKAfCew88B1Kuu4dv6kYj5FEQdjt9EueNmlngT6wm2-Bg1EWK7K5hV9ZxufP94fp3GnD8AdMpqe6wNDve4CtepgQtkfwy2lwu61fIy9k9xyTKb0g60SACV1eDxxAC6GsMOIaHUVqg1JUBQo6JqrA52tso4TXIw1omg-IpY1oKjtiJ0Bdwgw9cvPlNbul4UhvTNIxUfmyz-lJhI0lbdtkVsGQli-IZBytax5Ofpj5-7FAsOG-Wk23IBPx2eTSUoiD3arWuHPgpV-CWPWqEnOCVk3NigI7t3-OjKNSjN9lEZbi5DOblGQAdRvhFJWgk5IgAUO90Vx7xqWUSlEBiTBAJgtizPf039AlyGp9lBn9Uys0l5ZYCYspxBL0Qn1AYg931wtkmWJldC8m6oTBSpyndtU8_29bHJR8nmjIBZiX2J0E_WCjostHhnkdcse6H4qUctuEyWwFNDqnFYb2dftcD1D27CyBl6oPjVB1At_as0VLqYBC9eke249O8zeMTR-3JiBYx8FHM4UM9yO98Ctheyseny5fntJXEMrXfSEoHcxvtgINYeOZb2-vh9krh7luEKo64TpjRKEAKHE7481zLRk_8DRH1vnObgGj4-C_LeQsrsYQGyD7duWV8v-A6veCD-3OBMnj0v6pfHemg97hi-MMOhRZ9NLxsCzCydI64g7gG3Yj7Uz51ocamEYuChyiE8FooxMFvRczrb28VJpV2kDvmbf1PjufIYoZBtyuDXbdUa-TAhk530YipD9QSIFdaqDcj8LotLr95xfetrEJEDuay3Al8j0ME8qiFrYi44yUPZNsqQsbiGK5L7RbZDXp0xsGXPh3S6vJxtN91MXNzgRh_6nNVCl2oZarKb5qXIM6pJSEXPmq5wHAZH0c08yzYhpMJUpW6GGAEsFydFesj6gC6EWzaD-QOG4UigFygS6Fup3FbHRjZdZ6ZP4oFyFSgGj-_WfO5fAiie-wliar7Qtb0TlzkkkNp3Ujgbaq4g66fj5UwwFhUzpx2W_XpzHi7VbL5yn6nDtjHV3hn1DY3IGHke7WespgVHnvaLwFSXk7sYVjkDk5GMEYmg71wRvrGt0iKIweGAT780PvppVVPMzRg2WHkjgnRZUy2tWUa3heaBoc5WB_1ErGNQIMMba3zVcX2XxSWEbv_G5QTuU6NwujqWtCuLG-CraQ8ycxc0llWXAUPlzJuKyu4EsIX9MoYsDwClMkICvpvrKj1Ad51MtsZq_ORljPdZm4efyENs3YImDIvkw8TmgA7onGEKTma50wykcgIIznivjlX9iYVWpsRL0FcrEGqBjRnqlEufv-QpLL0_Lw_5UXtZpKLBm3Sw63W_NhYIJh8wqhsHY8U4dfH0hVOpWOSEmr8ERk4FKiac6do71b-a6QAEXEBaBrbEyMbyPQAdBmtYgeGjJPe4Tw6QCU9tLwRyVnFSrnz7LIi26Q3-VrC4s_k1qPpCnnwwVyxgTydZSGLXvRM58Z3MBJw-IQ9HA4NnxmBgESNOYMS48hcYtXJxlPl84fhQyGm39kvXwhGwKIwXcAKWVRT60XzF1y7Egr3y_YVSht4EISTQ7Yjv7lHuvi-umo34nB4H-v9dadihIT0ufiw65PtG7LkJ4rxnjG6aiOfPs0162J9QRp6sr6k5aA5lGGsMPxe3lOKYYLoQXy8lqFKO-5pNB7cYrSjKkltj9B0Ry2-cYwyWuk5LqMf4vwb_BZEHI3Dfc3VX_B83-vZ30MqLIagp1X9X46wvEJFgpVZCKimzWlFGFC--UQz5gIN8Q0PSm37fP4cScuylOB-Eh1QF_aXipA2qnYgMtUSwBir4scQ8Tg4KDvoyN003d31lEQlKoIGcoDQ3m7YC4SoUmIL7HUjrzkHXL9k4LtlQAagQZMHz-2mg-tFQUxtNdYu59xXAJHuj8vziper4s8LgHcJjsigbKKshkQbAS_VC2E80uOO9-ghshiYLN1g6yTTzG8PVL0TIiUBikCdWMx8r4Ebxi9aOaGcxD7XS49tZXjk42uXBCkadZMz6MURpBttjhFM64ftEsVui8C5KkBg7Vicp7ftBc2l33udBbcKeA98Jv95XtYpYMhLfAFioa3hlqtKtp46dLNk1k7ra1rUnIwJmX0fu5DgJuKFF_uoY58Z0vw--Re1f4nHovI54mbcw8OlVoi0kbHLcZUteVqLFIxiMvIChLInqVz7nFI0Xb07OiXpzKJykSZMM9KoW0X-yf5DqlvjSCW9houhHhFAeslaEXmvweUHslpRx5nRpfB_vb2DJDWdKBz2HYo_35-AHmqlVimXPJ7R1fKk9yQfTOeRm9ISxbSy4h5H74hvhEjZRjARLkPIwUYNiHF726mV_jJ0VmNNq1e4R7KmtvQ7sJlVEJlKeCny6_2dZou6_iy4s2TJWbHQtvUtWLo413LYrwgbLdntdH9KfL2jba-POXSjojKvrfxYJ8I0Z6-u_PtfR4PPzDU
espnmM36eIhq4WaBupjYn4pjDSKPeveepB4e29fWnCoR9STzN1IUR65m3uYYQwktznBb8WBu1iAWPjFRcxTNVrHuY5_lBoQDdjWCOYM_Ik1hn0JBFGVKKEgd5Ko4F66TA3FgI_u46Xlld7ek-z1lJiaPZTTgAc5QDU5KPuj1hhA04iZf12QEk7_0KDrWOq273gWdjrNJt_vWSqWbCHmj6TfOrRmPK3njdZvFdgwk-WVDZA4aoFUgNBrQOjJf2bo-7Xbe_NWmNp8ZO0UWZ7ZwVODi_eSBnSYrmh3Lxehi4snnKDH08-PDM0L0dj31mpo_zmWZ6e06w26wk_nO-9LwcaRuxi-O4awuSWAXQj05TSUVHbP90hYC76xJHgEMLtptVZOMwNnjgyzYGoPWNonVEq-b8a4YoPMONnCdhY5aSQ_bqDKDHTeafmJrn9Gl3YeYFgRyaB5zbmOPweH0h9OPUsgySGL9uNpH1IsQ2egintUskJGQ_Pzx4oGxKzBpDoeiXz5l2DlqftOWLMigFKrlXMyeiWX-AI47InB-0AgSOKxT6MhwYGf1EwRpYFgmXmYXwYrxrH-vrqd9nivCs22zd-TeusL-ULiddGc6KEqnKo7uLqoiQoRHKr42VZBRNUvghPvQMjxI14_Yuj6WqUdpEt_VMueoMT_QRfEUe-V2FEi7qdYTVLC8TmlKWHEMcclsWPjYQ5wW98Slf5bHEQNmXVKuLJ2WdeLPmmXfreN5b8VM0ka3wEJ_x-sLYtOisfebJQcT_0yqihLTpu8NZBx3H5hShCCEH2G1kXO2P_VcBkyooncF8w6Q3I9t8_0X2xl8BtgU4ulj9s1bxgKX2LnZl5Xb47O8jtvN7dOHaaDvyJY5WyIuTKPdd_pniOOkgn1I-LeaHukmD6ojIyqX2eID8HHxiawRp--rt2-6g4j-tIeServkAMkluiHHjiQHVGOu5yi4rBh1ujvgaRzJNb95DDQ6_1NOow3ZIdtwlAm2VnIwl9MW3-3-7iv3z_ixNLgXGSbmDjGMQgfm5ayRMXRXYRysRlposfgw2m19GVRU270Z-3AohK_dT5AlDAb3xknbW924MWTUFrNYFMQDtru09SS2IJ9sxKozaL07Pu73I80CDUcL2s1bjYiaBo1n3su7-J9i-67kxO1-ZCJtEtpcBJxaKeh_V7v8TTgsbW3jjNRwoR_VjRdHAbyqBqj60fcLiBOORPJtIKgVuICaTeS4M7sPRJk35pP539aelXn5fBqlzypLZTZu1nAnRLkGq83kLrA32KTXPk5INIf4cVtLczOHG2ZWKyidKov5dFI6h7NitplchjwPUIQYRLk-EYrHVbi9EfgTVkKoQ9OPT8tXVfXESUfO2T06nWVrO3ThEXDK2fEn3y3b15a_zXEY5utSiyZmNgHb3kOz0IOuEKBaEViXIzPXDwidx1aku_psxAPmatqru8x9nbOqMhHvX5H5he02hLcKj2yeQfwCMdE4jr7QHEYnlbcdcY9r5RZ8i1W9hJODP7Gsi-y39Oz5LP5mh8fQ2Celd8v80dPKDkFq1-lkJxzbjiRueiHlKBoqjclCGWX2tCO_5R5xMs4-Nsk920HPPs6yFibHYjqmP4XOL17dhKun3RcxYKW2RRD2vo1nYLVuo6E4lcekKKEj3MvxoXyBPb-bKKo19odoiQZ1qyF35ZDlfuBJpfLL_YPOleCJ8AGL8ITlAeKVgA8MNl-Jgf0Btm99AU91ez5Nwc20znV5cbwrjoXUcuZ_SbjPFP6KZHZrjVkxmqQRZMqw3oXZzqdRVohoA8m_TiewUK82RBhQTBD6zextSnyiJkveRi7njJ4Af90hh75RMmn8Y0BVZGmKwlyMRyYEdwiKCofAE-rTcoq2Wzw0_okpYflTwiBSoQEOZDmwxSNYtw7XTc9xAVg7-0_oSW4i7QnGFel57vD9b7GUubnoPYeqsrItnCtJkoBVBd64r46XyZxDq38x0MDlMqeHvrYoVSylzijhWp4h06lgWJTRSfoq_6hEQylhYUK0Y1ijN3PwVL_kSDFPK5CKpxiDbhgI1Br2YqIrcaNh0s5UW85hTMEBbnk4jWxoXm3eu9BNKJk7XhZY45k3bOzDWuXtVZCyRIU26S_UCmHDbK9YKFe6aJUnzErpzlEcA6783pwUIrad1HNlRW5tu5ZyNS9fvFWqQmG10UwAt3GfPpqtAlsxZOkrxzrPLzUYaqy-4LVOiv0IUa8nIBUWvy-SgpnZXoRMuvlB0bWofwqlng5z26OjQtfC_ASvBGF0jEi9SUBW1puvYMehMDQgR8j9KA_7zNK1mv8bSNUS11Fm7VTMWAS7bx8SXfCaNLZ00IvZ-CEqzp-hOzwStQb9WYEe5KGe-4V1pb9aCZz4CXjsljoRb5DZRrm0vzA7tT3YJ0bg4nXz5gqb2T8k3LIgL2fz623sbxAEAiME5d9dJzbZYyw_8RgraxMV_D7qn8pAU9CVoaj88CM4osA_2M_fgeLfB7zwVsCLIpJxLJdT_F26FstM3pDcjgUpdLK9qYTh3TTLaHBS34lwjbjRncXpwvE0m8mplFcLaiTJ0Ni5bhwhdHykvzolA_oEf4V0J99Qkp_TcOVGh1MrM-7R2cqgg9ZVb0nbqhAk7gpeUunzD-sV8mzpx120F--jlGTDNEuadhaozzp0cwRwVwgfK5UnWLonQ523YQrzHbIgWF8ctXKQ54VQD70DShHTNcLdLmI3yTYbToQ6bz6n-ScTE2PnUA0zqHWsJAm1mQDowQnHgM8KEJSCQ3CPDQ_2mNNHOnplWPkRSEngiN7Ab6zraI_7vT_g1JQUSVlhqMvl40ytcyJqqHx6BX_pRR76T0ckQ5XY_aiJw9sXRmh9sjVFA3kOwmS18k6awN8pR-jL6adzXX3MQvlaMtvz05nKYb7oOMqUk03pfNozB1sqmT_Hz9HTuiwq3uhY-cfUXIpLgw2-_C4qigZ48oNZZxbDismn_leHpSVgJ61yCwTVNRr5iwyK-mo4WUg2_9DbQ8pmxuhP5ODe1U0YGPiQkwNJ8mtbcCeRUQCzH2omSgmJUWGiwbwVqi2GKSiB30LqScUSYeZ95xPmtSjnrn57mG9iV1zIUjjsJKY-stni7Gcz7bHTpe55tB4ZMxNyL5XAnLk5aGZDYUoJ7tSoZyXjmsnjxLSmPdBCykEOVBi8QmuCXoAVliAl2i15EOiYSKP808I-fCNsFEqXsj8G8YpVleUuKtesK52VWlHRwuj0WrVWddihLHtIkT0etMc7B9BLqp6BK1gj-QHz9zQOGLfzUOhKJqh-CcYcUAYQyoYfUYhTaDDf6mUS2Fn8MXmSKzETIydOdtcSuBl5TuVjof3v9yIEGeLyPIegCdKuq0355iYJFXY4nZ7vTnAdJNVuqDzHjbhOdc8cq3q5TONMbTMsBg-0gKD9es23uJ69uSTuFoZPOB54LRu-VutotVV_hrbjB7TvLj4JhSQ523G4qFv9izZUF00jIyVcyFPi2E-LnldHp_0CPXZNCjkidIgxPUDVIisUWQDr2-Fx2SXcXwJ-zXx
D5rDUFYaX_5NrMv2fbt8n1vUgfgBINESpD9Wwie4VG2sSOKP3FhBdClgFAun9tNLp2mvPdnBrA7dKEGe_KtQ-12F4epJQuNDCxEUDu13yELM8CQ32SX19d0OHx5OHt8DKEAuOi5PnduqF0sI-lACbu3eTImmO39Tqqh_fKyPNowaPdpUPof3KW9AbpN2jMl2Kn2Ibh0bijwcUREn8ytPtbHak8-AaU0ptgZZWImn-_b6pQxjuKAuJmhbiKEOJphIO_T1BwSlmCiFE8G_bnHbsoKCEydHcj12Sllr8Y5B7cwXtmtn1WuA237geu7e97LxXJzDimSnhXsGLHAsTjh-fA43A18p88XsSTVSG5qdMKFF9xOWjZuPeVtZ7brqctQXJiOqBjYliEUpj4kcofuEOuF0I_DBsbcDu4tgyk5KgEnPNZ9RziUluVgoeuoCzJaQcL2NTFipOTfMVqP2cCaRzkS1kSbfl236lQF-p7ihdW8t9VtQPfDg-KIQ6peZhssUzgfuVpcV2BQ7A7eVe0mQErof3nk5j6wSeCbK06q0vWfFz0FS0zhI3SXNPrd_FXkivKPJFQBxwI4bvrl81Is6ctQmWegrlQwe_j0I5YZJE2aQxHgtoqx54QtyP41uzjHf8-YFkA9U2ha6FqSvHu2zoA8Gn_bm_T_yeQfMAqS_BP-VOi2-B7I91UtMMMPmE8ZACrjZCYREuSqUkL-T6y3I3YK77Qz-xGUD0FMTKGNHJAecWS-NKZCnR1D3gHQ5-lpJ8Qh4-YiXbYX9n8RTgRAzInb_gWnWqOKP_yQi7yn9UiFl5vKTIE8Pd1qevjcKwBEKLeilE5Il4YX1zFeUhj06-3c2ArDqv9HDPgCQi5c8RxPrpTi5fk1efkaLhgvumWhgx-swoV1E6vJ2m2NFuPIVU9cezuhdr-hpQRnykUQS5fKFD6s-l_UI3u7P0D9MLpqO5m6-nzF2S_myeAzXPLALw5S65w6aIISaMr5pFq432BuxUEzCXF-xuN3-iL63lostQYOqraWqvoZr1V3hPETApB3huPLucjhD5MNiODxiowPuu5Ik0xN45vVAnjQUSaihkKuz9zLBSJs_LFpjWCGr8Ei4yIATCJrUe97Spt32GQb6AXj5kevy8a6S3TDoprzf4zSyxwOoG0iyzdlt5_285oLH-PpqWfBnkq4IzqutMUe8tr7J0DKVBfxRxMx40-5E-pA4N7q1bfiMFi-LHbMfH3gDkGo6_3dL5FwY0wK3IYkiHIYmoZDuepGfG1gwP4Q5O3IXc9bMRiNZUmj6YBpVCogkh4XK4CowFrOe0khul1mZqe3vOb-6nPoMWl1r7M3yaM0H5AonADFQT_Otix9P6L2hEwQ7fGik7D2byAXXUkFz3-PhGydAKs-h8eBMA8chQ7FgGsLYaU_X_VCfG-vlwMB4k3PVQz6S3XoPcEAtVg2ZC-eIctyOSBSmlxr8OykZrOw7-cUoV_-aEdeQxsCuD5eXiPP-q3tFT6VtoJyMNeX7ssCroTi_mOqlBtpLGYAuaFodjOKG4Q7sz0UjMI6axWoFZ1K17Yv-MqTeIQSlbpXHGBPK1ndetODitYsSEjlEbPSSTseSyk4G3zYKxyYd_3J__YJTCVc1n6YJdvUZDlguuhApQA-XcuJSmBXogTADg_mFpYn-sA2oyyVsOzt-rKVtcYUbp4t4A8aY_eaSjLTDwcoGnIxwqMpW1M5eBoL6d79WB44nCQhWpP0L_C6uHbuMoWZ3fi2i2Nvmo8aVVdE-9oB4pmXHqr_nU_yiwLkhxpiUptjXkaMAcdPKtnscaWV7iApl9-WakIcXiFBHNGW3HhKUlrwZFFOvsSqZAFKElw6hlFZ1YQjWvn5fZkPvirG1Oj-ipJL_rikNQb8jW3cv-qYaliRB-22q1IXMGmmfVDtB8H4xYBDDbcHaDe_fTpF-22h6I83jFPLUkgODEspozXWkJicabSK3h2Se_GkYAnqS5WQbe5--qcXWexNGqKEc0quBGGoOhQ5isqU-Pk6EkiTRCME4mXgKXuszeqcw3lvMB8FFxo70PngRIQEkIaV013cmCEimAnUNtsRN3_uMoIL0X9gUG9wOYWH0E2k2Y-azczCz8ZgDfkCLKU7a__btAPktJHSpjVufjpzaXV-QFvfI7cqVO3aC_rIlqZTxc6T43zwtM5KnsKTe4f2g2IU2ZUPrFuV8j6kWEJhyPYDOfdUKXMT17G7jmBQwT7B6ZseZBXdVeTV-m066bGxIQTCY0zaA4VIJsiiFlu6aP2W_RnPv-d3ere02mueVu1PsChVfmB65ZHkGYbkjtWtfWiSU3ydUOteIxpEK87Z1knJ_eaX25mSITyIK02NM66m0v2Ta3lSv3mpsRzyUefdV630t8bzYUflWJ2WuQuJskmyy04-YAMn-mSdaBcQa1MgcsW-3RnsKUOgOaIZgGcJ-u1C2Q0vppisj_55cNroUDcbmvRD49AOwSLKtwiiRSsmn2FlLTFCnE6R_8riblvA6p3k8-YcT6r_yfsm4VdokApp3QTqW_Vzrta3VtlyBxBzj0GebEVGLbtwlivSo6SmltxnPHS5b8AihBe8_xWRfxmDLs8zv62efssg4oBYjkaD0Hly9ab9xZXXSMqkc2d_PH_genL5B6-9XjA_bH7YLjGXKRpBrAr0ngigBROntAPFTQc9BqJdXCwnR9t3V29nHC_3GjNMPmoy4MnH1zwOy2QAr50OGaPrHJ6wmFnXY3IELsCzY9nunSWRNX-nE2gydhQsFiIBnYCSzbbnvQRyHEA-wUlDWCp8El3B2i0nYlsEU-aW7hDBRtUl7ZZ2kbIYXUIwd7L6ipz3ENfmClOliynqd5Wsc919BJ81UhsOJSRLlCG82Z59EqlLdAA-sP2nwdEXuJCDvazvx13eFAI-aWMmrIAIPtSdAsBSBPNQo5qcqbO4h29wFiE-A8gAQPPs3rAy0xR91Gh4wAo7FpzirvXSxTKAWd2n49WOrhHtzv6d2phVWqsEvId8DxoUM56GLex6Pc2djWfhr5Oc88S269HERbiqol8aex2YgVL9TaM56EfPkFdmhPr9KURq7IifBF9VxferWrpMvd7fk10fI4j5A1amBwinE3vC_jesB08C1IhCXm_xqUW-dm-2Bk9a6MlTFhe9W83yuf6d87FqQGbL3mzkE6lMRbhOxR9yh3PPIJs6PwTJ5XeLkImj5rht9HVxraO3W9AnSU2-j6XLmIL0XgAwubvx2mx160DnfiwUdMKGfBfwYDAk-IpD7iOj6DR3PvqJRxYHBTfZsPTltFiQyQ0enrJPc8wnxgpT_cHjKSeKO3cxBWqGEOFbzyLHl_LgR9DinpRJRXTq-1klAs0C_DbodlYRoendxS74uyaH8HpNA5Mo77Lv-oFh0I-EWVcW_4VVl0C917ERO1veeJgLHEk2kelViz9V6hEA4lm7W9UFk1uSiV9r9GhjLYLCrhapnv9gt-573XTzPGZV2KeGnU8JJu2cLA88l2xeanIGObRd087IQL4mxlk7FiYtvCZV1xFRlaLtM0JL
lM39nddH9JoTmsnPc2bZDJNdjZZb6lLGf-o6JN_EGQ2cxJzesVGfAvSpoE7fEEIdjxT3zRPUglnp7Y7-KKXGSKKEckoVPUVHr2ZlFt02JFNb7uR_G7lyYR0Ur4MZsquYsv-h3ZHPe1wWP-uvJBeMX_H8HG3MX6KmJ9K5M8wkA_opXJcdTQVfV6dBHCKwp7kTyfJlOjIgpQgRqWBjK28MG-ebuYI9E9KSFkBlmIcddqqYhwEYrOgg8Glo5Bs2XA9m2qK4fqmXzewb9gR2mg1zx6l4vTcQ1ZDw_Qwr4L4tFlo2SQm9CBrFPBmdXuBU-6vXgX9o-S8LY5xjSzhkqW1RUubTCga4oUlkmNGwrSICEXjZKymrO9KXeODM7m96UlLhNO8mXIm4LnuLVbwQYP0aQOdK5-mjEzRWgiwWLx_Qj3GTIWkAxebNQoNxiXJlieQp5dMsKSc2HW2Bkz2__GGrcMz_awpK95uiT6CIvnL5nwhV6o1hCwS-W-pBcTYZunY9cbcd3WauSK1Sm-C55oFHLrCnMeF8g-5DD3SsW_EqqBkXtlzIdAVT5dPP7huKJGnJQNu4s2APUkrcuey9nakoLlRxEu_SkKFyzv9SXJqppj4hHEhNxw3Pehtrg1UM_8OPnVWjrOQld49bvg1BCpjYOmFH7Ql_1OCLN3aKSa2Y7EUjn3u0j5QlNeabvM7WxvkBISCZAjpqDp92UTKAHnFX4lsACt76oZh8ReiF4R6SRJ4njhzpmFj95AGoybwqrDJcNKWA8rlHtWSQB2xZhY_zsbaeN8JTxO2ggZ_qsZNlzpbihKgoYgYuOcP3LIJrdwEoaFO5z4XPx3zrnUG0YROisIB-XNh3xCoq4qRGRH8feaw665rK_0LAANXsQe11vUu7wnvc2tjuRicu40y-Tx_QuR1UF-KJ4qlkzM632dd3RDKioiNssosuGC7b1wmMpsJsjQ7uJhTcdXeqgAMjPy3FlxuGKxChS3OZPCKtacmsuw0pc6WREZoSFVXsV4UG4YxTwlyGonwriEbpuy4jlP0_opaicEP9tGK5_MXrRxbs3TBFNQu0sV1PQ4-VZ_-RWfIbQL4lRRD4yG5C0zw1BvgKMI62Ki28AvhPJ9N7-T0nDLgB7px11FRiH6CHvYYft1gzPNRGhJ6WD-BD4mfyW_SbKhYj0Vb0T9PfIMG30oDkGdPKnP2sMEv_YzV2ARzbzOHAnW-VtWaA5b62C7REoiljWhm2tMAvmlSE6GacSX99BXRWYhbeSsOYgTMT0zAuZ3S7_YGh17k0kvp5V7pw7s57GWTEriaD4CN8qBxNFwQ9Cxotfh6_ICNbmZI7eDa8aTV1Ss9E1RUGYHBNacbtnQ2Ema57bW-Lqss1QWIUxHh1xrWeZnMnPT9-HlPeCAnVRQF47GI94TVO60DKegvKooAboWj7li0yeRAgBzMoPY_eO2ZVRBuwpHIGyCL30puX3RQX7v4e3E2QiwKRsjQ0hYTkRYprZ-p-yDxF1OqS2DJCm2BrjpP_hVWwnzTzyDeBDGa6VF-yWXqL1n6q8vgKrPmu90ON-GJvDkAWH0HrUdkpj46cKtYhiE9uBG_q1w5KL1FAyM6mtRJE_NwaplDgn3ixri_T-pNYXHZ-hSZYdF1WFC0tPp6jpSxE0AHsSRmWP0ompy-GWuvhbbTiieW8VJTrMgUX8gjawz8NlZBikgiaZSqYog0H5ZcLIjzAwCOv-zbYqILa6iEGoZWe0cJkTIOXsaSQUJ0ndSrfcx24fz9XrGm9wucBqNAQuDv8OWDn-tgVM4RKltaQWHdAjGyT-v82fzdcYFf5_kx7sGa9I0-jlfSxJ65kQgrJY1imCRHRWJ7wFYL5dXDjCzpM4mCLv1QzexQp3lsTDTbcwKTRwkuyqBgiNLC4FQjq_IlUMgeEpI9AY5PBsx1Gt7aVdzPOWRSEFeN0XUfTo-TqowjWECTIVsG7HzSREK1Wbnzgky_6jeyXJbZ_I2aKNlnYlM3b-NfnT5-QZW1yxLVZOl4GtxOczUj7tUn1hR2vZU_0cGxMf0YgrZWSiuRXD7Ywv1jgWGUNU9ihmGaZxd8MTqAOlwdjh8x3OuPfJaatj7AqE1vQpqcqD2WBl-T6FIm0lPHsqlyQAdRYvKgUMxhpHUmE1PgA20gwXnbW4tpCBQsOGv4Yyp1TxvfAdAzUYofR7gzhnzocb79V1xZYTcpMrSsWFnqE1ZcjtiveLpA6f5LzCqLcPUdDiUIxlq13IOpIXcJ6kq-_D66cvlEBVcgEJkSZ98pi1cThWt8VFCtGwTWv87CpUbaVbIaXmzk8HbARuF7dCN1Uv4SzxH8VywhUnN8lLjF8Pgu36iq-3VyHC71Nq5mAjbAxrWNeFZJQxKUu3RI3qAkdn9U7brj6mGm_aIQVbE9w4jL0XqTt1yagA4XDy0T9t6yXwXoGwWT8Oq98finw0NKyLsRM-Y6DjZHtVfA5Up111XQGBOGVej64DxE-uL5PCD5kepa_HbbAwXwJtLMk1XahkxH8SDcPEWYkEnKS0832hCxyNDBS2wi1vhBnp8YDnV6wD2AiccC05DBSdF5EIuwzPcP-oxyhItxWk5nglhL9QGQ8NL7DELntvRmlP8DF_kFifg0MaG07lOdPKuvpA31A2zv-McVESK3BnPJE5yAehbVwvjpH5r2KlzUTwg8brxeWgIfElIlMxwqpYvyxMjsVNdC71xqsukU5wi4hPCsI2o4-LZyXVc9iiu3lyYFfzAzLQaNOJu2JlOvx4fa5BXEdAR1VanqpZ17DFeEMj5__gBAgXESpo1ij83l191LAwvJdmOKbxDnnMrStrmAfWYuHCciKyu5315sUac3I7xuOl9LvXHbxkyDaZ9yu3o_pwKVdEZwWYhRFHaAVfJ0_NEDqJynaPr0ju4g5ukGtaMF7XB5dkYNIdofWTrdI2wqMSUD9trli83Da9jnQdfiwyy30pNMud3VbbWrRDIk6rhNnXSJ7fT9n92sdp3QjDozrw6o7-ekOQBfWX8lihQ2tKjaUvK7MlAs10EYHJLAexxeooNniYQXHgj5Mkid9GLsTbz0p2JHyZwl3VyX656UY2uSiHLQWyVCY8cTugvp3WH3csgiPRMJni84sEibjXtn1FEpAHZjp8Qf5_42p2zeFR2rp00KA26SLNVfB0LnF-bqj2yrssdLQQGYKZdnluIcrBt3sgD_xPRjNS6vnqjiEXGV6ykKVh5IyUD8X8DUA79Yy_lzLX1GeOF-ToMujtwsNoeZu5TbRF5FJ26XZ9P59leizMyt-0qxfO3rsrgA6koLXkQ-GM03jVAxkJJPBSTQ6hRRqhMja9rc8Uxs138ayLwhwZvKKqRhBocKWad7XDlyCrVh0cmc28d4grAzwvCjXZGe7ZQwdnlUvLWOSMORgx-qSAfbD7gpmRvmAAtxyRNtrLJAQYUo02aWJErnxa-jYjWz8Fvw2zqoCxls7EyMtdqAOVb4yUfSYJs8cYHET9r-X7ZfTT545qQMa0GmG5KKSIK3QEcebb9dniKcoBZxLIkjHvK7Xx_OWdSjLuax
Pd8j_p5pu5ZRIYHYVEsNaRXYzxmLTH8gu5HglAwBKXt3-ctw4bw_OPkc8UmVJbg2dtEAV-nmMykhpXBC8k82dULgAO5g3OWRTFD-CwX3BKQOBe0tjpkXOahyFUSCv07wTCgI_9D8rODmvz4ziQfSUMs2IJhW03YnkqYKqmdKELVodGQ5_aV9D9g8qL1La22VnTgFUoy3UZeSELFe3rvcHw_Sp1-Ohev6JJf2QAaEJWC2EUwrb54PNfSyC4B4NBEUBl48p637F0LXxevyH9Qtq-Q7Phu8aJRJK3iUMN0EIjpV83EvPjjlMt1iR8uUu9W3Da4Cod4CB5zzxDzUJ6xfskyqbqN70mSHEkxhsf1YByKxHnDYSi4FP1aiOyub4uAtnIEohrRbfgS7knw9NPSpwjZnQiB3ETds35DKc8gwD6laIiBX9Fb0tnBinJXT7b6rgXqd1Ceo27a67Ua86NReMrxAKGox4l1giwoF8WLvqddpvAOGmTIu3DqPVyWlGEin9EAxwOdh2TfIg3uHqZiGVkpAVwMEV2Jkkj-a6dQAyZ_C-N8kYdadDgvR3OZdOs92EAtZi-XPLosXgyWPwSLY0Hr95vXlqEfWb1X9RvmUlHCFRsUDG4LN9Zx9ctEsKx-LGEYLjJBCow7Yo_nw69nF20SPD9qpXC3kP-2OHvcAp2uF6q5twpoy34Dp0pgsTLBlpoJBn8ACxrAgowViB4nLa5kqkp7jBBNObCCZk9VMUfWMTcWHaTSU3vvw3Kx6U4rssGW2eT1-E1eKcdbR0Vkqlhrt4RIEd5aGaz1eX2X1gfF076gq3h5wsP9QUaEDzP2hllpcdXacWDdCOqJ6390EY2me3SyWDZCnCu8W1CyUXf1VbbmqKGXDcXpfdOQHmeUp0KvbjFBd6hTWKvmOPbOiDROfvBiPrr6IlIehBtWTU4c_91C7FsFJ6X0t_xRSFkgs95-vI0rTe7AjuFB6-SpQ5Ia8MTnyJoAcs0voB4hrqm3M3Tsgv-mo_6q2D80pLuM_-C8ajlkC2lMr_-0ZYixo8Rl_lPqpMFInH6bNc3OD_1fmyLCUqE84RZ6p6lOwANJ9Py3_znGJlbPmccloHJCd1LzD1RaroiLh8uYVolwH70J7rxzgC1cxDShY-6EE91sZ_wsQxV-0PFEntdnrWG_ytP-QOeMb7uzQ-Tob9OFLYJvZWalYkPcx6EhrhMS6637NoQUgZP4HBntGdBYUhn4249n-RC6AkUhFmnDFtfqX7KFQOoHHSw9K4yRbQxApSQ8Jpr3p2zlvpaogu9WNpmK0ZNATm0SzBS8Yh-zFuhT7XmiMkLP15y-qvs2b6dUNVqSinXXRLsRYjxnJi5dAhEIxF7BFnGObnzugfvLijDsDo6sAEuPj5Nhgi8sII7htE-YR00vpB2wsaW1Q0blrXpCLRaofXS4VbzBj522hJI0GwKw_4asHeLzOA4Da4Bl4IWfrMGvWYA9V15izk9k_m--o9W-d9sHE151XyY6l7tB47pz2qRnRI7YRP3tW8NEUmkzcKc2F28H_m71NiH7wgUtrLqckV2M65RI-eQFyUrpmMlMyGwyu40eFL5BJS8caJWtgMpr5uC96r1Q4QxVVx0Wleplj1R5h1uDTy_oUTTwcRSLsu-tVgxFOLdCiUosnrpotw_6H2BRpDFiMYvLBSECtPcGPS84Mth8pRB6SWWvVnR6wpRIR0mp4Mgh5gQU6GlFTc00JkK70aa9SbOooPquNhqmZY2ZiuN5DfFlN13GNG9QXu4miyvv8ANFku3f24YP3z99elfhVcFFWnfEhihkoE9f1Q2gPS2CQoqsMrgIIxPqD1vdkaXX_oCeKyXpbfB8SmP9jLxQ7obkWC0SQYagcSRd5sI23wmUbc9GjNdnQqRw2ogijizjA-lSanPFcW2LSsC7ZWugQzrgtxPg_U1b5t4_q-xAzbX-Ub0PoHCYynoBpbvfzp2ALUaxou5PfNpUbbMJEFY569E9E80MmWB3yYkFXJQjxzMlhhZ9YVj2qcXmfaeiPD2KzKpa0ysBys_dZp7CciF_FTY4XAHuKnkPVfHrcrMrPqSDNtVdmGhqW7zEbkC5oYGUYIGK2YKVqLe_PuPVbBjvM8oz-zEnKpETIEIdjOCb7qAnfgeK97ofRpYFFVGFJYPNLd1h5_Olzm62cwoZGNnE6yIka2snXA19kbcIOJr52etuudqC9o64vZ6UDBh53zkFnJXnEM-0uhOLaUFlmPWgjt8ov9LcdLaz-Pre7zUPIDQz0DHSgBMMWYHo_XPP6qId_nkfZFukvdz4snGz_cuJDf-BdS3Rdcw458sZVAHm8y1ISHD0E5ncxFwsBWw_b3c_aQ6J4nofOZJT2xfv1ap1vfK_4Bv8MWvS1ySgJeRkyqhboA9CT2C2-kJmDfVaz_CszKXtXQzwOJ-yH87HBDWiFj9DfMhqOVsTXQpz88uxVZF3RJ8zS8hDdP5nXuDd4eIBuBXTMM452SGDP3q26Q0yO3ZAXBGuxGUagK7Oo-eDTXoq479IqC_GUpcGq7e3hvbLB73uhwsfw-tsIPcT3gJGcHnrePbdb2S-lH4Lb_8KrzkYGYdU6c20eJRCSvM0XN6R7hRs0y-Zpu56nAvOexaJRWSYeC8BmTgI3_N7JbqE8K8-uQEPkUMnhWUe_9IFat6avk1AJi0-nCL81rP06kbgdpXoOiE0fkmQCi_e7AsvuU31A8IPyz3I3irVm6DGY-vTcDm46rAfBxn7zk66rUds7gVvnUdxbcT1jxhCXXlYksIavlPfOfgwZCdPEVqLFGK3-DvKrrs37Qw-WR78fMtdEg9tx3zSPHmC2wJX5Mz2qevDODttCqZF4MR_5rH3f5pjcTPTlSX2eZh1gdXpKEyqU0x-IIHH_lsNMMmywhLJlAkwGMZfbCtIl5OCPRNDelPiH-MkiYmjRy9OVrm2pk7LTw-TyHBbnCWuo2-8LsyUILzM6jg_s2buagLFQjGtaMCOE-mjmCACznuOG6XSatmHJrWngWeJ5LEMIAO7qrNLQApAthzeVfJs5TVElglSWFUvV5T3Vx0hwhWtezLdpGwMpfmW1AX0pXrhCFNuHNpPt8Y7_nV-z-Be3r6WuG0-UQ2Vx0A23PUGHcPTand39q4_xTJWjA4bKxdvkCpVKc1L9T5W-GyHRzqntquAqt_wZPJtxfnZrfpiPsaNNeqe2ycRAoNwNfK81mPC4IBwK5WNz9L-PN9YQ6aO9274Zh6zAi4VvXwPqa0Kt3VIVjxdY8vJp1myHMFf-ydd0dXUS4BNWpPmcVnE-5gsLW6SDZAF0KKm5ybG4cci5rwlWViS4i3L12bQqpYQ7Ow6L60-13u3hXEzgiPfP8VEnpwSxs8mOvubYfKP6ZeRIOuWBb21I7wl--Tu91Kl6ueuUiR07iW00aKWYApqffHT3RIY_eLs6_Q38X2LNuoKa14cX-qu1CQnuMtynDg33Ni0lODBqJAEi3BVbxrPTqMNHJoRZLZxLrNDrDDIawQpZBp_sPwkxrjTHPeuW9brcCC-G2WMCLW41MLyfnx2
AxjHtHQzZQ6KdjBDJsgVtnoHgt_zZl1QiQjJOe9v6kBJOur_iKAYnpFHybP9RJCSCvm4s_iF6DGDTdwfOQfs693Ie2dvC0dqU6-mPu1sUSoXwPCOy2609GyAQWix2soJ--hD-L8odZCOAe6Cvly-TRuQ9VHKCJ_bYyhf1WnjbD3DqXgAB43gChhBaxEK4_W9QLBiY1Dtg9-aIpQJWGTzkFy7F0_rmcB005IFkOkO84PqKnZk6nm02U7dER_UQTqVcvQd68PFGQ4S-dPOq36VI8vjqQPJ2jWuWTnrAY_5oA5qBvL4qskFmuoNVV24pu9ICEkmdo2RHsfe3-pfMwmvBpsdng3GRES77y7P2H4tvcPuatkWTLm9hASHDKouHXV8dgRu925GcyT9cs962TKADkbcwRFWvZ-3z_bd5je-2ZBYnOkGnsCUQ3X6JlmpmPMdYiW38KqZUsiq49XGD632ysGcDuGOayKr7CqSobGcXJTmBRVE3X8EgucqdZjGIyUdOBYjbJ3MdMpT3phDxxnwL2IVtbknTzQwfe6UCkqJXmlu0sBPkznfXxFPnH1OuhxG0VgPRLKqn8xCTb--R1iWUiiuTbDfmnPT1tBvee5fKa1CUiQn-jvjQL-jqc4TJd81z8X5S4FHkDVd67tZ5PTYnROAsGoAY4YSU4fP2zKiJStJPgDrzGAh_c85SJaglm4qupAvg_SsukioXxjsaU0iYh-gyAWRDSEFtqOBsyZh3QQASlVCjLOXUkVz4G54_AxKmg1GRJErk82e3gmY3B5Jo6bYrL35P5UscAeTKXdsV2Qe1ahR9W4WT-qcsL2MxBU9Df_JAUIYD1ChVaODcEe1ERMSBZ4SdCn2CmYyZh3XL18JCpQompSCZHv2Frt_VLKflv82C4ZhlLD2lgmAfUvqHEH2wg6nRi8cf6hriotAOZ0vP2wpah2gY9ZiZsG0Fo68v0EzToTdABG8xxtgVsYscgJ-w65o2lblZ13sJd_FFeqtw1GEbcFuOAYaecMUZmyCL1aEgup4ZgY6pl0W_7GdXVlhhgOVoRXLsC_Hx8ZCFsRuIGkQr1VYeJuiblfMK4JMydum9cCHM-MpUHhusoJR-UUbmO2_fMQWVnXr90TfEoK2zTu-9BhdKB0p2aSJ46bW2r9s_4zT9YtbOjptqpK7tsa2VEktMr_Q0WWys4y9acl6hNI54k_SDoCXDimO7bRWQ8tyAnrKjSBzmA15vO60WO8afr1IGuFiy5I3HyZ8bM-zCL6XFxfTCOYiTKZk8MTDpAm6jhDnr2dmhIT8LnEGRXMPpCp81YBUP4vndVh1nrKfcs7Ro8BHlef9f1nqvpJnOq99h3Z84us9MHo8AlJEj0w9nrLOEQjBTbtxYVAd_KpWgAK4KjCYgvr7VvBlnuDV51hiRLFJoFDdcT9ytEtAu1C6liFIOiX-29hycNiaHUB6yRLuCMObmLTpWGfE8cmtsYoGq3DTCzIAqBKLLbiOIoMiajRAK6cq9fI3SXz7p5K_XarCvH6_cvkSS7kNn6dp6s1hC-LAhG3oOmmThyNADn8LQCUkb7wl7kNaJIkdNDHZ-f14GGkeO2W2PLg1R3nf1nE2pB_X5SzbE0Ib4ElH2VBkvRRYUgSo7ZoCHfdzpVJYlBGLsYgO0-1OQPG1Xmcz3d8FV7ClP9BYtS5PaqAMKQuDrujAl8SQ0M_PNqWhSZdaFH0VEqQLXeJP0EaY9Um3w7mzxUoURWo3ItWaVCgsa1a6kGj6RPJtDdmVtEyB9HbGDt8wgY1Athy3Gppt6VA9jO2S-bUakH85I7Godrc8072YSOXbpzQHe4-cJLlekLCybJ_slVqj_Qj1w3HApTMt_JlF1NXjGZQWShTTAqoHV2EwJwsYZoQWN_TAmybG9aN7J7zM3rd7I96ZZg9XmVtQy0y3ipEfzO3dKeCeN20tLtIcssEYbAw74EdSXyhV2k1s4EAv257HiW6NdgMZPiShfQqxwiVoNh5-K-EqiOXywACS0poLHXAfhcqgL9vjILsJhyGcH3y42m2b67BXtbUw1jjmVGWLEse7OZw-JPhnmAzvXRfXIuj3-YontnJvmPmUCcUN27KC-cl0yXX7BWnrx2ebmzTwIn-KDyvOs5TMK5DoqjSAHXIQig0SxW99idmrVzASbrRAJfI-D3RFw5wrr-VTxy6sVcTQ9jXuodQYyLntu8urqsekP6R2gQuoV0-DLoPBuYxqrchseGu6ojYw5D4kCfF_IvBruw-lZLZr7rKoPgd6jTSLvDDedIQLWioNiD75n0tftvCGn19cW5XP552rNencdBtxMOTAENg9UgOUyBJl6CwFTzzznqWRjHHBhF--SlP7VNMTeexUqz-tcIkQSmPLjq0ZnogePeiAJv1zWht7gmi4zw5iQI6T2qrNbyytw8TxpQiFRtWavLqdN_fPQT4b0OPYk_DrwWV_OAyT60z86IHWCx363Td4K2vRCqcmDA1hOiPkITFctU5-1RGVYGkfDuRGCYSi2r888A4wgi_xFbEqVUJM343f7Nz9_iZuww-3HaaaGTO-JVMMQPngT3fpW4p5UERHaT2_nu6zfHKQdXjOMbsJEEX9Ces5fXRXm7kWlqPS6VKkEI6-lZGm9OibshOtRanvBAzrJ-_87PvK2BX_XtXhJ7OLHUBSjxfJzTQNm28nBtVxtNRDcPuMRfJ4WqqRG6QLR1jDpYw4mmZfCdL1A8nm9BZuJHJT9Uh4MXPAU3iXGLXv14Go4PqZEuBUM0S_bXTyt9CfTGHJqtiCX3g6aYPh6rfll1HoaH6ay5FZ5ZTDc5m2Ek229aNP97ZOKzkOIbLGYtV9E0u1Pj75n5EMgtDOPrymz7cwRbWdBKPfVI2dS5bN8O_muh-V9QV-YOjD6zrqfLX-ioKaBjNnsLjLgqcUccxeZlFRHSNNEwvAoL6QWcTmfQ8TOg3PAfDpk0CbYhx_4hO18xsnh6m8h8TprQx8l8nqlKlc4uX_FKhVlW_q_gGyGza82oYab9wD2Y0E19nDRRySm1LgImjt7FU2nSNRVO7Sqo4UP_5T3Wkgk6ranyM64j8VUu--eFwdxvoJxRqD6DpeefXiKUdRXJxFOjL_v1nXc03T08WJs_6RpDp-b8BroXOQuBNFsOXqgrUfiqXn5QTkT5Ylb_uTfL3lNeYNniH4C-pl880ZUNPxzf1EfKmufEB1B8ReSnHMAH2cQrk7qA9pbcI25nDVPpwwnRWd7XOPWdB7X1GZ675LgxdWD3cgfhOusWBSf5puuAuflbye61lo8JWGhnYmJjK6P1CyIF5bHTItiv2heUxvVXC8MgKX_M1oYCbleG6eEwBVIoKGX6RAImjlba7DPVxizcoYkUTZWkk-XMNjEXEIC0qjGsP5EvRcfWvrqnxyLi3ggKZl_WDZgdOy0p48zKwxz7a0hwAqQQmsOLd-9Q_yPQKIjKGF9v4VLFXsWnnX3lAcwAn2VlDFk6LJaGq096HYjr6PldC452vjr3oHPmmk1faa7WWklK1IAf-gWMd6xxLkDhF1Sx1PNegpgTEh57AoJeh
ZprSq9f9tnMT2EmYEhs6sT7dS0xRtcMfOgtwWHy_hduopjjnbAg0cV6j7PaHogKvZzQp1YXboGUAV1iSmSwrYWO5lMnyauEHUSCx-p4B2Y96EDHCGiBqfbrPh8IDbNIIQKKramZ3yt7zI8n3DjVMenisc6Ke_-AYSXRUjx2JSTh01jA94VMxYyzZHMldujFOVb4f7zLf63mxqso-Touh-z32iS4CBnBoueks-wL74VOd4hKDRWyQnCGZVuSyv49GgEV1ePc7sfIJp65fHM5ixySREqNXJzSNI41ERcgurqUnHq9oU8vYgb9AJcURk5lPVajGt-c5GQdVc7BoLLZzueFHucpJ97q-HMrbzCQF2YkNtN1UlBdGBhVKwaZyyl6r83HJ8UadiMidMf2MinowtceEYoQPJeYyXQ6SiG86qfjZZNPsCEdwUw3iqb4FfpGhrGrsQJsgq0VUPg6eejxkDrLrBblUNN5iofZFAprNXZ0RT47cqeZ-kz-k2y4H9171Kd1B1JJCNErKqxdHfAO5u_MGwzUV0cVfouFjdUyaoAf72B36NfAWT4xP0-F0_yFjr3rrtTBJ9tEMtDG8q50vVEbddX-uU8iY6H0D3KCVKU5zTK3Y5S5uHRPZ1iFNXsyXqHtMPO84xHFreSIDQAzW5WCENenfp9C3A9oQhDNBJb0KmPAIVcbY5mOMcGtBUQQOiSl1-0q6_Mph0Dhg2FpyEf1JF-fuuUCieaOobNbQVAdZNX6ukK3bTYdkb9k2Va8USuXdFkEtpdxGbRzUofaVI4SB5jGfWILZm0RiHflVuoaPhPRoF_79rtFgZqg6owACM2SI3yjeTJbJonwdpYHSihb9xdB8WZ8NM4wd3PPgTfLNUIzwn1hjslRtE5j4PHBKl69ZT7eDFcHaWzCkEKiZBO1MJiQJkykIh2wKI6Y-eRQj0874N-vq67VdU2l3dY12cHpLJpRQtZz1HzsYHZ4ycL4wGdSaA-qjzYrQV5aODwzwoPjvaJgrOKGqr4KMDArwxLEeWYB9CDcVNy-6mE4WXelSnFN2odj4qR4dh06S2EcgWXk8-w70-zxMKB4iw-jaRTMCEu_Yk7k825mGF37f2syvVsdL1AHGvqykN4hsaZSfanFcY5wUy0m1Y0mH2y2TgqXbOSH_CWxJ28UAImbvEwNfYxd1OG-DVW-JnOddx-XUWwkf9wlzoqBlICrttUHGE7STo5NqVDmcUpxTUFIU8ajHgueq-dbIPEqBFdWI5Q0cjiXqadF-bR5j4i7a-zAtRjnO4wNVo3fx5od1jfFBahSd8-zfQ2uSUQF26bCemQNGrOVkLE_G795I2AO6Y1fxTcauGw2lZtuawW7CJia8kZG6kLYSnv0cNPDnbSz7YXYl-iXIwOjjJO9KxKiiU-tPuiJXEeJGZuexSJVK7biei2MQWr5rwC8u0pmCAfUgTr11a7av4bL0_b38fxCubj3VcnrF88UjfZOYGB7RzAvNYlgDt1GSNLFpwep1d2QMVNoRmeX_i2HI-Q9HNRIA2rOrZaxTpTRQ7POuV2oS3OsHMOseZlMrz0dwZ9llddcpYUNuvW3TeYV_DNP-egqE9kKBa4Yx5xgYMIVjw2KfhiXDf-cNiMZgTIu5BbPkJip74WVTTHihvmS5F9vdCZBBZ_CPWXUf5M2v7jWCUFoXJpn5zdzBkni1ROFB9woJTK9vjoUszHz1-MAXXkLMDJoDjjAl6s6NFSUeyoJ0DLLV_UnAmiYfBgg3tPpZA_ojCy_JmSAp0V2_2gTwI17IBZ0KONBaAdYp3S8JyNdbqacPJtff1f4YXp3RJYX0iMUzKoezGmQTHiCJ7qf-gIc532O7Ft9Qdrl4YVjmGCEH4mcx2On-plVMrj37-dpY9gyV-tFDNcDDUTZPLoEbuFsskCkrHsdiLaBjpMCqmnYuyROip0lYkYcTDx5PDR7zJHCN8roRADMdd62dGh9jOeNyBLybfdVSBPw9lwTBGdikEDpmTH5Xr5zPenKiYmEU_vv7_4gG52OHPh6PeQqe30LwhEQFl0Y1HF1PO4-7V2sCHPhnZjPQUcqLkcekeCIIGg5yTm41iOXJ0CjU_ArN7EU0oiM1anURmmc1lHI_bt9x_G-z403Ze1D-AYB3-MODlDdqilZfA0vTPNdiE6Z_TkedVqLpjXSoIYKt0zuxMvo3UFE9o66nf6jODOnT9uJ6Dg-5bal39qr43PYVA1kkZnZq6JB3ss-KppdlZ-yhHr6RhjNbGFPBhaBQON4u9jUKJnIfy_y25Q6uAlXhK0mujqLQ_4vKWuDgh6dzxLFj6VjizZ_aorIau1DWeSZl6Ql5-mK8-VeBBmykJtwiT1uqm1Fc4XlT6pW0ZqyjwmHscxMTzmgkynMCGJoNSbS2leNbRBvYtM9C9pASOqFH8fvrO5pTkw5qPFeDoXeDwbcsZwTFrFYn1kqYI6DJ8-lBdsQf5n29jlBFQopw0ODU2BxlcACHKbSaMOw1HfkuDXGIsy3mLTQrK9BgXIOsuwSNK4-g4WCEgxTR6PgW--qMQxlrwWcP4KXFlpKONOVLGGmfkRfQhJxy87KtJchA_UOdv8f7EqyFT1HMy2dtKbbPDEqaCwNk9hYYSVCAHQT6XH4XPV2gsn-fhZxmnX7pEYwdnixJqGOoKaPv-JDpt8QddwLOuJVjwGQShpZGhKjcOXvgM3UZUbBE8magzcscr-rm1YHt7-P4Mj_qeGQHIIfEBgj_pKuoZMAWekBy_UUglJU0VoYwH5a-OeLcda3flzViq4aMxsuBrL3q_9vxAWXCVcbfztirfBE-Muwl8tNKawuedNEgoip37RziekmNrs4QMIULDLTduvomtFdBAGzxM2bHgQsH1BcF8sSCK_Mnpr_OQjLGhBUqyKE7mXwVYb3FTh_yg5B1UIYnnmYu3Gfz1GftBS3QGlgsrSgKC9fXCg_eRcSQ6L-eSqjCFn7ctYu-MOtvH5JvF-RMO74CSj38_UFc6gGArS5m0WPuwwXxYd83QA1pRiotIC8moBCkqyWf4pI5pgEE0M7SIR-40Al-ilCgljuDQ9srDNaIoK_OjWeR7reE1RbJVvUIZMb3O0eTd1DHSbLxhTz0mtEw-OUq375UK7gtAXyPFy-loT59NA2y_Lal-IKwF4jgRyrmgFv7WEDJYINTiSSH0zWMhRjQH1JvsaxTBusoGASimsc6zXbWXx60N5IsTw7Io1CllAXmUIkU_QyRAjRMyDwMgkc1XV5JZVxDP2KlV37Gl01j5jfeN2PV-Vm2FPFcALJJAEZHM2UZySYD0eyDO7FtTdp7ZYcC1Lvx-UMN2wkY9KzzmEXOrTgiGd1W-IPby1BoHE1NjjxR0LUvik7pBf2Ttj8CS1fTpEXIOIAsraKPEDBAVU-ZPHCZgvLJAa8OuyePn-0fNIZO4keSsEFv9J61XuDoqigoe5-qmkybsKICnODXSV9Ijuy9GJb5XytUGQHBdjiG0b1yo98F_8h3Bkfbp-6Z2wkINazkYr1KgWLKxXoLmeIYFtNWpKm_1aFbt21wfXJRzspulnz
IXbKsnkf_hj1pR-3T6-kjmqcgOTHDcGxl4zqD42Qa1BMbBsbq7BIeCSzsd7nGsrnWcLVmA7W3Ed2TN4VftNae3jSvYdkAVeLUczB_s3Nq67faLaYsJxBzGCge01R93WT7-5tuIZnW-B9_PqMiR6m_Wv4Xt3DEbg_YX1fpw7PaPeSMMEmErj_ohczc3hxPF0k_lcE04xk_u0yi2P70_PeJqasMGnKpYuS4i5kWONjNiATqSl-UN_W_TUjNh89BFL3G7WzRxbS37CDw1aWMNewCe_I4FFSiffrAZt-P7r9ZIJG3DLLjb4ZAQpD55jCfkmXhw0wq2eymfVjvAPEVRsOqCRWRs4ggdeos8g0ydbe7LK2M2ugleun8ds8QuUs9HEsDZETXEOdOx2q7u2qMNyxtdatoGKrmsoWxBDU572WgEHrbzX2UDDcYR2s4mr5YWuXHjxRQqx7HLqiTg69Nlb0SetG1nN3sGDQSKX4gnBNH8PUotokFTceq2RFeY8DOQtOCQiBligqhgQJTpkbRXPiM5EqLPpUrM8X99ffo-5YBl22LS8Jcf86ADGFMQn5NipdeR3VcOnq25RosVwtt2y4AAx3DPSSaU9du6TG2COHs8caaLVSBygymo_cPO7Z3EBEAeLLkWdJXeR6bmSavboqn5Dlp40LBNBrK5Iz5djKYof7_-ulfl6asI6SgqI6uzU-KGzRk0Kb6VxghHLupMHAvRWsIWGYzeybMfUX2WOROaDaQSbfCMr4rw2DGrUaviYU4wamixPZZvkbV9cjIkMPh6r3h-7KoG-4xqubzKzVQSuSYOw-IQcORt-olFDdYsHfrRqlNuqhQVLhZFZQJu5xKdXjzzIfRnnlsngT7rrcA9hcp2Z0qXvC8wgOfYEBbRinxqzrX8QZxBI50SOl73o6E55qvjYjLhpH2rdWl1pL9NUK51jkmf_XHpy-5WkM-iQWPWJFSZ9LuVLgqrKA-Rmfiq6fKMuBqpwD19Ct6yDCjwSC5bP3UVhJGW5L6XDue5hXck0Q-fa3CY8ngbobWXYg1YebKyBzpN1Rat67pbf8T4nMWXTO9aBM2FSq_5fscwPG4tfETdD4P0J3evTWGWx2QmbCoQR1eIIGGugGe52egawOky9yyw3-AXOlDBXAtASU8ZczQBkS8e0PhJHC2aSClQ0BSe_FGH5f7dGuI8rHzFMobkhcKP3YbNRAoMsoYTlaXW0cXR7aJoI67SxvYV0NupXSAtc9tYpD9L_-_VbQqtXqJNWaTiq7pYo0weWxv12PpvyKhw9oUEHFAOM6hyO8qJErE7GjvjjaJru1Sh6TNockJOeKkrJy7nVFKJ1gwL_QbD_zY2XQdWjbvexLEcgUDxEk3YQijOOL5eyY6T7RkxARSSHIVCKnen-Jmt09qRxx9Siyq6w6K_rYw2S0dWuhXkFlmslw-yktxgwVl5EbAJOSPIuzmYX2ihXdS9vBQCf0PyKIbUxyZlo7Y_Mc-BfySjueaynmvdvnS2lPweAZKahUDJqF4Rn2usRirZTcG1zzqbyhHk66dHqSePKuRn7oyyIleahT-RqLQqr5NKZikOX5SACbNAyYiO-96SgrSg8zqJa7geoKm6Pfae9CEVG4CCAM9HZjURydRswlseORQ2Mm898vVYUnio7VIM8B49OFLzCThGB5KCrwdUx068zMPhSaZe_jxklcWzpn0XUGzjSxbxP2BERhVQtF64Hd3hvrasT10DvDFvo8OIYtbSdorSN1GEir6O2-D29HCFFx-SAL3PhF6sOOni6vx_BAkNal7js-zde7hrA3mXzfnTIS4YJv6d8XcB3WpGMhCPBeKuQmy8HvKV3iyxuNOVTpWtzAuWWKVs9Da0catgaaDsM4gKNhx3C8thH0QSUbA9m-XoMiAXxewTCEk7ilWRRmp1h5c7JWOdB-51I2lTx2GLgaXkGmNRoDH9mby87ycjImNg6el3ZCiaXwbdLtjJZolnVXrn6CcKIWZnAEJeb2R6IWLmko9fJFJYX7LDLKYjc41RJxB2iSYiY0l1o2ekB9FhjeXdh7nQWpwPLBW02FeAQXpWE2X-gClMqaWQe_DqRORI68Ml1obe7YzZcmKV4NPXhtv2I5QX_1cEmk67NvgSZzNS90myyg3082wCrYDSbV97gTm-o9H24przcHp0x8_4-MXLvQceNhnZjMJ8DaJErQZryr_5Zo-dOjQQOHQkAyYRVbBOXk9yJ8KNKo-cG9xQEW0M0uAnlwtNhsvcOVeh5bYzk2jhIG-hVukPUY-Y33iP8dKcNRrJmhkL2pnU4HAfx8lIWTY0BBpd5IPk0GxEpzrVpCMCl8j6up3DzwOiAp1W4hu0B4vWAxGPKkTYt6dQlAFXG9_N65lg0RRTreI_ZN_6QKk4Q1JzSugA6zUX-dEFFpgBPvgoFaQR9QJZoJTySKaLyMsdO05_1nDxMKUmWNhBu97yLeAAkZCO-eNuDCzpMAez81tWHLeyvaY06Ny36qKwd_NJdmNRYeIZfPl8gfgZLWVazbAvpOCBX-x882eZvGo6eh5rSHb0zrBJHn16nL6DX0i6vm-T-GC9vMw0F5TKQtoNijSkE3cj3LIXWD1jXqUj87x6wbtwAXjTFR13y9CJYo58RiWJY_DfdjZ8APO-4ObTnp3wZzT78hKDEkwe2TcCw6uIuxijBAkZWlk9w6L-9RYNYLAgmaz13tvpIKm449kW0SZP8gE3G9UZQvsP_UE20sE1egeKbABbz55TSSdCjSYLPC0ptqcPfdgWoqUs-zpia_8rEH5x_kO5ft46oLjw25XTnnYYbrwm_V98wuLsaUyj39nt4rtAmg5hES0YP_VfRDeMkrjGWnIVtdGQOfJG_tP0QYrZsac3heFK3kxCTSuwHY4j3w5aIhjD3yLiTIZ5-MaUN6QAedoiOdxh6v07KBbfiXsZAAAMfwKTPWItPSxa4mSoOS6quOigP0JI8b8g9wK3V2YXrfy-nG4o8scF-uDXiUCSjNnDOTaNu4f-Ntc46khjcnOaLdTmg90j6tqoXS3aFw-59I4HODk_cWA37tpDow55ub6B-8MKCkK2yGhtqB5m1CIb6Y00KgeU-Ra7nsTlPSOB6WzqHlLNqhDtb8G7cLeBaypLp1jNMnTakOJVc0pgBUNFAezTlOjpzXmW4hFYW9Qlje0Qxss-9nAqnFeUyJ0cavgDBWupKYmgskzayRYb2HVa2DbsIiBd6YKTrIzd93zkurKMxSdB5I1XOqIwZiM7AlWFHfREkppzcNwOF2GAmCkL4dBH4ii9l3tcauhvsIZdzCnYCIUkMZ4scUorBvbB9VxnBVDqAY382Xh7b4LL4D5DN98lQkLSd2KwEdcanP7JJP6UsFRTvnnM-TWtlCA_XGvatRko6nCPGeeyv93UnPHTAlUz0vZ_cINHjN0azOe7XB5YjjnPK-y1Cr6YXkteFzHpImqfB_mZcit59xixo3FNeBvUmB9p0C8n9lC_YY2c8knViZqarhp5OjpDoPJZCM6m5wd7wR_9DxwLR_0G7ngP-ZNgSRnNs_DoE5qTNsWfs
vcRZ7N7SFGGSnWUg9YtZnShDKCu21kRtKqcTCCFCk66q4sJ7JMLb1AmwRL2i2m6OtR06IL65wy5dsd4hOmQGARVO49IWgZfLlEYjuLMhbA7WvqqCsiK6y3fJ_OcnOGt6APbyWGVtWmPbbkwkOsUAB08dpJdTrylPeLbdIL-CGe8PXFcMaHJi1vlZY62Om8AMstG1-bzxgZUGQ_bpMnt8-RE6DACJexVjR7W1q0gbzwlJU-tLyHMq1etPE_BXdEOQtvWbvAp5NbQHzIZAGQKXnvjvoORh-CXcFGZdeF1vT3T2z_c6N57u6r3c_-VvoJwrCjJxvUysYosF8EGSb123HalG3hapPpXI_-uINPj4Zsi64sl6NMF5bPF9dQw7hpvW5csULkhY3WIsXdAF4psv9Zug3Oj_umgr4SZNYRQrrjYRoMV083foKJa_5bfRDt-VdxGyY8-sBNbfn0H7aIZ2p8siO5USVaoiDxKFs5xBrdwQEhxsHYxHVEaz4z29zqxd_SGu-FnvzLjzL57Tnr71j0dmwyLXm6w1Hf97pf--QF60aAXp4d_Yr74coGvfN1fhA9JdmgXWi7myf5DxXT_TXJ4Yewx2lZrdKMoJOvYSL4NxEYobn80h59-6Gdy0NEpxI_RWZXk-duQx-UO37By7Drm4ucUX9vOOSuXK__tBJs3ZM2rav_M9INtvFyEnjl1tduvc49phDElTtOhRIdTqROKU713e3LK-olrXMSy8PGwgePcAQhk9um48xHmQuuJQDX0CQnJpFE0dIWF4QqGnbLlBVuk2a9nWgZnjEaV1z3zuKNrrDiBV9wTHJ2vMDDx1UtALCw9PMJDAyyxZEf7Q7t6Ogd3AtiFfvoJgGk-ZHGo0Ni0YWPS18a29FubUz265a9gSdZiOFyiCyZohNYe6_CZLBUh9uDSbwXuw2FZt_OvbixeNIxpeED8r9kc9LqSCJ04EaB8kECoyWRrqXS7AhQTXYwEOf2u7mPQ5P4Htv90zBQ3MuJ3Q6nN5cBLlNgrBN--g6qSwvoddn_EdI-5qm1Y4lGu12iBVfl7f34RFgGlQmJMOwlIDYCXWmg2Chi-s1s73v9fq8L4X24vxW8MKXx6dOX3xcX6pMJTEe3oci5kY_hs5pVjfgBysy9jY-anXSra82EoC1piv5qsUiXDvxX9Wo_wsVv9CNaA3isTw1_6FqZrD_jJiiepVun0OgcJl6grvtB1DkuIsvFX8yY0O99Rm1KKGWlPOOcLyl75YIq-N2jr04Q1H387S8pyFdTG1RJqfdMyWuyePfBXQVFDdMp0x1YMYLKfF9ggyOdOylJ0Ihlo4Xnu8UFLMmorgw4tdPljnM2LS6eaTz5tDR6RuBuRUI32WndoLOOKbpK3y8a4jkPrHDQN9hbJm4W3aBTb7__AzBb66vXMYJwUkcDEMSrMOfxd2FtpVMN4tXAnOW_t4nXoYNssCuOnBbgncikrqeZRs-8glJ0hJHkMM4FC6gRA_Gxp-WQxS6SdcixKzLY4kn5mHEd9uKagGuLiX6k60SMvZYRQWG_ZW5BekYk-ud3U3rDH6F_y4cMNQkR31jYLTtuCMdbOG2oaZawKgg6NmXmRBBTtz13kq7nbsSc1aeyFEPy7ho0b_IElwWksPIODcmCBOlRiLZQjiJr6_ukpdOE93tvqZHjdIhr7wfHnZs2XhvbZtpwrp72xelMib0wRN0-O5ixk-7NX0LKqd9llhmzuvFjobuGmtX0lwBPEBlbkQGFJoHbjQJsRkwtPgZBT1vnwVOVvLyIGflWdnU_V4RS4bErZ7yyfdtnquidbE97YFBcrhYd0OfEz75xTurPKmIgOLM6q0d6TLD61wY8uSuk7tcu_GsbdKapP_7T7OMgU91aSOZ606aFkUsib0e-a9WTat0bfLuA1D2mrC0y3QsHS-8eG40hQjhqSzFY-3-bekuPWzRDMueP7a5aDdczL38sFfuFKmIXuvmWDG1OrIZ10DvxUjCJz0CJ7eOHY2Wz5zbI19zSgqeYDdtdwyPdb45mWKtqxvyta5JbM5uqT4Kv2ukpbe3C8JqGZtMMIYL5YgHT4hH66YoZFhMt8UT3nxTmuPd7wmAtKG4JaeDH_oq7Cd9o65YjWKdPz1QEbBIovFbS5P_OlT62V3GMBaUFYHpBHR40KWbZ69WU-0AR9XkHGgNfQE35-2jKZ1opDLcK5L8YjY71Caavjwurv5xHrMHXcon2VPQx6O4MzMIpK59vZi0JgXCsqdT88Us1eHXGNYmDMklc0iNAAnDPwV6cYmomWXjupCnozLzpQM9qKnMXuzqPEfhY2eT7rKfCd3nKtycLcRtfRB6BfKz6cfajvKxnrXl9SKwpucFPxcz3OshWGLXZdd2s3rWfxHLu4_CP1cs6STQLvYQcu2PwdptiYGRCw6QrIgTkufrBAq9HB8xBTgDwjR96-u9Rc5ar7yYeenm7X-HQypEByac6dV1WNJudD-ordvR8EEFJMD3qwpWZjrqL-QwJt6QiEuJigs9BW1PW_UBKQZ-6Z6ESWGty1Pf2bRvyQdkM76Dd6KK6rbe3ceyncM9Qvb8clY69kSFFj2nP2leOrPtgZqc85YKfdpvjN9TLHYOyYdIj49-iL1uamwtoXhkXeZmmVqq_UG_LqngO_OzgpfRHPOtb7t3pxmcHlP9xw55SpPLgj220pWdlzmzIAvKCRe1MYCNlR6QKv9jrwu1TWAgrP_IUDjXzR-putTqRoK695rC8sax-BRSPwq7E_wkGootaSjsaWoC-R4jXlco3GpjJwPALjgSblNBrCP9pc6en_DHlr8fYg64fPqWvMiVezjLXHnCOUEu8T-d6f6JZrSbBOWyHEUUCfboeEAYVGKnkAf_tYRxtlpwXe_6FgBPv29kfXivZd_Rc8AisJjMVJ21ZTmXwKOvbb4eeFOGjdQrKITvXt1G5KSxYrpKBlrseNGT_oeLyE74pMOMfm-Vo0K4ttpkUcrXM7Q1VWh9CJjCQdkR_-f6itv3ajBHEBcu3xbw0Zecer6yW9v0m5scu8BgoAcOkWF2LNt_0j5HnwBRnWdKwLdTVQRSOZx-Qg2wom2aZ3jO65hWtF3opZB5sdFFZMpFLw_1djvJpeF0Xxs009zhHWXnO0xAP4-HSl_IckPLkPjcAJqymhVZQ7nBbgHS171egI3_Ci8J-VnZWMlxU-CDRpeey-Nxm2-YCmF8zWzrtApATzfziS6jLY6CkJlX_jC-ER2Z1xL3deoaD6dvTBEY3rIDddTdYvsMv8oJ8O4zo8mCoEYLLgr3lsUZX2lZLf1SnYYwt7rnx0W51el_SFODOuDRWpt39A1kQTv9a5A_s8psGfD_SjQhQvMNyMWPbfzEkek5PnG810tae66G1soYEbKRSH2uW0SzNdH7jlUxtbvIB7H25ykDNsZ2QQm-DXGe_XL22x_KpfvkWdG6iDzSOrhzt1RGgok_md8R3ARglMLCkqYL7K7eNFBUTfAL9rkoanYco8SWs7X_-TiSIKz7t6x9A6hmiyiuy6ipBOvaanF89rPNRacKvbJnfavoVe
t6ngLOajTlYRvyb6MF-a3XHjUvtvPSglPf0lL1uB35dkXGjvpheRHtmndw-K31wNDPCTBhKBv3b224wqCr8GGnCkDFVAVrTMCfYSLATNnqbPAnDG6xYAPeEciZ-ascZe_1nRI1tPsyN1UUhYtr9Q45gX-SGYOXbXZIxh2xkTfF2NZml9Xow65x8yiAjC6I_9v4Tf0FEciwwjn6xvDEloxUrepkVS6DxtUyKhkuzSv6hljk7DvDuA5nWfpDAa2xPEEEis6_T5fTa6FV7WnA69cvnBVWHDTE-ot5JQJbqbFIH-tAsyc5352Cp_Yzt9IYpOgavxwbb0SAj9PqL3W33IRBXIzdZXemL-pFie6629uTuwmokfgSPwVsyWtRfudvI1K9cCbPSJ3PwTuoQgdofV6i2iDscQueQBB_aHRxsaj54xT3LF3JsCHi4D2ifnWmKPHjM8EaIRIQHWwEtmrc_RwW3CqHtnDFyAG6Q-WW82lFoOMjYLEjWt_W4nAIb5qsQ5bNrcLV_Ge9VOOzNbrbzfAX_KdN3CnGb5cnXREE1EXTD073PVN1MkKdNPriLqrPiH2VSPK12c-wQCjneHnj5ouuAaGS2wRpIpD_E0g8MrReOntgW4x-3qigreUsdXoLcgXVJ7Lxn-MBYt3n_Ph_pxLoP2MBmDU-wJMXcqHAsLgCCk1h6WoXOz2VU4HVYu5_Ea2_Gu5leHq8zvHzGaZTyCsBSZsP9xIo8e4fUv4-Q4qZyySJ-GYxMH0bQJLnC4ZkUeqwlcNDYg-oez-qb3bRVe9Er4JLMAQxnudHQfUFtYERmLtZ3NKnPg2SdpV4Cgh3_JdImtq-e0QgNltYJH9_1JU6bPnEDJa8VI9rWZJUk_m_s20sXHgsD8ggcMdWdOdLn6SiAgIgUuuw8guffAuID52FSWPJsqzeabv51hWA9ApMNUCAOYpsx4z9uwIns4DLbhmUyDVIO6afaFLCyZ_n3aCNZKmjBW7kb4t5uSdpka077DSuCGv4a3GP8f4dCh-tz4xpeiHa1Rh6zm8zX9bz8GIHoNYZWJigA3fMA33Li44oKCmvx3wIQJHKPfO4x73nanmzoetfOZXSvXJBv4RQGUhQ7QNdQPs7Ehb5E4dJ5nAeaql3LEraopRhUU_YJSxbWxTSXBQNX1ydd-k-zYfXDwqumwlEINv0wqzVPRXdsbLsTOH5keBkJX9jYkw-VEbQgO_OiTJ7wtQ_OybuRsgpn-3qkFLemNYD6R84GKw1-JwLBiuy7FDohdSenlDKfZvp0MvybjYG9Qt17PC5UtSsHabzSrBv1zUra9jenWz92TDrGwpTsds_Rzl5aFDxfo7DLf3R7NsxW3tvXwTbZAp-pGhefDVU4sX4YXIjoGtxUa1e9DRatryfTLS41tKZoUrOZAZ0bRnXhyhImxiJ5277cHwqui0PsOuvHdPe3uTxuaAB1UdWG6qROEL2UrdIGL2-gS5_HOSCdW8F0YJxy-DKmAooUADwqIejK7fJkPmCCSHqPrdNZ3smxLq1jFIJkcRB0JOXDbFICDRMeuHUkdx9JlfRnlYfl2ibgsFweZe1wbNJPyvr8ict3QGG-0ox2__9VyM6Xs85DQJxdMVPHKubCyNAFOvVbp596xt_qwP6exE0C7gFQ3Qe31o3_klWbNlbws_ONc3n7ZHWf2TAEFx450BacZGY6qSfh8WWwcf6nUH7Hz-rkVlrsYgMmDJEyyCVDkv-blSwaeqH7lLdIbC-UkVhJcUvK1bmfmqS7dT9HN0EB7GOC8-ssItbqj-PHWFVCISjkvroJbg_1a4f94Za-km1PGK6a-Njx_HpsNWA-l45lyWC1mUxffQHUNNEcdcbK8erbnlQe4a56Ji_-xHlriCmVxbBdEWjisv-k2tpOVaOmcQ2n5D39GWRcr0gRZC533SjdsIdxT2KO-zalWewwtdnTwrfze6bJpPCXtKDyQeYExDrGVlvUSDoze7Myb4NP28h5wJ8rXLTPTMjtaZT2WfXQNEM5hkDqqxhSPq7AubqTpvIARlNF7bHjABObsyNXviqo4jUo1je7BWmSJNDv3ZMecs2KBQ7yJa1exy96PyKNcKdqtWjQSofIUzDVLDCGXbn8dGVlDQN4nFHtfDUFAxcXCG74kUKZ4lS0zIauujdP0P7Fo5DTgifnSx9x1h07F-kVIc4QpVRAfsBkXcq43-mAY1Xp9G7rpWdKSiiI4L9jrkqUI6mDidcPF1C991xTJZJfpQRnnINH71CTjtfBt2bwt8K7zb7LdzwvgbmiJcHuLLxJ4H_29eCMx02mQkNZR1tJULMBlxMXlT23GPrPy7IY49C-x5iSrqBCZOpPKyaV5BzvOJk7DHd0A-LuqSJfEO_3RcxVkKWDSnp1FEjm1beM4bS-YujJ1ypZVHOyuX2WYwkTpJQFGHXm2O3uJjhr7WpxPDoZxa8Gsqy18KzXsYi3udChnoBcrqk8_ssZ-IaGX-W6qbIbodBbHbU7JLkG_T6377ilcNmxDByUax_Lg_ORDnvEeMMnnzTdzJYp7YV8Nh3ulic2lv26XVN-0OB6iAPPItDpuR5ha4wYok78sXfKslHfjpeUqbZpxhHvTzlVYrbcFE0BbdDVGFGMCRtvKLPm7VW2HLtO-c_qvpalt8DxRyHvKaZjYSEpvjJq-DJztoHeqwMSfpS7nKySa6-soUAeiP_7KENFTdG9N7C4TzLEAEt2ddc2kjAyqspo_6i0wlGrqAmtSYaA_kUNZN9AGkU-YG1Njo9ocVVm0OUnPZdip4TZQ3oa_GhoaKBJ1-4WlLfKFu-MNN0akrWUirpe_x5SCR8zx0OqCDwWnFlwfEwAidDarpmMAOEBncuyEp-RyXhJx5FPSyKDg3hL19ab0RHgBVY8T6sh0Jakr9eVv6Q4wzSPs3Y7GF9MRvMtwgHNZvmLLPSPPsfteYCFFmV_Z-RulLjBK803buRIC0LozWIGaAG8FI55cAtBKWEuSECeACvTPkxF62sh8yG1MvM2-mAFimPajWuxLtruhrnBMYJ-lg_pNp_TIivlAGsBMYjTSfkm6lWNOnJp6c4gbBcjzq6g3CFtZhF345x_kM8AqfBM8dd4d0igS4EA6ahoQhYzBTv5Z1LDxSvrPyXQXjCNHkUKhw0gaZgOMEK6Vbx0Qx5kTZPy-nwZIKefGnyL_GvE7PO0Wa9M5mrp7nRu-teqKwvrn-aGP5xquvzNEVo6RK1Psiyv3LA6JAUdDf0UwCvrzpUAYv7Nll1C8BzBRtCaeK4Bd-v8ieehIx8WLYPn_9KyxEQHevVj0mq8RyYuSITxDBYkl2TDNHr60_bKPIqhYar2pnHjhFvfKJGVP_ghNNmWL2SlQPuUw6Nxgw-ubz4B-UdrCcAmAmhLrWRdIvX36WhT_iPmo9Es9Lo4wIwfsab2wFfc58Qq0DDC-tWJmwfmNzhPgpVA9pb-v64nZCzjORs0bE740GQkSPzc1T3T4g_LqrykU-4mtKab3qJYhVhpC6q1yW8uNZzo7B-e7aD-QOXUKo3c6ne3V3BdAG1W_rEG0a9-eGDtWQQM
B_TpOTfnWnvqRz5e1PjRNL6dyU_t1pOTfP12mllPRWhuZrA-AV6TLpR0ZMnNpUggiTDJmAxdZfqOkVMwvC8yU3-4IP1pnIXTsCkKiVYtilkoSVdViC77QQEAiqKaVOzZ6iBxhsBMGWh6a9P9bACTPVz09kw6gffFNIVZlRfijkIEGl1Kkk_Jxa3Z_JM7ODu1TFZPGWq1-K5-uWcGsonhyAxERAdGtp_u-cv9KyXJFzmpnvpJig0atnETdsefR0rZDzmnePBytooyT1OToCFQ1S0fQMcSuTyindV0_td6Y7aJhy29r5l2Z8BAa7Yta7KSJGT3aZbZP38ANi790e2LXZ_-RMUDMW5xp6tXEdvOz1U29eQOWhYOTNG3pd7C6HE_Qkk38ikLeKqm4goAUWtchBWdT_kCnXFuuAg7QwN3NNlAOMJYIjwbkXQ3pBiZCvrSC7Lt_o7eRIp0sl1l60kBG7hqw3Upp5pTQSRNapiuRVM-0yp_J-bdFTjGvd-zrppoNZ1QWcFWWWUCOzopx91IQQ-4Z8_vi9ZKhENHOJ2Xankht-WCEZBiT1Vt9HIkKRW_qmxp3LUCaxPqPrAbr2-10fIUZGSf0bzmqaDj5VOuJAqIGkYnjX-LijkWuB211U5T_Qcf0VHL5z09lfTyYVkM6nC0BGOFqXIg0PPuBskmHUu0sROUjc2kmr9vyjmtKRIE3aeZJGV0nAd4Vf0FRGRzAvPTTTQ7UwtlsD4s569fmUdWo6iSlCnbTzqIXQB6GU51WzgxKcK42Ky3q3OLF6ggmSKVhOl72Bn0kbD2iWCLFlD-zqxCO_LNlwk9En4vjM1SP6Yg5edSB5PYFcDhwLnCjVmLx6C8LGkgKb_mFeqlGZk7yK4QwCBozJO0BBrJ5kgXechx-DOSR0i18_3qReFmfyYzTdLTv5wXzYZneKcUPY2BE_-ZZzl3JvL_JgWcmkF880lqT6mBXhAD_th7AnR8DmhucDQIWgwKRU38lj-1wvMNu4pYsYFbrnwhsjNLatzSkeDUGtLGRYDoqfSwrnPYYgZ4_DaO3RMAG0r28iteiyXLc89VXurDQSScB53CPQ59vZ3KuPQw1hscP76z3QAmcE67n5osBM9x_ljYuVKbsvpnEhtfdjm20VZY1REsRS8crEQ3vr8RDkQtCEk1T2TxQ1iQ_WSysVdJs404l7GsIv9TMe_Jb5DQJdIZKPZVCNw-Ld7xC0zlir-IXUuv0cAd_BMXUyZZkxqCXTYsUCnW-5VkMtQcwkOUrpywJN1tye9XXG31Q0cbNjLXLLcZxdh8nZbJoVKRat3IC0VC5ALGTo-FqaUtlF22bvOcsYtFiEnKswk5OvY3BTTrdmceX-R6S1MGhHlP45LJW1C-AhSWCtXVhJuSYbVay7Gp_apqAsqy-ZOVeEFKGGY4dPdtZLDk1B7Voe2mJxZug4y1hq7Tga1-deq_z0KIyL2MpJiJc1itfCwn0ZFfPPw0k6Xyyz6o0ocDNAc6eLSM1mfzQnMEFItfzZpIdZPwQash-FbeqSNkqMIDaaOQYsVI8XroKOYuN0OP1aDp3zdheZLGelUeciFh4Eod74QLsfulsLYJKtDYo2-tzL2fbUyNTERZK31ZWFH925S21NsYHO-IMrQxJ1NMt1nsyqTcfqOMbOl1al16xRtbeJh5rcSvriiPqCQDze34ElBFmPfGidwY_uVb-_IJTpFmhvyJNrdG5ysusz1vbNQTalZjXxJOt5NzZt8x2On8XLZs9k4PQBKSBFTLjZgKZRnY0-P2Pdc9ga9fRVRQW-kPrjyfCByHfOFCx4jMPaYufWXT0pEFlEAKDpP_2W1556Xe401590hIIR_sIvC9szq8t2dBoI0wdtYHu0YxwJfm-QhTWRNUg5yx9i2j9LHwbybMMipw49WgTQ679vSWaPKSZir8ThPkrEGVQdQWuqrs6Wky3cj8ZhrsVEhxwljFOPCTxmPFL2_-1sdbFnLz9Z85Pf60gZRLdb600N8vLQ40J6c8VOL5bqBfQiaX8gu9qjL7vVUEe8wmhT8JyItP7b9xTfiSrbhyycSf1_H1OiT_10JDR37Tx5kjCIKaCQHsd8vsuiyWKHzW973x9X3X2EzQoQgMOO0A-Q-MGJCUKkQGGAawPqqtN6RJKt3puzpN-Zqqp81dTHYZsGht0HPGMaJmv23S244J91JAuw14p3ZPQ8FatgXLntez58DXvaQ6_spvzebqwgdW7g5IpmQAko1qzVJUHc-5OG3pxaWJji0p95HYhY-KWQmar8iMKWWjBCEZL9UKkRqrvh6qDvEG3L_Ze7wZRNsh0QCDnK-1MQs8mrJCCoR61EbeVvOJ9XbWVymW1vk9KeFJyBC3GmVOLefygLehmFrEZF_VI8RxmxD_jySKU6I0nAmBMitzsmzsuQgB8uulN_CxD5PuRGLrLvEvnuyPBkm_tFc0cyeSE_zERkEGaDNPO-yhYBCSJs7D2q4kFxZ2rNcMhdn3frXsK6fScSXZI9NqmGRfT-2xxTYBoc9Q_07qCWBy33haGf7FwE295SgoZZi0u6DTDRmfn_GF_oL8XpKMMAuNypGf5ABVxQpfOKKtQ7InVrH9hzsVbMZBeRkb7oTcA9DUStkaYmik17rUBaMwr2RSn_h5muxt9Ze2A3I6_qbUThz6QC-NexXofNMgIQtDLNXOjLsQyyFtBh8_J7wg3GJy3a5Zx6m6Q_4yOZDlqGJo4zRMwwkOnqVKE6z-uYti8mUzQ15Enl3oE7309UCW7LtMk8TWy1Qr7_qK6G7lMjyRSc3HaxJKJezp69DeRcScR4cxNEp6qx8HiO1_aGgH_V7zMxQ-XIoOhZqrgxQ1kYV5IBuQAXMQ93aaMhe8xE6Ya6cTL2IIOjq0oa5g6DU3RyKcSGOWHp2sCjTobmQs6aETwMQY5MQSwB26uxqLu-PX4tA7eLZRyKU5kgXIgeVoDwPrfkM5lq-JwrrIDHRB0keCmXSYN1ZqK_6Qf5m9kq1xr91vNdaxOU7VB1v9s8wqP2IWH7-cUYeePG_N1MCGbuc7Xq6wU-Ww6RXlqN2ozl8q0yLvXycF13u8wLa-XGNVDgti3Gnmi6zofjRxckf8sVPLY69Szh1ouej7QLzOUh26Fam5JfWQy0wkQBWAi4rJxAc953-MOcDwHMMxzjsN4b6ABJAwYBxJc4YeYtTKqOBceIIsRtBP1wBoy_yg2W7_oFf290aE_u68VTKCXp_7XBwb5sFF5c2NxtMEMRq8nNqPxvCi9L9AjBSnc9ZnyLoDIw1s72iKU9kI-ZonC-riLkP9hOVD3PPQKxF-mPCxrSEMf9Bn4_Ulship7aljY8HNjjSENM7vqkdw-OvtrKYVpKQQn1lxwReUKEzGZ4zpsMhJXryzb49ePxXYHHspN66bdPfUC3CACcznwfypn1LrmwA8cvltapSq50GTlI15ONfIhtlsUErAQRXlLK95SwjFXqslQHQKzs0dRXbZwqTxqypH4UXaycObbBUA30BKtfJhh9EnoDjFZnnrpNUpYMHu_TVsYZxGhNXzZ7pAPQu_OWV
_HdOHnenygxSw4TjlDxRBTAF-lQxmKIABVfORjRz34GZSjLICQcOONW2H5s5jIM-SZ4EKoVnhJp8DpUf3805g0SF3u82hvWx4qBv6fEqboMpBCujl8xcH6VwDqiziQIMZABqe_hRdHxJIidRg5crGJWujeElZZHrDQUN8zQxY3TERhDcQ801os5fKfgfeKo2fFVW0ifIhe-nO1mMi33lRwQ-xEkUt2o05gDDDAXlXEztmO4cokGyvctyfmBHqkaarhV8LnTDZqcfxnI2DtYEI4LlU1l1ZVjBMI6B-onLsgYKF_7YNlluLRqDRaKHL2b2ZtK8JG17nfT9exIOMPCsFM99jVRfJ1fHUQzdYzmMughoVJ6OZtGBHduxsVthw0FcTQRf3YCVTV19hnByCcbCaJlVleYusZE82unfXwX9YDuqOwTzLqeSt5i5eFaqznQ3ZjNqVS7w-VfUmSdZ608fd54hu3RSVu1E3YS6wV8zsNcgjgCAqZmX9zZJtcMxU5enTBsDD0WYWkH6wDrzF_SRsvhJ2805mrbQKoPhnJqh9hGNWRAR9GcQsG1YcKym8ORF2DZkmoOXHCBzA5v_5SZIZi5Al6X7xxeNLGiOAugCI86pigPeMcTJYkGxKldr8EuRzMuIRXDpqg0qkWJf-Lfo0ucRTiTbxrb0QILUfd1vR84iAQ3TWm6ts45tKd52gzvUbN_RkEPbBAh-WF1CDw07am4PNuENZXKRKhyY0xUmj3cWoWk4b1CxQxzrzDez8f0c6frlFZ7mP9Qbgb5lauLXIrS5cDyWsv6tUkkuMNOdHwRND31k1vFsE1xfE1w7HNWtp0u_SbrXz4ontKK9DFc_hIys2AtED6Y4wVJrvVOHDynwoO_pZ6c1VlJB9OHuVB5XVwwu2MrEWdYJ0TfArhW6OVRmuj2RNyGt27EtG55OqpVAZOOgn6SOetf0q89zibU7E6HMrrUUrL2Su43YBAvEYafHeOemQQG1xecND3zcepu4mjyxjD08DWbhQrYe2YeyIuxLPpgYfzpJ6E0BkZVTlwNFoqz9yEvxTv7TdtqK8GuE_pYDE9CIyk0HpByKK-RZ8oidbKPbb8PSpS9bIt0duRELrwvJerQ2ZHCXsbOUFrcR6XO0EA20sLS6fVx0K6X9vsPHZQiI9HZWah-R-iUuL6ipJQ7DSCIROLcPflEF6VtzJCZA0LWBZ6hWLaJ7cYpB0jKr3WO17fe60V4B80pAYHKPyQSXEivvLbHQj9_QzmMRkA0FAMAneJdp3A5ci_IfEA65eJfr7VFjPaO1UXbN9FNXSac8OZ4SR2FOX0dgdPysyVbs0V-TG81Q_fdYMYmTCHqAP5YZRPE_-kYpMFmI17g9nksA7cGRHOrgeJVtDFAWvJWaUtenPy5tOStsNHnezx6lRAsHzYOhaV4UwTvTUXNtD5tJDQP9eHDnGDH0JxxnvIke-ovGlcnbkCE41rMLem48cdk-_a6QKbEwTE4SrMYmlGzMxBw597YRx3Y4zieM04uGp5aMlmqpAB0SpVJe_CS8OhrNyoI2Jcl3RaiggiPuj4IQzLqwVAKqL-2HrZh3OyEr0sFmmIng9aNI3ynl-tL9mgBM1hIv87wRyclMNjGylb5O97Q7Q5DI4V5HJjWq_5ZB9OG_MF_iTKpUnWeyJmnxy5TwUqbeWisxf8PyUCruWlY3ErYQZq9aWGPE1LcsRiAhtxn_WF24Zi5-lKY4Yl0m2KvE4ktRbfxdgqlySVkbhqUiIdty_FKcEjNOxMSR4x1zB6Yj3XlgRXCD8NF2IMVhxSdLFoCazS9mw0TjdR1Z7lvpRVJ3eENSDOYj0gZPdEwb0vnyBhsw1jv4ZrEmCIculP-r6HfBGsiuW9WHB20M_EaLQh4ZuMm6lpTa9ty-iOS-Cnw6RqzAu3r7KiIOZvjLcviBrxmRVBhdrRoU0NQ5M_YiH_YY_JlJX893YZKsfKqonZ6G_o9hWuM0ut_0-GN9CqgiKAh9JGUMP2fn1FOLQOuUWFY_1YK5Z_F1KK-lpsf14bghoBJgmqlkLYJYkeV-NlBoatMrcGPJZrFsfkYkI6cKJQxu5I9tmetTI7zI05tMLs0vkCpWvXs-PnBq3GuzEMwxUMvLjlFkTHqpf2Se8OKfZ3uftCrI4WI6b9POgFthcJBcaQ5MjeY-BJS7hCBDlchCun4aJrGMtVKE1z5f1Yxq9qdR_Z9XBtZ_BvtRmyI5y2iPU9Qi7-_gRxBOhXcgdOvU4bd_M18eBM8MypuTLECohqyW5BgJzwlYYoT-_evSkpArZNmDZC7loyV_eH4hv02NJOOP9cKE5JtvFt26CzrAwKQ9mt8tSDSYMsp44GwIPfDWFdfAAWjeOt3bmhJI6foFRi98qYkgFAHbN2CtEAR_Eepn6bdcc9XS56dLoXJrL77NIP2ScXIafzLvsyQ1OINABt1viXf48XQRRlLSS749vyTvB2kBHPTdQa7Zb6-40mjVNL2PUrQZZ-NOP4t82Oluc65ZE_96VyxbVgfzlIl9-w6ev_oll69ryRHdG3iJeun3rD22WfLGhyDySIMhnFDsGeXY3hg6oeI1KBuH8RZwFlI6PbDHN7Z6G9imwzWeyAZksJyIAMzAMo_aOb2c6PIWJJToVWUW-vNwggQGDWKF7LSpxGwUWL7DSuQm2V-vCYAjuXcnTxqwNGy0mF4YorTcttxq-gh3n76ymMz-vk-Ud5Dx7bII7konOCgwRSiFvMiIQYm7JN70_vlWm9wcZoJOGVHK6-kh1003ydGcPq90ZrzxeuMmfHa-U6j2cQjKtGAL0LBd0mQiHqS5DZD8XdEk9Yh7VrflxAocCSIOHXMlwvXWlNq9AdjHi7ou1dnBDQ9WMpiPQ0SQ1E1Q3VcyQlsRK80NFlIH1A4_BJ7XzSFJLEwRpgom7nktZQPOABnlFVT72fmpc7XXa9ret7H05fOuiV_3upBurfMHEQYsx9R9iIAN8wjO4OoUt9SmBMpLKpKaEP46mK7EySOTpIpoBSFQnZM51lFakXEJpt-xd4MDuT3etei9L9F_CDBL9WuKouP7qbLQkdxzvcRzUcZ0Or0omr7SrHIefVHGHa4PoM0l4y6qa4HEgCbG_wxFh3fP97PtY9ukxSg735bSXzc8KeGH70VHOcauYC1xdSWzvUj_Z98xDVvIboX2RO_H7aP-fpGlyjxuZr79PpeL9Z5JGTmHu5N48Ld-IAqCusq2tFaC6kCGMYzJoPFCzv5ZP0LlfmSUvGFtkO3pgZUf5O3v2LtEK5IoqXyqgBOqmDcfWxC4luel0oCBzeSgzL4Cmq4XFEzmIHXAR9nQjCy8JyJXh3kN_eHgAvV0c8fIKMKpZs8kb98D1G_4qG7_0GDAKlDifJnPVaK9yjhpnMUdfjAdWvcNiQy7rlxnCPbWjLL1e73Cew2RIDabPLS6Cz3Zz1Cqw38j-YQS2j8f_MwXbWrT42yzC_7CI7WwPN-CO6eRITlG1GB2Ot0xVxPETTum3pt-ZBB9mkmBVuAAHvhg-UF5BVbzjL3JyLXSHY_PEAX0rjgcyWWTv9B
FYUAUqtUACO-SNxLrYFjxI5yu6hyr3DpujgSOL1PhNsj_iKaKFENHydZObuSXai1Jk-V3YcGLIxsjTc0U2YwdQtvORIPW_33POCaJyeg7i_yzd2X37T6F0NHmQHwBYFNrSHlHcSbsNVM_E2q5xPJbRfXUtTrO_Glx-kNup4NOjcb9bfFA_q9LqzpT-RHWitAHb-Zx-SrdV807NpqL3wwkAqt44YxSenBtQJMoJSlkctaAguc7-mZNQ2ZN9cjAXx8uXx_IQ34irVC3-kJrP7wavYi9zA9xhZ3Lldo1FgLu_TWNkA8_4VZx91rLWChD9t0A9WjNwZtdzTH4qazAoIWRYQOLddavSz8IvNgyjlRBYKv2A-OKEKQ1JF4cF8qHoWKsII12aBMV1fXoz_oEbFHQ7nSrV3n8E5bqCjXpAddbta_a_dTf_TIDhEy8EE1NmSN48tTmZdVEpkXtLzJWt8cseKwQaessHFjsgUKywY6ZX5DsSNARZB7J1ZOYX08EHZ6rPNcBd7lFaNmJWd33W3raMRMkHhiHPtG8zNa8sUo-xe18Ojdw3BsWU4y0uGdc6v3L63VEDsHX1WSZlIgpUEjiT234JJNRZBhcpD8pZ-tGdXPZG2fRI2Bf79WomyXZkNSG0jAXROI74CKpntFjCwGKvA1vdCy1nlEi-gVplUjXQjez3SYZEh3Gy5siipSAKg8Kz0-zuIebZw-ZNnNq38na3dcJA3iqEzRlOb9QILxebViSffrjC68f6PmV7FrxrNB6EH-etBjBuImfFXrD4zl0cH4bRF_hcDCnoK4K0zpRb-MFgddsGenoGMBOG9kL9dYmfgFfWruzXrArP-8k1P4gvbkbOLVchobtm8Kt1ydX3f2K0TQTwsar-CChXE7iBr2wgkYsm75JmB-FXmfdwRsEmZvN4eSuLlhDitSmo3NjlyQ6VnOuR_fDEjs_PhEhegdbPX5uaBbgnvNeG-uYPj6ekl6gwxdlexYNvY6SUVxxgFzR7iAoAry7jyNbGQnXtEJ2652VpypME2rCQzROkHoo2MOKoSdygzolqhE3-eMmYCy9E3fmbwfk5vOk-rt06UVw8NOiZIGMnDsOCjn4fwIZHjUCKI7GhPBZYLiRELuQVnVuPyrPL7B0jfv89qqBNYswvHAbUAlGA998XXJu57QZjt479bKoHpOkADXM1Be6hBKt84YNCpgZMoo8UdVn_taDj8Gk6eGpH4CfcjqACF5eN-D2J_xlAX1ZonMuZGXQk_8xNVgAQX3z1FN1DAh2ttVRsjZS_RAAJIV8jP1evAcjYZerFSK7rQaRsei-DRWZHITXyzvJ_1ZgwfIT2z7SrP7oatmEtJH73OAxj2447_Z_BkeKgvQIpAFhPqa4G7nerLtdvXhayR8V62xu1PGoh9YrOJSyemVaUBl4DIdPrbKrNKW8dK6EqEHMZhBTYVpDgDMbSgmRui3aJl5ps5wF5ykKgegUVUnyOeBDqJCpuQ2IQLjtAWjgdAXZWn042tmQ1Wo63S60AVRBZ3V4Tw3MWacBVG7hbfzInEhTu8LF-QzrvVtMNSyki3dOSPvISkv3JRVi5dHSXAOK-bX80pG3r8jbTJ45dRw_yeCFT4Hh5d4LmLVUSqF9IRU4xpNBCM_4cvRaXRlXDKuNoNDGR_K0qfWK48l3A5Q4ZLQdEDeXjAbJfavGE4jLlXUWqKgmkMhx_pcDa_-yGKTjYbzQNYNmwuLsbFNiKiUYjtgNnA1bsbiYDKzvvUUf8_PhW8VcE7uMqkYEO_KpCk1K9rSt1OgEPT-YbQ0uwUBUaarJz82T8MyVgvkapMKCYmTDECS8eoMGVVBuZ9rjTCkxwcTC-cAt63n-ZW5KCmK0CpMA-dAnUbL9vmujgjBfXvF6EsYDSY3U375vHCLBn7xBYpq4RdJ_UlPK51v6P7KzurYEywr1W-M5CoxwidOHIiDYR1tBmx1NXzVKCJmv6fHUKZuV3_LV2Li5JBpzy4E_SUDmf8TUiL5xmI05-R9Qq7S-DG_OT3yY66TCgMvFX93Zq_kVJ0yahRrrUEqnWtONdHY3zfOpqSn3KM0Gggk2a7kZSZHUO0qDTVX0H0VkbDGrx_Gz82sYrFaN7juG-7wWw-ySR_oTcqFZUfyUP-Xrdu2YeQdcO7WgLdOpsoBLOX9zR5WhfiphszoWiWKbht_V9BAeo8LOOmHqtyTAkkC3xO0_UUK1Yxy6IBQN3BU9uO9XEE0poZOszjV4JXp68u1acG9xg12wWLOp36E30AoK4WBxtAf2Vfxjsi-fxp9E5_VefsO4AyvIDiMClq6nZBxIZrmbc8PoVdhTPPSucbAATT_1MthsCnTjWss7x5VwFJIYP6p5bUaMSt3yPRZZYuwlIOCmhcYXaodLAXJpEPCxpWXFDMpTN50CUGYG6rr9oFGzaHGP6NEyjTxb7nABkqg5ChdDyN6VH17DisTdt1LZku4uex77Gd4rRBaIhSWkFlDbyMPsAh9mqM_6T41YmTLki7MjZ4Bqd5toHetuD4OTUzfTiDaEj3JwJosjOGu9HmdURMMfFhHA0Hme5w3v9Y5SH4evUVbV7Sq25elYP0NH73zWKeMHWbO77667wo8ZBhdS2Ag-ib0-kJ9ASbN1T_sm6prGOWppzFgQ1XKVoN-rAOpad1g2G0BzXk26crGtlS9f3X1j-Gq91M4VL34VF7BWNV210q_GqtpNaWPN7mutbyLPk6XxBleZs0q8Pg1UsK7RBYFRjkUKK28nyclDxwQBwAzDIlcp4mTghjxWgzgnkmAcG6LgdxYwKqqZ3DZus7Y14WnuKAZvUdDQLFJKjEnLJYCRW_m_i4Owf0VJImRjJAWf5VStRqXU2fHw5qYof04TFamGbrDBHUhf1t4W06UXMVgVQf-nWmd0Xa2PKU85728W2b73IW33wGlvcu2dR9LbWYPf3lP_FW_ix70MMAhObKsxWhO9lsaGL37VEkx5J8FYJm4vovFQ6l6gdMu6Kkcvmg9UG0vcizlBFj1ynSzlSTDPG9xJIrIpRqgTm_bYeU_9FOuLTlY27JB6fQaydUR8FoU12OdO5_efPXw29d7tGMdE1Q-AmeNvLr-ewwVi8FnU_Wnp0JhUBvMxg5c_6Xf7fiR7Znikx5dR84OYW8JeH7adH5pGuUc8iI1pRGVTr4L8XGwNTKL6LQIMCBrjnZtHMvrv1X4hBoIBP7sL9oUcdJvl6O9dsANyCvq6gwqQBKnBMsZKgs5p4L6ikFtNVKub60oFkISiJMqdNPqHpDGrP2eccsZhJFmHEa_G6BNrFSOyAo-fcLVqvac110ewhlsRxmCf7mTIXLmYHUHeUUdzcBDRJkQ8_z-dJ4_r2Fv0hP6yS03F1Wp8ktHIg4s5_QbYtKoWIuxpoErOufk3nSYgVkSAzMKjVwuwbT2XjWdtN-1A284rfLkx5Tbl_l5422offKGfnXRK8CGN3rrWfyoFAFko6oyUXIDNUyXQYHQZLhPb9HJODBk-dS04aw4cmLvpFq6YVdoUDB0-dZg6-ZNN-U2nlV_KWdYZamvnljckk0coE_
KSnIZzkwl7xBSrZqCCqMLsZBmeprxomSUcx9JNluxaEfQpm4Y03eKZUT4Kg3S4gSZ61yXTf3zb2-jIF9DDZldzVEPsl03ilwtmH7OLoyELeMGpgH7maJmtbbmqbzEGnwgpVM7Qcl3-A94S5cbTkFIpAEEEWD--DhoDxYkme9K_2TN6jhXoqwmhA_sIM4XiGPjvXanaWpWvQlcHvgiDpm3SG2nIv70X38NyZ72gC6N4Tq5OpuD0gBJRFsdO_oR0fVZTL8aR73apeR2gqUVRTHl8GPIUIxHPqI2ItRR29No1YbjcZtzBpDTqiSGA0HqJXxyMzfRrS2Kl5tOEDok2ohgTEmIrRS6wuYOni7iBCOsxpg5uQ4ZEt8mmW2IQlsDCkLzz2efpqdDoefEP79ID6YzLyNMfwMdSSLaxw0kZjPlnPrh9ZXx35iEcDivu-H65lUGdCizflalDU2NpMlbpsTZobaRKPN-qq8TQDI7tTImm21Azw2wr4uSbPE5aKpwKRj01qSNg7yNszjizDkWgl86DpZ2eGh19xtK0YO0wq3H-rE8gq3ceqsirR169WrQnmQSSvwl4gWlGlPR_3ljeJQObdffgexTMZEFj-oc78VYJ0rK4EgUJNEyfcY80mxPekYf3pwpJdtVaDfKP9nAbYUk8M0T3HSSSiRY06Mzuts6vRKFK6U0kZepdv2vU3Vt7ORYF1Nm_29LXKZZzwpyI4Hzf4lPUXEioNSIKs4flLmltmzKs7kFkEDiT8VKLvthEbcrrU5qBhu0qjq-S2UMNJhP1-ePVAS4sdcR0ZZo6jfAWt9WSHGQow81Ffy_S-6DYhsv-OqYmgvzKoyZ8BqYIfRBhaIase52J1P_JnHmUEZ_bNJk5-notLn-O-3VeekBdMFu0Op1sJYszuiQ3S-UTd5zJh7IlsjfhogaovDos0H7GpjD36KBSE7QMQD5rSjit8u2dg24Yc6BgpXL1n8oxDMG82Q59p3pnUi-2xQgH249He3InAGPQo49o_-EV3JtSZ0TN570eN4w7eyCsbW2qO4w8X5wI1-Xw1pDGEOzE_tRN3J96-IqFYk1FAr_G1kctilv71FLeI1uqR13F1EgGoWt2sUey2T373pcScRWmq82KYuBwkL0ydCWNof92tjn_TXsRyRJO5fDTlmie4ZrIzs0Mbdow0Jm7zDiW6hzJj3Bru1GhUjd7ouQOlPxeLzMYZly-E9ebSnlIpTfCdvranEW90iJyh9mZMX8sYsKpQTx7WpF1Xeh4dFdeAjP0d_yAWWi-45Kf-1Wg3s4Y1al3V3SrTFehQ4gkRa2HA9u0dTn7ZVnDjduQ_V3LHNa4B0iS0c9BizxZU1LUlz-XX6xOySMZB8rgAdpc78i_XOdBUJMWHqVZF7fKuK0Cs_sQdbgVuW3dJMw-HO7OfAbDOYbkaIB6heY6fhzt71m4I0oimwIKhVAqJ15tewbRCkmI6dnOQzmI2_cGwowPNpgL-rbw4LqYZ6C00WE4tq8HrVWlKS3zg9hYqbSm2LqNrYmr8MtaY7Z58g6HANZbXZmJAanOW6MGY0_yoD0cLb2rWDE8nBa-2njy26a2gjrFB4Rh-ubqQZBOMeQeKvQhvBhryxnA0QZo4YuTURP34rlukElzpX531BVm-YcMeo8nq5w2_qjz-DYFY6ssrIYJuXREABBQEy8ZNv-xub59itlf4tL4HrHBD6KTRaoyPaYswxlnML3X9mfxkLMLI76DVSm83rhNtCtXGkAgUwVeqIpvNm9GskNqW2BlQaSLdk7_InNjJk-6l7rLt9i7fm5jzsyLsfoVL2wFS4EmLAhjsV944zgmgsYfIy5jWtKDnMPDhH5-sJWrqvKgjpYuUGmyFCRuLIRbHParA7DEJaH6L2TutXWGnFHu-WnMG3T3T_erd_YFA2a6zm0eUntwBLi392wnKVUMQyhOp009sw_Pm_3wZ1vmQp_C1tlJgcRulg85l71ZZ3PtBIcYFUjGLtQK1359LHV-nuI69BovOlrH4gnNpHbOzKhCywRE6MdKvdBQiKmH4dsjNHPbnNfwzI6K9l1PBRog3xdlw8sbs1sWq2iJD1ZplU-LLX3nTmXwK9f26cQHrpMuv2URZB0dAzHl4sbgY-OkVlpl1nHJ7RqyJFOlhNSbQsGqXZh0_0DkR3r8cmX2gUAHl9Co1yKgm_vE8l1IGKNKAt0EKnOmOSBu73hfZLUnMe_2CK75-ETF3BGB8QlL9_ZS8WYCexiKtsLvPzd_aVh7R23xR27kudY6xiMz12fURasjvs7BKypshLV_6AzT51uoSf7k2elfOBJ5VKs2YAJeWzbI-iBCKAFADGcIG5DCLtrpj9SG7JX5BM6zsvE4rbWL6KjCv27kb1ymrAAMTpWr-H3aB8w3dSoLgKaUs8D1SnuN0csLP-dw1bObHGzdS2yz82a8m7VN85T6e6sv994DLey1eg-DO6dvv5aD2vR-dZirlV-ejtHZzkFfiWu5zps5oUcBjuqkCDK-_O7IIVG1gdeSzGrXWEn9EM6zxkLyBnwb6WipJ6y99_D2hxcg5-24fPjgyZxyMh9AaMWg0rLW_B3VcOoSXSmtTDk5bQQOnvoIM5cyEU7Nf2_rWtsn_v1JPhxH1dc5VU-DEkkC0jWBc4jzFwV4ghVOKPM6akU5do8EqEsxZys8ZzAoaaWybBUyrVKPCerGn-BloK2C67NWlbbDESItPFBkmRZA6xOLwupa91d1edZJXEyLkE7cylz7zu3nWAepoAlhM26MDyvbnY_P0vOh9yn7PPalC6FL_ZweTXttP3E5xp1mYKwBo52ssFweLW4BE52z-z3lHD23VNMcz7HAGeetFaGD9y8adi5sUb3QvV5h0Hj5BbQod7wTDZoZWKsrIRmJp_wDNPZxp649VNPfCyUQ_IOu1D5zmPUZgmmwzh1oIKkGNlHEKoCXgoGyemkoCHtjZu_U1tdSEJSK-P0zQc0kZ8aAISjEv9UiCjsE1Q6kzNerBifltk-D19aYGkOgH9NL1iRgNLOMbRvs4x_sIL1cR6fgK31JtKnY-IBG_7o8SRrNgDSvYm1hg22Khoz0kMoUjt_ruzQpPaNrooBkfsipQ7fpQ4xYM4I2sCB88rBYg3c38326Y2yDvCRme4tGiMOayUYrd3eOh1Z-cZU2MX4NHO-YINHWyBEjDtxyenE5oX0wg3NkCVE1mOLTmMX3iD5aPta2U-bx4SEiZRChONKi_ty5f08F1ay1NMrJDtLKoclQMcNojZdMnWtqnpgxlp1uqJstXq9ad6kk9LG-o06FyEFMGS7rqHAOBl3n9xEUdx1ZCD3ye_bChpieljs9nPVuR78rT7uQDU0ozzIzswg15Ek84OZ097HGZgzrAs2NhEoGKJ5gdOz4ePai5d2srK50nB9Co4WcU78zGSPWz5226zyAF9qS3WscKN9XhcqlL_sPZo70iNZLfEWTMKdqjWppxClwRidNuQi63BIx7URsvsmHMLzK9umOQjppoYjZaP506ajxY8jPU7OgSutMnBsMIpqG0rbkF6vzM9ZiXgK1gX21B
zVb_Sm_bYu9v-j5V94g-12lEpoD-hcZywPo9iI-ARk0VxYcrhbaaZHQdMCtZx1MNvMW1pyStObuwRDLikLJHS2uzft9YQ6G_8sc6l3yJ5QNlKPFlBlbSc-XiFwv2yfqXSZcNZfnwocUJTp2l3U8UDArpoV304HUZY-t7voKuomhSXOJvZvl83WLcXH0KKkg11hJcSH6f5ui2CZwcqeSU42q-TL4vSb5oEVBTCTFUT0T22bmNyzlAs4vFRkTUUEmJKzDKdgIvQDLvBBTMFvLa_0rv5O-168RJ2My2xfHNmNhlgDTJ_GKXYGD1IAg5EFzickz3pItfViYion1K9RRm6Sf70-Vo4OwyVn2sF3fywsAfEoILc95oXmj3QDfh3oBu-K_sPrpSpNDl6JDGivbJO_JBWZ193oINJN19ZwPbDJd-QDvitzsmlue6P1-kV_v-MGolLvh14CPG4BjQ5fghjlCwmKH-gI7HdrUXMEz62OlHLWg9jQITvBI_lj8s8z5Jm_xXC17cxcvmu7sP8y-KruU5uHdxa8ONMWfGXVY2I1RiyIn3Pt7fn5k6mTTMZROSviJQ6QW8P_cBhtdyo-WifRkquuyNqJrijHOMAif2pMI_IM61MtZL7NaCpWqkYEPfjxj9tQxk2SSwGa4SxLQvhi3SgzV5BqXbsxcBBUeq0QOx_mP5FpJYqmKj43eN2KuPF0Al2cV9lb0HGnXxkyQL_dMTOxKZWL66JyHFhpgx8KjWiSSO2AMR6FBQ28yfIwGB8ADrbSpoBPc6C3ikM7WCjVNh1YUK6i4SL8d5BoQA0judo_-ecppvwOx0Sa9zH7uab8TMB1zh1YckRuO_6EugARIuhpvufQ06Sgx7uLmVb_m-7e5EnmmFWUdKF7rxDctVkwcVjw2gFsa9P_U1CbTFMNhXxxU3BGz72QdSwWUVFQQ0THjp335BONOQvLIVPvc-dPmEgXx1VCfl0RcU3xcDgGPpp9-vrVMnhV3GiLQCpeZFo8n8DmeJGbx3VieceBSk5ZcvjbfA-I37qx10isnmE69VD9_hJOr1XH9G8j4T03ypHazwQP9tL9wq4f5iR6zpjxjrBhBTDLlVzaBMGi37Bzj5a9eVibdWAl6F7fDehLMHYyi_Edf5LrBcAuxnXAb1A73EgrrlWqWXZIU4gAZ9RQrQvm6zRGJsnNLRditrouk9m-dfuaKn6kxpKuaoYowD5e-4nvow4t4Bsmn-NHEajG3NompaHiVIGdj3_O854mSsmC8k1-UWgzGtMXYGv5PEXNYTv2MwyUfzaVnfUxWAAO7yxIjCD0O1Uhi4Y1M5UQI-PxeHn7WR5bs4E0OisGSqnDboFswALVypT4FZDiCykFH6r3HTHfgeXeEgF8xa9VqAjhnRVY7JlE7BYlan8-gdt2_-ra-M2iRks6oeeGn-QwAxADu9MrIUoZnL7mf1V4UeysSQc7DjDx9_Y1--Ov1BGzsZFPcOl1X77KWgA6Wx3RY_y_A3__rpAD9u4TB2ya1fJJEbKY41AMvyTCkzjlwSkqU9DnM5p9Y_4NzaFFGZy93B11QvX7w5yG-o5_wjwBCVg1P-OuZP9gmY1kQDXVMfypUb-Rgeu8ZsJgN-3adDuy-xsSlLZiikvJc3laWDvtLPQWv5UZXWmmVzTJAbsLiEYagScYbl0henXp-H6quflZixpcGl_Y8v5eSaACs4RGXYtVK0j6BkZOgxkfO1QmPLjPWVcB9jcKR7XpIaW-d_YOOW28IK0UtYQUrTC7aRj4gfFbipaGhSqjP8AJyTtvk5DZkNypQF7SxL391l8NIIhbsoc_6wTTkgSx0iWnnKfc4pMASzOroopawyT1IA2pr3Cl8YbQyhDRUR5jLzrpHDFUjs5RZjMyMb0LSk4OMAmMQ8OrRxsFvN4g3tf4meLnsvPTgMcqZNPTEc7_uDpkNKIeY_cDGgHlpnnx33zcyrMrnnYczyzUCNrrhF9TNmY7LWvunsLArcnUbqvMuK_sMimdk8fC5yaELQOZao8pIt0qF0b-4TGvXm8m1D0lbXF7zaFPq84XMfeqiSgoKlmvhv-zj3nTx98Zv5jW21verOp7IXaRySsz4AURbsazEYbsN82DKlX6Y0pTiHGlsCLP073ze1bUlsujY-UCdu7It5WWbei33VH_fkmNcwGI96q--Pzt7p02XKLrGOVKbf_Fd_VkogIpZIZA7QEiVKsTD0yZnpTzjmCGVtw-daj-og24wHWYMCoVQ4G7FmPI5YrQNLy2x7UWb_BTPsPAe78Mbli0roB5xRuWSzVLk__rQVXFt3yP9TjB9wH7n-iteLIbgKU-GLAqKSo2770cDaMIgHdy3a5TS-YCNIQbsWND4LyKam-OhcNybs8m-s1QFGEr3rLO5PfOR7HZQIyrS7Z_Tlzg_ay_YDH7E8MUh6KpWJHW_oQRFv9xuR0eGjLR4VSK_AMmIP7XJ1wHy7bdtpeOT8WVPM3vfgGQWx9H6L0SPV0vB7t8yppDOINIrDCtsbgoHySWaG1Usm-wqwTzHlEAEUFuYRPLXwxHYcLP21BCWc0McaZEoCaLFtymBw4EkEOh2hoQP4AM9BgOD0SceFbSU5wM2kvV8JHFUANTAsnMR-EK0tokSIpTvSah9CZlL-i1Apa9WKz5A4K3AFPNAjbrd-NS1gbtwAYt7xy7m2sq71pYHKpQet5UdMGKVpRPifPLw9TQS5am-I7OEM-Ses9kKBExIJqVIV69C_RPcWY0bDkQ5Yff6wyvH0VSuHxvivMXfuAmN3v7TARpcoxVO488sDnxIkOWVbWDagQyZRnQrqL6nq-Xyv7kgPmS7CLWdM_zZri71Z3ynoUDceYTu1o8sRg5oWPi_XWqUKALH-HHBifqNMnI_PmP5Jl0HWPly916cxvhrqj84RX3XfQDfDjaL9OjWJcgeGJH3xC-5dLVe_dLsmGq3OI4or-8An5qsTiENlF6rMPjfoXlnYpLJZEvIzJhEq7f8so1cIe2iV0WWTxUN-RxBl6AxVLC0lhnOCtPlSZX4lVpyf41wdN0EF2xee0RXsJ0p4_eN5htXMO4VyGQPKdr7vMrjJyB-Bog5BMlE5_233hrd8aeyXw02Tl5_FwZCXao45txW3hvoOE2JdgIB-p6v0JaRqEdX9rTDXxvcanaA98oAd5-6j18hmoJIPOCu_VZubOT4NJWKszdj-E4S-IlVtTVoy2zZWiI4W3o73hidoPzYvncLTeiPsEd6L5JWEaEUbdkSS0WuC0HdZ88PBVddUsAgPjlIPlQ1MrOh9CtTy9ubzWa0dwTTaBZfNovKBAhSqieFFtWKMmfRmhuglJ4mdqkENyziiyXB9X_dfVYJEu1msuRrPrC331Io-gyUfKQRD4_QgTHbk-EY1VixqcKpK20sOp0H4lrxKfqCQEbPtmVTTtM4kmt1JZffJ4iBPYH8_UDCRizFgkqJOxFvcmlB4IzwaOOsbTVamel5XsX3UNC4mBrK5EBQyHz2bbGFgW_UJQkznTFXxlnWNY1FzNAoTngda_h4KWMh_1AUfegomfSnlu1PAv7c
Jf-v752hrPiu9_rtshZ4_uEPOdnjT0Bak82Lqc53zhpS89Lg7e_wl17_5I4RLGSQOZ6k3TsjwwU15t2QgMB_5IkZJcSMoq99xYmpnmjFfGcFhqhPKouXsWvdd5P85ypQmPUxSSwNCc0qQWfJA6_t5H8lMnwfnUaXBofJ7IljuNp5XUq5aVy4IyyEoRBCgrL50PsfZnjOqZ2c7L8KNeMUskeVSndgEl8kdchuzTMdB5JHTmJ9IVpnSPItTSK2eDzfPafB7g6TKkB15RzZPCMzdLqMk-uS_fXwRpYxAE2nmgxPROOS9soPN58pznM1wRl_-f4jfMG2LGqjGMEvVCPUDSvHC3Em7Y2uG3ZB7wIXmGVYakpF9EMCBpiYMBMhRdwVSb4kwx-NtT75cCWYnYBrs9gvw269sjybqtjyUuQ6y8fIFVp2K5NZFXn_evGtLQA0zX0OU6G9uWehSSAuvcsZL6Rv2FpZ8ofFQGHcw_oF0oA4bfhvDhO6lLpo2V2GhGmUaDqLu1uZrzrAoVw9eJcvAh-GFVzteLGb-8ejERNr4t7Zj0kbtR_429j527pvopE_V6aM42tl2fW36F0U_v4yn-wm3S6-8wd_mHuNwEoZ8WuENH-Z8OP1jF_aEyrCfgvLnoVXC73jrWQHJF4VrWqFF85VzGQqW66zQ1Fg-0vpy4jVra7FNzEKDR3KY6xotez2n-g49PTYqx_74LyP7nNmqunXMqZGEbvdRMLYa6JQXN8Vnzgluk46NkW0mOEUtlp95LX6zFPGE1ipSTtIgBYmH2f3l-jy6intVaUudu1_JJNa9RNIXZf5jKTHfwffmX_9-6BbBCZn-anP4Q09NTLfgKk4ccn7Jy8kR84h0RvwKwRLThndC36ovhzjEY6d1p8gnBc1Ep3zQX4od9EQSqBa_cF9xMvcoZLioEhpvMjYc5I-rq9CzUh9yZAAtfDCb4QABc7EaE-oUX0Tcx13xa73mGnDN9IT-DHelAiVF12rVy68Gnkiy1s8_iQ62IkKefEHZ2JL4MndlOfFqodTF1KpGYwR_OwDqUCcvq5qemidK39k_A-Ed7wlv4E4pa-fR7-zQe-AE8Y-n3n2n9R_Wxnx8jJb5j19sFcDt3_i4QUBY9_NS34szjPQIWNQguz-QCfK9tRLoB8OPJI5TS3R6z1PHBPwJS0tFponlMco84AvWNdXyCT79ND5W9CkMg7QmswQGO0jxQ8gaoFCeBIPVF1bkMNEzbQONnQwRgVwFftxQKRPvUhy_qo091IgMar0a0R2y6J9eZ9e_OMEG3TYbhCsO0n9XbWtOtz5d1F1OGZp6AOy4zacVlGDd2aQWJGEUy1iPyan31u8QsxivYJ1iGK-6ZzF7l1-7eDCCrEUTXPc_YCnKwxn2TrhBlCA5eUQFynpPE-3inllijOCN_bXG0MRifUF4s6Pgd-NXE-hivKBNri0gd1XcM2nxeJAQ75dRQ229hvl1jyEd-eYFpDdXmL3d-vS1kfdzF6AtaX0HP8pm2dA5CgBqoyPkBe8JMt1r6GLQJqdZMAf4t-T_LOyRlcd9sEA1TmS-5t0gp02ShDUgDMSAFYbadppU1MNOlweVPZw8yNgqENa0vdJ14euWqfogUkUxp9Wv2fllSc2Dlq95UVrSg3MYALrNNZqGwBoeLWeqvIVXEM9Y91l4hrmak1_M6ODevFEhiqrbGq3WUEz9kzt-krXbnJsBTCrEBReO0rHlN_r40QUIVxEgmRfj5TzdhSgEocDhM4cjtlVx92NS5ZtfsrohpdoqTvkhmpDSHH7294zaK9n7oSfKx15831ipubAxbEJA1K5f_cC2OHdmALLJI6OKE6dsxa-gKEUlLCIo2AXgjeXPC-Q9nSWtD4YxuGgZ6J8Uf3ZjgOwI6RVAkv-FydvbtuFttOnQl-yj6YLIWlu1Ma4V4OIISfVhpQu19xUqOBd73VF6ON0tBkneEjBaWcRc6xld_EE04JVQokuRADa8Kl8Jr67TIWCWf2BdNBbV2Ww6AVSCE0J_nHn4gg_K9c2SYd_jyvmO7QtSszckZuTl7jqxI7qmeTo-9pLLSj0ur2K8nw6AIBJxKKXC1uW2V-APxoVPEn1C2njWOPiUjB0eAOKu9e2Hd5D5GSMy5vrlh-ikNAY6XcrlACWrkGHVoGNb5AufPWlLbB1r6aLNT7Oe0LtXeenGjkUg0DXFE2Sm2A45NEBHhkaOV-tDw3w5qiDnjG8Q5SZ5r9W6fo4NaSCwPpdnFZjiViDaApKopGdFtw38mXEY_UHnIJC3niFNTLB8jkiR6vWTVfwxrJx_H_Rn-pqXe2D7dtsWf2xp_-NxgSAyOgdWgA-LdC9owf4mcV0jJwCM4TmOYI-TILLeh-OCS57UNqS4FJKBaGyTuIXRKsL8Eu9gMD8-0Q-frDDav3ra_iElTHlXDhdbiTGdFchnJpYS0qu3Tnn_NhzQoa0lbz8s_mCJ_ztpI2FBoTgbYoirdFhFNw1ocl_DVdAXl0IXRn_Z3ZQ3TEcp0e6oXFkK9pUVBsS1_CnfFB_QVGz7oym6OzZUYiLRCBN0MMoLfLzD_A_TDM-cDnRr2B8m-SM-YkLPvfQcu_ONfxwOwWd1nOa58UF9-T2PSy0wu_ECuVXN9UjRj-jzfNzs8PCpFgfA9DYi6m16oJ9_d03znXkJvRbeoD6uwsU2ZN-PcG_BI5vpiJPLFpfnU6DcoA3_NIPu6GxPZwBwrbAnBHAmks8Vii-UDJ_VCLg79gMWSDwBmbYdV1RTi75tFwxTuNzOl2MRR6n7n3gxQo48J3Kx9BN2j9JbfPMVdds0DGgS0-NAZL41-aPeD97V502EHedpw5-qolqhwMaJF5y2AMGqVivri80JEi4GsZcDnSDe18INamGyZ-q0mDWvZBPSz3Kj5lml5AHMCs4IV5Qr-pbvAg5O1pYYIQy8QUzCLdLUvUbYN2nnR06PucKpQZDuHeIktXQWPCRxrtSDyL8CcNt8H8zGAtDsiUroZJriad986X40_51ASyDG1Csq4qMlMn7IMzWULqtb8LN1oEAvHw5wKF6TXXE7b7vrm-ySmGB-zy1CxBH1uOJErEeW983A9roh4qEKc9xq0jAGyAk0j5Q5uC9D93TJ-TVUwg1PSwadoiDEb2IEj8EAwWQsLoTnOw0i6lw5acrxuEEFAewkJ4rEJfPDBQkCU0YYPQLmZcscuzhT1B6_EwWkEMX_NIrq90v69aDWIghHslKLN29szizglvK7QGoqn8uH4YUoPh42FWCj-_k745OnFjJG9Je59OJljklu8EnG5TL5GIsf9jHC46wOvrEmkFZZBdyrGVFCKkVb3ARNnKWvf6ryuvIqG3fskULpOWlJGvx6Rkq1UiT2ObXI0dAfyTwJWwy24I7mnAH4BRwX6kttgPlHMtzG5gdB25YWIlGEDuYX04q0r1KNbhvBYXO85SlSSGskGVLW-MIQnKJ9pAPEGM8Qb9bxam1HXsDemqKdaAN1ZtioGmo5XRrPvKQKNEzsjrmIZXL6sDdSZUi463_-eNnqUpDwkzIVZJnM9u_033HLg2
ml71NhEM6lfLXbwbyMnJB-f07hI6VoE47pDFBPLHfwuUI4eBxSeIZ_t2LrhM6jv2lL8b85FUouFWzaTWjne9qmBb8kffzeFwj6t2ZMLwmWRyDED6FE0pMJFP7GItGV58QngVnU3EXJ6SyhKCaiViajjjleQB41uIiIZfTdRItFD2cmokjpuLaMrv85WIoygY4lbweS8IZWAir5Xl5FpKz9RtwsdbRm8Y0-TuyJw21vuFVWlDXkuvUPXk8ac_bHRo5qzdhOKxQwmF7d_GFTLk0pzh0HdFrCn4RsdlNZvcnGp8slUP6WlPc_aCHfRSwYekE69ELI0Q2K7WTxYHIaPplhbJEPh1o_5kYFmsVx2XtOsrgASxWzroyBrclu0hjzHLzNlUZRN8CinRJa5HcndiUKzHmISagJwxNbnv-loqVATObMu4q0wT04RQyGQLYhlQHUtea1RYIw-6tmE5ctC9przZpWKTvBCW14Tb9NL7saPFhvCPU9PXdv2EspD4z1S4d4fKuiCiRLyFnCepneT133azsAHbefrvvBy_33DMgOHi6IP0EelifGyC5fCB3nX2RVm8dARLOK7x_DQFHqPReus2ZhpyjO_NXlyoaT3m1pXDkWfik3QthIIraXG9HieyCK7JZzkBhnpSzyjitTvtWs5FfqWBHXInC1aXfH7jfULhpkP94kEHSFY6OBKKH5IhIPUCY3-ojMlDFNQf2zfklP9_i0tTyXOyZR8239W2SBi2fOdNVqJbO-cMSOiiq8--CWNrmtERcZ_KAWZA9NHpVeBFn2R1IfOYAQfY88uenbY2fF091GFfjq__JDLE6KuB1XIoxEMz7fuzk8SxcRzarq7BTRt3ed0ub_tmOP1uKyLJiuAVYx4-pkceAtm_Vdyh2GfkX-ASIFcfIhFKjRObLZ6oZebdBSw7tBf5tRnlC8omfFR8t5at9GP-uy9v0DneNU922GqJw_zkjJx5SM1EVkoRYbgn5_Di6z6NMgG1Y_HkGVLD2qgMWCth6h9lhWqocCsSQaglEnrXqE9CsrEmPy3r2vI7s5usaW376vxRfwjyc0aOWTY1-WkJzHFqLuBBjU0-GDtVWkHUrUAodZAmm8X0I34LUb8TfVCRjwpt4MFORN9j1ad8JA1H8mJlKIK_iWR_KIu3JIQHwy0zMURAYkiS8-4W2iycVR52dkjo-KpczeychRo9_v8uhMcmHgL-1qITh-n6ZLo6KPXJ9-xTuyzGjb1QmStr-720l4GUJvJfTyshX-VIiXkxH7tTZNYoVOBxg_VFxohR8U6Cnmdplmc0x_LgZFnYScC-EgqZU6KqrQs9XKQDEOWM-NzB2Qyix1zhPgxXFdWORRZVU37EpWAbyFkD7g-LuwVNYha-uLmM75hmjshiNnjUElQkDPeUcFh-WIs0UcblAtUZw9MQ07yhzDIOkG64GgoZ8iwuUiEQJ7EfwAMoYO9S15IDAselldjbP0ElnwfjxfHouTZQMGaotzbwLMNOIiRBHz4ICKW8R1xbG1Il6DioJt92qh42ae7121T-HsaE3LOLmOclhn5Al8RsfpBh4igXZbQ_3uZUXG-FakZ0wj0eK_lOD2-_yFXnATPuMxfc7CX13V3RJBp2hfK8B_4lyD6x_G_H2uyCtCAv0Ptvcr34VfV4VraT2ompRftiJPAex_u9qM9c9K5auDQ4N-oeejBMqnvV21fd4AUk6DoGeLdoo5HcbWxyNtpwjZmLNAXp7wkLAE-c3gyyOxJ3ChEYgxClbvtSLsCFRE1FfQRJLKqo3VJ9XmjYLkFoRGST_6-bmIqYhmjlsblY2-Gb8icM-vypD8B1ypj0HGqvmTFQ9jSfyIRTw2S-vib-uEIwf0o492FNCXEK4InOl2Ct27mdWdOieieXRvu_2U9a4UnesSEt_ZZF5VJ6TXrnTYqP6StiWWjNc687vyW74Kn36D6ey8siqKCQvQnQjjvHy79Px2BWZ3ssQe3MRO11kbuJXi9WQUmkZjaHNYUY6AWLef3gWDooUMceu4owHmblQEQbd4bgqTQVyGGJF2NiIMyV9xakQZpRY1d4EqmTROodv7IhDzXJiVLD_GVoFdiUv2308vAdDm2Q2mRn8Z-usd1K8s-8TheOVnPA8lVdoySJquyl-8kAYBf_4-pyQ1BN4aUSOTeJRQdAavnbg01vWPgPmmGY-YShvZxAf45dru1twJJ2wXwJWm9teioiSMowwSsRGNKilRTizTKdrUMJ5CRwsYH1EreuadRI7GOkYEg05zokdNjRT2SCoavghdbYFsrLwP_IYjEBIT5lW2XEifcpbi51vKhyaPBgQEhA9N2Ve4cryl0AqKg4K9EovTTXT8XvVPWQPBMzR5Yt-JCGd6SFFochVkSMo1gkc-2L3oZx6yscd_PyKWG3w1u19ZdVCZ5vopG-x1_gfjM6w9eEZLbhbxdd-MxHHNmxwcZcFes5-a5Eu24DHIoyxb3o1UO8959nsAxiAEVBIXz9J4uMBBO8tvbNmUJRpiXkFtzistkJGoz86hBF5c13eoQ1Rz96O5nZHc83p0z-L-J8wTedefgc_Oh6zKBSC7YmpUZsHLs29KeIj2qoURpvDEVdcWFQoojv1HpJCiNPMROW_RBao-eWql8I7oBD_DG7mGHl_6OIWrV6vyNP37QkhoefYl7gTsX5T1vkIIYGdgCN23QCYlL8rMfV8HVDjplI303J2ddjQTZZJhisQ9Ie5hXLfZ6sMOER57bZElV1rizhXTbKPCVHgFMJnQet5Y82AmOVa4hTzkoyxQUoXKzdZy5GA1QwGt1zlcwiKiezioOBgGo8x3JlaVurrXR4QPzKF5rYEz0yN2SJ1P9hLSXk7ZvB7HrhBbor8byoeDb9LGGHLbg6kL24k-pqNo6pyQgHie0FE-jjVbst1KHgREIsNrE3qb8ihFmguAPuskk9EaoVrZ6yltyKo1DuLsKbufs9EfchF7GF7IQfiW_DgdB_L5I6WBtHD_ETZVl6ffGDmiz2zMVhW2r5glFHk7Qq_Pn9ZdE4AgoxbAy7ENfzFHlgadFp06dMC88xTC0gh173CvHzXBySRCUFN9fgt_QvVCWJIjY2puMc3s3GNEihmnEp04Cs94aETx4Gs990w4-u0QMBh_Ib4m8GOhG9DzeVoqwgGmTEFuu1DVETBjCnhqTgyEV-3jkP5vJW63OIoU70oYn5VetZ3RPf_IKld8uqkw3aVYaz1rSxEI93M002rlZuQYl9zKjY_2sucQCUKPMgZejozCsXf3QXoMb5Vrrwb-R42xyScTpt9fYRPBcyT--9J7qILKtP4MLEmsB0epJNvmF1ukfK61TpT7z_ufdiizHUVHpwM46rnkyscJNy5RcZ5_Jw1aDdEUNV9ZGUnFGxbOAcOssoT75swBzZHPC627P94U5_OyQ5MNFx8HWUhhfCNe5vrPk25c_5ZyvTp17cQO4YNVp-puIXRXfYDMWpsjM14K4DBZBNDukr6BHy9y2GShiS4pNBgaF6my04Zh6FXRzVfbsOYTrWhFVdxIXy-1nC2ghdpoEA5oUi5u3
dHatIIm5-Qh1yCOW1XHV0dxkCb50bc9gnlQPB019sGfuG9SHApdaeI7joxpmjRwLtvenmQdlb7eWopKK8cXJxGIREK2uaMbstkZAMnZzunFIAmlEX4NchVmzMOaCQExuqw3au9brGRkQF1ooxG9SfOidy2eDZ25-jtThanrraLdQoPDC5lrxvTLHGS--tWsCNfROQMyq7x8T3P5GqMaAjbHo0IJqw5O1Tcmhz2B4cPH3mtumS6PXLjJfo6seo3o0v1VBJoKZvjpQoA1n5qgMgYYFGCmU15rBbj2VOUJFVp6AlF43H3Kobwav4tNiHZ2k6836wgH0A0elG5Aeg1src3hW6rQsPXD1HziSj66A0elNlM9MejAQxTczY7MQIVjj-fs8KVdEHaIu3nxNCLQeJVDq12qjF4M7BnOOzIAmmVi3Ia78hLxt2pldEtBGO7tEEYWpJZHl2orQM4WUGGwPjDWSUzAQ-Cxoi_s37aWkWIqrWmr2abaWy2eqKiWnWlvgei_q05KXCxiy2Ok5mVczJ26vTMhTsrpQVacbplyv3lLctEF1Kd8F_OZFqgE5AxxWMHBguyfZnID3iTDSkYxLmSQlZ_3Q9sHUAjqjMcjelgIu27xf_f2VrLEoalHNYdNixI-CIIYaut7LUASsfyg-H3_lOuq-su4vXBeCgGqmZxqmQ0KATcZ2AXxXmrU2IsIpEcBs1GFdQzkYNWwa4InwCrD2bQxM9Ur3epN8WiBiYwyQ0nDftFpvJv7gfjILGOGgYnQJsJ-sUEuYVh5fwVja5QOK1xEqMhn9uqBX20QX-AYHKh1C6mRlSTRrCNYheRFieKqOmvm9uRi0akT9F9HHmnleeLoVdRoITkGi-ZG2U8eRIOEbMTE7AvMV1J0NKzny2MkdvXuPdnReznhrOnbxXZVAnPCBWc89dPkPaEsqQLj0B2z4mtKhYxuGsBHU1JFwA9S5Nffj_owAMFtFiHbrAvhYEpJDrMvrO6lxRN2atONWvr6Gn_YLaK7CKBacf3JO6RxdSVj3fjzJfKabrq9VFYbioHRjzr_VRM6Bk_LsAwpgmWVeOeHu2S0tYIx02ryO5xjHe0BX2Om__1gs9BoN3Xq2CITEBZOaxI-F2_3dktIpyJ8wsWkwAtRusF7G7rhGvLxwPVnpsAFSKZ2x6o3vUwA_C9hGaCJM3P4duOcdLSgvU1AeAwWMPmeAK7p-hePxPtSpUE5Zvr6RPvIxfHeDHcXxsxX6I-T7MpAqu4N3GZUlGh8ULhCUER3682Qh6BM5UIXOxIi3KsiRyZzpdxBv4ukl0EVBzGACx3_fyEMfjJz37CHgLo_k9tqUJ7_YJOg8llUmO7B6t4gYPVdDBe7IZPKXhpWIc6xqB26e41SRqQIk-bjl3rLhftVbYWjbdkIdkoLvohYBKnNRxAYleMix8TVh4fmVTf-yvIEsnLCNqHfU6uDkDHC90bK-2451s4H_JVs76Gk-E_coObokcQQ-H4Du3Ps4-YDnhxWlt4cA6sZe1xUxEVFaBCuXf9EqfvMXZONNsd9zPOb6R165Zz3-237zo2hAhJLwaIzDkxeTp8q0-dX0Nh3SXRFkOnJ56WAA3xiK0PvKNsirmCAtpuLIeZpeHBUe7DQtMrn91HhtFLHGYxObrOd4iXBEO36dk7vATaDq5p6fS6JO1GYA2xWs8f6oHITp-9_0iYL1Ds6tozoE_ELH-Z9fB3zr6qtBDrtKzdX5IuLrIiAnY0fUuAwgaysgDjszZtaGH-hl2sQtoul2B4fKuTLrtwEP8b0E_stnQ5W-VLO-hlld3IXUBEyfrrbWMt1QI0lgJFR91Q1pJLfCfPKU2BUWzVH7aeeE4m7-BPeOrj_jrVuk0doR1fS7GUATLXFkeuxQieBWVT5jXlX4wEjwnqf7qDCvTTVaKi6xgF41umwzGiWlHkIC61HhhvHCzXegi7-w70aYhQDIJYuXgiJtx5Ruup0gQ1zUczoweYT33mB8NK2HjNXNXvKbHn_HdzFtWNkFuho3VOhtwdOgmDrgHQOYcdIVYoLbXsXB6clP718ZBHfeUVDw8gdjastFbt3EQH8p70IKcFm-v-V11UWa81tkt1TOTxjgfCO-2abAZbfrWLe4cEs9xEQg9juPsUlDVB6J4IZJTC92ZVeeVk96rQYcLaDUGuOXaNzPPGzfrvvjWZaBAL8YpS5AiJTla62hRx3wdqIV0zRrVf7xwIUWijrcRgmp1zRuPnn4O3cnCyzS4L5SRpSCxeoFzHIVFhNFDY4xt7iHMmYjZV7LgCgHBHi6Qn9BzKAz5F6Yyk0W4g6lcZlS50-1FQCL6xnDN1GFf-YW7XyPl_mtlJ5z8QaiY4VlCBL1Ddpe0Gci7jm1hEFp9d1Dh0qK1W_I-JjBXFns9I5OwSS9GZOZLdD7bPkYgLjqytzmSqJ2VHC6Hx-QjmmVdiFg1sz-UIwO2dSHtvRZLzPQ50nsslVxw3h1URcGbz7DgSV5V2nIq1_2XpqxDLia23dNCqH9n6EY_hi8YdauvNEDBWC0aJ6Pi3azut0GWEkKbqY6hKbey9HVno6LJsVEyM9K3U2ufHD2sC6C0NgaQHcaTl47EaOugRhO23w-tNo4TT5rvM0S_yQpDQ_9GPox3II6BRdQIAwNsnJ1GLOq77EESDNQ68mIf9PwPQPaqj-C5JWqI0cZWBpmS1s79_b67rGw6S-elYwpbWq5A7ozh2wm5X0yR7-vzAKmF4cZmW-g-BxaaKP9fWvCsu4ArR5_EBVANsISIObk4uKD_koPJxxNEXJIefOBIxxQ2GRiA12EpPqkAkO1Tx-44H5pGwnk8W5jcRSte9l8IxfaxCC7vhcWcKOQR1T1a5lluRg7pOyOhbqcuNEZt4lAH08kuUQFuHk---tDZVaInYcwzYmDFUW1JlvicKq6QOzX7DYjzwLRWfWs8fk8p0IfSN39lmAOm_8LDgkAagrnBbfPHrVcr0mVNivrP-rbQwsAyEmCg9DoTmjbuZiXF4FwKubC8tyzIcX1sROYzbO18ddXFVM7WnuBAydwZ95raT-qipWRaZB61CmjFiepI1T0XWd1UwAD9DM0cvnASeTHJm9I1emfyW0woi7x9saM5yhI867isdG8gmAJYJ3tTScrH-WeuJxB3BYYbHslgL1mXI6tKMPG26k6CphFJQ7uIEx2e9ZiZ36gpevgJgtbVVLqhjy_a1OP4yLqQLLOrkVxSjxt--eth0AbojdRtDzbUxN5Rx0TCj5cLwKaueDztDhhMLsX5JQck4to414dHQWz3So1mu2fyfTyBeE2x_9GxsXP77SDXl_IVpZX-qYPchLownTYIEVvM7dfLgX9s7pdjLCCfYLJ8J11aNwMMBU-zyeZ6hGH6eJzPHunwxtHKGkQlz3gsnJ99wJhRSZ7GJk7dX9HTf0GVlxTsy7bt_5c47A1s7IlCKXBy-jhJjgdmgtgY8RHl_GC4vFlwUGwewNaCL4FX5EPfe9BaaxkT4ONullggR9u_tvyXov43xs4cpQdMW_sfGg1K5VNFWqprZn3TokIeI6S3d9Y2YB
mgHrOBzLTkJJX2VjQ-RMkBMw1ZYnDrtcMY2VJ2C51thfaH6hdIj-NwWdxPEaWd2jgsL95ynhZzXmlq0jFF38Yd0gaMc5oHO7LSwUrmJT5M1JtrLy4kIrHqvVtCQ9Yq4-xPrKIP8P3OjhbyQ4ZW0uw2mHcegBfnTsFzUDWYqGAif08rKxSGOHFe8TteWy5ex70tqYSHaYK-uN471GpY_Mv5uWvS6_8ll7Q-KieREyyVrxkNzy06f9-etQnS9s5qReTXMwaqfz3OHx2h07hMFEAQhH3q90r9oQuwCf4-g06wGpQgNAjPJqAESAv21yY-Gzj8-Pf772tkX2WwpZ9BK5F312iaSEvinOMG_fc6_OPTWXVkQbYGC0LAMTz7Q3edo5MFvHog9xDcu2AzGnCfqAdRM0McvQq3jFD1uODS8yy-lVuMySujbelmMeUVJG0hsxkoQhXKQjOXQLo_WNB3YOtUbAnJmtaudPx6KOnr7OFeyMKuw0y9EowsP51DWoEEjzTnE6cL5ooSJ2RFUgKc5H69BQMTMbapXNPqFSAUd7ajeudFvxDcDuAVLDkqppMALHLEFJyUVT1s8hMbsFK5jBC258TEB5XBl7jV5WmhvtGFDbhVsu0kNJY8y56aSBmvzMVLif3IOtFDWEB5DZks_fUan_ma1_dwhcyHWFZvyZiYpjogQor4PiH4VDizet_huZgXZwplOw242hUUAgKiRufL524xWeAWhevxVEGTIZhyDZlND4NgXGBKEP5NVh3hROKVkAxvChOtH4bHPTh7ksK7jkKXHdzBIy0mvydh6d9lUaHH_u5iNlXC07sqGE9wnuZqe0AKlYgwDUfHtjegUpP1kZ_cBd-_lMBhnlAQQ2Cx0ND5QmdfSo7669OfF_Bg667BYGb61_o1LPWCrieCrgqoikG5sfV9kG6iFgioUpxUFK2yuoWpOtu2zx2t2sZorf60evbfghlTIJ7hXVYHplf00mwqSRfgQaDcvtQTMyu8FuzwhnyuRHiRsuQLsqa6eaXQhFufekf4zY2W2uQsQx7kdhAxoSXEWmGxupoUR4MxkjKEVOOdNZxuJ7SND-Tji_SwWnAKl4kqIaA7uhK46Sv9nLVmvKr-zAPrdM51ZHuW9h7iahOg9C7VGDCcKvw2rrStKPNeg4v3V9ZmP5KR9W03uSyl2WoG-KPk7AnSv-J1gTmL9a8CNO-ommtoKYb8X5gqvtQ-mBDBYoIeJGgIrwVyTQUVKdh2ulLYHz8xq3JUCa9Julril7NKXKr9Te1Z6b3TYkyyTEzuno32GtRcVwB1o-nxk1CxbnBaRdQAb1xbQStAEM9ilDCcV6XvlcaSqzHv8DeKzPER49AaR2jJ373SfYpIFgkUmbhAlznbtDX5NJ65TRi2hx8TsmA-5MV1BzQPLRBhIJp5JWqu46edXUrzqBJA3af3T_-ChPDPhrCAj4R5R4vEpehUcZvSAW3MArUphDMu5NsC-XE0SfwMSabl-Ln91xiFEsYpqQUxZolDtK7IhD82SCdtn9Y1qXTuby0qAfgpfjxc2sLQeOgdAhsyx-PsX4LA5-ZJL_IJ_lmCZ4HSF_571ECtivyM6uC5ZoaYhhLjaGGxLKIHvzHzcsJvSTrx2VTjIrqB-j0ExUMFO8ttrZHS5SmlhFqVh2hzexxdCl1ML7sTOxX0ChmX--zubvT-AGQvlsPw4RyNOY8CUyyfObCZKs_XH52k2w_JQHCryk75V9Arxfm7fnoVhq7EpzUj1V2UQEa4h_cruX3huZx72nIelR73ngf0l209hN2jggAQPbotLXUi-gX8vyXQG3RB_5qYTHQTGpIpDZfWSKl6-lWzKYUnoL2oxOhiyuT_covnKGCYndVf5gunfDoIN_FWuP9OEiyr0C2q4bOsr3qwl4J9It2EwR1yzA-epNQBAOSzbQzlCC8GSk03P-0lfCBzV2R2llGMFbIKtHjv67JEaFQrpUTpZmO9FnDD8BNpaKScz9aI1H4yd1ay_kYExp0Smj9twKIQtkPzqjJYJQ75ADXYglGzdziz1qWgd3WX_TOvgSbWkP9Jo-NbfttmN7evKJ4U5purhXRHMzyqi6-JknFMBgIvVnLpUlqk_zSqjaMWcSiSNUKGH8z9MrSgAnDGxk8dQBb1tasuWDb-aCxfNQxKwQ-FmAX8dDor0MdR0j2DkO1csLwiinjrPFUrKqMl9PL7JmkVk0pTCQU-3T2do61zWEX3lU45RjRPyggoZmQ1HbsEKbnroyUIytTpLZZjCR43hFNLkqQik_99nv4L3jBjbAmV_Rtb8x3O-YTE_tXrJosbMYkLJhgzsaqURFR79fLrJJ9JrppLQ4ZO8H0-quH1lfFWYmqkCRDWL6VDu2ZEthDENPRHf4hOyOP9tW9BLxMsOqqqwdBP8aPXBHbmLA-6no6IT2mw23niAnBh4N2mshuHxBrpJ7txkgHB-hW9Np_7-NrcS5IwM2FDDrTAJSPpvtyJ_VRseCLZEf1eFWC8q7hqBdKswEm9wDybpaYaGskR1Gb3GBrzkCxCz3zqf4zpabUjoGt3JXta8R9OpFu0bFUWrCVi_62nkcyZJlLmsm-6fPbLFaY3wUEILhq9hNbT3v4BxDdHQNOhJvjEDUC1wEZ_uEb6lWuJ-yjJAs8IzDogs_JQwqnEXm14kzYHOYI8nI5fG6XZGMoFpwo4SAGFDXc9wQXIuBrLGX5xYvFLtIDAWD0lGtqcd2VkrdVIoYMJXWLc5pNtNUz6CY3IRxZ-F0euINenajuOt4ksbruOlaPOlTQNRvVjbk_GTZ3TRVWn5EXDLsaSwvqgOU24YApUy3dbCXOsk7uIRBlSkYFmCOJNHydgMOMwlcI-WXukLWKtk5gV97q-c9zz1BXwWN-MfU44gEwjidMQGkeHhofqMNhvepsVR3NPMifsqqceKsr1Bhp6WthoX1FNXthQjJ9SdTlyqtV-s6lfy3rE0MUUMvFSm7aLB9HVIhUjp3DmKYlg_fsp1WWiHGHaD3LNavnKmKCyTSxjiQWw-_gYaTKznuNa47PQjdz2qvFIZ78VR8HSm0Cn6hhgt4ZsffCClDAPlx_YZL-MVKOABeoBBUrIF4gdQ0oKjBk52rBF_8FfcHb0mDWcLOCM9pNFFl05-Hq5r9kOqZBoS1xn8QPE7bcNB48GcGkwsolR4q4Lt1xVoEOA_woSHLQlvZn5v1nlrhYw60ByU4HPpMj6D9qUg1qLMkey2sf9xtSZEMHKgiItF-nVGAthaeaY7deZpWxFI7RkWsoriRAAiO0vvfLxK8Zn2mOYagaZE8_7U0c_8GRbl3K4rr6mGICO4k5JwNqYsOCOxJP--wYe1K_QwkYM_DZ_lv2-NNK3dXcVVoRB9BaJf4p6QLQhyQ6F8ph_ty2MVAPTOnl0xwi2ayZ_lLTmuUUuUyv9adSEbDEkr7umpU_2l5S3tspEU-CSNziqQwnTSK2K0BQ6Ole1wzErHU1ZECGQ8eZgqMuiv3zEchv3Er1YBjaguLGRZZF1Z9IvspaDys204hRbgE0of1K6yfsPz2iXxS-eoPlnj
wrHcW3KsqzjJ2N7JFvahsbOEoRi_XPeh_IPx81QblFqT9qjjhMctiXOjP5vp2UQ8xzljonehnRJRPaftXLFb0SJ6PrD25F9unv1-ESSE6orvsL8aNHz6YtSjlrGKMBk3GqeQTbg4GSoaNOUmojUCZ4PM-xCxj0Gk8PWon56woJ1Bcj5FEr00uCKNn_Bu4oU2iRFuFvfgc0eFmTuH1wkFKL3-NPTyZoKmQSxeGOA_BMv0fe6LHrJl0g1zbxMhiDfCYDhJnyNYLvV-gCL96ZnWTwoHFlRdgf7VEPJ7Fiea4rMeUojWMcLphPa77VuY5Okb-nDybJhjzCVA7keyW4CsCc6-SaaPodXoBgn7eMraMM-8uZDS_C-1tF1zQgmqUh-u1qnHQUnXS3RkZJETE965_JL9FxTbrNxe4VCOK7xZ5fMtH9_oioAUYHLn-yQ91s2-HWAj5TlL5NrhrDG_5cXwqnxX4Hj68AJnPNP4DadXhMS6T9qJxVwZLaivA7A9rkG8LsBKGOnkLpLSFCn5mVQiRx4IEzR_D8X6rSNUb_ISFhGCbveXQBaA3e3TET5IUvgHekDUSKeB7lINlfVKWLdMuQeCSprebGS7DtF3nlOvP3N-_LToD4oG5ywGZq3O5Z9-XGC4eFdVtC-Vivydlk46M6K8EuevfCOzU-MN2jRh-1-szY_iCbfZ9gi1i3YGGdan2W8DuAJgesrwTqsErQ9soEMk5ymlivw7JcPR95IfCY_dg-cbyhzXdpfnwzRgM4PRLgWWWCGwyivtprDME0uVzcN9zFpUnwTwXPfNitojJx9uiXtMVLEwP_ucjnPNiNW_tuU0wXHPQpEpBQvRhly0iBx5o3ken2O-TIEKFRvGlxcWt5ngYEM44TXUm5LKtIsG4z34ZSb_ibkcZfeSOK8qQT-cDMEEPC1uhGfBAl4f2fBmde0vfAXA4ael8S9K0z7DOaWahDQs8p9RCDmc94JRoWcAidSMgK_sGfgxAs-Z3Eh6CJS5iMOfduGAjr9yRr0yDnDFgF21TXDFEtdZko9W2pPVV4acHOA-hUuAFPzzyuH-8b3ldJX2JhYG6b7u1YjSpLbbjZwRCmE8QyH1mgPz0fekyvgqZ1qHXyrzDkTOh8kEmIexs7hETsIBE0Ce5VNZvFpxCk6VlrgPhQK0crUp45zWShMKJOhausY27toJgcIS4y142eDDdSvAxFxIWVCnQXHtjc873dpImh4mbdoThJlbVzyQtVlkjkPteYPCAhI1c0DA-Sg-OSed-cOljEGk5xfANLBea9XF6djB2Idy6JIbe2ylbIorXVASSL1xY62CW-pbje_z-e7qnEGZMK-AWfqaWtZLh8ChIPNoOQCHEHMzAUECkgSRG7NKtM10EzGAQG3sARu2eWuOo7VWClh6UCPzhRBdxqN3fJlnVXPL_BKw5fe0uPDr3PnZAcICrf-tdlQqORhTMlBZxrIjaVyJbCdhQ9o39pOXq3wxI_Th9dE53GYF_IYQ0ov9MnJHIxWCoWk6GYnbgItX9wUGpsgHpD8NErGfeXbOHLdj1CAAJlnzcqUchUlKz-RShGfYUkiVkLqfclNUSfhczepg4vnh5t7DDur53EUloJVObRVcHlpfhWQ6UljsOm7Jo5ciuBq1BEMH1f-umhrlcSxV4hlMqaFtxi5keR3PvHJNyqjJDSgEtSzUQRmt0GqFMefG5Ww4tLAok4AwNB8FXFYwpvs7kkPrgU0-G3fHyF5UNSXYlrNXOyutA0Myn1tPJGlFdCUKKgx7GWV4kMNLOHve6bWMOHliegAWuAG7s-3IdR5Tkmx-eat_YHMCncFihtQDJJVK6fBy4jgARBt9Kc72zyu2TxdSCa1EwLzTzcIJcSY_x2KoHI25n6YEr_TYeVBLNeEj34abpcqcKUKgXuVPIrKBnbI4tbn-QOKDXRcKlDYdgW42-DpPFlJtolWd4_T_HMbWcutRghULxXXkIcFfoOJ4NWfebjwEATNn3QG_AG8PTmvFQwSyUMo98f0SA70VHfHAN9XMFRNsEkN6ifxDOGJSgvkplsmKW-RaibtgMkFHz1JE6SBcQzfeOfdF7rxKXYP9Ph-PHO7hH4gGnFQht4XbOhHOCXdrQQqYyQGSxHzGfMSWP52Ig16JJoIcMGX6MjpMP2561bgzuGwUNR-Yi77kUaNjnvSm4IXZiWJEIj-sC6xmWCs5S2wWC5K-RfpUGKJfEpahrcVZbMt6wDRzV83ttp6v1UMwtm5MA2regY9zuGFoYrPtqe5VtwK2ODVzLav22iy8rpCJubBlT0SPB2CguLauH3ykMYgab0EUKcqShzH5o10S-eTn5dyVdp4lspSMGQnN_k-M4EQOTInPOWg5GUh-9BbcxNwNAcOWqd7CrE_TMowshcbtgofNcyIDqc9aerRKENJZPS0rRocykhYjXAN50kkYjWCE1vg6uakZBKQIRSXKPlAocFNGSpFoqRIZ90NmbUV_m61ndv63Io-uyFjgLyiN-rK9dCHgAwvQfDiMY4qUzHKHYBf7UIPZ0tcq9y7dWYtbDXSsJvzrxB1yJEL_QduAFsOrd7bLukMAKghep5txPYsrvANfHYXs0P_7eyPNc_ocN3YjM5FrDpuLc0RGWvi-UDLZw5AjJTR4bAGMmKj6s1JafchvGaMtlfMLmijsfMElG4p9hJClE2_QJ-UAAOBNd8xSon_UXsNlBn9QxDP5j2Lf6joRc6RL6hkcFjVxYeWAg6DPnp69LRIsZw9R3J3uHPSgaRxnOsBlQNG19CDEk_rghzrt3bqZlJ-Rtg83YXm4-oDpvKf1Y9S6PSiicGgWkm8ajwQ9h_wJAp9YbBwtB6fIyKhYvif4DxpkpW4oua_Un99U1YMLXDyUGDTO3rKy-nWS9XR60eiVQiTiIsFBwtVdUmGO_Sie2AqCqVDk0MhDAfjtKZYZmK0rx4dP89Sxn5N3ze3uxIT28TXXjU1FEHSm_clAHgRHuBG0iTC4SjTzeFQ-Rpx3l54LX3c_Ko12NQBeZwK5H57NoHpi2R4pDrA-CoXNGtitjrQR1Nrp8TKoZYliqwOzHXKIsRNNP1NW-0zeHptK0vfGFmGl77GdyF9vepgNmPrJ0QbG8J0mwreBKIREleQFYZTMzIIeLcnIuKPBCIOBUykVr5P1I_BR4nbEgnYA116FLivGQAMW1DPSJ1Fk-oDPfnplVspcVrb5cii-6nDjS5r4cuuplrTlFw3uvwa1il_kk84LXZkZRKi7tNu1ofc_KBziM4Df4RRL7XhAx8FqqE_wiI-YT0W-Pnw5i39Cdfw6mxtH-AArZShDzt17vcdhXYh0fenilw-pwXoN688YUyrgyWVXLKjVXPFOFnVOGh8tgDSjJF2t2XPXUR5I5OQS-9j1JSBDAj3TIfa5zqu3vHhM0w3s_rMH3QzM4qTycA0X3MDfh9ov4YyqI-rpf2J0qiJqs653wifAEb85DjTzMSaQE1X7bLWThW0nXCu9ucEKhdQr-QJtw-KQZicOS-fTDXR5jFcuiLz2OL_CFI1G5SdYdbDG_tzO0aemY4F
5e0VXdvfiq2gAaEUsPG6Ypq-Z9BoeC0t1M7I2XIVgVyxddLrfW1WOBY9OFE3U99C0xoFfxrTGBfv9-p0Jg_dBu_GXIr1TxDIvqopsveM8hTNLEzquxGuNXcsu_mOXyr6qOxECXSuBonF7DhaFMs_s71W0dunvu46uv-5bMnkCkpveoUqumjQajIfMC2zZIwsKHqt7itU_B4wmEmGBJ8_mkGy12zY60jAe6jkHoOQfFpCe2zBLEYyC_2oImWmHt5TybvTb_jpp5znZcMiG3eP3b1ZzouecgX-8C09Jr2Ek2B9axgWdcjlgqoWxDaO4U2eVaAyLtELkhnCJTpm0kI0e_d8AyhbZ2uh2Z1K9tdegx6eIluOkUsM8DopRVfG2nYuiCpU6XdeCuLrwaQ_bFiFNhn-AeG_bE2pxZUynRjES0om2r-tqHl_Fq99Wb6nZEfKwREE7poyglEzSZYbvOeW9Cfr5PRciogFMDDyvVZWNvAYHwmIReAmNoZ5sIBVrzNISAbNGGlB877t35dpkT6ZjfzMMd1CnJRvR_-5LTm3ZNpcaIwyL8fBTERGzM44-jH43O7D13i_ob0eUx4yJcrF5eFXn3xJW04oHOe1H4lspikaXRchp_rNm7Bnqk4j8yYNpHK7o_uZ4EenKhyrEEfNOShSHeDOQzN8ZFMx8RqE4LxkwVh0AHezB1T0jiS5yPQ49uHEKjXq3FeNiodlPMzifWg2gJeu6o52JlGDZWqw0tlr_uV9yGTlDRdmw7HMR7PTclrzUWUG9W5M-_NAgjjor9Qgp8sYWoIU8uwMp-hmvT9NdAJ0Wd6IA8TH5y2lEGcHDIUPJNnodHWKfxKhGwsVUrdjDd3rla3EGw1bWnUssenx49nyDsiIPaYLwKgvbQEygctaw4tq2Y62LX8PiQuY3aUxNq7JagBwCb-mFSOSdVxerwh-N6DzjVHNKrGLHBKeHOWM_UMA-y4DEVYB4HeJAu6590ilBT4vRgUgF9_3d8olbsHuV4j1c7LhHHGzi6ZwhAT31JrTW4zeIzH24OTiR9-1Ty57qpi2cFDaRGwwlDAY7d_z51w9mWS1ISJmj0xtS7fFMIoWM9fnInHpA6So3XUbupBEGLEmE-FKzSATy98gDrE7HBGUOVvk3UutVUb8wFrc36sf4P8OV-JCAUHgZq7phYdzpIZeBnk3llVhAsIPDC6GwH-VMTJf7ioNt1k3Xuj304TTAuJ1b96ctzeZfbqf8lafVtvunVYdkV9Y6HueGKCMLMZ51S7P9k5BWXreH8AckWSEnV9ADsxugMrzRDVGHOjcdljkUDjhGeZCMKKU64SAM-FWX-7M6bqxB_RhDt8gZH53-shPiIqVvOXvNNR89nHYoIUAOyUHMyU32Y5Na2rfilVXui6LQIT5wTkh6A66P1_CPpgjMWN7_DI3_syJ7VdvX7W3YaGSL0QNskYLf3E28NDK8LSwDGQw-nPJCc5z1xmt2PcJ9hCaYHF33Y3hE1LYv4Fltb77zUnWnZObugoeiDDv0ObxnZ06_giRei0k32iL6-ch9Cl4TfoGLgYg06JsS4Jjlmy1fNB3rTk8Ip1B26px3lWSe3MU_7F5z8SqnJS5KPirGOU_FwKCYd55wtiLTjO_ccCk43kt-Vz3FIJ5lRnRR3cBcbeP6afl0PXQjbwv6JpdNo2v3Pw87vRYkH2Ap2bbzWXO5uF0jcOwNknGw9uHtbeyv-F7u70Q7wx6xWUhitRmaEDx8Hkg8AvAa80b2J8fB0YpzXsUTZeY5qxXmYboD33g0eGTgn-urAKhC9U_6yy2XRYJpcuSRAX4pG5WYtLT2Iu5NRA3_tEcyxuVtXov3-YXm6B_8c_9pflryLPX8E1DdVdFPMIaCDPfTQVZTSC8vPTtloc5zfIHItYgwWne04p5yaZv-4k48uwXKKZYHfihMOshnuQxbpnkRJWzZX_opiNYIy2cpaS1scBdv59HwQtArXo584Jyu63Wbk1ebpoNbgy1V2EidVcPg8A4k1yXXPcw0kBPLcC5vji3e_NQOxImMoOIcodsbJtiJ1yKpV-H1Wc4bdh8IDSw9WC-7LJsltAuPnd5543KoCowGo9xej01cwcx0ClVQqIqwzC5g9uDK10YPbl5Dmv29POjT2eR45G4HVlzDCZbPxkd2Q5UJ_gDYvNgbioJLilchZxPJzjdXBK6Y-AeghAqNHsN6cB68Q0WHN8YmUN4efUv4otTD-U972yncM4x2Gy6STgyFhOLbwq3M2S4vzIc39HYIbV-QGAALB_qPaB99nVgiAlmAOHBebK23zNDeCGSbcnD3XIaShVU5S0SIHKOgmVa3Bb-tuWCQ9DU7XAVckmqWIKPOIRlUtBYrgnNW7SDs9y5vfmjQPVlMbEVea0aO4CF65ekAC5ZLj_DAI6OpT1zrSk5wbLu7RP1qOA58jdACpTv_Apc8-dVSq-CabW0pbGbexzPWDDXMXCPw8yTgp0Q4PTV2zlH2rOFT2Z2KFSsbiDOUuzYqLQZcBAOYx73_EfHrs1-kueOFCCuQCXB22kabyInMcvNLx-fJf0-ufxwTLe7riAc-FL8da4b_cgq_YnXq-syBwE-01ojKfq3DciA6nOL0vSsN2HP6Y94ubDSbHQ4sFUjoMGIWIu9h9jgKac9GhRh2P_vcF_6tmZN4weLzvGK1qID16B93RksNDqezmBnQ-fBRfhdY-rd_4G3ZnY1yqlfkiD0hs8Y3JOff4BmSapENrrulB7y4S5pSfDO2JAbO2KSSqU6qtL4sO2EbYeVwETtVRDmtIppKzLHUKCjmNiI4U7zjL8jsxv60qjygNP1biFk-meWGybKBlEz-jpqpHghwAdYRNsoucbI5hitAQw1TQZucrrVJ4D0DlZi8vODIC6fvmE6PiNfCDTE0kLDncQ9IrF3c2iRNFrtB1jEOEwQWasx6paKjHLbDnZl1EZ6qf8w9GASSn6vO6RDe6CwIlc-_JVb06G7Vuh8AFKMa6sAbkNF4-xzXJxNM1XMlIrCTLIF-eIg9O6phR03FKl5erWoiG4fjgqrXy3A7W8CfTmfzHvQeYwKZ6H_2GRf62yBmdOWWJjdJMU_WYrcpZo-E8xEvHmKrCfNJGvth9BxBQM2AdzndLzvdQ7XL_vPNcYC_vCRQnGMThpH2upIfzR7ngKsIxHcuWzlhQPPV4a819GA53BYHkRf491UN_F5O32s7vnkaMyTwZHxUO8jXuj__ABZEwF1L_LWn7nTxYKdTOTAh_gLKbN5KYr1IvEqbMkAkrHjlH1VaKLf3MLsCBI0JVJW4f2zNjF3ROsTNS8EMoq44UWvQ8CC7JU931WkfL7g0qnG9g3ZXF42w80U9rIXVcKAGC0y8ayLv-XFMbThR87YUaIKyVuQhZTg00DLObnv1Crjyz1jwkfUmuXQxmqNY6s4HCJ7vVVtyWwwdYvr5vKo4luYY0EweifNzkoklP0btwL9u3KIQs-8HaWjIvOowby-hL1Udj5jTKjvx0-LBPPuRi9nh74vbhOuYkxDgzIAz7q8boTF3qqOMsa0RL5fWQ-9FL5
S_Y0ZUjOIOW8VL0xy4_eMlGdNf5arjx5yLwpn50-C7PtnPEv5XrKtYrAKRBzSVUiK7-oQ4jRMVc1VqWF4FcrzMn8Jbw0i4FXd-T9xwJxjKcCbzBIFJBhYHhVI9n4ZhfIvH83JS1QU3jyRBdBShY2OhZnBSQ5SvFO4IXj_Y9rRTZNQm0fIdEmNStkUocut65bpt4X3AMqoBFvRZ2wsQZf_ze-i9UPMI4XUR7WfR4oIXiaMeHyDTaZ6XT9i_UWzq45sUV4J2zVX9bdAu9Q6jiuJy7wzMXRDNr-1yLlv9XDvTUv49SnGekxJCAo8hPXZdOCBMklpWrGRKEWob_XMy4bGECm3bNb3QMAPOJ20I1DP5oE9YFGfTwIGzIKo7lpoFn27w2Maud_mIChLRzh9k_emNhjRjzYUndVS_BqGUSGiIskhxzXRVInPSlV-xz8DZRjXZcLvyF6-HrqLElE7w2ngDL5gqcLgds_jHlYcNTvShiz5KvKrIxjdK-GJ9xutHK45z3-xyKPyTxStUvhC9AqCH-tCdypLGCXhxgnJzXinOmPsn-uTnx9vKFyeRzTmpA_378k1pAuT-8YVgNhrwTgUi-MSLZ4aXI3DU7qciH0R5IE-UNL8uxW9pp15sqFY9gbQnDDBEjYmFFMY5Kvd92KB2YwBJeM-JJHO3DSFMIWW57a9VK5MD1ZN3ld2O45A9Lkm2V3vZsAbrHZYzr3aAxmQSajJvDsspak9Ur0Yq29vqNz64D9ytJT4aqYbZ7niO7tUQGFf0jP_yrE0IIlU2H5yIsNPQLU-GJN3fMqwuPHJ1NTcvs3WJhwrsonWl2p4eKHVThkPXsaPU9EvqM0YZ9W3SlxLk9WvISxr1x48TzU_LbNz4_8m9yzdPKGeew2y9Hr3JC3Sf6xmMQvEjUebn6iI1WKwa4FFDmIYH7IYUu5yZIX4Ppk093mGnhNi3D-10AsRRRcUEQmrZK1glrO5KvZKak8Qbl2qJdUWIirLA25VJZqo2qFMSj_cH640B7rybLLKU20WmzM_oW2pJUt8i5EXfRzSXDKJW-XKQ_-95fzc3czkUnEuFo5jG54KTVvNEINiYf-cJmgMvvo_fSrn3krZwJdr69NqRVfDG_LZLkiTIQG4EGGkotTkCRUlGo0LaiL4nuK_3KnkIOyX6uBjfMJTyW6tAc4fPipGIYE6n6-6h8V_EAUDfG-m6K2mvnXZRS99krZ03uIxZvuVj2QxuzBwxocZbCkkYg086KwjNxSCUertGM7qKFnOrNxal7_uEsNuaEMKSHT3rldimTyLYCKSI8uT_QyYW8Lq9xVMKE340bWU2D5THsHXwh_LIf9o03EEQAG59Px-iIu3UU6o7H7HZw7jGufZN5uTTJZrcW85ux4DPIRXDKVvAEx9XDmzNCn9B_IMIvm0BQYJsiwx6AnfvBr0MyMzxMJzueQo-h0jFu2WzERa4rPLX1QgY1WDO4V6QHcmi-BZr0VfzI6JCgs6go9ufnTvQFGt9U3To9DHz0ftHFLiMgl-8JgzHS0AXGxVRR6MIuE7pXzfZMKln_6EVjXbSlb193fgVEeQcAzXNTSv3RSBacDhfbD2LMi8JfiOwJobuLXzRv7DFzQoJaUu5b9JB3GAC6U4XQR9vV7Z65pCBP2aqyldz3iRqY_OX2jDoqrlgoZrYEiX7aPy8tFSFhrw2e9l6MVzBUXUeAV51SACSkEi2D9KpS8bjELzbaalhCk849Fr4W-EiCgNqe3vFKQBNT-3WfDz53DHI883w31O8xc6oW7n-da3DlNjqY7eXVmmpglNkF_EVkQf0-XubbmCnJcV2kUTxge3gt-3S7QfCzGen2cBLp48iHtMBi-nC-Nr_8LcvXeDut8DzweyaLMb8iK5NE7vgV_4qBsckFQ-S3EPydk5peiNz4E7pmhk-2HbZPPk-EDI0OQ1bDRjRA9bZknHBo-mKV82CZ8ePfwNMxFwXEMZOrwA-rQnBCIrlADf6VTySbJs7Cigfi0QpkQZLyawBqUbLuKxJHI96_RMchrgz33tVrnG57RJ50keQ80lrG_JALBdP7aw612gWDPcFKV3r2CElxQnusen-FhtuZBrC9pb-MoJ1DNXDvWgV0ocDPH_dr90nrPrcpRUpw0cZKtymoiQzeU--dHqrip4ZzkgqKB9VV6C3gyiDk89HLihUGVzvtQtYxI1xII5DWmtd7eTAPVOQGHfK3tav78J69T-KGPHvpmb_5N_3GVsh3BcIt_qeMG3yETakZZU-INmDF_en7cjJA1lvwgWqdnzlL61vacZrs-Mh9nXLmtBpBAb4or2UGzb-ecZz8imGZXQOO1B_ZEoEyXazoQ_WK0q5lWmNzJAiSxJtwTniTcSKPIAE7ID3mircEm7FdU5ichOWKwOmOEQwU8ST9gr6jgqFDBcg0eg5F0jO_rJyclST3fRv1CGBHjIj6IBjGyvSrLG3MgjJRR4TZBobkeN5aXpd6aEJyUc9C0rD4zQhO1FNVOw6fovtiemkH0m12TgelGB-oWh300M0Nzc0PYxiq4BjSKhvtzBOrTxOeMlQPZqWY7WegFs5S_Q4qJqM8KEuurHMaL8z1gwFF4Otgg-LVZzk2p1spWyv4HM_CYjqY0F5Fxm9nWjA9c_I_kH-P_X1HiHt3rLYBX7lTCnBTwMtCJRS_QOeVyNdgWHfTZHmLXtMJV0XB54qfgqOfGx0fn69xYWt-iuENHnsHzqLM2NK8lsKdy1OgEN6dDzFNUmK-ZW2dMrIXRmKPqYFrT_Kxys7woFc-DtNVnRx7srX4-yflQQKxTb2-MsrbXfkNtDW0VybEPSHAKg1qaRKCiUMD18ieX4tDUeCdqCH9swebG3Z6i4mAhDzM_yzdrsarWm30emCuddTNqOUxoRTnPiCBvSfEFAf20j41u44a-DPLgJxa6jAi-gtiCZX993mrXJ7_Qlzdos1R_AYchO7r_BpsRq6w3lrL08nvElMlA2fnlDMiiRnE42T790MmjAlcCRRRATLIyGmV3Dyxg_teyEEgh8mhD3bhxKTnJ31jupnczK75z4RpjLO9MRjnoVrDgmVQXs8jYC0ebx90icHDLL8PEpi4_Zn7M93SlIwOL2EwVUY2EgYfaY8v9krTNsKOHCjQ7mw-N-zKC0odt7_853VnWDE8qTI4g_nOvR4ICmtlENRob1g4MMUETcfZHGbMWw_Eca34NmAPNvK1G91_Omx2-Pat24c9z1hhgrgKsw6QnH42QwoVDIXNVt2HEW9tXk_uMktyjrRNdH-4unu7YwGDB8AWP8ZX2lSbLSjLUQ30VQai_jGc-khoK5Tee2njFOME1q1q8TRNtV5MkAN5_lC_VfUQhJfk6Ht76vfc8oeXtluQtdWSKHzTYlErLzeb-V8y3YckLzStIyAz71kD5Cn5AK9HsCtDSCwbx7nGopRhe2qAEHEneafIYbWgPzIO-UIrMiBRJ6iayU6fUV72TtMXMkKwoplx_AYYcpzRgWaDvC6vCFFbT-Oc7e6LhCm2HL93kiTZfhvIVaGpN1goREsr0B9GvohBYaSNJz-i
c5rhm_TAAA6XySKbbAdp6_66toNXntiV7I2NQMdIzJme9_bI3vGA-3itcNsamlTjd7tGEccQDJe8_O52_fs7mK0ykcaNoloom-SfFTZUE4ShjpgEccBLCSMWLYqzk6zXK0i4vqBvN3Sq575qvee26RRB2v0RO9KeH3BJb96o1SEgsmeLLvnrTdnRy_9lkOLfUGbkNgI7drTLy028ECyoXSGKUNWGijcjUA9M76yZD85mAk__7DZzcd7oXVuQ4c1kZKvCnlM1N8nYF5RITKpKUxMuOLkAbNHHbWAv5mlvhXXLisYYjjQtZsxNFSZoexAj_ue4DFb6UsRTj5_8HMC0mrmSeKTkD75l-CIz3nzQ1sCTdigcy-jix5DN6VydtgKJaqrxcBwqm-ucEMr4kGbM551G-6UNkTMLh4GKGLyObJk99DAeS03aBYOr38YiecNS5mWxl8kE66fRRMlfNv48NQ_xE1SXpusPCMtIFFt1Qz4rTQGGI8-bYHw6jo7XNouuapUG9VG5-NtxmmURVzOfx4ZzJXZtULm35BRPclFcgePuUoQ7sOu-mfMzES-sIT5NBPawDA3wvod7NclLi7yxnLEk8IoIcFzcjh1EvQe4KIk--swPZg4EddTreAdd_LWB5kf-WsYVEPVU9DOHkJZeRF1LYSXso1Ub_D32NWb5Gj24pHBLOwBaMmXocrHJy3FWtY4Zi8RjRtqhBxT5dqRUaeMh5R5FvwoiJVeJwQKCALTlXDlSOan-5c5AK50qDlBT9RilOpJ0stKUuUnGYSlCiSlPec5DVKKtLr1vfdpBGx9ITCbFXu3RaREmI72qkqQw8OqWnayvP659ezJIz3B14h76RD91S-acJJCXOlKN3Fl2npdP32azW-Rt4Y8O8BKmH2RyDrS9NmrOdGGsc5HL6Dkng0N5_aEeyLSrnFb0NgzHCnam370hcrBiP3-Q4j6GGCSRn1tUpsjr1GNuTo7dEJiIuLlxXUFYFlbk-925wD3dNmepBaqZDK5ePjfCoJeXEZ4YIPoVW4MCkJvhAXFXKi0nAV-5OikT_fnY97vYUK22dKddaNYpi4yfpyyVq2cuVv0MAR9uOLSfkMWsuEgkunP8Jm6rMuXHXVR7mhq9Uo8rJjeDvVydMcbYDPzg2bVraLU90E_D9ZWMCeMXTIEyt--oddaOfKaUj2VZqltQqdUDYCLchz9QI8pYhvwpFFF3Ot2efA5gEwQY6HXJS2f8hz29DKqFx0x7tFaITs4b0NNIJX6htanb0PSA-4-2fhL3w965lGLdgV8HSPdVPahhCrBWVgV5x-yEJZ2uoymwWZ8CuHp6EVdHW3RAhbQkh5uEAllys5BlkeTCu5sSeHZfIzPRso9JPIW8E1zkZfG5-Bgk4-vGAO34-qd8GO8JFJJaZqfvX08oYOqOQcOZLntyZ6FbnifnA7VQ7Y4vsITu0jbx3AKxA1rPSwUeCpzX8tZ7PKBKXTlqnOHOoFKlM8J5orwAmsugK0XrW1N_gteSDjHL9mnIQhKqxra5XNAiS-FovcaifnYALa6AXhuiFgsp2x_AD4V9tmOJZEgf_XT7ph6aTzT_lEKNL8Jlh8C8piKqC39KTe2IuzIJweAIxgFfZMNrKsFES1lhxFICFNi3KUJ0yeMj5mCggBa5hZTAIME8-zOFzLr7ixarX1NAh3Ppwl1o4bAhjndSB9IPaE2eOwDH-JvOFfOZZc7O_95ZIWFj3r0fTQqQr02cEb-9mx0ftMOE4zT2HKIga--kU_teRBdqpPWcoj9soL_x5kKVM69hLNiAX7D_JDeQ6V_E2lfRRGBj-T_ZZKtWR_gh1rTU8iaInp8nYdIqBN1PLaS-XyBcoUg9r9B2FKneHTDnm5IUsFkftBDQkrRIEg_SyhHqHJlcLLQqQdT3Di0ZCOXQup9wt80XW8VPJrG7Nif0SprRCZO6nbTXuDXHWeOIIgYkgGsTKGAyk7r7mxWAbN__egJ0vJdEzZfQMNkGLeUf8vVsGJDIY-vAX6oFr-QYWlSoMWFqRNw1rMRw2T8ofVgOrsm8uL4N6ofNxGkWnq4Kh6d347dFDuwFSkJdopIH-dSG9biLckuRAjFY5npaRM7oHO6UwDRG1Xv8PA52ou9k_nL5U_g-imGFuHJwKQjE96lrKhy7uGL9BavvLkqi7z9MwGD3NGucyPyBlKXtaIwuHgwq4hb7cOoearoVXHoLnj7wC8K_vcs8GDEfDgP9P2umPkk4OYIXI8LEJDq3Uf6-4fW-odP4hej9CCbSaIA6Jg7TFF6wFgH8bpQYVoz6Tmrj9oitWukpgIblXHoxxBcNWJ4OUKjeBPGnda5O7avrNIIN305fjt0qLaG5NSqUphWriJ4gxgT5wZnufFcAYZJqMCsMUZ3E0a_sZ5KjCfjgpIKuAHpp8e8Un_SLpv8pXwrw8aFFBf9NoBckYJ-vS33jyxmNs1gLPCCCSvcrJEaN6UCiq9VC9lmTDJ2FsoQcIHOIQkDmpJ3vDfSqf5YR04USbGz9zk5a-tHv_OJKz2B8WF5ND2RBWqcWPEYPBoHKDefqcQ0mSk3D97al1tIEavkzC0EYfa_RzhA5ElhyZAeIWTjy-DqQesEqVa-0bbtBL4sKu0e0qgWE3shWW-sRXE14RcHFFcKouRqtEBbyWDyMUX9YpS44-DC7PVzC8FhCKtJxfVl0G8hekjNDr8mhnn3nzeBzW-infVTsJ98-WU75KK6o868-J-ptpAKSBP0YOWtoPS_02D4T-oKV91fLOH8G9PIA9xYPELT7ARFSEwTDAoreAPB1iOeBSN7jXTuZyhXc9HmGiQZw94r0t3J0jkl2S1CnaBwkNkYhEtYPKxeBvapdblWuKQfxfqBUMsDN8I1W3hsaCjWjfbsrzLcDtAxO-7De2pTjin85s-9Hk0My5YZ6TqHxcG-ap3PXEKgHANYCDvapslTUSDdoRSPF7rWXtvav5OenaDIi67BRnrSKEm6-RO6ioKZNXxvMkBIe7BGA05SBPX2gj6csJHv5DAHJRfB5F7bBsA4nBbexQFpn_Vlhm-9p_kWJMQrsmnH8BSJbAACt6UyukzDdWchdamlSlEMJ8pPplfFB6knULJnYFsSxH5y5jgryz1kKjHCP1U-hwtEAHKwA6wtDwyuf9J8S-6cNhmR7nfbgpot3gQah6fJg1V_gpKBYMCCR7HynRXPAGSlhvWlQ23i4cLhuwUA3tOeGHsQtk-oSBAiLNltczsjG-yuJ2KguHhvWsQXAzD5r3AvRe_dwUKjd8GyVewnRFTcv8vTwkgObH0iiDl3CnJVUs2jXcHK6cWkiiPPNuhPH8bT2WNl81l2WZiLCUisK_XmEWf5lIwhbtatUUylKdk_kHigKGkk8BIQdO375WyAJVFLSbSXZ8WsU1LrmjsHUKvJvsSCwD8r82LbVb-RJDJO9guQIsdJ1iC2TJXi1c_zJuFu8B2QHWFXZDZTxCAlvUUmKJgKsKO17LOhmIfpZaXAIrMwkCBYG7vGarEKZB5br8a0kfeICN6b-HChQtwIhTtwxmDcC4N4aa6p5JMw
2R7s7LPuf_2cBRiNLq7rEzkXZCKYqLSSAvzpU4JA0Aau6lQRv1WYMq1WnYQiV_9M95jxvBbhu8bBvr5eUfP-CsGWIwMBPhGtKObiwId4WPNAoRxQEa54o6fkCESeoCq1-ebZLi33SQTESnwy5blP54jDYys8yASax3klYzuGCOyFywdYsLPJl2ISlDmmtxjbSUEV2UtDmIpGYkf-QwM9NFuksjch4THgax-FDyHO5IPJN2h4vsoYooWM2ZppFluLOucnrbjFGmKOeb-IZ2FSEzXvKPvOqORU3yidOCDbQWztvriPnG6ydSqItHp7xdUVxG9-dQHzcCx7gP8XGPGS0hmjh2h10vYokpHcIjqo2MSFNuU_CSzI7DA2l9uW-9r6I2It87xo8ujTv8hTBBo0mJlkjKqXMiFabuGvCHDnDZJXW7imZkIG66CKEs6VIbwo_2x7efiLXyZykdGrmlPiEXiCQIZY1-ula18wLbSa-3wsY9-NlU5K7On3SUj9KfwXB6EAzTR0ebn36E02wWJrGyFZQs6BeCGcwN4obMnxlHXCn-gNkN2wwE2FtoNQCy_UfEFLmtCMyhKkx1_EKx8-x3T1bNVCBafNCPBfueVzhUOBAiEd2rkIiU3fwjIK1Sd_AgCeQTWyDgh2l6tSQ0tfh2jx4IS1oq0NDEMszqYmlJL-2I2OHA8frKtIUlth3i5AMXKZT6TcYqaEp1rqPmdLOPzNNbpAfHAbW-JguZM0aJz7xbrb-6TO23vI9eli9VHn-rJOgci23ocLQf48-GSJtZSbDjwzQybrEDnw9DCPN0oQBo-eqvnau7h8rpyaOGZ-kBotgVWzBxcmm9BOv-B3mgNqT5MhyXRLWvtMKFwHQcFL87u1ZJ2Y1mKN-4s0j5W47_SjWB9poukCe6WxXLB1576r697J0o5LXCWf9F3xkMWSdaS3Vynh8PbuH7vsxYS9WJvVKAwyiEcxNBesgqoQxhTAMqCQusGVKPWaW-I_wJOOyD3BrcMsTtXMXVq0_NCQo5FXSaiX8Rur9du-Pgnrzkolu1H_9_uv5o7rrXuyXlR4EGJMvpRmt2F8YmxDjX0lLSLtPZZs4rMV3vyfKg_sTmHLh7Gt1-BQctvaSj7b5iHHceMpnfjt7WSd6RO04mBmbTBha-oIqy67FEwpx4HQInSVrJjU0EWG-3W8uMNpgK7mTWq0L7YKx5eDB7M2FjxNYM89hDINoWjZdmZV_UQxD85nxKyDoK9X41wQZnmZy__XmiRAOmIIg6dIRKHS0jVOIXSkggfoJkCyO_JOJ3Od2xUEBMF46UruLA_Sa86xcFtF8CmSCAXSQZJIzNKvbnYhq0f14tR6toH2AxwEaGD523sK2fsh3Buvm6YRFeDSzTP8XZZ7B3Igavox6N87iS13wF2qAZNWv5GNjC56NvS2Uv7ZeV8nQg-LeUNPZ8HGrZTpCsx1GT0exf30AuFR0MuWFHVrZEIUXcRKqRQ61Ab5doXF7i9VPkKQfPe6tLRXF5XaERpEvQ0AqFPQZqjREm9Kce8-vTwalHBXrs-KTDG54JYEH5WUtGHnBrSYlK7LG1-F_M41xrt2exhDYSV7EtxO55JZvNVezNI1Q_xDI6yP4PqAfqA_Ayx4Mp3jyAY4aV2LryzTcFoyLIrMIwd_pRvg9WE3426Mlsqbs7X0XrG68S337l__2mKJNP0IMdG0HgXl0F1ura6Hi9yiH9kdC1vlJoYj60D1XPC833-CrTWNxxi82G1t0QTQCr0BWvpmWqKyBxsSyZxGi911txzr8fnwWMS4904Do95BtlTlOAxvht8WVeROZWQnjTINYRkfLqTRyKl5YwXmbCMujbWIoBRb9tLhc1BMGMMuSQFSccBhZ9KPFNOEkQdbGi6y_BH9WBzaV0Kq8mFn4EFjXu_ETVHq5_9_5PRIl5hOlznEUvqlyJDSsztiAcC9Z98L5PIvQhEhT_jMz97nTt6iAyCmqGdlv-zgwjoiF7AZVZC4gFsM7cHG59DuEES5fR7bGtz7TDsU8RB6c_43Bg5PC2hyU08U6Shbh85zCNAQr9JMdTp-zQOXkKhJ0Cw7z06MX8qr4J6T0hvEhJkPZKhdRgO8icAZ5QPFTEJdPkkkhtXtcztmlioCXBJ0i94-RWGVZd11T_-B8tkJgAfOk8NoFvO80EjcHLAO3JSrFxVvZjkXxJ8PSmxJ2QLLoKcPDdSALkjxphim84mhLWbKXGSkZgT2mtu4XffvX2trpmxwW7khi5pjeMuzDp4xOLEZ0DQyLgJAYSyzH_beHOnHtkXGHpYolNuSOo1wU4DhWWieU2-GhaTnZjgb6qBzwx11eLadHLV0QmKRrinBbsgKdI1oBOBinJgPz6rpbFuKKARflggY0G9l8tYvEE4nzsPvC0pnLmWO7wJ7KSGTsfa77SrjshxeCr3R9Gm8Us4sF5VuNHblufnfCHs1Ludd-1aA0BC5kpNsU7BSNWrW4pdCP-kDnYPTMW1WCbe1czyvAYmgenxHjBDhKx5C5IM3VxqeEHYxnz2MUnsyQaeiT5jyuF1BLdgOmnTBCWwNe85QwzolBfPTYgsnzj1I-2eF7XRazS3FG723F-YztO1fmPuvXoXntyRCROnUtSOtWamsg6C-1q9tYZ8YsQkpX1KVmnK8TWOJl7GHXf1Kc5yEuMFdEwknGPV7cOHk1k-eotAjgD3smbnG_1kDA608aF5xgqUZNYVzG_N5K1nsMF3m42irjl0JfKu2Rxl1hRinsgheSum9mSF1x2KhWJEVZguPphBRBQ3uyueiu0qQ9cIpc5hMuFa88WRwCqADNxu60x-YcSlNy_0yyqy4ASnbMWkLajb756vPY20ABoQkRHmpWkbN6v2ItYhTPhvL5m9CCFIorw9-ti3R6dMvBYe9pX4FXgZKTHqRZIJ-9WxNHyNCfwRfDMMC6xCHYX6dr9sVSuatbRHZA1m4n0G6c5na6StM1mGwjsxhH_TfDDcck95pFNWnqeGFvKQoVdpEiOJ_0601NDLDq9pzIKoezx4x9Zpx7PhP4g73Ut9Kd4_O7WKmJ2PTtPXzbsj5iKu20rfDcLlLQv-E76fTWv97MfaCRa0q0C-FthGArfOKo5ww5WTRvPe766kczVRNiTgI7hHgGsi3Biksb9zAsv9Zhu9lAnnscX6aryLteUimDOJbFb27ol2H3K6AO8fRoOXHNuJBrTcBFI4fViVFDRLQ0TqXBEmK_lSrbsaK_7urQAbvLIK-WbwiCy6cPNeR6ah2EUaVD7MIrJ1nNdT4RG3KTL-h2veAR7hZgwF39Law-73I6AY3NPLTZP-Qb2iqobGPQV_nAeh6uaZ05Z1-VfJXGH7VOGygBGqWRzkt7vUAA9vAwNFe3mdGO6Z8sCEsgWKjYvwsaiT3qgg9H0igxjEyuzrrNsTrTe4HAihvlPiH9gE6WjpxX96p1LXQyEVUAapascjFwwJZ9_bazlOM0g7G1Z2rHVLconL3zlxGouJ8zaBPoGfkPoiXaS8Mf-cQiTg9bv_YbG3os8Ekjsvld-kIdK8HPIGEFAa_kuK
BATtQEj0BnaEdhSENLvtBaotdrJET3RCQdvj4SPcdASe7AVb75ScZqG6E0g1brXTXUKKpHlV0zqIqqrR0x1XiE0epad2B2sAzclQN4xkqAF50SvKMYBXAP1UdccptfNYJDT9-oKhN93tt7QeRy6ppUY0siVN3TfoU-LldjX29SX11uIG562P_BWFQDYs_aQ2CX2MGSNYTxKW9a2z3exFoRy7sE3CKAqSBCXDh1SnRXsMPSpjj5aZgiUs9fV3p4CeOn5Ysy9cY39S03iqeu5qYJz9v1CHQlshxIsAlKKIPqTR7xIjKCOstZCdoQmkpY1M91Rh2xskI3ytsFtnaqq-S9O-bhBb7zYoXfSg9P_lVwZM5QeQaxBu1AuCdwOQk4pKx70kPJW5FCJ1CaKk1yfkrgkkgOCe5A4ZN6fUx31Q2LkfLbiqD8Ai7mEbKurxLwBG7C4tqBAS7dSD78QK_i6YN-_R0eOpb3eyUf8PTO-Em3l6kWS-qpS45NKv2bKKRruM1f-gg5WyFdY8a_qASX-XlvIulTx-wgsI3HHykJ0L6lsf6-GNkFneybMu1vPD4LuHM2IVmeU-6KHjZyn5AzNWZIqz2IiZQpEIb5l9bU0EKMb8H1pHIAVCbqHkU90tFnHR0lNV-fhabDxtjZ6Crhf4kS4c5clNGozG1DvPOEFdbEYlzrc5Q9DE_6k5OiHSdjQrZbOSQA6Qauy0I3S4w-MNCdqOkyy43cxAgxVy1_hBWtgd0e1KDUHbpLiuSF7aSiYaVIs-SNwiH0FeNXo9tRIlkGMafmhVo7X3LM5MwdfAGOZHte_a9x9NPae85_TMCzB1uoD1rdanMDiCK4OMj7w8hhkMSQgcARRghkm9qnWSvd7cUj0MnUB9AvH-WS-VR_R5EKhJ4aXtdS3zWOlcTez098E5uhm_SOM-68_s8T7gCI1QYVz-dPBsjTzp3Ib0N1sT1TDfWlkEwERcAqPMQjTJHbJu2yDm0Pq68Zyx3-5lyOiPTNlIyMcAj9GOA9OJ6w3KJZZHJkiUv024OgKGnLqeZQFvzXn2u7sdXk-PRuXQFC9QcjBPcUbykAmmSgWZm4CUbkQY2CVqBqeP0ajYC-QqRW_khCNgILsEI2imVqF7YA79Mf4lvcOh_3swKHhhJ4k5eqOrtz5NVkLv2gl3kzVKz-UhCuSY1XNhLX2jk7DOjYpxLJXIa0NqluyV9vGUm5vS-ZOX8r-8-xJWpQVrsC0KSAwdnDLujAz2inGd16txFiN0kNSuetwvD8v0kkx_OOXR3QpO0bZeSoWiaWRYZrpMp54reprM_kdJFUQVin5mfQhKjxfd8e2kbKIq_RQVZLVndeR0EEX9sgl2JhibCPiehEGBrqC-3GLhn0hKYlBXGAJi3oqSFKmMDcIjifsA4NOpUalIgvZ-_8Ikgns6TzB_irbJgli4Zx0ZylEtWtcXlBWq5JPlbebmBYojpelFJpnjMwohgMr5xhTTTDbI5gNhbfUtgZmBHDYhx8GW71ViNMLUKOwiEowm4ceCrda3FFSJ9OTpgSuIc3oXUJop4m4MD-NTYdcx-XzL813A1dKF0p-2QN_Wgz67kqGHv3Yes23rqfpjjsd3zlY1P9OmugNwRqRsLdWH6ZbUjnreZ3b4pep72PjpOOAL9ioVD_jOzJU_RTaOs4zEoLMvHG2oojfAmelN1xz5O9xSQIZmfyHioj1xpCrVabTsIrrG2Nb-m57F8LsHReCh8HeokV-OKbCtGyspdZgnuqGuyoxbKBOJqgEgJ4F_gcgsfEnoKlisiSFq-VyF4Qn4hFbTk4xJMPGtjLPbtufT_mFkASXx5Vo4eAwOK4z2Ws7DuuUPlJ0xWwUEsJ3r40raW3UvaDQ3_6bUrOc3bTjFvUWz68y_jjaRf1ndKSqDv76JpqDomWb7F9P-yQ0IWpdDddZeHKL6MUA3Orb8GF347Mrth0XmPsKwhd9NuJFeIuPN69QPnBTgrKoSY-vVnjjOFG9tDXbWi0g1DvTt_ROpIdhq5cFN_e9Fa1RlvqxNDoOTDToLNSLfnygjqn8sqPjA6C7hGVgReR_Llz2yuXiv6FTQOLk2oo00-jentYopaRF-EeUZWh8RyCc5teXcL8m227LP0RIwpnNuE-YRtnWfpIQTQk4ShFopSIH9rEDA8G2eepqavaKb7U_FEtFoj-o_a7S0Lq5YMAT7AtVge53zRxSZv6GcRb_Qo8ApX9MGlvcp0uJka_D-hov4qjTX3rPkvWjsCNpexSgQC7YDor5A3NmJyezZeD8d2UsCqN5A99KoZYrYAuGKq11LpEJE1XcPA_skOgpFIGid4Ys-9vALE4c-b5tesV1fEu67efR8YPC6yIEmJUJPswcK0V8AvXn0MmnqePfIxmGzqioMq1PaKS-eWoWOIl78I6RALf2S8TJyRF-dquEwHLtzAr3JFnF2KT87cZcXvkz1GRZyEdPYeCTrXQVD180Ab3tWMeGvb1PpWTjf12v08cehs1y-4dt_k9L8axea72Nqy4yXnnQV7YXM1dFrjzCVWRtYxz1SFHGSj0MGo1m4Ts1agUzZi2Kx3vdQSftdimB0ZBWm2PYR_mg-RZvuspYF54o0loJSTmEd6lZvmGu-eINHoca_LxZJOOkJk5QAc8JBeK4XfNOLFOk0ytAK-TCrgHdjhmRfOdrH2_aiayAurkK7YNGWP_6y3ltYq0nW2OGifd8uZXRAurPPc-0LjOoH2Rjr8isLsXtvv3CIrL-O89sU5DRkZYaeSHpqbQHBntcWv5RN-PbytmIqxcJP8HqGkaVqrnELxuJTeCefSoxPl8F9E9S5JFK79-GyOsvdLbGF0zDbAlFups2C3rt6DkjJSw9x5wyaSs4UCaMZSyLi5WlDsw2q2ja2ZcLDSxNGqLJ3KWqwsGFzpBQ6PPrAcxIoWq7XzfRzTpGdm5WX9B68KV8HeYG5qg1jNGADtBAQx3Z98LF8LH-I-ighpHd6ogdvhmhQGgOrjekrJU5fIKY-Adf6luH6KTKg_zX4eH0NNBJzNCAWIsvJqDZoKCNkUIDrxcx2axVoGcSuKS_6uFK0jMhGT8t5OEzRUwJndfeieCqhp8L2rWL9dZeA27ukE9WiSdesZj4l7zl3iogRq90FO86rKkfF9N-AKPcOoQ7mVByQIovMTbjj2vFYjNz1mDPW59AskIttZJznCHvazR8_JlPKOxNOgSaCvz6MlhPMqDaZDuB1a_07jCGaO5sKzYR50PskAlOWxE5VOWiRof_7f_IHmi9m0LQ9-jcTX9lIChqwD8gcYoqH5wh-0RpIy4Q1xt0Bz59xmSAHPNnZcN2CRwtdh44GS87mibXzWpQ6B2xZkpdMvGIrFYevg7oPrisO1Iw9Krby4GLx0ja7F4xHyTweDldBtbOudE9W2i95x-8By9dFi2rNCmfehXQhuPe1sjEC_Qawf9lz1oSBq52zjIE9-9JLj5D3It8zwf0oFdVDN3UJnX2Vr9jXwrHoH0IfXIgwHz3qHCGQKwzW_v0hLpfgaY_9YpKt4gDxSfDyamK2WQHb2iw9dxty927
jfrz-VgABk-_zU22mHiK9ThTKOUSGnbaXO2zHRRZEFVY-P9ObeZ3sx6pVTEV4FHtCCEanNOXKeNpxE60cm8OZELRTVO_egxsW_EThoanoWFI1sIVSs4uuEaKGvlZjTxDAt1fXJaIuFiatYugJ77YYivpeo2loSnrz50ou72dR63QyBQiNT3kEPwPoFsIx3qHU-7fhctekEHmYIV-hINDKEhUqVH7zF2Wic0GnBDzr_TUbfbWr2Hx7Gg9KODZSEVO3pCFqpqCwhCRhomsG8mmsHZdcVB1XfwEsoUXvM7_tJf0iahgSvr3v53Ki0D2hYCheWBkol_7pJ6e7nTSoo9P7bWlLDJHTvMeEueUUTwwtfeJBbBYr_j8sP-vPanGxLwH2CEEIRdALWrGXHgVoCvxxcZRNgBOgUIMTosSDolTaLyijfihjxTyEiJ7cbvjonBHgoehN0aF7D93gL54KuknFSay0QzNx-fuGyF6wTeXKDb_1cjja2edVwihaxH3YmqIysmXNQ7w2CdryEBFsjcQ5o95vlB1UjKkSoWag0_pCCO22w3OyHFegHMMTvrMy7GArw1YKb5_yK7m_We2KlWEMxarQnIh-Xb8e0qc43_utSREaAOIEm245BvJdonm1XEMYDOhHXAZO86rpzJImpl4i2xWiI59jR0PmnjDa7FM11JICT4I0Z-TbBznOs8Np3g5KNt1zYbVx7B-y90hS0p_WhHPcgICDjjUnCHKtWNC_b_zJXvCvkE5RQ8KhPDYXY8LwAph6sKBm2lhRuQnNkFfVzofK-f6weICSUlFx_pQeUSuI97--aF2_Q3H-8mmeyTUkhuzXOQic-O4SyXmnf6eKN8oUfRcuJjsERoWktZrsF4nJp3rF5XZKANXPj9r10_6jAmULHg6JgudE6RYIB_Su2yMSfK4l2BIz6zsOMazMo1CCxMpV2OZHdavsG_-qWchGihd7AjP_ivcWd5m334EFdLQNeY1QtVWApsaFzfAJqW_DYv7OdwtSst0qXwTwvLLlobdHos530YaKBQiAilbaqQjNlsPlK8agsoRJMd05YriI04epofcHx75udFfG-r7wQyRfutpS8PD3Cm1r8cT7z0h-JZP-wlseTQedrxLBrClGbBQsUDNuxa27YNOf3JxxNseXSIbX-9Z7H1SH1Ta3DxPKncELpNdhjrMPC04NOroQ2V-QLWU9Hs7hJ9-mgGCTKrTG_yiqnZspwfTgk47jsFsWWhPsNMWZ-t7Pid5CYe7kq9zbe9XswFFPQtIi7aV9Pu9AhybJVHZv_tK1m5qTSAgoO-uPiVpSJxNuALa6VjX6z-aHABB5C92s2ka_1D5-9TVcGf3eJVjssQdsOog3BhM0mV8pT3U84xJwifAhOlKbkLOBrKz7q5jQ8kPfxxEKXA8MpzMJV5KIYHaUWAl3A1ALilYVeNaPVcIHE_kNEYuTjsjnLqUn1-_G4wmyP2y1Ygu234ivooZvx1LOu2zdamlqaoJLnbyikCKEFXky5ozftP47WjuH9ngNe5hVy8oWK_fm4sf902DGqO3dXSD23Dg4brG9HRGUO5Y6raRG3l6g71xCoNXxhd4loomwX5jzHt8M7D2_n_9uk-_QEYEdmv-hbtcBEhLL150jgVBT0lWYgzZkuFM6PmTh19KxG7_mIMX2apddsyPdoGlojWnRBgFn--ep5Dmrj_hiTRmKvk0adJo04WoVDRHxzZ5qMOn3ce7cUHtlHMjVNv7FuAxgJYxzgqEqTWFXwuZoL_eXYuAYNG8SRvJtnwMewO_vOT_zhyWTLziMgZ2k-THlshUs83LlpCYsAb6ejKT_bCmoMLu4PI4zH_I716Lmyw3EKOtl7AdR42OxNot41AB1nlX8APhXa-4117CZYQJdKNDrYBegiaVEpn-vhU9bB4Ru81LsOPkcCQcvP6V_Du0rE0ht6KPTYCrJU8oWZ6sCadrV63ymr16iNR1rM4B8k4LmZRFjeO_sHQk4qxFTf6R46SJUBt9f8y98pOz2SfAd8O1O4lauq0e_VQBgJR7q7aBltUs0354TtShVuUGfRl27foiiW-Vqy14RsNTzlUDXm1GZGKOC7KdCVXn0t-I2cwqXy5cb6eN5S4LO3rtbq7P1JE893jVdD1xQuzT6sgASPa0SaK_IHl2rRU7BdxwI8LPNDP5ncy5RcMlAHmHplIuH0U1Nzlq4c7_JiFilQFO7QVFCnT9rzZZ8V8MXajfcB-vQ4-H1IL-DK1Nf363eBwi6pPA4WMNjN1R9X2Nvyas32Di5D6IXAqnSbJt0vFHvubrHThP4xDfknG8i2FcpSHCRG8Uk0Z9o5gju8kbM15Ys6Ef13fxqUuqVcTlCbCnelccn3lQepThy7K6KXsjK2U3EXfywlWJuXH_4G73iKKK-qoFZ5gm-0bWjwB1XPDGbW2p5kklSpAJAILzFYBL-_HdqP1OHJ9IZwqJaKwxT08vvqV8N7kL8ma79roj-nqLyg_x5-qEPsh6TCra_1vW0_0YK-aVnHJPa0ffHe2vprK4VQP0BbOeuQ9UEOYmklxqMt-A2JlBogwqx5J3M2Uj6NSuXZk_Yg_Gzz6Iuv0Dx_fFfK-1p_yDDquLrgj5zVymx8EOHVIfRhXk21zA9svcboIgcMd32FCEpBSQKm7g9riPtNxpyOrMnevBzol5mC5m2PWPYdX-AojDL3yhDsT8LyDBD45tJWJEn5fYuMiUyw3i-RcKl8huCG15Tb3T78yQDEFnUxyZNcECVQcRPv1iI-V3cP-yQov29rTKAK6URGNudCy7q0mHzNACh_4stp_uZ4IyQlazaumjiUvpYf5KHg09VldFTNNvhdNrwdukgkDb9UmVrKmMJnS9fTM4k-ay64FZek901CDlUUUnRtxHn29-1YivXxDgJ8B6oOyX7NSoIac7BbDaYrS-J4vFcGrjj65MLRUbGYRpML5jG3mPPVnpwTBQoBzlnXX9AXXRmUSm9sRxBYcFQMMJdamkGnhntX1BpO4RU2XqreoFouS3A1nwgKSP6lRrOHsE9xN-lxwIJaLtrczXEcPL6rDOFczWSWjH_JH63TuDBv5Jfi86e-Tgp-gridunpIRyAaiGeIk2KCizERICXLa2yci_WPoN3sWYJuwtI1m12-287hf5GW7yz7NN7Py8nkE83EDgksxacWpPUuT36nxuXz2BS2ed7DO1X9zfaYGRDk4MmpVo94q5eRn0wiBePyWSkbfYpdGkMi1Fh9KbAEA6hKkyWJqiBz6Jo_8HyPY47qN4zyIaOYktL72RVaCiAPPsNJVAC5-9tZzCeRzjcJaU6G_Fi0-vqMPGIkRkiUbqcr8AOGzUPkBZUKR4EsfHOFehJSRK74DDTiO2ymwbhGrOIR9pstGo9ScF57JsmugOfbX4E3vJSFbztsf82ylPSiemYBlIpsWumQythx_A38C05zlMN3R0QHf3c3BBgQXmAty3HaDfSKWfRrNS7zDpBOKFLVWqs_bmLr-v611V9xRMJbQ-siRI54lI9TN-ntMUbnI-rNIcGjyXMbilXutuoklaCiRP50q
q0KuWq-QW7z7UpTcK4C3FAO4PL2-VRqGRomX-tq_t04cY0h5TPq3yr6rwu1zssgyhyKMKFc2qGaMZ5RwATMPAe3Q-atlGGyRrjWOtwd3SMF-xxaKnEqVZOqEY18dDqMsa9Ex6jQO0Qq9IcKsO5YBz4Eb7R8yPQBskIkhoGlcAQkuQ7dgZPtj-6f-r_h3re1gvhdYyHYd04eaMyIKzaQcqS2bFp3bDaOTuUbyvTKC3CWVgjWEVwgIWV6m4FCLNXzoI1RjE_vq34hyvjNsy-RtQVb5I-vamzaiQiHeHCKPeWazIUTAaOON_dtS6_xN0i2Qu3uEgysQXg4jJSo76O8e0gKNgiZSmw57vigo6-Ug2UqxyW_GrvKX_oXyD6rvLKmB_8Jzxb1S219-ea8B5kaFHDt2qivrmLc2kY4tWB0s2svgDhpwCKBCccfPGgM14-vSpHMiGEMMCAmu3GyFjt5Zj5eTlmqSH0B6LdVGY5l6j-jV3RMNAMM0ZmlT98Qs57-tTc1wI8fXqazruyAo7QVvfaawGcKUxu_cskKX3tWhz7GjDVnRbO4Av_XTunrVrDY5TZIixQyuE8g57RkOHPZrDGDJAVhUSKg9xn6cfcV3DA0qRYrJWM62QWYi44Lp9j2vJP-G_VuqdEqoUXKG3kbNjmrQBnZm5PcZgLB1Al1DYt6JMVj-RivPIIzyerPN3_n2GYQBiu3DtLMMt5kJIH-3KAQuSwr_ekcfrm22pDhbHr0wEsgTb7F_sXpJPZo5gXJ35brmox9-bBFnJHVoF-Q8UNVkiaDplFoZm9OXuBdbNe0NuwfZzBhTi8svZdITWljcWthr0LNCvxYSfDatopJ2TMnf47jM07Bdgp3T2vDj98toaDaO7_CPaDgL-kjCmQKDMY2HZFGB3385C7VKoyroBkMjK2WkcG20DGv0kiPeqzM4l2YkjrXJR9WbIVBfW26-e7IYrCr0tbQGX93CXYH2hc0NJKGXxhp-ULHEKBNVjOtohtqVJS4ti5G2snQovyJWZ3k8bVcfWKXmbW94Rq-EIkRyGwWtTVafwsY6HwetrA5KearRze6f_4DCurgk2lECfSESbtyUX77JGgxnr6G1o2LLdtW3f19E-lMjAcKo0hVdWvOh145X8yv9uSTmk953q3Ucer-NRcWxBgZSgIJNHoiUpjQaQ453cWXPMOjNyMFQEHPKmA_uxGyQrbBGVb7cBBvYpYmUDAkv2mksWYhvhKd9bAj8blWw4lU9PvPBssD3Ytoc49PegxyO8ZoM3Nd9MzelNhaGOZxBZLEi-0xpOzb7DGUFUqNSAHLOqaHMb23uWt8zwxa--Nf3MGHFH0TQBBH0j5lo0qHZajOGFAiEvcF7n5XK2ndzp6nsdhJvmQFmmNtpiglaOKLCyV5ztgV7OPYAX9HueFEYkElnJ0W1GrxOKBVev8bduDXnsYDVmTC_ZtHKz0zdzpW9-oqu3FEdwcHMJDbycCHZimlhuq0_4Zzh1fsTQILRIF34i4XdYBoIXStr7vU4Pz8_JsPEwZYubOKC9tG60kF8OslbomdI4SWYSz3jH5MInXsFDywIf7SVDg2BkETzbYvWjLFVacVX9t2N0JhzjeFViNfi7C2ewWO0jcxvpe4QAOSFv5qTKrgVX2SZzCtMA57lGNuo_cPRza_eCYoqvRdrqELQLJQx7mtv59RgKcxWZOS7-uIRzt6jSg_3uZ-nYmX5z-Jo1nYY_NdDMSqeUHzxODZOmk1sUap27eB-oJu3DkmnMLXs4LohkKClU5sxnPqNAyKZc6uHgZyMaOePFwDaV7CVANo8NceEhUB-Oz6tslp3XgOgSXZ60oKY-CahtH0c8FXcjFxKJGJQ-5HySteYcDpiL5mwIKu8qDlC5xB1fY_PjqxuiyDAP-m2lokucyQnrX4y3ZSfBk3qvTKv2xQWPO6faGR9TF2oEf5yk1o4s5lXWFZOTKXBkTPbWa4QITRZs9rO872RpcyWoi1K2uG3ojF5hQQnPcij6Cois0xMrwn4gEyJIlAryHln54WBvutUD7EPAQ4WQKtHvwcTq1LOU8GPJAFBwg4l6dIEYFlqcPcWHP-pRsSacHxQTO1wmrnwSnfVf8DAQhp1qD9LORTwORWbG9gDg3YadMCODeSmCQS0T31cDtU-nmy97ifUEpG_A1HhYGEhUCzGBfT7dcSvxxEbv62oeL3oQ61PjaBe1mOnd061VPi48Ft287LAm3egGZOPLgYx3D6Pm3AjRaJ-OdVDQndXxNExhUtNnLUySbwoEklgFuPHh6mMWrMLcrdqGDQUVcTMPlcv_Y8W8EwA_r3aVkUl_j1Ep-X_zpoUjV99901San4zxIDpDBJpvPyJthznrJ94D6vY67HjVo3ASx-ZXHA8F4HmJlAVboMVsDF5kVpLRiyeGr9ki3SFUkqhoqT0EfNZ5Roq4drdVlTm0b8G7bvsX-r61AtEGbmPSxhrA-vFBN3ZKxZHMPDTIHwpXsc-P2kLEPIjSLAVrASBXsy_uIdVx870LLr7G1Ben16Mnm4wVCodYiLceyDSbZhfMY99xfgVqswfJH6EE_4mSj8T1W2bUA7K-f3XISDsJoKNlyDPZmdfgydvizvpp47LH7BzYYx-0E5c31r1f9XmAIVcFVuO0rxEOEl6Q9RB3N8J7RVosLT0S8oWjZBnxsBrPsE5dbJQWhsmfgChfQ8Lzydxf3Lj3k6KxV4GCjk9Psj34ieBfbaFnXribmFQ6P7wRzG1XrVx1ihabY2xNFUmZp24IO_hCzSAVfITVKQ7PKmc-_i6aE7-FIunbfh_W4kjUgKV3RQ8rZEByevV8IwbD71D2uM1Gb_TrGfy6mv0yhEg0-bL_jh4siyz4fy77OH7hVP-R_UBl1hT2y1anbHKM52oX6DZ48CFXCIgeOKQuer1aR_-xqBNGAZaTDVUkI4cx3p4TM-PNOjDLRDaTOqFp7XKuNuN8nsyRAIXkiGDXBCaceu5Z8HjDOJ7zXBKWdj7Em2z6xwqXCbkNjR30IUPhxQF5-eSoJgKUDmThPdSAeSzVwsgRc24wMfPRQ-BNlC5_dGkMwgYiXzd97HRT4H-4sRaP0-JXyNk53x1j1rQ1ErqOtGJpaXQBz5Dusg1nd_6ExGb4Ibsk92wHue_O7rCNpny9Xuy_oWjYNuceLgalylcsu_uKYYhI_6wtEuKHshRq4_-nxJBnEP3QpEQ9iD4bQ8wD-vxE91SlONAS9wkNuMqi1hW2G_l_bds_Xvc_XU2gYpOTf8gNuSIDZhhaQmZQuf7kdxUVBKTfjrZDSvRtwrcVRojJBfqqHcfYfqfQB5RsxAWnNbrl9AKZ-WQfS9u4mCbOHZOpaqOsCgHOUY9JKfQ5IyTJ8DQhWXpbDxugfQBJWZpWAAwJl12rh3LXB6BLdSE2AYjdSBcnvvukgg7TJCu3v_zsr0vkJ4WC0IpiXnNkMW8elkhVQOaEIUER2rOdNNuvClaorgLOOSWIutIRvn-co7Tf0zTx7wkhDmdaxjvW7mIwpQ6-kPF1f545bFI_IJ7hfjuZKrLZ4ZgZGKWUTx
ZwWikE_6eKKNGfAESHTSMEclWXQNK5Fhn7SLdybYbXRrY7d8Cxi1BC5NNIV0mQPkcYGzlyjOvf-n_jzWvvCks88KlUpy-v2YTd_SoLiwPkFt8ehDebRaREcx1mzrCpEPa9XR4igpIoM74jv_UHwpIxf3m8dKWhuYWdAdrwIN7Ax_454kdO8p06YLDtecoaAo2aUItX-le1joDwl3-X7t5hgi9n4la2sjIVo0fCiXhUnWS5UiUXxCzIrL4_Rlf4kRv5X7X4kLw2o8Ugt4eFNTRI6d_49f69tvS-4fIaFNJXC6Z7tGEZnGAg-W29FnRC6qGBKik0pcIkGSfpRiTG44k_6k_29W1z7-pSujO7HEisS7749h6k9ZXLY3yt8RhpLwQpssS90UdUepoayYw8c-GrtajHYEQzjndsJsT67YWsO2pT6aCFnvHb4TdcgDkQHRYJEbT76InW4fJW6DpwqCeYXm7vn_wP-6w4F2iXZz0QyERXYFZ0I79q5mugjFIS9DuuWHjVCCuIamtwopTTo3Ji2WUvz-6TOkFO-vZptkWeFufb9j78ghmJGTufJtUivOlmwFfsNhm9NH5bVKFJj3vDp0NSonpr9OSV7w8O0xI7EjLcOe25lwpsKytKod4O8VG89xqdu35ZDkv2NUdAPlX1jbDfg8-V9toXjOLWDLUISweK8ZoRu6hUmn-V6LSB82lpXMshkBkZ1sq-YLWbPkZEl-0_ih7La-qZJKIY_xs5vTPOlp2jwLZ_ndrVklMMMs71DP8S-I3WFk9J4azeJwc4d_xUvwKsmQKmKgCV_aLhzE18LxlnOvmfZB402d33VwsZ-ziABHnj65BNATkjzobFMGJ__dPA4Us0T3VGA5aBaQW-p7RvxzudxLdoM53E3hRBXOzjxTbYCTHTXozUGP_q1I1htoOo7JZMXVxPZ1BJsBhyOQQ7cpxj3P5cdDiGTjbrcvNg7bNJ66783DY1zZ-wQrtP5YDN7b9gNgSnyXIElJrl5gfw8O6CPxUC2TdhsD_yFp-lvrYW9PszuV8JQ-iM80-wYFSByI5eg2rfllz9HRo0w8Moer7tXN5o1LIp8Y8llpBnPugKUsGeDhHNsorphY51Z_O8lWd0ptTni0VbZViQKwQc3Qij2w2cACErDmE4PDJEQZkuTL4f6i1ICXWDRtJFFgF5lxqFq2N1ztPUcIcL8hZ19OhrGAKzAv3YQ-RoPoWmhN71DzIDsmBxAH6N5XDFOXPbfoZ7eQVlbe-q5pBUSktzBT1MKFqBzaX5aH8p8t5RQu4sR_kIU50OXDMiJOpypn3qT77B1P0WgLG40qHcOJdh1oYyLFrapNkKESyfEEoKBhJQJz36qCxp2JivhH0Oal16huvs101WZM2FmbZAZvDobK8L8Gvv5xBdbBgFrndk0eFn7-W9o7Sl6SgckxLtrDtk4lPZ_ZnpFfNdsLra-CPiQbPlwo4yxvQxdcRoF_RBltJKaPW_78djlUH1CLGd3VCRXcW6c6toHLC8LLLN1MYVfWmUPhLMyLK8ZEnXYtjtndsdIjfMDrKgLjI4wiEoPHztqXoUafLExre7i1AWZYl5vWBYZny1cpjcIfBP3T_UR2Yn-TumZM_VdDwVuZt0wNckcFveBh0ulbcLlhHhrBmp4I4T2JKP6dZsNagnxurP0Ry1voOVOdwp1WHYFzVmjJG1pq8AlHyfMMYqNj3PF9asXFalsprVNK0JA02uzHgT_v05QcV9tCXHeGZD4BnS7OSHoUbPqBwZFQtTlXl09pF8aHqGtcjTupT1lTbCSHbLboAorDTe4khUC4l-80e9H7OCFATMBi-GhejFp5wsJOhDH1SvxUekOu-b_7GSiWRxlfUEMJG4U0LM7apPW850HoQ3kFjOQ82pUt53SFTiCPkC1yAm2c47I9n6YMX3RG0ljOWtJrfsgDe_CfsxrjGYrtXrfEyug627XDs1cUzqWg_cyqjSECmMOZXgMcrJX8Jq3Qt8BMFvDKxurvc5fm4wXSd7vtu9SKrLeQO-yGuL_N0JbikQdjqd7H9TWd7HW_RlALXIjES1barp8sSlppBNBo1ki-IzOedAjnzKu2kxRrKn4Y2qUnHU93Nt8KeNY5zwxN43hTRweiFEh1-ktpWIK2Vg3SoiivlDEfpTpaE_sMzeWS7dksQQYcu9O-PyD4AjZNewoQGGIwhEdpd3GRhMHHa5kO_oLoJJGDvsZFZmaQQsjyWy2RBZTdpCMkIjET8c0mrg3hZOHEHh3RQPB2aN3KNKNM_trNiHjmPUClpCFSunBazPjURZmPmenfRYxUp7uzKUh9s2R10NtoPvdGMEvidvPPcu5HPnud5WHpbMZ4Qq1UcdU1V1O-MPKjJQuiczjiZVsVmi8Yd8lqA-4ezSeJbQm2lmerVQIEC6AkXTbYRN9fX85Nq3spVtZJecLFfMr50KqBBgn-TtbSGMnYPJjbtN9YNVDGeTtUZiiWQEet8JYkygv6tEdxsC_mwPPnyBifxpZsFKtd05ww41S0mJhXNBzWTesy4D6bcnUB7ANEIg3x8tYjS8K_WJDeIK9GP3zvBvhncDyJWdEq8BlsWnzEUdDP1iTEeT96IfJzCpBreuyI27JcYMbrQTH6F1ZAaveU5H3Ng7l8syMHoQK8xqZ6iOCiNXf0HE3uuCxHuTB0bn0w5KOwqUOx2i2utGyTexE-5TU5EyAb9hQ2pThfsrDzPA1Om1YEitpEJZAMExvpLXjW4K2po6cSh_krjVxmSo8ID-fvpWkwGOOXsTGNPWGTRk0aISHs-X_9F9iguCC5zMbGxrPESj5rlSM5DBwfd0XNBONigoJt__uaycFQoRGKLIGME6jH6mD7rFtS8peNvzlXprS-3IAdUBZchUEEYv7A4WozUg4OvsCgmHnP0jjyjJkOYWvxQkdeZPBeGWDN9AgVzzEw_0W13pMyMNrEEiLatDFULdASd4WprMtVB6uyh3JxQOa3ST1vl00YAm9N8Ah2q4CFcwB3oneBjlwltdprxSz_W2ulSYTRjoVTewLyuBTeijxbyf4q4kRQfqVQNEmJ5W9OHC4l6EwWWgk5IlBxwzTS4mHGUICtBe-0gUl9urKOkmjYlJtACfNB_BjYeV1af8NvZvTBC3C31wNfPBiRBa7ARHypynWRDItd3px_W0JRIdNjBDKNAEmHYFfc3YY6mulGotLixXCfDTFn3sYZGSCPeGcDkH3Kb4PCjJ52utrZ_Qfa51fAUiQOBf0B_e9nCAnDdP2OigqbWs_MS7QDY-5Y-EZSBrZLuEHHTvll6LQWDl6QNLGBzvEG8DErNqqTcIZXQS-nK1NtUzSE4U_AbywvzTbK6q5n29Z9B54NUbxnFPaw77ENFXwMx-W_sF7RXy0Pl-bSF9s9OdumrOsNNuamTwbFFVP9i3ZwkxDMT9QLDIZVTSZ9CgsFCZLqWKI-YGIBQXrPZd2wrglIL2WFGvQwOREKViG1G_Tsx3gzVscPOG_56UaZizJ224XefdEMzsOt1-CoG86y01JQo7m0-gMYAQcJVAyWN7e5L6j13VqNIJf
IwnOmJucTWSTBb-Jkrg828MZgb93E6nYdImUHG9OVeM2LSj1YqlgfJn8RrQi-vdwpx8j21TAmmcGZD4m6YGf8fyrszX3FR5BTGTH3Kg2d8H00WB29amyi2C41y5NUEVtGpEdq-a-QT9xAMU3TJaueciobnJnge5nn4kM-ZmCt49MTeyJWamCOlhjdlCQO9YRMX0vFiJB8itKFK9v7MUrwlto3dwrJo7PkeNMZyNQ9A8d7HnIZvFE7cbzXy75fl2hlpNGRBRF8ncxNuhNWoYHxG0QxPyHFNixJic-M8rYcF61yIcTWEPALHjqVWzsYuRJxqTKLhpvcQXohsEEWCBaVKC67BzSkkhBChpcyFicjxSfB__rwWZlARoqqxXTNQrRx6HzFxI9x_ZfIs565AFcVFzDxuc1qU7ynaK1WOq4I77q-7dWQJBggYcT1G_YKEfLPx79-fUaZoTQL93zfxkG958soO19B9gy9_36cucqfP5mAR2gmW1mhpwrtGNbPpMQSUS1TrvlOau2F7DJOelCBE4MHczLu_g_V-iEXvH_oQyuij5-e_68wxoe2xGYwnCo1LmUS61xoiGFD6OJ8-qTzgsljWcjC_3LupqltA9Vu_JiZou-IXt8jdJoWMpCZL0xNnZC179I8Ji6aEJNOFsBJ620iWqelGMNqbPguu2gxjHFrzgvo_LNlDgafKQSYcxwIXtnIMVyZFj9A-qFIYJr6OwTRMIuzYcfKz-pCg3dfxtpdwuHIbN8hg9Q-qRxB4pIACwuumyInT7a05k9AT3aPocKr-FqtFv40i0On_YojfkWu_7amzi_l8dQEAUE_K-7U0v3IaeZiipbuddN5gC5HIzz77ag2fb5JMGM2TqitSGnI77RRpj2yIMgezO24dYA4BO2d_DTK5ayTwLvK88wzZvx5Rnqm0ygHCFKwfwqpJA57JBE_yMBu_sucnRHmLiOxhls3NROdbuR9KAFEqUoxTTCKo1IOxaINJmii9WIJDZv_lvzxFoBPDqcbveUdx9vSWIxVMRhRLTIiozwMFoQYe-LTXaaMxtLEjqIjPP88wCAJ2QlJo6JMSrsA64so8FcQkVt6mPu_U9XTR7Gvd-gcKNPemRM7B_Ru1Ga_ohM60Eg1iPV8WFHC2cI3wZsltqWRQABEVeTxQOi1IsV_Nj_g_qZi13S2E1rV6ltK3jASIzArFUqGcC3Hs8JhMYnwEm-8PojEaZs9JPuREX5iFTS5y0AUZNNYCyFSpUIisWvDvVsZIx7b0Rlgx7PQJ4PgRHccl3X2grIjjTathhi5ZCCuLubbI0bBLqb-_jLLoDgRqynZKkz29rfcNYqZ-2-kg7hVzjOfLQVOR6fk5KdXF9UpfMzgcZ8avqIhzFj12FwvcWkrOa9qRYmShzN-KG4HmKJL33HPZYe7UszIu4GaUbbWBWcAL-qI4vaKtDmwmboKAJyj6Kbg_NmPVd4WKZCKkcI3393noJqKqVDgGdyBmFsja4IxHJNAng9en0xRLNOrTNbU4mw44yca4oKV8L2gCF7YqWoXCs6PYMATDhlHcEjKgmh9ecLEcdMGheqcpR9Dps3nQq4FAdz3SnoQxBaEtz2XqpfVC5FoAoztzUz0fUEpqNWBNKGoOCSzHyY0Nd4hkDiDWL3MM36fK90B6GoTH3mScFoQ6WxmLzOE4ImZdWy28SpfOzNaqvPQ-oiE2DShPmwlbm8WO9a6M5GczprBFz_ld2WLKY6tkYui47qEAMQGvd_5RKiqcFfvHi7CfPW0Yz5oJ1rP6aUsG1Ua5Csv1hg5gwaslsI1TCbvUjmR_UHb0L8E1sxch-WNU5Z1kZs9e3hCXA4-U1IkY8Yz3TgXmNmkaKweiBHV8lCqtYkUZWZH09tCH2tGJxgSVb5sP2iaoybhsuUEKAqLbvdDTjutx8jNcVDF6jzFY204Bb0-BfJP9m_Y7XxEShqAVIhgDFQSTuZhCsWAdshgy_KcQoFTyLlr8pCoAp8AwPrLUNF0xvEJNmzWyzPMs5Qj7N5P1IX5x5K8gcpFsNjGvajAXhCIaCihlVzeZjx7yqG7Aw0qix3dTL3STMPcMVLywznmyKqotOKJfFrWB7LG_9rh3jtpj8a6ZVDCHNUbSQlYrFrKkNluPZrYiFCx5X_rPIVG31pNDZ49VxrsDwOrtUx14mj__AA4HMstUOs9NrUjTqwFgXBJ4YMaELaBhBKYijk9IA-5jF5oP_lb2zCL8hxVk0Jp4wvaIh5b28YOdhHy598cTcXDIFnhGb18zHUuiO6lFgwbv5KFD4_iKcq5SdfXUP-rshehHkfWQTSwS-uFtLCHtWSd-xpwHT6AU4eezU2oiloVloXIc6mHZEP4tDK1ai3dWFiNb1DcIwiILBIMNOTLw1e70EreqdTGjmSxGzbIxo-dGGef5NZ0rLFkX2e4Ea4-r7AIr8zxCZDwT1L-19AiMokpJHndBLsxaeNDO2JpE0FsJhkDT2O42df-ugsHNdLMkEyAkVtvK3PaNXH5LPJEsaXL_1FYv-qurK_TSW9SHEgsZC-vyAqYsN8a9fChgHKf4k8KGIlVGvYMBVuhYADim4jENbYNjFLJPlxHNzgy-4Ugd4JhiyWqNpzcmwl4Fhrmo9Qma2qnIL3ifLNLnybHiKV1jhic3WaregSxkHqIxK4_zluW4EKo_yVTawXR79WHbQgTCZpQ06TVXLHdoabKunGCkWUjUFPdTfbpXcxVZws_CRTfDGfm54KiUDi00c2EjnuHXkl7NHgrfOOSDAzv6GWvXkcNgriyhx0k0d-MX8qRv0wGK3SEIBmp6bafk-ugEXWxjRUS19OQ_oELP2Zcal7eLCoW2QKd-AfBESZtoJ5nrJS6vHYcZUTtvV8nONfCyP3CngaBWcrwkyXt3oxi_eGPY-o84mLZUetrF6I1kezEwuHXybOrW8GgMJqxiDwtVRgWCOlIgjRrRiGSKpivXlm6iD8IuREf2tfr5tShJRErnL0m3Zk-JdJ4UUUiXMsASTgawoHYkrMYiyqekNSz6hESkHYy_szKR2bgHypeKxmFybbVv3Hqccm8_goOQotsvvoWc_8Ap4jBMjCU-Fpsyail54ZNB6P985-cwydDOsB2mLlTeCFD5J-HHBr4NWq80hT8FkCSZJGSNLxy2NxH1M7gAEhft4dOqzrSCzEq_G2LVos4DrkMAu4Zm-KdCiXU3PZHyTowecGBpPSoyLnjri3JORFJo3cvYBOidC3-YDnopiiVZiXrPR95Kue1K_Irzlkx7lL1Zz_GyTbsQY5w_V0Mj_sz_KxaSNBAxrFyagy7-rbJ3KE9pD_gLP4qCMjBL3xImE3OwGpI0AqugNuDqFu8APKjs8gqRk4QpGY53oLl7Te9MBDxdfxvxUafn7PR3GsM9LcgpUJEVNtYNHtg-nUNY-FC6wVbDc12jrAGfTCRhm_mc7jEXeNuqEZ2JVGZo_LiKznNBuPyAGjSfmPg3hgYHNVeKnKID8bnPZ78k-5sc5CwUjpKVhIQMReWZwLxENK2T5dj0tIopZ2wULaet5ygqzcgXtcqe81LHU
NUhbpGm_-sDnuP5uUxHgy_rMzas2Kh0WURyNlw4ZBTuwEsWOfcL58OhRRkLWB2CWTES49hojytlmOd0Q4mVLqoysTpK_fBgzHnkayivo4IXecPcyNekM-7JbrT9judzBgW6PLJKqWWYNFzSyR8gHMpsWSl65TkY-Bv2A4fyxte453ISKHQzIpm4AyfWY9oe-PKXYJMvaPSbLMCOURsOvk-slWrON15iGA260zYiz3APdxCcn3QCuvDNmDJ0LeaPfbT0PiHU4qPVi_L9QlXsm4lB3WXfK67LK2xhWx-UFkvGceU4h6d7HyvXtOcHUcmnUzKbSL9e8U2rSxb-Ue8fmXHvYyWqq4GjqYk4EC9ldVlMV4I6uVGJyqZETTR_lAEfr0hltjAGKVU-Wz-PxoYjZT3zNu7Pdn__Ic-BanuZkGNnMKXOusgmNUnXCo7LMeHevlBH5q77bAdDSYt278UySlH18HkAdqTQaq1Y7tyJsRppIpEUWt7U7tnaszFSdIAcH17nb4vwzkbyYOE7glHyOPR2aUbVPEJYQWEtQmC4y3D6H8zTakGM-GJ5ayj9xHbVVhpSQLhpWnJNAVCXhNaSv_T7qfxXU52cbS2zDo5jc26Nr907AXyA76uCDyoNOnAtAiTQcsC9KMnh69BLWk-Swky9R4MbMoE0Sn0tdx2E9Oa4pyJieLvKRiFbLa8X2Ubxa1Bf6gV78PdKLKG3Syr-tWQx2NqlKqQVr5cY6j72RyAqN-Qdmh2lcjMuhvAPUGfX19YhcHJBzrijkc61ngYniQicJ7YpZ7-tCW66UNMvfcVT0JbeIubkGbIx_tMxH294HZgFt92cm7VmYQBqLjtQpJ5EsaoK8F2OQC0ykiMMbo3XOON_QGy2s9yKzZNFk2QEJ_wQu02UxN6dpNMJpVaXUlnsHKCp9TwgEhJyIGlYnPF7Myg7FeAe7C4KNO_-ORC3B251NCAms8dfBnMDQT4KcpYbBHt84zDGH96XeO9nBG5_y_74iIZb2xAsCK5kH1tMfbkHMgoyXPEZLEUrWqDiu6a2bqXjdVw3osKGpCC0anllSnNrmLvRdKBEvERbviQy2o3DTxbytlg8Z4GK5Sd2T5AlYgA7ghF_yBQcsk-GoDN-iOSxv3Pi6aBpdy6m3vkPdGM1GjYM9K7jIPtNLuZXFD1buHhEpKvDy6GpM0xgfoTkjeDAHqMa3ly0Yi_uf_7xLnHtmynCa6e2RFN99eqA3NT1SalrZxqiOTN032TFdwToA1O3N3EucTeOQlDA4XAOf7si1wkEsffktJL6bmZfGCpLvEkK3t-5urNby677q0WroSw9HNxlcWSPBT2oQoOTM8i92iAau1jUpiex8Zp_a8qVai8tpJ8emKrnprKqySnW3rsOTt05LAECiQU_-VVs12p6L3AVdY8Fr4Dz91_DCgnjNJCTgSaV65CUW75Lv0WzLQM4cTONRWZD1eCg5n3eJT2IYHf5y8wrKH5CZxsW5rILLMd01jjHqP-vnb_KDp_gBW7rJ4FjFO2d8akcwF7q-L3NeDpXhaWhfaq2BrdY-RaLpNGomnGk5iC8jAnSP3pFwkiMeQ-pgcPzusHT_vP9hdY5DSYXCHIsicwRb94STaqqZOABGGRmj06dmww9LeujRRwa2kPn2gm7Fj1dfShmEmFZIrWwgtYoVTidIZ1JKcDUfBbezJoXet92FgpjB7mhTyZlfY3voP9Dt0ibEmMLVvIgizEghQiAAviOb7qhznnDYYUhMLyPgcIvoFpyiJr2tpwH7N9YlS1wxsJrAJFkQJq7Rrustr6x_IwEtoBsxSpN2qmmqB5xPvgUGNnp5_w6-WMsvnBxP-hCcG88QNp9YWu-p75aa_wes6-ClLbmyoskvXV8VQSKwhMCdj4Q7f3Sq0Qjtok_lUIpppVjr24YtFN3CYk0-100czxGZUoq2uopj7MPvBsXMKGx3Zo0fjibv3zc3Sq8Wpm89OBeMW9BotCBILnkkOHSQ0AXiC7V7YCfvb7PLXikN6dAENbIVWbqZc8MB4wwKb3GWqqrTg16CkZx99MEIMmFeqaE2hszoUB62MFh6ZQbWnI5Hw8lIGGIDxzj90F15H-j76VoeDANN77KMHTikNg07tdM4W6771_n19i-pTp9I9bkqBGyIfsY0BS8LGtyyMofOiCu7LFJ97GOf0rAdMpYUOkCCha-GorDlFwOK0d6gLmEvoHKqb9nlJZxbL-RqpX3Bt6sQQMmOA9GXGQBoi1bBAClU0GEnGwXdTI0XWhWpZ1fUHX3WZovl3QWli7S3xKIglf_gTb2LlXcSNJoHjijwGOOBMt_KKOYUQY7DtpqFtrS4UwGzHzjWX0E_byNowalxLaWr21fRkpKHobu0opODcYqK0VW1ZSgeas2b5aGfR2wAr6hjQDCAFFmif8MHAEKvxvUK5NWcDW3uR49si3lLdtLiWrQOPBUMTXHqSb2ej0Z3H_JxHfjC1ASQaBWdLwjzpCqsGjEKK4sMMl9kcSpSkdj0GJ0K3gvCdtkzkciPAGaRStbzKd5fiO729Kh6agH0SRX9C_hCjtgc4MP6poHkvF60EGdNJ29QdreQOGG5Ks-H39W_IphrMY1J9yLDEV4jc_b2vWmabJytqO18YiqN0n7fWCMphbfCFZysDykLHGZN6BodYbEKfJA48Am9QLkfTQ3JBRY0JUx8To2pD46kHNUFPooPggOS9-Iz5QczAWZujfWJ0JxwhFqYXCKgN4P_WjXySXNz-UET5Yey_tTU1pClm2MIbi6SU6p9RLTFVgT-89maM0JKLDAddFfRExkyKURC7jjAcEPHsZO8JouLPa25DvAS6mjQzN7boOa34hyZ5qTLNpXiYdJPE8J5HRVxc-9a69ZK6YB2CrC7bYUKIbCbdx8mvGv7LBMtDH8nBNa7ozE6D5UiaQR89OvaNW2zAiH6M-p_nLKfS-vXik63Ci-NoIAqdQDT1nbLITH740eB3bdVtbA-uikih_J8UkUnLTurGgKM3x1kNqN5F5zF_KDVgL67A85asKqQ17lquLiq4UPStZXbbQVnJOQrcdWGwtrhnB2FpxOPSXQY1Ue0WRnf0nCN3ZyX_0rYTdYETOtP3C-9cI_aYmInZfLHEFtkJu-0g3lsP-440mCDohVPPMchME_k4D7Uz09Q7TfxaxrMdimzAqz5RSW1GX4W2Xt18sjEgfQSJmzS_2ZryxNRpLfsiGezEMWHTZ-T4W7E7-zSHUzheoLVkdw1C6wqKJqS8qmeuldboBvpslHuF8ft-C9ka0H-AvyMWyCOMBAO6EtszLOxOTJbTqDOVn3vxc3Xu5ihgefhZT1-WSCB4phWuIAr9ukcTsm1lt-wBfntH07sudfLGIOsfdCZmgcKoKO1OcFfG1eqRIQIvNrQqESDrstMdAbkhYGZYEqRXJ0MGPKOWz-hJHr7hgvyP1hWmXTusx2yECRVL5l62N9VtuXHM9R75aPIAdClZaLeZ9QkPrrmXpqKzTh5kbfw1zYcCn-lA6kWaxOn4dcE7J_SbVNzLBVCJBk0GC8gtnotGS6UBLg
Hy91-LhpAMi1nLLrj3-swfttV-Ho625tqgDZhOea4ddkY4qkADc4XZol0lfORmWHnkaBZ3Ot1YWyypipdofXAj6rcce9caW8OFV-ssy1slOITqGu1gcTpCbfRn-LYuOGJUoGmvsKqUYiXzprxcUMBql29JhwJKxBiJRU9L_R56XpIsaoa_F_3jXW-P2wJ3Mj-XXTU2wZmY-6sFX96VCIt3El7xZCaf2UPJKm_vd4QkceXqEhdtDeBqQzU_Bk-5Fc1JWjZkysj6_ROmPspVPa9-vJwHF7wFdp7T3_lAoFS5usMS7uWEfaUAcmLl1-YlUmf2DW4-FfmH3rf2m8NaDet953qXXfkLm7RF4V1ZrZJLsZt-n1zA7H8w7g8ezrGpVpe83vChEF_BQcvt-Iw4c72J1VScOH_fB4_ll8V6Pl4-KRO1XWFyxXUXad3yE5k1UztQSGedjjZyqPvkuWbuTxCrrr_6xmbetJ0yyKe-oSJixX850r9jQmKZ7PcmHOehPo_Fub2PLCAnsGtb_QletfLfJIj4t5wOPXq3jqhvyoSkOtZ64V2UUS3u_ibrZ9Kqr7O9qvz__Bxf_oVapVYqw_4h9ft2Cz02Z39sV_zZ7bLqHMlSGmeNPGfHbVzPp89te-HuwRUVyM7gujxbfAtpomSV0O5O2WWNtBrz6XPUtWoTnFoU_ukx21oSFC4ZbhW6lpCveb7e2Ml_u1Hvuj4--KPa50RbaxXy0_y-VcdnveqXh7k9oOoa5dl6QYeYsxgmHWIogH6QYS0K0YVxmQgrCAIQarcMCbUuayeywHrFA-6JzUkpPlKuAX2nV6awTrkx0HutHKPtuzvMbD8oVsIc6RmgkwXKWdG-RNXCDLS0DYaTfA2_FuEHcG8j-53jf8uo_Li-JG3ASHuf0GeMa-kdo1_0uhyUlXuuj6A607U79099PcyCBCVL-Aw441csM0hxznpAJYDsrSUQ9_n-CVqKyo57A65VG6f4FhTyng3z9Q0Dh8xiEFCJWhQ1nuI3htMKPuH22fRTrc5PSvnatAiWIeDQkxZ-8BPjToeonv5KSVNoeFl6Q5kdCHKFX3qUqHjm5YK1UWhsumnnvKDqjMGZi3nLgJ9LUsKrrvYa9zVPTuaHSTJF4iRY44cqFGE9N1rWWZAh-5EPqLqX6jAnokV4qVYveyJPavzSK1h0dJXi-SBy5eSbjkhMuF1-uFc6GbAsaQLjLylAVK9GH-oZm0ZsDV5LfQ4njRKa8BPXDmZ4GAsswuH9nTqQMe15mIy6is_ZJK0vI2FGtK-DeS9brJ2u428kkDLrWbX2KE13yt6KtUEsADVH9RDQkjEb1rmLApokHRCnaP1_59bwgd9gT-XPKcWyOUIyKlxeUHHsyPf-WLGaYVcyXBsw0vGm_QN9G5eUyK9oDIVZvzWltV-9fTaOJPvjaPNLfpbvK2OGT6tf56X-nq74QvxoP3mN-jTXpdFl4HZoiciTRjYG2hEVpkPInyTSnYnwE566FsoR-pUXy9-bBgDIMqaHeIWFbRn5eOZ6zgLKXVo6ugJpYwYTlExwdNRfUgWaRjf3jjpC2V-jU4D9W6r_EKsnlaFlYjnE1PAE25I-oNtWFkp6kFKUAFfrdEjhvCXVwZNPfqkw88fOjoCxIO-7RiWzSoOorSdi5_T3zdqpbqoW_oN9mZ8MPqnKVH2_3VdENFiwIbsyFyy5rTXekUBYJbznHIeTf8POA2B0Sxf9K_OTqpDDdnQRGCflLL4ay1EIHbXZmlfuRavgH7_F7HYQu_fI0jhzmcGeuOaoat49Atmm1gPEsrPACKxRTtM6SfNilhc0h6_eBXKno_nzYxFbamib4RlV3RtVybmI6BPkpRwZ4B_t9jBozMLQxQ5Lenzyc146T5GlE5uSi3OGINljZMJzf6yOLBG-u9NPAGEjiV13j52ynKJbSN35GbgKy-_GCiK5-Z7G0_UMoB3eBsPYE3jp5_dwBVnDOcF_poTlK3crXzFEpomJSdkYvlCZUHpsYViOFwpyvXh_MOULzdQEILTBzo_4ZD8MB6gz0PFHiNwF6yDCxaFTVMw3VG3uFHq0J4XJu91BD1-hysQUkUKZRj0v1eZ4T8zCB2q0ZtYNURcQv8BEbp5YzdxapnscocUOeEhC8OZoakmngCetuLCa6ltvMq5wfGQ4X8Yam823VHgl_BO97Yw6V0BPenlxS5L_-uzsdtazs76w8HcEWLyzeTOLcTsZsmtIZOMMV7Lj4kOR4SZqALgvfvXhhB_ZIY4cYsskbA6iEShIeR3olepKE8sz66o41CfO7dkjv8TgLUng2-G3UAH9ry24f635anGhDjivfZK10Clb3fDqBYpNyeIdj1GNxfJGsFfpU4KqQ4qbRILKTmAsUhYQJM0AIxrft7NsnQFY5nHLUpQGVv8M6iFOuC97rVCNKfKgLcQvy9lFThyFMsv4iAdk_-GxneZ0etER0gv5_6ubwLVc-UfF-MoGRpcTGjghy7qAM3Sd58cIj0TTt-tXpVLMZ1EYLYZojQ1v7WZEmjYKwxKcfeOKKlP1_KT15N9NiF2kzhSjLgWFzsW627gyIL3plJlWl4eJrsnT3DbNMcE50_UxjVI2EMfTCu5478cxN15o_2z1sIqiVEh-lsUJtIr9vXI29Z4MntxjvzjoQaiysA81JaGtxdArBIMdz8e8eWCLkjB-j-iFBt0tbQANQm8Ay028ikMAG4KWsgH0TTxbPDDfhnJlyiXpvLpYsawuJF8WkLFUKX3-ln5Rf-0MEuOSC4_6iSOj0fI3-UK2pBU6TPrHpjMYxgW3GvTHySnF1s4etHP5M8yTJ7R4lzzSfocMx8rgVBtdH7yd4WKU3cSaIGFsFoB2vQK8wFznT0UqXdesVAb-E8u0evkPzGsivhCIW8zNv7Mv0sSZRc8Q2FpqRaoUF7-MwvZuc761jrOCazRYI3nmnzq1a-dRzTaFWO-ZuvB0x-AbujbBMfsyM6z6KzAb8ZmpRbnehPfalTulh5nX7qxbju0UMFPv_j3aRnjkzsOdEoUd4r7VoxKb3YqXLj0y_aMS9lDNT59MfG2e-jrtubX-DjNSMtvYCp1BB3tOXpTNzSAgyhYNCHN403y_qliRXmxicEM7l3j7Cs4dOPR1tBv9IEulIKi5WSgq5ykTlajzgfgEuxBCSxLFDnkuzrglaL3Pm9M3v2nJA29BF20y5QPn0i94M8Skzg2HP8jfA5zNJIMAYHJhaHdP-byQcK8QjcgaEsl4wG7WFxmplxo6fHgp0kFcTLP0FaCAYjrgnR-eEhpNhSpPWWOGKueHeXH2KBpqGjzUmpMuAM4LMuhAqPi6SwL3dwSws0L2IMh7PZcBQ2dZgqi6ijcbbUmqrcjX2Qs7H3GMZApjEVRAzRCVOATa3HGQFmrrC_vtwB1_-s0leSWZe2iJYuqyDNDv3jZw2I_nz-d6WOCVzkKIUSED0a5UEs0tC7V1tynOVfQOjCp-pqjhzIM350n-R2YSwc5srqeSEA97J_ziQRGvQW-BDYZEqSwOayTBy8wYZNSlzAfzy4BX4IKPrJfL9
tfHbMIuh_C6ZGyO0Nq3cuZ5IxI6mlwFKsPrn4uIEwNlnaN4LhAvXT_PHU5R9Py7ryXb5SgXqog7TasLSVpFmaLhGq2RpO4-kykxZMLPS03P0HeVqKBqyJDt6fBdt1IB75kSzHyQQJU3Fiolsn5EHCO3-1Rn7xnU-Ydm5fdE6lYHygfK10r7zaHfPnx6MmTGNEqaAu2XHpeEgr2HTSm3LR74QODzpUR1yXBwDtMEmlP0rY9eja889Q1gBzmewsBg07tvM0A38LZR_yNOWezST4Qx41rjpXU9lfLdnyMI9bJ2XCUr6MQwfD4R2nhy4au43VOwX-1LXpcSFsiZAxcLgPLDCATJZGlmFTcim4Y2mw0LKUapW49NTDT-3nK8SfjcmlBwkk7j7cVYzBjPWdSv6YFDTMrwdhkNFGtLUrNm_78qcL2YNGdfFg7eRQDNT5wLFbBSDm5cwIXeFw9RKM9Ch5KHNNWCVMduUxQ-Q5r3MngYCphG-n0g_r0EmN99OeYbYsrRXIJeIjPNa2XKQ60yhaN2iYHDwZUnFCptngsCCS_PkOXs8vXDOQfbxRS-ZCG_YHzBrUBAIuHp4BinsbvKT_l7ILpV5MdlawsVybdj_t4E1ARaUzhEXWjCtfKZEBQ5bM0ag95YDFRaJPkWX6V_ENR7uK6MSRzPcF_h5wRSkts0vhVpunh4P3GEnCDbWcnX4UBIS1i6mY9xl16iBTGSw-JYETxRpewrlYPEhflO9ig3xPQlGXOOjLTodvv5ACE6VuNzZDnnVgV5kUXhNiCNc2bfNi10PnL_mrAzme-nP-soCQHMc0_M2YAO_l5H9L_u9SG-lgX721_igfiO7w0tFqpqVjzPxE9CuJ56JNgTkLi5XaH-r_dY8kLBO0r9REnH4CUoGL6W3SJtVcQWwb1JuHRKTU5sL5zTa85F7gdkDwxfhUfkYPa8MNyLGMe1q_6mMMvxgRCTOkCSozyUBUJYlSMkK65eVJyGIOlQzJEFPFPsXD3jlc8ttkUnA8a4NwcO3w4rUiy4MwSi2TVSyWub8cwzAGXwcnPovTRGIKT8e0TyvqBriJ9B03IKTECJbHk43tYPDBr5eGiASdGHpdBo_IMyngruTm4LxISRKUrW2ccc7X-8-fh6NBJGSLlxI11ON33lM2VtjVm3KtR47P7LTKdWlQoFSUWik4ouee1uLZeo6dsaXRf101__xzhln45RfM-jwJTCt26r3lRGjkXeOiUoxZyhzkxAI69ix2r0LF_SFkZD3rJ0J1wJDFLoc4O_PsJ9o2XJS2FQBOFxHa0zME9MUZyU2xMQPvtZrvrD5ASVdwTk93ei0A-2FORn2snZDzK0IfG7IqUqFyss2zwqhbEqohnW5pkg2sLxk_DERkSToGeD45QBI8jye0PfFUIonRJb6Z4kNol64Qo34PB06CtJdsPrkwlcJD1tFLPoRBkXKQlC8YHxTL-FGycK1igQnhdFgAftuszKbHS9BYOFY9mt0XD7N3JEUwWXOyAFp4j7oykHOI9EhUXQBNnt3O1qM2Rs75_3c3NJixlfApHTH1Q0sEDyJLxy_PEffy8nOeUkT30Hby3wIWTYjS3CcaIHCbmE9sO2I8ot11Grv4zl3mNF6KhpGzmY_bdREG9I7AZ0Bm-RjZyZBEyF4qFcN0Wz3UbY7HjQnUnCFrqweCRoTtFwzEsOjhHLSgN_1BxpOMOzHPjt3wEtQD6IinJv4NUYY4d9fi9RXExRtQIfj5GiK9d92wf6mIZyEk1Ee_hwwNKMSa-Rw2ppPN6PivBzr_hzrsKc-zpDum0omAUMvl8G9xMP-AHHVfI14AZuK0Nv-t7BOELuma--f-LNuyGHdkoG4iOydNWVPMfLX3sQXSHDuRZlZQFq3rLdunGbqB3q1vVshsZuIWYjrrHxO1kfQ6tdodsg89Tkunq6TJTd0je-jvSl1gpImYbXI1T08DNNClkznuuuJovj1G7wLbQX1Niirr1hodZTiQkqw-hDceZe0Xvpo6VdyOXg9zu2pjstzeGTyh9Ru2qmdBz-FdVmjlkSPuZBS44R-fyKdFEpo-A_1eQQ48k61jFM8I40AOZ88KwvvLAlaHzJgh1SMe4e8qI_5-mqrzyBv-h_5b7ESD4goINxW0hu6o9lLa06LdhFkA6sm6meuUrzpRPxX8d2gIiYOjnIA3eqebIJV6fAZQmij59vPXqeO4QmHLeJ3qvOQJMoOmLnqUtYiJ8SXlj0sZPf_HL4yB2zuyozEggvazHcGo9xveHJbL8KLa8tdogjtANUMCEk2ONHJ0N9u4PsD8b8Uhvs9pBX9nBye36XQ6TC01gh7sPrrgTxKRa4gvdTN6hOP2IxhICzvx1nm4Zl0_jf6D7DhSmvRQ8t9cjUHPIfET1ZVvMQPo_VuNmFyyXbKHJ61lyP1dLFKmuvzDpcR8j_Y5ySA2ViJ7njiZ_cNm6F_Dn6riHvYmsb2_oJGm2L_lGFVPxvp-DkVAFnXYykpXJlca4YA4yp3LuZT2mBKTgVhm8doxIrO6wgP6bh4MLf8poOBTFi4w187vEq-yqZk0r2tUaQSS9rmOnrdM2VtYDV20LWQPFjG003CUg4O89AecuG8BO3D95_ZwWOgktBx3pX4hpw8m0W2sSx3Nhz2dT9ERqpYu9OX4pTMVO_M3_NVNlZKmKyU7grNTakOymg55YRiGy8j67Lsr-GjUEim4acYnx9SsbB31qV2gIvvPAmmO2-DiWoFjlJvd1ETsfyTa4NJ2iB-3V1t0X66sSfcgHv98goxe0nMPxjRTZ5pEKAvJeROquDS9xTEylsqTtNoGebZI9qcI6NadH2MbFbVsx83Igct46UCn3bVMCfVPBy4rw0p-bM74IUDShJZqTnK-AJoQyvsLYJ7GU-l4USTiS0yub9b9U7_cpaknM23MUyj1Zc4VZfcKstVFKdbGGDP1ZR4jD1mtJOHbz13WgnA7Aq5tyTWgnfFPc07C6RBbvCG4uSvr1u87b51JSGK3PN-Y7wGn6oo4-tWcBhJo4aqWS5sBOYNow9DEo_huxHCYtmX-xSB_E2TdykGmUiyqMkBKPNR7pAH3fhcsZPcCYwfcd6beCO4NRzlUr3nrE6SYuxhuBuu2Xq0Cde1LdsHSg_8BDElxAV_N_xEfH-Evc_lzLW0muXd-OooX_qvunuEguf_4gQEnEXyUdOxz5-pXts3kVKsieFyeREoj9TK9nP22Yn9tc_KdhlmXsBI1e_EtPl8ip-VeKjxzNnaK9NMMUBg_rsFHwD_m7aWHgGdOClxu9KJ52F1c531X_kDvA6OlRwqWg9JLnaWIfdYF722Rg9S7rXQYXurCabHvqn32cEOVnLyuFklexk0Fh9bqdlf8VQJQjaBdUxg-EIRnCNH_vbro_ILWjEkpqLLLd6ZbdlG7BY7tnpvGQN85JMdsgoh108Ut1bKO7otpeMw1pyH5sSmWtlWNsnWa2n9s9SSyYMfkDMnVH29kXh38LmHsP8CvUWaPo0emDls6ySt-fb-kKYn9JIWsnEV-NbJOjG3AeFt1a-kijmjstyYUPxrEk9
oC6YizKexdYFQm8VT_mHQWrnyZCBftT-kp0TmyhJ29-RxIx97iT_4a11gr6JOQx4ydUptjVzFOtF7rHKhhrtKfIECRqmimLlPuROxWYjVMNJqVL_nQmIv0Yd_7_Ywir75vHsixc_Yu3GpxGbw5-cfVKKNWd5hXJ3at-phyYDcGky1hDO3r0Sl9z6Ap3-ipidwXyt4wUFE2GdjiZYz3J4UjkfUXU5lT2Am_aRpTUb-Ey3hCPBpe_2zAA4E6JTj4YyP9kzL4m7j6Ml_UQHYd2ANE8O44Q0PHWBrLGzN4stjIxBA3F4lBP1RENeouB5fIHh3MYFVXmesbM1KR_RecUY1dTVVyzA24lCWdP6EMy31Mr5wNl9JjEJ-veyMxgx7Ga6xXZmY4QIL3Q-4szUNXEGN63sk3Qvk0EV9DEMvnXwuAk0bmhfmvPtJKUo1AnxN-6UejekymszTxm78yao4jX0CIlVTCTECrm0MiZmltgDvmNWe1QdycS3HKAlM5tPz7-U1SOQLdaBZqVTIu9bipxGxtqgPXv9IKATYcomcwCM4ZbVpxAA4WEODBWdXcOSEJbjFkb1InS6ARppnLiLEBz3TFVrO9Yxwlmps-_m7SaOvivrUqkYvIpDM4VviQrO_SWJePguKSf07O2gCYRHMEKOF2NipmoxLgGhANun-mevcCwzyLDVIr7NyCIAoDj0xMSt4K0aHCb_EfcLJk9JSFnuJH5RMJi-vpw91BMddUIynxjAYwPN4c8BthyP3iOWFxmL0bvQ63lUUWvBwslQ03GaJQHH2_7shMCY4LX5Xm4tN6Y9H0T4nCv7Ex0vQkisS3xn07UtTt0QysApQCNNnxOt_p5L98vXsjNdf1cFsZe3TohfOSVLFt0Kak4nx4q6mUE1see-r3ODMNSNEqJfOE8HvoSzV3HN1J9jODMycP0gLCOVZ7DtMIBWdfdot-Db2X1k8reLEaEFMnWy6HfpcjH9uHaJjPZ4sDWdK_z7IWLddV1TFj26czC3Y99IM2Mh_Ink5YEUSwyTolKGY2qIRHPZZw_o06DQLFDp773pc2kFU7UCbpNtPZOd6ojlDzDtoyfEVCvNH1MtbjBT_Wikc2HZDaXO3qYv-i9L1jnoc7MRAOuQRPHX5VsyqYE6ChpApEUpDe2QZzyTVVIad9250QzOXYJmVCMvF4px_xUxd-SYZrRJjBOSMf8Z8BwSwb_pV5MCl1-6vfHQWtcBFmJu0KKPxD628Zh1NM9tGrX2TB-08KSOcZXOHbSFt-oIJeFPfG1Ta1uv--kSABolVQQMGI1kzuN1DQHiHg4KIQ9bJZsJyQ-p6liWHkGg6EWo6RrKwqETN35iE8KQztA54CwS8C0fJpFdJYLNn33jK2NFgbMPyEZqALSmo9qUtmSHRPwgOTp9SQws46Qg3tBzWsDFdmS4RcYYplQABEZZBrB_u_SRukjmWT0y84nCw1PxDHxyl7rS4GWsR5z7qobnFgG1olhRMUeF2wFLIiEur9HGqQFm9olUHY7EUNmyldiHWxW7LlseOzTMg-UhiSE4xwcTEJf7QzyJeeGUZ-dRWRDrjuUs80VC1IcECpiw4ocpJA01E44WagBzDmHYMIpSYCiDPNlGDEr99nsNSi6bk2s3QoOgf986Vgs0DxyLG_mQD2ZSBc5LeRbA8W_cjVUuCc-v8IB3S5G2ZVVItKnlBe5j1k9FC9ywQHTVrHVHK50GH1bXLovPrc3Bl5mnH3qYcxixELGlLkg7ay8KFXRZPHm3UCRvPh3Pww-0ZNZBEcqixZJ82wBOgUo0fn4oTNsPrCQe6FS-vzn4BoJ0XCGX8Ftd3yN0Qxqjdu3iyFGgUcErinwak9CLsNQQcI1MgwUHycXL3MX2iC74EOg15N2hGeaUB3DS2wT72nkCjnbgYNuCbLkoS-elvfrI-XwjhqGmhR7wYBg1ZQVksFxqcXb5vFXTyefVQGBPN50nB80Nq454XuQpSfVRuVrIdCpna6LvI8jw2MgjaOuXeIiQ1ausGm63yDNnIOpj9i7AdeBr3ZXly_MSw_eK54kJa9mxILKub9I_H_wpHODG8xCZ-Jm7jM3v04JcgaQoP3vOc6UY-tee-2YmauN4e2Yv9KAsQSWTtMnFJFQ-H7aZfXN813fBzuBg2H_xvdC-Srp2-GPzqnNaU-YdVoc_MIDI0n2_0afB7rEWqP8dcqttdBtZOFVGK1WAYKwVTZDwrZZzcAmD-28tnUWedHdnXB0lh7ljO_P3SFCc-rRhq8TLL6QFHXAfc9QDNJesf-1M0XoGgzCB5w0js_CV-_pSh3Qh7BELnEl5_-pi_FTY-v6BN4Qf37SI8WUPvq500ltJMPFYrOKpfvUkt_6Ugrc9kJwDBbIEDE0Ovc8ih2CB8sAfwekfss-_DK8dsqhB5HUDVGhqkzADItRlhnLJMnWuITYVmJegXlwvrfWXYgbsxbp9xDperT5fdzMhFcmgwVner9bGM0ju9aiTZpjhNxUhVk47Lei8zqBP9uN2cwfWZ90RzWCst0NR91HVZTm75I5rUL9olCo-ms8rkDp_AvRJF1NSof1mFeFMrYbyv4E2p_vW4TuCAGunMdZ9x2hXcgC-shPcQ7Ez0KfjS899wRlgpD48vwz-Mo2yoYAXqGzaOV4idKEGaOQy-Cokj7XnPlu4UwU4ejZcTTPPM8AIGvgVX3cYAvLIbO3nHMYv4Ni2XiejSu49CHGhEvU7H7V8k6qoMXuJpoSAn-PBkSMYxX9wxK7omiCQmrvQDtxQxtHOyxEtjSuwdA8QOGy2s7xO9I0PuA1_7OZwpo1_Fi7ZArkVAF0H4d_5OvtcP7pm0I0Emo0kxswS7vIHMF6jhZq4sJnJapj3bioOLQwVkLBv0OjpherMUcpIcB0-LLH8V5EYXScWGXnm7ddsrTQRAjLfWK5W4-mtHWWde7Tbgaf-oDANA4t5jrcGQz90x9B8-Xf_nUBkU0pNE3yc4r8YvESU7NoSQ2dHO-IeWtZle6TWxO1MeM6hJXtmmIL1m0RNhYm2dwXbRQJdhRxXBB7VZvY5mguLQVrzAv-dUUw5BmdtIg7lbbaY-tjZB_lF3bbPBiIzVLtRZUV9c6LqkPsEX3OMMUNB8J8NvaXE60xy2BJa8BdkIxcHcP_MkpL-VqRhAcCfoT1hcOcKvXwU8SYBurSfcnqGTKgGvS20x28VHlT8yeur-Er5055zBAy3ovgrwqilzpNp702kiXsronOSCRs1ZVcbWn1E-cn0tsRIBqGV8DoAlRAuwJuGtaC56EzPKCiJppYJGn10JuhF45iFuybFHiWNiJgxaX8EmSxwmXKehsSjYjEgF5anETMD4DyNHYB_NH-nA-OMIzKB3FAFqmLldspidOqDs4LVjuOFD4OZgDjwPoQMQFRF_I8QbFlomExiW_B_CZ2X2sPCa7UxwjRD25lZV7zzcQ0Hah2brH9zqGmBEa3flFmbIzFouD6OHDr-OYmyuKtl4Jb5MVH471UgkVHhNf_zpUkSoCPtKVfalDKtXdKvZQWB50yMxbpeD0g__cRtnqeuZKUpbFd
0kJKqdfutXBEy9kjb0bwkmflVruAP4rYtLEH9kbdv81YJjG4XjSggJotmGywZPRgPduwujj6aArgAlgUBpJQJcKGbveZkA29VpNAOgXCERawsTWBKzepT6a_JxJ-M-NZW7Qulsyz18dBRNYKW-aa5xguW7eO_KCY4K33qwQVcb_82eqM0U3RLQbqdMHecI8Fq5Gkmr4q1cgBwudR2B7AEfu6yPa7zXzSED8VP-jJCusb0u6Wv7u1TVwfcTZDbFJxTkqu5EHdZFHOj8ZxuzodaDfynm6rvimwoMELsyXs1rSOzY61M2urPU-TMdiJyQJ9_5oqAR8KgHXhIclJubsJFX26a559lmYdsQzQJmwSpxeu3okT2_DubukXYDx1FPR1BCFwULFpkloOBa-LHrkYwmpbpjPaf-mWR0jjIHyEubf3QztWE0ZOkxP5wvFmz5QOG1B0nadJ72nqnKa1uyvsoJYvAnJtSp2D6F5Awc0g72hjTN6h4gNei9nL_P5dbLNOCBsZQKLZpxhBLt__AOxntas95EipirMNAlrNt73KeBejIuPP5xc1lKhR_5zq7D7mbkM9b-Rsmq5EE46Y6-dtkH_iLVKthwcRhs-JjabWPKZP7MQ4Q5XCf_pmFfg7HJuepBr3LNXvsYD7jmb5vx1tdmNKYLEvLjceGXqbrG4ofG4n7QSqRj-ylhAm0YptDChOZCXQm5o5t3iOdR1VBaiLiTncUZKEkZZlc17kjCiH-_k8jIrNCL_hLhGJkvNn9c_kV004tgqXXvZ6Sj-HPxmAOOXVQ6JpYKug1M4X7NsRhShBQCoFrAq7eWcuSq_hPlyiXOs3fY-XEpr_aRzREoSjNXMhTRrZGgCVJO9g9GlbR3QBOVyYdhW3HlNVBGKiegEl8Z3wP79XQvQYW4dLc-GZ82FtRbgakE4WOydiWrtdNpYkjEkMNLI7h6NMgiW0slkEStnTNHKDJFl8qyRHtBYstv8Vh5vxvGR1etuH7XvGiqnxG_gCXLnb1MiyD97rhdwEQD0TwqM7ldLGop_jrq3XWR2dU4n2bikO8LwFcupslCmJ-9Ii1SO292utqK5xgr_mr_LyL50JDWLKy3wx5xQAfqPTsECTy1wr81tleX6TxKiGQvE-ZQCyfbYThxcvGfWSz7NU5wqvKkls4p9hkKrxIO4PIta7VVYKcEyNp3t5r6YZ2tNpoCNU68n_7ml8aKwBZWXaHfrxLQvM3WCsNtycnD043SkGPKh4m6L8kctpEmHc-2MV3Qrv0_Xc04gm1m3AEqKXeCxxyR6PEzYWSwUivD98fv5g9ijmn19d5FG7YJX1JS8zpaVsMe01yUER7Z6VtuA_eX5IqFjo7IJOWzDWqByeyR_e4Vw5x9NZm61fgc2QQ5sPYXHi375X_9QFVnAfDjpNNiF5fg9cKbpw0P0ZWikRLGGzHA8HKc9JPrSdYx5RcfWfHbDe1bXfxREiOrzSdPj-prMY0cAJVDOXZ_7RYyCkPE8BSW8l2BXhqxxT8V_SjQxXE5elBKeFd95SZBsI-X3c1T4i1KryrMjTTCUIdC6DZF796Eqplt9yV11Mk_jV3agj0F7rG8jSGZyZ8bgEFe5NYUpAj1ZfsAMGWUL3SqRG3AtvgTmKA8ldJag1vnv2WbM9SQYg7bGaNv-Nub7jhq0FDvcI_3K4RJrQ5pNp7XeRtBF_Rw_I1Ps2STpREZXpjOR2PpqWB-HFNTwICv_vbv0_GNSQC2YKsXGA3qOA6r_y8T2mSZyGEEO0Luyp3009zQiWY9w3-QWdNxrZkWzjGJCNJxf1axwNYJNvpg-GYpm2yoNF60aYlPYA4dcasIGOWKENy6r14PLxKMlYE-LDATf9WosXq-Q_sbKZu6lBNcZ8IAtu4qFapWRjcfYAYVFu0I5M_IYvyU5XwU4XMrUZyhOdvmI2ekZNg0pe-0YD_dgnpRjJoPuXSEJ057CEFwmq7Mk4z1rm0rmBcZXGOUkaIIYWCE6BXubDPga0GlNgm2fd4C96X3j9tZyfel_6DOBA7KS0GFaIL01vWka_xuDYV9FLWQp1-Evbpn11ub2DsjhhD9fKpak5IcMHPBpAhM2eH_bmXlhyHnw_Gp7KYXjormBfFz117bYlSlXiI2GXxVf6Nx0VJ7bDqmo1GRfvP8HxW2gjf2mWfFC5C_74Q_yJD30QsNeBbepELoJMNHGk7ZDwLLzF6VM4R8zIEdbeyZrLEwR0jJofTEH_XuZMKp8WbzpzI51rwM2Sg8zDdn97HFVTbx5C-KhOfh4lERqxVw-0uHPdqSF-T8zEmgBSTqsgbxuXvFUfRWkTHOM-EMbUDWuZjb21Jsv6qirAs6ucDhq7XKqu8DDWLI3lVdKWDpdKAk_eD_2gPWXWjhtHTHFNLumLw1_f8cPnzdyj82aCIlsdejRpLutVD86ZiZauG0jbCl41L-xvlQ4hNQyo2MCHXS2a1L9Ht0EsqZ9o8_yXYdHP61wTXUJtNq8pPbVksF4-TrSWcbZownzC94D6TJGLq7kXiQ8lmE4ulZ38TN8XGuM7r1WUdPsu0kRC1XM8msuV1LiyJmndpXM_X2ZaOL6xgA2H-CRcWEvXh2fz6NzUuU0sUZmkbZJJl68Jp_Nw6VvaDG3UbfKrQk9ZJgcE3lqNzkcLL7QVgUt2VZXC5fCw7hs6n5wriniOgCt7_r67X3pqcSG_854qQRGvSkzyrXQvPBQIfHgHFVmo-D-kpnxrs4CPS8srVPSlpfbcK9ca4KQbo1oEt-kmC7UWyjVrfWEos4jyAAS7pPAQu6gxC-vtNBa0hkz_1e_gtWjIU5bEhzP8r41_OppyTlG_zAj4RKfH2lf2dH1pNmEkjwX9eDt12mm4xOtwzHUdMeZfKnvLmOXP6uUNNg78u7on4O66_yErJ2PUFva3viM6-9gSg8Ph2qR5FcJVVYrZA19734-Xcn39b-__eSEmR6QW__6LqbuMGsN91guj1v3boSwYdSBin_RDnpHnuhFYfHFygQlSzhiynpIcNYEe4dEJRSwMCB96lO2eRVT-QgfsgN1DLzgwulANH7FUrCLuSvPVa6dBV1wGOvHqk7LlAs-YEvozGQFZRVD-L0SqPDcpW7jRXZ0t4n0rsTxB0bgMeD2PRnJpHxVbnK55zGkgzIJNJZxxJoO5GY_U7L2nuMwoICA67cqOkdu_mriz8OTKKLp5Ua3_Qx8dCmivIMamjoHix_Iix0VH1X47csggY6dM0d2LdUtiwgqxMhfrLSIOdxMBmn0se9lbYNs4Yt1isIASyWvB0T7jnmswBZE5Gf7Gx5h36euc9vFNy7Jec6JOZ0j2uA3ilhrrHrBhXlyqfvWDVyDZkErqYq8F0WBS4HGe6u0fpJCk24GANZT7fLMjE6TxaoFkWDpbQYV9QODCui2fwefG3sXkBRbvJAns4FWxZtkDcKBCwexIbubj3qSGud7jH8pPkrGIfq0Rg4Iz2e8PVQDKHChaRTIWPxIZV4hPjJewwcTxazzXS1A8hM4a_S7WGV9VchcQfJUAD3a0nc4uLseKkNpr2ucBOn6hsw_vr3J
cRz_4rr7DdQteYroPtuU-2s_QZufH78dANVhjh3Ft92rmT6WjdiB7j95cP1oRQpUMgcLMfiOLg_iDlDAZm9UWtD3khhyJHYOSUWvSceKbfbc2C8_SkifRDJTt5IQ1yK0RlcdAWVEnKzi8MIWpAIU6kJ2PNrgAtHEfgsHiXmeJPsANUHaU6wcQ5IlEQnnj51YfTyp8fXaf5kcYo-7lkW_YkPaHfjUSFD_XvtYdHGIkg-z1EvOBZ5V_cyAOVO6DXoTEPtlln_CXkuqsmWC-ktbhxR5a8YBM-52RshWTDDALmfS34MSg99qa0rttJnbZ1Y38Yon6JIqUYFipCRgG5xWHGJqaeTgxOfr_gA75YNKro1Yo8b0IM9pUoP3sMbhqYEWlj1cr0LdL3ip6ssKR_3LKpZNTsGxBSWZEfP3_lDx-2pgUefU9H23ceHpW1VjJTi6Kj9hRjoe0TCME9dM_Ppwkq6Kmo4lpo40jUOwxcsUH5y4vWWs1z6gNXL8qH898XmfkTxwQDkDEe2eY-v8BkyRE9xy0TVSx_s0MsUkTttJKab_UlElbxj4F5qIbfShAC4W0Vt25exJB7VT9DxpOSc2d2u2BEOY8sisuxDtod2hY6i9S53SEYNoTB8FNqtTznAjzXKU7JcaY_6LiUKxM4OdatSKtW0OT2kpiP-U8p_x7mMUOfjThJt3fuHOojBYniAnwN0cx3_eYt7tPYHu6H7MaDk_MA8o8wHWBiIhuL8scFpeQ5NzsAGQxAaMgD8c03vuttnm4tFdTdjxFlRS7vmQpJLHXUQx807yd6yRUFRiQmLdudz13uMl10NS0DwTyRXwq30PtdMZ6Pb_MKHQj-X1URu_6Af0dWvwBmRsG0vSXlZuMMXZE0vmL07AsbS0hbWVXigE20BJDbLoKT6HOsEaw4r7EFfhyZLQaSkPMOgPzMRkR5eJCUSu4qjCxsnX2amg12z-ZtnYXiFOWm-_ivtzq3oJkASgISZvsMhMA1dqcbQ8CJqVe1eyxdHx17EpiITPkLELjkHXbeRu_VaQ09C1m-I3E6eEJVIZZTP3zYjg44-amdoA-LCQXo4jpwIit9BWU1IxJPQDUr1JsSrQYXmwUS4TEN-mcIX0rm_yuwBnn9ALbxLiSkAkei5S1dceArdzFP9HVBJnKvnT4uOqi94mEJTVvSRgJS_Xj2lyYxxO0to2eNVy63eun4GT5DnEkekPeK6xGsOkT4S-GNNFVPDAJ_tF3Du3Bgka5w5bJNOD9stkCL4QYCubotb0-eAtGVdRQtBtZWkZV5lq0QzdwFjk6bojKhBEQ6cw0nMCzmuPTpjgrF9g8awhYDoEPfwmE2iVSNtWzxDuglm1hTFFu4jpcRBBp_YHyyMEP1YIbYHmnM5FoTcAZCgQQZN0r6idVIUncM0aFbItCIg0j6CVBsOwxsggLEeaT6HCXtgRnNj2hwAlm2axrDGE2PSIgl0hndcGCwzr0ixzluKi9RN6MjE5eWWsR-pKyWDSra7yiAW_9Tr-8ymCHGoz4SHN_l0A-n-dUs-2wn8Nw3L01mLc9CXZnlcHyh6zGMtWZsjqNH4OkH1XgplnJEE_0fi5IkGylw0eo1y4luOHV4uIiZVl9AGsznUYzsY-Bl6XiREiTWQjMrnlNNhpp50WxyzLdo7MCjbZfw6ahaB1g1v8Zy5LBGRCNxclQyHMKYGX6jYgq3dC4bRic3_9AuJ-PljsArIdApOs3H8BFAuvPg-rbFs9Kvbfmup3VJJWjVxgyaY4m1vIzYzajUjCjoiL4Q-pg9TqkJggD-61FsDiA--oVL7mANSKviLr6X4rRW7iOuJJAlQfFwrT4zDtTS0o70NVmzOyVt6BzfqESaUU9a2I2KOq-3Xquoea4BB5uYID-KrUoviHDL_MH-HLBbxN-v5YdUMPUXbjcP8gEYZm15-2TX9iTZNr-0QdkMcGRuiATSVQbYRU9vEdCnqT1nc9NLTGC_HD0xcJoiKd60yXI-cNnjUXQG3qA7QarEOFqE44qNKKASqwmuBiqtF9dM1EWIBAhNZ6NYyoJrxd5HmmeoozIcSvfrmBIuItaE8JYEKePGECoxs85ZFZ8WmAi6kYBl-Sk40f86753qrTgVekw7R6VJDOQyK3ou1O6Q_o593-3eBeGd2UYx99uIjtBrVtSSK3LDqynZhzIs7ESLZ8Y0GHszYeRB1NGWP9BfsywrgyRPaY8dpHyWlOh7c0HJL4xBusqzk74TqWW9jZwB6-Ga2BkDij4j5WWqoJBgNkktPrN4Rq-4BRNuo5CR3QnGMVENVXTb9kbOaGW0EH5fjT3FOZHPvEOSCpyILeyd5mvvcskEy7gfnoUA83miADySjPCtGuyZWDCdzaXns8-qSTANBpNNxei5uB8kzauzE7GJPrCRnVfOpSeerInLE-0QogDqFTHS4H76Pll4nk3efaM-tMbSOyLK1CQWlFKMmGNOXDOQvedxsczm_zK6TkgXVSO8J-yplZjUTm2wTOjATgOV7Li9UBmQ_16WNtzLuMXTytAlYZRdXmrD9O3WCbhpDc-MOixdMboiQJU19wzV-dc50Et8QsUEylazXZOr42fLFuK_e3h7nsW9yv436uXpynunC_7Qbuz_Z-sfUlormQvu9z-pla9pWuHcMeDAu0tIic-3hi2Zkk79xY7uhXPra35yZKgdIAYHlxk55qRc3Pf1SM091j7H--VMQaWW0CAUMJniEjfPf7yEaytrZP_59gyJ_IPZLzykR8woNhe0gO_Q5PAsX28m2o4xKuSXVJt1uFhZOqS32FwaHeOzvZCPwV3HHOFLh_uKUk1_0JScDmKxerE-yunBp5i0xt-nVA10ENZdzQKIYSUqZJ-ddFACsEcvnJTSKOaiMqgdeh6M5QYc2yuIYQxW46AkQPUuvGnqD8sVNyuzVaXdkVOzcBfX2Xh4tkW5edBfTCJU8m1ilv0_818WosealB27K_Y7SAdyOLEPh7fPjuKN9P6foXNLyyL3OZrvtSkm4YP1fiXlmAT6ShI6CTV3U3_Lux7HBFiV1T_LqbpXuRY5PhhqtJGE4xKFvOI-yPQ9Wwm2o0FroCX5hghw3rmu0_okHBs2tkpQPJNz7srzegUojdFseSgdxzyj2yi2XBd6Y6_wQceDdv5oPQeK6wDcZ9oeGVV2n5N7BvMuKhyKiVwwcmgGen8FAJ2_wg6eEIvUlUKgJZvyarWKZF_Ox0AP2RLV9mNoA439U4xtQ4XICq9u346HffmkM3YBM2GJc4jnMnWMtFlYwq9ISXovpHHLgzAm5TnZdne5RRJ-tT9nYGEFf_NlHUgH_r0GWiBvp7W1nVYhv1bKuCN9NVWQcXvtswTkR_C39I1lSaDJnNYNLiiMBfPPtH3T8eyG7RFQVcPS4UAiZWFI1pfQoBqRTcfLARkwL0PsEL2lsMl3baQGiEE3cQxOOB3RiUKnCRNZWENoqmqk32DVQnHffrRyOb94G1s34ldbj1ubeb4rRFZ1hN8BDdQqV7nwUPTo5i1XtAzzY07AJXNXqu_P9YcX59SVxepTf3j9JCqxmtvP9
FvZSL6NCdIxSm7OoGAmZ9t1fS2LiId3WVc9DRpN60rw0R9s3vYLGe6Uy0u-weT6JHkv_W2nrFKaPwySSkE0vXinqiS6Yd5kXBPrqqxwr8wpQY6Ag0nWuzIlKkyCpi-GkkJ_F09cyArSkT6fQdnCGetK27bMTATAyb7BZdmRdMkVXYtT4MWwvgjhvyWcH7VRwoibyPz9fApRxg2aWeqzmuqFQ0W4NhM5PRu6JM6w9Bgln6ULF8O5sAhqycvZD7fNVtOY2za89BaEqY63TXSDv_bzwGqhEbKnSxLCThDxO7P_qk8E0YV7yuUno2XvuBa5yjV1blW-78Ftq5ftxdB1KAR4r6roVaKgqJqHFkZsryLupwgHN1hW9O3ASdMrExnveB8nc-b1CehbcgdiiwgBqm7p5i6GOYI69m0WStQ0bRAOG5xkZ4uXA3BAob1DVsjlkkxEiCfH7Hcnbvry1x8Gg6UOtYrLhH_yJ9EMRhCw4H77nTSzFjGreWLciJQLyh2-yH3mSB4PP8kyjrLeGzpUCnXo8PsK7nw9QFwA0PG-DZAqNZGr0KG0ceJYNcUkIeRp3UiFbKhWCU0Y6C1QzeaMfPWV-afkw2dw_B0jWy169b7Hzb6EO_1TGDYolY4wR3ky6TjUiEJHValloDBy2VLmTry_L8fzwCHCI5R7XeZuj_6MXiQlpXaDTto55pkzWsXZ1GF_A6fpx-IR9tVdUgDOFWQmD1xMaLUCAlN_mG-YQXvDo3WKTD61-gzTbrjft0-6WzXDGv08vkaGxTUCCksX9EdtSik9wXwGFFrBw3Ebo5n_tsCJ3dwg9bOiuEcCm59JSdeTBvd8FY6XbNJCPsq-T8jaBaCyyMplk-l_rs4bK0mHaNk_g3uuI5oW2z3y5jCnrrKM25x_1ed2oK86HQ12yB7jMyHzkC-TlPU2z1ynYZE5EMQtMpGuFy4a8dLUUfX-W8tTzgnVlqT1yIcxrdhGmxs_a6Bq6QVuWsjTN7CsSGEVcRbt9OGq9ovS-logAH9ogPpbThlxeQ_-tDJyAankZn8H39V1dBxXy1qEFRFe2sNQlOItYFnqgo6XU0orV7cP5KxDcFIP_4YDUvVoZiMLukKcNuixAoIIeVK6CHpThMIkbd6Gkc8M_RzsRIlYTeyWS3uobfyG4EpSj_A3p97v_tljkQGD9N8Ldc2tMejj1E3q64NKKShE8hlfEuQOpG_EbxZxvm9rf3O-mtuSp6ET9jb9gSL0vAplBaTeApDfl4wDAHSmib3MbYdtPksHZ2SYKMmhQg0PdkuuK0NdO10u_ontzz3eGyNMUvY84NeWFvgvOrnmQ3u5GiWmyGK1-uP1Ye0q40J7ljpPbZdnnXSZ7H_hakTIMNln2PdE8lEW5TePr7DpDdgXF0x11k1bzucKXF_Xe-JrvAIJCLyK9xlQd5SfdEfEzw1wNSXYe1kcpufq9wsW7H_L8sjqMELG2Uouxs35ySzYT9ybJP50ypTH5YIX6GK2uC4TirwrMhGlRJTKEWYnsk4Tu2p6nVeBQltsd9pNIfPvwKmNq5LskSnCyIRaRKBy1aBrUN8Y4IMD7PAvacb0fNgoTG7RsQIPCV95Bc_FApeiuWN4i_6WzdDGSsi4ySRJGnGyrx8jrRi4Y9foSK9uUMfVUue_WfZQNe--nW6bh95wR4tAP_RyNVKAIjCRcDrkt5x4Wq0IFrXYN8maQJG01tRYGTCqwkYiOlPYUIZ4DXnR-toJsEGfWZ2KRr4co22roIeef8WznFx5OD2qCx3cIzihuJWvU22f5uLRhnVEpLe1FDMLul0KhtoRx6agMHg9H67HzIZE8zhT6kEDXclXyJb1bltN20_XaUsPpzsmQqlseWOMiohowUH2CTozCk6kTcxhwXh_jFRVpnWuU7bISkF37pDKMGuGURobGzWGmIZkzfKFB74ER_8gDUg-I2M9qsfoXh1SLIEE3Bj5RF11APDvtvFNGWcQSSMQ9gzuU85qsTDhDn_UBmJDPTDDNQvT7oI7niEpQYc8UUu-BoeGD5DLruXW3RTQNKK7UNAsGc4Ur3j-oaXn9gIIIwXgMphEwn7T-1dXQNOpI1_VJ7-srs3yKXIpzbesCnvI0hi56ozMdW6pZ-YGnz8IRD2FK6Dtoip80X4izJp_0zyc0ls5xDtYaLo8gCBS4OgpxZbQc4wO5E180eLk5WnLtLvskt6VATlH-vmSf1hpDk_BXkhn6d_cHPaMA97f1isOm6xbQ2VHrjSw_lDBpxjEAP8iy6jApH5WNYc7TsvBR1uMLcEx4tNGgkYl9F9v7oi8ErdJJ2wfpzoDj-Ayur2aqFWZi_LpndkHgy606bjnuvabBzYgktJHPeawlPCFEhUyRDXszle7eEk1_9-cHUlihOIVsOsuS5EGcvOCUuxfmVxT0mIWstaNsVa5moESSHYQlMsdOME1Hxfe5_fMDclfSPIMiDpzcQQAnD1YAf0AZJdyjGaSOZ9w51M_CSAIgxLOHoAYG7e_aC-EW4xvzBkMynEWL4fcx_oJiG3Os-8kunoIGp2U80wOozj_PVfDWVsO8wUl_42NO_CtTXrmxZEmgwm1i9LkslNM-hUdHqnpOWiEP2LqRAuFxNlPEY2MWZwSfxASBhcUirXOoneKz0IJUTFebLTaxX6XsuloCPMX3euBrT9FgacLy4w4aLW5aQlDGm_SvKbUkR1gnhNKHA-TzknF6hVpz9t4WfZz-ROXEYtMPgRSsdu3X93nq-hXYCdYoiTXVTV8z3ihAWXzLFyQIY_KTqkh2JZ3LBqldnPzBOSecisawFjaxp2rgMDjjYOS_h1tvoJZc8KP8Of3PUfJRQIdX1aBBbBTuGS4xAzu2kmVW1edr-cv1QIYXozIAjJkb6ldLFWZPklx6NJCobJLOCrG26ZQKOt8STQzGz0blipcfgHpHUmfhqF0WRf0qNNtdumjxa5yaaEv1GF2_sRZTo10xRj9VdoXzBsi8r_nbe7LrARBmyM6ljOHqpfSQ9u6Aah_kW8PTtBxj8Hv199ddcV3TbHrcS-KlILGpT8o3ktrrUBGd4Lha8nkMRNX7Bo2-F5xoO1aU7MQGp-7EveV5JLuCiG8S8aoAybQtcDfqMGBo0YNa4fGlufQcktz9i6BJf9usaWcTf4P0Vc3XYnEfMnwFM44RpNTCiBTD1FGoV3SN8JGJPcuVsSPmXyj_g7Pk2gz0deQC8x6OT1SGpPws_tBGF1wv_bNmVe3ZrkNtikYNnnTFse_fJ7ph-ccwJR4w6Cjx6bnGy1gwFzcp5n-7KICy4ZTw0gJmGU_32x5TyxfIW3K5BHGBhE9b5ySAujTBsoAgAvz0GFngYO9uTObB7Sxye8J-mJhBn-J8QyZmB5uxYb7rjnw5fu_5ZCQg_W4n-0xtrIS1QWiCHCynS-kPGGh6gGqYO2kdiqIQmKu0aUXOKREDJD3ewxXlueVDw-aeCIYS3jMjzugp8Sz3RsLez4iKVbjtlijBDnZoA-6u4gH6-SlnsUh8R1n_AMXmEjkS3x4a99M_7ziT_R8UkPi1a41xvSl6jK3DPrW
ATOJJXskmwsHKuTFhVm7fQuenbxfZ44Rnc5i5UpsT9npwpI4ULqBy2kTzGJfGHjeFi1ncLNPg7dYC28EUVqu-3wv5pKJM1w0F8-PNsz7HJeM5YwlAxiCW64RtU5krNM6CR66-mNM1XmnRK778QCaLJw5EyLf8xr0c0IDKAN8BHYXVzniAitqI75nXr3ovYtIk97PZzzix__NWHBTp9S-56DfvezbG9R6x1PNNXm_XIGtgE6QtwKZRaWNrSe1lxSh2YDmw36Rd0xk7UDDMufMQuFBgjwzI8XEl7ENySiepPhorVIk5GcNYAWbI0FZb79C2GF-KA0C_X4XnS3EHDYV-Spw2dkMpGp1skhgjdLh8N8I9dWtBPRWKqTxHsoBA1lBhE4F27XJXhXDJD8bWlT43K8Lvlpob4W-8J-55lIz0RVEoLWjN0vY-iWtv5RfF9_uGLoA5x-CWfw-ecgTZt1KrXuUeGVllVFQWgLfgdM1ZP4aygVKiWm2sBXMuLN1f4uDJSU6mfybpBGfmX8rxOS-nwXfhFkPKs_CGb5OnnaI5uVMadDEJgt-AMVKsXMbtnZuFCynuARuGC63z1PieSpY3gvKPukUkBjmUq7JlCML7IOWAOeBGwXvP5E7jY_c7kZf-mnuFvGMgc0ybihxxvwvfio8Ak9N_LtEXE-DUTDPxHRj56MhaZQvsq_56lBrhb3zIrm6ngFRy7Y_7hiq0zIHHjJIH9bVKqiRpk6BDblNDvd-stO9z6yQtYH5wtW0Noc9ZOsLjNbZBlkmlz9ZIYXNfEPmq_x7PiXPXNLk-jRBhoVhS0st9REkVAfOPkc7aT4yHl79SjhvQWWT4kSkUtGtU7iJmQmmk-dkFi6AFiUdr8vN-f3Frg8BiNtt8KGIE-nT7izBe_3v_5U7zW7nbSewOfwGmtFagOAxQapj69-WKSVMlmsgYXVevKb6zIRwtP9MOwk0cRYylDkxbEaNKltmOphpL62vbOk9n0WhefkQOXFIMFeQiYBxUkKfTnzujZU3BTxCe4RGgE1WKktLRBX3RhBeMq0LdsqBfgJS5ruqws_Z-P3sh7Q8h6rk0fjbL-XDgB8FiJXBTBVfnC2yyxK5C-6pS5moA8OIVzgR2SGR9BMzJG-OH2PXPmkqTFyqI4RoXzmlcLXisV6N2THqCuHDuAil6bXRpioVzyPA5wdIcc_dlaMYftBTFPSjTWG4DK4W5WImiGnSh-2_O3ewpuRUBMFRRcBAuG3Msn_pCW71iqNK40vb3dcwQUXL4McuODjnfVNnKT4Teq_SJvS0P1JNG3ulJGuZcEvWrCilfLfh8vMXuxfzJrshOB2EY-JJnbapjjC7_H87m8EyIDHwJkWuJpcvfYpeytaUsAC8Cx4r6HUS4FMp3JqEwOKrN994pLnMzbbSoYdAgKiUKYbE-NHuPxlWNNQ5bRncDbspUH_4av669GUuriNY-YM0jcuZqldztzNkp8eD0rwJi-3N39ovu-BEeaLv-eQOsIr-K3NA10JQ2rvHVh9hm0GE203fs5nhtFn1zAMVg1XAekaSh5uzg6UDGtFC8Lwy0pepFgisI9McCOs0TkJ7Hk5CQU7-98Y0yFAw5OyYpWfLgIPV1bol6F6dj3uQC3iMrmqotT7fCiDVYgWUfAXrv7MEUodLwU7m7MEYVzdIvBSYi2R_BN3G7fPM6l9eqcZRAyvL6HYgAj81xSXJUrYVKgc-WfzeYPFjSgiMKP3aZrBSVdLDNle9htGPcBjzxQ0NBaukDs9JRn-hJINxhEwC7QH7K0-o5aRor2MfMsTgDVh5JtuIjWS9i81UEPbja0UrZEnVVbcA3X56lR2XY4yS-sp1sAyY9OZ_KVUUJVERX31RwYd2shJnL90rjufU7N_repLia1VQlSlljTzTX3sKM9F1vqYGdPMJW7cTKacwkyyiFgrQy6zSBu1rrerj-_bBkK6IXX4r9E7mS_8ZG0NWBWFOPYKf4-RdHtwGE7LTlQ3GfHnUpyefvaEJ4W32_9vYH_K4tXzIE-5qzRZT_J0vLX7ZNblZekK_v6QBLPAFkWF-iYGNVEFGWeh_jRoWVqd92JvKDll1v8IodmzLrwxihDT28l00WfA_8t1vMocID8sqMOKfrqT6tGMg1rTOQvB4h_Yxgo5bAghQXnCS95fe-JaqnUD5dYIuI5WTTbLg94tAooe-8_pIcjnUu5miFYIH6RtaLZt3K_NXq7twu0okQjMEy3glHre1IdNYjzyYM2qVJsULEraRlITCjaU1OvX0H9uHs6D6_YQSys_L6olkFbHe8cSTG0grJmYcgmmc68oQwMC7FpJDPMRGrQnwgWyeBkoRAbq2K8w8VYRYkC8Tfa43sSM9RjMrVhw3m30FyLVx-_i14dKOGFdwY-S-3l6khiUOq7E0PtB3nkM76SusAcFX6FN1SiYb0fPL8NRe0AmgNAZF3a80RppQwWOg-n2yYh4RSkFmVdZ5mF7n9rLCmWajRXzc7av6clHbtA1DBEFerVH1wcezrLOd-qKBNJ301PKLWGSay1TncC0lY288GXgO4sMd0yCnLLbsvzxlBVNOqSA9RNE2D10QVj88u7mf2x9ZXcZNFK-i8KYJFtW_0-cm9KOBSyAow8z-45kcDasDUPf4C_jNLDPqtjmv2Wtio88wf7YR0u9UfTURRutdD9dB_-E75edQYWQ89JWRgNWDkp8E9K8q2aGGXxu00ujizEiLJf83AKx7_45f-kHPVNkgFTrgR3Zn9p0UeOyh93Xr0QeJiosQsV0AaAnDpIWVBY2FGgabx-Je6c8TOkB4EuNFSPO2qbbc3UXh8Rp_rdF-_HYLRtCa3qyOObO00O6YlABQ1WIahuR--Axj5uRVjdC74n2qLNEnNonAeaShAYCO-fZELGsTpUyFO9vA69ehspLOKqNhdf-OVF1KyU4rvyXTW_jQ9UV-MjAIZh7gecW7Jzd3stA5owi07x8I-UVPL4Ivms2Q8PSvX_19xL3i61E5BtLTb0gCx86wwHMlKUgvrxMPDk5W87F5DybVrTq75-0Z33_t1yQ-GV4U0pFRJYBBP2omXyAt6XMsteKOeZ11Zmw-VKFBF6Nyfj9hiXlUbzSp5hGtBoJwUF7fP6QYYZgTu0pBoMJoUwNG3HHrDCHPtV6JnG6GSIyg_xjI5pXy-Lh5PwUWYF7m2SUI4QMCS7OovyiESM2XWrRXLYA2zY_UcrtDYX0rUnkreHpTcHwUCNyhygNIu_RpZHNp_gJZj-Rhij0NKFdLiH4g7cHnVqIkqlL3TWpS4GBHed6gLM_VmoLd8dEOktce_O6xfo7yWdOeP2eNZtzJihpx493LX4PQe8wxuXMeCbwMImkmO9L2YrQ8kp-zp1c0gqTYr4Pmvi3fdRHq4-N-qGFDx2icZo9kscsnFWwcXiDQVKbgJNNtnCQICJ6iK9viHluuK0QZcQCJM-5WVU-SdczF4f07DSKDkzP3vFNd-CqFe3Y0paFKcu7ePm80owR5IlE7SX7w0xCaENpT_y5y9LR603cgwClutHZva3E3bFXnmLVVXeVswVA
WX_Z5FYCJKOwHmJ84n1WYVSFHh9moCVVkERaXaCsvLMQ96tVyjHqfOr2Yjvbg9KJQiRGVV7nF-ymu3HXCRnMEyV8McADFKOv5l3sE0gpnufs_h4rEwvCT_87cgaunqq8KHlv-fBAUEfTWlEYb0hhBzL_EP1TCH1G9hUiMogSL0ASj6Y0zb9S5Z9RDeoggAFyfaXzjTVoGkPxIsygUNWh_UaI5f_sqF3BxPb8rSxyq5DOlaMaaf3mo9248r2S-QnPsDlcLMvs_AMXoWHE4UiphnDqqXqDXXV2Dl5vYwr7qaSGTkC2sNFekg4STh5gf70z1YDKpeKutECcQVAw66pswcbH-U4Ce6TBEto3aeALX5oaYbK57ZA3MAimxfdWWAGOrN0HoqkrfKI6VJNw1m7K_QoTOKViM15U86oGKIBUUVTM-oCeRXrBglUhZH-SJDh1Sus4Gnz9NQF6WLAytXu5xi4fhaUzSUNRtdaYoqHmhpvEOo02bMeOPpoL9CN4Zd_8-jSSRAcNr_FDLRSks01zrknzxtGMFoQ1m5yYtGp-gBs-Kzfvb6OH_VvQccO68mpOByHefhVosp7m-ycWKbZpDfYEVSNlppZlCHDwfw3DEuD79sAvvhFRbZ7c41qDdHkNIMXa61jqAbKhFcwSzhkVDl-srip6Fc-mYsn2l8RBSKMDayxO81c5lbfdj22PQ2RKWGlRFF1nGbxV7BRaa5wsndJE6GPju575n0DOlKsPJrllI-e_Ql2FC8Htnl2RsJRRD7C1RHUw6AbT24ODxg2skrza64XPFzLWLaJMo9_Lsh2PKbPDh9nXWaWuccdPQY0P_UV04VdcbNdmFcDihwF_DCWTT1ib3TcYEVbUtCuaqYvMkbejXEGgUcIkAYy3RtdcuhcAc3kiEUsy7r8WH3QiJITDE3BB3HdMREFZNcAs8g1UKAV_ryB-Gw2EVcOPxKHrayDYVFOQSgRAWhduaUo87m7vB9CqY1LwbClwspaWTDMfvftoVfDjSWFScF3EbmhcRAs3j8ytPIe44RAzTeoWCYK4f9BklRBp_DXJR3IS0piGF3IAc-Ivg99LYkh48UXfuBfulpjefYgiWUyem1v5GIJFAIXxHFkoI8ITcR9jpbDdoxhfAdtyYQWa_6Mxqoi_rDFJjATqFqEXXOQTcJnxFGVNLyvtvEt9LEli-7O88CDn8CmRERtFryoJcFivpOtC2n8vyWQR5rKMVX1Pcmps_-sO990HB2nazJJUUXsjEI4oPDysDXsO01Rv2W_X8stOSwiyBy0-72RKI6aE2VAup4v8c9B3ccRQ59vzkuEqrVn8uk8UHZf9MNuo2KI9j-d2ngfYAesTR62LKn5j_zGkltBTvu0Vj5sp_mpSYcKgO2gKGpnq1iYjFUK0k1k3HXYaSPIupBKLQJiTENvm8QT7iM5Nkl1c3qvfLUBs2F7Zi_c7aNNws_TmXHymEmd460tZXTQzJkH3IaBIxuNGk6-bMXUdn2315ZdhaAittASsSppSSRT6uBwLWNXvUmVR7Gy7rsZrz1ArubAEKSiOEekMoAo-hx8A5TjVp6VnTLOnL6IyoErZ97YeZtU633Y-7N2MXeGnkAXKWDX7QoegU7FCwtC2Gi9ob_tlYKeuiSgdsnb5Mz7vNiSExsRSKd-UPnpFscVB5FxDof_LjLJ47ZtHLLfIzsLYtwn4shJnFUO-GtSt310SwRqqK52U8pjnur7gyMRy5teGVIRGrYp-gmU4SYW0YvvTCpAgBFqxRuhQGuGUJy1kwbLQx-miVh2c4jpCpWVvnbTHcLYMA6P3Lt67dRlNTJIxVmkYJvxD5ZElC83F3TjlupIQ142zHqoZTukDwN99cS3dQheAGsIIkqgMruRMVLhTIVQxuFAwuX_6OmQ2u6u-qRQBMPHNxWe1Atxj_2EusHOdPlKeLRvQSMS6xEHjo28GzfDiMUOxA5RsJD6v-ncNiMGlH-4BWrRgItRyPYSUIoL9marDi7l_CZHJRT3z-QkIECUpuQ-17NOHo0XTJUEjgVQ5B2Y3OIwWEA1TJktdHlGf9Yq7Ua-YSh5C4T3A00fqPQ43u0pPM5EedOaOC_B4xW8uTtajH8MSQ5SFSxs--9VUr_OEtWU3Zxkf7ttzcdaTtDHo-IB-eTfGKx0_0fLEocGpqXOdqXnX8yTrwihK_IcEIz2xlgToaiYPVTpN8cP1d_TZDNbrbf9HMEzaoH0AujKwLtcSXrBRHkom4RhdC7tUBc4wP9eaGU8Z_9kUUoUzYGUQ5SxYUafr2Rv5B-w5giNaf8I8psrpxaUIcSyxXeUYkEZiGxY2Blwzv6t5j9-PhlsLxyQhzdPjqbBCuiFMVWZ7mY5id-ek4OdNOY-56HUcLGMIZDs-Nju-urBgjUxSoZm8JuHELCTb9YFq1ABhqoRtbaBlUVGkd_folygK3XWPY0Ncy4vwuz_-JSUfIo22KMNGlE0wVtCStijBGc4WNYqt6HwMwo2c4p_n2pr8dfkNGtEjUFE98avpL-Px0iQxMcLkWbZ4O5yNwyXxePgxTMaS9Trp5AD-Phgd0ZVZ0nNBrZWHmGqS4_kIi4oQ1o9AOk2NhZjlgpgWuHjMdp_yguVdOOsgNgr8Zk-qxyBCbdiSrXYqg6e73rQFgfQbGcGhnpSjF7ZocBtyprfHQp4xtj3uNbceu41MspeuMyAlkxPoSOGL1LU96rx-vGgVsSGUjb9ieb7bBQHqYhis360jftZlDY27kLI4-QM-f37zPxTBFXvQMMiLwIeoifHMYlNZry-MJ2udIldETa5cHAspG3Gyh0wk8C4SCg6O0nqrVnje0SvSqw3mqcQAbuG5wiCkMyhjFpr_c1EfNSu7MoJCYi9kNSMRnffTuJI5NAlm28Lw1EeTQaix2y8kFwDDzSXyOQrALufU6mUAKhV4F0yKmEpNcA47ZGyZUlpSBpa7JqctjoA5LnCPeX4JZJlLE9Y8WtlCdlEdfbyC9mxbO8ookHz2eSHTk24Ev--hvRHWhPxzfqWkO-a2mp8i-2ewu06WH7nrKy1krhsftQayvpqey3YJquvDbeSpqUwo1R3xXO7TEuGk1DL1PP7Aa83Ybz-5CiYEKRsdxwOA-lKR9e1Ox-WxzmTV25zflTR_OaWzQPdwj80azAg4iXNjNHnNdAAjs-4MJHMAs73na367JTt-EcICDhIyDl2LTVImGT7lHh1u9TUyH1Ku5-5_EJp2op_9GuSntvWcBIoVQDFOw2SnLOwkK8OznEMm_P4mdXWo0oj0EBOaeHMUeCkx_zF5s0xsAEWui0pY7821-S8p_is5aEDOgLVQQbkuB8b8HdvGqKmvoWX9xgqtUDJPlCcd3skYic3_4H9zIE2voCns9h66GFFGV5C8nk-R-uXulKI6Zg3EmlMUPhFtLj6fu-up_Nw2tm6MqwTtq7WPJCOGuoJEG6_X6lSnz95FgRSZY5_s6Pg_EEcO_StCgT8DJQu8hB2uJQmymMDarcxLwen-BqH2vckoQoAtGaW733SP9xYppSPiwMk8mA5zR7c-Mg65Zum2aZUYBe7
6G_3gs4PL2-pj4Frt7-N89vKm8l6erqLnRUVa3ZLBdplc7Ba9KbsUjclUBp4ZsVGdmqc6aA74VAIobS_r3UlBKPgnlVcnvkFbvrQeqR0olSCgRdFzMWVNE5-zWgmIl6WoR9KKOqyDZupUCv6h_seRxoXlctZtt2dBRFuFoDhIUiYEp3oKan38huhB1piwYg7Y0j08sQPxure-hrh3Ip3CnTWi-adN_ACysXHOK35n0swkyKWoGiFg93zkQqWB_D_9KN43dKiIEwRS96selMDHHGbGB2WLGmD0hKWm6LrdnCptCpVMoc6Jc_Q94Dp9_r-Q2AhbExeo7yb2BVWQGUrG7t29_UJEIeEsBeUMkIlGWiwaTIJrdnWFQU05kjNJiLYHBApTH0Db1Bxp0e-rlZuo7ArT3mj-YegdT9kxBGGGVI5Z5GRJaMF0nYzjZa7U1Med4C06BhMUgubmSOAgB9hsspGtPpNEzRJYigBW3K4NCrLd2RgdJ1KpLIU19nl7FJgoz6lYiBFKZxWmJSs0llnZnce4_8iypflQzs1urawTW8qmlVH379BAA72Ij_UQm2zXFxe3X5YoXD3WNFYZvnk-yxKHMV_HRcvw05d2dt1nvKP5HQXAxiVSQxxWe0L1su5Rv3HiDAaMCbU0wTj_pKsS31JTZAgzFsKkERYyLhRwqfSvC-1YhqR4qntzszX88alHzX4s9bzMgEVQteIgPYwHa_g-EpG6qQdnQmy5ds7A3U1T-GNk0tI9zwztJr4LkagUEKV5kXzsprJz_DAbNCr3_SeEZYb65kQRkl3OLrgav13OkHAHz7ECFr_t5nJDBkAuIn65rRp78dsc_CY_hOQ_VYIlKYSjIB6V8EdV3cYvbfd0Xj7QX-qJF91UTH3yB3saufykkqXYd5YcY7tLsdEOHC5y5LaSV1PihAu32ybfFqku0KhnwqdnDE9SB8cEgZd2GAOYFdSEnsokoQSqN0t6o7CDw0pHHzViajJjoBXP_3bi8UWdsstzPKkZFibNHyjKNS3Ie867MQsEq9VLZWnurd1uRngl8vQJtotYF85gKp8N3hIO9_Ihgkpgvy3Qxb3jOlbk7DuvtX_y41DU-L4hhizf6n7L5lry_Y4GT0yRbxHJN4K9UE-o7eBXfHG0p5_FBvmtfl_yPp1WgHzjZvlmuKR4J5u8N1fzb4KTYLwCwThMiIGYpirhxwil1_9bwAK4_AuHBIkSKARXwfeTE2wF3QtRCqpGRRA9hsbleexQBvFHBdUEvBEiW-NR1qnv27UmY6Pr3RE9Pp1LS4IMV6KiHlLlfKcpt7SWS6L5hO8zpMCIOkvQtssk1hCkDhuLbTr9k_NxMrhgXHpFmi8Kx-TE_zmI-EWRElVUIyw-F4y5O9pQNhejHmOtjPsK-zs9Y-IdOdZVfp43ok7QoSzIj1OatKFVlayHvGvKjrnj5qnbW9lSTMZQiari3mq6Cj9ecqdcYYFJCVLn1Wd94Pu2B_2sioeHnIs6yxBuaFvJA-KjiA7li0d47fZoxgS5YXt5D4y5NMi-lfLz8tcHVg1YLVxAWsKTzNZ34BiWb_6kAPdlQlInp4k2XsiaqpIEj96amvajuf7OVXWWIz3GJRnDxJnGulqlUg_TTCVM74arB04YvBTBtZWsQDibbZleKX0UvP_656M55v5qnmso8kYAEhw1hrg7p7FvAUT4va72W_e96Ax7ZGA1_9C2jazeC7C1alHICZNEJl9AJSMmMxOkQf1-XbjXQRS7_D4cTr48a8vU8GJnGpTXdaXpsllvL4LDoh6VCzWsRxpRmhU01zVAdBLIQbRBLG91d4zll-kC4kkBOsJfs9uFvRLEqXac3Krjqj68X0oZLcL1Fj5eGQBDL3EbAcyoor6OZU_xwWrJyuPS_y4PZca_MD1aDhEYI7qCB4nTm0DTK0kcH5hE6NKv0t9GPYpCf0-PutdfKXIOWQBYZi1glchsZGXQXW3nkGlC6_QxdX7H9zkx1ktn2OBb6-5l_6Sl9IoYCxx6ibNtbZ40T270WftBYu0E34yqUrQC0-hZ_oAKoWkoyhCTTvL1a9lwnW2h7vD690CS7vzaDXXD0XfE-oVCnvm_v2PJatZJp1jvVF-Ys3Ri4InYdzH95VY10icKFClQ0ImD_70v-vuQ6iXGp7Gbe2ccPEgZ8s0XDp0-l3fQKJ5Mk8scQhgrIwIWtz0VJ-qGI6WR7pp7-VgIGaf-g4gWu2tYWCpnPk1AWarp0BlY_OFwuUugPbFvyAXBUeJ7XzT191j0IwbI32pJ5D5XiXNCCFG7cn8jmAKwb_xheQuO_Dsygg2A9RmzJ3DwkqBqFg1AaGByiHRf1-Q4Mvy7akmDXgcsLYLywSDPfvk2OzZ2qIozzyfmgGosWrVrKKFUeZifR-oziN2dcDcIdDnXLKgEu301bFbIO1KZ0O6HZrriAuvs7BmGNow1-H7QPvYw4AgrlFmbiPhZYXzeXE1BjWRj1eBsCbCSknGwnMZVr7090sdWeYrn77BrelSW42HKFbD7gkFASjKPgKARpta7Z4SuKw9juh2RqGqh_BRLxKuMHwZduPlocpRZjESog49f3MQXK9N2ysGmNhxK4t8kANUU0869EaB-jBcDgTwzk6t9JPbsgvTY2eTSfEguFbyVycDcaqbLkY35Enlny8gp1-kCmkHFkXF-tX5myQdtg5B_FzwEFcwPt-1zyH9kYEB9C8PB99n8X5YW6cezadq5AJEBwoS6t4MN6X-Q7mgerpycGl6KGVwecA9w38CdhjakuDBWqEoe947rGdJmzv_bgjH4sDuh7-EYAbi-FhIBcluTjp1nHOvRB4rjp14ET6LBA7B8o-gylDm-MUvaDlTP3qF0ZtCwgPWlAQXDhzeGacjeH7JApP1_8HSw1gUPnypipNwRVeWm6UlwiS2V8oqY-eR6hQrQnhJ32_rg3sWp-lI6IwCmIxzProVWQTR89xwOvEbxGJ1gRYCBwryXAiQsqnBq-JhICHfZM-Zex3Lt_-MhX5dUiQLNbp0PScVk1YQsC_AVYEK9WzCDxm6OlQadFefiW1Y0ZDmD2aDk-LhQph01U4qlJ3Y0XEOAi9QSigw7QJahmlig6vHUlbBNBuzm91VaT0G4b69TxB13K88xttV4vp6PeCcVl7W0pSLIgWHuZRhlFDVPl7LKbr11kJchtKozT3BvxHer2xjku_ZhWMSQiHas7uKOIJnTv2C74_z3ceROzWmdBOLdom3FJO06LZHQ1_8rDc97GSpf1IJZ3IS1mF13-vzTVFGggsXUmSaoONzpj6VHqF_wbJSiHiRx0Z34I89MdaxzsVJ7YJwpcHDv3i8RE1ewRAr7at7JCfMyqB8ud0UBygSGo3jrWzntgVHg360A6HK-4MVcQMCOqmccCYbKMwTZv0tQniXjfu1AppWBG08XCJFF2yU8fXulZYT36DINSJhOR619WfDk-TDnHDz2RLKuwYjjPegNa0HT5xH2k2I31PNB8EOfC9oKjSa8LgGltAyP8c5G3TPGI7_beGaXcn4iFm2IIuxuRYpQaOUlMCWrnD
x2Drc8gqx1NsWIg5qpEBfAlk54pPx7S2HJRXvLuNiGMtnCQtfWHAR2DfipoNRQWVYG6AJYMoq_IrsUgE0wG6EZk04T0yDwAEHAxhnVEGzrCG_Xp-CmNMGIWC-SeuAiNWWpPZCB_Fj4ybZ142UgpqTO6kvUyi7ufFbbVGlgWW_Tg62LZQgKGwE7X_hFUz1eFQxhKhneC8WIkGdfXJknWul9a3xtEctTvp_zDDxXV5FX42MYY-phuSBx8kigkJAEJmixEm9Lm_O3amkZSFWLlfuQY67ufzD5cYt-QPMaQDWuoEUvCzcH1pufMWrdx7Wo_utUO4Ji-N0-trH1OUm-wrNoEDMMNMbooHwyeFSFQhNxNsQ53QefvU9gVnQ198B7ZilSx-GxpNq6HzwkEnTKjMRVHakZcdQyk15p-v25PC1ub2f73F-2q5BxUP2GD7nD13k7j9nrdTXVv42mB8neiWlbQuemBH61oWs3ZqkIzaIpgcpKhcxwETnDKeFNAASlRKaIcWbEM9WN1jlAyU-p5M91pwYXeMTSWk707mrmG_oD6M2RhTyd0sH3ZAtRzPPJg2V5324pHICQwbTj3uoYXAwYfS-tAsizSkau8mluopJ9-vxxN4YDZ6f1yh_f7hCUb4JobThTvC7WsXfbxfGpCfNlVLESuVNy0yvK1joQ_u1RdElach6j5sCnYguRg6T1ewjPKJ-Dx6GUJt7oHDZt5Nu8gP4cKWEf5DTS5bZ0A5_TKmMAGj6mjtFE0p2RftrWFxMkeeNSY3EbvFKQRHa5412-ceK_FTs1Vzxf_UMBPmSMFHKoFSgLJVPZQDvEnzqIUr3ngV0unDZjzemsZflCEG3fg-XVmnpUC-ISSvJFR7GZe7_89-ZGHOEvYBPyHaqSbFtC9fPSfyxU5f5yPX3PTd0pHad0HGtK_qpalnaDVBPR3PsFXucb7KCZEFdG0gzoZIjVVSeXhfpNVSqMNKmiWfiFFmfrY2qqglXLRlxLt6ZNR3hdMGJOKMIF3HqraP-bT3SGh7h-nUHvaKcrSYDPTj1TUhzS9aUfqyfhoPuA-FK7HdhJdKX9OdK2ExZFWSqVst63h13XbAr1xTgDIgQFXgJzdcw_nZ337G2yqsDbFVd-v-vfQoNyWBWap9-fecUF2PF5utD1VXSjtDkURElXZRWghK4XTtIMovS1UrYyq0ky7ZcsT3ko7K6RuJrOnb3XmpuY5IucEVo_ep0G4xZTqmyxMA2DwSQiuFlvflUsISA32Wvyi0tmOMK-FVaxm_8SB5bQXzrZqjVXpcRmmACywyEgmBSLtTIKXoBAyTNUr4T0SCqL68JBewt4r5zxOIPSXb6G7x7Mde_Fj91icy33PBcCgiUC1WpH4G-16XpW9XwTZvKXUS4W2UswqXjurvCdpWG8Qo4RTxsGx2QxST8_3903FsCApgHpN-85iYN_RPQPBqlkoOg0lVwBOzuFNJbO-AW2AgTQG-QIaE2aKWLL_OKksawpuAb-TLM7eOU0-IAeZN1WxDGaTcFTOugfuxLT0C_HOij76iUXnfMyUdR268hgx4VxBau9uRBNKF5aXLOz-rJfKHbBmw16T1WQd8E5sf2aiSK61L1hjLrFYTpoD2WSJosxraRenTa_GrWoli3L-1mpPnUc8BJvHsdS8cO8AZ1eXuOSH8IT6nVaipugJCTrQVZmeLNSUn7snRcAn-sieTMkptg-I7PNnwYziA16MC0SM5Y7sBRys-HJ6lr3jPzuLdBHGfna1Uxz293VnCJKn8VgoJggCuChjWKUP4idF-YwdccxqhAaB-toy9jfVaQG4odXu2mEFomubPPKzOXI-aDjfPWQZ3DwBA336xB0764u2ADXe4FFQ0p8RzkY4xJPJJhRbaHeh4m4NwCwClY-DmdQnS5JJi67zPDdVaAXAEE-H5jJIudmrdzXQSMHiFtwE-eyUOGeKjTqA8jqxb53xQ5txarj7SlJ-4oJh6PrGVKf_qoP8MO5OdyqPaiWA1T7H2-AEQWv7McTDLGolLvQ_GQSkv6kpwz-pmArvcXXgYfNXAll6HL-BKfSr0cwqa76VkozEUmuXVjNzS6OY4zdneihZvAHrzJtfpLGG6Tlv07wWjZ8Qx-kMEDKd9bbim7f92dSg9jmikHQHvOwDLobIXCp2nBg4HV8nBRvvkAZs0nxm7UKo3jyzt7YrK3hBogaJdPn7RC3nd2I9zI5WuGkx0UcQrHB-uAnSCOEsxJW3JIromXJxJ_izzSXxlrlSgtBN6ixupPBNUoY46HIH-QYUAFSP1C2OXhYCES6Ke_y9aNj5P4xepDpRHXzbXefpkJ0urQps25pnAcPxmqd3dw3gpQFVQUYk1DE3z8X69pDnpnfG7ratEWOqSSuET9QMC1SQxT_qPMzJg3S26W7xaDHZ0uI0dSvOGwZCbKUH-nugKqraJffka9OIi3LGT7bq_E6EaEk_jS8_vZXWbQDw41oe2-DnbF-HyAyJpJ_PsBMUGd3k5jZEJvwlsoqmtlU0YnwIU72X4GwYTBfFtG1jt_b_PsxWQS-XRgUsDslHwIw3BBhj57nykIgMCOHCpWLdsI1reE-CM2sL2bdcqp5BiYbD7HBPR45QI6tPLcP13zKl4cLXn9hYkY8ZpHJS-ThbnwcZ0nvO5RTVoHyvQFJtMubBLzITfBnwjL9zZWDZ-hgXF7uHPnikwlt7QBah0S--k2OP1Qygs9499XYYUHkWZDLICAkxNZPX3Q1-C8eLF5qRjQLDaZaBnOajN2g7hjv_dGjK84qhd-pkIno7wrfFr34ScaNxTHysppk2HDw0D3qAnCAeUGwldIt5IlvugupNIYy6ON2pgJpjz-3LF8Zx0aiL-1tQQ1IIQlrIRTyMA81ztWzR3dUhPryubOiibfTMm_rSYAjmEE86x9aZyO68VvgOx3TtykazeMO9MG7QHcUgLkxYg8GB5Ov09r4B-pPZ0acrNhizSCWu-wEItMzrC77EbYCPBVERRHvSCdZq74hcOZ_ZqkgEniVUIo3vikJex4YzE8r3Y33tatv0Xdjpn6-wUvTc5ghnA636X77Qwo-5K1rLdrP9yTMsfhWt0fbFS2m59P8hyH32fXtXeh6Uh_OsEdbTYXLngROEbw1tReuo-GfN1hSyx-PtpqIZuFujKKhOwjveejhU64I1vyyRkZIHq9eT6LIu7AmQlBJWHrC8W00iNbtUghtz9_w5d0UBBTVCPq6c6k1rJ1MRp7xfM3PjiJ8eMHjNTfMDDWh3VdN9VgAudJTVNlzN0Btc2EidK9encHB570uEPe8vUEGd1jWn1vbj-QLeLX9Ra4LAmsQZ316hO-582CxIFU9X8frEa9L5KHFwXs-GWtXwyiWCshhPoZoPjfbub3-cPiLwM1RenlyuMowKongcjR86ibe_C0ngvL2o6minflbWSPTXnkRPWKRpwH8cp8oNOH2oZelhMfJ3Hvteb7ZrkJc1tNjitpaN7I2oX3pd14ZDIm0KaonN4Kj-lpibZ4avcC3VD6efn4Zs4CpdVcfBKXTFJFVL6ljpXJ6BtsxL2hnY
j4-_YMJPyG577ErunAMlWVuCp1QVjC1-PKpJuu2CkxN3vSE3jq8jZvL0hG781d7i3__OB5Tz48tx-MO-4nybG3Fd08ojIpPmImWDbNu3vmI9TYhQ5BF0-fj4gEcn3Hzw4v9TQGQbK2MaGyr0RfFjB7Goyg5gPZNiSynasfx-TNKFbwJZsOkL4GJTzmZNoqg4CpQI_b-o57f25OLvRCB5YUTYI3e6bLpUwcalx_Sko1f05oOrrnllh9_9Yu1K01K2tQjDypMOWoLGpPnP3l_FCx0BgIdsUlrZhU1WFgvq_bp62Tw6Ki8YFO5ZCZPiaziS0MfW6HecXVLJed51siB_eG6IF0y9Sos_CBpfysaoRHaft-LzHAh4xkPQJ-3SauKjILdTtf6XWCa8lkQUkMVO3i5W6zkqrX2w7UdAD5f8hCc11BpZoPaThdI9ADbHAm8uRHZ0Go4pQJE7L7Au5q0MlygGO_eZSg3TPFsz9dTuha6W1rMHhG8KHqqCNzZvd3dwsQmzkKPeHJ093oDGTWay7ZvIpb6dHZgLmjybRMCwMKP7YwB0_KRBoGSqWd1PPBJTHbgJ01L6PUF3fz6ebqV_iVoWSqhjRdp10NS8NGSKbpbOrWZoIftPyCjvtOcRaAPchnYA2aDuROGM2qKMvZ0-dZawsb6Xg5n9Kq1IDjkKvgnpnoBz9FC1UMUVqKZywSULj-RjKiQul5FGZgBx_2zbE7VfzE5XvwuyZrt2yJU0b_agLhNxloiXJgelQ8CUaJYBjy-CoOi0jdd1aeGG2oawxKHluFaker_7rzjhVVF_qeJfo0zp5MvfCgyBt1V0mKclrBKjYoHfLO_dAazPl1m5jAYEhjQBHe43o-jzdIoVajCfjYlUUrn0noSROkrJwFHZE2VcG0vM7OdQcQVgPpMAjtWIQHLuXu3akRoRV_cO4-T6Rpo5jQE6-N0CqH_SNfGvt7k30JeQ6DwONxW3csaRwn8Fh6nsvLi-pgiziw6MWZJs6Svv9vVOAGZFgzjJ7GQok6t2Kv4lDImY5JvJZhjD39ML7Ac57pNVcV2jmzRDfAurAezeSgeD5xlg2MdVlCMefXSwybj-DV1BOTDJ-JctvxLbED4V5vFtJdgK5Jy2RCNHkyPfFPxPTPzz4Dh0VRk7E-T1qp4sm6Xn9cTXvku4PI83XZbSTuPaYDYVMjh5925B7i-JDzqmiY1NTwLDXGwitHNBXwK0ZDXO-XIVVoFHNOGIxli7-bmplsp7S3-YzQsH1UGxxPLU7d8xonvFuORVff-0j0hrv3rHlyZgboDTa2JW_o1n7eld_Doxp7KF0TUC1oyrEbdpXwg-vE7JSkm5BxlSBF9iWeBriH1siXCMewJFgTm0z3j7t57rR9th16o4oS-KxLPPWuVCQuoU5idrSTWDlvG1-YcxPkvhxoXxOx8fh50z8OiYA8Ks00Z99t1xKL0Hz6aYN5wwwwjhgD-xTFX51EeZPeClJFUerkVk6I9oD6S5K9O_5tTmDkKnFXze3T89CeSs1ZTj6mVV7HkCekdP3FeyX0wyr5gsNfUwH66w5eBWo0cxmi2VWsuNJhZB-15hynSUyLOKTQ2hwLxYzqAzrhGlB7j5Yedgw8_WXeN9oqfNw_W-TMDOusQF5QVqnJgI6S3fzjw7Z_a3-ISsqxs3Bblg2I9OIktGsAIEKwHJ3vx23XgwNr5ySKpQWYAuIdTElyBYgoKpvaCcJk46djNnaK0oDaRRPmeE8PBRSuECg0GYydWVTCbH3ZE9xXeto1yqXGiKVcFqzwF6HGi5mJo6nkA9mtGaNqxDm3DOihCUZLheRsz5kn-P6IDwyuzInqk1mR2kfg1InapZjp5eRJaQ4pGb4Xv5jjtomWpHgRulrMiBxtu_yyWajr6BqV80llVyHwGQ1Z7XQvmMmp0YKFXtI4CNjbMRCt5CiZXMr-sql0eyKYDsOQfck9faz7MFRztDmGwfVmzDlv4A2JNa-0904zqpgDwPBX1N20I-e92_omJUMiZ_xVooDQQbm0aZt70kUhS3gmzYUyIVn7_UablMxm-LNsx1vkr_X6DcjkqihuIbbaUW0q86fPXoFW2i7ss2V1x0vuP9LLF7c-m7NiNRYndcfBYUamoWjsNq487g7VeScOXYxvCT4JmNeqiOiRbfw0vL3qPsTxsVnwm0H1JCEr_9sgE8E177jq5-lEgRHMX6WOGrgz43hsCi31StV8ZYdNd-3CVwuW9q8pvCDt4olybi8FHNx6oPiSq8Uu-Rib1Mn-h6OmrXOuZgRn2zqaaKi3wXelQbGPj7NHoEABDy1-wLWC3NaK9FUV44BJxe6OS7QK9QsdH2kCCZBZTw2m48MkLz5pUmRk_tyHNIHdBYAnxtH0lzEu4PHx_jo-nu2yDoppFiXxHU-qRFRX2htibWMUGJ6KwO-7fC94EgErtyi0GFC7amYUr2gWa3BEgu2oDv7LrBHz0Pl91-CFTXczhYG5KFfrJIE9GL-MpvNNtNDeQiThd7KPP5UKt-jcIF-M7W43rnkMudB_zGzOrYBE1_5d0To2yU3kzxACcZv6lKQCLcajEVMdJHov4ep4CM7eBywLoUTbthR2SkhQUF9pug7ppOlUc_eEKkJQpL8j2hHu5uYf70R-OpjaNS4k1Rc7TPlVjzgj_dA4PZ6roGtrEzJkdlDXZlX325to3WLxj_AQnz-ID8cYzeRI8UgL5i3sQUldsPn03YSWQW1iXcEpkcFZMM4_yz8i9rY4PWnWE7iRJXwIkbXcWr3o0T9XgE0SpqPP1kSfAn73t9Wz6loOd3Hx_2B7pUctp92EKZ5554luEC9FW97QfdVdlhIpyEC3-11b_9nF-6pen92XxYjtG68awSkK0X8mzkErKBu7m6rgYg_J9-flBB5V4kcN2XpGjCVIgjx_rMO3uHYvaExpziWpOdSv8w-2jhmhdMt35N5MnKc1a_riawB363XcP5wvNT_H33Y_6itc3KrwQVYzNzTO400ASg8UxpiL6ALydiggtlhnHlfnvy12pN3OYlb75dk3td86zMA2KrMlD6pKmJlJaDR8eRZBJv9QunTGZMpa67w2Qw5XLa8v88T4oFeIMclhV_d3NUsLjchhoF4bWPvQapz0xEIxmD08si3-HBjCKGTFwOx3IpGYA6cFSvbAL-8gPELsEpeW2DVdYi5GMjUXLjwej15QTkN9lAvnuqgdOSEkF_RkxnquE5WEx_aIIunyy_1ZUaCD3lox-C3xpyRNl8VOb3T2GXRm-6HNhhvfh-S6BYe98KZiIyyDRpL-Q3MKk4eSmKGx-nU7Uw_mZydeWKgkgXYWk_WriEv7a1PlViwkbIW1HNqAd06N-GOADSplh24Lh3qZwybOdfpMQDT4XyFmyf_tg_nmTQknniyf5okWsSA0PFmVQTrou2NYGUYgMOq1K1Uc4FvSC6A6ye3RFRrjKsAOUSb-H7M4gzSzbjasOIGX9ZSsnSrSNCfPxGHg2CFRjXUN3JE7djyuVNHVxJ7EDIJXqcb6V4hIM7BMYuSZagHkwNE4Z41CiASWZqGx-Qxsw
mL4F_3blcpW8mR8es1VZXbqIwBWFY9CgU8Aj8SEbY0plIfMPLf2TAajK-3g-nHKWMXCurr6BhI8ONGg8rsaxagE8mvlx_1S0kN-FnGQHA8qVrQoDWpizgKK8UFzH3mXaSt3LidJwgpMDMnl-HUry9qKpMtzG13PM20Y9XJ537oXTQK8g8KWn3QZzHNvRVvvbyzU9R8zZ0awx3Z-6nhj4wJ3hVmuRPeriIL22JHpPkRWA5dOlb6RF4u28ug8-kPupLDsXIUgvCHTCGTnJD3y5ql7uSFKn8-nrICpeD7umCoiz3_sc7ren0LT6almaYQXiS07P4yYWYCcDavoMTgQNssRr2A1Z7Q4zHpZcq7C-f6CQ1P4WCKQHWuA9oblzTs1UJULZOQfPs13oA364UbQA-_nPWhQwNX-yH4GUxnphhWeQthtmxRrTVMn6EERNun2sEul7GXm2M87LwTTKK7Zebt1vMmzo1IPlP7tIaCevbEeWOPmzrytw7kdxbKBNi4d2myDUwpVtEhCm3auQgNP7FkIXuIvPHn7kQ36zOM64pDZ_MB7fJVe6zd-q6fEFkO1XFQEjYom0F5j0FyRJQ0wuqAdtsYsfjb1rd7j1W8u8O9PBwFUcFGf1OO7_hD_Wk5Qkr9DIGDSd5vScf0_wRESpletURB8MvokYjwLuu0a5BmjodHDbARJhLJTSdPjvIgN7_AKsAfyAdx2w0z1nVHzBwu7zVGFGCk3FSfM9beMN0SFhUHNtColfzHVPQKpYuLzIZMqrFtvVRCxrJ4zVy8NfA_StfRpmQlz-68-JM3PPSuluJmOAEpm1ZML65mY2kz0goAqBFDn_Wu2-DeXohDTQJnejfNhWl1OZr_mvuKHWM1ICJihrrOhox_Zp2iILlXKUlpU2kKojCuzqv3HzM0k2s7dFa2YStFwzP7hI4NHgt_cKGJFI_ZT9LkC4A7NQEFxztzlOV2Q7ZTBg2HB6DFDD-KUhMt5nGa6PGFZ-acFP1vVXl__cy0eXV2uepTEfLYaHVRErzcvtxDWrCODVAziFhMAdlzfGTAC2pHeyiVNG6TSmfxP_6-PCmf_xwIn0auhw5uY_lA48EsxSMdHmB140Q0peBYCW2xf6FZ9K_G0cBxO4_Kq3ea91x-dxayOW0sRhqZck60Gkn80go9enUxhPuBpPrR6oFowF2EjQl85RZmlocKxVKZrdavkI58b2jc5lgCrJUHEFOaoZEEMxl_7mEo4HuVf66FrM2DhWJE5ubcDASLPZXQMd3hbiHv53kI0dt0ZCs6FdYsE9tTdMMCtO1EF-Nd5YRYsvoB74hnBxp2pBxm9yg7l0xF5MKcKizKI_4seKymX25jZ-cpvd1ZkUtb9MT49xvmzNafEiewfAQh8PnGBW23d3taC7kFPH8R2EB6EgDP93H-Sq-akorCnHS7Mz3h64muu6FzrPJuDakZ8r2fiGQp3qHPAa7cknQxZ_CcgNsEShcqmk34ELr9f-wOm9SNG0K7AEqT8JYG1HIp64fg3vveYuvww-7LuJl5V6wYSDZ4efd8OaXh1bAAHv5hJcNst9vHuOCYon4TZlKvjHY7-lUAf5t3-rZ2Ihji7fBwh1E7vmjbB-TQlcCm5f9ZsOZuA2vIW5VNLdkJ2Qsv5bEeJKmHWqCa8g2wNyiQgc-GSO6B4Gkq2hcL7dK6a_PCngueivbgdeCNIr2Vl7-6CrJE2O2A-LBzBge_Q6TNAugzrlZvvfDEVyuC-Of8L2bv15_xtchc6AmVs3mgIXRwpFYAx34DG8n_udyC3zqY6JIUgenjvhNeqqk4yWBujJyHp7phz0VztxDsct4hz9UO0Qak_8LqLIoR2N0ZBm4GKmkXq3Jx3_lUZ2bMM-3NON_8QhspBBTbNbxHU2IsES-l37IkPziHcvJE782izMo8_beZj9JXhKB5nz5SmHIyK3TBP3EPULF5X01GE_hcX1chLdYMYfgnTMGk3Rc8apaqkwKQgbvZ-gvCc31jhzeJuUh-19fp81lSzUnYkWfhMzcgVNUyAzTxH98ZuUZTOeOd62aqBSRCF4WFAXy4THdRTyvcTO-kEupdsibfBkxG74YBdeZuhSP2QHARvhA0Jxd2PfGcgktwLGxN_Jdb1XkDyLimijRO5O5vpgEI9Cep542xGZpcFY6GRWJl_x0prdD09514_PtFi5KIqrV752owEJ3gURkqp4a7pE4ofmXDVbv_rjp-FVvHyeUcoHvsOA0yKARhbxKH3J8Ic1OhonKb34j32PWZdrtBQVOn2j7jXaBUqVfTmiPeAoC56zDl8XUVdBr1scR_LfloHzFEUFHIynR4YrEwtekW39Gn4ubZfL_cHAM6ZqRWSxk_NiBgxRyUj-kccFJtepWJIIK-JBxyJTtqApB-Hu23wFsu6IZBHVA74wqNTKx4p5LwAdFXyahrYJcURrgMV_ChZ5jeB07aN8i_zVRm7gu1YXGYUu0O1cS-wbMGh09aifXg-AfZNX-op7EbEetLe-f83Azcfqq2iHTw02xpJhYz24aAfOM0yOBONYt4D-FB8zJtK-GH0EiEhT3KE1_350-A46T83w2i9jvgf8uYnTYWrdS8i1u4NMvkX6S5He4OXuBOodaO_DVIXTvPuFJjgtjncXQxNiPkMdWKeKgL-44F6lOhvJOYRMeqFepMtr6kA5cLeQndZTOc5FrT34HVlScxxOg75sR91I-zMWBViPny74HqyNe1iIfoWkn4yBLeKui_tY9IghZChaXS6CIxaRXBy3hPg-eeynpQOwFQws4nWH8RuK7lMCyqteXiQ2venoRlNmYpRSC8XUmV-161GUGdo7FR3Fnlq6nz5nQ9RhbuYyQkrfoE4m7JqyaMOXHSbE7sdZrQ3u8ycHxw7Ym5LN6_0ARzUhl-SKfQM6GZmMM9BVdeJ25EjFUzT5h22KHj19WkZhhi2PtP7euwaUFHydkPLdwWj9mFVlXpLiLvb52caO020odZgA8YVJMnxfzBjGxnDOvKkRYyrbiUhU-p4BJpgRpqctYPQjrgGeMnxA_JhnYL3-YgeLyQFDkpnV8tentVvvjr_zqoSlhOeKWDUnOZjCxlslJCLmyenW_-9PeVlOda9Re0G33CC_y1BueblF3H4-qXkMjYCIM6XqevUZ_8lIUjKGuebXOy1xtBmhj1L9jR-mKDjLg01BPp_IBJT1BUavf0Ll-ZpxSE13lQe23NYpWwEeI2V26UpjJXCtme8aVJ8B_q1iGLAwYrefaWl_1g7cDgMGfSmlPpK57OMc_rfckO6owSSR-vQfevvazoWIE36EGdQBv0rVzb9DHM6uAExTPEmZSNCaMAfrQqF8xZirr4SXeieurf6sl_iUQoGeo9WB1Bzj7KJVJ5ETnjNb4b4w4GqNlV2d7ryFG-mvCopmRV_jsRYW0zPwHkLb4sD7U_TP0CP9ZKthaIsZ4fYXB9mjr8dWgQFjETzdpfRH5woDlWbMlNUP7IYveXNQs_mixYDwh8vSxSNP1wh9frm4qIgqtOZNOwluBFuCmx0Dm_RdexxeCXYoF7Q82raFukdmeGT
BOhR14pXAXuGjNkP8ikJqvsh0A2nXYW_NWS8S3pameZpq60dOosMqzQzmdnJLt-aBFmNAhl9OOLc4rlVTcjvPz3DLCLLGJ8xTgPx5nvS0z29-wyRzSQ5tZ14iziYJphCp5OW5C9jS4uVcKg3Z3yGb3_VQM3u63qFfS-wvPbhiBGMafhbWnmsvU11UQaAr6ttEF6bfpKWCS84g1d7mpQOvRwCBm8DOpD-MqqqBnBD3WHdUtZcBiB3pGC2wWSe3enHc0_GtPBZ5qy3RPfvdzUHuLCUXepE0zx4lspteIG1cYWCgrW-il8Q9XMTuLrFq8V2s-jXBp02-qqoJzYqCcWm7QOYB2CqbpvCrb6Zs9QfO6o7f33uNxKvAlfq3GlYmeRBSYKM53TAmAwIxsBixMY3XClx7Bs5OkeB8npPAXvzwkxREzWFOncb_SNtQ3v8jHpLXH4Dgy5L5IXC9ZqzgODdBUh5xr64t1m2UTWPgk6bTe5zpZiccoRtPKQsYP-XYyPjpeAPmjG3pVQhkYFPus1M_IDiU6M3UTW8bOsISL27tRcm9O1uJyRcW45Audxnupt4GnXbjhon23Y2TLxamipg0o44F1UhsDsfi3vKpk4sM1K-oqiiLvq301UwWHWUQtOir1xF5QHParvhnTTK1sLzJgPHTVHQZa66xhYdPHLY9lPkOTaqCFCRWx0StwB5IxCL2W1tHM6jEX0maQcAIUCE24HcXY3830zDrkEC8EaZ6EssCeAWqaYIMli1YevhUtiwi55Yt-yII7xkSzWRf-4x3He86nItZTr-AhL2A6uU9Dt1SpeRiQ4yYbTnm7hQXSKt3PqCnerrUNqAo7cOM32PWn3t113qJ5-2XNibWyls9yrEEIzTHuvxpVMhCbnsdoMu6s3HnDZLRyLVNVWiUOCMh8KZ1wma3oPotTcIm8uqaHCmNU1vtYw-T3AXSEBxfBPRw7c5Cu0ITqto28SB2Z22HCzpjR_5CrXUUsyTt0kQ921wa2Dh36ZyVZze5M6ze-WETTUtUA6s6SNighP6Z3sbkqhXOd9ShF90rifxMxsyvLnQu0DJIw7Gsp3hPMRJ6-N68rLXkUQBmKZU15wSR39Z19vJxeEkdQBNUQecHh0R486wQlX6hPySMVzrQxsQBceukiJLS6WJd-s7PsEOl4xQhnK0IGHqz0NIFoJtqZVC-2_7GIAN44aWpUQawBZe7lb8zRyWbm34imBZO81H01mIgw0DACytE548BqzBzwMtd93tqU2HVMFvE9z_rzs_5kxmY2X2aqpGT1jHkvYYEbatm10v2-4RMyhtuw8Z3YoizxrwW7MYtOGHQDXEF0DTV0TkWZOGdkkKjyOrZbO1YUrwEHH0xb_2UEAI-jBuadP1mfYBlK2oHqkeVT5TpGpCSoOP6mtLZXE1_kI-FhDiGgn3W3i6d_PQ9m7efwLtQTP7sVDnnRzrZjaa8mn-f5ysA8sOZe96UPLpyonSKzMnjxzOQA_KboSs6eN4aiDlhIetcbnrWzuuPty-KGG8dhHnHmajRb53iWspyhJf65A0ydPHIIFJ-0Ix85fYclfo2BadvwDOg218B6cgaB4bY5qFzvTpkg8-W_FbhN5AgJioUWZtXgPGp1q92zGIbGa5yPDyxXT-30FrS8Wi3fRoAM6R5YNezbVkGkzmIQRPCHroqVi9myxoqqfLf5owGTJ725MC0bwt-yHXzsewA2YM4rAgSQ9XtSOlsYpkh0mVdZ3LTGbivk8WCka1Zj7EhaYwD-8fQsX3sRtj1k4kgvI1hThcnI3T8pjdke1lFeF8dOLuQwCAZTw2qz22YlFQ2uGMsqrWZtx78w3GDke_sk6oAGyNJf4Bmu9CNJZBvwEsM-7z4WbHtFToaQ0Rnc-AkEjcoKsSUk0plhaDb4R_vNi1-Q2cczNciqGElHZeBD-OoKE67s3ONs1KTe8SHJYqCDx1XYH3_arI39zJ_4quM9Q1pGIztwNhMgPR1sJy-I1kB1TCYc-F-I53mgCRNVRB7jprf3oxLIrGuwIhR1mrGHCeJZ8cQ6in1QL6ZIgM9pFPxrpdbCQpGijbDbZ4gzmpj8cIvesobdaeIC-kNSwKo2ZXK-CBU-lN_--2YWz3l_JQtl90aDLkpSipA43y2Zx8ipSX8Zf5vpWlduhEwiMwxXUMYu2DPw1qmkU6kFCUBNyVOm_KoAQMdYARH3qrmWcAUpl2uoMQsryOynaorXeQ3qUajkE2Za2tL9cvrUOLIeULUR9CeKkCJxWt18BRRTjvZnkmUEa8-_nhxwxCrP1I0bSpQOBMrOdwNmiWDKMFyZz33fLlf_lMeV3SgRG7G1-nXbH4QJmZgQJd--jUF1mRSmQjEXp-U2tUH1m3n104d7Y46v4hFDnAjHW7aqQ6lFnmqD_LXfTVKA55fiGxXJXQ9tqpNGcY7u9c6dvBAz4fv5PcmKwm5AVADnZz8d7SvxT_fVsZBIYBJF8TnsGi3U7BiPOW7OmUsqHQmeiUi1Fxk5T3KeYRrFwaDjoeNWFVGNqYR1Fe4SkQ8omvZRhgEUA4bBZDcEYZbgLlOb-99S7AAQvzIZEO3pz29e2nGsc1gFyUC1zYyGNSUPEKobYVtRnzZ0PN4U0m8GeD1MB6K-K2QUGnOAegtpoPqHfGJ1fjlcS3wvTHcG6NbGzfWy_pEPIhvHxu9bgwoKQtDcGkL-rfulXX8bTDqxoDMYAxTGB69_2nSmZVOjR12vmMOslKqaBBCATHA_RzH6j-dL5U5yrcz73IPfh4xvF8E6Yio0Ok_tcG-pGYg1CDcawIYAUyRlvW7Ma0NWCJnvMqHtmKM5nP7y7oOQ-sOvYS7kQbWQW9dGsEdfDKsc7Va2FURXjKwQI9P79sTQkVRncvXSZwtqrU6ICWMnTaTgyvRuAAmFDwkAsBoyU_kZ2HWfzp-osnYySo6hF7llHzX4k8gqjGA2GRRA12fTuHNkJeszk0nMauJyOeO5cJRA0MEhS-WLCWpESGzYEvXAmZx-H0OIEwDhrpBsEIsOTeKc-8GoEJFivCHAZ4TU3Vu4ZM4We5ylMPBKaCtV-02NXDjQdQKzFQRZYjS4Y5Z9h5K1nf5csMahP_GFR4kjWPMnI5cZhcPpX4hRiQoAemXRCMP9BPnAClhrhDeDp46vVxSQCNClH54Q5Sss6OdIm6BCV-eFBVTw5VSMdCPrfIgIG0Fkq0RGVaTIUX0zWyWkOnAMEpR8VPpGuOA5jFTi5Ynxrhoi7Tdm6EaB_sDEwLWJT32JcCnm9nChHfxjWobqg-7oEt9fAI9BMvuGQD4SeDanFczkb_CzZhkku2S04tDAgjcB6wrXBJIZneeVFwZB9NCCmKitCF3xqCk7FlBk3h72Mcsvi7nWtDeKevoDLddcGBz78Sswr0f9P2PLvB302A0IIi3zo9m4lXdxLpuOrb_mPnB8hOxHG3MrflwZd_jDuct4ARm8NaKqHqe-AcJ3IO3WbDTdRa6EwP-We96vEKylzgNShJXuKo4r8XT1_gxNlbGYpGMeXn498ORRF3Qjvl7MmKcka7hhOOz90XCJp
MRIVZcVszy6XqiRhCDPZNU6KVbDB0k49h5Te_gSidZvU2-4yg9PK__s0VN3zmJ_8LdY3T9OG-PsAPRsbF0vN4h5bq7TGMkz4Q_rud4VBs9Qe9zl-Hz3uxWHRC73NJG6_3wmur7H_Grb4mlUgQDO0QXV10PdYyJlof4b335ND7gs3MpuNoBp8xmMEXQdwcSHdhYOFx3OylWaDQtoS61-R8YxlAHlMVd4gcplgMh9WHeKn2xlSfB7pzvGQ6GpbIhP6eCzXq-82u-RkmP_6xjSsBFg_hKzUWKpi6Wa8cW3of79wq0Mi6FFVqbl_PNMwgikKNNp-wDAbgxfPPzCLLhKh65joF5Zgq-dChIOUeIgPDoxSKoqPV-Dqoqbgyai1oUVO0xsRnlNwwRK-_3-HB4GMyFgoUJSvIhRg0iUULfI8U23-4oI6qn_YN-e6shghIShccx0DErHtcsVrs6gAlL4npg8Xxml8y58wTqT4ydDgtTHYOQozvRco2HZzNYnZasE_2Yljkp0Eos-CQogSddbp9i_qrHKhqEQh5w9HmaemXS-gJDHxDNAuUWmJ1P3AuzajwWb8dID-xNN1NQ22sblSQwhzL6wZrDZE7JPquH46DYKCTNGzL03zidsgMrgUsRFY3Iux5pe9VKxQEbYj6jd9GeRjzRpMtvXq-2_RFGW2qp12xaAnbYwK7tIRWMG46f6KvbS_t2eFOpfScHIY-XCrzuxy3FIuQegJc_7XuVYWws9OliRDSbJ-z_cwoIiA_b16HKituvfS1vQNA4NQ59lPEfjucrRpqz6YiAkhca-ZEuWPExRgDxfdriPYidO6Y5XwcoOlVg5WO2rVzFV60bsGrtC6SdykF7bboBCrA2jf0tNNtMhH4YUfBCaSdkBwzGgO3UwLi7VPmKSLe0zXd3sT4PBkW_7FZRQ2twpKlasFAz9py64MBkvX6itvgQ44YWk3vIYN3MVLXnfLeLEdYThbOAfr40Kt90LIAiA1E7S5yFzIXm5-kxqBQix2qT5amghgyTxF36GwD5vw_sieQ2Cc6ZSFefGSMDJE2jOqF1Db2JqLVpHSxz3rxGOy7fXVv8R26J9cwgzhIxQJAfvOxGLOX7z6i-6FQ_bEdSTmGtfJ770YGd8GalWS_0hP5Bg3pDhyxZUygKDeBDLykIqOgxitgvIJ_fDKtOSYBqg358Zhkqj7IHP4RSDMtohT5e0B6tR95PRVfhX_YWMNtVf9_i5zCIZDrxkb7EYtg6oEiodfWa5ntREQgbDsZyFED2Ijpn8oqDaiyRgkOo0aSbo3EOqDUoYyWNfQIpmYgRJoug2_eFpL49GraDAaOe3LfsXOSjdLCDIXHVeqYT0wOoH8Bk7Gqc-jbixt_UKcxvpnqn_4ALiALPSSo5nB49JE2CDlQGECMMBcowpPEM_3cWAzbw2McfTPzfZgYeEWtGrgmSn-trLq4oAt-R_IFBWaXyKfO3h2XQQdJ7DURNCmusUwr9Y49TYi4dNi_iqvZwNbP1_-0aY39ZwS7NtZ_HzGx_0Rm3H-KVAH3BpUsdjbC4rllXTP8sz4j3p6S-q0lBUPSWInifxIodnpPiUpwr-NXtYoCpnAc16ZGarZSPv6JFbBTXMxL8ki-8iZzDyHD21M5Lyb3olhmj6HzY5K5QsshEL7pWpNxIZBEVnlDC9FmLVM3EGvRxqlh_ayUIhigcvz_3hmzsKxx_i_BZTbVwxIsk4tFYACtUSAg2Mdc4VslqJaVpfJnq-XmY4_DHA9KKS7d5YjPAQ5ohqQ9zLCupw_gzlSNddTygYst0rz6jWCaz66FeDHMY0s5JSzY3jp3foBps_P2eyw_KgtbTB5UewO70gq0FAKL0vE_kVGK_tPRTl0iAmcdM2tzHWtJc7aR7xB21jnMLVTbPZmWclsucHFuOj0ZHCkIQCmHO0Az2UJ38HNxqPPOHFuLQamihlmTDtVrEU2XbEKmuFR2CW2zN-Lwaykv3EWl0I2FEsetIobUoUIdriVI7IS59TLAeTSqunY0KBJME6GbshCuS_SyUDOuoIZPhfPv6w1grJzQx-SlkXlr3cv0a6J8fuoVF03_DwSXYMiMojXobKf_Kz859mP4CLbk_0cKHd581vRzNf9LIjDWe-0ZDAVAl0EmTZ2nPqLJgQ2CH1PcqAdB9z30h_d_MHnJ7Fg7VCTQthzLmq6FZ0cPoytyjNpJIG1GZx86g4xKFKCSUK_ntj-zFiV-5HPOmyutbtnDsQ1j71HNPq5wClmkfXkRvf7Ia3N4JOVlOeIgZ1BSFVE-AIFWmiQLyGiyTXM8zdPoYxp2g2wo0VWm-wU5_qHR7p5_20HqR8-r0smdyiRpmBLj2y1OPBhNx1-HiRgOS5GED6Lu-HuRLcKNtq4VfiStB8dp89diO4wdP2STTGePV5xBWBPERMCT80sF5V_un-Q3CYzrvc-6dekWtx23NfWmKdu2xRlCsbDNM8xDCjDqofolI2otUa73Q1Y1sriwbz5JNqDpwwrVE3qqBBQ6nm7V_j2synnLOgwKBnIaEEqatqjBfeI1CjKoGOCoFxOc-hl4C8uTE4zi2lRaGMSAQlLfti0HZGnTT11NHI6sx-RvOS5Qj3xYC6GlGYS5A3vaqtzhpyBlYrL17MK-UpxJE8T9ZkHmkHYPfQL8qNY0cn3zBK4DsHjzWufCxzLbMIW1hXqGksKuY-RAkJTFcuBIJAiBmJUKm7iqW7srNESes074qNi41l1egsMyjkN_cN43M1TTthgYxGi2Iu1LT5bj-WWY4wvdc3ILrQ08q98wP8TxQJxo7rC7PlxY3gRyEfX4ubKiB6I12tULQPPFdu1l8ahd0usiksJZlpsEtzC4TPOz4Nf9yhQCcirFKNmpcl_E-87iRdLdA0gPMNPW4gtuqoBN7KzVXcTlZsfHKUeU6jSaLlyyyV6vkvXxCcZY0BRRfPFM_rAZDWq09jMSnet6rM1CltTkknW8Vq9vxyAQpjYkDyoy9tb_pun4ONiR948YjUgsOJ5h-znu5YaUplxfzBct2SYSxPwt7VwIhJ_Nmz-ArX_arKz3BxvN31ONxyzGMKUxWCGh8UXXWH9QpAmNbvc5yXfIrVkyqVEvY9jpLughyLalGKVojQw1hEvypcvS1JHdl1ZR5ieJg5Kb-Q7vsDDIB7kVzbHqkOFYeioW-V7jjbISrSDw2AykbLAWI-6Sn-lZxgMaBN8KQtOQBQhGw64RqQUVLdmRoUZztyDcI-2vzrAPKNq7yWlG1dP31woREUqwgi3yhikyzSTcJJshEnmRrBLi5aRrr2gXTIggfdvToW1WNFiNXMXxvZzRCxBUJYLlC9RK9sAmLv3yE9iDww8A5GsprthQKqWNv_L0_CcSFz57uFvHntLip4Ss8MbrvqMryKTOdiUzTAP2axMN_8qklZzq1fJAYedP3ywAfsZoZoKiAujxYfACHXfDvp2w0_S11aIpQfpfBjJLVfy6W4SoF1bMVxawF4x1yWPo6fk8t1Hpow42trZjVJQQsRp5ppfD8O32YeAm_nOc9LqzcMj-ouNtc7Hm8zv7p3
LMtZgmsO3lTkY3SWpxMwSbyUSjoD5ZZpEDMZ6_8Gv_DxsvhyTuXa7JQz9C6kcsVt-jXs29VtpOsLxQqZoTcHtpYQEl7djf-eABpVU7Nc0LTzvI7YoiSIFctuWz9AjL3d8a7xHDVc1qtN1cMKqRi3kRYGliwVHMtU8a09ni5x3hG-jVAI7hzBJgsyVPW9Nfvm7nCV0FRBRvwMtBHC33GVY_MGycVM3Poes2OEuyZyWnU8j6wl_GGu-LmURseV__t4Xn13Dci3GwnsyB5W6a2uXeW0OYdfHSvy2c0OFU3Hu2NoMo3mvhrEF0a1lIXh1uTqhMnJVyPHDC4nlxQNzmyn_NJ50-cPCPXs3eGdzx-14FQTNZT1eiSCrUO_NNsJSlyMbOy6LHusXIcPiX0Rnf3JjlNZuebgC7C2xuLCfcxK0ulKDeyTLIQB82EEC1_qhSHVSRXprxOwpbzBwyr_O0jv2yybkCSob6B0eMrHPoe7N2dGTzjXbfy516SQYYTNZmoXcdIFxzmNdRm2gqa_J19QDKmo8zHAd1BTYMeN03P_5RbCYIfeJn5BRMSidcbt9Rskn33dWiwtSuGA8g1cXAAuAUVPm9k_ENk-LWBXjnrmX43YzPOhD0RVwasn2_rPZUJiipzEQSL0iiaghvpuTHrWjODdkr8pVwqFrTh4PodisyWSAa66G5v-zz8sgXrV2uAwepj-T6LctNJ64sjeLuAhPliDU_bLLvroqGwhKDSH3yTKJuH7AyY2SxQZsw6lHtDBj70D5Jm-D0HM_AY_bSNXOZonCpxS3WGSulWlAA7GWoviZ-UkCQEmqeYNYQ2M_89fCu7ZHBonKFF6Z756BNOKiPwHGwBP4YJtBI-4G0mH1djODIpEvhWbwzRs6D-k0XUr9qK7wlTfJOgJ3yENj986RPY_rR1a4SFUJyBjIF5nh6fODZ6pEOYXJwMrkA47cfUN3lt_eLcgHTFDGxeSNSbTrl2u6PzrPFxB6u10GQ_8nnqWOChnChDK_RH1J9T8993M22Mdz_vpA1b856iehAkHR4W5qAdg-MEJHf3WtbTyg6PRwQdWeLBWikvXfqYuy0c4WcO4C51C4gG6SX0nemfSP29yC5mpejZGLvG_Ry1qJ0z2qbtFxWb-dm0Q1TeJCdt6cYbiz5Jw2fXCWd6NeYnUKkuL4FzaaPgBSd9EHtcZw3oQHN3k8OYWBUD9B66b2QlatrwL_WeMk3jwEgpDeUT1hYboIfgc1lBSwDVOg5Tr_LulYO9woSB2V_1pejCEkIpYd9fBPl7ULkbwtKzbFVToGeQayaab1lz4xcyVBJrobSg41_sYex5iD62Ho8CRG70myiu9j1kwDDfKZkHN0TZU5t5YXV-LDr87te1sc5quNn5i-l_N2WX4C1j9mOXUevnzi5QTd_Gac8ryYCYGYE122rYCwnfZQDY4DxkujZNIMyU5f6cXL5iHMagYViE_sWu4A7-sNxbALR0p-78eJEcLB64MvX0mR67vSXC50he1YcNYOl7JYFU0DPIM5bPoq84FqCNpfpxOpbCiGquLKCGcnLFSlIueNDOb3RsAFk9z52ds04qkd8Hm2hWalMUA7ezQdTN_bHWKQRLC_CiuAjmzz6PdujkO_MPfkkYb1HpTDStZS9fG8_kXZS-Lf7_K-XBGVLjnMjQOIk4jd_hMMuwuv9IL7TrJ8xoo2pXTQpa9DL6WLy0L6mki8n6aGA17EALQJfxNlSvs6L8ZfMx-s9JHqRHahhUR1UcVlt7IfGaWLHnv5GWIyjKz0jcMe4iR_hRaPsj0sEL1Q9nmvxZJ9JJk9IRh-LX8dm7oiLS7eTgTIyXEuUEOAjSzoEL-jcVgQAj_RYvEjWr67208ME01cQppSZdPNBQvsoWHc8d71XKXCmdZ6TAVr4DIxW5mFj6XMl6DF8MYTNr6owgkySNvBY7SsEcDtaNIkvEK85xvRaFyNFsM6N8KVcJ2W23di_a9GPqgEUbcuuwiQs8wgAzrpxKt-LuKOU3bf6PJG62Wj4LEuIXBTJpB0tgQOkkeoEZi3Dh9VP8eE1zVmAM1WxgziyKw7zNGj5TXlfNG8m6cB0jxsra6wk7H7PY4rJ6UIBURqjItrublr7XeJdVwJCzlwm8cHu8a8hpsW0t6R7ewb8759U60DPHXX30XCTfGya0NTt3K4t96cZbXH1pSjtKp-41r8yXjT7iOYvdXA3iJw9ePf9Bml8hDiubpsK-RKr82CjBfPaGgNbcm_tVqyV-AzWFcXXuLL0rbfrDia2JTEJ3gvlr7KVAceUxK0uKVLecDwDOh3QGJI-PQOwktUfexUuecfX2e87TQT5aBFWH_H7bk9qo-nti2G3WO9tW3HIK0DOJp7MO_yQcMtYkBsXmRskKqqkpkL8VQqaYBRFAvmSr096pY1J1G18n3iPJnc2zgP1tkUzlUMdmNEQx0oNoZ_M90EtA6LYv3IJVUE1u7MDeV6pfsopSWLJQHNyJCT2spS9S3pqv3KUp_2KuRsS8vFpkGh22rxhqt3ff-LheSE7uxwgoAD_WhUoa2Nrl_B1fZjYou8UxwJTezci5WPiyFVdKnr66cxA479LUjGBefE_ufBy_3G8ZDoJtorxkVu_35hDJthexWEKo1l4uoQmrvzx5jiRsOIru3UALMW4ug4qr8l9oOEOR0rvms6N2zmlKE2V4PipYZgEIr0BEVrivLOQ0w7Se2GZHptkNXKvi1Ukx4m5i14ICyMPMSwD-qYUE2D70VUTVb6IeRm3OoXLG62b3rG5QekU-vQbE5ZV2OGH1zLEpKwXmGCRUXgLEFTEBmnx16FoRoE2k5XMzIuDTUs-4DQBb-q9g_0-dFYuKIXbR-uGHLm-N4AGqQemWYX8W-fGFsvKI0fMMrhmTpoBOdRFQtkxpR-MOq5AhFzsFLO-h0NC61z89f5zDpXyNnUwCYHD8s--OLcSaUQCg9nmVhzbotPldC0_9xBee131l_Oh4a2SaBcN2KwbPhNzBGnyECPrPSN-Seeer1WHAxAzbuMFCKMeZfkP2a0JXayxFWvg0zRUHuC1OVhGN9YWwIbULEw0WmZABqodzZthsBkamKpCFu3cgsGC8Uc5kdpu9xVvDSKRrmaTVdqYofVB_l-0s820NAoBKguBRKqUce5VD-LTMCqzFAhnbrfx7jNlBKBEdh59mfIxfPDBUEAp19ES5PFANbvo2yfWl_DsBnRMseubF1JDiTVN7YnPuY-7yeDVvP-jxWya76MtaJ5s9dzsqMIbzrArTMEWNp8txuJboYHIoIoUaGWSuDGbgJyImr3OIwxAKvAQH71wAuAUdcTQmiY8tWPtUlxwpUMx7mRwCO7l7rl7iKfiTf0oagYzR4Ix3V5nFZHixfk2jrmbAvWu7eFhOIjfMWC1yO6qntwVHpZlUOtnl0-_wGjgP4k4Qr5PhykaJDReSiG0swOK2cOPtC7mpm5U9vJ-F0N3lv7lHNIy2nWIS47nmGRtE7MJvPr4SqsVCZWoE-OU501Nu17U98tPE7Sc8CTnKvUWYuzCdsfXFGeK5aJQSFGJUTkLw
AHXMTaNQ2JjPPKLyMwZkF-rVm9zwPEpek_siwIE2jMHmo8rvMSO55tua3GgrQ-6GiqZdiRgucYMS_aV5SyVC8vgTqysCeA8IYRvoIVzgWVVstJNRoM9CSYZnOnU7Qd0sQdZrWehdID4rW5tN1UxRnMhbIyLXaTQryr5wmnSyrbo3qWQXp1QMb2HZZA9Ve85024o6ZEDIEqfFzH_7Ypx9FJuOPtl80I2LOrLuUc_FlPRJrlE5klK2dx9p8E3emBGfMxhHgqulPS_mpIYwOigdSKMSWNgR2HMwpq97RAH1rmyqK8dVbh4RuBDn1x8hNhBO_18saZVnIemJhmQrwBt8aqh79TDjP-yfXU8RJukw0oVKeWnaWVOcJXXGMnuqQlcMY12uyiBLi1NmwMoWFdcpbvRIDuaq2MRmFbzAYbmqDIDhBID3KP9q6otVyO8fpvguyaYCfRrfNyEyyQu3gw50ZKlMSusLO1rOJiaJtOhy-RXuMEPgkJ0_hZ2JkqjCHzj4AZlr4crwXJ130TvkPCmK6QA3Fb1uAphVjASJkpfxFK10Vms-sjpwSVX6tJDwtdaaROaeBAv9Wt4Gs4tyZ7PRKTzFrgU-CAQDdjqhevxgH2TFo4UauFtge3aearAlft7t7SkECt_eti-ZgT736U6soBEsEtAQKcsu0XOJu9CiS_5C8BsnYebgZ0wVQ1m2dxCJ3DN3GHjEKIa1XwXiJuIbaqZCVTLvw_lEcMsT6B_Gh2zTpX-NU4aNt3QO0AGs_uz6tgKOKWSjrCLW8aX4KqhY33mcphwls7uZGnDB6amBBIDsLbdYC0A5iEYh_fXDpYJ6KQ_SZxnh3iZuCFLkKZTdrvhokfEmMjJnyl6Up_No3xKrsKrUahWr-sKrdotk9wpfFuv7y8s1bno-DYEQOtcYQ7bO5bUzdpG5TyPp5kfIep0gMu9IBmC9i-cQPEMhH-9dGPOpqama-7j-Mt6v8pfcC2A6luuDlBJoCxG-ioKOF0R1D-Cc0UpIxI9M_QeKQwLl26ReUV_LyGLTq7IpDaav95qkhuerRuVBm009-1S0elIllSS_6RtJLbp9hEb9g6zOZRX1eHCM3CeeCQge7MHXvqT8joFV2ve9EahUOQaDckWCj8ULBMQYxS3Vi7RKPoF2kguqetfreBl9qid2GheYngJC1IwKJxBIUz_vHGLKwZgHVvRrLxYWcA1tucOYf5kd76MQkvhILWATg1esOWVFm_ZyEKxPbhSNBtzqZeT0DhORX3ZbAjsppe1Ydrg-JbK8NlfTiWnPbm55CtkTxj6T7SwTsFVi0TZStepnS7EX2ktU5S-LN37wOxUntbib-OEW2pawJ_31iPbx_lFsQN0nsV0ytyKCW1Im3Rkxzf7hYeP5tsq52f9RXatwX3Vl5uuwSxSQXNnGQn0Gp1mvFvLzbVd8Pp61VO_qsZRvH05NQKL7aIbnyuh99URcsIIuN6CIV4HjCZ4300tjE9HepIoDkKYBGHW3r5m8yYsRXhjQ6gxyz6HAs2x2MvqlHSSF-crYKbiSsnG1AihkWMtf7EnxgcjILDz1U8e7Zsv_a1T7Tk_UFyaW7z78vFkFzaaXHd3WalQPtXwa0bDG7fLSrhNFFl8Kos1cSKaEF0wZrmfWSEaV7DHmE56vFFTPVmd_XsQ7bk9TZS3OafT0pSPf4IdExQEsOyhwwsiGTZqEMQlH2r9W-TdkahWk0GXY_I0c-4DUhz4yHjnsxYzAJ-j5Tj-gmaRO82-T5SICcq16eL93f7Za9perpSNHEJuuUHgMCg3LfKcCaIXQQnPZJMubvRVJJowSJjBOusD8CgTMsdGosLcy1RYosfeMeWaS82nvorCwAZRO3ctvwumTPmFd0KXqeYTtt6AZRoTCyxikgj-PkqFOJFlxLx5LVgSfgZ6IWFhMbJP8FIQR4xw7c0wCt98pXrt-cXa3-ig6653tzNGwgz6YhU4ukJdohBZ8Bnu1kgQuBzBE359maeGV3aQ9lbkkE4t5RFkrN8RYbFz5q7rzcu8hA0FoM-MTZSv2ntaYdW5ywsfKM1H66Bc6PUaDqONKos7n3pv_8erFVSaqHSjJaG3M5UznlrFhy3BAxgxCPTo7lSSK0wA2urI3SUPFHW2p78eLRKMSD_4oy6VyoIow2QPzMZz5xibkSqFUcyb-ZNNWRYhtV8QoOk3FJnLGsa7RrSxCydUFYX5U0R2_WGssRqXPSr5APa0U8qHR-Jcvsk_KOBfI8ka2w24j0MX8TEKm0P8DuHfyfp7GHqPKY2pSL9RrLrZvCp0mj3sLf8a637SbEO-sf9xbUYJDZyhxwlxp_xFG2iVIB_Mg9vaXQUwKbwwFMCifzUqGng7nP7yFufjJTiqVQPqe-aq_lGGHhG9lUGeHqmItcSB3KbVZr-JEWo5mvugvJBVT9Gno_leFjJTzYPviQwFlDzn9dCvOhDtOP2KStPPZtFNOT38D3sl7b3zN6-1VsNGDhYEUFCDEZKjTaDhZgTS7idvRk-VDZyOxwEYESdmbDM7xWjWeSAmsz9i-AxDiBGSU8g50CtGKxpiSkIAffq6SHYJb28VC2kMKnstE2M6OVjGCKqaJ0b-3NgEa3PeQrHDZb5cjLtNtCErfKbkFYNyLIcgoCjFHz1ucwWBu7aTSmFvje6DVofDTaxC7oVIGHIKMnf_XCrRWx3L97cSpJa07kl_1AwXXJcvXs98egVqZPB9kPx_sHZPhas5BmfHioLJFLnBNiuJG9EfRPeL_FEI06pw5w5HYH0c-StrH6HNq_lfA-gzPeR5hmu_z2oN1JHcOR3EBStV82fQoidSJwMy9L5IujQ7Hgcab7Bsk4WcaI6p8xG_GhOlVIOKMN2BHh_E3U7l1H2qj8Uc4ZLA4fsvlxLadPP9R1GQZqHVuTWAh-healtFxcxLLSFcUaZUdEFoMe_Ybg0fAddDJy-nrsI6bV2dZvIRcH1dxd0WBWu0fKshwruhlXgnGYLFUwITwSNKJcZ7GBxyMn3QmhWVwwyuDKsfHGGoBg871Ijn-XJ8cuA0tpk9p8MCPTtjqADDukvNvUp3UVqDndE4UZgfVDmSlUG8TVEFVssCjPVJ3WIa23LCPGOt8lyD1EpkMzNdQUwsdPk7dzBigWqZrYGclhpnY0eAQGc1bnkFjF5gL3SbK4nKJerI7A-1MeuIZz0oaqaKhrLXfv9AbuWQAVrN0sKuzVWwLbEkBTdNNpcpsdhfvmqeEsYNFfY7mbZVBez-pW8IEvBai6EkZDxkqIIg7rCvfb6nKToBZsgouXzogbw3u-4n1e4pTvDi9SS-70KdqPUoRvF-2_Pli5CBFSuLQqtax_Nba6MuxSy6piNtPpu2alILIk7b1SbQi7a3ofn9KALuhUv9PIWmkdj-mGZrxgxdbZut_JtARegOmQ1yWLBLaUkmZLT9pOgh65dt84MTmbOJhiTGloATHoe3dJTYR0ciqC9k171jLS4WZUG3F5VIHFMLbN2oCVkoEYhjHVHUpaRmsVJyW9cw8_Ea9svN7BXWKXfckCMNrWVrQJljEqNilOb_hy-f
7X8wt9Hl5hZOcXq7s4JexCF8uf4t3uihak0NbL5zRJq-oYWhGvSS5zAGkIIUX7GFaD8gMKIjXBzIGHTV_jYuwSCA1SbTpdHhK-QDmLac05w8Hj_UFzQjhpndnNeIi2-0F-ebD1lZ0TBa-Awh0AXpeGGORdzWpGjYMimo8Ngyhhc2c7b2zbAJRT6ed07PIrUDCqfxFKTaNT_jHEt15wV3XnJzalVVvwF6LaxN8NbWjbpapAcJ5hVxWbNL2v0_eXROzjasQrMxgDuAV3cNntq6HG2zJ8n-UpBmNlrmkekDAN4UL4zsRkMrDliNNFfgCZKPsyV6I5pnSzsYvhazD5VSghBoo1aUX-kfEWdWSNhy_qfwwSUnum1HUD0RrM-kKV8OHnbSjA42yOy4iYlwdw5K1uhXpWMQymi_syse_uWrDQ_LAnH-gnioRQTRzkghhJ7FCPG-hgS0QZCIM2qCCRlDS37ZqN404vxOl16v-euXTCniF7wIPCoJn7WXdF1QTY66YcPrAIp_XcPK1pWwhYPriM6AduywlCKFWZh1Qt5qy7jY-lolD_jpItRTWIAsd0jF-qzmyoOOwgrYBAbrFbSut9Son-g2yEgqbWQcTEXbO42rAaDBTG1oeGvyNih2mSBcpTWDzorvsFZrH7WqDhBBpBMzpch24psLNCHPpXVbfdi6P2g6XfIhaALXNBNWphBVdafTjZ8lh1vI3YrraDCGQ2pTKsqSGIErOFKPBHjVzdcbNamW-s4kT5l72Xr1ubnsuGxWs5okiVMPDoAsUiD2wWUbGgRQCwvqpaQx9QEqhCz3_P6ckDQCM3U48Kl97xarMLoNODcFnTo1xnCQOohl_y3qURpXKPz165t7qWqDjwjIGuA23qA4CZ0anD4GxKw3ALX4I-08JzAq61b7wMaFjeacraeKHW5z2e8PFwqFTzVGbNIevVH_QIOhxelLKjIODdkiV-T6DkKWvyeQYpkMp7pwWiKkf0NspX8vwzRpc9tBaarffPb1cunY4pzyVAyz-Ro_l5h98bbmPX-x_sFmLBOVZaEzdKf1rblUNm_lYPhWXvUv0I0xN028-q8Qg9dUR0Amz3RZ3R19sok-Ia6qJy1eG3BPnP2H5hHCrQSXvChHOuDaBZwbcrInLDcjVKAmh0SUBfEVh1L4HQMOE96_slKM7yz1VT31GqcC6dF3j2osXw7N3AyK4iiWyGigzXqSXJ35ysPF71PnuxIuet7KvZGXxGAq2PlowxFIAJWxZ7Ky3oRhhHpQnRoBjs3ocaxXXBYPqxYoD99_4Ag_1GIs-hrNroBIoswpm5VLQVRCROvBw7k3az6UJE6ulsgm9IzpUsAzraD44LnDGsSl2atkqsv1FzyUiUfAojbonUmGtX8B_L8KTKUZLwTrpOkBv5xR_a03HWTx9_hc3i8ZXho8XB8GBb1U6YPsSootdub_vf4O652L9N4mW7iXs_3115z3mXp1S-3Y6YfB3atRWKYfuH_7a5XtASMKlZFw7qIRZVRm1UJx-DXxdYOCuuONyonemR69JCqy968D-Z4vg56zVeaD5YEPgigUJ9ruYmQp0lt8AemkTwyZcJigL53FPFQ7NLAAmU8Ew-gc_dp5qKSAY8CT8olAsTOlvD-ZPpQ0vONq-5pHkZxL6qK5bzF8iQpo9sXxi3Cbv9TNQ2IJ-hZFhZ0hwmE_RJMF5h9lldU7-RDbJci6vcVHALPEpJYqzUuzAIiwkZKFzBZmQIlGxn4Mylwzj5u5g_4iXKwCqt2VfEiGYkniR9-4rRLzf_-ZMVKMPmoCr0BHlMnIsM5E60dx46h4mUHduqriHVGa_o5tZal9P61aNbAHIlvYEbiJv-CbWA8-dpMpybadrAaCjQMuCtGQADVzUZ91p0Q2YkontdVjwcMHCspqlNKareJh-e64fh3M4dg8gTX1Un5T1oNJAqhAa-hZnC55AhT2-J0LSkMPq0rGy-z0vSwBhTn921TGR9jNihnHSK15qpI2qMRW2Ve4D0bxOLFfCXn96lsMhOO_V0EbOa3WNUxtAuTv_q7-faMUSNsAMySLX5eGj0xE04RiIFNkrZnkX971enx_uXrN4P7sxFWJEbXGJL50AI9EH9IakAK5El75TYsmAvcYt2g30mYa2KNAbEQOnSlP8nkv-13vLRrnUGk2BLbgflRQ-o_GktUXMQwhiFr4MFB-Xy9mXnNiySDVxPhUwRyJ4xAd6LXb-D029XM6BpJZr2EJh-jY5PzEz4FfIUQZuo-3VQtDrXJOiOxLJXjlOuHDBVbtsbmhwUm-Xh9L0jO4Z-rxowg2P1Z_5uRq2tmZHOlU0B0GD3RvYWayNeZE26a7YmOiSQJx_kbEK3Boz4mIEFcl1RwYbjo0FxwNttOL9CLdvNO6_1-qZeDbJKJ7rJFvf-hnNHJT7Yop3yToZBbzwvCRILfI4ctavbltNi85-OSSJkloJXKWECxhSNixTXdLA2QiNOVR5xiOqytDU1MUa0rdKwTcrbR75U-31qnYJHd9d3KJK3rbd5AN6ARuYvlVZ9mA9woKOxvNw5d7-SA7Ih-Dvz0WOM1ujHpJy76HGnP_xwm6Obc3tNDvyukrggR6Gc7Mfe27n09xQyXNBX4_LhfQtdyRXrt0zm3ZR8S9vZfzI62Pw2D0TsTMiLsUuI56_Fe7-8aOQqJvqqV3E6v5-9UBpyLfFB7mEycXL8tZ1hNknWMy9hZfCbVx7W-ClHe2h99yrM6c9TijZAiHit8n2apSTiy9hO898yGjVrhMJ5vNZtxBjWB060Jj7dgIgP7KUs_ELVDwcqeF0q_r78alxJ3OJ-k1xTAHfnhcIqO0fcFpFTPTmxeqcAh1XbpO34kUXDzlIYYqVx1ljTfnvaUfyxzNDaHdzstzuMml_5f1cK2U-dzbDl0yfycfZNcQypW0eFbR65KLornsevqhGWeTGTmBlP8gsJ7AFH4rRcW8qziakgTwoFcp-cHAZvAe_0yISy2iM70Bf4pQKA9LI5IIcbX7kd39ZSMHDguo4LapMpaJoTYKaDTno07RGtwgDhJxSQqY-YsZhzrPgelUADGudltXM7gGN3gp_7FPWxvlssFZ6lZ6a8UpR8vXRrrvCFi7XTkNvCISPlX8tTnKZRVJMdWe1jzlIiQU5FBWa4X9KTDTZlNSg59batSARU9jTas1qWArneQknV23T3RQtBx0PPk-zWHQyPg5X-4mRSHJUeQ2kpIWIW8lccCuLvQ2SIwDMmvYOjbmrTfCnMMQdNEsSSzMDCtz7DZTtBnmPgCjZQmn_IyGvSuO9rY8wXQNNaTQQezYc4_pKWr35LoYnrUdnCbpjAtLSfHnjsF9-odG3iunOtAKFWifltkCnBt2a7FPTCmkcmtcCfvynhslYo7hcVFtmNlROR8w0QzjUjvPEW-uL0NDnJxua2wVA09Kuk-UINMJ0eZJ_7n3kgTID9D4XqpDmXLYZxEo4uTOjZQKzroLRCIAw075qjf-GYqlS3J-r8hYHUf1Wt2K2akmtvpmIuusoOnrLTx1O4x12qUsCi8lYS3PVkn1j_rfysP
NKtdfQHUP643RZPG-xYAlwZuIQHa1h39WY0g2BLRWZsA2XdHtYJIQp0ikFILDkK5Q1G8IC4hUc_IgT4RjFKDzeKAvMqTZuRGn980XVUWqlmbl3NStlcdkhP8zheNG6VSxfVDYOPY5eLbCOHPYeUOS_6eIEWm4YTGXZfkrgL1pEAO7EXeGnJ4Vnhw8orwbFu8I6G0ItTux97_5sk9NGk-D2WohSujLV_ESD8phJTD1bNH_wrrcfnS_QiILNs1-n3q3ZUe0EecGgh_4uNUey6JSBFv3aNsfb6rC0GT9zINRgCGKFBHbbz5woQD7PrTdMmDObKUF5P4dIusK-gzcBbtDv7N4oJOO9ql-UepNDrWZjQp4fqk-5HCAWASYL1LbDebRu0kKvmz0OhnqKVsEWDqmDN2YKXPzZ8yDCnvXwt6JwaSADsBw_38UlxZDzzecZ8YAffUkZWSq0nfw_u-iqhHllR-71PojwinZFVZdm_fbfFLgUkk3l-yrVZ8RJmr72t4oaLn-OysVJJmAcH3gxa6vhopxfHC28MCpmRcPmBxakgnD-zSZAcEFg2Q03taXtbBJdXeL1HrKoQ4JreucH3NGDh22oRapR04guU7-4FLYKtRoegDhOAiNNvr-62r9KgODiiy636PQWnTn07s7qjuy4JUZMFu6elszdNPyXAVF5AX8L47TWgJbLFQ8TYPEYg8YR9r1gQcDUpM4Y9jMR5xGnP302bhzB7nulwJsUt-IfIryYL5oBiEEo2FEfTZQOtaef5z4fwGw3T-PS--EVj0KGnL75XKJjxIcVfYOedAe27ghi7UojtNF2G7ObTVvzOlTq3WOOS4tp5WFypYtEx0nZT_F_s-reMXi0omjMz5R_NIzizuPJcWbgFu1Sip_5EJHQzCSMEBPymE96ZoBa0SYfXvIcQVNpxYToXw_73YKJgV3RUYLiZR1IAHPVAoibisLLSoxE3w41O7JoCRJv8zt36fEXNw6VVKiXNEmWVhpL3uVKGUre5jVb-dW7C2alDzQYAUV6jdAMxy9_vK7PMuwKyeYenGqYXqmPji0j_6F-pxlVQ3KfQ49vMwF6RlzrhugQXT0LL8PNx67N5_l3q455LcJ4zHgrjtdjEeQHwctvORdq6Y3PC_FoYuGWV7bNuxai-XFVpLWJbH9pDa9pO0wl-REsBujNsFKJl501N6qT6PyH6kW8F6e3xY7TEiL5-KbqPsmO0YIWiE3q5C6XeDNcwDgCt4UCeR39TfgPNUQ4U1DWx4FaS0cCppV1CSb1-SwxJYWBy-56Vr3klwBCAGk_QhoBIslcnwhlrIoyCA98e0LLnmRHHhJYSb4EyXUWZkze-ctPwX5u4PPYFgSImbo9zvbsvwcXq5cP3Z8dBPvej4ftBTWFU-DgTHGtJ_Gvt1L9T7XfGViRxyGcGpEaFD72QFN4DJizphdPpHT1SiVXfEugRHBtvKfSJpMNmZ0ngeQckW6_mt8Lt6JGxeA-gVdz4J5jZsIB85bGPFOlzq2KF4QVhhZDGFYMerUKa0yqYLvBj7azMqGIvhc-L8JIl7z5CmoQNlmdU8mODPPMGS47VCc1WVGSP2qq_z9dLTpogwazQs0DibGVqj1bEKkiRhDyQJ5WMNEvB3hv8MyCCkMRkUayARrXSuDITYU5d0OjoOGhl5r2RjDHkJhdVGhP8PCfFyOHI1z-2hSrOm3El_WVK_16K6gq9m71bMs1gY8j1BmmciuoXHhy6vrJPTEN3QVAFWp6yde_iKvFzAt0r1Y0VE3-mGwVZ1yZUANkABSyXORHKXLXcKxnwjvMVy8-gfU-YUB584gbnECXna5UcaJxN0r9hqqpKZj4rC-dcsQR1FbvVZunR4Z0I7W2vWOlCTkOrcH5GBaKd7cr4t6KIcTCCf2lDAM8jVphvjOAcqjbYgOGeH6EbL3xLTh8vYaoJ48jHQJ6CsumYh9hk-QKzJi0YdGSnZh3wwkYT4w6PM6ay0q28lTVtFHfWEmkE4QPT3IPeRZ0zHA_qnY2A-UD7Me1l9Qj5vXNfNEX5IhXz5P9OnqpZp8ajhmJRk19BhPBLiNtpp--gl1aqME3Gmog1EdWa_7vgOX9w8Fkuj6bp36cGR82n-Dj_Rau7bMnKHmRCtMdZWWT_m5462-e0QBOeIbQwVa5ib2THX0OQfBDqlW65LYpZkIziTnAXEYnyDZ6tNfNW49X90YjjbHYIwXTrUgo4pda6YGUfVxUiSR9NFEv_jD9chTSk3zGaXqysFgET8IpuMEsxkRoB89BmXURY4ZJXIH3FBHK32A8sJPfxWRWTCXCTP7niIPhATTq3Dz22e4um7of46Uq0RUoNtIuHskEyMvCUDHQITdSAHF9medH5ZAkN3fHopkFUKOcJgNAfsqy8xcxX-3AAyoeiHhHWbCccA3IZl_UhN-RuUZAWBwvbO7boUhr3tTCusg0wq492YBaBkOjjs_ezTFff6tYYeeNMN_slLpeJif7Iq_tpBopyak13RSw8pEz-0iiFXA8BL99asa6NZVOElzUrY92Mxr38XfTRZNefdlqep4Cy_CpLKLIkvJz5z5sjwdWF1QgB1y08C-lmqmdXhwm_5zVg3QvNgrqdouMns2dZI2JowZMlysxzDjrZiAiH740oZCWk7F9kvAnI1DbnLfaMuLOGjPWn67mhSIR9a5QBFIPZg9TbSOLbpNXe-FYA_cmebn88624T74O8UIS6vM38azyVTyc7uKzbnveJb2qVynJuZQ-tHFufYehokHaA-7AhBDRSejUwc2TyYTevmjQes7LNATYztWAyJmD6BKBi2hh1VDC7OrQcWPpOC0y_3a3IXM6TOr4C9fBu8uBt1azajtSngYuQh8XZVaTb_GA9Z6NbzOyObjaod7jnyM4N87QZQY_qZ-jDvbyE68Vp9HGTHmBdSDBbhGhMBv5hbWTsAecHHLth2_tFnXue1S9ak7uTMprpvQfI0j2ICYCGuN8G02dF5AeKZRNjeQNf47zOGtYfk8C6BCyRe-oZRn29_0wLxCqYzICL7-17Fl48W3xwlMqllcHsU-s4GbvnEy2TaTIvOO-EDFBhqQC2OqnoPVS8Chez6mPzwDCRXckGuxiM5hi41dfKBPckdJgQCsNvBhLnip8pOqzktFm9-dR0rgdCWkrGkbOHiEcqx1etxk6BEpHe8JNI6epoMr4Txf35Zj2ddMqcCO-kO4Xlm3dClWRFsGTxYD6rQi5to1YD7yDDYfOPryg4gXpMyuCBg3S61vc6McKyFtm9SUXLnvqOF_zZ2B3fk7OrzTK6j5aTspEpJ6MaAqYd0OarsdF3lieDNpDnHuwvMGRsk-KEyiI4NPFxqEhZ8LxnK5TI9n_7N0m3H5Yg6JSXrQnb6Rad_ALHKUm5nHSyKoaEuGlRxihlhAOn0bNCmXFPFjBlGem06arIF764V4pYU4UH7zzKiLSD0Qlg6CbOqwEfmL1C8lum8e-OpJK7sFyL1aAKtmvCP43S--XdNUwto4HQxm38WHVs4S-E8rE4Yn_iRHNkbFESzWG_452SNDFeE-k7royvBwb8
uNu2wNDCF8iDK4FXibQoo_WdIFDj1VZ_ExThvc1VzGAttzoer6j4o6hmsDRo_MAVxgKMZhTvOmtKBLQcdvW_9EmS7ELGSsGp-GV0ckoZArG1so17tqBNiYd4WEXVsampfxgVkTJDKWG6WrO_RKODqfetJS6ViTtS3PFNnhPkQ1nuwjjvJdoRPwjFkrU875srH5NLeBnss2k5uMUuxKJtGTfyod-QdnIeEpYEvMJmEZWSWMuOeRw8rVbVkD0hQJlC2ryOmWbZ0Qpskxzh6zJ9ufSmt-71-7mg3SfJlCtEDwPy-35vgi0eOVgUzr92tJCsOS9N09ywIVVg6G-uoQ6P0TjJMbPgMyQbFD8vqktw_6m442zoLSdyk9wxNACTLn1n7z6hRxRAD83VVEtCn0Gm5UtKZ5E_NwMRZcvA2ArLJ8Uzr3prJEOnYf5WNvHvDB3HuojbrPCLGf_owzfVnRNrAu7w42FGliXhBUsdkKJI89KfdCbQqPFZt2ZTwGtfIqN5qD9kN-IN4gZ1-iJyO-nPdMLir5Wv8rgbmAYDL1yIytaOl6A9eI2g0snALwu-gLW4dU7ntPHghSLOnyMd7XJ3f1blwtJmnd4Gd7M3F6b7HXIC6XXyhxpnssH8t4FM7ncUScYZaib7XNmFliCMaidy-ARJYmQmTsagxSX_S6jC8nmOEJ_RlJrN8L0wCHfE8XDJtVcYta9ZeMMPG2nas30v-NsZMf2-FLpVfQdlBbU9rweHeqxvhVJkz5Pyn9FHyLyPxM-DydpI6xtxzcBSJLi0diiiytnuyZfLMSmF22ME6QEUtBGCXeYf044SggfUiL-wEIYhFWK2-QoUfY2s-aODeuM9aZz0_GvoaIn9js9bveeBJAL89XVCRJYCSpf--YNuu4FunMOIddp-wDJCIG2PPmT10whalqIsg8mzdUZbc1_02EKjiChma7MKeutlGn85teJDSTKvLX1KUyyGE5sHUsrdK99GR6JlWVjF-VUVugrWCJyVpk8pB_ALUcOtR9LH-ZCuM8eXT8FOLIPNYQJZw0_tCsBaf2ionj5SfvbIrd-fUUj2_8CFnTnvqxbwMeTPzuPq9pJyob7go-2b6UqcJAJG5FdIhmDdtGNGXxw6CMpm9unBuPscITgk0l1KP8ZnLi92GfrTYTC7oANHCYmdQkI9l2_DL4wU1nzzAG4BxWmdQvAZyWmfM1YZZ6UpyVndNQ2wfMdlRfVH_LVmSiF7P0V9EeRSJJzJ2ZXVJMNoKYNrB5EkaazyP0pPVliIx_Uh9TG-q7xLnpW60mY2d9kIgAnxTn3aBUk_6h2XVKt2uCOWGDPjmSgJdihQ7UnzYYMld51wj_ipyAQy5ew9YwHIAyNzjONUP1YjVt1S66m7IQZoXyX_71YUMY8SX5fNGpeAvuGjPbtfd5aEDhOVxevZQKOadZq1KmqsjgYcGExmvipIISQR-VdTsgNBDswhsMvZor-BiQFEIZr9nfgR6uo3Qy7E9t5thPcxTI2bZWuK5AMdCdTNkLBhNn-1DNncQ5U_ZLTw5QMCc4zozTx0Xg8zjsGpLJX4h9POD8CsV0omCW6fu5hW2VzEwcI0MujxFMV0ligoasQgACegTYeQSjcNvHZ0Mpl-d7ERxyd0J2c1BaX3XcM4tZh_pVQQn8B1mbzikxh-RiGccF6j40LrAHunWwJURNPMnhVCJHOUZk-ySzNKYeRsAAEqwP5v11CwMBBnoYTXIhvVe-ozr1Qx96dvBgh012sh2dwRSrWXyv9yk0Jf2LIT1eXih8TZFojc8E87BhLDDm8wyLwtIzQ9tuzaGO5o0KuZKhgVZGKDf1TzxSNcmqCuMM_R1dhCgqZOipynVm0Zn-QWT0KD1Hz_6gla9dvv4R-JvTHOvsINxhHmNlywd0AKmocelVAOFZEBsPfINCavWgh444K-Y-Irkw0zIjqYrILYtp3wABpDPg_7M5IBja67_C7z39l9sS3KmVH8H6GnLIIld1f0qh6R0jOlkbIhZQAR-K4v_XNHehHi8cvAn9kP-doJWB5rugBg9YzMT76jIgmkMU-vlz38wNBh_6CTeXFI1h4mss2DGEtB6ZTOmECM7-4-KnTtZGemyAPh8sDLzhJIr8MilHoX8N5OaV4xJiCIqjzIa5yiMUZU7HaabtidefCmiNU04zBbOStaTedyOMXrLJJUlan9TNFpErqOaoKCb8an8ZdPdO7CHb7BhYkFsH6OrKvb20crxS3yWYpP4PFIybuIBAEcfhZPiyZwoS2h2qxtA9U1DTU8Zs-1xQb3g_CxACVH4PP_aTC0C5Uk2gTYnzZtgj0ZoPko-9HkKkXQNigYsTjmiSL6hDEO8Jc5uOGfGd5_UEGzJSfHoAST9P6a1G0jlOikRWgt7JwDHWXQmMagFYNdSZ1RFcG_nsyUQC3J4tXaosL88JEDhwj1tMK6jRz2fD_c0muFJ52YScW-2OwPPeEZeJXdyQp7WYS6MciOM1F-KvXJJDhDx3xWf2HSW-ru9r3nn6spy3soIPXU2nRJcmCgzH8MIdQYnPPILJyrOHFyh68vLZyTinaENRTDVcWzSU2geENB5syQnJolK7zSHWwVzXLDlCwr2a_wCsBxvKqshmximoP7Yq332e31nbZjRMoL8tG1cu5Cl5A1vQKJQqXPLGsmMJxelHkue-h2SBncNiiCFs1lOp8TiRdvG7mpXVQhYvVkbYOf1qBi5IHujRumSMTu-csPeE3xZ4lyjEqLN0PoxZMQgRmztr6TJKw1il8IrCrWzBxaiv_0iibWG7-nzVM6XsvHT3Ky5vuaMnaS8G-gsYgP9vLSrlQm_2q3KatjggVy0seHmg2pWY9bEWCEN373IL9MJ-LyfhOFc2A-yjnUiEahvVSGM7od7Iex8TM9ICFE5EiMjhW6Lfvf00FLnGuO8FF1ITerKc2AZk2lnnNmYuCezq6BfqL0mwHUE35XeMKvmJpBMAxi1UewiInzBZt3kD38YugBbhfkSOlYoUXqG4BLk_qHGJXgbcVKohxWkT8QCYUQBD4mKcsZOt-ui3CudbFvDcpshfWDwZjfwrb35h-9EQ4PtSCtih2CrKVkyOpMyX6GrzIQJkoeCiWe8ZcwBXSkH_n8tjl9H8Y3Li6h0JLsFFgmcoA_TM5RXvxaHelwTs6Hvqls-EIrnKbGW5KT7FJ2L_ULTiWTVg34FlkrkCO_Hwd9u3sMXAw5PtE4UB0CrSkHqDajq_Iqd00udFeDc2NSbycThfPybBXxmuRP9gydvrTkxRdN7xqDO43fBSe4eTPxs0l6NGHNk1a5Ln__RlTiMQPt9cEeaMhVYjNS4WmPCP72RwLkq6wNuFn2BK-opV8eXXlH778tYrCu9TmKNfNkduiHt5DmyB3HT72L6xJvtmgjgtsGCPammGPnk8greL2wz9fv30DNt7WAbNBuQxGRJFi6_ulGNfWUu-ngGHb31gQsDrm6f9Q1SjiDDXCqjJ2dD9FmnUeSG6pBZw4yU5bMkDabNL6mZ4iV-RE6NOQ5YrSjlctCqXBaBvyCRPAkN_NcrJ9qPecnca6
7NpCadZTtno2uFlr4-JQRUaZEaFawE2d7dzYzUp6tJrC_VgZ2w6x42ex3ALWYCpAtlIFB8tPYhPOaTPUEVVn6Zd0BrIofMPFiPku24BXBIGrO9zEf_dZXibU4QmlS1Y9UV59KEE_Dhkx69dYve_b6qV51d3_zHY3GOELRj1twhX0ycSyTZCLxNV9COhyVqmrBNaDLN82iS5JAofWXW35N72ZAd2QO6YhFEeklt_Iz-cksuo9u93TGAUhq3J92mfsupcU6CsjABrIZksdn4U3csq_Z-woz1h-lEg9eIWgU473IE6lR3wgGx_Oi8LW2wRtRy-UVZbIPix26vi1GU1Yh9TkYeEc-IwLjaeod89M4NReVihrp9ahN8l8snIvenNOEWw2n3Ns5bo5yTfqJIRTMKVQ3gW_OkcCWMW9PJgZ8gGq15uoieh3Vuk_sF8KsF_RBcBjsqbjM7V6rBfUqHJgKVQRFm60YUeazSSB7PMkhurn2cIBpi1MkYO9XWz0u2kayrLNmZrjvY4fC_07Z47Sf1-oKS5Bl7VN3dzPqGiHgFuGXQKK_REe9J0Ay1Z2LVRHd8JUKGh2dJYKSYz3cTDgEtuHuoibBji6Qbw84mZgCGuNPG8B1WPwYpCNbpmVSSLIogPhzbKLdlBwouYbvPt2TLkxhhsIfPipak5fLWJ_UDBXwncnsJU0uQ61WeurONG9Z5z5eU3RY0ZPpvgzgAOl_T0RoKq2YqY4zab1CIvOeABZo0Q77MufED3CIxakvQetfls0PXVze4JZJpPaxkeF5h5MLDZOIiez4jqGaD7FwqSSaK3yQPblJupFwy4pUUasgmpOJ_1HVAC7lgSdCu4gc8ShT8opFoAMAgjfi06iZL6DUUP_KYo-0yepvk6BgcPGuYR_pIpatlh1M6i7UEwdNMTVq2V01hMCj3wIKb1aKWm409OAjpT8MbFPWz-mtGep3vxRXyBv1RCS4_YenXyVAGEhQh6DlIcGSh0Dt2xtIXx8_ApWr61CUhhLAWpVmBSPpmhHe8Bmh0byZHJd-G2VtbIO303Fl0VYtBzcqleUomNCol5FC2SZ5y8qlrbVKGayg97zN-O9ORgQPouFa6flmlD7fNcFxSWdvULyxFDpslWRG_w3Q0bm90_aMEwtrWnAD6giEaZTOFTDrLL7QaV457-OBzS4hZGRSx05T3k5RCv1aaylH7dsdGjJEB16rhrJHfR_qZxN1wUn8DLbBHJxVTfGBTmm9IjQli2M1WqQiw1TAKcyCEyQB9qQxiaWevLvXrpNoXqqHKRv2QXAgy8xDCgXUPCCD50ltGzhGILE_uF0riOlsOG0XNlzfLwhdRaaTMaXmG45-Gll4cCKMmQ5Jgg7B4Byr1ghs3d__rkM1knBjEjE3M0vTQiQG5joX868YFb_wOGNmGXGEXiqKbvLBHcGPaF8sH9BwSxYJ_GZtlG9q1ZqJo0ZsX9oTk-DDGgr0mGGdO_-9H2DcJKOo9cus6a54G-Z9VAtZGhFW-mXx5FAgXlyD4VaIjHZ3QtLN15o4CdImWPLBMMtllEvx6tB--_sjBKsB8puHbEnXU6LyBVgXy4zyWGqf-COhuviNzL2P6DrRAbKFjFAleCafDOFqxqAwDVeSpOcGj3nZwN0oIj-ib5Ik_1pH2aKXu3aLnfJ7_R4RD67kjAzZPlnQnUSjdm-WMC2aLJsaiGpvxMcUEPhg5sCg2BmMUnDnKLWdMaUEcgtFBhj7p0Q4HFbq9sCgjmF32OFO8rMtMueEvghavVOQbtOUQz7v-yY_E8dFUz8n53PpnR8Kp2RPYXM9Z9h0PlfMDQIh_eppG8rPp0oxPsSSFyZshOnv3rxVi1gKECiwCsl2IMqTnzXwwOzi2d40omZqXtGp7jMshgcObKDHaxf-S8wi_xHz-kS72IwB65wYAzyIwK0HJNkD-rqlGAol-_35JZMoXeZ9m_HsBOdbCHOs0MbobHQqXhbS3DGx0T-v7nTqoFdXN1e2iokjFa6CHkr1LSdTeRs8s-eojUBZbVony7QZSyKEkjIoC7bVrhVHMAFIscwaPx_F89L8lfLKRq4NK1bsd_6e2SXL_2Z_Oy_W1vDR2btjxfs9oeUGBL7Lw9BxqPMSjEKQC0yxz9wRqTtEBAs5K_qk-3Yrg1t1twDYaVFvpTxg3HqvccK9Nq6-fzGKrXjmvfg9i_uHMDIxupegFM21z8jJsEnC6FAcCTrxjrzOaDqkD1Mf2c7vI0AlBA2gnbKdctoPc9SI-ljTrgNm9NoRhfxBwdWV_BmKYscjGe0b5lDwn2A2xlC-52OYbKIRGEAdN-ndFtb7df1G31qxQQtWiB9e-cVKvTG8dNPbNptr0pPp7uxUWJhhRq-8GXkgb3mSDpMY4tTnMa3tQFCqrNc3vQCapPqxfNab6OJbmHeOFqjgatFEGKsykIFkr5gPtfFB_D6Opav-Lus9KQ0NvNsRGO7xgewTanZL67-H4DlokO6ArYD0UJnEPmfXqR6duMZDICBVHWKtBUNrU_jan_HMM3TOejHXHjVMTJ6-tUwHEGQmQYmrIG6Sk8WRuCbo-w0_Y5sIgC3vijK25ox0DIsueLO74IXxGGDa_ZQEfoTZX9UKe2Iuqbgej-AkU82DhCEvgKTsAq1EGu0JAWrW2mrqsw4XwFChzPI9jIostFRvGynucG8D_ZcakR8uWRLtyMyS1CQesxTtzWygRcgZFTMc2pgC3RMMCXImY4VJm5ui2t7xDuAGcT6WnzxT0Jr_RCoc4xDof25M5NRHxsbs09fuK3kq86eAmrQv0clirouhpz8jLWTdLv6DyEzS2CiXxdhr7VK842ANTNrHfffhkBXEi2s04AL5QleZdqAsAMR2GXK6ILiwXDzgPVAZDcxRnvfbeF6ZFRIjP8AUcHdOUOFtRhkkdqBlLVrt3k8_1fJzZsO1UkGWd3FjEMj0ku3yvIhO9q03plsbGRbG3dApPJ134afZeGueTr3SM0scnF7Ic09iPFTrDLSITIM64GOUk4d1d67YEUsRmkjbdVKCKE_Gs1HV7_bDvWRhzPdLdUXhc-h7-ioQkyWmNb4COPmv0M_2Zi851vY8LixG1atiGRZ5prmY0J4WOoVmA1s5dXGZXvDqrlUZOTLdrCCwFJ6wGfDGVVAOqPM84XtZXIc1NUmkEeKIzOqP6vXwWtHSycC9e08r52bKxDQFCYUVJx6S7-SskB_EAEX0aoqmCd3RkAxEEPv8Z05VInc9JTXZaIuTGQLsB6ZVC2Dl44YZ5EaOHNfbVMHMqFQJIpC7VysgEUJFZR38B_AifwQ-r663n_DfWu9vvuQfbxEZ_TxwuTa-Ff_nFy22vUJP0j4gY1YeEolxNoO0D-OP_llCOmuFeCih6Lhl3R2KeW5A5dvAPmgtzF9Sc_w6Q999LS-fN6s1kZGtFUH3KWNewuNln9Z-H2QhFv2nIJW_zaGxZkgGoTPPYP1hyHI72oIR2Dj3FuI61ILNGdMr8hPRL-0J8ldJlTFRh1KMmmdUTs7RIVGXOBwXuMqVITIymKOPboy-ScC3YytoSkqPV0
O9lVcnpojkoK_etPcdOeCLuEuQQMhD_spi39g6xcv452HuJS-q3Ge7DdjTvHBqHltiWy8-fqTd0zaf8XSr-OZ13CKB3Yj5grE0PqjP44gnvi_jHXBqBDBZB_xL2-VAj-ncdbeRwm3gunkxvNODGvJFrKH0sIONdn3HhBMr8l0DW0Y059Q4EzCR4GElqW0uaKzpJZKo_ULDQUk_wgYPP-VO4ObJ8Z96RThCyEAHOZcJt9GuYbXqWHKF6ppXYQxcjiKmqKr3vFsB9h19K2DcI5yG6I60KDTb_dENhtc954DPi4mqstbJ4DybZBZJM5mz1DHc4LgsDycEJBIe8xb6uahHXilfudVSra9X-_y5vPvtN_Pmtvt7q3_gBLCnaiqxhMOmYzXPtr1nyxslV8ldjowUkhCbkk9rzf_D1QNgauYcKGRb3aeSOm1DOWI98UR-5vtNXOeSuiZPWjgGWHM6DmifYBftR8V63T0cXbxydCS_T48aC0Gq3GFA5LpJSEEQve2wvHvDS6JbS1pcczsoGELGZmrtFpE3kGZMSzLMz04mQHLBuKZkcs-xPVHfM8r7tdUu8AUoELbVfKKmMj63ePdDrXEvlneS3L7CYL99TQcUDZ0P8knq3gzSrxbTK2Q6ucPs2KGxm_UtKXi1979FfxvqF88pXq2wIgikoElAGu00YdqdMFsWEN3wup-BDnzem2zexk0eL967BLdNwqScbI9qaDYWcMnoe__id3wtW8oNiN9jeNdGkuD9Rm-FbmYI1GyuR2aIujVyTUrNtIrD2MqTPYk9JdBnVHQsPEi3zvOsJwhHSzrwBm3pqEgwkXzSDu2t5axsXdYSpjBXCzXSmwb6YxUMdpBVzWXkfdUDxStANEroGFA1QIxs6XI2wBACKcbWh3jeK1ei8lU6UqT4rDP4dhNN5klIc7-WoniUx-yceoTn9G3aEBimKRuCIgh6JDEwO2RU0GPIOF3u5QRizqIUST0P06DlyrrhQs5WHi-8VqFpn9irkYtv9i_1-ZuLztis4q6nw1q_q73ES5yCsNYprK-mzqAkmzzgQt8xTOyW1axmCZAuQV-B1AI4N8-Oray4cMLqcA_HBm_oJuOj0Om07easQqYiNlLnJgCDPyVl2PUv5ltAPVLE9fEqXnNzUpszHB6pzexT8XEmLvS0foJogc19a23cQBaNLIw-YHu08xX0nt4msCNkpoHQO5HsEe3nNvcgtN-UlI38AFz-SKGuVqwzyrl2bM8KUFinuBAf8-on4_Pnl6Qg1ReR9z0REAN_StCmQRYsaTszld6GC4R6qGBizplDOKy3NaqAqli8vgG8Hvk5q9bIGqP3nV0F7O0TJD6lH6ylhObq-pZvtBOp2WDUbD4yg7evrN2Nh1bVQGEKZF-sdJPU9jXSrZ77pLXEe7l19iqha-Ila6GNauzkZe7R19TtE8KawrAl7XHFLqRln_o6BXIeyeVHDjGg9snTRREpk_5nEA86jSB17iGqZdUuRNz5Nqc0MXUAeSPgwyjgebzVo7n1NLvxkWa1bi49RPWUTviU7QpIFl_85NPqnema3pTJ-MfK5ijZ9lEJG2n8xCiDoaWgZ8qITNhqwZKBwKlSDteIjZOR3pX53zSHE3UjK6c85Kl5uj-mprjTzMfPN88-bZff4dnTilT8isIcUVWWq5_hOuhJbMgHxsNo6XN4uQ_08kJM1O-v6SX7oZBEMhTKFNiMMVi6PEaqHBZdXnSZwIhn3AVYBDfBmgCYf8cPEWrnnkhD583L6MlMoxjPTuaHPqawJYwIV3t_BKceWI305mDGh9WqQ9WyikCVouVBz3RR7no7zmCXXhrNLYgj42zTl_DA7KWONS6asvq3bFi84xbrCZ3zpH4v9ATwYS6m-esj_ZOkMhqYfUws3T1Semu6t1B-1IMM-H7kTLhN9ljPkoDqykEJ1uWDm2aBLk1w9I9C-t9J9oBtTO7ujNwfdjT4aW-VsX_AHNB6yTjx6lKhz-4n9uelEJ1zKtckruAgLs9SkLhNoiZMKwGygZu9BI-sny9eXQL35T0yG0wqUcpVQFUdJEOqtj7e9GqW0vPRfWUbVsq6Y6WwxyKGqtqufic0Z188PgmQjOQghdrfQyb8fbiTMTwCP1MNoSmGUVVwLgvNkruZB_uf4iH07TXaToUDa13vcOYprm5nhA2cphWR7ZsrgWZ6FjGasyIVNY4DXQtxn-_rgXtmEFLD6mDKY3j1yxVn9rJPkGX7EfPRWi7cErdoYWLluFITpmjNn8nCo_cTy63OlNrRgR_Qf0Q0zUkCkR_uhHCLjHmY10IJ9NF__cgovIxHidYEdu2OGxXbW01qYUSgnLvOJW0Tg9qP7lx_bmhtJjTR3lHOeICm8Y7mum4Ei-g5j1DcHjxYGmI7kUmQvsRQVhzyE065uBsv7zxsoxbkobD-Lb0XY2U9uODe6u8Kvodx3OPmcFpoAYWhIxBzpmwGWeOnMDxgDdJybY8uroFOyWGnQqmNXvdkG5dTZfVzoa_7a05d_BAq_hEqG09ayJygIjA8ZWIsFAO-uxU--FwfDQdm5ckxM2thMiWMCGcj3rE0CMu44vEBSrnOq_TOINw7r-R5vnuRoTi_7mcPfXVoI-i8PbAqGfYD25mKHXEb8xtS6C9I8xdviG9SQr9ApigUIjZdY0fDYXim3x_ns1_rvZ68Dwvr75ArqRAnxwhhdWvbf6txRUZU7-RsfLn1z-Blzyzqzkc-pJUlLNiEhOJiu-nqqAbHKEhRmCkxXh4R8WZx6a2ga8JdpGTgZyYpoa-FtkC44JmFJfnzBMU2z8AeMUheP2lLDzGzGN_dZ284YpD-SzT0T69M87DqL79HRNP_ETqxBpu8n903CZVkHaaQ1QUA7toTZfFhuTeoLMaNhfhVljWGYDJ_Mbx_r2Ov2Bnav95d7Kuwp2e1SNl8piYawokwSO6Etbs3SX8wToV8ZUV1inL6UJBJOPI5_WRTMU66roggWp_CD29kgtwf4PXTpy30zxPOMrOFz9nqIVX3Bv4fxP1HcmGknSEcoW1Z5OiV_c5rptv8KFiTCYR7wqh78tDzrnMK_5B9cDKpuOktjzIMX6lGglNsMBweTyNq4YPljBT4gK5YQIgm1C73hFEZMMZcVHLeM4HTF8xiy84KAnqlGZ_DRDZIeRKUkS22ECA1rqdQ_t6lWK7zHzoUj4K_gp6gCStxJ7PBYP4m8yS-tzF3x7OotbwJGgJlwzWafc7tr3Q4eoBgaNwBaMLeKFaKX-V4seL1J9w-5LWGXtUZYIC1AEAEF-zBEQh1fXa5eWIlmWLLdNXPUYsCriw2oHGwmICjV2j-TK2nuFL7EMdeG7ch3GcamPNiCRr2RY2eMhIZm4UxfHUTUXOjZ45jPn9b5qVoIGDZTjx5BJJIZ7jeGWK39oyx0tzmLNgrDYp86G9HCbNYkB3hzFOkHxWwSszule7gaMAR_cmBm6-0QDY59ojLn4Pw25uX7ZZ2PgPqIsUJZtGmcm2DEoVXRF7WJLZIQglkAIn6mYZimhHXmLnSqmmrYW0iIP96I3hvdemv7coa3
dwFZNOCF-M9gf7R4vsYnQVG9VAHYqhItCI7vXJx0X0cjjiaCaDMqwlIo-UcE78xbiLb9yPuOkPRe1bMcacjAIuLVeCzKH1BPyCcVyyNyl2iuWg8NeMxSZhfsJkriP3D15SbgFqz_W9rI15dQwMH4zj9Lt8qBUjPxfSwSSd0OdwEf2GUpHe8l7lxFA_Z7mIV-GMAIOefgefR6fzeG66dQcIOMTzpxXLnCeYIZTMKuGRZ0K1ublhv9A1VfYvnSNUHCh9Ty4EhYixX23PvPmcTH5IRh7iC70n6KNes-C6lwFU5923UUROkrjKYefhGnCeSAeJ_I0E6TvkCvi2D2AcMQwNgFoNWGFFoSfiE3y5QhHWe2F5xn5TaGm92QM-E6TpKnkYL0L9894kgilhgoniG10ZNlBdh45pM6KzbUiPLLbXbSxUUX4qVu0PXCb1kmBtyk0DFM13jk5HIlxN7IMInGRQLFoQLMciWiO7_sB1OjzLy6PaE3COnLgWeJxtoDhO7Gca3JgvZDFe-YlcuA_tOzgVhHcKO9401LL_eHddbTJHDO40y1_CQOEktGVpCmiTVXm0X8EPWBB1cOaWdyFTVo9V4YpbGyoSfym2_6aBwNJ4wFjMIHir5uLujhJ4uGmQF1L9T4ZWCX_mwzAlMFx4g9lKEVIkbEyUUwE8MzQU1KzwMOwk3sXWd6P3JoeUNcHivOGhatEOSzSdFGrinx31jeRIDEvujpOU6OInAjL0o5ALFPpGaSuqLKxCADUCWT8e2dSuafwz0sh6jwYYRBAIw0cgD2q4wH7goff0wFzlHb_BCufSF6rztlVwh_yzTT6YVVVlimex6gXNGnWkCyZQn_BQV-Vqqbhan78bzizKS-3s6xhu-ASTenXw7NJrIfNgJxxMCpW-abHn7KXbI4K7TpmbRZH1NyYLjbq_VOpXoNwnB3-MagYHxMr4C0brgm4X1U4yfsKKuBrGSZtet5eNQiExi1unIDqRQczTWgWwkOoUZdVezu32Nk6S1d7Agei83s3nP0tNMlV4r6ANog6nPo_aKbSUlGS7HFs-9KBiewoTKqIydSjh57O3XI5n5kZDvYPljEf-KHSghlzOQYCMVh6z_hT8e3q7B57_0evNUFGO4_Hvny6f2KVJLpIBvtCdkqhjUe7389X3OR_G8UNP7gGPBrf8hV5XSIZPOMwvw6COLCDO9J975ZZd3ec4ZcAqN0BPJ_llmbOPqcT-JWrqs4ZVRRtySAd1_L0ANpaZL62Mc2x8liRsxt9akcTg3wd8g0Rnx2Dno7VUL9reGlUMU9fKyUQNchwbhR6harBAN0nY4g40STr_s_lsIYt-7EI1K_kt1Xgu-O3lKmUT25cSvAyUc-kvbzDJEHsUmOOuRdH9N6HnfrC3bqj6EJx_42mY6qcRDAE7mk-elQXTySDrrq9MsyzH-asw3DssZ2D2TFmaHpw-aKcgATQXBvm0aRN3Wlrp_g7DouxRJNL0Vn6FJPfUoK17udBCwkeS8ndhjk-Kof22FfsvfrB9OQSZYgFZmRjaKkYAoWT1wXb8czGmnng1Slcn-Av5m0JW4C80QUGZLDJM23UvEAn1o9LDw8okbp8WPu8HGKYjHkUGTE9g1I70tHJKZSsc7dTmXG_SOG4xQ8jQ4h8iK8i_9eIPxnIDIKQKVSmg8MIzOVyTgj0f305nisKCMWRngu8rNgx4HmfIhzovQuW4tqZ1F5eoe_eOvvC-Y1VxlWM-WeHwwNSOfaV8sEcCrME75IPwINFdyjl0KoZ1dU_MfGP5rL4JR4tqstKyJ-FL3Dk55MJFFKoGFng4bFncd7naT0LKjjSfD0tuv91dbxnrg3GXcz1P2MET5CLMQOBq2dGtQnciTgvGWp5htnLei_dUR7W2kPPa1BPtj9iTysvScXnI4DKQ1GdkBSyC3l0iWLwuAiOyZezWYRCt3AY1FtIEaAABPfB0OL72OQ6iNMG2lmGiK6OLK9VdwYIDq-MnSedDGQtpWNtU2Jj3p2SfQzK6a8nDVTvUJh72g_8Fv-DVgs8v9Hlx1fMImGLX4Q3bvsH_E_293pcnNDKBLWlm14eW8Bi-Rs-LlinSXli9zAhAVuPDAalEoklk8jDfuYRPR5-kw2PZKJIcuAVxUQjwEILL65oPgg1ibhm5pMtZOU-4r6KYDiUr2dUtooE9SPDo1RrI-N8GiFm4h7jTUZDuTzWkXlekTXmzwvALrcv3_LGadqtyjp4sy1dkGxKiIlpV2DX6F0i3HSqZOF2Fl0xujn3VpNdehDtwAVP90crZ6P4F1gG_7ac2V2g51tUgZYRoecK5hauSyiO-wPcrAOX00apNqPCjIZxpHXPNorWjXtv3Lyt00-iHDibzKMBTDFF0XsjfLosY6lCOVy_eNmYZck2Tt2czGLBt-0K9kHAqj6Mo6exiF8RS9bTjLWB3YjDACt5lB8ffjjIcY93-qbbIN912mIYgTwuAIFc9AP4q0h2t1LYeaiNHT1IPLTelJGHQ6o5crvZPHN_2OnCxps4HOskUhd1mw1yNeQvffC6BlfdUr3F_x9eObNdn8JwroJpzR1igt9nwzMNpRCAH6sGOdoOcYyoC2aXYCTGvjeQlOX8a0kXQVKka-Bcd-6ClJbwusr2ebEE_j1JVE_oeNbmQXq8rTaf5F-ZJCOmSjhRgwyNCzmhkA1GNh3694B3SYJgvqKqJ-dpGoETdT_3MW5sUKgbvO7e3cuwSWoxrHganF8hGslunGAT-9oKMbf0Yn-3KD-rttwC-6QlbfQ7OAeeDFtasWgyVvG7_qpu8KWfWOSPvp5R-2VCby0w9SU7YuaHA6ET4Twh345ZZP3BzlfpaThaf94WFS4DSE6_CkVMTqtWunArmyCRY_gcFOFVzCyQHp9ddqHrp0VGf8SUGjPumbpcQ0zW-IhA4V6HqVM_GvjJQ9NZd3xtK439Chj8Q_HCRdJBeQlicDbgo5u1YHjMhtl8dDHtzd29PWJeYXug57XQlAGh_MoPLCqnzEZdlC3aUfNucFQPoM8c3GiGjyFL1yAymhLoYs8y4pSqHHgbu57xB9mc-xCw6ESou7QtIom6b9XP1crnuZS5975q56DmYAndOxS0Y8wetF-o7zblvTucOTBk4846HJwonuITh7Uv9cUY7zWJdRvt1PQHM__tdE7J3_Y0Al5fyX4GSgRa41tXW4TMXv5DrmRSUqCJeAr4BQGM-4LnvB2bJSoxQniy2JYhBr3FSwCDDtiEvRV524B6Mw0qUxcOVQF5FF5Ipxeen8sCupoEWy1uPxoZdQkidzsQAm_G4Pj9vcch7vVQQ0Lz8_hzqLPsEjlt83YQ_R47unS61uGw_-LcrFNGomST3_4KvBV5PsqtsHhuHm2xRqVCKrW_cPFYZ6DnMCTOOmZZ5Lfc4OPPrDkc9afgmm8UkzjKOtu3_oLM_wbn7-yy9wT9u42aNeu5n0j4p1M0rr6_LMxkIpYzxcuehO6K79E5zFRyX-1-dGN2Q7Rlkw1HszgtlaxAkmIrkW-iOMUg1ZI5l262MM2V9BfdJUZWb2roS6FjRTX0tR
OpOOj5soM0SDuJLj6OShQxQuuQjNYM02UIsIpRF1EiZyNKdqVunmZ5ITGgJQkrV2OJEv5NsjD07pxbJZ7m1LW3VKHVYasYzoyJ7Ev7Ta2OUtdiXPAiKvp8bNlHH2AP7MfDWJjUTB_AtHuaufNHFDnXmdc6foSzkV-kSwbLdRiZrvMZdy4u19hwON2FtpmMBNtqj0IrqVREe-5PqGkqZOz5xtfUmhp8ENLM406vRSjm5e0-YpUILsiwqFoj9VnOUz8pRKmNL9pHdMDzg3WufE10prbM0Vyf0R9KPBMWELO7h1zntym9ZGaIz7_L6n-9Bm2eOPVMj4yA6L2DuysvJRMrnyeQdfZcZmZtJMbuXcL9sOhPN4Hr_rWLw189hEFsCymhdg79w3NP4Zd67ggBsoyd1Q9Srm2pDWojOyKtT_gBwObeVbFuetpLQBVMDTklLQqhhxZxC2ENoyFE0FCQVV6Ews7oohoFLMdeHD6ahT8tQBC0Mzk7iCbthrHb0J21d09Ml3ootTqbQMq-nzE7GejswKfwFBcd0m2zJDfrOAGEGShQg0TkTjiOJg3HdOobssFT8OMbJDGy-QUhDGRha6LxAsreeFB083IXXSTaqjEhvX2vgf96_1rkCutRoQDaey1rM7qaD3l54VQww0L0tvSbaDw7wI4safcI7CEfpaTKs_iSTh1WLsyP8M23Y5p_fUcHbJ1yK4F3hubJaVpYSYQlL-96QFK-XSdglJaZjS3uK4SKwXcDgch9H08QpNv2GQ9kv6CahqPH7Vj_NxOVzqv5EekDKu7TLuq5pM-gza36fr7ekrufvYrVa1pC-1TF-aFEnWI5H-lEYZRfrkdgybAEIhkJMysyDkfeu5FnrTtQR3cVltbKYVANA3zmLSmD87zdxX_OLniclICsTivfWs8Ca2EpzgGpH8eYEcquQXHN93NX1NjnoiPZHo4y5O0SUaHxpcVNh2tIjxZ9gyKh_WD-K2ck2x188C70d1EZfndC6gC0tcCrUfRyFaubnA7hC7CA_tsWhaCWz9L3RrpVPNxN8lQT-rV6qrNL9W3H_jO16z6zToyo0qlpwiIX9avU2NtV2MRnwbInd71GvqPQdZkaAJ6ArKiZrNWck7tPbqNcOk0Xwk0m3FcxbbFXiuesJGwy3pZ3QDditV44FdE1kGlQkqnv4J7mm_s68XXHQiwuZmBuk6lVnvA9o80bjeinY_8mh1jw68EqYI4kmaaLpNfHvfPeVKDreDmAy7vavt3LPXEQnEj4JIijRF5akRHB8LJTATWEBftvQbUqaQ5NVpBuQMEoNNudc4zDY3xzKxes7qwkGvmQxH7O2HkMU1yCrKe6x9ANhn4M2X5geO9SJPiViI860M-iEO7_3ZBogXzXiJquBhfghivsk_LTQTEHeBO7rLE3Mm7pB-RJsBmQJMl2J2eW8Aij_-lmhbyHt5SndiOZhhHqQ1BzPxDkdu4c0w6Vj3DRoW7FZRrdqIwMfbvcVA4XjIN-hxpmBA0cq-QBi1zip0X3Wl-X_k7VK2WMK2dFqc53wxj5Kk_1gHRwtUs6NMgmvHG1o4hdQlvO6jEIPsfEaWPo43t1073gyXduFF0BX0dGf0M35_2nglUmQAKjd6iaLquNR0YT-9OdozGDj0lXg4_l9PYT-et387INhIDJ5xB7nFpYY_rmyC6rZlqnvR-6ZtzDcCJU_Ahm5lMS5J9aCmr57mpWfdyjd2uAuPstyZEZNjV4F9B1sBi0CRUzpKBDSxf8GZRAF94cMUap5OT-N6pkFZyiwvbDWMC2DtChzGjTZqgCdAUMPTc9o5VStfUaamNocyCu2PivKBR4ymh9QoFxkcomxAd0o2Rapda_0iq5Nfnl1ntKynLnYsDVBCk1tKnxxGGeifuSJNmdv7FOOc3hEPunAbjMR_ebP4I7YF_Eje8-s6vVYDUg-1r3I_1YZvlqQLMN0bDKlzPTeihPDb0MaAY0E-RuRYq9P1cYWhXTo2cUI_Et3nxPzBWz7akY3ZbhtYr4xghfB-TPNt7iOpJxGXxiqSs9hfJzJrjXcFYQOrytqA3lJEeHXkFwyEE6fWMaHPkHSFMXFupA0ayNe_3ymUe6Fb75yP1GMmwTSxsUdVbqN2WGk45HJUUmm8WcLt62bg_HZ1C1lw9IgvqWZWgbGUmT1jpqNKSURm8Oy_dhSOmNl8bI2TkSw9iFS6vDTJnJLzcKwKp_PPD3rA9bvg2naReo_RR2Xbv38-v15Imk9R2eNLFSjlsto3ME_ke4Ovn9sjtzqInwF5t1RUGJPfEX_XXnFXuVdOfMSbMTRpxfJFQniORQwdIuEPTzPLswl8vD9mHzFHpSqFQ1CF2MvKZxmU8eYCgx2b_ylfV0zSRL9muPRm_ZhQEG0yu9R1Lq12rGAnfZI50dE8zDrC8DEcy46KEy97RwlCODDOMs2wDxpHYY6GaIaLcR2KyPjUkhmZX8C-3O5EVFIDuubHHAA4l1uM3bZvv41RyYdpfBAFidhtJyJ0wWuxjf9ak89N6R4DOluMjJYHXxPGe6RqNq5pNcel_a0a0UBOde5O7CGRPqhtiU4pyB2QHhD_c2zAcos_CWYY85-iJvGID_YIF36laxCV7-IgaqMAo6To1FqFk3ZumNt1V3N2K29xokQtWglFx3fFM9Yf8FhXHG2iHMfIM7hJ4taHNuwEnD1fjzmikNkG1qAeyW7_UFsRxz6fgIFfufJ6KVJ2r85-Lrcq0EhVqmBlBmV5966Is15fMWJrvltOLRxfvFhv3sgyhZ-NfxXGD4PbBuLqwihoFYEYb0aD9eMdbi_sYiNbrquQEFwxSv-hJcXhRxiBq8CbUryQZ6aYcr-Tw8o1PIbG1IQSTBOWppPqAemVfn1eoAASkCX_8bafozNVhI0giEB8k4t9prnjUwxtNQ8-66PtvsbZwBr9L2MGxRTFT3xTz3BTU0-0RA5MZ4_hI2Gw72ASx9GnCuUxSbT699iTtH52HpYzVWqzibwcFrepOEZG88M14zIhyob5Ul4RN_mLjaIriDtcyShmzOG3UupsGWb9OJ_AWM78pyXwic3q7oSg2vrb1TCULOzOGjfObBlhegojK4_MkaNEYTCmU6n8xtEPt2cD6RYw-G2cFoZAOpcfQpnkjPqW7nHuaZFQHR-bku-Z1C2kByKNqEGu09ckkH1Op7zOgpM3i8f4xIj7VWbeW2WVZf9ONt66q2auqB8J0xyQ9Ubvilqh9TMYRIWezvfnfGQISu1zOqEiRgSc3qyb86KSE45aMG3pgf6VaXzDpRoZExPchzc1D3_qyXUIbgibkvWsvzXx41Z3E2walKKcKq6t9GO04Th6ImR2uua5gEHCuuGlg5b9rmv_vBNdVD3IQ0hxeJW9tu-vWpWFohQrmLGMJdpkBzAO4vaLyIQe1xBZBBF6M6CWVFu_rkkPmz40CI3Mv1WXjLLJRKqmyvJjML45VzmdtiBxNOGGDuIAGqFpAX-nGbxaHPBZboihcrh838alrngZ9DlsyFf8JCdMxXAtKd5hymL1UqUMKiFW3RJ5vSmYn_c1UznrGyy
UMf-RRlU6Xu22E1D0V3BVKs2K7DVqPCw5Gl8BbOrXfWb3_c0Q4FptdGLOJYtADMT5vdU6zZq79Oqah5rCB2P62fLHOWDwRqfYDVWAlE97QzFGbxXcdggTDdVImLLRpc1r4B6edUUidP0weEEK_Nze30UTbrf8GPF9_UVUpll0w6HFabCHfHOHT8xyU3vPyrJoMwU6IfJ2ezz2bQSuxpoAoAIjyzY5fJHP_SE8wgS0LW25Bxc5hdfxRP6AIzntEtWdYUJBUVVUuvVx2nSgRWN2mdrQRxjS4-3QSyZsv8_ARw92h4SApr0E5VduzvujP47rUX_31eBmGXOySibcsZtOADxJEqUjiaeY7Logdg_RPap0w1ZBir7pfbMV5nnDb0CE1HIza8lcRkVIXnl3fDUCylp4wwQgVz524lVA0eb8DEh_wLbt9aof1Ge7aerF55GXtK5BmWlRluPT6nxVttaFMXH8EdpLu-tqE0A2Sj913KTng5rDXVAaTSidYz23ioR0nHfH76V3dy8LPRHB5_fkA8VtXdUEv-51wsJ-p40f6YxakjS7awuUjGem0sWxSYPABwB5R11Y-iPg2Nf6n8jvN-uFzLx8TZ27LulZLJwhYtVcd2um8e8nmUy-gnlEJgY7UhGzSzFqO4m48BnkLVvTMezAp68-1xCDkFhl8eD3d8lUzlejVH9p_5q1n2EcbrLC0KQ8ItWLkWW3PE-_xn0Oq5ZwDOuzNY8uCVy0wrD7jnhfajV31GT5J8tBF9RgJ8V_CZ7To7ODMLMGPryL7j2nStPXA9Yh3hYjuWZQ5WLnj5qblTQ4O0dCERtXPn7gGso65UN9JlI2sx78rO5O8KcmU4MVJSKIsCTfKIs3lXwbjWwNsdwmpxum-Y5S0D6LcdY5Nc-EYY1Wns_tiPhRWqQiwBDa2uU3ACH0aLMPyErchQYzjOQ7e_pHRWMdyvfpDmLW8OTi5llw7jmwg7L0ZW13qlHzQqUaExQofB3QuHknRxOWwrezVVDaoo2wumwXyyDDDGbwycCuplVFFRqb0dxMjOJ2DsqtmKWPbYSkf9YUEYqyxYpp-ykXTCNvVfNRw1SZAVFDV6yuF5SVixkL8i2PVZXMpcwf5WGZJh8DYV9KZixEQk5BgHYGxQ5KpTQdsj0UE2vQ8se_J6ZCcfWDxoVL9aIQHnTYUIsD3xkqPG2Lh16DJPK3QND8u1bEN-dhAwLP6VxzJhyqF8sYeA814ZF4L4urGr-Y6AQla7K8aLbOJKKWVfVnUg5VWS7EAmqoN_E6eZfZS04l7ZWKL8OdN7MNb_MnbgUN2KthFgd_5ZMRJtxqt3c8JIcfjUSRwAmQatH7eLowvOT2Tcl4I0oRm80IxONmGPvjGW_NL5cgkkp5vuRfdezT783q28NSZrB9-UFKA32zjJQk7JCRFJ4SqPSag2vxh_kcqejERk7gfAVmrk9Iq9c7w39vqpurXDIjW1yRoM0b4hKruYb0l0pZyNEAwGotiCsAUJ0IHqjYpcSjL96D2qzNxhPE5tplU8zYW9d6c5UuuzGnwBjINwoKHdjBRqBdo8Nx2AAjszo2hfPnlC7qPGMGdWEZn7NX-w0nHn9Y5KsZ_MzaO-hOM8AZ4eBOL8QdVkRsumHXhApqudhTmrrcX9kuKcGlW_3Aa-x569SgRURObquVLdLU5WA_mKhs0__zIliBuNm8VL5bqVTNBW3ucgla3Fgw11H7eC67EJIrBbj2j8lAHcorwT5SIA1_3giLhwP-rFcpkaKXgIKzc0Zl97c8YkZ1PZoyePzHClDNk8aott6gAOH8yfKO4twxNVs9ysIJLy3NDbTLvc28PcypPjOJHNudsR6LNueZLNSwaNzn8cPuwO_O8qxRdMY34Xhofe4_zSwfxq00zPOfL0w3-fbgTqc0EZyFsvBEE0sixgk7blimrUvXu5ZNM313zFplsb0rUhPv_rlhEIju3DbhQUxvkvC6N6hwlxbIgZZwCl2BwhZIdBIGxrtko_1w-0reRIDRvxVW1WfL88c1HWmBRiOE-mKt-NRjvM7bSViEEFKxUNdJ3jhPrgzAWEaepeepK8GyAnsmgKlx5RM8EdsT0gktJV5C_FfArGhDWnJgC42TvJnSmKbe7NtYgk4bBLxm1Jjdr7whbrrTb3bVkCu7UihsWbm3lXscl5ULrVIAElcJswUp-u3tthk5iiyR4_4Jc7k9P5NzrcLwJm7opzwCBvZBwYYVeZBoLkXY5wx2ndoV0rxKmzW-GRXZgArvHtncw21cV2Pl94zAu3CMveXC4zFZ79CJEF2KbVrWYnto_MTBEiNBxISs_dGyQNFXcIUbYECAIXzl9Hjq-NLDVasHGMSRq00eBm9GxUc39jspex8JgasRWSwzOoJ4WKruycgCPvbbnvKkIGJZvg36_nHTsHlCIq8I9F7n5we8eOzIZVlP1BCpokbNHNbVj0SMsxhmAjoorVnTdI-cRe5wYfr658-aMJ1NjO5c0BXYOGo0eabCKmMhLmzZhW3iHmcdjQCD40DK-4EQR0a8liY7z7v3CozFvQWjLejB283KDTLgdcpOiSrFi_bpTqwRprNGGBKaYRzkMfh9VbVmQhFk7DgT2tZVGFCUm4WYyfNlP1LMr_OCa-b97h5rXBg6MgosKcdjiLRDHXZrjjLIe4igB4z_Y2pul6_KAl9WhJ5QlkOcGEtXQsZ6Rw55zZcSyvIBzxpnoWrNIeVJw4BqwuwztxNVEDHE7My5A0yVgsEFanXzpeo17qLE0aEetxlbziqgKoZd6Uuc8ZdVFTkZygBgO0zWM1slZRlPO5wtv3oIvdo_Mwq1CvakXTLVwTIqZJ2tyekdwe9icNv18B2ZQU6KLHQGr0HlM3PVE-wUPA10uUWKtl8V3bPX2o6GrTqV9IoP6zWApexiD2AgS9qap3t238OM70Bwh9hAIpV16KwEmBQU1EqOTRwPYhFoWhcDnzsi8CJHNXFPQhJspxjZzKIjlxBySrY9f6NJwVpqripwvk3FY5TWPoKMVIdM1TMSUi75ysMxumdXJOIN0XllhS-gTd-sky_5Gv1-f9Je5CPihetdZSt-J97T0KUVJNszJoFG0dpftQBIgaMxi6MdtBufK2827PrG5q6hgcjqmhLGmlPmAf9XNjL4ZAgwiV5wekSmPwk9bq0h_bwLwMUcFTOMj8UYN1OiB8JMmqi88rU4UudZFmW7JptuDnp64JvYMkv_KaKzR3JNFXXbZE7_7rkl-T-eC0CdcI9kmQffVa6QcFXwV-MnpW98qVgpz_iMK5ApP-lYe7QB8BUHlBJUReo3OpsnD5s-XS-RPjIetWWATIuIZqgo2Krdn6Np5wIZSQ7-ibpIzukOQXW30RIb_2Y2h2Ik_qD_PuuyV-lv41Nsi-UiSbkBeiZCPsC8mWD6mS-3omV-rZxcBZ46jmadeODepu0DX6xbBx_y9hGAKIL83bncdCjsNufL9BhtBaKOxI9jbOtAI1cRBECQKARboImbjoAv_pgtmZMsgba8FiCqSOgalSLs24qrbseeKECaU8tr6VAeoLfoaB6O-p
iy_dEnkln9K0xAdWoGKYvUxnenY9geO_R5yF_L4ThILVxEZq8IlUFDgLbxAzC3qi3SJtc4GlKMtbeWs7oZ61SvFq9g47W2J8tU75UEyQvyoLPG69lIKPeKmLN3_qhMDGeJBdK3JLLH6d_3D9qkYA9kvhcC9qzKN0_VrnYmfFLK2_cFHiECdu0UX72LC4g4jrEwO_mIwUnTLULyLjWhflTqnt4p872z_Yii6QBeeyWBK0UOAEjhbJkJfzQPR3WEDh3dEwq3Ya3wN31c9ifS2c6NHL8M_jTpFVgzh8c3fCUSNP7dMGeKK7h7doFkiuyK31S8F0lbBAcD3_bjs1-TBnzZLKLstGhusON0-bbs7VBrn74rOzzY86-5WjFdv1P2F45Trgi8rCDBL0aH_fHf5zq2cMWBlS8zDkrXaFLqp6qBUte3zzWIXx2QIrbiSvYtgR4Bdl7tDSHH3NBT0WV4oDkpKa_gH5irI8jSocMVlyyMyoUImelU7oeE08SURRxjFVlZe7C_GJh9yTlqrN2JCjJkYBdOuKDqQUH9Px-qSaRzMGS7f38-jAvgg_zlQk4JLMXQgb5FLciOG62mcp1L1O7kYrF_v_a3SSl7ICqIONHXZKKG5n3FNl-907Tk9s7gWDIlzvLJ7d95AgPJyhB8mozqPtOBnpGHfPFMxm_3CxgVZTgIrvpaZR5PIVNYdgfUxZIOX69vQ0xyIfJukSOHKOtq556sy5kxY_gFTAA14WrXjx8Zj8nFsu_BwqQwAqgK5a_dJZzUrZ5JzmR7tkDLdG8DeNHTkm1asKDfatj3JOvaacnlI2eGVsbGPa0sbYFE3eEhNp6uYY4OFJGJJ1PDQPCDM13_V8yqyS6fYaZTq1NI-zvR_lqdgZKOIANDQ1rrBiVpDyn7IkoO2VFNisKKY3iW1Gj7KH6llBIL0bBBrvuNIVAr3BiDvY8hFW6IGAg5sSUHTNXNLkCrCyN0l3CsI6CpJOVjV1nw0uRDl_E992UfxyXwbWBTcc7QFLAUCYftiBr0hMtKNcJOpMV08YRoW3hQO74SqzrcJOqjEzNJECIp-95z_GadbF5OOqy6MInBK1vATm2EWLr2HDA8pwZpnthqkuVJgDsLKabEOfB8KG03jumD_d1Rzzt3EY5bFwNkB7GJNHaPGPaiOZNUmSVU_REh62zd76K3q-kLuk84u0a6Fsdfewk9ylpNMo0bOo1NoOWAKmAzd4ps16piXrbGdabby4zodldMd4KbLtOfuqVQ2lfsAVrBXUHb1LRUh_CXkoUnVKXZE5R1CV7KiRyPB96rsybLy__o896sDmym1AdAX5pHitS4JP3T21txNcOtsqa35caHOJPu2CUCA3MqNmeQChBGeZbQKNcx4k1evqUTQY9ICFspA8e5SsC-saw6s648Fp3tSlmkuj6UG-UzlAhSld7hbN2WCKGVWq0iPt8ryhC55Pw8ahYE949TtKpg6BIk0UQGYRRLK7R1V9wCcuN7LEJLXurWhU_wD6aThZhs9OymMEVRXlxa2yaJt7p8QXz-3jOzjRUcdCIx0Uxwb8VLcX1vkoLAiSphGNLB-MRDswROgpe9-Rx1yuP4XKmjncR-pc8RJmBTzw_7Y9aDB1Wh-ooWz7Fz3fUMbBT6Qftq9LVcUbCQg3eJSnWcdnOhj49hLbrC1RYmoeyMVnkQJxr9phbRzbXLCAUzAz1ZWSvY5kDOFjWPTBnf8jtpo5mPys6frS2LYMLmoNCYnwvUhNZCPFUY79OaDiWxcxwoMWwB-_IT0DoFDGZaiR0k6d00bncBZSn7pfDYGpeGgmfIAD9ltyOhfQU5fRcv-HNt04UPuEQ7Lu1mDsR1JG9AuFsmxbPQfF1JKPw7ceU0RBGDXl2F2M4JLHO9R459MDopimnqUMEj8sPmObBLMWaxuFhBIqfa8hO7Ci_sxX8pGeC9K7g7SwLHbymH2AcoD96mekIfxpwNGK_IE99KDA4MOJGgtp_hsaMN0o4OR9605eTwV8u7RIc-UsolWn4cCXnB6DbtS8UNP8YR2oi-hU7M19071xpepml-2FFzqiFf-0EY3IUnU52WpA1vTOxhFoDjMq91Iqf-HEZtZ0J88y4B7vhemLQIZP8WrzTixjYWXGa7HB7yHgkPtC8jmrdPycqu8kIc6XRe7_TV2xOVjoSZc1o-j6J2F7nWO01L0PBN5RHvNqeSBUcGarmqcLquDRMqHuRlCG6mJpd_PnpKn94KXNxULAHCMaDIlvutmnqKxs1wj9GYOq6-5xZSR72t8OiITwzgMmoEJyttng6jd3blTmMlF6g5pv0ZIERGwYYbH5rMYxKU9bk5WgZHQk73W9FbN2Gp4ZP0ug3tcwGhmnKwz7fCw1to8FS-ii5Yqurf0tBxVr0EZwFos36iCAlriuG_JQGW3hMDS2XUJ_kuM3NJIHYUHpluMaSR448EWwin69_6jxS1DnLINJSkhtQPgyPF2FtCoHBYY_ijJSgF5U-AjRJCrj4j7T_E8ggPRAKphOipe1I_ntHhl3JlSO_EJ7h5LlBgO9m3auDSRPgW309yM2AB0yFUEU4QBV56Z5kFG8OuctvJAupXABUm0Ucuu8cKJ8SSRWrnCEi8YRaERYnnsSRQoPptg3ktX1eKsi1GKOe7f_SijYOQdgEKqdd76IhWGb5SFKU5NC-jn9ATPRIAr2s3rAjQM2itHroefcEqVc2PXKTFPMPx9sVO6Rr1qGia5JoyLunAKrFcLdsqJcf8vv3og5SPQTAwbXJEDgganPW4RtCDIhiOnvxG1Av3Ht2UnUCO6kweoLTNnEYj_WmkczcJNfg_CKK4HsLhTCIkx8MjFMgGe7sIiT15Uqq129Y3BqPJ0dnaMS9sKABXqMBeRkEVvHNIlCNLVhz9a9s_K953lYy65pTqy4RN5mVEIIZpZV0YnK5twCLpuEHqBIKtvoxOFmXFi0-YupJI49aPqpjDUGmz_8sNMkFfjniI4eVb1shVTMTO1k8CVqZV1OTanknhu2C7-nD8HkOgY9HcZfEPJfb2mVovL1dEtAu8wvLe80AwEwYfhZ35hWKEa78xA87TE_pVDjvpS3oNOivj8sIZk6MQA2RHaHsQytP8NvgZHuOp305g8OwMOi7cdB4M5CcfZ85LxSkCoBJgrGnGvhm53i6TdVopm5zAc_w_BiE7UN2xpok3x_Rr7yuwCJnD68kuxNkSRPVhPdYc29y4-HdIQJIU_zrsSFuYfXbUgkHvm6tz_dqgX_KNPvv5muf_O1hk09DIBuKBWLGlmvIGciTO7XDFRswUuRxfXnzP_fvwMqJWbIIGc6MeP7LyPMzDbtUJdHPdC7As2WbXSYVonZOyFJErUx9lo3hRyslK4YqCFCbhBQFJuewYvgr2QUPmeBzZNgomn7BKktKyumKAZwaT1mOBfl920bAFi4cQbN9faeUIQWnRzrzNpnjEgOm5_16bEwfxX2Y1jiR5tArn8-VtFODD8Y3YIk_MS4z2XVVhnLWN3co9uP2ntkDCl7Wyn92BkKdRHtyKVRwpXG7FxKZlXq-OWlQKrHE2IY4bT
eqhrfIpwfr_h-Q7VMM1rj4UnBxkbSQdWHB0kohQXM1FfEU3jT-P2GQXMrAyyz7MS4WbW6YM3DFyuQUQ2lLHhOT2vZ9zadQMGdQSn4huFKoDGnPeF6P3BElqDRneSBb36X3Fabzz7Y0XY_WBURFZfDvNOCrSAbxA9QAcrOrlE1BWGEypNehLZnv5KWu9GZfeVh6dc5WVof1k7v6SpC1leUKpFsTJym8UtkfQPH9hit-VRFy4mf1GWwJBmpODC70TTD-qCdb28AJxbf3-tEchO4F32WXgNwLgWoLbea5PD7T99RsEMVatWPQM926Y8HHTr8ufY5A6NBl49Mt2RVSEDaY3AN1uk4c6SiXZ4U9DWD8HslReIH5W7RRftdVQmcnQi2UpVV0SLPHGYVS2yUvPW7aBFlO9hY5ds-oMOQlP-Uu3zhoKCP0leBXqemwLwz1t1I0N20b6qUkPndTA-LaunurX67TNiEvMfKVKZNDk9biadMLAFY55mj5To5oprmPwfkvDhZ3paDrI5wyN7KZHKqN1xW4l2Lu7Qy_YLwdBD9nwuEb17jruBBryyeZVpy75FiANCzeew-CIV8WljMnDetXmDTu-zgdUuVLPeYc1s97UI00IXuPIN1hKa1DiwxJhng6yhm0fXTXuq6_-VOEm8THr1p8bPVjZe_7T8oKbBFyxLoko8YWPqAeM8cNru4AWo4XT-6W3LCp3SfOQt3oCvkN8C9SLHw3dYm3gAjlYPUhe28e-YYldnmTFEkR1Kd-CHXWbkcTWbqLMYaoAhtNS1NV1qkKKVndN_9s2EoTdYXfXh0CVzTlSkmAleliDI_nrCnbY6Jrb_ybVZ4uammqivhJ8dbQSL4hcpQ202bPlY_ZBRjz1XI0tleTnDpEQQS70bYv0nCwRVH0lpfP8P_RUQQohrjqCStfEYkNtJ34kmWCrW3KSSloodTE5hIYtDJdcfIdj_1ijDp-6QfLC8SCerA3GTU36MJ7rJmbZzfEg46LhgSW2D9Mu8qgHEp_P0Z0O6HAbDQmGxhGwUXchd4HXi4ODFeVPKr2FKDagZ_TSS3quhahcb1KbugRcQMePHujSa1LqU-ljav6Ei9vXFz-bElXYCB_QtIiQu1gpWw-rIlq_s42uLP5X-k668czlat-0BdoS6wHfsxqMVWnfldvFawGZO7-sBSXW817u8JO_MeQu_lpTtSfHL5qWo3kMzmTXxHvtoAg7Fp1LIWpNWGfER7s6BFJtaPnNRETxlaoIv-qXnEeKOkc5Tswvv-ApYu25h_d9UQ0EVeqHI-i8mAHsT3X0vuU2tGmuJ9zrl-WzqWT7-DzSfNj31DMuadE7avPraPQF471nnaAN986rNgX5HjHU-ioBqoBthBVMCSvLcMN5Yuuq5J0vx7RFqNsY312eAYLI-ozPKPqtG-BKyLfiBdtCoYGtyTLeeQM5I0jxdcF8NFwFIaeklpLEjCwjLv8Mc7QDpDMWxliVPn_YZ7ezA-d7OquQVJG5rB0qVuMqPldnjTf-N_jtyR66eyB7gdo4U3O1uvdzk15z8pWwBICLfq__CcAfbgsIJvdqhD4ZehFUcxAjPCNBmVJwrN7jBcGShfkZHcujVr-V5jCWHz8QfsVQrTw2nj2EWFbjJtUMTP7aTNOa2oSng0KwOwDaLexJeug26T4w5GmlW_rv72cF_dlssXLTPcdrvqGM8rvlkVSZimnLWjVBjT81JJNtxZMatgrlx_zG2NJ1aAV0mO-GfgQ1tiel-y6rf6tz6IGBq-whExtiOwjInUvvPnbl06t8Djd7yLN8sjhTfn4z4VMHC6ciCURl7HmDRgYSL0s5WcUC88rq_94rPJvnGgJpacbh0pzPCP0D3vMvN2kj7VZcETPo3sLJZCOlH7D04NJ12blyOVsm8HVVy0IesjxIEitJmEM7SAoPkHX1FErjvbQMtXsaSn4ve2PQseJWtWd8I8cR6-OrF5AUi1_uTv5oerJI3KgQ4ziiHgXFWrgGv8IZ3S_2RsGLTfg8WG8qtgIvEJWaewozOVrWnTW3VQH3RDbQXQaYQ0Cy3EBOPz7wgd_huch2tV1ykmrA6IK6KZrAk3_AJ3xjKqhqA1eYfPaDQeSAblBj4KbCG0XGi4U-qC-YEa6qBmGvoxtfd7IvmPYdcIFa5Jc-BvDV3DfBQckvG7OOtQIVskVAbR0jEKL-IkUawHaekCcuHhsYZgodVRvAlSUSh0pwusjQ_arOUcnYjHCBbktwV17ZcVfuqD_qCfsTec0BZKI-_VP-JG_MyIxNCeUP7nQPbyoE83sxn3mCUI_jI7iAoTaohGUc7zm1jsGh1jwr7jrCCDt8seFrn7R4fi5OypzGyVagVWLCki4UXFVBeT6brqJ3ejqkg7X2Ycp7gJKeuHUV1lvM2E5B1Q5JAXdJyzlGPDe464z0b53_oKKsTn5jj4bOr8w8m8sslFatOidSo14Fg1TL-CbupLwElCRH0U6IIFrCBo7JewKC-CqA0zXOQCFwrlyfky9po2wZ6mkDGDFIRLSNOpxqFBh1fRMR2fsJ3clCAn1iVVeuOJvrfKEHPS_z3ZpBVJ1NlS3OzovKT4bbFu98QvBH2ecm2PQwE9iABLpjJlXAgThv8HObdWyrAfaZRCD8p6P2PIbm3k6GpW6zwhh8xuX7fIpvqJEBm3Znp4ZqFareocuNIj9yfpQ8t8SrL6B-MPXUDvVtXV_nEEToJRF0KkYZL_sa_hC23MPl_enAhFsxvK0GV6LOnwSZuO46A_68ima-0n49zlQYf_OEbgf6N-S1X8cicRruMV7s3P4L93ALm-31wd6NuxY3UNFOx01HuGrI0kgd1I3ZhbjhthRPsadfSMih9yV9i9jDKEAYGpbnF9dlx7r62DB45sTrX4ETZb2XwrX6vDkhX2TT4xgP4nXkvDpFcPWEIWNCF_c57llT3Eysg6kVHLidKY1LI4oysJfYNkOVV2SFnJh1WFqHetVa7cbnnlexr872WGCqaVg4ZYyuIPqusOrzwMT33S29jOq5M-QQlB-UAi66vDf_dleW7Ufe0D4padFsufB_ev8cBhQZaz78sUwc6R7vWZLzg5t8I7txMiPES48t0SoKcyo-3Iqr6hzmlEGEBFH_8B_Zr8884fX1DPScmTGWJbr3azBFyGe5h5t1KvtMmeHpGZKk9Sp-fwuyC8deNZcy9f8db7si9lsA2h57vEXow0OIE4aAdtl0EAWJ9DA4Rs1JVBJESl1_v7GWHMY9oFZdG0O45ZEV_0mh12G1wKxwChHJV0Tj-hiUpAMi1E80TCFdr935OCNn6nwYo11ryPuq6WPoRPdWTxkVkyxrzzTW7ch1YyXXndMIOQlIXhl2wjKzhjG3XelpK4IJt_LWW1hQPxoN_xjg6WvSnONHdyDqodA_BuoxWG1WntbzotB8EMWwQhHP8ajzsp7d-sSEZArLalZ4QGDZ8gICUF2I4WmWacdmrqOPbH1LssMI7Y9_59jlCervhB1jr8fAlP1wZZc9QGEVWelr85blXJikLHIYgEnutpIZQer_9R4D7pdpSJT1jtd1
YKKENOMW7tV7o4jSiyI5XAtPxo8-K1CEnMlkAcQU4WTZjItfB5di-AEr8E2eztBTeS6HCd2s_bUEzE53jyAJjbOyRuUq_iky1q371XgWkhMlerjLcszpK7ET0jjQGH2oU-mkAvyZLXj2QK76CY0yZ59yDCqoog2aj8I4anyQGIvPEcj9H4f_bncZN7livLCmQ8VswqnmMo-kpEEaltppT4410qXhykQZ3U4PcKEcA9l-XEjt31wcF94dZQlGdFb8S1zelcAvWJfeOxnlmVINLwl1td6KKzCYW24_Sj39eDUpnyQ-JyMth54Ck-zYPs29vXDQHL19IYtzYhJGuGVmNgClj9Sew4nHhdLfI434PlsKJEluWJctR3oRocUXrspngvsr-A5yZsju0sd1ZkoEHkEpUxF4dpy1m3tvULK-3Hf8HT-eGiLf_j1XoogLDCEAGfhpWQHrKvowH-QnaNY7NIFJDEhEzaD0_eZUjAcnjAM6Nd69E2SoYvFMefX9Lc_1mUJNQkHDx23H7H5lrrX6XtXqIs3hhjkcp7vCiduGCvjO_VVuRuNuQAME3gw92VQWSpjvjPAUo7F07yr18rbSL9_2Pl9c5EfueyHHNWkrfU4gM9iu6j8U0_bzsu45OMkC03TFvfH-WRBePorThIwFiV2Iz3dVTK7dL43SixJqA_bo8OqSSz6nTs0nCopo7yRjOhEYPjWOHa5_pe0FDlQxUxnxBoPICpJCFRT3EgdZ87mwXUuiHjdIVR9v4NIl6b6ymp9QjiRgKfLdcpZlbrwMB6kXpE_o5j2fep3LeOxmMienKh0GrK1KIa8Ih1xh1aHZLAfxWVkr23MNyMnr9LrEV8u-qGyavn9JqsDtk9OFQSt1GCrUUEmmkxx9IsN8TBtHOvMUq0ku4xu5_EJ_A8OzAXIUChJOHiI76CCd_rRbdq-zH6FzV05NU2-70v8smm0oUXnOzqH_NYckAvf4XYN4tln2SwboxDTct1CfON0g7v1pqeXjjFKtbJ54-fC3_rhPECPosAzV454ZhHv4W5xoir3EQpmWE7qxTQnUzIPwvdGqtkOp1FX6AAxR5Hlh4HazhI79CCBzpFkqu7X4E8yPW5j2_a7pxUIB_8WVOh7tCdGFTTtyRrthP48FL9-yEeKqKuFwAlZu2KjXhB4UHxEgeM1qfgO7Z28gU5JVOodWMF-f8h4-4PmXXXob4hrUfpfwPjvchqrEInOJHDs-85oeVXih_XqsjEzSo3J3O-vqUyCx91J77JChMIhzJdhojRB3G3KcfMjPnyx8OOAAFlRzanKJpB3CMnLWe-BANmDhP88sTsauFIxwBn6hAJ-XCvKPgi052r42LSPhZgUhRKQKDJhrVgKvXmBCD5P0JPoMeTI8F0UjwM9gzqj8xnTQSmfRm7xLkBBRKy4kBWYqjdt-Dw108vDiaRzjoxJqcBiDooFBB0HcFIUVqY-dMIRbOWZtVsVItzw84XPdtfppQGFyvijfe0L6WbcvA_axpDtCaWp1a-2PaX6zqeyHXEKZCaagVoqLACSyTqXtlInKlLmriNc2dwDP9Yr-QaEB2F4B8JxSFhnMsDwYSZ5djv8nLCq6e6_7pUbdQxOKB3i-k8jYdmT1it_W_V-dMf7RvbNGzbhrY9M9ko5mbw0J5GGCo50SFkC-dLB2IJT3qHPIugtEDKG2TqnHoKYuI-OIhCU3FqfUazBIvCcg3AMPpsfYML-Cxn7yP8Q9m5kDV3jUy0bAsRJY1Xzt4yVHiVOcQL8hryIuWzAEaeOvG4r-H6a5xbt90RfDrOlgCy2-L-SDfDlv_aUjyeRxn1IrJgmUcRs6Oa2E-3M7FXo-CEeFQ63VfjhLQfsei_IJfqBfpXlnlBQiufJ8yEJTVf2dp0kRzaHMYIfoIWDXqQfJgeM5JAPLwDtSbFEHtLXMXekaIh1AeX-WAU0v_KoDH6Uo_C0yk3u5psdkB1fr528x9LCCttU5whsmtq4PCBjp3-uCHIectUi0V8Zd8z87N0PgF3jzZva8R43U2Njt-_u6VimCY6Q8GAr94ZK08vzxFHZpYDXKhQJR9dyvoNRVgxYuqd73-SOQdxskJYGw3e5T2B8wGF97CfBZEockjpaH14VE9q0k6zXDFfGaxwvmrMhu96ypZoiGxP-n62SRkAxUe0dq7mJvJrsRRP-EJ8vG6qSlpUjp0el1UeRk-5YTYgKsCoJnx2ClEc4Xw9OOmoQjAS9duty2zPBiGhpjDb_vVz7TsiekBcrVqaKvDjyKeV4LLFFT7XfuuosqxSJVyiC1h_x6HfuZ288He7SU-s1zzHlaAeHirlaFp4FrJ93nn9ciYG0RNMJGb-yvRSNtJT1tOCa7vMU1Bt01US8haFL6BIiikGLz9eqGcJqMgWhpsQGlT17jzxNCmj85o7NDt-jzyXOfhwkxpC6JA7HWXI17HXkVQutYOIJIgX-Q1xeRvQHZ6iKQg2AYk9rNNIykxKurRzg882jIQ91S0zYIaeVzaoLJvXR6XduYvaK9-GCwvYLaMUl_8afN3IBnNICy21bp-FRm7tpOkmWY7IJNjU11rlLz8M6zL5MwGo6SZqX56wbzbJbnSc400a0O0uVZeDmtvy-yjsHjlhGiF8eExZhbLLkiM38BtU8xlF1oO9q2wlnzdcwZsckj1UqleukJqixX2BJcG472qnURSaPeCgD2WBFFq25cfCgyzedVAHTmuge0L5Krv8ScslumkvA7tiBKRJYLe0ECv2uxPRtWCygZDb0Vf2OT6AbrjiYCiRIZ2XyDAmhDG1c16xMfDIKtc0U8zEbPuhs-9gHv-iFVdgx5Cpq5gkEuTQcfb38t0fUmaqn5XXAnxF5qG2Wq1Vd5pZ4wgerw6GKXEm7fyZ1uDPpWdlhJwKwBPRsWRzs2aQuuPC4VySxdYwZlMTFJp3I1e5ix90SMOmEuWQRea42ftvh-uAIZ-nC1TieHKNBMrP-EfgfmqOSL2DfA2Y6Im-5Mf3OiYh8hsHwEMuYnthvVbbQ7EJFCXvJK7gEXxOlfPvkRAt9lwA74JPsxs_rHWUsVUXEpOJtYPYWYb3cLD5cjVczMjtfvMA62qAS5fOMyUcM3BSQOaUmDMtNA8RAqtxT05MUSOXyEcM_LgtV4TXhnNJFm7qN2pdjRVZnzl5b-4bToZuZOEgD0wuJy0-887_8Srr1ZqbzjQ7FJ3xGFO_covwsZDVxWHO0jpcbDQAYZbZfQtGLlKWco2CXOUanrNJ4MdMJK62y5KBxLZnhTALZxQvET5M7pHR0I3f0IlNYM8E_uZfDSgaTK8zg0UHF1SkR8_zG07rTAA--Zn1a7TsyFSvFyTWRijcEk4yGoip_refWnG3JuzyjEeu-u_W6TPYY1upYdIRluqHVEGXIMIIBrUHikOOvpbrKGIcTKUSQRl6ih_uOgRnZF-KHEJEmTUl0YxOT7M9_fIVflqERso-sgcH7gwQJG66Cx9et5ys3I8-1v4zZd6xtgqCvkNZo5ZrbqUUWo_K9gk2DdF4fhZrDtoC0k_EP-j7_43AXMSJaS3fZDnuvOb6_
VwivBU2UDKbKArdnads1AvHDiBDR1kjSXpuZn3EEoG40YjmEcxdn5-Z-41ffsH5o-mT3n0c3bwxbpco3-86N-46q54b95gpFdItL0dFn_oRUM4TowHsSsozY-rTB423tVrxAbiUS42nO5PuIGuSaFZRZe2RRAIT94JynlCccyJsdKVNSvpDOJs0qNEoE1PzeHxibNIPs3jLPslEpG9I3sTFCw16zs2JJmFav4eWAKRuAPmqDoP3aq-UgVh4K_YjRjq7wJ3TxQdzluY27NuEonjCDAdwLxxINGHZdKpYeCEQsbrr2GQ3XnlPlamStwfEIuEqNeIVpLrKKdVHyuhy_P5NYC2ng3qdxyQZcjaz-0WJoSC1gGJQHMCixSbZuWIHQ7LMct607PStxRm2iotpx2memQ2tVLqPrJ3imqvdbDXNXvCPOHWa2RsL_UB8XLRla7ao7FNRb3g9WDktvwN067tRBeESr02PAzjtjjCYbVBN00eWAi5YbdcM3sT39c0AtJcr2NQAajk6pst9PVmFjeYpONtW8BSA74FDogt1AjiroZBvygbrlqWSpjAXku_-AEj5NPw6b_vILnY9uA7Ok3JzLMxoYhDSOzRnb8uMnBiAw4PhaKX3U3v0ijTOpD49KE2gL5VUUwllg_hatpies7u7vplXlkPLMFyEyBIYLhksOgf4OU8rbBUDsPerihUSnkftwKQgctkiXc7bdnKLvRpAqglk-0A-5g9p8r4SPY4c6KRuYBlO3MH_rnAhGw92frj1K7dmnC7GGGwxUNbBjaNMZcGMYsXN2VlFDM6VZQnhpMp0NV-93CgKEQiJRVilV0E32I1aNUi__Iw-KT_Q91yur1INPhesNX8lk-kuN1uTKjY7pB0XM2O4_AxxLV060p411dx0Y5KxEEavbuXJ6rI4dlG8NF-T2m2NO4ct-fOe0jYHocQkFsXunFRRlh3EYzZdisA0MNj5Hkmr8VGwYdbJPU5i0OOZJybcgfYlFh7aSeB2Aqueik-N5QDf__fECbqK8akSMx3vWudq-OsInOPTOn9AXeVQnrSiKhiE2KEkqr6iyUBHVx_o5fsmrhKdjEgy9lmKMgzyweYpLcYuw71ZCh4hVERNzVYSVzJvrAZfuv7T7RAR9FTXpS7Yxzyr3bNqTqVfHtAArFPzZmY8lkg4g2KeZ6xHsQewPFNzc_Ru14n65h-oBOjrZR-sainbVM2hC2JOE3AbzL1lKW5EEAWXWtqA9Z3-0Y8VDAiXe4QxjGYyOGPL9XBY1mcmjqUEkUJ7OgNUyp8rsxyM8vqJh1XW1MiKqa6TidEik51M4YBBM0hWt6C8uZ94p9nHpq6zK6TPrO7fvlQlbZh4RmRHHNUHzVZ_LIkp5DblfNykN6tO0MOUCdCwqdaGbywZUtr3a2MFI71guJ0kYN7Wz6DXxZ16mVjzCoQJikjJlHIDkGoKhRj4_S5L8RZkS03JjkKnbyzWj7e4M1fCkSvLmN8taIG2p3KxKMV9PmQWOuieIPz2BO-6pH1-ZVeSBjNCvuxLh4zZM_0Xbjgagi7Ncsm3EaW26y5euSBvu_WsaoKiOsSd7AmcovU_kqhVzRco82jjzIr9n_9hW2U1m98Ljthpeo5ulO77UFCt712oEaXU4OsD4kShodj7ETiN1kOWZ8nN-16lFjOyNnUc2Jm5SECJCDS2TWODWxz-49JMpqmwNbiT38Ibw_odQmAeLZ5LS6kHyY5VIcB1OVn2QbxOCYfir4pJDzBuNFyqfElcRThqNtpdrtFsDmlSlUJSjj85iaXP8uhJsqnGKeHVB1bS1fg5ICBnw9z1xpINq9WDUNQa3BqJ25ZELZpRZsH4PSiAXmB7ddGhRu4g7QUHW8R1aoC6BbGtBcqqMXhzoUgqXV3CsjZ_5HLI0KLGt4SLVKF9WPncOlfxBmXlgyCt0piCPnXnEsXHRrUSUrfUiqKQ2voKWaZB-46xAGrW6hv30j-YwMTGNnnIyQOUzBA2q1SQv7VQOF9zm3aWRTaRngK2DlOQPMD15vp8rxJqsfFbTPktoiLUYKdzGnbNiPQs_ZsrH6MP3UOE66EeQlSvO0UbzB2n0qHEia1w3Er7Wg1MvQIrQ4ahguzbqPiabBCmSidEGZ6MQPEDv-7zodAfyl_9YkRc5bFnyc7PKnvGgcCqo7l4THJQmSTxvZ9xo1--1qDp2nlU4A33ffNBN0os8iCKUDKgvHaT6-tMGVf-BN2IsRPr358zWPxukA3fgGPxACeoGRBPWrLB5y1Fzh5Y6n6CE0Ul89Nacj94_VAX3rY2BZvOCAbFGzm7i-afGy9hfUnWE9zfQXWqoHVafsZB1xbxicTMN2_NHlhwJYlSDZdN28uDh2v9rPYa91P8KxQ1LAM7IIZeiGr_snpHfR2RPMKHOZthEnmtk3HQwnyrzn0JPCwsmBfH2sNwkZ0fB4vyHjef247q-nXSTkXSNT0EMw_o2t6GR-8BByv11AOLfHN9GEV11K2mXiJyQriNBN318zojlF4r6cAKXYV8HmH-AYbzkWSQIxJib9JLA7AkaeryyHifg4S6FnbPy5IC57nX5zm7nBh4yH0D8hDdcJ8KoN9Oq28AkugUEmWv74wdm5T0-fwO5Mo1PA9Bodjf4d-7FhMfUQVAFFNuNV9aB8INFL3c464wejJgv7MCzvI2QJFCthueaj2G35NlE_JztBBa3RX4IUPbTlxueWrAyTopB63gM-jgFaR2-o90SBRJFzjYj495gUVLHvqkp_Hr4S8Of84BFdlUkX5YRFnynF8Y13S_xN1BP4PJGB2uOYsLNDEpezLroC0SzVFqm-GkCFPHhKR5BuMvKr6KUG3VlhJ6W2uT6SLFe1s_t5jK9kt4gdADAKNSXC2W5L89NVO_sUiVIIVo0g7m5EYK6t0ybT0dN-9l2Mp0U6jyzreJfhQnCsWz3wamQBdBD2UaODL_kd0vLPcFk8QpWG336jqlSTKX_D_zhq6Ddc8hL0YwDb2W6aSZ-MEfVYrBxSkXqEATl4RPT5g4L6hvonUZxgn_0-LphsDb3Oh0GxITBRquHhLCXp40Rm2m6ARxXpYe1chxAfw0r2QXZLQOS_bsfFKvIXdZSXv3ey3QpyNvEUcyl6wgghqfl8W1LOF9uoerJWpDBsARSLwlR63GDU_E-qwvfW_sOMokBGwCWbwPWYVd3s8ZveRyttFWV4NvlKg6qEjVZKJ8Dhk5kTAykOJPl2I8_DKsjP_e2vbdF0JShe77ROWucAfZYytpEUxrh50J_xzMHYJFfXRoFckVaZd30LwqME3fb98KPV-la0cTPoqJ2z7ud_2bzJEe9CSHIgn6gRqJakdZE8YtaKc053KpA3W4KAk-0BU2Z8MXW9dUqI9ZBHFir3Mq_E_-dihoiUaKuRs-eawT_wN_2iMTrnGB07WpZ-77zMW6fGe2cwpJXlxgmwiUmtIJBksXoIgUepnTutNC2od72BI0rQqCrKzHGp6rNFXp9Cha0Nb4yaIHCv7kZlJYyu1t4EEEA8VFb4vx_6tNdhrXM4PcFA8e-o8OwHCecQkvE3jE
uL1SPimxEJYYMepEeoLicKibwe-hg7Pgd67QmlefY1t59oT9TcrMKHT6p2YcTf-2GEvOyvJJeMebtBI4jVI9DDignC19G3sBRy5cZ2yb2YSolcGtzNougC009epYIrx__CajK_BxVGsxO5-g5itQfVfqO81fVFhJ6ybUDeMtgiYzYKfcJaFFBTsJ-FequT2um1WrXGR0AclUsgfJxdBCFvwrV8ryD-IdqD3OVpdKmBkrgNiHGJUtgVw-tMY4anWMMwAAzHplNF0bGcMHRs-vABpYFWXue1FTjCJXm68NxIPEiDZzEzOzTk5sN8M-eJfmPmerqAJa4yU5LJjdIEBabDyeylcPB3aoLPOBwupJP3-X6AG6AC4gCVE6atTXmDmpcUWK4YH3t0iPkdWqEdgIDyHG378l4KACwS6-KqdpipBxKFn7-8PENJkgYVeX09-sujziY4uhJcRb4sapKjQDcFSVPcp6R5QWt11-b2Sp1r17A50LFz9DLgYUH6fdc2xELI5Sp7rTO91kyAko9IYHOpOdvpB1ux2FeFGl_5sSj4ZlfP7gYkp-S0Ty4tiLstRvdYaC9r7z0bO05lb0ZWzyYQZDO5d6Uw70OxAWTuq5Ei5FwtbcIyA7qqbZGO7E9Jlgp51hkzV10A7swDoDhSIicBDcT1XAIAkeRnB-RivoKB2XEzsQVzNXS1hMn4a6UsOfwoPcO7bj1Q_sAEuRqQwGhaR9VEhnW0rnrhBG423ssMYbmDWKS-NWk8qCC6maPMdzTfOMNdm0BDIH4E7n_TgAhngWm7gb1OWEYOYVPDOuPPvUe7-7xzippmNSKm5iZ1lKfZ7FrqCHYc7FG5bLspGtkSiM8-nZgtEtGIgwQ7jR0MARQxwCH33n-291Nn87F1DYwc3DpmbduvygsY39Susokss-dzx0etA928Als3odChZN6bM2yF_EAlcX6n-57wi8ydMZiNPvvt6wDmcyp7J4q136V4Skn7VIGnEE7NEasGBuFwLjZwjsVI6exD5wy0xkylCW8ce7Yn-K-NkNgY5_6DSzZkZlb5qJZa5DR1qilU7Znun2sItZ5QK4OHFeMFlg0FhNNNV0N9QroWwg0p91f3svRhpTht7nXn8amMPXWY6RHm1H1hcOzmkeI2TTJOUBvpK7e-7gKp6mpKeW0GxW__NlFIM6nSry_dizW1a1mymXqLO9yk1PL0--ndgiMHGtflxooGJCY0_UipriwkoDL_lO1apRChmz20m4mqBLfo75ox6eZlVSPiQN0V0LqmuSF1pclZXyUvhhk7_72iC0skE6By9LQgioULGUkJ9jWoBn_5cAWnS-rDbnPeNN4Ifabpqwvkkz8OAIbsyZ2f-nWsS31OntXXXG4Q3O3yPV3JKaoZ5bN09lMKYHvO5qua8V3LKjpV_ZGZJcfcn4ldoh76wM0OquzqU9xub9cqLEXasmcIiq4s7DFljXGi2n5I-Q9H0vmtz1ryAdCcCTaTOD2oRcEKIRMWd_rcG7E9KCsCue6miyj2gvehtec2gOnZlowzfDyl9IzRMjXSNdDOmFAh8gJEwaVjjb0JfisJZBKykCVGXh0WrWaJyiw_HMpP-Kb8QkMZQb_FdOM_IThBmMO4iYDGp6Xshqt6xYZUbb4hjZMc_iTi8ZNv9_lseUbMNsP8wGLRJ2GVdv6IcHVinlVPE0baSpncg6RfvuOsIAbHRsrbPUdZyKp_l_iNmXzv_FbGXLV9JUy-9-bSh1Z-2koUoVuScqH6BApN0GdmXXAXcLJmw0S4riLK4acb2NhAIRQRGqjR48el68rEKkEVNzsSzsvufrYtQNFaTRVIbf911Pd4iByfdNtkF5fdhSffw0_BzNLXFpu3qMXmDJauwSPiggQfvb3lzjS-p_HOAIBhWtxjQDQlrfuOoTk4g037LVF9GIi8M_CICGJAd3er5sLMr7qP2qodMKt1_SaAoktK_6_5_lJEqq7mVrhdnq8t0hmYihL1LJrTErbcrpLfxCUEQv68M7KortzVX94inY-5x4LYy-hGf2tyHXegMIrd_IvjE07p0wFdabBjeHXkmLvTBk5YKj1CTHKGCnLXBggTwIecli7VhFbNQz5kjRdvvmzBsfrJezOeF2-bJ2pqwDssjsbTg-8NK83kWA71oNTgYmGCSJoq7REIqhh0vhxhKF3XwaSSme19yzqe717Yt8141kOhaHQtOdAJ18-FuUuRwIfL8fQpYa0G151FiLxY_hp3g7iFlT2kJ1U6COX2Dgug6YbJHYLWKFSEKA1K4xqXEHIjUmv53TJnrRfBDSQIaBNi6ahA0sYvcFWrfS6OXU4GklxYsYT-2EgNmdT7-IKwo2HTamAE2k9fLb-S7lHpJ6xHRvQNqWidfSYQR07Z8WSOK3vsXMEkEZVV3C3__lLU7JfOtppRFC8e8ZaR7Zhrme8kphbJRbd1Ki_GYPJLfHdMlbR5SHccrF8QNIeQViEJafMNP7GohvlUig9vTAB50MHhjn4QNoDuSDjW_i_smgJlM6jQAaR9xnIkLQjasGWZhYggowhbPA8rbZahwAHL_hOPzhferMLmsqV0Zr2-_v82chp5DbUbw7TLTR0LjpVp0ovj_ynapuy3q5PvpE_IJAKOidim55eheYmZZ-jfeZ1XbwFlFBuUsu0lciv0HfT0Hp8TYCLD5zn8XCfem9gqohwav4cjRY2C5itc22SiP_M6V-sP1e5_k9l7LrDgZ-g_Ffmbg5X7XZSylpUqckI5csVVZhVA2iaZ9xaUmkXIKCvwJ5XW6GBbB7SEDt0dRoTyDgDJPPROAm13OXX7XeuXOYMmpLGmJxk3tpflnv0n575iox3qf5NZY39HVCZNk-05kbKP36Ert__vc3FUWkfUYc9dQe1W6eVMfinvTf2-7Ie_vEIhhkmuTcGYPJyY1803MDa86Y__je-bTrVGTIr3BVAYPAbijK3Pi3NZdx22nlPcZggGOwsHBGWTf0jVYJQPtUN4G1tCNjk-k8Hp9q39RhaWfOEWTOACO9waNmohe2yTnPElcDOOVXxPGEJ_1zyE7DWh1_8Wy4PxZB5KmsTVyMwUPFfe0Cv8UvEhI0D_ZhHuW8iD0xiiEHZAwOA_d9Ezpn8DG1_2sUy5mx3SrQYWPxGW5l_qwS0WSCbalEDM9kqAqOFGAavUidBXlMby1kqKILcfpfNalsc0Q5DbylogW46hA4eoxe1nREkiUv8Qpv0ysNpquNl8R_exl6fCDAYO_oQCd2nMI0LhK7Mw5yNoz21fixT1VmWsSiNl6UAAYdx3qte-O3kOXB4FnOlR620-GLgoYNgvLyJZmyBK8nWUB9y6ptrl4igiSSQiZr9RPhd5vuHx45KH_wBjOYterOz3LATiGJcK_H5gM-oyKUN5jN4Sd1ziW_AIKBJgYO7C-msqD25ONld7a7Vjf9bZNy8V5yvyNAKYG43tUe-efJDJIueAjfZYKfyGhrBLCTrnVJDWcR9dO4HOzDTZnIjpOJleWPdhKZde8dr6V59kT7oFqQrGpTg-v1Olkb4osWGmJtj7ZwhKdl0lOkNX3
1URyrdN1vT60-Bg8EoHJ50twKz5QQm8bA7TTBo_f_7wUTkokW0sNa88DCM7WU2HYymCjmfEx2ByLeTRxWD5K6RWANEvjBeMbWDSQOvp6JLI846vJG_Hg65KWjYcPHirkgsQN_sPrfqeWsdvFgUMPecceizhDRDTJ-DgmDwC5028hdf77MFqMw1Y0-mIL_WGbYGbAsaUmsJ1XXavlS03fdIXsc0CeYcAsDyxF30vU_-6X54FBDbzDGDkPg94eGhz_xNOZBiuXwlzi5wm6c_0mtpNgAQfN1uTyMIDyQZZAmuhCvtZkphIoDOInI7g2jebqQbbIVkdnd7m6DqYKXvxXlS-P17EdaHOAeaOFKc6nC2LtdZKC_TAcBU274TZD5gffHdBb_FqgXGcJSTmKvWcsm_aJF-dTSM_AsWCHtw3rm8cCisbsktwNPOgD5Ana2nn7tLcgsq4QL5_dtYxJXJE0il4tb9OKLLAyshgmwee8GRGc8gBhIZ_-TXlb201pk5zw2e0e-DXssbCVbI5YHYnoL6--EMEkde1-I23lNKxwgx8F_5vFqXaxvLuzBeQwK2qR0FlTHC2SMc6f676g1QrY_ZRWW09Uas62VjjU1tHHNpBjuV6dY2re2KTr01Bh8LiAKt0s3qKZ4xUKgn2baWkmkfqU2DPNsPuNi5UQ4f3orw4gIq0FaF6neVRZfEOid-h6rBSuMGsyqWnzSMhm8_9TDA_c8NzeflZ6qWydQRuzzNqVCcqh9h0s0bZKUqBExRbQ_DY2klQE4h6fr6C0uWSTWSolCeDB1eH59u-PffAknivliPl7cf3y2HSTkrMuSuolTzLgtZD1zk0OK5h8Gg5iJ7Mah1DudjMXPm12_hrRemPBlMK3kViglAQVM5Bvku1te4GVvbL5gIfqNnJetKVNzoFmschyAKv4OgBVl4qbdOkaYufmSjl6Q7n8PP-FT3ptOFyyKQnbENckgVrO6RjgBjroaeALzW3cAeMG_rN5CCCyY84HjKPTI67fEZnJEXbgoVzl3nNStGksVphp8V3wRh7JOAJzFLmIrGlAqg1H9mEL-V3hryIkZ_G4N1-XyoSA0kUcKGBXIwMcsvdIBfZV7-uneKN0fke1O0SN_eEB_EpQtGwM7m4J9qXxBJlSLF9hlUDzTXgy4pEfIe2AvX1LaCch-2QHAvyE7QtSBn-4uP-iTgQK0lQbyvgWNHE6BddjV9m8aSTCcU-og7F3brFsKQjrpjem5jbu4jkUIFjgVBfkxnodQGOE7AnAagVqpRcGdylndzMa_dgdu233QQZlIcQMUIEkS7-s_5fwJ_EV5i_ZxRgvRe9WtZDNJ46NZOWtP91TYqKrni9R-JNtMQSjPuoVi5Z9EedTPSgiEqbwH6CwSMcJ6LWzz-3DLudnTWcQCW4gj-JiBgqYIQDRR-vSko31KZY6gC_YiX2LBGJfm39ycCqMUZPLWY1o4FF2k6evUF04tKZ8ADe8Pnt16cXGpphgTQUqIiD1RWE3s7B7ygimlE1htb2ASrlr3XznOSduNUTEj-gDlfHPquWIzTpVVQHIBRYlAMT6aqNqyi9BO2h34mn2iPVZi1QwwxsnoUf_t_MAALFkrAC889YyiejG4TFAoDe8cWDlobHUCYhsbbdOTkmmbGYoaiBXsulbKPUbH0HO9Ke08PhWqZG5Rs0C06dSFju_XGzdFp4MGoyQ-G3zWOcmY9BPa_15Jejk82FY96pnyjD6YyrYHa2NmuBZXeWPVZuSD0F6WA8EVpr4W-IMTKi40SBizgUtQzIeraDK0CwUH2f2EXnUJD9acNznvJ9vfYYVlCTiyRon2ejANT1dl3yDLig9GR-Fws0QQUPVgaGUv2ZZCaydl05yDoOOdXA4y0nZmYZx0ZO-x1r-wiSnYlsIgmRWAFTbm7h_44F8RXImh7768D9mBT0GTZdb4BoLKXCf-sEV6XRXBDTD7QLJSGQRK2uEUUx9kahmMX2utb1N_pk7tAEhDDCkcZJdn3ewm57wJlLWg0DcykhLla0mucOZTrHv-WA5pVu35qg4l5AhYfWl8A10CEd5YfU0HEyWiX-UQUzSPBYvs5zKneIu9QYGWVhEkehQuPqJ7CDIZDsuWcxyawDF97pENODeJUPOs3bvDkMGwm3n6Wa3dgqgzqS24djOe_QgfZhyirQFqGPsvB6ilMwpcWLqTiq1llRA7ePc58qW1bdvhXncmJpYim9iNv6TgCnVcGGtgAkEUO0WJe98kLvwuk1iss98NxYoOmFj5-QQC3xdo3dzhIQxn56Ru7iLDmkWeA6JSXUt6blyqMeCZJhqxDDwQy9xkR2vFJ1kn-sEGt6ZbjVBY5rUXRyT1SCtVZ2iMAhhptNO0d_BAN5QXCmtXYdpQSZp6Bb6JcnfiCNRO-CQWvZ7lbZBtgy8rP1rhAIvzkDSie9O4KVG80dkHte2rf0Z54x5IrmXNhhCdoFzZeskjBAX6n4JiL8uR8_VMYIA_3MxFlcHqYD7JQN06XislzMpOftHi_1VtGbwtFda8j_Ji34n7eruhStg-Jy5i8R9b_T7ZuCuIYX5HA3i4eSw8Vne8PM0w3_FKj64_yg-kVcqj4Jj64le_COgUKJ_L8_eg9Weng11E-_WePO5yqJ7-1brezD_1rIEWFFwNn2im0a2b3DqJpsiS37LPOTA2muWtT2K4dYLq3cbdGMLVDpW9ySnmmHAKSf3ihfPZf60h1M0vf6HPnhFz7BGXPSNEI4tKzLsQnKRst7HgLadxbnA91wLlDlnNbRlCANyXdE39j-UpNNfj0VL0af1_6nTMRADGULEAT0tw5AVw3ZtXjtOHHGUhKGbgLHs6q0JCjar7Rn0JUXEH1sLz-CvN-35GmKeoLROVm17X4uVn295yPjhKyYoU0xmf50NyBLjnoUEvRdiB0HfC2zsRPqro6EneJnIsgj6sBoDN-9AOzQ9A4zOWQXNR2uDPQoaWyGxH1RcXPO8dFXMKGzNDac_KRLIQWjzlPdE39mKIi_ze5dkph1xHbahKGTC4tEcppjtd9WbdxvxxPYwpp1SzWZI2q24uuTWfSrY4ShxBkL3r4xyiYtgDQ__9F1auAEJPmEAQI1crStNDiWoBrnXwIAYPDRKMhEsz7_uffa9xDXoUDq0VJpWuGogQNXLdeQEcuVGrbkMB3xuMwxmXdNyZD1WtxBzBCQTJWqrRUnmSxhwIRMypAoIAb4iPp2tKXASevjPk3L7D-EmUEE2iXLYJ_ctQL2M5RUuTFBnfn0iDtTzjQEprdEo5UH841lCKXeoZB9aUjUpG0wKn10RDinqWgQzpWQGwcsJBZNbd-OllVbGG-eRScHW2aqHYorYgO72j3ee4uVO3TA6EHYH7L7DZiyc9kkb0uIeeT_dKx5R1tsd9y4jB5qGH65JaosU6AdaxPnz_eWrgH4cubJKXewSDr5D5jFCKiP4ePIDXjeAbyS52PkLDscCtAZyGbjxZp5aOMnWbrcM88plaowdPMYh5_1oP4Ehod7IjdWxViqMDwjamOAoy-VfP50tti65u5jtGZ0Zu2zKC2P
xLm0Kr5Hoh_-GyuromPjBtir2PP9JyB1NzgS8UrMLqIIuCp-AffGqnWL5nR7F5Sjxu5FraN9AbFwTmyqU5VJHlYcIlS56XeqtUp-SDCe8WY_VGG5_7o9abG5GpKaEZ6eJUQnegQPg-xnpXLAXOMhPkFGFiB4uM1uKyFn6xqe4SkMD9VygGztrtDwtlbR2Lk3C8y_kMwm107OZqBazwDzibx5OKKYvVNNAukTBPuLLk6rOcfGIr1omA0qBdXuPrZujzRjqkV1OFNMUnnLJuu-LwVo2tDOK9H-NIrVYE8wcKMpUvVJM5cD-ZYSUXtWQHK9rC7C10y7MgD1xxNXXQNw2U1Y2H5sVKRWcIuKokslHBROLCI4JPHombFR5WnzBoKtQKaV0FXfldsnsWU9V0aFl4wOv5Hr9OtuZ2EnDvRfHY3ZAQ5rakhbQVQcCuFnsDv4hU9n2o1k56zEWRpdDLxjDUjfmeqVowu618ozDOJJDz8S8n-Xhc5L6h21_zxlGFYqZ5fP2v3o0JMhrmHGYUJfEo2BWD8XB36v5r9vOGXq8rxeRTx4luDKnyq7W4o7NpAVBe2GnudiGGMyqAyGoKVdTkTFy5a9SHkEL5w2OMHGgzqbYgoJXy1SoVAUwzr22Fc-l29Juzj4c6fm92NmGvdLlR9x2TYafqij_iQlJt4inXDCqrGawJK203BLf3nxATQYHe3E_oJHrJHdqbx519QeOhLKXCnAmcyk3PY62u-qPvWEyiK78JqQR3UR1amlESlKyEyn5M5fyLD8ryY8YYyQyxSqEWRIj_ku0mO3m1duCbQXt8oAB3JMOMLILbc1zn38hkHrkF_d9IT-Is_V-lofdKZoZ-XwcytXdK8KIKNlWtHJJQC69_eeM-OD8eL95-NHBD0mcqCIWHS_XSfbSPwK9xIQlWtqfn8ZXJx_c49ocZf-MZ7-gHObT2sGXIfrmhjPgY_TIQJe08wHv9PYJYsw0bN2AWFdLdfoREyBV8KTvdQEbZgGD_9PVO9qSod_3gdV3_AH-8rWwAtwMx_kJ80XTjzTpjQqFbNq6Yw-_yyJntoKrNJmQUHQnc54gtKnH-0DSIOaF3cIddDA11O-OG3A8FjOtS69UgacEbrn782b2pyHyQTi6gFDzB6JDCzsAaWpRi4FRYavXDPR1rzDj0675wBAWAwQTJ0ic7Qm1C4cfAVULu2PpJWU90VKRvoM_9S7OydFnufTSLl11scIKA6eNlMzVydpKzi0C8h-kp7XPu4d0hEe1L7L8ctyY4gI78dvg93NS25hXWDh0rqmm9CF5_STU43UaM2aXgutnw2eXYkUN8-VpA0BHbZbI12-TbXZaTsDAGtUWvDXxqe8sbuPpu70px98Zv0pG2tt0EB7c36Dei2z31zl6KCGYKpERJTjIC1QcCQ8UGJkkBpCiAJ5kXHV5nvLCaXrqoTY5zjTujtfvTZG8JoCHQggspBb-TjPb7Od2C4thBUuq84jlQAq0mnLzM-0DyHYWU4Is8lKBGccQJDBrUeMwHKlDXutB8OCnRRY2mtk4aQQKmPKmHu8rB27kRUXce1glMWfee79ot8ENQHcChC2fNqRQGTqV4A0eJv7gaDOvUM_uNKZ-xfOy8GY0KB2lFYDQh01WL_E76DduQIh8bznDOX0ePPZd2TLEqGoJK7KYjy5oRgjpANMuOtbiXVt-XLie0xtCqraGXvcPMDFHeF_Gb_WsNz_Nk9ZQ91b7ERA6nf3HNY28uGHzSSixstcn1qLoW60pE2vVf3R2ICNN25ZrZqKFHFW16R1R2fbEOWtjMTNNT2S6ge2M08inwAOJxfOq78P8qmnC51uDtSz-SZ_-HHE62zrLIc1Kkshdi20jHCXtwBmZIW619fTNNW4SvFxCifsK6iOR_q22-E4gkF-1sLNe8nEKf9lEWvkZHa1kxWKHRAQn_wXjmcNMoASRZgeXwq7-9hFDfi0j8N1ukG9NJR0GmOWumbPo9jwaeA3s-wwNJ-bIgSgYyEmC3ZN6Q0zw3Y2C3rGh-7V-USj7Ti91Gp8C4klGLigwTLylZlUgjDNXRrj0yHx00g5RMov5Y=
\ No newline at end of file
diff --git a/app_loader.py b/app_loader.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2ae61a1be1a4c766143bc7e58d1649688188e6c
--- /dev/null
+++ b/app_loader.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+"""
+Encrypted Code Loader - Hugging Face Space Entry Point
+
+This file decrypts and executes the encrypted app.py.
+DECRYPT_KEY must be stored in Hugging Face Space Secrets.
+"""
+
+import os
+import sys
+from cryptography.fernet import Fernet
+
+# List of files to decrypt
+ENCRYPTED_FILES = {
+ "app.py.encrypted": "app.py",
+}
+
+
+def decrypt_file(encrypted_path, decrypted_path, key):
+ """Decrypts an encrypted file."""
+ cipher = Fernet(key)
+
+ # Read encrypted file
+ with open(encrypted_path, 'rb') as f:
+ encrypted_data = f.read()
+
+ # Decrypt
+ try:
+ decrypted_data = cipher.decrypt(encrypted_data)
+ except Exception as e:
+ print(f"❌ Decryption failed for {encrypted_path}: {e}")
+ print(" Please check that DECRYPT_KEY is correctly set in Space Secrets.")
+ sys.exit(1)
+
+ # Save decrypted file to temporary location (current directory in this case)
+ with open(decrypted_path, 'wb') as f:
+ f.write(decrypted_data)
+
+ print(f"✓ Decrypted: {encrypted_path} → {decrypted_path} ({len(decrypted_data):,} bytes)")
+
+
+def main():
+ print("=" * 60)
+ print("🔓 Decrypting source code...")
+ print("=" * 60)
+
+ # Get decryption key from Secrets
+ key_str = os.getenv("DECRYPT_KEY")
+ if not key_str:
+ print("❌ ERROR: DECRYPT_KEY not found in environment variables!")
+ print(" Please set DECRYPT_KEY in Hugging Face Space Secrets.")
+ print(" Go to: Settings → Variables and secrets → Add secret")
+ sys.exit(1)
+
+ try:
+ key = key_str.encode('utf-8')
+        Fernet(key)  # Validate the key format (raises if malformed)
+ except Exception as e:
+ print(f"❌ Invalid DECRYPT_KEY: {e}")
+ sys.exit(1)
+
+    print("✓ DECRYPT_KEY loaded from secrets")
+ print()
+
+ # Decrypt files
+ for encrypted_file, decrypted_file in ENCRYPTED_FILES.items():
+ if not os.path.exists(encrypted_file):
+ print(f"⚠️ Warning: {encrypted_file} not found, skipping...")
+ continue
+ decrypt_file(encrypted_file, decrypted_file, key)
+
+ print()
+ print("=" * 60)
+ print("✅ Decryption complete! Starting application...")
+ print("=" * 60)
+ print()
+
+ # Execute decrypted app.py
+ # app.py automatically launches the Gradio app upon import
+ import app
+
+ # Explicitly launch if a demo object exists
+ if hasattr(app, 'demo'):
+ print("✓ Launching Gradio app...")
+ app.demo.launch()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/assets/bpe_simple_vocab_16e6.txt.gz b/assets/bpe_simple_vocab_16e6.txt.gz
new file mode 100644
index 0000000000000000000000000000000000000000..36a15856e00a06a9fbed8cdd34d2393fea4a3113
--- /dev/null
+++ b/assets/bpe_simple_vocab_16e6.txt.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:924691ac288e54409236115652ad4aa250f48203de50a9e4722a6ecd48d6804a
+size 1356917
diff --git a/assets/dog.gif b/assets/dog.gif
new file mode 100644
index 0000000000000000000000000000000000000000..dc8c8a6311682707431133e6e04eaf4f71c369f5
--- /dev/null
+++ b/assets/dog.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f42cb8f54361ad303865d9d4907abd6b7c55f1a2eaac9ff4a49ed73c385a89d
+size 7111659
diff --git a/assets/images/groceries.jpg b/assets/images/groceries.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..755e1896c5518a58c0327189f3a895d5216d9753
--- /dev/null
+++ b/assets/images/groceries.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7073dfecb5a3ecafb6152124113163a0ea1c1c70f92999ec892b519eca63e3d3
+size 168066
diff --git a/assets/images/test_image.jpg b/assets/images/test_image.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c892906580b9f8f4af4efc8588c65b316820792c
--- /dev/null
+++ b/assets/images/test_image.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:979f120edcb0050a12d5b4a1f1eaf6bc888b89f675524e7ffcf6ae5b77aa6bc4
+size 70626
diff --git a/assets/images/truck.jpg b/assets/images/truck.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..88b0a07cdca374ea006351c86bad82d61ddb98c5
--- /dev/null
+++ b/assets/images/truck.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:941715e721c8864324a1425b445ea4dde0498b995c45ddce0141a58971c6ff99
+size 271475
diff --git a/assets/model_diagram.png b/assets/model_diagram.png
new file mode 100644
index 0000000000000000000000000000000000000000..62f4a9f2032756eb1a3270c6b41c4f311d8b16de
--- /dev/null
+++ b/assets/model_diagram.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f138d1aa02840a108b7b9578a817c974dc6fcb0e696c84121a48ff0976686ce9
+size 723689
diff --git a/assets/player.gif b/assets/player.gif
new file mode 100644
index 0000000000000000000000000000000000000000..20aac3035dd51050fa048bd001b7ed837752feaa
--- /dev/null
+++ b/assets/player.gif
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5d1a3bb1955e78afe3a77a030221d7dc0dad49fa00acc7063636573825bf3a8
+size 4357923
diff --git a/assets/sa_co_dataset.jpg b/assets/sa_co_dataset.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c7689d55e31201d3b2b66d64ac4f9353a8fa9cff
--- /dev/null
+++ b/assets/sa_co_dataset.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1ddd6e5d4130cb37e0b41f474ac78e75483db1b8964552b042bc8bef03b9cd1
+size 1014424
diff --git a/assets/saco_gold_annotation.png b/assets/saco_gold_annotation.png
new file mode 100644
index 0000000000000000000000000000000000000000..fee70692fa6c28f37488a91408b521adc9610966
--- /dev/null
+++ b/assets/saco_gold_annotation.png
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd4a6322763d00c667d66e3628f794625619de918e96017a9276a2cd2a0b0ec5
+size 3947563
diff --git a/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_eval_res.json b/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_eval_res.json
new file mode 100644
index 0000000000000000000000000000000000000000..6df5e4cf1fd4a8ebadd6a396587f9839eb5c4616
--- /dev/null
+++ b/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_eval_res.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d491596003e0dd7742705ea17ab84375e468d3bca3464b831d9d504e550ff8e8
+size 12166
diff --git a/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_gt.json b/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_gt.json
new file mode 100644
index 0000000000000000000000000000000000000000..9fe578d912b10b08aa58e4958d08f73460bae3f5
--- /dev/null
+++ b/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_gt.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a1a964427d4e4225f883ed3d08ffa1d3613c45ac35dcc9426f2a25031c92a31
+size 3398949
diff --git a/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_pred.json b/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_pred.json
new file mode 100644
index 0000000000000000000000000000000000000000..b57f43748bd3a95031088dfe7894821af40c9e1f
--- /dev/null
+++ b/assets/veval/toy_gt_and_pred/toy_saco_veval_sav_test_pred.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c593f97bb4701a8c34df38bfebaf9091c6ba4f056860a789c38877be6403652e
+size 542939
diff --git a/assets/videos/0001/0.jpg b/assets/videos/0001/0.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d73ad8f6b85e547f8f14ceacb3efefad7e201c57
--- /dev/null
+++ b/assets/videos/0001/0.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1435526859e0acf2bf48622113a76f6df186c95e5a223cbdab173c794c7cb28a
+size 144735
diff --git a/assets/videos/0001/1.jpg b/assets/videos/0001/1.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..01832619bae44c02625914caa33bd63b80a9c7f5
--- /dev/null
+++ b/assets/videos/0001/1.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8737dc77b892c9d502731028b2cddaa5d387e3e5f2cb55d4f53d0086a02752f9
+size 140838
diff --git a/assets/videos/0001/10.jpg b/assets/videos/0001/10.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ca87d14821723bc0de5f0def618606b11e0cb847
--- /dev/null
+++ b/assets/videos/0001/10.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cff6d920f7f3732ac92dcdfe1cd39c169bb4320796bee39008869de99fed4c9f
+size 136979
diff --git a/assets/videos/0001/100.jpg b/assets/videos/0001/100.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4c4449308ed6a53f7169e760cf0b56d82b03b7cb
--- /dev/null
+++ b/assets/videos/0001/100.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eab7f1f51c40e6a01636f589bf5faf5312ff26fa65413f0a8a2ed78ff5cec2dc
+size 114892
diff --git a/assets/videos/0001/101.jpg b/assets/videos/0001/101.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..71dffc23d17127fd2d49a435dc0b891481e779d6
--- /dev/null
+++ b/assets/videos/0001/101.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5edbbd8e3978b64e298e2fc3539340826e7b93e3ca1273cbc0f5db13ea6c215d
+size 116960
diff --git a/assets/videos/0001/102.jpg b/assets/videos/0001/102.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3506db3220a1cc9537375dd0c85db16646f69ca9
--- /dev/null
+++ b/assets/videos/0001/102.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d152993219fd4c8c010a916471f5df3bdec885987a97fb93debf00871578de90
+size 113558
diff --git a/assets/videos/0001/103.jpg b/assets/videos/0001/103.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9a50eae0d5ed4e488c85dcfc52f945935735bf65
--- /dev/null
+++ b/assets/videos/0001/103.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed32e7f0a21804fda6c0f2edcb6d0e5c0250bd81629f6dc9f90a5600b400c34c
+size 113630
diff --git a/assets/videos/0001/104.jpg b/assets/videos/0001/104.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..44cd1bed7b8feb1eecd8254d6f4d2ea482a20613
--- /dev/null
+++ b/assets/videos/0001/104.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88dd37286877da2c01be63608ab8858493d66a405a9d5607a0085138a8eca55a
+size 113028
diff --git a/assets/videos/0001/105.jpg b/assets/videos/0001/105.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..53b8f9b3ab44f843d4ccd8335b9a5ba78ba8735e
--- /dev/null
+++ b/assets/videos/0001/105.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b1464ba0752e9eba604b298b9d2d75d6d95a67a708d44eaf09b20975566d388
+size 114848
diff --git a/assets/videos/0001/106.jpg b/assets/videos/0001/106.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..847bed470f84da7afe2e45cadd1c322661634062
--- /dev/null
+++ b/assets/videos/0001/106.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b06813a94accb0f85a5fb870cfdc388103aa3994d4ec0f713fb7534ad1ccd9da
+size 112503
diff --git a/assets/videos/0001/107.jpg b/assets/videos/0001/107.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..43ed8df9a8819ff6aea5d570c5c6a4485f48e9e9
--- /dev/null
+++ b/assets/videos/0001/107.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ccd998fb51d68629f93129187631cb488e5b64006c5e811f28fd376e02394d2
+size 114131
diff --git a/assets/videos/0001/108.jpg b/assets/videos/0001/108.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..600e2ff395f5aa505aa847bf6cef3aed09fbe753
--- /dev/null
+++ b/assets/videos/0001/108.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1fbd78c48ea5d6fd5d76ce81b40799dd5bac11e580799fb25575c70c547ed471
+size 114064
diff --git a/assets/videos/0001/109.jpg b/assets/videos/0001/109.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..da25a740284d809e0eca5a803979d61164f564a4
--- /dev/null
+++ b/assets/videos/0001/109.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2048d2d7a775be1b137712670ccdeb9ec99bc0ee5c9b9f63fc8d9d595a205a98
+size 117234
diff --git a/assets/videos/0001/11.jpg b/assets/videos/0001/11.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..12bf3fc1d589588a4c4677e5293b4055a469179b
--- /dev/null
+++ b/assets/videos/0001/11.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5bf579b61dd99d97a2d653a4dd84a6da4da49a5232ce37ec434f701ab6264f35
+size 139083
diff --git a/assets/videos/0001/110.jpg b/assets/videos/0001/110.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..23b40ec91da990de0848a02baefc4d48b274703d
--- /dev/null
+++ b/assets/videos/0001/110.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96c617b759eb41c6274b871e16f3a44bb3f923b8e04add8b99ff7e06631ef72e
+size 115336
diff --git a/assets/videos/0001/111.jpg b/assets/videos/0001/111.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f39b5b1e6cf0a6fe49daf2e9a01d63879435a48a
--- /dev/null
+++ b/assets/videos/0001/111.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b4c19eeda86fc014760559ac712d2e968d65b726e298a232634ef5baa68d92b
+size 115464
diff --git a/assets/videos/0001/112.jpg b/assets/videos/0001/112.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..20e6b180f6845057c59ce150a66b4352006037e8
--- /dev/null
+++ b/assets/videos/0001/112.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd282a9a38bddbeab5029eccac21779bbfb507338be95832a69241652ad3186d
+size 114639
diff --git a/assets/videos/0001/113.jpg b/assets/videos/0001/113.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d41b397c69a62ac29290361a818e6a0fd24fe618
--- /dev/null
+++ b/assets/videos/0001/113.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9a22daacb5d110f9fe9fe22c4c6c0da2cac23a5e4ab7855bfe488560e7fd658
+size 115696
diff --git a/assets/videos/0001/114.jpg b/assets/videos/0001/114.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b1b059d044cf5dc28510c61134dfeca6c53a8708
--- /dev/null
+++ b/assets/videos/0001/114.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c0788c8eb42f497fa17298c550481340a58b43b963803d8150b6b23f44a8a83
+size 113725
diff --git a/assets/videos/0001/115.jpg b/assets/videos/0001/115.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..af7fafd74e890bdc534ca315e0609888948de13a
--- /dev/null
+++ b/assets/videos/0001/115.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0abf8b1efaadd40216d01aef2229b98551b4d21285e7d7ef8aeeb5dcb43fb2c
+size 112547
diff --git a/assets/videos/0001/116.jpg b/assets/videos/0001/116.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..630950797fc958255f6a054020d4880971f2831a
--- /dev/null
+++ b/assets/videos/0001/116.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dee6839ead2cd8fb090bc78b60734fe3f405c23c5135232fe406b35e64a6654
+size 112455
diff --git a/assets/videos/0001/117.jpg b/assets/videos/0001/117.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4335a8fb7c5ce8f4b04a02f4dd99fc4a6d9c52ba
--- /dev/null
+++ b/assets/videos/0001/117.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13b7d62ad016537bd97d0ffb1859949f6fd451a2fbbc61c4a85b4530b8ec26e5
+size 111589
diff --git a/assets/videos/0001/118.jpg b/assets/videos/0001/118.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..017b94740dedf329b2661da651f7c9964e2a7cce
--- /dev/null
+++ b/assets/videos/0001/118.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6536d624c066f724cf3b4a0105468bb1ac07db693ed7bf9ff70a2b2b9e2c038
+size 109833
diff --git a/assets/videos/0001/119.jpg b/assets/videos/0001/119.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..af055823e91964e5d521398be43cd5ca4f507513
--- /dev/null
+++ b/assets/videos/0001/119.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5bed2df4d3b61ba2a31dda705e3877f314292a1b6a758d87e544a122c862426
+size 107291
diff --git a/assets/videos/0001/12.jpg b/assets/videos/0001/12.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..46662b47dd7c4d3a0c12ad5730066a0128d2d058
--- /dev/null
+++ b/assets/videos/0001/12.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:37e5c028a4d64cca094126cff67349a921364af492560d5b2263b8c7490806ac
+size 137290
diff --git a/assets/videos/0001/120.jpg b/assets/videos/0001/120.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c4d549e8a1e004055aa64b28400d13606bd794f8
--- /dev/null
+++ b/assets/videos/0001/120.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:552159698f89e7e61752ead392e31752cc1c4b63ed13e641e0d09dfaf9f9646d
+size 108245
diff --git a/assets/videos/0001/121.jpg b/assets/videos/0001/121.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ef342c12452afd04e610b0544be4fc466c6281bc
--- /dev/null
+++ b/assets/videos/0001/121.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:766a0c1f45368d44af0de17c0587d97bac757210535b3b1636cceba64885fc11
+size 106212
diff --git a/assets/videos/0001/122.jpg b/assets/videos/0001/122.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c47d1da1b51a1a21b55dff1f1c780e41a22569e4
--- /dev/null
+++ b/assets/videos/0001/122.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e96bce9ebea0bd1ac23767fed63c2e281fb24efcf562a932836fa4a425912b10
+size 106275
diff --git a/assets/videos/0001/123.jpg b/assets/videos/0001/123.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b8b20bd4dc4842fecfe55cd50c4ab5f3cf1d034f
--- /dev/null
+++ b/assets/videos/0001/123.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba3a3f66bc2e8e1a34ea1f90f5336611866376ce6442cf975630fa80c2dbfd4c
+size 108177
diff --git a/assets/videos/0001/124.jpg b/assets/videos/0001/124.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..178c048c8de30f182a64efe8b49021be83fda0c7
--- /dev/null
+++ b/assets/videos/0001/124.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:caf7d9d7f8578e4ff1a30b63cb9601c660ddde63763677d1e2b7df30151ac58d
+size 110821
diff --git a/assets/videos/0001/125.jpg b/assets/videos/0001/125.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9cae3b2514a5785e46d3cf166b6ec9db2ec80b65
--- /dev/null
+++ b/assets/videos/0001/125.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b00b109dcd864ffd1506e4c1e0474b642c3b85d3b01359af6301e13d8f030a5
+size 109711
diff --git a/assets/videos/0001/126.jpg b/assets/videos/0001/126.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f7a0f222858417c1c25e20dfb72faf5d2d4f3eba
--- /dev/null
+++ b/assets/videos/0001/126.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e63278fd65842481a3b1aa15913e43726b1c4e1a80821263517bcc92f58c1f07
+size 111690
diff --git a/assets/videos/0001/127.jpg b/assets/videos/0001/127.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e6dbd204a9a5af4c2e0cf5bf47652bffdfbf8dfc
--- /dev/null
+++ b/assets/videos/0001/127.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8691cf96626ad05cadad36318bd9e0893c022f774c8835091e67083ebfb54ecd
+size 107194
diff --git a/assets/videos/0001/128.jpg b/assets/videos/0001/128.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9fee762d86589b2fda4fdf931c336b6e6e1883cb
--- /dev/null
+++ b/assets/videos/0001/128.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3204b9ca4a070165e745bea719ec81c5d44a7e3143f8c27959f9343349df5a7f
+size 106901
diff --git a/assets/videos/0001/129.jpg b/assets/videos/0001/129.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ce183f405c8b984d909024c0902f3a2878d73037
--- /dev/null
+++ b/assets/videos/0001/129.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56659ee06fb0cce6cae2c0b76130024959ca1143ea5d1058a51f104b59ef6275
+size 104808
diff --git a/assets/videos/0001/13.jpg b/assets/videos/0001/13.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0bce14ef6183e3d0bfcd995e3b9fbaf6ae2d1e3e
--- /dev/null
+++ b/assets/videos/0001/13.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e58f9a4d8d8e2618e8acf4223cc32f4d4bfc3f8609a8a89f1f879f59cb395b84
+size 139696
diff --git a/assets/videos/0001/130.jpg b/assets/videos/0001/130.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b4e724b7ec5a743af5415e6a0c114a4610b558af
--- /dev/null
+++ b/assets/videos/0001/130.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24ad665a172c1ba0a1077e11d94e1e165ffb9bb5b2e40caa737da0ac9b0f3ce5
+size 106705
diff --git a/assets/videos/0001/131.jpg b/assets/videos/0001/131.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c81cda16c3f01b4113f56e99ea8943064b6ca919
--- /dev/null
+++ b/assets/videos/0001/131.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e57a47506284a5859aa1a27bf6a00f93d66e73950e8fb4fc63e74782eddc3ef
+size 104139
diff --git a/assets/videos/0001/132.jpg b/assets/videos/0001/132.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..78878e60da8216afe8e02c233cd16d80e4111e79
--- /dev/null
+++ b/assets/videos/0001/132.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:914367db3e39748b929a0458e13af6b15e4227843622d28f71736c8e4c655f4b
+size 105454
diff --git a/assets/videos/0001/133.jpg b/assets/videos/0001/133.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..557916d15caa4ed188e780fdb9dec6a53c2d9af4
--- /dev/null
+++ b/assets/videos/0001/133.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7f5583f21cc67af2c170dc60f19f97e243e843334330018179ebfdf4878e60e
+size 106032
diff --git a/assets/videos/0001/134.jpg b/assets/videos/0001/134.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..57cfee6ee076b9f06b9731aa1c9a5a1c1d2a5531
--- /dev/null
+++ b/assets/videos/0001/134.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:166f81fb96fbfb70002660517c3f48c47ea98ac041083ddc1a58ed46360c3bba
+size 108894
diff --git a/assets/videos/0001/135.jpg b/assets/videos/0001/135.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..809a70aef89f65c2613ab19e510e6b15b5f9516f
--- /dev/null
+++ b/assets/videos/0001/135.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8c04b9d46cac60998527d31c23f67fb5ea93aec79bcee4723634a8bbf2638a25
+size 105804
diff --git a/assets/videos/0001/136.jpg b/assets/videos/0001/136.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6c1470cdf9abcd22e213055db0559b099348b014
--- /dev/null
+++ b/assets/videos/0001/136.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5733eb0f107e4ee85f6c03e1d119623f3ca9797af3d353716d087e9801fb796a
+size 105560
diff --git a/assets/videos/0001/137.jpg b/assets/videos/0001/137.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0df9175c0954343ce16a51ebc436c3de0db9db04
--- /dev/null
+++ b/assets/videos/0001/137.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90caa78663ab65891ee636531159599640ba344402992a69766c94ca618c5a25
+size 103404
diff --git a/assets/videos/0001/138.jpg b/assets/videos/0001/138.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c4dbfd5ddfbdc13aa9a67a0e0f635d51af472aee
--- /dev/null
+++ b/assets/videos/0001/138.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d5f0481b304e35352f1a6daa8c7ec0a10dff58046b9dd02087522d7c3c2af27
+size 104280
diff --git a/assets/videos/0001/139.jpg b/assets/videos/0001/139.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..aa04250bacdbcc7266f8e9633be3454a346c51a8
--- /dev/null
+++ b/assets/videos/0001/139.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7c94cab4d40aa74bbf6163b2c131b5108cea69973fecfe32550d91486389a82
+size 101986
diff --git a/assets/videos/0001/14.jpg b/assets/videos/0001/14.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..08c6efcf94d28a53f65bd62ac1de8fc862a102b3
--- /dev/null
+++ b/assets/videos/0001/14.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:004569a92a7594bbedf9e133bc100e9e3c5102e54896a63474b5b506666d8042
+size 136832
diff --git a/assets/videos/0001/140.jpg b/assets/videos/0001/140.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..789a78d9a3c40fb292ac149e8001f8d769850a50
--- /dev/null
+++ b/assets/videos/0001/140.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d57971d6a2c6674077f4d3dcdfbe3a413a841a89045a0792c56413dcd15830ce
+size 101797
diff --git a/assets/videos/0001/141.jpg b/assets/videos/0001/141.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..826556d222f918864b91bf2944d45a02404d6ef7
--- /dev/null
+++ b/assets/videos/0001/141.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5724bd1c1cf37cd9e83d41679a07d986ef3a073ac3333a7b60491ea05ab4eaa0
+size 103841
diff --git a/assets/videos/0001/142.jpg b/assets/videos/0001/142.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..27c2e50ec1c02da9279281df1e5987f28554debf
--- /dev/null
+++ b/assets/videos/0001/142.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ad4a107bf0c66d99e120efc17d5c413468f7737022172a91a1c3331ef9d357f
+size 103674
diff --git a/assets/videos/0001/143.jpg b/assets/videos/0001/143.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2ffc344f2c5c0fe1a7a2b69779677f3a37d23f46
--- /dev/null
+++ b/assets/videos/0001/143.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:29ef4d2e76d696e707ae759f73ea3610910a9dfc1b61169430ecb9282af43020
+size 105193
diff --git a/assets/videos/0001/144.jpg b/assets/videos/0001/144.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3141993e9726d5e5fcbc5994d26b5d3f0100c5e1
--- /dev/null
+++ b/assets/videos/0001/144.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e1713d5b621c9ef2df63a63e8777b1bfe2e0c5447bfd7934fa938bfaf5e5336
+size 105773
diff --git a/assets/videos/0001/145.jpg b/assets/videos/0001/145.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8716e9ddbb120b2fe753c088c9fff7ee98dcea3c
--- /dev/null
+++ b/assets/videos/0001/145.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc0af0dcc7dbeed7f4254e86087d74b21569678c1dcb7b9f783078a58d7d9340
+size 106558
diff --git a/assets/videos/0001/146.jpg b/assets/videos/0001/146.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..04b2d82f78f55f10330b9935ff52309d3026915b
--- /dev/null
+++ b/assets/videos/0001/146.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dfa4bc424e6121d29dee36b3ebf2370ce32cf3284832f731dde2e69d142c5fb
+size 104831
diff --git a/assets/videos/0001/147.jpg b/assets/videos/0001/147.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..014deb06832537a69247c0e84c4ba8baf9951f54
--- /dev/null
+++ b/assets/videos/0001/147.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2306c63cef38f21469640be26e1aaf759c272a35f7cac36d360d0c2c13cd88d
+size 103141
diff --git a/assets/videos/0001/148.jpg b/assets/videos/0001/148.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cc85e7e683f1c846a0f6f3a544a0cd0068412b7e
--- /dev/null
+++ b/assets/videos/0001/148.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7a48bd81ac8ea7ff2860156d50c74d061a8375d934c8f88994e08da55c757c5
+size 101322
diff --git a/assets/videos/0001/149.jpg b/assets/videos/0001/149.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c4702547cb9933f3b2a466877a27453cc1ef1c5d
--- /dev/null
+++ b/assets/videos/0001/149.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6613d7428785850711e5ec74a40a9698860845cb40d8fab458fb207252c5218d
+size 99522
diff --git a/assets/videos/0001/15.jpg b/assets/videos/0001/15.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2fc2e17846d433bf274f26914a6ec9baca8363b6
--- /dev/null
+++ b/assets/videos/0001/15.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08a7ab7cfc36c24acc270bc01c127b906f73444a1fb0c4df841525d8e115e895
+size 136253
diff --git a/assets/videos/0001/150.jpg b/assets/videos/0001/150.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c8ab3cb0a23013a96d0aad0583160f5e749ecdbc
--- /dev/null
+++ b/assets/videos/0001/150.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b9d69840db6dc1a1c67341c865f1f9e656b6f777040fb54cf453a5372330991d
+size 99950
diff --git a/assets/videos/0001/151.jpg b/assets/videos/0001/151.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0f2b294e25dfaeffccd7214be5d69872698ee84b
--- /dev/null
+++ b/assets/videos/0001/151.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c400a30ba38f1a9f316db4f3d0bec3fba453680eb8e4f3a354cc7bdb71ca3fa
+size 101664
diff --git a/assets/videos/0001/152.jpg b/assets/videos/0001/152.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..365a5db0c692f993f52dd2dd10591318e5407acb
--- /dev/null
+++ b/assets/videos/0001/152.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fe36cb10da9f6302c3101e97a42c7fcef3a49c5a8c23730f13bb729868464ccc
+size 104799
diff --git a/assets/videos/0001/153.jpg b/assets/videos/0001/153.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1ba592c1fdb0b2cf04fa3d7d4acdd93aa25cacfc
--- /dev/null
+++ b/assets/videos/0001/153.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c668079c06cd71deadb85b8f49cc1088e98c9f9a29bb7e76d6c1b612ac0b2a2
+size 105942
diff --git a/assets/videos/0001/154.jpg b/assets/videos/0001/154.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..97747d24944df99ed21cef5c195a015b65ab0a45
--- /dev/null
+++ b/assets/videos/0001/154.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0db668b80d5e75fa85e94b0c7dbc0f74ccd8c9caffe4e9974de91d07219d5c7c
+size 109328
diff --git a/assets/videos/0001/155.jpg b/assets/videos/0001/155.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..502bb892fd22c7a9123d25d485729d3fedd37d2a
--- /dev/null
+++ b/assets/videos/0001/155.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0a5e46dcd2ccd92bfdca883c063932838fedcf3aebcc3fbca75994f88021fe0c
+size 110632
diff --git a/assets/videos/0001/156.jpg b/assets/videos/0001/156.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1c37b58e197e31510291b1954815f719c6cb30ee
--- /dev/null
+++ b/assets/videos/0001/156.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5d28104faa8b611069defec86a854cb744d18b43a3bc0ad0c860c6f65c4eb42
+size 110476
diff --git a/assets/videos/0001/157.jpg b/assets/videos/0001/157.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dc694c1d0cd45228ea7700a2ef1b947e836d055c
--- /dev/null
+++ b/assets/videos/0001/157.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:756c8f097fbe0bfd87fb037983bea4bca9d20680f53d0deef5cbc56dfd11012a
+size 112064
diff --git a/assets/videos/0001/158.jpg b/assets/videos/0001/158.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..714ba3a84e01ea3aac2eb60979362a4f3fa5d68f
--- /dev/null
+++ b/assets/videos/0001/158.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a299adec0bb9c8b8b1710e942059f99cc44a88bb3fc9299581860910a84eee9f
+size 108201
diff --git a/assets/videos/0001/159.jpg b/assets/videos/0001/159.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..996343594e0098de0f00543ab08322e55d86949a
--- /dev/null
+++ b/assets/videos/0001/159.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:91f19e9e1183f67a4b9d2d830e4721387fb2778e5bb66f40e4413271748cdc50
+size 105813
diff --git a/assets/videos/0001/16.jpg b/assets/videos/0001/16.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b593af47101bd92c99f7fb5b8219ae9147b560ea
--- /dev/null
+++ b/assets/videos/0001/16.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff45fd8e1cda13029b868ba46424a618fcfe9f339d9881fa9cc4002db1708403
+size 133859
diff --git a/assets/videos/0001/160.jpg b/assets/videos/0001/160.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9140cfd4a9bda59c7c35fde37211197678f37a7a
--- /dev/null
+++ b/assets/videos/0001/160.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa58f804515f10e636eb6572b645036829e14370f738b9fb540d120514c1ca8c
+size 104432
diff --git a/assets/videos/0001/161.jpg b/assets/videos/0001/161.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f9959c72b1983026812db8eca093ccbb38741037
--- /dev/null
+++ b/assets/videos/0001/161.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:17a86f42c51d30418b5758bcfbe4efe3236516ae8dc31ec3c66973d4fcdef531
+size 104821
diff --git a/assets/videos/0001/162.jpg b/assets/videos/0001/162.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8c7d4390335dd4ede6df57de8c2c00e1b1907f75
--- /dev/null
+++ b/assets/videos/0001/162.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce325f8995e46db3f3aa3d1beff58fc5dc6aae6ee06546c2cf9ceb2b51c2c2e5
+size 101664
diff --git a/assets/videos/0001/163.jpg b/assets/videos/0001/163.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bb1b2c45d298476993970a661fccfe000a025939
--- /dev/null
+++ b/assets/videos/0001/163.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7322f98d54e7417ac949c23b816accd2ed7c623c668e5511a2d741f7698d40fa
+size 99099
diff --git a/assets/videos/0001/164.jpg b/assets/videos/0001/164.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e769997213205e8d1e2469c90c158d8cd77b3f15
--- /dev/null
+++ b/assets/videos/0001/164.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1ae9ea9772561d5da24c69dbdde59c37b4c09d183df5c066c24bf43f54f383
+size 95445
diff --git a/assets/videos/0001/165.jpg b/assets/videos/0001/165.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b15ef82072bc096dacaae379f7782d2cb1b1c32f
--- /dev/null
+++ b/assets/videos/0001/165.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:005c1d27873d03b24d5a40a7388b3daec39a8ca51169b039ad6ff5f7e31e4fcb
+size 93732
diff --git a/assets/videos/0001/166.jpg b/assets/videos/0001/166.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3299add5d99777dc71fb162ba1a47b97301461cb
--- /dev/null
+++ b/assets/videos/0001/166.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d9830dcaa04720698925cc2bf601559d4cac92a1239ca8223232a2d090d42a1
+size 91335
diff --git a/assets/videos/0001/167.jpg b/assets/videos/0001/167.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..043dfa36867b465f94d64aa97e9c1d9f20464bd1
--- /dev/null
+++ b/assets/videos/0001/167.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:949065925f46f2b7908eb21339c1a6653fc75667f0c812d7ab7402806a944b02
+size 90055
diff --git a/assets/videos/0001/168.jpg b/assets/videos/0001/168.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e4c1f1199ec99decea2798190f3376bef3fcb64b
--- /dev/null
+++ b/assets/videos/0001/168.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e700a4bac2f056eff361d0355e121ad382b76551ad3901dcdbfbff8ba402b8c6
+size 91309
diff --git a/assets/videos/0001/169.jpg b/assets/videos/0001/169.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e1d6ca35ec40dd2956ad48c6659b6c103aa57308
--- /dev/null
+++ b/assets/videos/0001/169.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ddebff6a471ad0b61988328b6076a0c51c6e61ea9779e4da77c8c2d90a208448
+size 92395
diff --git a/assets/videos/0001/17.jpg b/assets/videos/0001/17.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..611c7504490bf7495507ea0808887377fac34946
--- /dev/null
+++ b/assets/videos/0001/17.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ed2e51c6be6843224b5544e99bf1af73ed2227afa6c658ddc79b83e32ca826c
+size 137219
diff --git a/assets/videos/0001/170.jpg b/assets/videos/0001/170.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..627bd2c094794a91e981ac0032f6edc599c64b65
--- /dev/null
+++ b/assets/videos/0001/170.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dc5fe61da0467624da2a075747e777f2d1b1d2b04379329f249d6639f21b4ea6
+size 92827
diff --git a/assets/videos/0001/171.jpg b/assets/videos/0001/171.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bae4c9d6b2d88bf05745ab3d4b1be4832133d902
--- /dev/null
+++ b/assets/videos/0001/171.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da8077cdb6ba2fefdef739cce5d910bc3d73c5dc7a9a78f7b6cb56556c5c618d
+size 95072
diff --git a/assets/videos/0001/172.jpg b/assets/videos/0001/172.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a78ea7f10840d10836f21ff9952a3a7da9ec7526
--- /dev/null
+++ b/assets/videos/0001/172.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:65ccdcd50a64d37b6de6aaf0dc27bd9a6fecff4d7b517c3f55df532547115f2d
+size 95348
diff --git a/assets/videos/0001/173.jpg b/assets/videos/0001/173.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f6ad1978452cdb9bbb3f5744d1e21814d4cfdb65
--- /dev/null
+++ b/assets/videos/0001/173.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a31f564d8fa86ed3f5d039fb52f4cc2309ef6cbcac2e118fcdde097ee1db072
+size 98325
diff --git a/assets/videos/0001/174.jpg b/assets/videos/0001/174.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2663cd0ba30d63713c733b33fd2e492e3cd45fe3
--- /dev/null
+++ b/assets/videos/0001/174.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d16c10c5a75264020a21ef0a2fea6e59d3417ce377506fecfdae902f77c8d609
+size 98122
diff --git a/assets/videos/0001/175.jpg b/assets/videos/0001/175.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..515a1fb4b26ee56d0ba6e225454c559fd4c1fbf3
--- /dev/null
+++ b/assets/videos/0001/175.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5ed0891cad2f0b2baefc5517ad5078272fe9d733d40cc2aa39eada5fe680e67
+size 99072
diff --git a/assets/videos/0001/176.jpg b/assets/videos/0001/176.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8100df5592baf86e28ffb0c3c5b9a91523ce45ca
--- /dev/null
+++ b/assets/videos/0001/176.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:42c696da1b9033b0d239473fe13ddac5f57b605a48e115d4de711bc21e3979bc
+size 98659
diff --git a/assets/videos/0001/177.jpg b/assets/videos/0001/177.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f9913093c1819f6764c436c7e546297229fbb656
--- /dev/null
+++ b/assets/videos/0001/177.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de4829b4021c64056239a593babd20f2e0fcc8b1f786d79354ab08820f16bf2d
+size 101683
diff --git a/assets/videos/0001/178.jpg b/assets/videos/0001/178.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f26613febc00f71ffaa09de5708c9defcad904d4
--- /dev/null
+++ b/assets/videos/0001/178.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26b6ed2559e6ac0210b047cdc99602aac23ffbc326e47b11a2a8e20ef78373b2
+size 103428
diff --git a/assets/videos/0001/179.jpg b/assets/videos/0001/179.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..068a73678f40a62c7af9e13a70ccc2ffb3fd8b9c
--- /dev/null
+++ b/assets/videos/0001/179.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c922d397f7995606dfa91f1b7539b6d98af473f1999c2feb7b93379d1490f29c
+size 105165
diff --git a/assets/videos/0001/18.jpg b/assets/videos/0001/18.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e2036afdb83c9a792ed2c8b1f0c3156bd9b0deb5
--- /dev/null
+++ b/assets/videos/0001/18.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64670be575e716b49c8162198acce54abed439e3062704a8c2473ac124ddeb20
+size 134975
diff --git a/assets/videos/0001/180.jpg b/assets/videos/0001/180.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..675a074a94449c8eaa8aefa24e0038e44aa9ab1e
--- /dev/null
+++ b/assets/videos/0001/180.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a03d98debea8af24d6b5b37a186466653e494eb2187274be68646a5be89c3792
+size 104485
diff --git a/assets/videos/0001/181.jpg b/assets/videos/0001/181.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..49d58cc20e8d67dead346572835669bae4f52177
--- /dev/null
+++ b/assets/videos/0001/181.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ff1dde3bf96ccb5666e70053756d30b467c46becb27bc47a272812cd48d7bab
+size 106915
diff --git a/assets/videos/0001/182.jpg b/assets/videos/0001/182.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d34a8ec1bb5b1318bea3bc9bd425cb2dc61ac87a
--- /dev/null
+++ b/assets/videos/0001/182.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4b8482ace2b46776efad6acf34628860edd23dc065a862d47c1eac76fa282af
+size 106408
diff --git a/assets/videos/0001/183.jpg b/assets/videos/0001/183.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..533a9129bb91c6c5468e6893e8276982eeee0220
--- /dev/null
+++ b/assets/videos/0001/183.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f6c988427ed972cd9bae6bc1d44935e691f386eda75b7ac8593d8d7fdaab592
+size 107851
diff --git a/assets/videos/0001/184.jpg b/assets/videos/0001/184.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4c6417f4a605680bbecdf9e05a07e55889253807
--- /dev/null
+++ b/assets/videos/0001/184.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c22a564b01cd785a36dd0810abce897135e81a675627fd5fb202b8275929d6cc
+size 108456
diff --git a/assets/videos/0001/185.jpg b/assets/videos/0001/185.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a81ef998e73b498e866a18312a05252ca35fa4cd
--- /dev/null
+++ b/assets/videos/0001/185.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96d2335647198c4d2cdfa608d2baefeb77b710b3053a689518e23f52bbe155ac
+size 110333
diff --git a/assets/videos/0001/186.jpg b/assets/videos/0001/186.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c7cc4625f89db0704d55abab9629679c34de0535
--- /dev/null
+++ b/assets/videos/0001/186.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef5e19b63456481cb4e6cffa0c997faca374454d83155212f0582ad397846ab9
+size 110139
diff --git a/assets/videos/0001/187.jpg b/assets/videos/0001/187.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9e04c4c0da0bfaec2d9b53e0bece1ca891d82db3
--- /dev/null
+++ b/assets/videos/0001/187.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:abe341c0974be4eaca57c1e27f3f12948a8b2741b51ecfedee92755a04640479
+size 111588
diff --git a/assets/videos/0001/188.jpg b/assets/videos/0001/188.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a5b7ce31caf053616353d86d8ba48bc030abcb4e
--- /dev/null
+++ b/assets/videos/0001/188.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5daad861867216d874f6b13cbdcca68a333a6507df1a61ac221409d04e7a610
+size 112513
diff --git a/assets/videos/0001/189.jpg b/assets/videos/0001/189.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..56c993ba5bfa306f8ae2d8cba6b3af9ed88f031c
--- /dev/null
+++ b/assets/videos/0001/189.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2a0b47ade9b650106c5281c128092bc8ee8e54846e1afd8093ca3614563958b8
+size 112673
diff --git a/assets/videos/0001/19.jpg b/assets/videos/0001/19.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..83fb1164c0f4b5f3912c3cfd965a96bcbc8494a9
--- /dev/null
+++ b/assets/videos/0001/19.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:460251884995872cf7dd721be6ff38403ab3f0300732a3559c0c196d6d2a0f03
+size 134470
diff --git a/assets/videos/0001/190.jpg b/assets/videos/0001/190.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3f555a195758dfca519ffb5105b3ca1efecd34c4
--- /dev/null
+++ b/assets/videos/0001/190.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de52c83ee7a2cf303314ce2939cfed89d999538f6cf061d997c8dbc235715cba
+size 115240
diff --git a/assets/videos/0001/191.jpg b/assets/videos/0001/191.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..78fd4ff969a54c1e4e0ad9b9021388b3f3b145d3
--- /dev/null
+++ b/assets/videos/0001/191.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e169b512078475a6b924207a8ec032074f94ed17ef7e34a88b1dd25373024fd1
+size 115866
diff --git a/assets/videos/0001/192.jpg b/assets/videos/0001/192.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eb95c980949c89e22daad9518c7ea5d5c699344e
--- /dev/null
+++ b/assets/videos/0001/192.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b7032a240f4ae24ccf627d9f4920d429e26e9bca9a387d645417433d1f2f4c6b
+size 115428
diff --git a/assets/videos/0001/193.jpg b/assets/videos/0001/193.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..33383606d96bdbee504dd25aa931734729be266f
--- /dev/null
+++ b/assets/videos/0001/193.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21da47d1677b9ee1c8f5f0587f6d12bd73cec98face645a4a7622c52f1dc0a45
+size 113806
diff --git a/assets/videos/0001/194.jpg b/assets/videos/0001/194.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..de64639982c501c55913570fed1b9d1957e427d4
--- /dev/null
+++ b/assets/videos/0001/194.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:48fc8a8ac63503ea5e260bc09bf38f9927d3e36f3615998d24a8bba9bca6ecbc
+size 113688
diff --git a/assets/videos/0001/195.jpg b/assets/videos/0001/195.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..39135c2ca578c27c8355dbec8cc18fcfbb78916a
--- /dev/null
+++ b/assets/videos/0001/195.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0018364c50db8d300fc9a718249accc7bdaab8011608f1c82dd0d2fc1f4e739
+size 111104
diff --git a/assets/videos/0001/196.jpg b/assets/videos/0001/196.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a4e9cad83ea30bdfd34c0482d1ad4a39e29dd727
--- /dev/null
+++ b/assets/videos/0001/196.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:419ab91ae107fa9ed6b240f5a08e322f1ff10f27a3b25623fa201cdcaf30ea32
+size 111082
diff --git a/assets/videos/0001/197.jpg b/assets/videos/0001/197.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02b62feeb83e303cee7dcf2b80a4893ec5e8906a
--- /dev/null
+++ b/assets/videos/0001/197.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3b2a18d8728a6d8dee23625a758866269cd14cb051f11f9c36d13b41e8ddaad
+size 110636
diff --git a/assets/videos/0001/198.jpg b/assets/videos/0001/198.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5ad7b5c7459e7113987cb6af4036324d337807ea
--- /dev/null
+++ b/assets/videos/0001/198.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af87662bbe50e77769925079c5514495588887c165f05b943f9e1504cde96f47
+size 110228
diff --git a/assets/videos/0001/199.jpg b/assets/videos/0001/199.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d29e2c9010c9cf70cdabc241999a405aca4afad8
--- /dev/null
+++ b/assets/videos/0001/199.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbf0a11d3662bf03fce20aa23051ebe0bd84db4b03f217488662ccdb2b45ccee
+size 107637
diff --git a/assets/videos/0001/2.jpg b/assets/videos/0001/2.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3009e9aa637c0c5facc06733bfa35dd54a8a7153
--- /dev/null
+++ b/assets/videos/0001/2.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2eb03caf8f90e06dca30427b43e5e9d8480f4fcaa742c025118e61268dde52b7
+size 141732
diff --git a/assets/videos/0001/20.jpg b/assets/videos/0001/20.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9ca1d839aaf6089b7b7f16abca88833bf9976f29
--- /dev/null
+++ b/assets/videos/0001/20.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67e955a26ef2961ab1a60dccf7fe3228ca5b7bd250c58cf071646c816376de30
+size 131756
diff --git a/assets/videos/0001/200.jpg b/assets/videos/0001/200.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..34d0fcff615ff1863f7d436658ea4f721135ad1c
--- /dev/null
+++ b/assets/videos/0001/200.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08f828db91c0b2051de4ee37fbc2168dff4dc79b9366bdf27ba44f8ec8c775e1
+size 109912
diff --git a/assets/videos/0001/201.jpg b/assets/videos/0001/201.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8c18b636cfde4cd61d0a230b8fca1f59ad293405
--- /dev/null
+++ b/assets/videos/0001/201.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81bd316458418203c5a98ca76906d1cab1170113c1ee0a010cd850cef851322e
+size 111229
diff --git a/assets/videos/0001/202.jpg b/assets/videos/0001/202.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6c2be9e4e25b14540c288148ffe9d22bb40253d9
--- /dev/null
+++ b/assets/videos/0001/202.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a5dc84aa0d22ce5e959492c32cb250742d3138bb01b5ce16b5d4af45c6a318b
+size 110185
diff --git a/assets/videos/0001/203.jpg b/assets/videos/0001/203.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1774ef13f46ea6a470b15e062189c5e35353dd07
--- /dev/null
+++ b/assets/videos/0001/203.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd7acac6b561e3ab3dcc184876a8d3eefd61b2d57061b357ee68506da009aea6
+size 109905
diff --git a/assets/videos/0001/204.jpg b/assets/videos/0001/204.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..75290a0c663da9a6de806274342a4a4c6b55cdf1
--- /dev/null
+++ b/assets/videos/0001/204.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8d1e49eb87e625ec7e43f1529bb4ff91381fb8942c1ad8bca0e15b030b521ec2
+size 111308
diff --git a/assets/videos/0001/205.jpg b/assets/videos/0001/205.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2ad08475ac437a41ed74f605ab4edd5a950da207
--- /dev/null
+++ b/assets/videos/0001/205.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02398e4e2e038ecfef2f5baaaf788405db6c64ebf3dfaa241ad8987e7320a75c
+size 112663
diff --git a/assets/videos/0001/206.jpg b/assets/videos/0001/206.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e91f08c66e388bdd2901f81b4b354d08cabafd6d
--- /dev/null
+++ b/assets/videos/0001/206.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7dbaefbe749ee1fd08b064e541efa91c73498b198acc8feec48f156b69e0a853
+size 112831
diff --git a/assets/videos/0001/207.jpg b/assets/videos/0001/207.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9e339e96ef33dbd0b05547aeb5b1148f397ee7a8
--- /dev/null
+++ b/assets/videos/0001/207.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39414502dd245b37e3a8c88df0474248396ae8036b23550866cbf120fb624c58
+size 113676
diff --git a/assets/videos/0001/208.jpg b/assets/videos/0001/208.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ad1807c4c26a723435c67abf72c028d9d6b69108
--- /dev/null
+++ b/assets/videos/0001/208.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a3c230e288330b6f3e38454015536536f63b5dfb7d188f8b215f8a96f9a137b
+size 115141
diff --git a/assets/videos/0001/209.jpg b/assets/videos/0001/209.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4dd51c153e15ba1d5b6726165c14b7c7ef0bd374
--- /dev/null
+++ b/assets/videos/0001/209.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbcd373af8752aa90df07cb8ab1a46e4679f140768cf50f4dc143cbad26a2072
+size 114684
diff --git a/assets/videos/0001/21.jpg b/assets/videos/0001/21.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d11e9ca6752e80135dd53c2550c73b5e39a9753c
--- /dev/null
+++ b/assets/videos/0001/21.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd4dc997157eb7802d35a169922ff3f7b605edf63182803397996cbe95c4ff0a
+size 133380
diff --git a/assets/videos/0001/210.jpg b/assets/videos/0001/210.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..709b40fc453e50ac260428e1d92534c727fc465d
--- /dev/null
+++ b/assets/videos/0001/210.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78cbc0cac4523223ae522464ff81a84235434d842f1674c1c7c21626730fac4e
+size 115338
diff --git a/assets/videos/0001/211.jpg b/assets/videos/0001/211.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..95937948f95e5089b1dc22669e3207b5f4072e69
--- /dev/null
+++ b/assets/videos/0001/211.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f75855f988126cc73dd3c6ba61f61fc378b0e2058ec14c37c2892de235d2251
+size 116102
diff --git a/assets/videos/0001/212.jpg b/assets/videos/0001/212.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3b495100f26c8833cd1553863bbf6e47966a520f
--- /dev/null
+++ b/assets/videos/0001/212.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:912c0cc534812d79675d184619184e17c77233ec836d05de6c7e436c4b6b9ad3
+size 118306
diff --git a/assets/videos/0001/213.jpg b/assets/videos/0001/213.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..58a62d4e706fcc88015539a254773ae828624e5f
--- /dev/null
+++ b/assets/videos/0001/213.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:800c421a9594c68fb44bcc76d590a3e2c8599105bcd049f22bdade264bd12404
+size 118180
diff --git a/assets/videos/0001/214.jpg b/assets/videos/0001/214.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..81753c09528fddf0ef266a60537fe289118f2b2b
--- /dev/null
+++ b/assets/videos/0001/214.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75476c87a942cc29363989d2a81d404f9ca37eecce6b150f09cef1871e8ff0fc
+size 119534
diff --git a/assets/videos/0001/215.jpg b/assets/videos/0001/215.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..93e6c9afcbe584b9a1c4a654a960677cce5337cb
--- /dev/null
+++ b/assets/videos/0001/215.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cbce84ce858d5eef84d53c2d003c20f7e6b08756266fa9fa1f11009f1dc0cc5
+size 118615
diff --git a/assets/videos/0001/216.jpg b/assets/videos/0001/216.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bfadd21dc17ac5ced9154139d7808f7767f2b3a4
--- /dev/null
+++ b/assets/videos/0001/216.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8a014a8958e2f9faf8bdb26bf8e9889ff626fb091936dcdf2fc2fcf57336798
+size 118744
diff --git a/assets/videos/0001/217.jpg b/assets/videos/0001/217.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6820874cc6c490f890f89befbd9b05ae6e65bf03
--- /dev/null
+++ b/assets/videos/0001/217.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:085f330328b5caa9ea0fb8b87369e7facbe88c8fe97a17e6ecc3884d8a918fde
+size 117355
diff --git a/assets/videos/0001/218.jpg b/assets/videos/0001/218.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c586dc220ab27a21727577c1b7a479b21517661
--- /dev/null
+++ b/assets/videos/0001/218.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c22e75342f3690f0a70d51bfd3d995465fb8ffce928bc680446a83ebd58ba98c
+size 118636
diff --git a/assets/videos/0001/219.jpg b/assets/videos/0001/219.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dd35a0e8d6dbb4e7b55271c2daa84338f7814bac
--- /dev/null
+++ b/assets/videos/0001/219.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f4c7c7d18b99fe212f8be155a156f5d1678df217d6f5c6625c3497181026ebd
+size 118097
diff --git a/assets/videos/0001/22.jpg b/assets/videos/0001/22.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f0dfe95a9022f657a04ce99644b6a1d7b060797e
--- /dev/null
+++ b/assets/videos/0001/22.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f5b129db73e9ce6340765c194e4b9a132299e8a876464658ad1fc28238b6e0f9
+size 129552
diff --git a/assets/videos/0001/220.jpg b/assets/videos/0001/220.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6354a37336ea3a5dbef5a20b4aa8fce0354f30dd
--- /dev/null
+++ b/assets/videos/0001/220.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:54d690f98b8b0ab83e2939543ae9cc32e76a0aac12e3111e970ee65b86aeb3a5
+size 116820
diff --git a/assets/videos/0001/221.jpg b/assets/videos/0001/221.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7253e02e826897648f7ca1118ddfd7e38e155cf3
--- /dev/null
+++ b/assets/videos/0001/221.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9a12929c509fc69fdc6e42060d68f83311025b78b99ad23b00ffb94e4c38bd9
+size 116065
diff --git a/assets/videos/0001/222.jpg b/assets/videos/0001/222.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6418b32440c86f0a708336c6ed414aa5c61213f9
--- /dev/null
+++ b/assets/videos/0001/222.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1cc72d924e867a9c2015d72839f905a79d58d78cc01f83c80fd0d734b4aa2a61
+size 112955
diff --git a/assets/videos/0001/223.jpg b/assets/videos/0001/223.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d18b5993a90a07b66b01316a8e2df86371afb315
--- /dev/null
+++ b/assets/videos/0001/223.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a2ddbbc1074d7648a5dfe79d31b2d0060c2643b372f8a3252be26d444aae5e6
+size 114240
diff --git a/assets/videos/0001/224.jpg b/assets/videos/0001/224.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c77db01cd00236b28d2a58852c37e14465e8908d
--- /dev/null
+++ b/assets/videos/0001/224.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a6a555d690b4431124b0fbf07b9b69f67a2e84d8c23d9b1d39b7a7294c8fc62
+size 113348
diff --git a/assets/videos/0001/225.jpg b/assets/videos/0001/225.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4e16469b2bc2c99d59a497e9121ccab06c1ba7f0
--- /dev/null
+++ b/assets/videos/0001/225.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e9d73d444195ac98fd3cd5da5aaa3bb8f475a600d68ae043f3b39c2c2c1720d
+size 114280
diff --git a/assets/videos/0001/226.jpg b/assets/videos/0001/226.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..391769826d0316e0bb6116543f835a60afaeeb90
--- /dev/null
+++ b/assets/videos/0001/226.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b50b4acb94d4766cb9b56f3992833212d89c7e20044c55cdd6fef724fe8aaa18
+size 113622
diff --git a/assets/videos/0001/227.jpg b/assets/videos/0001/227.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0d04d667ea82ff8abacdb550f326c1025b6e9d11
--- /dev/null
+++ b/assets/videos/0001/227.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e63ca99791fe42489d1143f958b407f32fe8dcd9d81dfbe509735bb887278d2
+size 114390
diff --git a/assets/videos/0001/228.jpg b/assets/videos/0001/228.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1821980e2143eb61e118f4e2bced35ee905af6dc
--- /dev/null
+++ b/assets/videos/0001/228.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4e25d27e6f4176140910d65c994819325f9b438522dfab0fb6cc0504d6f5b83
+size 113074
diff --git a/assets/videos/0001/229.jpg b/assets/videos/0001/229.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..57c5e549c5e54b1c9cdc29a4cc74ec61d3812457
--- /dev/null
+++ b/assets/videos/0001/229.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01d8465328104c042ed601a955124a673f5c0a6623a38628606e3d63ce75b9cb
+size 114228
diff --git a/assets/videos/0001/23.jpg b/assets/videos/0001/23.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..30271c07633519934dada527df642a010927ef4d
--- /dev/null
+++ b/assets/videos/0001/23.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2bc1e690b06ac748e948bf23fcfdb40389b7fe8063b2dbab54f4f5d32275bcb3
+size 130061
diff --git a/assets/videos/0001/230.jpg b/assets/videos/0001/230.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ac81b412dfea48c61216e1f523c71cbca1278919
--- /dev/null
+++ b/assets/videos/0001/230.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cd757d2c175df674c36e0d03718663098d25653349fc0f9cf8c78a91ce4ef37
+size 114100
diff --git a/assets/videos/0001/231.jpg b/assets/videos/0001/231.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c4b0f1998c4a2210f7b25e615545a7e69ead924d
--- /dev/null
+++ b/assets/videos/0001/231.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef6dc122baeaf2e3b20873a5e8fb75dddb1a252a127e869402c68fa46720ca3
+size 115717
diff --git a/assets/videos/0001/232.jpg b/assets/videos/0001/232.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b95787264a5d63f95d54a8d78c20df283e3c29c0
--- /dev/null
+++ b/assets/videos/0001/232.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01ae73883f6382a5a568398944bda1a6d7afb23caa78bace94b52c3f7230224d
+size 115066
diff --git a/assets/videos/0001/233.jpg b/assets/videos/0001/233.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b3480af9c09ce0c2e22e268315ce812b26f27172
--- /dev/null
+++ b/assets/videos/0001/233.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6f4ee7ed8935b885e5bd27584967edf78b6db5ce76869d552f1768b268718ae5
+size 115876
diff --git a/assets/videos/0001/234.jpg b/assets/videos/0001/234.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..bcfa8e203fa596e93ba3d1d2b14372d2b9b7cd62
--- /dev/null
+++ b/assets/videos/0001/234.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6eeef5948e1df5613c6c9111737900efecda5eb9eb5f025a06069ba1afd0b986
+size 114662
diff --git a/assets/videos/0001/235.jpg b/assets/videos/0001/235.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8df6ece8c0fd0e774515cbacb9708dad55c8bfd0
--- /dev/null
+++ b/assets/videos/0001/235.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f0f6d4bf0841b85ab0e0e5859dc3267644e9c387d94e2b1725b5d20ad678258
+size 116735
diff --git a/assets/videos/0001/236.jpg b/assets/videos/0001/236.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..28800fe76376d67df3186c86453cafafad7692ee
--- /dev/null
+++ b/assets/videos/0001/236.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d72b00f4c78300837a833a8176bcf037707117a6a0dcda6404a5c8c8938da74a
+size 116941
diff --git a/assets/videos/0001/237.jpg b/assets/videos/0001/237.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..893f37df1b8fced346b200a8bee5d20980640ac1
--- /dev/null
+++ b/assets/videos/0001/237.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a821ac53891ffa863ac26139cb1d703eb43627bccb950cc16e6b90013e1fde84
+size 118439
diff --git a/assets/videos/0001/238.jpg b/assets/videos/0001/238.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..26a92779ec5b503077a61ec9f6d53c6899a20f00
--- /dev/null
+++ b/assets/videos/0001/238.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df44c2674bd5f4b8426414c57bdf9afac4c9629ac782b3981132004b161fe209
+size 118906
diff --git a/assets/videos/0001/239.jpg b/assets/videos/0001/239.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c2d4feacd5d55a37e3b0ef57c418dc5bb1441ceb
--- /dev/null
+++ b/assets/videos/0001/239.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a92508b89b0710bf6074c87788f44e7cc12c3caeb23a26ad72739c0921a72a87
+size 120955
diff --git a/assets/videos/0001/24.jpg b/assets/videos/0001/24.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ed2517b003b7c4ec1896148bea328436c6e052c6
--- /dev/null
+++ b/assets/videos/0001/24.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bc3132ab0c04f8186467a2a0361cdbb59ba500ad88e44431e88a97b34149cc40
+size 127522
diff --git a/assets/videos/0001/240.jpg b/assets/videos/0001/240.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cb2f182bffbdaac9968dd7b471785cab4c4711dc
--- /dev/null
+++ b/assets/videos/0001/240.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ce1600d02ea05234fd353fe7aadf58afd8872bcabc583f26870abea94197ea24
+size 119561
diff --git a/assets/videos/0001/241.jpg b/assets/videos/0001/241.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1f223ac969c582c76a79fefe6ee13b86a3b7c57d
--- /dev/null
+++ b/assets/videos/0001/241.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9a681ece70310bd6cf352919846889cc1b50f8dda073b72803fb2bedba21311
+size 119000
diff --git a/assets/videos/0001/242.jpg b/assets/videos/0001/242.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e20df64550fad86fc6cab8abd80690736a14bdd5
--- /dev/null
+++ b/assets/videos/0001/242.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e8774407d6e13ae52e93b1bb1e80aa8aff208563e425954aa6aee6eb1d6b0f95
+size 119105
diff --git a/assets/videos/0001/243.jpg b/assets/videos/0001/243.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5d081987be628e9193b3fd6d9e6306c0ce8315a3
--- /dev/null
+++ b/assets/videos/0001/243.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d55b46dea737dd747691106a67a08d4a3162a8e98a6dcf8a515807a7deee24a6
+size 121182
diff --git a/assets/videos/0001/244.jpg b/assets/videos/0001/244.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c91eb9d2fad24dc3f2bd12ad45ba0bd1eb8b3c06
--- /dev/null
+++ b/assets/videos/0001/244.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed5021a02c435825f5aac34b3773b20dd1c2d374f4f9463bdb8c2cb01b53e8d8
+size 120874
diff --git a/assets/videos/0001/245.jpg b/assets/videos/0001/245.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2081030fb9e233579fccabcfddb803ead8d16c7b
--- /dev/null
+++ b/assets/videos/0001/245.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1027fb0a269222991752584beb0bdade4fd585de10cb9f02abd431b0a87815ff
+size 121641
diff --git a/assets/videos/0001/246.jpg b/assets/videos/0001/246.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..093d43c739b9a11767453757fb2f5ab1925b0bbf
--- /dev/null
+++ b/assets/videos/0001/246.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9502e0052549cae5bf6de505ecb9dd7788ee9bbdb58d8878c9f00a1d846f2ac0
+size 122006
diff --git a/assets/videos/0001/247.jpg b/assets/videos/0001/247.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3cad13dff29e21d4b60da6627f661765a3e77dbc
--- /dev/null
+++ b/assets/videos/0001/247.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4fe09bb2f172f6e5bff0b1c367c1319865f604556a6d4c878490589e87e40b5
+size 123641
diff --git a/assets/videos/0001/248.jpg b/assets/videos/0001/248.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..724e8d8070313789f52a61621f1c505e32a8789d
--- /dev/null
+++ b/assets/videos/0001/248.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0946e25b9c5c16ee6b5158edc61b57bbbd7a551e88e52116fb85699be206edd7
+size 123086
diff --git a/assets/videos/0001/249.jpg b/assets/videos/0001/249.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e6f2f613ad52debe008fccee68539ed7a54816fa
--- /dev/null
+++ b/assets/videos/0001/249.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e6c1dc8de6a1e5280f148d522f02472511eebc05bed595ebb3c24ad9631edbe2
+size 123537
diff --git a/assets/videos/0001/25.jpg b/assets/videos/0001/25.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c987d0e41cb1041c596a782edc858d405f6e88cf
--- /dev/null
+++ b/assets/videos/0001/25.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1aeff052a2d1dbfba80b9e9b29347655f125cb996cfc08dbf4ffe448f1a92e70
+size 127477
diff --git a/assets/videos/0001/250.jpg b/assets/videos/0001/250.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ad58d0649ee7f2072c40e83b0799b30114cb3e90
--- /dev/null
+++ b/assets/videos/0001/250.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99a4cc129478cacda62d2b2ef89cdaa814063da1227077976064f4c45b1e928e
+size 128329
diff --git a/assets/videos/0001/251.jpg b/assets/videos/0001/251.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fcdb66c08280472a7d187a210847a310c3337fe2
--- /dev/null
+++ b/assets/videos/0001/251.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f69b8a854e9807e8b4980461cf344dd8c6ffe3592b59e8b37809cd114be2de0e
+size 125682
diff --git a/assets/videos/0001/252.jpg b/assets/videos/0001/252.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1ff1c07b2c5b81ce6ffd5f34e52bea744785316b
--- /dev/null
+++ b/assets/videos/0001/252.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90baeb96b20b6a699d40b7686a766ef94e28c23acd2645869dd78edbbd17a707
+size 126396
diff --git a/assets/videos/0001/253.jpg b/assets/videos/0001/253.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cb5752c314115634689a9994bfbe38f2d57635d6
--- /dev/null
+++ b/assets/videos/0001/253.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4cf9b36319965babb7534969dfad2b80e885cf79fbab07dfc728cda378fdcdc
+size 126236
diff --git a/assets/videos/0001/254.jpg b/assets/videos/0001/254.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..88fe404a888bddb3e323afdfafc6608d46f5d1bb
--- /dev/null
+++ b/assets/videos/0001/254.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d9d6e71aec242bcdee96b8ad7edbd7f5f5fe65e8f9ceb0b16a4e6e5ddba870eb
+size 127341
diff --git a/assets/videos/0001/255.jpg b/assets/videos/0001/255.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9066a75b8830ca17901369ce577396b01a6d0d05
--- /dev/null
+++ b/assets/videos/0001/255.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc7d474a2b13a7126e1dbb7824f53258116c580c49cab460c9ed0b500934e7d0
+size 126594
diff --git a/assets/videos/0001/256.jpg b/assets/videos/0001/256.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eef96f5e9582f70c5366422dfa85797e2e706add
--- /dev/null
+++ b/assets/videos/0001/256.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:083667d533c8d12afb406ce4d07b755e3f65640aa521c54aeaa4c81fe365e3b4
+size 126045
diff --git a/assets/videos/0001/257.jpg b/assets/videos/0001/257.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eb481bc32d8abde5388b3a04e30f6b278719a14f
--- /dev/null
+++ b/assets/videos/0001/257.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9f7219925190e1b77782c4bbc0fcbbd8c302fedee7bd2e02a99b12a46df2c87
+size 127822
diff --git a/assets/videos/0001/258.jpg b/assets/videos/0001/258.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fb63595433864efc9d1fee2becfd3a3f228e39b2
--- /dev/null
+++ b/assets/videos/0001/258.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdc04e7e2854b52439f6ecde5ba7626b1f50e3c17b8f27ae9e4f018ac1c8dfa3
+size 126161
diff --git a/assets/videos/0001/259.jpg b/assets/videos/0001/259.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..025d8793d217c085892cac6e5ebe0b66b3ff6ed7
--- /dev/null
+++ b/assets/videos/0001/259.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e422b57b65e40ae12b930b087676aacfa03ba3e2c81f6e6f5e497b9ee7f553f0
+size 125186
diff --git a/assets/videos/0001/26.jpg b/assets/videos/0001/26.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..51242327d78bfa445537beaffec2749d28146ccd
--- /dev/null
+++ b/assets/videos/0001/26.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:748d4afb622ee1612ba8752d103ba3111a249447f0f50fac1c04463f98f735de
+size 124551
diff --git a/assets/videos/0001/260.jpg b/assets/videos/0001/260.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7e0de1f775172a8a130fcef637134b2632d10d26
--- /dev/null
+++ b/assets/videos/0001/260.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f258a60d818fff3a4c5e0ee0dd90bc9319772136a0a0db69f491e5701f7481d0
+size 122808
diff --git a/assets/videos/0001/261.jpg b/assets/videos/0001/261.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..dc63582d03839aabfdcade2ca55c5afb92a4bc04
--- /dev/null
+++ b/assets/videos/0001/261.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e1e195e867abdf744eaf0d3e8a13678194959f25c1c1ab348abcb81b3572bf48
+size 124068
diff --git a/assets/videos/0001/262.jpg b/assets/videos/0001/262.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f40cc2e50e3e0d8cbbc3171078f00c1eb0f1ba58
--- /dev/null
+++ b/assets/videos/0001/262.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b658765cc74b6e98b56799e064416cd2a718e1194bb70a6f78bfc8abd7a077cb
+size 124104
diff --git a/assets/videos/0001/263.jpg b/assets/videos/0001/263.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1e2fc80428aca7974ccc3fed004a6002fed4ae4a
--- /dev/null
+++ b/assets/videos/0001/263.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc05df5f57e60755540c500f3b23d7e1e287b733bda57ebe80a1f6ee7ed4dc50
+size 126530
diff --git a/assets/videos/0001/264.jpg b/assets/videos/0001/264.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..eb6122806a8ba922a990223fb97f299ce2094eeb
--- /dev/null
+++ b/assets/videos/0001/264.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a88b262c65c4e52ee4ba10feaa5d2930868edda97d30aa630ae00766f32e301e
+size 125481
diff --git a/assets/videos/0001/265.jpg b/assets/videos/0001/265.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a06480c631f369d0010bf51e9e4c6b4356569158
--- /dev/null
+++ b/assets/videos/0001/265.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f13512122a887b47d8c7e74e312e01ba38bb52d2369052957299fa5c6d47d18
+size 125518
diff --git a/assets/videos/0001/266.jpg b/assets/videos/0001/266.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..63bb37e5921c6f0c6c809a11530b6cc593a8ed69
--- /dev/null
+++ b/assets/videos/0001/266.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:990d94f2010cd82607fb3888a0b81e5c9e3fbb14a80995ea1c380fd855c2a98d
+size 123541
diff --git a/assets/videos/0001/267.jpg b/assets/videos/0001/267.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9f962fea6728e3a20bf69f90b0e97868aabb787b
--- /dev/null
+++ b/assets/videos/0001/267.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ac5b69ebea338bb9b0a72b7ce506c1e4639b5fc2ccbc2fffc20c806d00a3d8e
+size 123916
diff --git a/assets/videos/0001/268.jpg b/assets/videos/0001/268.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a9511eeb969995b4fd589b4433ac9f586f195db4
--- /dev/null
+++ b/assets/videos/0001/268.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:799fa7c84379c908bfded48bce6949b5c30216d9975325e6ff602d0ad1e610d9
+size 123609
diff --git a/assets/videos/0001/269.jpg b/assets/videos/0001/269.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0dce474d29622da2e93d07553008601fbb2fac1a
--- /dev/null
+++ b/assets/videos/0001/269.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9db27e2d87e00bcd9369b9acea06be2d5096781e31563b211c5ea941f249887
+size 123909
diff --git a/assets/videos/0001/27.jpg b/assets/videos/0001/27.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f5b3e3e55409f139483e71a02cb0679b453bf536
--- /dev/null
+++ b/assets/videos/0001/27.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d69c7e1566e35c0e722a284abfe1596888b121b788ec9937e3e360e2644949e0
+size 125380
diff --git a/assets/videos/0001/28.jpg b/assets/videos/0001/28.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..819b921a9730f6d28e26839a6a1e121af63b6747
--- /dev/null
+++ b/assets/videos/0001/28.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec6e4b16202a7377bae51e5de326ced54c3ff062f8cff5e47efffe6f1f847285
+size 124457
diff --git a/assets/videos/0001/29.jpg b/assets/videos/0001/29.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..46d2662c21bb45fad5ab4c5c66fd938ff98e77e7
--- /dev/null
+++ b/assets/videos/0001/29.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:87c6b9e80dbc5e64d516673e490d9005adfa890bb6291f02d8f09181cde7ad06
+size 125253
diff --git a/assets/videos/0001/3.jpg b/assets/videos/0001/3.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9a2cd5c7f1abe69155abc5d6c8c2dd2dd4a3c95b
--- /dev/null
+++ b/assets/videos/0001/3.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:032544f27d6a9c3181822102d72125582801a181da7e75bec95e0384a735d8db
+size 139801
diff --git a/assets/videos/0001/30.jpg b/assets/videos/0001/30.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..40b14c2b3bbf3e5d5898599ee2cd4ed91aa15370
--- /dev/null
+++ b/assets/videos/0001/30.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:186cb03739b193e78a036a49b0ce1da178b1b0eae0f9051eb0b5f5c2a439d2d6
+size 126275
diff --git a/assets/videos/0001/31.jpg b/assets/videos/0001/31.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9ccf430fa4a36bb68b0abf012bc5234b9be0768
--- /dev/null
+++ b/assets/videos/0001/31.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f259cf6aaf817b3570d30c9055e7ae235dd82f63538a6b543046dd99a47ab309
+size 127245
diff --git a/assets/videos/0001/32.jpg b/assets/videos/0001/32.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7dd8b6a0dc1641db1bc17e9085736f11587fdb86
--- /dev/null
+++ b/assets/videos/0001/32.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc88aeb091146b9a538c71fb2e39f0d32c8ef67d7e3d7dc3902163f352976b3c
+size 128126
diff --git a/assets/videos/0001/33.jpg b/assets/videos/0001/33.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8da7b3cc796adfefd002f0df596bafaae3708890
--- /dev/null
+++ b/assets/videos/0001/33.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b5abd0a91d438cc65629782fffbe048076f78f96d70d7aea6bf65f19a966b50
+size 127397
diff --git a/assets/videos/0001/34.jpg b/assets/videos/0001/34.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c6e5cd4804b2e1ad2246ebb14920764dac1b3ddf
--- /dev/null
+++ b/assets/videos/0001/34.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90d84020483550058dd7d196ce71f6cb549646917bdf9d05f30492727ada0546
+size 126980
diff --git a/assets/videos/0001/35.jpg b/assets/videos/0001/35.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d541106d99ef4bf5829352d420cec906fc08e15f
--- /dev/null
+++ b/assets/videos/0001/35.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da980c3f91b6763f67087b515c8c70286899b097a4249fce6a651975a0d5bdbc
+size 126933
diff --git a/assets/videos/0001/36.jpg b/assets/videos/0001/36.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6e5ef9cda16ae51dce5b53ff0346039f251d2cff
--- /dev/null
+++ b/assets/videos/0001/36.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5234bfed011807de2c829668b3424d2827e2f0924ab2ce077df3afd2488dcb6b
+size 124335
diff --git a/assets/videos/0001/37.jpg b/assets/videos/0001/37.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fa03fcb6de0e051048ee876f2dce201c559b13c3
--- /dev/null
+++ b/assets/videos/0001/37.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da3fdfc955048b9fa4b8af8215a187d5d12d0aab774556264a79e3f1dedc0139
+size 125305
diff --git a/assets/videos/0001/38.jpg b/assets/videos/0001/38.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b439d2140d8221548c3bc40ea7de8d1741c9a264
--- /dev/null
+++ b/assets/videos/0001/38.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:633bbc4f16f9961959e5b19d1675e5777646ee44ab5dc58248346726790667c1
+size 124350
diff --git a/assets/videos/0001/39.jpg b/assets/videos/0001/39.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..946eb2d75236bad7aaf28f3e25d597893ad3bebd
--- /dev/null
+++ b/assets/videos/0001/39.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cb6f15490b490fdf08f6c556bc5ae410a30a01bc30c76a216b813b87a5969dad
+size 125424
diff --git a/assets/videos/0001/4.jpg b/assets/videos/0001/4.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0ea803e47ad974cb15063350243fcd9add58ce59
--- /dev/null
+++ b/assets/videos/0001/4.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:435b9652924519e76af6a0625613f9c7a92db55b7387958f510963f5451ee0d0
+size 142809
diff --git a/assets/videos/0001/40.jpg b/assets/videos/0001/40.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..466f5d245250eb25ed4dedb9776bed5ba0b22bce
--- /dev/null
+++ b/assets/videos/0001/40.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c88dff0df91709aa54c324633f3c788a2a5288c51a41a4288f964e2adaafce58
+size 123992
diff --git a/assets/videos/0001/41.jpg b/assets/videos/0001/41.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b62981868d3e1324402be43bf7b6d856c3b38354
--- /dev/null
+++ b/assets/videos/0001/41.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2a52bcd97b393ddff7114712c45b925df54fc6e35683fe18b3f70b8ea0fcfdc
+size 125065
diff --git a/assets/videos/0001/42.jpg b/assets/videos/0001/42.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..627fd6372d89b7ed106cb2a2e1a5d48fba186070
--- /dev/null
+++ b/assets/videos/0001/42.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6a91a0b3c136cb504cf866869f23c109b9265a0aff012d4034602733243c2083
+size 125116
diff --git a/assets/videos/0001/43.jpg b/assets/videos/0001/43.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c4cc70cee88d6af1630e6e5b5343529186dfca3b
--- /dev/null
+++ b/assets/videos/0001/43.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:81d6b3e57243fdbedb5ee9c54c1067a331df3bb7e3e9d492ffacd72bdbf909bd
+size 125902
diff --git a/assets/videos/0001/44.jpg b/assets/videos/0001/44.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ad2b680ed71d85a796290e514c7cd2b30e1b89be
--- /dev/null
+++ b/assets/videos/0001/44.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:32eddd4e307f27ff864c88e2c9ec5631693acf5cc99c97c21bc057ffe389a232
+size 124980
diff --git a/assets/videos/0001/45.jpg b/assets/videos/0001/45.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..89038ce039b9cd9c0b23dd91c6f625c383c7d3ae
--- /dev/null
+++ b/assets/videos/0001/45.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8513ec3c207ec852c43f7dbf45d54f8ac696504cf87cb56f988f7f6f55cf4dc1
+size 124449
diff --git a/assets/videos/0001/46.jpg b/assets/videos/0001/46.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7381d1f1c82729c7aabb2838baa2aff7585f2b30
--- /dev/null
+++ b/assets/videos/0001/46.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bef770e2352f90b574ebb6c858dc9ecc646070ed5e95f894d7c78b9a6258268b
+size 124041
diff --git a/assets/videos/0001/47.jpg b/assets/videos/0001/47.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b9d1be9df050b13ce23a5d8d5ca5462a4ed4fd9c
--- /dev/null
+++ b/assets/videos/0001/47.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:264c24244177d47fd6c462bf0dedd308c7747a563fe42d5bcc7295fe7f681685
+size 125093
diff --git a/assets/videos/0001/48.jpg b/assets/videos/0001/48.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..847c32a05217b79c047297fcf6d3fa381b51d756
--- /dev/null
+++ b/assets/videos/0001/48.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d508e4758044aab366f12efdbdb3a376ee150415e73d55882a919d7981176f7e
+size 123358
diff --git a/assets/videos/0001/49.jpg b/assets/videos/0001/49.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..68e9e2b1eed9fc27ed5c5988f528f16b1060fb2f
--- /dev/null
+++ b/assets/videos/0001/49.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a85ee0bd9eb33b0a19d8b4ed40dfab88a1247f595f12ebd1a518b1367df22fa4
+size 124614
diff --git a/assets/videos/0001/5.jpg b/assets/videos/0001/5.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f4dd6c8a87d8cf9137bf78c62037165bc8654b8c
--- /dev/null
+++ b/assets/videos/0001/5.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b4b2c6feea5f318e2865a7e08adbea101d59f53bcdb5187e6df1d73273ba4d5b
+size 139860
diff --git a/assets/videos/0001/50.jpg b/assets/videos/0001/50.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..f6e6968cb9e3921a7620b5db75c1a5860b8038d7
--- /dev/null
+++ b/assets/videos/0001/50.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f47e6cc3f4f6f32d8dda03bad9af7b52db966b74f9f7f8403780a1e6f4c63b8
+size 124420
diff --git a/assets/videos/0001/51.jpg b/assets/videos/0001/51.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3e6f78ee10b661e078ab3fe94bf908c2a69566e9
--- /dev/null
+++ b/assets/videos/0001/51.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b70e62f9a528644a73ca9e5601d6d286f17389da7ab4bcc8429e9837aa28e0e7
+size 124744
diff --git a/assets/videos/0001/52.jpg b/assets/videos/0001/52.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..c7c59125027eeee0f8d3a489d0ad22440c2cbacc
--- /dev/null
+++ b/assets/videos/0001/52.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ab23ffc8345ab2f08089b171fc6be62bae8d3602a24892dc505b103acb30b0db
+size 122806
diff --git a/assets/videos/0001/53.jpg b/assets/videos/0001/53.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..de284d441b65b49e18a3dfc40570b3d5b689a3d8
--- /dev/null
+++ b/assets/videos/0001/53.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:830de3d62be80661065dac7cad6e17be1c70819ddaf020b768d7e64cab14eb0a
+size 124103
diff --git a/assets/videos/0001/54.jpg b/assets/videos/0001/54.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6c1ef56ca81dfed9b2f126388fa20d13c832b818
--- /dev/null
+++ b/assets/videos/0001/54.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:10a95964278e5c764551631a1964a1260d5cdea6a072d1207e30e5d0f159489a
+size 123796
diff --git a/assets/videos/0001/55.jpg b/assets/videos/0001/55.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..02f24253d368cf9903b32864dfa4de8b2b1b9dc2
--- /dev/null
+++ b/assets/videos/0001/55.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:197cf504b2b1099772c22352b563d1c5d650d82fc379f6d22efbdd1d019d7577
+size 124214
diff --git a/assets/videos/0001/56.jpg b/assets/videos/0001/56.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..455ebef61ad12763156fa363de795b477a3770ad
--- /dev/null
+++ b/assets/videos/0001/56.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:efbd7e1f48194f667a0542f64b496d3b942325047dadd131d7cf618c2d8684b1
+size 124626
diff --git a/assets/videos/0001/57.jpg b/assets/videos/0001/57.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..92136b86ab2812f3ac571a35249b34eb3707f0c1
--- /dev/null
+++ b/assets/videos/0001/57.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9e551187b805107b02612ba9197e4d1811e9433fb73ed8e8881c5e006c63959
+size 124236
diff --git a/assets/videos/0001/58.jpg b/assets/videos/0001/58.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..863894958c900a742101e36927d5df1b1e46afcb
--- /dev/null
+++ b/assets/videos/0001/58.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84b9cc3a694252cb7449806703854d6839c12fea0ba924aafd2d681bac7563f9
+size 126238
diff --git a/assets/videos/0001/59.jpg b/assets/videos/0001/59.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..035372984750fdb86cde20cbdf6e45097c6f45e9
--- /dev/null
+++ b/assets/videos/0001/59.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b65c17574a2b1d2d78af952bf3d7aa24b67fd7a2f676d941504a43aef56850c
+size 124453
diff --git a/assets/videos/0001/6.jpg b/assets/videos/0001/6.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ad55091435424c708f91529ed27386cdd6cfc5fe
--- /dev/null
+++ b/assets/videos/0001/6.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e1fe024222c18d8701c61080b782736fea7240cdad32db8d7ea0d588958c3e0
+size 140116
diff --git a/assets/videos/0001/60.jpg b/assets/videos/0001/60.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d2e68259e515576d1404532a110a553c84679b7
--- /dev/null
+++ b/assets/videos/0001/60.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6aa943e4b9e5466222487d69b8cacda721dfe5716fa72854e0251af62ceff991
+size 123964
diff --git a/assets/videos/0001/61.jpg b/assets/videos/0001/61.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d5e1ba214a1a703e930bf6443e3454bf32573317
--- /dev/null
+++ b/assets/videos/0001/61.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e206c302a054e3b4b6f51a3f8d3b32e5f0df7997fa2451298d8d8ec18e7d8563
+size 122163
diff --git a/assets/videos/0001/62.jpg b/assets/videos/0001/62.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1b19d75c6108a9cbd1f7427ab3893476b708e3a2
--- /dev/null
+++ b/assets/videos/0001/62.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2004a1aacfa91285e7504e92b6d28341cecc31b9bea9b69c74cb1eacde138396
+size 125603
diff --git a/assets/videos/0001/63.jpg b/assets/videos/0001/63.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..824deeb2897c83edd16195e8cc17fd5cbbcc740a
--- /dev/null
+++ b/assets/videos/0001/63.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3a474dec78a72fc3c4ccda4265b3c998b5c869ce3a7fe5eb712c028361d07d3d
+size 123772
diff --git a/assets/videos/0001/64.jpg b/assets/videos/0001/64.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..36c03e9649c64dd900c8f80c38a2f1310996ad88
--- /dev/null
+++ b/assets/videos/0001/64.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:709a8e47789ac0f5148b9e6d422577224085747c96156ad4f2ac9a722e55e18a
+size 123005
diff --git a/assets/videos/0001/65.jpg b/assets/videos/0001/65.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..98c8048386b0de35a24de9a13b204f2c5574400f
--- /dev/null
+++ b/assets/videos/0001/65.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1aa97bdccd3ec8fcd68941cc45bea6c91a3d12466157d5061c78a65d1eeabbdc
+size 128147
diff --git a/assets/videos/0001/66.jpg b/assets/videos/0001/66.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3c57c7f7b137708ee6eb86701a87100d6846ed9e
--- /dev/null
+++ b/assets/videos/0001/66.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f731cc0719ee8423e389aa061adce8e028a5a7412fe43d2783a7180189932b7c
+size 124353
diff --git a/assets/videos/0001/67.jpg b/assets/videos/0001/67.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a777e29f8394397e210d2a0e091b129e3cd676c8
--- /dev/null
+++ b/assets/videos/0001/67.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d4e465238a61eb819fbada2c83e5fcae37d41ce39bf621ed6d63a326a09ecc3
+size 121755
diff --git a/assets/videos/0001/68.jpg b/assets/videos/0001/68.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..21331be77b28be0112252c5ca0ee6d0db46f2405
--- /dev/null
+++ b/assets/videos/0001/68.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4e9ad14cf471ee21dc3e93dd9ee9c360bc61784f05b166cd940b823556afeee4
+size 123913
diff --git a/assets/videos/0001/69.jpg b/assets/videos/0001/69.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a5bc54b092fee8d2133a3315f5df5d6411a70e42
--- /dev/null
+++ b/assets/videos/0001/69.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a2b3cab9d626e12c1ac66fc87b6dfbc200909e42af8ced61e4668297bc95b4f7
+size 118381
diff --git a/assets/videos/0001/7.jpg b/assets/videos/0001/7.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..fece167ec8a648152c4149f868436c383e67eff5
--- /dev/null
+++ b/assets/videos/0001/7.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0309926234a365a8080d14f51768443cdf882aa639bd7567cfa4377274dc0909
+size 138059
diff --git a/assets/videos/0001/70.jpg b/assets/videos/0001/70.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..44d3d6dfdc4ce0960d80f27584fa3cef881322a2
--- /dev/null
+++ b/assets/videos/0001/70.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b770c20be009c57bb1c2ce6eb7c483a2faa46b51a02bc75cbb47e5a3d88c230f
+size 120780
diff --git a/assets/videos/0001/71.jpg b/assets/videos/0001/71.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e27ea5e04d80e2337e3384545215c2126db88db7
--- /dev/null
+++ b/assets/videos/0001/71.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c6fcc718b2a586f468a429df92733b87d9241b06737a62cbf632867c9c105ea1
+size 120099
diff --git a/assets/videos/0001/72.jpg b/assets/videos/0001/72.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..3ba847d35a195fa9f4b575d7f97b8bd7e6224c70
--- /dev/null
+++ b/assets/videos/0001/72.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9342d15228ab37667a9ea14fadf295100e7c0435119d0822c1a74f9cdb493f5
+size 121437
diff --git a/assets/videos/0001/73.jpg b/assets/videos/0001/73.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..4b156e432eadc04751d4c87be7bffbb6551eee5d
--- /dev/null
+++ b/assets/videos/0001/73.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bbe2af099377cab4022fdcf9529192c368e00620ba742d6d861b11dcdbfa9a33
+size 118698
diff --git a/assets/videos/0001/74.jpg b/assets/videos/0001/74.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9293a6e90d6939ca2a9b2b9ee09bbfa514f4dba
--- /dev/null
+++ b/assets/videos/0001/74.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e9380390acd2b67040b810f27579ab02f8780e9a6b650349d711feaedb4c8cd0
+size 117838
diff --git a/assets/videos/0001/75.jpg b/assets/videos/0001/75.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..72cccdfc1a1379f6e5c5195d7c1f9faa48898fd4
--- /dev/null
+++ b/assets/videos/0001/75.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80de7e2fbe93042c82bb162257ff2546f5ddbd7ad446ad70ac4bb884387e9b0d
+size 119795
diff --git a/assets/videos/0001/76.jpg b/assets/videos/0001/76.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b06aa1bca5fce5b3312ea18ba0ed56e33afe4b16
--- /dev/null
+++ b/assets/videos/0001/76.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f6cbfec9a753f2d42154ab5ab7ea0d9a2e04d2b4aa7e041d75fe308ad9f21fb7
+size 115392
diff --git a/assets/videos/0001/77.jpg b/assets/videos/0001/77.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0724eb14234183f99cb9c447af261f13919278d2
--- /dev/null
+++ b/assets/videos/0001/77.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1dc65f5fb14965a5d3fe918974555e96abdc93acc91662e50366fe0df87b9bb9
+size 117661
diff --git a/assets/videos/0001/78.jpg b/assets/videos/0001/78.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6870223eaac629869e8a13095010bb9aef940b63
--- /dev/null
+++ b/assets/videos/0001/78.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8be8995f1fe312173403905726c799670b5eff72d4392d94656fabe14aa40558
+size 116678
diff --git a/assets/videos/0001/79.jpg b/assets/videos/0001/79.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0a19fadf1ba6f2957c5cfb04a4eee4a0167672ee
--- /dev/null
+++ b/assets/videos/0001/79.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c0a7c8a5c57b98c289d0680b9f68ad6bfef96b79d650a88dc176457123845bc
+size 118967
diff --git a/assets/videos/0001/8.jpg b/assets/videos/0001/8.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..93e491787d72f2b7a3e3e76244e712dc3c715e83
--- /dev/null
+++ b/assets/videos/0001/8.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0931f2c3a85bea759cf9149304f9c70edfa4cba359d963af2a99fafc7a882cd
+size 140469
diff --git a/assets/videos/0001/80.jpg b/assets/videos/0001/80.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d075b189f5079ec994bb198d14937826fc3de254
--- /dev/null
+++ b/assets/videos/0001/80.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:58632b5a771c7e86edb9083dd5be2003c9094fe2f04df108e0cb7359f248a98e
+size 116146
diff --git a/assets/videos/0001/81.jpg b/assets/videos/0001/81.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..871b3606201f878c911825ea7076cc5db8824c08
--- /dev/null
+++ b/assets/videos/0001/81.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82151bba4bcce8eaac2bcbe853af7ed54cf5a5ca91052c4363b7840db2cf0e33
+size 118054
diff --git a/assets/videos/0001/82.jpg b/assets/videos/0001/82.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a18298832ec5eb273d5ec21c1b983bc992d426bb
--- /dev/null
+++ b/assets/videos/0001/82.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b208d9be0a4eaec4547e97a7fe50a0b647360a97e010f8daf1838f31bddbcfc
+size 119128
diff --git a/assets/videos/0001/83.jpg b/assets/videos/0001/83.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d23864714ba5f9fef8429355d10fc167f236f777
--- /dev/null
+++ b/assets/videos/0001/83.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b056847c1301e0acd2b60830ae23330993505d056841bd13874c6e1fd1ebc8c5
+size 121993
diff --git a/assets/videos/0001/84.jpg b/assets/videos/0001/84.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ca352be2416e7a4709e326eda37e365feb8f011c
--- /dev/null
+++ b/assets/videos/0001/84.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cca3b6c2a6ae9b546fe544d37a5ffa346130104225c4432d9fd7d3f528f59c46
+size 120404
diff --git a/assets/videos/0001/85.jpg b/assets/videos/0001/85.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5052fa5aa3a81c392e196498b6029ef526eedf58
--- /dev/null
+++ b/assets/videos/0001/85.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a60b3525633530a2761d0de3db081bd23ea573347463b88565043bc2b3537b11
+size 118562
diff --git a/assets/videos/0001/86.jpg b/assets/videos/0001/86.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..731eee7c1b3a978764a4a28d826560ca9b6569c0
--- /dev/null
+++ b/assets/videos/0001/86.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:161457ea926b8587ac2f66f20603d72bb63ed31ae057feb99d5b1c555dbaffbc
+size 121415
diff --git a/assets/videos/0001/87.jpg b/assets/videos/0001/87.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..0f5e1247a2c243e96aaa40f2b3cd8000722b5489
--- /dev/null
+++ b/assets/videos/0001/87.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3470f9605142da596b6a835a40027bd299099699c5295ba2aededfd947b6c4c
+size 118496
diff --git a/assets/videos/0001/88.jpg b/assets/videos/0001/88.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..ca2c558fb55672d16f5ad33facf7faa04dd5c0fd
--- /dev/null
+++ b/assets/videos/0001/88.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:358bbe2f8b2da282906254ab264c3e1200042695609cc1e5d12039ed99b679a9
+size 116900
diff --git a/assets/videos/0001/89.jpg b/assets/videos/0001/89.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..cd528826eeea6902fbf304569753629e38b6d17c
--- /dev/null
+++ b/assets/videos/0001/89.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:455f5dcba53876b2fe658aae72041632b704079ec2a3ad3356679132e55d1856
+size 114929
diff --git a/assets/videos/0001/9.jpg b/assets/videos/0001/9.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..d7d2c481a23c0f77aa56fc2e837f4809e4ef0658
--- /dev/null
+++ b/assets/videos/0001/9.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8554245786b72d1c61dbefd6e07bea954498ab4d56bf14f8a24e0291686c260d
+size 140359
diff --git a/assets/videos/0001/90.jpg b/assets/videos/0001/90.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..48f774c135b8d5237724160c4e244769da7ad665
--- /dev/null
+++ b/assets/videos/0001/90.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:324de19320bdd3104140d84ac63200540b16f952450d35e60af600f32612bb1a
+size 114603
diff --git a/assets/videos/0001/91.jpg b/assets/videos/0001/91.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1aae9a6e0ebdb1ff180511489e5e26f49f406914
--- /dev/null
+++ b/assets/videos/0001/91.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bfaa1195223e165ac9627be9fd10f3245461cd08d4085164c479dbc74730d267
+size 113788
diff --git a/assets/videos/0001/92.jpg b/assets/videos/0001/92.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..8469f0e69512009090e0930077e200a11d547955
--- /dev/null
+++ b/assets/videos/0001/92.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82466a22d9d085fb24cef0aa8c393e01d9140ae8a800772c446c65335359a616
+size 114035
diff --git a/assets/videos/0001/93.jpg b/assets/videos/0001/93.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..5af157d2781041780e68f00277c15a94286c6c65
--- /dev/null
+++ b/assets/videos/0001/93.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:885e5a6496c5a6a1e1f411acfdcf83eefb327e111dca01fee8022581b5aae7f2
+size 116893
diff --git a/assets/videos/0001/94.jpg b/assets/videos/0001/94.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..a5abc2598848abcd89e4e2d4eccf61574aaf1f27
--- /dev/null
+++ b/assets/videos/0001/94.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6afe6a61bc8a9b9f1cec68a7d273a4e4f68ced111f8e913be56dd38a4682b1c
+size 113084
diff --git a/assets/videos/0001/95.jpg b/assets/videos/0001/95.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..6e5e436a027c67a29a55c7729a97c33e0b7a92ac
--- /dev/null
+++ b/assets/videos/0001/95.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2f54e6506ca1cdfb3115877205c936e11a7a83fa8b4818dcc8c7bb48924aa32
+size 111817
diff --git a/assets/videos/0001/96.jpg b/assets/videos/0001/96.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..1ac7fe0d0752882be1cb77d8a9ea0f2d5360a560
--- /dev/null
+++ b/assets/videos/0001/96.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e84a7210743e0d632dcd661610e204c285392880dc14a081b4158cc1f10851d
+size 110882
diff --git a/assets/videos/0001/97.jpg b/assets/videos/0001/97.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..9d8d0dbf83bbe7e30b366870459ef31bbd281d9c
--- /dev/null
+++ b/assets/videos/0001/97.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2a15df7744f9fa6932ccb22226d5eed87a006427b57ea3a0f01f1c946a3b3a9
+size 113221
diff --git a/assets/videos/0001/98.jpg b/assets/videos/0001/98.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..309006287138610dcfe8568c75b12fe19c51bd33
--- /dev/null
+++ b/assets/videos/0001/98.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fe41a3d7738234a4e3462056f7466f1f1646d4e1de62fa707f2072a3c2a717a
+size 111474
diff --git a/assets/videos/0001/99.jpg b/assets/videos/0001/99.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7e7d7794a4f394cbabbecb34c69efe2fc2c64eee
--- /dev/null
+++ b/assets/videos/0001/99.jpg
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecf0ec65bfea601f2b189d0f42c357396da2b23646465889e8a9383d1cdda39f
+size 114268
diff --git a/assets/videos/bedroom.mp4 b/assets/videos/bedroom.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..07f7eb8eeebbb81f7c9169121fa94495381bfd06
--- /dev/null
+++ b/assets/videos/bedroom.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1be76d5d19b066e8ad7c565d88a98e11a8f8d456a707508a7aa35390def70e30
+size 2380401
diff --git a/bedroom.mp4 b/bedroom.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..07f7eb8eeebbb81f7c9169121fa94495381bfd06
--- /dev/null
+++ b/bedroom.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1be76d5d19b066e8ad7c565d88a98e11a8f8d456a707508a7aa35390def70e30
+size 2380401
diff --git a/football.mp4 b/football.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..50bc7c54de05ee103e9a253de49d6e727ae23847
--- /dev/null
+++ b/football.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ee673ec90911b6bcf5e7e6798b3fd7a935882b97ce0d6aa44b2e5d37c4303b3d
+size 638434
diff --git a/openfield_5mice_3s.mp4 b/openfield_5mice_3s.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..0c1ba075618107657a6048f9279916712836e0c2
--- /dev/null
+++ b/openfield_5mice_3s.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:759f9b4b618a8d67b89e35b724cc140b16e89ad68c437f4d98dbb6bd7e0ebefa
+size 743353
diff --git a/openfield_5mice_5min.mp4 b/openfield_5mice_5min.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..a4d9684dca31463df268c41823a0c700b141cf32
--- /dev/null
+++ b/openfield_5mice_5min.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:044f7123db2b05178444747d818d3e9a16c88319c032bffc71b2f6da63801a08
+size 155602739
diff --git a/openfield_5mice_6min.mp4 b/openfield_5mice_6min.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..f0dd50e88ca96ac45d233b637e9c2bee6227eee4
--- /dev/null
+++ b/openfield_5mice_6min.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e10e29f1d6a9d9581222fdcdabbdff548bd863af75f7be6b8b5052bce5a300a
+size 186614504
diff --git a/openfield_5mice_7min.mp4 b/openfield_5mice_7min.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..501ee74fe62c80daed7edb8f0487ece6729e9584
--- /dev/null
+++ b/openfield_5mice_7min.mp4
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b9d6d3f2782bce11e37805c45eed19431ed1b12e580e86d92e73a31fe44131d
+size 217612562
diff --git a/packages.txt b/packages.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f5247aa50ef3a008666bf130822fd1c2dedff086
--- /dev/null
+++ b/packages.txt
@@ -0,0 +1,16 @@
+ffmpeg
+libsm6
+libxext6
+libnss3
+libnspr4
+libatk1.0-0
+libatk-bridge2.0-0
+libcups2
+libdrm2
+libxkbcommon0
+libxcomposite1
+libxdamage1
+libxfixes3
+libxrandr2
+libgbm1
+libasound2
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000000000000000000000000000000000000..e4998de0f969cd0302f96c38cb355bcc0d4287c6
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,131 @@
+[build-system]
+requires = ["setuptools>=61", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "sam3"
+dynamic = ["version"]
+description = "SAM3 (Segment Anything Model 3) implementation"
+readme = "README.md"
+requires-python = ">=3.8"
+license = {file = "LICENSE"}
+authors = [
+ {name = "Meta AI Research"}
+]
+classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Science/Research",
+ "License :: OSI Approved :: MIT License",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
+ "Programming Language :: Python :: 3.10",
+ "Programming Language :: Python :: 3.11",
+ "Programming Language :: Python :: 3.12",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+]
+dependencies = [
+ "timm>=1.0.17",
+ "numpy==1.26",
+ "tqdm",
+ "ftfy==6.1.1",
+ "regex",
+ "iopath>=0.1.10",
+ "typing_extensions",
+ "huggingface_hub",
+]
+
+[project.optional-dependencies]
+dev = [
+ "pytest",
+ "pytest-cov",
+ "black==24.2.0",
+ "ufmt==2.8.0",
+ "ruff-api==0.1.0",
+ "usort==1.0.2",
+ "gitpython==3.1.31",
+ "yt-dlp",
+ "pandas",
+ "opencv-python",
+ "pycocotools",
+ "numba",
+ "python-rapidjson",
+]
+notebooks = [
+ "matplotlib",
+ "jupyter",
+ "notebook",
+ "ipywidgets",
+ "ipycanvas",
+ "ipympl",
+ "pycocotools",
+ "decord",
+ "opencv-python",
+ "einops",
+ "scikit-image",
+ "scikit-learn",
+]
+train = [
+ "hydra-core",
+ "submitit",
+ "tensorboard",
+ "zstandard",
+ "scipy",
+ "torchmetrics",
+ "fvcore",
+ "fairscale",
+ "scikit-image",
+ "scikit-learn",
+]
+
+[project.urls]
+"Homepage" = "https://github.com/facebookresearch/sam3"
+"Bug Tracker" = "https://github.com/facebookresearch/sam3/issues"
+
+[tool.setuptools]
+packages = ["sam3", "sam3.model"]
+
+[tool.setuptools.dynamic]
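+# The package version is resolved at build time from sam3.__version__ (defined in sam3/__init__.py).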
+version = {attr = "sam3.__version__"}
+
+[tool.black]
+line-length = 88
+target-version = ['py38', 'py39', 'py310', 'py311', 'py312']
+include = '\.pyi?$'
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3
+
+[tool.usort]
+first_party_detection = false
+
+[tool.ufmt]
+formatter = "ruff-api"
+
+[tool.mypy]
+python_version = "3.12"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+
+[[tool.mypy.overrides]]
+module = [
+ "torch.*",
+ "torchvision.*",
+ "timm.*",
+ "numpy.*",
+ "PIL.*",
+ "tqdm.*",
+ "ftfy.*",
+ "regex.*",
+ "iopath.*",
+]
+ignore_missing_imports = true
+
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = "test_*.py"
+python_classes = "Test*"
+python_functions = "test_*"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..da9328c3b110b312ec087438eb17850331bc1de0
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,52 @@
+cryptography>=41.0.0
+gradio>=5.0.0
+torch
+torchvision
+opencv-python
+opencv-python-headless
+numpy==1.26.0
+tqdm
+Pillow
+ffmpeg-python
+timm>=1.0.17
+ftfy==6.1.1
+regex
+iopath>=0.1.10
+typing_extensions
+huggingface_hub
+spaces>=0.43.0
+portalocker
+einops
+hydra-core
+submitit
+requests
+tensorboard
+zstandard
+scipy
+torchmetrics
+fvcore
+fairscale
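+# transformers is installed directly from a pinned GitHub commit rather than a PyPI release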
+git+https://github.com/huggingface/transformers.git@c3fb1b1a6ca1102f62b139c83a088a97e5a55477
+decord
+pycocotools
+pytest
+pytest-cov
+black==24.2.0
+ufmt==2.8.0
+ruff-api==0.1.0
+usort==1.0.2
+gitpython==3.1.31
+yt-dlp
+pandas
+numba
+python-rapidjson
+matplotlib
+jupyter
+notebook
+ipywidgets
+ipycanvas
+ipympl
+scikit-image
+scikit-learn
+playwright
+
diff --git a/sam3/__init__.py b/sam3/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..14270a6fdbd0198784934a0399f7a19b7405c65a
--- /dev/null
+++ b/sam3/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from .model_builder import build_sam3_image_model
+
+__version__ = "0.1.0"
+
+__all__ = ["build_sam3_image_model"]
diff --git a/sam3/agent/__init__.py b/sam3/agent/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/agent/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/agent/agent_core.py b/sam3/agent/agent_core.py
new file mode 100644
index 0000000000000000000000000000000000000000..f0016c7ca668b651b09a609d518432ff6307867a
--- /dev/null
+++ b/sam3/agent/agent_core.py
@@ -0,0 +1,563 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import copy
+import json
+import os
+
+import cv2
+from PIL import Image
+
+from .client_llm import send_generate_request
+from .client_sam3 import call_sam_service
+from .viz import visualize
+
+
+def save_debug_messages(messages_list, debug, debug_folder_path, debug_jsonl_path):
+ """Save messages to debug jsonl file if debug is enabled"""
+ if debug and debug_jsonl_path:
+ # Ensure the debug directory exists before writing
+ os.makedirs(debug_folder_path, exist_ok=True)
+ with open(debug_jsonl_path, "w") as f:
+ for msg in messages_list:
+ f.write(json.dumps(msg, indent=4) + "\n")
+
+
+def cleanup_debug_files(debug, debug_folder_path, debug_jsonl_path):
+ """Clean up debug files when function successfully returns"""
+ if debug and debug_folder_path:
+ try:
+ if os.path.exists(debug_jsonl_path):
+ os.remove(debug_jsonl_path)
+ if os.path.exists(debug_folder_path):
+ os.rmdir(debug_folder_path)
+ except Exception as e:
+ print(f"Warning: Could not clean up debug files: {e}")
+
+
+def count_images(messages):
+ """Count the total number of images present in the messages history."""
+ total = 0
+ for message in messages:
+ # Check if message has content (should be a list)
+ if "content" in message and isinstance(message["content"], list):
+ # Iterate through each content item
+ for content_item in message["content"]:
+ # Check if content item is a dict with type "image"
+ if (
+ isinstance(content_item, dict)
+ and content_item.get("type") == "image"
+ ):
+ total += 1
+ return total
+
+
+def _prune_messages_for_next_round(
+ messages_list,
+ used_text_prompts,
+ latest_sam3_text_prompt,
+ img_path,
+ initial_text_prompt,
+):
+ """Return a new messages list that contains only:
+ 1) messages[:2] (with optional warning text added to the second message's content)
+ 2) the latest assistant message (and everything after it) that contains a segment_phrase tool call
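+
+    For example (illustrative), a history such as
+    [system, user, assistant(tool call), user(SAM3 result), assistant(tool call), user(...)]
+    is pruned to [system, user (optionally carrying a warning about previously used
+    prompts), assistant(latest segment_phrase tool call), user(...)].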
+ """
+ # There should not be more than 10 messages in the conversation history
+ assert len(messages_list) < 10
+
+ # Part 1: always keep the first two message JSONs
+ part1 = copy.deepcopy(messages_list[:2])
+
+ # Part 2: search backwards for the latest assistant message containing a segment_phrase tool call
+ part2_start_idx = None
+ for idx in range(len(messages_list) - 1, 1, -1):
+ msg = messages_list[idx]
+ # We only consider assistant messages with a "content" list
+ if msg.get("role") != "assistant" or "content" not in msg:
+ continue
+ # Look for any content element that is a text containing the segment_phrase tool call
+ for content in msg["content"]:
+ if (
+ isinstance(content, dict)
+ and content.get("type") == "text"
+                and "<tool_call>" in content.get("text", "")
+ and "segment_phrase" in content.get("text", "")
+ ):
+ part2_start_idx = idx
+ break
+ if part2_start_idx is not None:
+ break
+
+ part2 = messages_list[part2_start_idx:] if part2_start_idx is not None else []
+
+ # Part 3: decide whether to add warning text to the second message in part1
+ previously_used = (
+ [p for p in used_text_prompts if p != latest_sam3_text_prompt]
+ if latest_sam3_text_prompt
+ else list(used_text_prompts)
+ )
+ if part2 and len(previously_used) > 0:
+ warning_text = f'Note that we have previously called the segment_phrase tool with each "text_prompt" in this list: {list(previously_used)}, but none of the generated results were satisfactory. So make sure that you do not use any of these phrases as the "text_prompt" to call the segment_phrase tool again.'
+ # Replace the second message entirely to keep exactly 2 content items
+ part1[1] = {
+ "role": "user",
+ "content": [
+ {"type": "image", "image": img_path},
+ {
+ "type": "text",
+ "text": f"The above image is the raw input image. The initial user input query is: '{initial_text_prompt}'."
+ + " "
+ + warning_text,
+ },
+ ],
+ }
+ assert len(part1[1]["content"]) == 2
+
+ # Build the new messages list: part1 (with optional warning), then part2
+ new_messages = list(part1)
+ new_messages.extend(part2)
+ return new_messages
+
+
+def agent_inference(
+ img_path: str,
+ initial_text_prompt: str,
+ debug: bool = False,
+ send_generate_request=send_generate_request,
+ call_sam_service=call_sam_service,
+ max_generations: int = 100,
+ output_dir="../../sam3_agent_out",
+):
+ """
+    Given a text prompt and an image, run the full SAM 3 agent loop, saving SAM3 and
+    MLLM outputs to their respective directories.
+
+    Args:
+        img_path: Path to the input image
+        initial_text_prompt: Initial text prompt from the user
+        debug: Whether to enable debug mode
+        send_generate_request: Callable used to query the MLLM (overridable, e.g. for testing)
+        call_sam_service: Callable used to run SAM3 segmentation (overridable, e.g. for testing)
+        max_generations: Maximum number of send_generate_request calls allowed (default: 100)
+        output_dir: Directory where SAM3, error, and debug outputs are written
+
+    Returns:
+        A tuple (messages, final_outputs, rendered_final_output), where
+        rendered_final_output is a PIL image with the final masks rendered on it.
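+
+    Example (illustrative sketch; assumes ``send_generate_request`` can reach an MLLM
+    endpoint and that ``call_sam_service`` has been bound to a SAM3 processor, e.g.
+    with ``functools.partial``; the image path below is hypothetical):
+
+        sam_fn = functools.partial(call_sam_service, sam3_processor)
+        messages, final_outputs, rendered = agent_inference(
+            img_path="examples/bedroom.jpg",
+            initial_text_prompt="the pillow on the left",
+            call_sam_service=sam_fn,
+            output_dir="./sam3_agent_out",
+        )
+        rendered.save("agent_result.png")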
+ """
+ # setup dir
+ sam_output_dir = os.path.join(output_dir, "sam_out")
+ error_save_dir = os.path.join(output_dir, "none_out")
+ debug_save_dir = os.path.join(output_dir, "agent_debug_out")
+ os.makedirs(sam_output_dir, exist_ok=True)
+ os.makedirs(error_save_dir, exist_ok=True)
+ os.makedirs(debug_save_dir, exist_ok=True)
+ current_dir = os.path.dirname(os.path.abspath(__file__))
+ MLLM_SYSTEM_PROMPT_PATH = os.path.join(
+ current_dir, "system_prompts/system_prompt.txt"
+ )
+ ITERATIVE_CHECKING_SYSTEM_PROMPT_PATH = os.path.join(
+ current_dir, "system_prompts/system_prompt_iterative_checking.txt"
+ )
+ # init variables
+ PATH_TO_LATEST_OUTPUT_JSON = ""
+ LATEST_SAM3_TEXT_PROMPT = ""
+ USED_TEXT_PROMPTS = (
+ set()
+ ) # Track all previously used text prompts for segment_phrase
+ generation_count = 0 # Counter for number of send_generate_request calls
+
+ # debug setup
+ debug_folder_path = None
+ debug_jsonl_path = None
+ if debug:
+ debug_folder_path = os.path.join(
+ debug_save_dir, f"{img_path.rsplit('/', 1)[-1].rsplit('.', 1)[0]}"
+ )
+ debug_jsonl_path = os.path.join(debug_folder_path, "debug_history.json")
+ os.makedirs(debug_folder_path, exist_ok=True)
+
+ # The helper functions are now defined outside the agent_inference function
+ with open(MLLM_SYSTEM_PROMPT_PATH, "r") as f:
+ system_prompt = f.read().strip()
+ with open(ITERATIVE_CHECKING_SYSTEM_PROMPT_PATH, "r") as f:
+ iterative_checking_system_prompt = f.read().strip()
+
+ # Construct the initial message list
+ messages = [
+ {"role": "system", "content": system_prompt},
+ {
+ "role": "user",
+ "content": [
+ {"type": "image", "image": img_path},
+ {
+ "type": "text",
+ "text": f"The above image is the raw input image. The initial user input query is: '{initial_text_prompt}'.",
+ },
+ ],
+ },
+ ]
+ print(f"> Text prompt: {initial_text_prompt}")
+ print(f"> Image path: {img_path}")
+
+ print("\n\n")
+ print("-" * 30 + f" Round {str(generation_count + 1)}" + "-" * 30)
+ print("\n\n")
+ generated_text = send_generate_request(messages)
+ print(f"\n>>> MLLM Response [start]\n{generated_text}\n<<< MLLM Response [end]\n")
+ while generated_text is not None:
+ save_debug_messages(messages, debug, debug_folder_path, debug_jsonl_path)
+        # Tool calls are expected to be wrapped in <tool_call>...</tool_call> tags
+        # (Qwen-style markers; the exact tag strings are assumed here).
+        assert (
+            "</tool_call>" in generated_text
+        ), f"Generated text does not contain a </tool_call> tag: {generated_text}"
+        # Keep only the text up to and including the first closing tag
+        generated_text = generated_text.split("</tool_call>", 1)[0] + "</tool_call>"
+        tool_call_json_str = (
+            generated_text.split("<tool_call>")[-1]
+            .split("</tool_call>")[0]
+            .strip()
+            .replace(r"}}}", r"}}")  # remove extra } if any
+        )
+ try:
+ tool_call = json.loads(tool_call_json_str)
+ except json.JSONDecodeError:
+ raise ValueError(f"Invalid JSON in tool call: {tool_call_json_str}")
+
+ if PATH_TO_LATEST_OUTPUT_JSON == "":
+ # The first tool call must be segment_phrase or report_no_mask
+ assert (
+ tool_call["name"] == "segment_phrase"
+ or tool_call["name"] == "report_no_mask"
+ )
+
+ if tool_call["name"] == "segment_phrase":
+ print("🔍 Calling segment_phrase tool...")
+ assert list(tool_call["parameters"].keys()) == ["text_prompt"]
+
+ # Check if this text_prompt has been used before
+ current_text_prompt = tool_call["parameters"]["text_prompt"]
+ if current_text_prompt in USED_TEXT_PROMPTS:
+ print(
+ f"❌ Text prompt '{current_text_prompt}' has been used before. Requesting a different prompt."
+ )
+ duplicate_prompt_message = f"You have previously used '{current_text_prompt}' as your text_prompt to call the segment_phrase tool. You may not use it again. Please call the segment_phrase tool again with a different, perhaps more general, or more creative simple noun phrase prompt, while adhering to all the rules stated in the system prompt. You must also never use any of the following text_prompt(s): {str(list(USED_TEXT_PROMPTS))}."
+ messages.append(
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": generated_text}],
+ }
+ )
+ messages.append(
+ {
+ "role": "user",
+ "content": [{"type": "text", "text": duplicate_prompt_message}],
+ }
+ )
+ else:
+ # Add the text_prompt to the set of used prompts
+ USED_TEXT_PROMPTS.add(current_text_prompt)
+ LATEST_SAM3_TEXT_PROMPT = current_text_prompt
+ PATH_TO_LATEST_OUTPUT_JSON = call_sam_service(
+ image_path=img_path,
+ text_prompt=current_text_prompt,
+ output_folder_path=sam_output_dir,
+ )
+ sam3_outputs = json.load(open(PATH_TO_LATEST_OUTPUT_JSON, "r"))
+ sam3_output_image_path = sam3_outputs["output_image_path"]
+ num_masks = len(sam3_outputs["pred_boxes"])
+
+ messages.append(
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": generated_text}],
+ }
+ )
+ if num_masks == 0:
+ print("❌ No masks generated by SAM3, reporting no mask to Qwen.")
+ sam3_output_text_message = f"The segment_phrase tool did not generate any masks for the text_prompt '{current_text_prompt}'. Now, please call the segment_phrase tool again with a different, perhaps more general, or more creative simple noun phrase text_prompt, while adhering to all the rules stated in the system prompt. Please be reminded that the original user query was '{initial_text_prompt}'."
+ messages.append(
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": sam3_output_text_message}
+ ],
+ }
+ )
+ else:
+ sam3_output_text_message = rf"The segment_phrase tool generated {num_masks} available masks. All {num_masks} available masks are rendered in this image below, now you must analyze the {num_masks} available mask(s) carefully, compare them against the raw input image and the original user query, and determine your next action. Please be reminded that the original user query was '{initial_text_prompt}'."
+ messages.append(
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": sam3_output_text_message},
+ {"type": "image", "image": sam3_output_image_path},
+ ],
+ }
+ )
+ print("\n\n>>> sam3_output_text_message:\n", sam3_output_text_message)
+
+ elif tool_call["name"] == "examine_each_mask":
+ print("🔍 Calling examine_each_mask tool...")
+ assert LATEST_SAM3_TEXT_PROMPT != ""
+
+            # Make sure that the last message is an image
+ assert (
+ messages[-1]["content"][1]["type"] == "image"
+ ), "Second content element should be an image"
+ messages.pop() # Remove the last user message
+ # Add simplified replacement message
+ simplified_message = {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": "The segment_phrase tool generated several masks. Now you must analyze the mask(s) carefully, compare them against the raw input image and the original user query, and determine your next action.",
+ }
+ ],
+ }
+ messages.append(simplified_message)
+
+ current_outputs = json.load(open(PATH_TO_LATEST_OUTPUT_JSON, "r"))
+ num_masks = len(current_outputs["pred_masks"])
+ masks_to_keep = []
+
+ # MLLM check the mask one by one
+ for i in range(num_masks):
+ print(f"🔍 Checking mask {i+1}/{num_masks}...")
+ image_w_mask_i, image_w_zoomed_in_mask_i = visualize(current_outputs, i)
+
+ image_w_zoomed_in_mask_i_path = os.path.join(
+ sam_output_dir, rf"{LATEST_SAM3_TEXT_PROMPT}.png".replace("/", "_")
+ ).replace(".png", f"_zoom_in_mask_{i + 1}.png")
+ image_w_mask_i_path = os.path.join(
+ sam_output_dir, rf"{LATEST_SAM3_TEXT_PROMPT}.png".replace("/", "_")
+ ).replace(".png", f"_selected_mask_{i + 1}.png")
+ image_w_zoomed_in_mask_i.save(image_w_zoomed_in_mask_i_path)
+ image_w_mask_i.save(image_w_mask_i_path)
+
+ iterative_checking_messages = [
+ {"role": "system", "content": iterative_checking_system_prompt},
+ {
+ "role": "user",
+ "content": [
+ {"type": "text", "text": f"The raw input image: "},
+ {"type": "image", "image": img_path},
+ {
+ "type": "text",
+ "text": f"The initial user input query is: '{initial_text_prompt}'",
+ },
+ {
+ "type": "text",
+ "text": f"Image with the predicted segmentation mask rendered on it: ",
+ },
+ {"type": "image", "image": image_w_mask_i_path},
+ {
+ "type": "text",
+ "text": f"Image with the zoomed-in mask: ",
+ },
+ {"type": "image", "image": image_w_zoomed_in_mask_i_path},
+ ],
+ },
+ ]
+ checking_generated_text = send_generate_request(
+ iterative_checking_messages
+ )
+
+ # Process the generated text to determine if the mask should be kept or rejected
+ if checking_generated_text is None:
+ raise ValueError(
+ "Generated text is None, which is unexpected. Please check the Qwen server and the input parameters."
+ )
+ print(f"Generated text for mask {i+1}: {checking_generated_text}")
+                # The per-mask checker is expected to wrap its decision in
+                # <verdict>...</verdict> tags (the tag name is assumed here).
+                verdict = (
+                    checking_generated_text.split("<verdict>")[-1]
+                    .split("</verdict>")[0]
+                    .strip()
+                )
+ if "Accept" in verdict:
+                    assert "Reject" not in verdict
+ print(f"Mask {i+1} accepted, keeping it in the outputs.")
+ masks_to_keep.append(i)
+ elif "Reject" in verdict:
+                    assert "Accept" not in verdict
+ print(f"Mask {i+1} rejected, removing it from the outputs.")
+ else:
+ raise ValueError(
+ f"Unexpected verdict in generated text: {checking_generated_text}. Expected 'Accept' or 'Reject'."
+ )
+
+ updated_outputs = {
+ "original_image_path": current_outputs["original_image_path"],
+ "orig_img_h": current_outputs["orig_img_h"],
+ "orig_img_w": current_outputs["orig_img_w"],
+ "pred_boxes": [current_outputs["pred_boxes"][i] for i in masks_to_keep],
+ "pred_scores": [
+ current_outputs["pred_scores"][i] for i in masks_to_keep
+ ],
+ "pred_masks": [current_outputs["pred_masks"][i] for i in masks_to_keep],
+ }
+
+ image_w_check_masks = visualize(updated_outputs)
+ image_w_check_masks_path = os.path.join(
+ sam_output_dir, rf"{LATEST_SAM3_TEXT_PROMPT}.png"
+ ).replace(
+ ".png",
+ f"_selected_masks_{'-'.join(map(str, [i+1 for i in masks_to_keep]))}.png".replace(
+ "/", "_"
+ ),
+ )
+ image_w_check_masks.save(image_w_check_masks_path)
+ # save the updated json outputs and append to message history
+ messages.append(
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": generated_text}],
+ }
+ )
+ if len(masks_to_keep) == 0:
+ messages.append(
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": f"The original user query was: '{initial_text_prompt}'. The examine_each_mask tool examined and rejected all of the masks generated by the segment_phrase tool. Now, please call the segment_phrase tool again with a different, perhaps more general, or more creative simple noun phrase text_prompt, while adhering to all the rules stated in the system prompt.",
+ }
+ ],
+ }
+ )
+ else:
+ messages.append(
+ {
+ "role": "user",
+ "content": [
+ {
+ "type": "text",
+ "text": f"The original user query was: '{initial_text_prompt}'. After calling the examine_each_mask tool on the available masks, the number of available masks is now {len(masks_to_keep)}. All {len(masks_to_keep)} available masks are rendered in this image below, now you must analyze the {len(masks_to_keep)} available mask(s) carefully, compare them against the raw input image and the original user query, and determine your next action.",
+ },
+ {"type": "image", "image": image_w_check_masks_path},
+ ],
+ }
+ )
+
+ # Create a new filename based on the original path to avoid filename length issues
+ base_path = PATH_TO_LATEST_OUTPUT_JSON
+ # Remove any existing "masks_" suffix to avoid duplication
+ if "masks_" in base_path:
+ base_path = base_path.split("masks_")[0] + ".json"
+ # Create new filename with current masks; use a clearer suffix when empty
+ if len(masks_to_keep) == 0:
+ PATH_TO_LATEST_OUTPUT_JSON = base_path.replace(
+ ".json", "masks_none.json"
+ )
+ else:
+ PATH_TO_LATEST_OUTPUT_JSON = base_path.replace(
+ ".json", f"masks_{'_'.join(map(str, masks_to_keep))}.json"
+ )
+ json.dump(updated_outputs, open(PATH_TO_LATEST_OUTPUT_JSON, "w"), indent=4)
+
+ elif tool_call["name"] == "select_masks_and_return":
+ print("🔍 Calling select_masks_and_return tool...")
+ current_outputs = json.load(open(PATH_TO_LATEST_OUTPUT_JSON, "r"))
+
+ assert list(tool_call["parameters"].keys()) == ["final_answer_masks"]
+ masks_to_keep = tool_call["parameters"]["final_answer_masks"]
+
+ # Keep only valid mask indices, remove duplicates, and preserve deterministic ascending order
+ available_masks = set(range(1, len(current_outputs["pred_masks"]) + 1))
+ masks_to_keep = sorted({i for i in masks_to_keep if i in available_masks})
+            # Change this to an update message telling the model to try again, along
+            # with information about the errors made.
+
+ final_outputs = {
+ "original_image_path": current_outputs["original_image_path"],
+ "orig_img_h": current_outputs["orig_img_h"],
+ "orig_img_w": current_outputs["orig_img_w"],
+ "pred_boxes": [
+ current_outputs["pred_boxes"][i - 1] for i in masks_to_keep
+ ],
+ "pred_scores": [
+ current_outputs["pred_scores"][i - 1] for i in masks_to_keep
+ ],
+ "pred_masks": [
+ current_outputs["pred_masks"][i - 1] for i in masks_to_keep
+ ],
+ }
+
+ rendered_final_output = visualize(final_outputs)
+ messages.append(
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": generated_text}],
+ }
+ )
+
+ # Clean up debug files before successful return
+ cleanup_debug_files(debug, debug_folder_path, debug_jsonl_path)
+ return messages, final_outputs, rendered_final_output
+
+ elif tool_call["name"] == "report_no_mask":
+ print("🔍 Calling report_no_mask tool...")
+ height, width = cv2.imread(img_path).shape[:2]
+ final_outputs = {
+ "original_image_path": img_path,
+ "orig_img_h": height,
+ "orig_img_w": width,
+ "pred_boxes": [],
+ "pred_scores": [],
+ "pred_masks": [],
+ }
+ rendered_final_output = Image.open(img_path)
+ messages.append(
+ {
+ "role": "assistant",
+ "content": [{"type": "text", "text": generated_text}],
+ }
+ )
+ return messages, final_outputs, rendered_final_output
+
+ else:
+ raise ValueError(f"Unknown tool call: {tool_call['name']}")
+
+        # Sometimes the MLLM doesn't know when to stop and emits several tool calls in
+        # one round, so truncate each assistant message at the first closing
+        # </tool_call> tag and keep only the first call.
+
+ for message in messages:
+ if message["role"] == "assistant" and "content" in message:
+ for content in message["content"]:
+ if (
+ isinstance(content, dict)
+ and content.get("type") == "text"
+ and "text" in content
+ ):
+                        content["text"] = (
+                            content["text"].split("</tool_call>", 1)[0]
+                            + "</tool_call>\n\n"
+                        )
+ # Prune the messages history before the next MLLM generation round according to the 3-part rules.
+ # This keeps history compact and ensures the model sees only the allowed parts.
+ messages = _prune_messages_for_next_round(
+ messages,
+ USED_TEXT_PROMPTS,
+ LATEST_SAM3_TEXT_PROMPT,
+ img_path,
+ initial_text_prompt,
+ )
+ # make sure there can never be more than 2 images in the context
+ assert count_images(messages) <= 2
+ generation_count += 1
+ if generation_count > max_generations:
+ raise ValueError(
+ f"Exceeded maximum number of allowed generation requests ({max_generations})"
+ )
+
+ print("\n\n")
+ print("-" * 30 + f" Round {str(generation_count + 1)}" + "-" * 30)
+ print("\n\n")
+ generated_text = send_generate_request(messages)
+ print(
+ f"\n>>> MLLM Response [start]\n{generated_text}\n<<< MLLM Response [end]\n"
+ )
+
+ print("\n\n>>> SAM 3 Agent execution ended.\n\n")
+
+ error_save_path = os.path.join(
+ error_save_dir,
+ f"{img_path.rsplit('/', 1)[-1].rsplit('.', 1)[0]}_error_history.json",
+ )
+ with open(error_save_path, "w") as f:
+ json.dump(messages, f, indent=4)
+ print("Saved messages history that caused error to:", error_save_path)
+ raise ValueError(
+ rf"Generated text is None, which is unexpected. Please check the Qwen server and the input parameters for image path: {img_path} and initial text prompt: {initial_text_prompt}."
+ )
diff --git a/sam3/agent/client_llm.py b/sam3/agent/client_llm.py
new file mode 100644
index 0000000000000000000000000000000000000000..85e513c4c90aeb32cf596f04d94eb7ee7867bedd
--- /dev/null
+++ b/sam3/agent/client_llm.py
@@ -0,0 +1,205 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import base64
+import os
+from typing import Any, Optional
+
+from openai import OpenAI
+
+
+def get_image_base64_and_mime(image_path):
+ """Convert image file to base64 string and get MIME type"""
+ try:
+ # Get MIME type based on file extension
+ ext = os.path.splitext(image_path)[1].lower()
+ mime_types = {
+ ".jpg": "image/jpeg",
+ ".jpeg": "image/jpeg",
+ ".png": "image/png",
+ ".gif": "image/gif",
+ ".webp": "image/webp",
+ ".bmp": "image/bmp",
+ }
+ mime_type = mime_types.get(ext, "image/jpeg") # Default to JPEG
+
+ # Convert image to base64
+ with open(image_path, "rb") as image_file:
+ base64_data = base64.b64encode(image_file.read()).decode("utf-8")
+ return base64_data, mime_type
+ except Exception as e:
+ print(f"Error converting image to base64: {e}")
+ return None, None
+
+
+def send_generate_request(
+ messages,
+ server_url=None,
+ model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
+ api_key=None,
+ max_tokens=4096,
+):
+ """
+ Sends a request to the OpenAI-compatible API endpoint using the OpenAI client library.
+
+    Args:
+        messages (list): A list of message dicts, each containing role and content.
+        server_url (str): The base URL of the OpenAI-compatible server, e.g. "http://127.0.0.1:8000"
+        model (str): The model to use for generation (default: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8")
+        api_key (str): API key for the endpoint, if one is required
+        max_tokens (int): Maximum number of completion tokens to generate (default: 4096)
+
+    Returns:
+        str: The generated response text from the server, or None if the request fails.
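+
+    Example (illustrative; the endpoint URL, API key, and image path are assumptions):
+
+        messages = [
+            {"role": "system", "content": "You are a helpful assistant."},
+            {
+                "role": "user",
+                "content": [
+                    {"type": "image", "image": "examples/bedroom.jpg"},
+                    {"type": "text", "text": "Describe this image."},
+                ],
+            },
+        ]
+        text = send_generate_request(
+            messages,
+            server_url="http://127.0.0.1:8000/v1",
+            api_key="EMPTY",
+        )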
+ """
+ # Process messages to convert image paths to base64
+ processed_messages = []
+ for message in messages:
+ processed_message = message.copy()
+ if message["role"] == "user" and "content" in message:
+ processed_content = []
+ for c in message["content"]:
+ if isinstance(c, dict) and c.get("type") == "image":
+ # Convert image path to base64 format
+ image_path = c["image"]
+
+ print("image_path", image_path)
+ new_image_path = image_path.replace(
+ "?", "%3F"
+ ) # Escape ? in the path
+
+ # Read the image file and convert to base64
+ try:
+ base64_image, mime_type = get_image_base64_and_mime(
+ new_image_path
+ )
+ if base64_image is None:
+ print(
+ f"Warning: Could not convert image to base64: {new_image_path}"
+ )
+ continue
+
+ # Create the proper image_url structure with base64 data
+ processed_content.append(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:{mime_type};base64,{base64_image}",
+ "detail": "high",
+ },
+ }
+ )
+
+ except FileNotFoundError:
+ print(f"Warning: Image file not found: {new_image_path}")
+ continue
+ except Exception as e:
+ print(f"Warning: Error processing image {new_image_path}: {e}")
+ continue
+ else:
+ processed_content.append(c)
+
+ processed_message["content"] = processed_content
+ processed_messages.append(processed_message)
+
+ # Create OpenAI client with custom base URL
+ client = OpenAI(api_key=api_key, base_url=server_url)
+
+ try:
+ print(f"🔍 Calling model {model}...")
+ response = client.chat.completions.create(
+ model=model,
+ messages=processed_messages,
+ max_completion_tokens=max_tokens,
+ n=1,
+ )
+ # print(f"Received response: {response.choices[0].message}")
+
+ # Extract the response content
+ if response.choices and len(response.choices) > 0:
+ return response.choices[0].message.content
+ else:
+ print(f"Unexpected response format: {response}")
+ return None
+
+ except Exception as e:
+ print(f"Request failed: {e}")
+ return None
+
+
+def send_direct_request(
+ llm: Any,
+ messages: list[dict[str, Any]],
+ sampling_params: Any,
+) -> Optional[str]:
+ """
+ Run inference on a vLLM model instance directly without using a server.
+
+ Args:
+ llm: Initialized vLLM LLM instance (passed from external initialization)
+ messages: List of message dicts with role and content (OpenAI format)
+ sampling_params: vLLM SamplingParams instance (initialized externally)
+
+ Returns:
+ str: Generated response text, or None if inference fails
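+
+    Example (illustrative; assumes vLLM is installed and the checkpoint name is a
+    hypothetical stand-in):
+
+        from vllm import LLM, SamplingParams
+
+        llm = LLM(model="Qwen/Qwen2.5-VL-7B-Instruct")
+        params = SamplingParams(temperature=0.0, max_tokens=4096)
+        text = send_direct_request(llm, messages, params)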
+ """
+ try:
+ # Process messages to handle images (convert to base64 if needed)
+ processed_messages = []
+ for message in messages:
+ processed_message = message.copy()
+ if message["role"] == "user" and "content" in message:
+ processed_content = []
+ for c in message["content"]:
+ if isinstance(c, dict) and c.get("type") == "image":
+ # Convert image path to base64 format
+ image_path = c["image"]
+ new_image_path = image_path.replace("?", "%3F")
+
+ try:
+ base64_image, mime_type = get_image_base64_and_mime(
+ new_image_path
+ )
+ if base64_image is None:
+ print(
+ f"Warning: Could not convert image: {new_image_path}"
+ )
+ continue
+
+ # vLLM expects image_url format
+ processed_content.append(
+ {
+ "type": "image_url",
+ "image_url": {
+ "url": f"data:{mime_type};base64,{base64_image}"
+ },
+ }
+ )
+ except Exception as e:
+ print(
+ f"Warning: Error processing image {new_image_path}: {e}"
+ )
+ continue
+ else:
+ processed_content.append(c)
+
+ processed_message["content"] = processed_content
+ processed_messages.append(processed_message)
+
+ print("🔍 Running direct inference with vLLM...")
+
+ # Run inference using vLLM's chat interface
+ outputs = llm.chat(
+ messages=processed_messages,
+ sampling_params=sampling_params,
+ )
+
+ # Extract the generated text from the first output
+ if outputs and len(outputs) > 0:
+ generated_text = outputs[0].outputs[0].text
+ return generated_text
+ else:
+ print(f"Unexpected output format: {outputs}")
+ return None
+
+ except Exception as e:
+ print(f"Direct inference failed: {e}")
+ return None
diff --git a/sam3/agent/client_sam3.py b/sam3/agent/client_sam3.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2f64b7713589104ca815824396f2599768e4ec4
--- /dev/null
+++ b/sam3/agent/client_sam3.py
@@ -0,0 +1,138 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import json
+import os
+
+import torch
+from PIL import Image
+
+from sam3.model.box_ops import box_xyxy_to_xywh
+from sam3.train.masks_ops import rle_encode
+
+from .helpers.mask_overlap_removal import remove_overlapping_masks
+from .viz import visualize
+
+
+def sam3_inference(processor, image_path, text_prompt):
+ """Run SAM 3 image inference with text prompts and format the outputs"""
+ image = Image.open(image_path)
+ orig_img_w, orig_img_h = image.size
+
+ # model inference
+ inference_state = processor.set_image(image)
+ inference_state = processor.set_text_prompt(
+ state=inference_state, prompt=text_prompt
+ )
+
+ # format and assemble outputs
+ pred_boxes_xyxy = torch.stack(
+ [
+ inference_state["boxes"][:, 0] / orig_img_w,
+ inference_state["boxes"][:, 1] / orig_img_h,
+ inference_state["boxes"][:, 2] / orig_img_w,
+ inference_state["boxes"][:, 3] / orig_img_h,
+ ],
+ dim=-1,
+ ) # normalized in range [0, 1]
+ pred_boxes_xywh = box_xyxy_to_xywh(pred_boxes_xyxy).tolist()
+ pred_masks = rle_encode(inference_state["masks"].squeeze(1))
+ pred_masks = [m["counts"] for m in pred_masks]
+ outputs = {
+ "orig_img_h": orig_img_h,
+ "orig_img_w": orig_img_w,
+ "pred_boxes": pred_boxes_xywh,
+ "pred_masks": pred_masks,
+ "pred_scores": inference_state["scores"].tolist(),
+ }
+ return outputs
+
+
+def call_sam_service(
+ sam3_processor,
+ image_path: str,
+ text_prompt: str,
+ output_folder_path: str = "sam3_output",
+):
+ """
+ Loads an image, sends it with a text prompt to the service,
+ saves the results, and renders the visualization.
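+
+    Example (illustrative; assumes ``sam3_processor`` is a SAM3 image processor
+    exposing ``set_image`` and ``set_text_prompt``, as used by ``sam3_inference``
+    above, and that the image path exists):
+
+        json_path = call_sam_service(
+            sam3_processor,
+            image_path="examples/bedroom.jpg",
+            text_prompt="pillow",
+            output_folder_path="sam3_output",
+        )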
+ """
+ print(f"📞 Loading image '{image_path}' and sending with prompt '{text_prompt}'...")
+
+ text_prompt_for_save_path = (
+ text_prompt.replace("/", "_") if "/" in text_prompt else text_prompt
+ )
+
+ os.makedirs(
+ os.path.join(output_folder_path, image_path.replace("/", "-")), exist_ok=True
+ )
+ output_json_path = os.path.join(
+ output_folder_path,
+ image_path.replace("/", "-"),
+ rf"{text_prompt_for_save_path}.json",
+ )
+ output_image_path = os.path.join(
+ output_folder_path,
+ image_path.replace("/", "-"),
+ rf"{text_prompt_for_save_path}.png",
+ )
+
+ try:
+ # Send the image and text prompt as a multipart/form-data request
+ serialized_response = sam3_inference(sam3_processor, image_path, text_prompt)
+
+ # 1. Prepare the response dictionary
+ serialized_response = remove_overlapping_masks(serialized_response)
+ serialized_response = {
+ "original_image_path": image_path,
+ "output_image_path": output_image_path,
+ **serialized_response,
+ }
+
+ # 2. Reorder predictions by scores (highest to lowest) if scores are available
+ if "pred_scores" in serialized_response and serialized_response["pred_scores"]:
+ # Create indices sorted by scores in descending order
+ score_indices = sorted(
+ range(len(serialized_response["pred_scores"])),
+ key=lambda i: serialized_response["pred_scores"][i],
+ reverse=True,
+ )
+
+ # Reorder all three lists based on the sorted indices
+ serialized_response["pred_scores"] = [
+ serialized_response["pred_scores"][i] for i in score_indices
+ ]
+ serialized_response["pred_boxes"] = [
+ serialized_response["pred_boxes"][i] for i in score_indices
+ ]
+ serialized_response["pred_masks"] = [
+ serialized_response["pred_masks"][i] for i in score_indices
+ ]
+
+        # 3. Remove any invalid RLE masks that are too short (fewer than 5 characters)
+ valid_masks = []
+ valid_boxes = []
+ valid_scores = []
+ for i, rle in enumerate(serialized_response["pred_masks"]):
+ if len(rle) > 4:
+ valid_masks.append(rle)
+ valid_boxes.append(serialized_response["pred_boxes"][i])
+ valid_scores.append(serialized_response["pred_scores"][i])
+ serialized_response["pred_masks"] = valid_masks
+ serialized_response["pred_boxes"] = valid_boxes
+ serialized_response["pred_scores"] = valid_scores
+
+ with open(output_json_path, "w") as f:
+ json.dump(serialized_response, f, indent=4)
+ print(f"✅ Raw JSON response saved to '{output_json_path}'")
+
+        # 4. Render the masks on the image and save the visualization in the SAM3 output folder
+ print("🔍 Rendering visualizations on the image ...")
+ viz_image = visualize(serialized_response)
+ os.makedirs(os.path.dirname(output_image_path), exist_ok=True)
+ viz_image.save(output_image_path)
+ print("✅ Saved visualization at:", output_image_path)
+ except Exception as e:
+ print(f"❌ Error calling service: {e}")
+
+ return output_json_path
diff --git a/sam3/agent/helpers/__init__.py b/sam3/agent/helpers/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/agent/helpers/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/agent/helpers/boxes.py b/sam3/agent/helpers/boxes.py
new file mode 100644
index 0000000000000000000000000000000000000000..6cfef58b843281363ac6cb5994d203f972221836
--- /dev/null
+++ b/sam3/agent/helpers/boxes.py
@@ -0,0 +1,438 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import math
+from enum import IntEnum, unique
+from typing import List, Tuple, Union
+
+import numpy as np
+import torch
+from torch import device
+
+_RawBoxType = Union[List[float], Tuple[float, ...], torch.Tensor, np.ndarray]
+
+
+@unique
+class BoxMode(IntEnum):
+ """
+ Enum of different ways to represent a box.
+ """
+
+ XYXY_ABS = 0
+ """
+ (x0, y0, x1, y1) in absolute floating points coordinates.
+ The coordinates in range [0, width or height].
+ """
+ XYWH_ABS = 1
+ """
+ (x0, y0, w, h) in absolute floating points coordinates.
+ """
+ XYXY_REL = 2
+ """
+ Not yet supported!
+ (x0, y0, x1, y1) in range [0, 1]. They are relative to the size of the image.
+ """
+ XYWH_REL = 3
+ """
+ Not yet supported!
+ (x0, y0, w, h) in range [0, 1]. They are relative to the size of the image.
+ """
+ XYWHA_ABS = 4
+ """
+ (xc, yc, w, h, a) in absolute floating points coordinates.
+ (xc, yc) is the center of the rotated box, and the angle a is in degrees ccw.
+ """
+
+ @staticmethod
+ def convert(
+ box: _RawBoxType, from_mode: "BoxMode", to_mode: "BoxMode"
+ ) -> _RawBoxType:
+ """
+ Args:
+ box: can be a k-tuple, k-list or an Nxk array/tensor, where k = 4 or 5
+ from_mode, to_mode (BoxMode)
+
+ Returns:
+ The converted box of the same type.
+ """
+ if from_mode == to_mode:
+ return box
+
+ original_type = type(box)
+ is_numpy = isinstance(box, np.ndarray)
+ single_box = isinstance(box, (list, tuple))
+ if single_box:
+ assert len(box) == 4 or len(box) == 5, (
+ "BoxMode.convert takes either a k-tuple/list or an Nxk array/tensor,"
+ " where k == 4 or 5"
+ )
+ arr = torch.tensor(box)[None, :]
+ else:
+ # avoid modifying the input box
+ if is_numpy:
+ arr = torch.from_numpy(np.asarray(box)).clone()
+ else:
+ arr = box.clone()
+
+ assert to_mode not in [
+ BoxMode.XYXY_REL,
+ BoxMode.XYWH_REL,
+ ] and from_mode not in [
+ BoxMode.XYXY_REL,
+ BoxMode.XYWH_REL,
+ ], "Relative mode not yet supported!"
+
+ if from_mode == BoxMode.XYWHA_ABS and to_mode == BoxMode.XYXY_ABS:
+ assert (
+ arr.shape[-1] == 5
+ ), "The last dimension of input shape must be 5 for XYWHA format"
+ original_dtype = arr.dtype
+ arr = arr.double()
+
+ w = arr[:, 2]
+ h = arr[:, 3]
+ a = arr[:, 4]
+ c = torch.abs(torch.cos(a * math.pi / 180.0))
+ s = torch.abs(torch.sin(a * math.pi / 180.0))
+ # This basically computes the horizontal bounding rectangle of the rotated box
+ new_w = c * w + s * h
+ new_h = c * h + s * w
+
+ # convert center to top-left corner
+ arr[:, 0] -= new_w / 2.0
+ arr[:, 1] -= new_h / 2.0
+ # bottom-right corner
+ arr[:, 2] = arr[:, 0] + new_w
+ arr[:, 3] = arr[:, 1] + new_h
+
+ arr = arr[:, :4].to(dtype=original_dtype)
+ elif from_mode == BoxMode.XYWH_ABS and to_mode == BoxMode.XYWHA_ABS:
+ original_dtype = arr.dtype
+ arr = arr.double()
+ arr[:, 0] += arr[:, 2] / 2.0
+ arr[:, 1] += arr[:, 3] / 2.0
+ angles = torch.zeros((arr.shape[0], 1), dtype=arr.dtype)
+ arr = torch.cat((arr, angles), axis=1).to(dtype=original_dtype)
+ else:
+ if to_mode == BoxMode.XYXY_ABS and from_mode == BoxMode.XYWH_ABS:
+ arr[:, 2] += arr[:, 0]
+ arr[:, 3] += arr[:, 1]
+ elif from_mode == BoxMode.XYXY_ABS and to_mode == BoxMode.XYWH_ABS:
+ arr[:, 2] -= arr[:, 0]
+ arr[:, 3] -= arr[:, 1]
+ else:
+ raise NotImplementedError(
+ "Conversion from BoxMode {} to {} is not supported yet".format(
+ from_mode, to_mode
+ )
+ )
+
+ if single_box:
+ return original_type(arr.flatten().tolist())
+ if is_numpy:
+ return arr.numpy()
+ else:
+ return arr
+
+
+class Boxes:
+ """
+ This structure stores a list of boxes as a Nx4 torch.Tensor.
+ It supports some common methods about boxes
+ (`area`, `clip`, `nonempty`, etc),
+ and also behaves like a Tensor
+ (support indexing, `to(device)`, `.device`, and iteration over all boxes)
+
+ Attributes:
+ tensor (torch.Tensor): float matrix of Nx4. Each row is (x1, y1, x2, y2).
+ """
+
+ def __init__(self, tensor: torch.Tensor):
+ """
+ Args:
+ tensor (Tensor[float]): a Nx4 matrix. Each row is (x1, y1, x2, y2).
+ """
+ if not isinstance(tensor, torch.Tensor):
+ tensor = torch.as_tensor(
+ tensor, dtype=torch.float32, device=torch.device("cpu")
+ )
+ else:
+ tensor = tensor.to(torch.float32)
+ if tensor.numel() == 0:
+ # Use reshape, so we don't end up creating a new tensor that does not depend on
+ # the inputs (and consequently confuses jit)
+ tensor = tensor.reshape((-1, 4)).to(dtype=torch.float32)
+ assert tensor.dim() == 2 and tensor.size(-1) == 4, tensor.size()
+
+ self.tensor = tensor
+
+ def clone(self) -> "Boxes":
+ """
+ Clone the Boxes.
+
+ Returns:
+ Boxes
+ """
+ return Boxes(self.tensor.clone())
+
+ def to(self, device: torch.device):
+ # Boxes are assumed float32 and does not support to(dtype)
+ return Boxes(self.tensor.to(device=device))
+
+ def area(self) -> torch.Tensor:
+ """
+ Computes the area of all the boxes.
+
+ Returns:
+ torch.Tensor: a vector with areas of each box.
+ """
+ box = self.tensor
+ area = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
+ return area
+
+ def clip(self, box_size: Tuple[int, int]) -> None:
+ """
+ Clip (in place) the boxes by limiting x coordinates to the range [0, width]
+ and y coordinates to the range [0, height].
+
+ Args:
+ box_size (height, width): The clipping box's size.
+ """
+ assert torch.isfinite(self.tensor).all(), "Box tensor contains infinite or NaN!"
+ h, w = box_size
+ x1 = self.tensor[:, 0].clamp(min=0, max=w)
+ y1 = self.tensor[:, 1].clamp(min=0, max=h)
+ x2 = self.tensor[:, 2].clamp(min=0, max=w)
+ y2 = self.tensor[:, 3].clamp(min=0, max=h)
+ self.tensor = torch.stack((x1, y1, x2, y2), dim=-1)
+
+ def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
+ """
+ Find boxes that are non-empty.
+ A box is considered empty, if either of its side is no larger than threshold.
+
+ Returns:
+ Tensor:
+ a binary vector which represents whether each box is empty
+ (False) or non-empty (True).
+ """
+ box = self.tensor
+ widths = box[:, 2] - box[:, 0]
+ heights = box[:, 3] - box[:, 1]
+ keep = (widths > threshold) & (heights > threshold)
+ return keep
+
+ def __getitem__(self, item) -> "Boxes":
+ """
+ Args:
+ item: int, slice, or a BoolTensor
+
+ Returns:
+ Boxes: Create a new :class:`Boxes` by indexing.
+
+ The following usage are allowed:
+
+ 1. `new_boxes = boxes[3]`: return a `Boxes` which contains only one box.
+ 2. `new_boxes = boxes[2:10]`: return a slice of boxes.
+ 3. `new_boxes = boxes[vector]`, where vector is a torch.BoolTensor
+ with `length = len(boxes)`. Nonzero elements in the vector will be selected.
+
+ Note that the returned Boxes might share storage with this Boxes,
+ subject to Pytorch's indexing semantics.
+ """
+ if isinstance(item, int):
+ return Boxes(self.tensor[item].view(1, -1))
+ b = self.tensor[item]
+ assert (
+ b.dim() == 2
+ ), "Indexing on Boxes with {} failed to return a matrix!".format(item)
+ return Boxes(b)
+
+ def __len__(self) -> int:
+ return self.tensor.shape[0]
+
+ def __repr__(self) -> str:
+ return "Boxes(" + str(self.tensor) + ")"
+
+ def inside_box(
+ self, box_size: Tuple[int, int], boundary_threshold: int = 0
+ ) -> torch.Tensor:
+ """
+ Args:
+ box_size (height, width): Size of the reference box.
+ boundary_threshold (int): Boxes that extend beyond the reference box
+ boundary by more than boundary_threshold are considered "outside".
+
+ Returns:
+ a binary vector, indicating whether each box is inside the reference box.
+ """
+ height, width = box_size
+ inds_inside = (
+ (self.tensor[..., 0] >= -boundary_threshold)
+ & (self.tensor[..., 1] >= -boundary_threshold)
+ & (self.tensor[..., 2] < width + boundary_threshold)
+ & (self.tensor[..., 3] < height + boundary_threshold)
+ )
+ return inds_inside
+
+ def get_centers(self) -> torch.Tensor:
+ """
+ Returns:
+ The box centers in a Nx2 array of (x, y).
+ """
+ return (self.tensor[:, :2] + self.tensor[:, 2:]) / 2
+
+ def scale(self, scale_x: float, scale_y: float) -> None:
+ """
+ Scale the box with horizontal and vertical scaling factors
+ """
+ self.tensor[:, 0::2] *= scale_x
+ self.tensor[:, 1::2] *= scale_y
+
+ @classmethod
+ def cat(cls, boxes_list: List["Boxes"]) -> "Boxes":
+ """
+ Concatenates a list of Boxes into a single Boxes
+
+ Arguments:
+ boxes_list (list[Boxes])
+
+ Returns:
+ Boxes: the concatenated Boxes
+ """
+ assert isinstance(boxes_list, (list, tuple))
+ if len(boxes_list) == 0:
+ return cls(torch.empty(0))
+ assert all([isinstance(box, Boxes) for box in boxes_list])
+
+ # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input
+ cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
+ return cat_boxes
+
+ @property
+ def device(self) -> device:
+ return self.tensor.device
+
+ # type "Iterator[torch.Tensor]", yield, and iter() not supported by torchscript
+ # https://github.com/pytorch/pytorch/issues/18627
+ @torch.jit.unused
+ def __iter__(self):
+ """
+ Yield a box as a Tensor of shape (4,) at a time.
+ """
+ yield from self.tensor
+
+
+def pairwise_intersection(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
+ """
+ Given two lists of boxes of size N and M,
+ compute the intersection area between __all__ N x M pairs of boxes.
+ The box order must be (xmin, ymin, xmax, ymax)
+
+ Args:
+ boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.
+
+ Returns:
+ Tensor: intersection, sized [N,M].
+ """
+ boxes1, boxes2 = boxes1.tensor, boxes2.tensor
+ width_height = torch.min(boxes1[:, None, 2:], boxes2[:, 2:]) - torch.max(
+ boxes1[:, None, :2], boxes2[:, :2]
+ ) # [N,M,2]
+
+ width_height.clamp_(min=0) # [N,M,2]
+ intersection = width_height.prod(dim=2) # [N,M]
+ return intersection
+
+
+# implementation from https://github.com/kuangliu/torchcv/blob/master/torchcv/utils/box.py
+# with slight modifications
+def pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
+ """
+ Given two lists of boxes of size N and M, compute the IoU
+ (intersection over union) between **all** N x M pairs of boxes.
+ The box order must be (xmin, ymin, xmax, ymax).
+
+ Args:
+ boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.
+
+ Returns:
+ Tensor: IoU, sized [N,M].
+ """
+ area1 = boxes1.area() # [N]
+ area2 = boxes2.area() # [M]
+ inter = pairwise_intersection(boxes1, boxes2)
+
+ # handle empty boxes
+ iou = torch.where(
+ inter > 0,
+ inter / (area1[:, None] + area2 - inter),
+ torch.zeros(1, dtype=inter.dtype, device=inter.device),
+ )
+ return iou
+
+
+def pairwise_ioa(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
+ """
+    Similar to :func:`pairwise_iou` but compute the IoA (intersection over boxes2 area).
+
+ Args:
+ boxes1,boxes2 (Boxes): two `Boxes`. Contains N & M boxes, respectively.
+
+ Returns:
+ Tensor: IoA, sized [N,M].
+ """
+ area2 = boxes2.area() # [M]
+ inter = pairwise_intersection(boxes1, boxes2)
+
+ # handle empty boxes
+ ioa = torch.where(
+ inter > 0, inter / area2, torch.zeros(1, dtype=inter.dtype, device=inter.device)
+ )
+ return ioa
+
+
+def pairwise_point_box_distance(points: torch.Tensor, boxes: Boxes):
+ """
+ Pairwise distance between N points and M boxes. The distance between a
+ point and a box is represented by the distance from the point to 4 edges
+ of the box. Distances are all positive when the point is inside the box.
+
+ Args:
+ points: Nx2 coordinates. Each row is (x, y)
+ boxes: M boxes
+
+ Returns:
+ Tensor: distances of size (N, M, 4). The 4 values are distances from
+ the point to the left, top, right, bottom of the box.
+ """
+ x, y = points.unsqueeze(dim=2).unbind(dim=1) # (N, 1)
+ x0, y0, x1, y1 = boxes.tensor.unsqueeze(dim=0).unbind(dim=2) # (1, M)
+ return torch.stack([x - x0, y - y0, x1 - x, y1 - y], dim=2)
+
+
+def matched_pairwise_iou(boxes1: Boxes, boxes2: Boxes) -> torch.Tensor:
+ """
+ Compute pairwise intersection over union (IOU) of two sets of matched
+ boxes that have the same number of boxes.
+ Similar to :func:`pairwise_iou`, but computes only diagonal elements of the matrix.
+
+ Args:
+ boxes1 (Boxes): bounding boxes, sized [N,4].
+ boxes2 (Boxes): same length as boxes1
+ Returns:
+ Tensor: iou, sized [N].
+ """
+    assert len(boxes1) == len(boxes2), (
+        "boxlists should have the same number of entries, got {}, {}".format(
+            len(boxes1), len(boxes2)
+        )
+    )
+ area1 = boxes1.area() # [N]
+ area2 = boxes2.area() # [N]
+ box1, box2 = boxes1.tensor, boxes2.tensor
+ lt = torch.max(box1[:, :2], box2[:, :2]) # [N,2]
+ rb = torch.min(box1[:, 2:], box2[:, 2:]) # [N,2]
+ wh = (rb - lt).clamp(min=0) # [N,2]
+ inter = wh[:, 0] * wh[:, 1] # [N]
+ iou = inter / (area1 + area2 - inter) # [N]
+ return iou
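+
+
+if __name__ == "__main__":
+    # Minimal self-check (illustrative sketch): convert a box between modes and
+    # compute a pairwise IoU. Values are chosen so the results are easy to verify.
+    xywh = [10.0, 10.0, 20.0, 20.0]
+    xyxy = BoxMode.convert(xywh, BoxMode.XYWH_ABS, BoxMode.XYXY_ABS)
+    print("XYWH -> XYXY:", xyxy)  # expected [10.0, 10.0, 30.0, 30.0]
+
+    boxes_a = Boxes(torch.tensor([[0.0, 0.0, 10.0, 10.0]]))
+    boxes_b = Boxes(torch.tensor([[5.0, 5.0, 15.0, 15.0]]))
+    # Intersection is 5 x 5 = 25 and union is 100 + 100 - 25 = 175, so IoU = 1/7.
+    print("pairwise IoU:", pairwise_iou(boxes_a, boxes_b))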
diff --git a/sam3/agent/helpers/color_map.py b/sam3/agent/helpers/color_map.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ea1b29b37231819549cacea0d815c5ffda118c9
--- /dev/null
+++ b/sam3/agent/helpers/color_map.py
@@ -0,0 +1,150 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+An awesome colormap for really neat visualizations.
+Copied from Detectron, and removed gray colors.
+"""
+
+import random
+
+import numpy as np
+
+__all__ = ["colormap", "random_color", "random_colors"]
+
+
+# A list of 20 bright and sharp colors for segmentation masks,
+# generated from the edges of the sRGB color space for maximum intensity.
+_COLORS = (
+ np.array(
+ [
+ # The original 8 sharp colors
+ 1.000,
+ 1.000,
+ 0.000, # 1. Yellow
+ 0.000,
+ 1.000,
+ 0.000, # 2. Lime
+ 0.000,
+ 1.000,
+ 1.000, # 3. Cyan
+ 1.000,
+ 0.000,
+ 1.000, # 4. Magenta
+ 1.000,
+ 0.000,
+ 0.000, # 5. Red
+ 1.000,
+ 0.498,
+ 0.000, # 6. Orange
+ 0.498,
+ 1.000,
+ 0.000, # 7. Chartreuse
+ 0.000,
+ 1.000,
+ 0.498, # 8. Spring Green
+ 1.000,
+ 0.000,
+ 0.498, # 9. Rose
+ 0.498,
+ 0.000,
+ 1.000, # 10. Violet
+ 0.753,
+ 1.000,
+ 0.000, # 11. Electric Lime
+ 1.000,
+ 0.753,
+ 0.000, # 12. Vivid Orange
+ 0.000,
+ 1.000,
+ 0.753, # 13. Turquoise
+ 0.753,
+ 0.000,
+ 1.000, # 14. Bright Violet
+ 1.000,
+ 0.000,
+ 0.753, # 15. Bright Pink
+ 1.000,
+ 0.251,
+ 0.000, # 16. Fiery Orange
+ 0.251,
+ 1.000,
+ 0.000, # 17. Bright Chartreuse
+ 0.000,
+ 1.000,
+ 0.251, # 18. Malachite Green
+ 0.251,
+ 0.000,
+ 1.000, # 19. Deep Violet
+ 1.000,
+ 0.000,
+ 0.251, # 20. Hot Pink
+ ]
+ )
+ .astype(np.float32)
+ .reshape(-1, 3)
+)
+
+
+def colormap(rgb=False, maximum=255):
+ """
+ Args:
+ rgb (bool): whether to return RGB colors or BGR colors.
+ maximum (int): either 255 or 1
+
+ Returns:
+ ndarray: a float32 array of Nx3 colors, in range [0, 255] or [0, 1]
+ """
+ assert maximum in [255, 1], maximum
+ c = _COLORS * maximum
+ if not rgb:
+ c = c[:, ::-1]
+ return c
+
+
+def random_color(rgb=False, maximum=255):
+ """
+ Args:
+ rgb (bool): whether to return RGB colors or BGR colors.
+ maximum (int): either 255 or 1
+
+ Returns:
+ ndarray: a vector of 3 numbers
+ """
+ idx = np.random.randint(0, len(_COLORS))
+ ret = _COLORS[idx] * maximum
+ if not rgb:
+ ret = ret[::-1]
+ return ret
+
+
+def random_colors(N, rgb=False, maximum=255):
+ """
+ Args:
+ N (int): number of unique colors needed
+ rgb (bool): whether to return RGB colors or BGR colors.
+ maximum (int): either 255 or 1
+
+ Returns:
+ ndarray: a list of random_color
+ """
+ indices = random.sample(range(len(_COLORS)), N)
+ ret = [_COLORS[i] * maximum for i in indices]
+ if not rgb:
+ ret = [x[::-1] for x in ret]
+ return ret
+
+
+if __name__ == "__main__":
+ import cv2
+
+ size = 100
+ H, W = 10, 10
+ canvas = np.random.rand(H * size, W * size, 3).astype("float32")
+ for h in range(H):
+ for w in range(W):
+ idx = h * W + w
+ if idx >= len(_COLORS):
+ break
+ canvas[h * size : (h + 1) * size, w * size : (w + 1) * size] = _COLORS[idx]
+ cv2.imshow("a", canvas)
+ cv2.waitKey(0)
diff --git a/sam3/agent/helpers/keypoints.py b/sam3/agent/helpers/keypoints.py
new file mode 100644
index 0000000000000000000000000000000000000000..040810fd0a7c1e693175d3cf8eee2a5951ff4cab
--- /dev/null
+++ b/sam3/agent/helpers/keypoints.py
@@ -0,0 +1,244 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Any, List, Tuple, Union
+
+import numpy as np
+import torch
+from torch.nn import functional as F
+
+
+class Keypoints:
+ """
+ Stores keypoint **annotation** data. GT Instances have a `gt_keypoints` property
+ containing the x,y location and visibility flag of each keypoint. This tensor has shape
+ (N, K, 3) where N is the number of instances and K is the number of keypoints per instance.
+
+ The visibility flag follows the COCO format and must be one of three integers:
+
+ * v=0: not labeled (in which case x=y=0)
+ * v=1: labeled but not visible
+ * v=2: labeled and visible
+ """
+
+ def __init__(self, keypoints: Union[torch.Tensor, np.ndarray, List[List[float]]]):
+ """
+ Arguments:
+ keypoints: A Tensor, numpy array, or list of the x, y, and visibility of each keypoint.
+ The shape should be (N, K, 3) where N is the number of
+ instances, and K is the number of keypoints per instance.
+ """
+ device = (
+ keypoints.device
+ if isinstance(keypoints, torch.Tensor)
+ else torch.device("cpu")
+ )
+ keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
+ assert keypoints.dim() == 3 and keypoints.shape[2] == 3, keypoints.shape
+ self.tensor = keypoints
+
+ def __len__(self) -> int:
+ return self.tensor.size(0)
+
+ def to(self, *args: Any, **kwargs: Any) -> "Keypoints":
+ return type(self)(self.tensor.to(*args, **kwargs))
+
+ @property
+ def device(self) -> torch.device:
+ return self.tensor.device
+
+ def to_heatmap(self, boxes: torch.Tensor, heatmap_size: int) -> torch.Tensor:
+ """
+ Convert keypoint annotations to a heatmap of one-hot labels for training,
+ as described in :paper:`Mask R-CNN`.
+
+ Arguments:
+ boxes: Nx4 tensor, the boxes to draw the keypoints to
+
+ Returns:
+ heatmaps:
+ A tensor of shape (N, K), each element is integer spatial label
+ in the range [0, heatmap_size**2 - 1] for each keypoint in the input.
+ valid:
+ A tensor of shape (N, K) containing whether each keypoint is in the roi or not.
+ """
+ return _keypoints_to_heatmap(self.tensor, boxes, heatmap_size)
+
+ def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "Keypoints":
+ """
+ Create a new `Keypoints` by indexing on this `Keypoints`.
+
+ The following usage are allowed:
+
+ 1. `new_kpts = kpts[3]`: return a `Keypoints` which contains only one instance.
+ 2. `new_kpts = kpts[2:10]`: return a slice of key points.
+ 3. `new_kpts = kpts[vector]`, where vector is a torch.ByteTensor
+ with `length = len(kpts)`. Nonzero elements in the vector will be selected.
+
+ Note that the returned Keypoints might share storage with this Keypoints,
+ subject to Pytorch's indexing semantics.
+ """
+ if isinstance(item, int):
+ return Keypoints([self.tensor[item]])
+ return Keypoints(self.tensor[item])
+
+ def __repr__(self) -> str:
+ s = self.__class__.__name__ + "("
+ s += "num_instances={})".format(len(self.tensor))
+ return s
+
+ @staticmethod
+ def cat(keypoints_list: List["Keypoints"]) -> "Keypoints":
+ """
+ Concatenates a list of Keypoints into a single Keypoints
+
+ Arguments:
+ keypoints_list (list[Keypoints])
+
+ Returns:
+ Keypoints: the concatenated Keypoints
+ """
+ assert isinstance(keypoints_list, (list, tuple))
+ assert len(keypoints_list) > 0
+ assert all(isinstance(keypoints, Keypoints) for keypoints in keypoints_list)
+
+ cat_kpts = type(keypoints_list[0])(
+ torch.cat([kpts.tensor for kpts in keypoints_list], dim=0)
+ )
+ return cat_kpts
+
+
+def _keypoints_to_heatmap(
+ keypoints: torch.Tensor, rois: torch.Tensor, heatmap_size: int
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ """
+ Encode keypoint locations into a target heatmap for use in SoftmaxWithLoss across space.
+
+ Maps keypoints from the half-open interval [x1, x2) on continuous image coordinates to the
+ closed interval [0, heatmap_size - 1] on discrete image coordinates. We use the
+ continuous-discrete conversion from Heckbert 1990 ("What is the coordinate of a pixel?"):
+ d = floor(c) and c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.
+
+ Arguments:
+ keypoints: tensor of keypoint locations in of shape (N, K, 3).
+ rois: Nx4 tensor of rois in xyxy format
+ heatmap_size: integer side length of square heatmap.
+
+ Returns:
+ heatmaps: A tensor of shape (N, K) containing an integer spatial label
+ in the range [0, heatmap_size**2 - 1] for each keypoint in the input.
+ valid: A tensor of shape (N, K) containing whether each keypoint is in
+ the roi or not.
+ """
+
+ if rois.numel() == 0:
+ return rois.new().long(), rois.new().long()
+ offset_x = rois[:, 0]
+ offset_y = rois[:, 1]
+ scale_x = heatmap_size / (rois[:, 2] - rois[:, 0])
+ scale_y = heatmap_size / (rois[:, 3] - rois[:, 1])
+
+ offset_x = offset_x[:, None]
+ offset_y = offset_y[:, None]
+ scale_x = scale_x[:, None]
+ scale_y = scale_y[:, None]
+
+ x = keypoints[..., 0]
+ y = keypoints[..., 1]
+
+ x_boundary_inds = x == rois[:, 2][:, None]
+ y_boundary_inds = y == rois[:, 3][:, None]
+
+ x = (x - offset_x) * scale_x
+ x = x.floor().long()
+ y = (y - offset_y) * scale_y
+ y = y.floor().long()
+
+ x[x_boundary_inds] = heatmap_size - 1
+ y[y_boundary_inds] = heatmap_size - 1
+
+ valid_loc = (x >= 0) & (y >= 0) & (x < heatmap_size) & (y < heatmap_size)
+ vis = keypoints[..., 2] > 0
+ valid = (valid_loc & vis).long()
+
+ lin_ind = y * heatmap_size + x
+ heatmaps = lin_ind * valid
+
+ return heatmaps, valid
+
+
+@torch.jit.script_if_tracing
+def heatmaps_to_keypoints(maps: torch.Tensor, rois: torch.Tensor) -> torch.Tensor:
+ """
+ Extract predicted keypoint locations from heatmaps.
+
+ Args:
+ maps (Tensor): (#ROIs, #keypoints, POOL_H, POOL_W). The predicted heatmap of logits for
+ each ROI and each keypoint.
+ rois (Tensor): (#ROIs, 4). The box of each ROI.
+
+ Returns:
+ Tensor of shape (#ROIs, #keypoints, 4) with the last dimension corresponding to
+ (x, y, logit, score) for each keypoint.
+
+ When converting discrete pixel indices in an NxN image to a continuous keypoint coordinate,
+ we maintain consistency with :meth:`Keypoints.to_heatmap` by using the conversion from
+ Heckbert 1990: c = d + 0.5, where d is a discrete coordinate and c is a continuous coordinate.
+ """
+
+ offset_x = rois[:, 0]
+ offset_y = rois[:, 1]
+
+ widths = (rois[:, 2] - rois[:, 0]).clamp(min=1)
+ heights = (rois[:, 3] - rois[:, 1]).clamp(min=1)
+ widths_ceil = widths.ceil()
+ heights_ceil = heights.ceil()
+
+ num_rois, num_keypoints = maps.shape[:2]
+ xy_preds = maps.new_zeros(rois.shape[0], num_keypoints, 4)
+
+ width_corrections = widths / widths_ceil
+ height_corrections = heights / heights_ceil
+
+ keypoints_idx = torch.arange(num_keypoints, device=maps.device)
+
+ for i in range(num_rois):
+ outsize = (int(heights_ceil[i]), int(widths_ceil[i]))
+ roi_map = F.interpolate(
+ maps[[i]], size=outsize, mode="bicubic", align_corners=False
+ )
+
+ # Although semantically equivalent, `reshape` is used instead of `squeeze` due
+ # to limitation during ONNX export of `squeeze` in scripting mode
+ roi_map = roi_map.reshape(roi_map.shape[1:]) # keypoints x H x W
+
+ # softmax over the spatial region
+ max_score, _ = roi_map.view(num_keypoints, -1).max(1)
+ max_score = max_score.view(num_keypoints, 1, 1)
+ tmp_full_resolution = (roi_map - max_score).exp_()
+ tmp_pool_resolution = (maps[i] - max_score).exp_()
+ # Produce scores over the region H x W, but normalize with POOL_H x POOL_W,
+ # so that the scores of objects of different absolute sizes will be more comparable
+ roi_map_scores = tmp_full_resolution / tmp_pool_resolution.sum(
+ (1, 2), keepdim=True
+ )
+
+ w = roi_map.shape[2]
+ pos = roi_map.view(num_keypoints, -1).argmax(1)
+
+ x_int = pos % w
+ y_int = (pos - x_int) // w
+
+ assert (
+ roi_map_scores[keypoints_idx, y_int, x_int]
+ == roi_map_scores.view(num_keypoints, -1).max(1)[0]
+ ).all()
+
+ x = (x_int.float() + 0.5) * width_corrections[i]
+ y = (y_int.float() + 0.5) * height_corrections[i]
+
+ xy_preds[i, :, 0] = x + offset_x[i]
+ xy_preds[i, :, 1] = y + offset_y[i]
+ xy_preds[i, :, 2] = roi_map[keypoints_idx, y_int, x_int]
+ xy_preds[i, :, 3] = roi_map_scores[keypoints_idx, y_int, x_int]
+
+ return xy_preds
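+
+
+if __name__ == "__main__":
+    # Illustrative round trip: encode two labeled-and-visible keypoints of a single
+    # instance into discrete heatmap indices for a 56x56 heatmap.
+    kpts = Keypoints(torch.tensor([[[12.0, 20.0, 2.0], [40.0, 44.0, 2.0]]]))  # (1, 2, 3)
+    rois = torch.tensor([[0.0, 0.0, 56.0, 56.0]])  # one xyxy box covering the heatmap
+    heatmaps, valid = kpts.to_heatmap(rois, heatmap_size=56)
+    print("heatmap indices:", heatmaps)  # [[20 * 56 + 12, 44 * 56 + 40]]
+    print("valid flags:", valid)  # [[1, 1]]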
diff --git a/sam3/agent/helpers/mask_overlap_removal.py b/sam3/agent/helpers/mask_overlap_removal.py
new file mode 100644
index 0000000000000000000000000000000000000000..386706d0ea0773ef61a0011aa926a72cd0dddd21
--- /dev/null
+++ b/sam3/agent/helpers/mask_overlap_removal.py
@@ -0,0 +1,128 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Dict, List
+
+import numpy as np
+import torch
+
+try:
+ from pycocotools import mask as mask_utils
+except Exception:
+ mask_utils = None
+
+
+def mask_intersection(
+ masks1: torch.Tensor, masks2: torch.Tensor, block_size: int = 16
+) -> torch.Tensor:
+ assert masks1.shape[1:] == masks2.shape[1:]
+ assert masks1.dtype == torch.bool and masks2.dtype == torch.bool
+ N, M = masks1.shape[0], masks2.shape[0]
+ out = torch.zeros(N, M, device=masks1.device, dtype=torch.long)
+ for i in range(0, N, block_size):
+ for j in range(0, M, block_size):
+ a = masks1[i : i + block_size]
+ b = masks2[j : j + block_size]
+ inter = (a[:, None] & b[None, :]).flatten(-2).sum(-1)
+ out[i : i + block_size, j : j + block_size] = inter
+ return out
+
+
+def mask_iom(masks1: torch.Tensor, masks2: torch.Tensor) -> torch.Tensor:
+ assert masks1.shape[1:] == masks2.shape[1:]
+ assert masks1.dtype == torch.bool and masks2.dtype == torch.bool
+ inter = mask_intersection(masks1, masks2)
+ area1 = masks1.flatten(-2).sum(-1) # (N,)
+ area2 = masks2.flatten(-2).sum(-1) # (M,)
+ min_area = torch.min(area1[:, None], area2[None, :]).clamp_min(1)
+ return inter.float() / (min_area.float() + 1e-8)
+
+
+def _decode_single_mask(mask_repr, h: int, w: int) -> np.ndarray:
+ if isinstance(mask_repr, (list, tuple, np.ndarray)):
+ arr = np.array(mask_repr)
+ if arr.ndim != 2:
+ raise ValueError("Mask array must be 2D (H, W).")
+ return (arr > 0).astype(np.uint8)
+
+ if mask_utils is None:
+ raise ImportError(
+ "pycocotools is required to decode RLE mask strings. pip install pycocotools"
+ )
+
+ if not isinstance(mask_repr, (str, bytes)):
+ raise ValueError("Unsupported mask representation type for RLE decode.")
+
+    # `mask_repr` is guaranteed to be str/bytes at this point (checked above).
+    rle = {"counts": mask_repr, "size": [h, w]}
+ decoded = mask_utils.decode(rle)
+ if decoded.ndim == 3:
+ decoded = decoded[:, :, 0]
+ return (decoded > 0).astype(np.uint8)
+
+
+def _decode_masks_to_torch_bool(pred_masks: List, h: int, w: int) -> torch.Tensor:
+ bin_masks = [_decode_single_mask(m, h, w) for m in pred_masks]
+ masks_np = np.stack(bin_masks, axis=0).astype(np.uint8) # (N, H, W)
+ return torch.from_numpy(masks_np > 0)
+
+
+def remove_overlapping_masks(sample: Dict, iom_thresh: float = 0.3) -> Dict:
+ """
+ Greedy keep: sort by score desc; keep a mask if IoM to all kept masks <= threshold.
+ If pred_masks has length 0 or 1, returns sample unchanged (no extra keys).
+ """
+ # Basic presence checks
+ if "pred_masks" not in sample or not isinstance(sample["pred_masks"], list):
+ return sample # nothing to do / preserve as-is
+
+ pred_masks = sample["pred_masks"]
+ N = len(pred_masks)
+
+ # --- Early exit: 0 or 1 mask -> do NOT modify the JSON at all ---
+ if N <= 1:
+ return sample
+
+ # From here on we have at least 2 masks
+ h = int(sample["orig_img_h"])
+ w = int(sample["orig_img_w"])
+ pred_scores = sample.get("pred_scores", [1.0] * N) # fallback if scores missing
+ pred_boxes = sample.get("pred_boxes", None)
+
+ assert N == len(pred_scores), "pred_masks and pred_scores must have same length"
+ if pred_boxes is not None:
+ assert N == len(pred_boxes), "pred_masks and pred_boxes must have same length"
+
+ masks_bool = _decode_masks_to_torch_bool(pred_masks, h, w) # (N, H, W)
+
+ order = sorted(range(N), key=lambda i: float(pred_scores[i]), reverse=True)
+ kept_idx: List[int] = []
+ kept_masks: List[torch.Tensor] = []
+
+ for i in order:
+ cand = masks_bool[i].unsqueeze(0) # (1, H, W)
+ if len(kept_masks) == 0:
+ kept_idx.append(i)
+ kept_masks.append(masks_bool[i])
+ continue
+
+ kept_stack = torch.stack(kept_masks, dim=0) # (K, H, W)
+ iom_vals = mask_iom(cand, kept_stack).squeeze(0) # (K,)
+ if torch.any(iom_vals > iom_thresh):
+ continue # overlaps too much with a higher-scored kept mask
+ kept_idx.append(i)
+ kept_masks.append(masks_bool[i])
+
+ kept_idx_sorted = sorted(kept_idx)
+
+ # Build filtered JSON (this *does* modify fields; only for N>=2 case)
+ out = dict(sample)
+ out["pred_masks"] = [pred_masks[i] for i in kept_idx_sorted]
+ out["pred_scores"] = [pred_scores[i] for i in kept_idx_sorted]
+ if pred_boxes is not None:
+ out["pred_boxes"] = [pred_boxes[i] for i in kept_idx_sorted]
+ out["kept_indices"] = kept_idx_sorted
+    kept_set = set(kept_idx_sorted)
+    out["removed_indices"] = [i for i in range(N) if i not in kept_set]
+ out["iom_threshold"] = float(iom_thresh)
+ return out
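+
+
+# Illustrative sketch (not part of the original module): two heavily overlapping
+# binary masks fed through `remove_overlapping_masks`. The sample dict below only
+# mirrors the keys this function reads; the values are made up.
+if __name__ == "__main__":
+    h, w = 8, 8
+    big = np.zeros((h, w), dtype=np.uint8)
+    big[2:7, 2:7] = 1
+    small = np.zeros((h, w), dtype=np.uint8)
+    small[3:6, 3:6] = 1  # fully contained in `big`, so IoM == 1.0
+    sample = {
+        "orig_img_h": h,
+        "orig_img_w": w,
+        "pred_masks": [big, small],
+        "pred_scores": [0.9, 0.8],
+    }
+    filtered = remove_overlapping_masks(sample, iom_thresh=0.3)
+    assert filtered["kept_indices"] == [0]
+    assert filtered["removed_indices"] == [1]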
diff --git a/sam3/agent/helpers/masks.py b/sam3/agent/helpers/masks.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc78140fa7528b3d1e093522a3de96313dad3ddc
--- /dev/null
+++ b/sam3/agent/helpers/masks.py
@@ -0,0 +1,560 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import copy
+import itertools
+from typing import Any, Iterator, List, Union
+
+import numpy as np
+import pycocotools.mask as mask_util
+import torch
+from torch import device
+
+from .boxes import Boxes
+from .memory import retry_if_cuda_oom
+
+from .roi_align import ROIAlign
+
+
+def polygon_area(x, y):
+ # Using the shoelace formula
+ # https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
+ return 0.5 * np.abs(np.dot(x, np.roll(y, 1)) - np.dot(y, np.roll(x, 1)))
+
+
+def polygons_to_bitmask(
+ polygons: List[np.ndarray], height: int, width: int
+) -> np.ndarray:
+ """
+ Args:
+ polygons (list[ndarray]): each array has shape (Nx2,)
+ height, width (int)
+
+ Returns:
+ ndarray: a bool mask of shape (height, width)
+ """
+ if len(polygons) == 0:
+ # COCOAPI does not support empty polygons
+ return np.zeros((height, width)).astype(bool)
+ rles = mask_util.frPyObjects(polygons, height, width)
+ rle = mask_util.merge(rles)
+ return mask_util.decode(rle).astype(bool)
+
+
+def rasterize_polygons_within_box(
+ polygons: List[np.ndarray], box: np.ndarray, mask_size: int
+) -> torch.Tensor:
+ """
+ Rasterize the polygons into a mask image and
+ crop the mask content in the given box.
+ The cropped mask is resized to (mask_size, mask_size).
+
+ This function is used when generating training targets for mask head in Mask R-CNN.
+ Given original ground-truth masks for an image, new ground-truth mask
+ training targets in the size of `mask_size x mask_size`
+ must be provided for each predicted box. This function will be called to
+ produce such targets.
+
+ Args:
+ polygons (list[ndarray[float]]): a list of polygons, which represents an instance.
+ box: 4-element numpy array
+ mask_size (int):
+
+ Returns:
+ Tensor: BoolTensor of shape (mask_size, mask_size)
+ """
+ # 1. Shift the polygons w.r.t the boxes
+ w, h = box[2] - box[0], box[3] - box[1]
+
+ polygons = copy.deepcopy(polygons)
+ for p in polygons:
+ p[0::2] = p[0::2] - box[0]
+ p[1::2] = p[1::2] - box[1]
+
+ # 2. Rescale the polygons to the new box size
+ # max() to avoid division by small number
+ ratio_h = mask_size / max(h, 0.1)
+ ratio_w = mask_size / max(w, 0.1)
+
+ if ratio_h == ratio_w:
+ for p in polygons:
+ p *= ratio_h
+ else:
+ for p in polygons:
+ p[0::2] *= ratio_w
+ p[1::2] *= ratio_h
+
+ # 3. Rasterize the polygons with coco api
+ mask = polygons_to_bitmask(polygons, mask_size, mask_size)
+ mask = torch.from_numpy(mask)
+ return mask
+
+
+class BitMasks:
+ """
+ This class stores the segmentation masks for all objects in one image, in
+ the form of bitmaps.
+
+ Attributes:
+ tensor: bool Tensor of N,H,W, representing N instances in the image.
+ """
+
+ def __init__(self, tensor: Union[torch.Tensor, np.ndarray]):
+ """
+ Args:
+ tensor: bool Tensor of N,H,W, representing N instances in the image.
+ """
+ if isinstance(tensor, torch.Tensor):
+ tensor = tensor.to(torch.bool)
+ else:
+ tensor = torch.as_tensor(
+ tensor, dtype=torch.bool, device=torch.device("cpu")
+ )
+ assert tensor.dim() == 3, tensor.size()
+ self.image_size = tensor.shape[1:]
+ self.tensor = tensor
+
+ @torch.jit.unused
+ def to(self, *args: Any, **kwargs: Any) -> "BitMasks":
+ return BitMasks(self.tensor.to(*args, **kwargs))
+
+ @property
+ def device(self) -> torch.device:
+ return self.tensor.device
+
+ @torch.jit.unused
+ def __getitem__(self, item: Union[int, slice, torch.BoolTensor]) -> "BitMasks":
+ """
+ Returns:
+ BitMasks: Create a new :class:`BitMasks` by indexing.
+
+        The following usages are allowed:
+
+ 1. `new_masks = masks[3]`: return a `BitMasks` which contains only one mask.
+ 2. `new_masks = masks[2:10]`: return a slice of masks.
+ 3. `new_masks = masks[vector]`, where vector is a torch.BoolTensor
+ with `length = len(masks)`. Nonzero elements in the vector will be selected.
+
+ Note that the returned object might share storage with this object,
+ subject to Pytorch's indexing semantics.
+ """
+ if isinstance(item, int):
+ return BitMasks(self.tensor[item].unsqueeze(0))
+ m = self.tensor[item]
+ assert (
+ m.dim() == 3
+ ), "Indexing on BitMasks with {} returns a tensor with shape {}!".format(
+ item, m.shape
+ )
+ return BitMasks(m)
+
+ @torch.jit.unused
+ def __iter__(self) -> torch.Tensor:
+ yield from self.tensor
+
+ @torch.jit.unused
+ def __repr__(self) -> str:
+ s = self.__class__.__name__ + "("
+ s += "num_instances={})".format(len(self.tensor))
+ return s
+
+ def __len__(self) -> int:
+ return self.tensor.shape[0]
+
+ def nonempty(self) -> torch.Tensor:
+ """
+ Find masks that are non-empty.
+
+ Returns:
+ Tensor: a BoolTensor which represents
+ whether each mask is empty (False) or non-empty (True).
+ """
+ return self.tensor.flatten(1).any(dim=1)
+
+ @staticmethod
+ def from_polygon_masks(
+ polygon_masks: Union["PolygonMasks", List[List[np.ndarray]]],
+ height: int,
+ width: int,
+ ) -> "BitMasks":
+ """
+ Args:
+ polygon_masks (list[list[ndarray]] or PolygonMasks)
+ height, width (int)
+ """
+ if isinstance(polygon_masks, PolygonMasks):
+ polygon_masks = polygon_masks.polygons
+ masks = [polygons_to_bitmask(p, height, width) for p in polygon_masks]
+ if len(masks):
+ return BitMasks(torch.stack([torch.from_numpy(x) for x in masks]))
+ else:
+ return BitMasks(torch.empty(0, height, width, dtype=torch.bool))
+
+ @staticmethod
+ def from_roi_masks(roi_masks: "ROIMasks", height: int, width: int) -> "BitMasks":
+ """
+ Args:
+ roi_masks:
+ height, width (int):
+ """
+ return roi_masks.to_bitmasks(height, width)
+
+ def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
+ """
+ Crop each bitmask by the given box, and resize results to (mask_size, mask_size).
+ This can be used to prepare training targets for Mask R-CNN.
+        It has less reconstruction error than rasterizing polygons; however, we observe
+        no difference in accuracy, while BitMasks requires more memory to store all the masks.
+
+ Args:
+ boxes (Tensor): Nx4 tensor storing the boxes for each mask
+ mask_size (int): the size of the rasterized mask.
+
+ Returns:
+ Tensor:
+ A bool tensor of shape (N, mask_size, mask_size), where
+ N is the number of predicted boxes for this image.
+ """
+ assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))
+ device = self.tensor.device
+
+ batch_inds = torch.arange(len(boxes), device=device).to(dtype=boxes.dtype)[
+ :, None
+ ]
+ rois = torch.cat([batch_inds, boxes], dim=1) # Nx5
+
+ bit_masks = self.tensor.to(dtype=torch.float32)
+ rois = rois.to(device=device)
+ output = (
+ ROIAlign((mask_size, mask_size), 1.0, 0, aligned=True)
+ .forward(bit_masks[:, None, :, :], rois)
+ .squeeze(1)
+ )
+ output = output >= 0.5
+ return output
+
+ def get_bounding_boxes(self) -> Boxes:
+ """
+ Returns:
+ Boxes: tight bounding boxes around bitmasks.
+            If a mask is empty, its bounding box will be all zero.
+ """
+ boxes = torch.zeros(self.tensor.shape[0], 4, dtype=torch.float32)
+ x_any = torch.any(self.tensor, dim=1)
+ y_any = torch.any(self.tensor, dim=2)
+ for idx in range(self.tensor.shape[0]):
+ x = torch.where(x_any[idx, :])[0]
+ y = torch.where(y_any[idx, :])[0]
+ if len(x) > 0 and len(y) > 0:
+ boxes[idx, :] = torch.as_tensor(
+ [x[0], y[0], x[-1] + 1, y[-1] + 1], dtype=torch.float32
+ )
+ return Boxes(boxes)
+
+ @staticmethod
+ def cat(bitmasks_list: List["BitMasks"]) -> "BitMasks":
+ """
+ Concatenates a list of BitMasks into a single BitMasks
+
+ Arguments:
+ bitmasks_list (list[BitMasks])
+
+ Returns:
+ BitMasks: the concatenated BitMasks
+ """
+ assert isinstance(bitmasks_list, (list, tuple))
+ assert len(bitmasks_list) > 0
+ assert all(isinstance(bitmask, BitMasks) for bitmask in bitmasks_list)
+
+ cat_bitmasks = type(bitmasks_list[0])(
+ torch.cat([bm.tensor for bm in bitmasks_list], dim=0)
+ )
+ return cat_bitmasks
+
+
+class PolygonMasks:
+ """
+ This class stores the segmentation masks for all objects in one image, in the form of polygons.
+
+ Attributes:
+ polygons: list[list[ndarray]]. Each ndarray is a float64 vector representing a polygon.
+ """
+
+ def __init__(self, polygons: List[List[Union[torch.Tensor, np.ndarray]]]):
+ """
+ Arguments:
+ polygons (list[list[np.ndarray]]): The first
+                level of the list corresponds to individual instances,
+ the second level to all the polygons that compose the
+ instance, and the third level to the polygon coordinates.
+ The third level array should have the format of
+ [x0, y0, x1, y1, ..., xn, yn] (n >= 3).
+ """
+ if not isinstance(polygons, list):
+ raise ValueError(
+ "Cannot create PolygonMasks: Expect a list of list of polygons per image. "
+ "Got '{}' instead.".format(type(polygons))
+ )
+
+ def _make_array(t: Union[torch.Tensor, np.ndarray]) -> np.ndarray:
+ # Use float64 for higher precision, because why not?
+ # Always put polygons on CPU (self.to is a no-op) since they
+ # are supposed to be small tensors.
+ # May need to change this assumption if GPU placement becomes useful
+ if isinstance(t, torch.Tensor):
+ t = t.cpu().numpy()
+ return np.asarray(t).astype("float64")
+
+ def process_polygons(
+ polygons_per_instance: List[Union[torch.Tensor, np.ndarray]],
+ ) -> List[np.ndarray]:
+ if not isinstance(polygons_per_instance, list):
+ raise ValueError(
+ "Cannot create polygons: Expect a list of polygons per instance. "
+ "Got '{}' instead.".format(type(polygons_per_instance))
+ )
+ # transform each polygon to a numpy array
+ polygons_per_instance = [_make_array(p) for p in polygons_per_instance]
+ for polygon in polygons_per_instance:
+ if len(polygon) % 2 != 0 or len(polygon) < 6:
+ raise ValueError(
+ f"Cannot create a polygon from {len(polygon)} coordinates."
+ )
+ return polygons_per_instance
+
+ self.polygons: List[List[np.ndarray]] = [
+ process_polygons(polygons_per_instance)
+ for polygons_per_instance in polygons
+ ]
+
+ def to(self, *args: Any, **kwargs: Any) -> "PolygonMasks":
+ return self
+
+ @property
+ def device(self) -> torch.device:
+ return torch.device("cpu")
+
+ def get_bounding_boxes(self) -> Boxes:
+ """
+ Returns:
+ Boxes: tight bounding boxes around polygon masks.
+ """
+ boxes = torch.zeros(len(self.polygons), 4, dtype=torch.float32)
+ for idx, polygons_per_instance in enumerate(self.polygons):
+ minxy = torch.as_tensor([float("inf"), float("inf")], dtype=torch.float32)
+ maxxy = torch.zeros(2, dtype=torch.float32)
+ for polygon in polygons_per_instance:
+ coords = torch.from_numpy(polygon).view(-1, 2).to(dtype=torch.float32)
+ minxy = torch.min(minxy, torch.min(coords, dim=0).values)
+ maxxy = torch.max(maxxy, torch.max(coords, dim=0).values)
+ boxes[idx, :2] = minxy
+ boxes[idx, 2:] = maxxy
+ return Boxes(boxes)
+
+ def nonempty(self) -> torch.Tensor:
+ """
+ Find masks that are non-empty.
+
+ Returns:
+ Tensor:
+ a BoolTensor which represents whether each mask is empty (False) or not (True).
+ """
+ keep = [1 if len(polygon) > 0 else 0 for polygon in self.polygons]
+ return torch.from_numpy(np.asarray(keep, dtype=bool))
+
+ def __getitem__(
+ self, item: Union[int, slice, List[int], torch.BoolTensor]
+ ) -> "PolygonMasks":
+ """
+ Support indexing over the instances and return a `PolygonMasks` object.
+ `item` can be:
+
+ 1. An integer. It will return an object with only one instance.
+ 2. A slice. It will return an object with the selected instances.
+ 3. A list[int]. It will return an object with the selected instances,
+           corresponding to the indices in the list.
+ 4. A vector mask of type BoolTensor, whose length is num_instances.
+ It will return an object with the instances whose mask is nonzero.
+ """
+ if isinstance(item, int):
+ selected_polygons = [self.polygons[item]]
+ elif isinstance(item, slice):
+ selected_polygons = self.polygons[item]
+ elif isinstance(item, list):
+ selected_polygons = [self.polygons[i] for i in item]
+ elif isinstance(item, torch.Tensor):
+ # Polygons is a list, so we have to move the indices back to CPU.
+ if item.dtype == torch.bool:
+ assert item.dim() == 1, item.shape
+ item = item.nonzero().squeeze(1).cpu().numpy().tolist()
+ elif item.dtype in [torch.int32, torch.int64]:
+ item = item.cpu().numpy().tolist()
+ else:
+ raise ValueError(
+ "Unsupported tensor dtype={} for indexing!".format(item.dtype)
+ )
+ selected_polygons = [self.polygons[i] for i in item]
+ return PolygonMasks(selected_polygons)
+
+ def __iter__(self) -> Iterator[List[np.ndarray]]:
+ """
+ Yields:
+ list[ndarray]: the polygons for one instance.
+ Each Tensor is a float64 vector representing a polygon.
+ """
+ return iter(self.polygons)
+
+ def __repr__(self) -> str:
+ s = self.__class__.__name__ + "("
+ s += "num_instances={})".format(len(self.polygons))
+ return s
+
+ def __len__(self) -> int:
+ return len(self.polygons)
+
+ def crop_and_resize(self, boxes: torch.Tensor, mask_size: int) -> torch.Tensor:
+ """
+ Crop each mask by the given box, and resize results to (mask_size, mask_size).
+ This can be used to prepare training targets for Mask R-CNN.
+
+ Args:
+ boxes (Tensor): Nx4 tensor storing the boxes for each mask
+ mask_size (int): the size of the rasterized mask.
+
+ Returns:
+ Tensor: A bool tensor of shape (N, mask_size, mask_size), where
+ N is the number of predicted boxes for this image.
+ """
+ assert len(boxes) == len(self), "{} != {}".format(len(boxes), len(self))
+
+ device = boxes.device
+ # Put boxes on the CPU, as the polygon representation is not efficient GPU-wise
+ # (several small tensors for representing a single instance mask)
+ boxes = boxes.to(torch.device("cpu"))
+
+        # poly: list[ndarray], the polygons for one instance
+        # box: a tensor of shape (4,)
+        results = [
+            rasterize_polygons_within_box(poly, box.numpy(), mask_size)
+            for poly, box in zip(self.polygons, boxes)
+        ]
+ if len(results) == 0:
+ return torch.empty(0, mask_size, mask_size, dtype=torch.bool, device=device)
+ return torch.stack(results, dim=0).to(device=device)
+
+ def area(self):
+ """
+ Computes area of the mask.
+ Only works with Polygons, using the shoelace formula:
+ https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
+
+ Returns:
+ Tensor: a vector, area for each instance
+ """
+
+ area = []
+ for polygons_per_instance in self.polygons:
+ area_per_instance = 0
+ for p in polygons_per_instance:
+ area_per_instance += polygon_area(p[0::2], p[1::2])
+ area.append(area_per_instance)
+
+ return torch.tensor(area)
+
+ @staticmethod
+ def cat(polymasks_list: List["PolygonMasks"]) -> "PolygonMasks":
+ """
+ Concatenates a list of PolygonMasks into a single PolygonMasks
+
+ Arguments:
+ polymasks_list (list[PolygonMasks])
+
+ Returns:
+ PolygonMasks: the concatenated PolygonMasks
+ """
+ assert isinstance(polymasks_list, (list, tuple))
+ assert len(polymasks_list) > 0
+ assert all(isinstance(polymask, PolygonMasks) for polymask in polymasks_list)
+
+ cat_polymasks = type(polymasks_list[0])(
+ list(itertools.chain.from_iterable(pm.polygons for pm in polymasks_list))
+ )
+ return cat_polymasks
+
+
+class ROIMasks:
+ """
+ Represent masks by N smaller masks defined in some ROIs. Once ROI boxes are given,
+ full-image bitmask can be obtained by "pasting" the mask on the region defined
+ by the corresponding ROI box.
+ """
+
+ def __init__(self, tensor: torch.Tensor):
+ """
+ Args:
+ tensor: (N, M, M) mask tensor that defines the mask within each ROI.
+ """
+ if tensor.dim() != 3:
+            raise ValueError("ROIMasks must take a mask tensor of 3 dimensions.")
+ self.tensor = tensor
+
+ def to(self, device: torch.device) -> "ROIMasks":
+ return ROIMasks(self.tensor.to(device))
+
+ @property
+ def device(self) -> device:
+ return self.tensor.device
+
+ def __len__(self):
+ return self.tensor.shape[0]
+
+ def __getitem__(self, item) -> "ROIMasks":
+ """
+ Returns:
+ ROIMasks: Create a new :class:`ROIMasks` by indexing.
+
+        The following usages are allowed:
+
+ 1. `new_masks = masks[2:10]`: return a slice of masks.
+ 2. `new_masks = masks[vector]`, where vector is a torch.BoolTensor
+ with `length = len(masks)`. Nonzero elements in the vector will be selected.
+
+ Note that the returned object might share storage with this object,
+ subject to Pytorch's indexing semantics.
+ """
+ t = self.tensor[item]
+ if t.dim() != 3:
+ raise ValueError(
+ f"Indexing on ROIMasks with {item} returns a tensor with shape {t.shape}!"
+ )
+ return ROIMasks(t)
+
+ @torch.jit.unused
+ def __repr__(self) -> str:
+ s = self.__class__.__name__ + "("
+ s += "num_instances={})".format(len(self.tensor))
+ return s
+
+ @torch.jit.unused
+ def to_bitmasks(self, boxes: torch.Tensor, height, width, threshold=0.5):
+ """
+ Args: see documentation of :func:`paste_masks_in_image`.
+ """
+ from detectron2.layers.mask_ops import (
+ _paste_masks_tensor_shape,
+ paste_masks_in_image,
+ )
+
+ if torch.jit.is_tracing():
+ if isinstance(height, torch.Tensor):
+ paste_func = _paste_masks_tensor_shape
+ else:
+ paste_func = paste_masks_in_image
+ else:
+ paste_func = retry_if_cuda_oom(paste_masks_in_image)
+ bitmasks = paste_func(
+ self.tensor, boxes.tensor, (height, width), threshold=threshold
+ )
+ return BitMasks(bitmasks)
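+
+
+# Illustrative sketch (not part of the original module): basic BitMasks bookkeeping on
+# two toy masks, one of them empty. Because of the relative imports above, run it as a
+# module (e.g. `python -m sam3.agent.helpers.masks`), assuming the package is importable
+# and that the `Boxes` wrapper exposes its data as `.tensor` (as elsewhere in these helpers).
+if __name__ == "__main__":
+    toy = torch.zeros(2, 10, 10, dtype=torch.bool)
+    toy[0, 2:5, 3:8] = True  # the second mask stays empty
+    bitmasks = BitMasks(toy)
+    assert bitmasks.nonempty().tolist() == [True, False]
+    bboxes = bitmasks.get_bounding_boxes()
+    assert bboxes.tensor[0].tolist() == [3.0, 2.0, 8.0, 5.0]
+    assert bboxes.tensor[1].tolist() == [0.0, 0.0, 0.0, 0.0]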
diff --git a/sam3/agent/helpers/memory.py b/sam3/agent/helpers/memory.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d51f1b0b1a1ba7008304973d5c631d001ae3c85
--- /dev/null
+++ b/sam3/agent/helpers/memory.py
@@ -0,0 +1,87 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+from contextlib import contextmanager
+from functools import wraps
+
+import torch
+
+__all__ = ["retry_if_cuda_oom"]
+
+
+@contextmanager
+def _ignore_torch_cuda_oom():
+ """
+ A context which ignores CUDA OOM exception from pytorch.
+ """
+ try:
+ yield
+ except RuntimeError as e:
+ # NOTE: the string may change?
+ if "CUDA out of memory. " in str(e):
+ pass
+ else:
+ raise
+
+
+def retry_if_cuda_oom(func):
+ """
+ Makes a function retry itself after encountering
+ pytorch's CUDA OOM error.
+ It will first retry after calling `torch.cuda.empty_cache()`.
+
+    If that still fails, it will then retry after converting the inputs to CPU.
+    In this case, it expects the function to dispatch to a CPU implementation.
+    The return values may become CPU tensors as well, and it is the user's
+    responsibility to convert them back to CUDA tensors if needed.
+
+ Args:
+ func: a stateless callable that takes tensor-like objects as arguments
+
+ Returns:
+ a callable which retries `func` if OOM is encountered.
+
+ Examples:
+ ::
+ output = retry_if_cuda_oom(some_torch_function)(input1, input2)
+ # output may be on CPU even if inputs are on GPU
+
+ Note:
+ 1. When converting inputs to CPU, it will only look at each argument and check
+ if it has `.device` and `.to` for conversion. Nested structures of tensors
+ are not supported.
+
+ 2. Since the function might be called more than once, it has to be
+ stateless.
+ """
+
+ def maybe_to_cpu(x):
+ try:
+ like_gpu_tensor = x.device.type == "cuda" and hasattr(x, "to")
+ except AttributeError:
+ like_gpu_tensor = False
+ if like_gpu_tensor:
+ return x.to(device="cpu")
+ else:
+ return x
+
+ @wraps(func)
+ def wrapped(*args, **kwargs):
+ with _ignore_torch_cuda_oom():
+ return func(*args, **kwargs)
+
+ # Clear cache and retry
+ torch.cuda.empty_cache()
+ with _ignore_torch_cuda_oom():
+ return func(*args, **kwargs)
+
+ # Try on CPU. This slows down the code significantly, therefore print a notice.
+ logger = logging.getLogger(__name__)
+ logger.info(
+ "Attempting to copy inputs of {} to CPU due to CUDA OOM".format(str(func))
+ )
+ new_args = (maybe_to_cpu(x) for x in args)
+ new_kwargs = {k: maybe_to_cpu(v) for k, v in kwargs.items()}
+ return func(*new_args, **new_kwargs)
+
+ return wrapped
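+
+
+# Illustrative sketch (not part of the original module): wrapping a plain matmul with
+# `retry_if_cuda_oom`. If the first call ever hit a CUDA OOM, the wrapper would retry
+# after emptying the cache and finally fall back to CPU inputs; here it simply runs once.
+if __name__ == "__main__":
+    a = torch.randn(64, 64)
+    b = torch.randn(64, 64)
+    out = retry_if_cuda_oom(torch.matmul)(a, b)
+    assert out.shape == (64, 64)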
diff --git a/sam3/agent/helpers/rle.py b/sam3/agent/helpers/rle.py
new file mode 100644
index 0000000000000000000000000000000000000000..277be79b3d762f4f5dd10f01639c3fb4cc8af956
--- /dev/null
+++ b/sam3/agent/helpers/rle.py
@@ -0,0 +1,122 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Some utilities for RLE encoding that doesn't require downloading the masks to the cpu"""
+
+import numpy as np
+import torch
+from pycocotools import mask as mask_util
+
+
+@torch.no_grad()
+def rle_encode(orig_mask, return_areas=False):
+ """Encodes a collection of masks in RLE format
+
+ This function emulates the behavior of the COCO API's encode function, but
+ is executed partially on the GPU for faster execution.
+
+ Args:
+        orig_mask (torch.Tensor): A mask of shape (N, H, W) with dtype=torch.bool
+ return_areas (bool): If True, add the areas of the masks as a part of
+ the RLE output dict under the "area" key. Default is False.
+
+ Returns:
+        list[dict]: The RLE-encoded masks, one COCO-style RLE dict per input mask
+ """
+ assert orig_mask.ndim == 3, "Mask must be of shape (N, H, W)"
+ assert orig_mask.dtype == torch.bool, "Mask must have dtype=torch.bool"
+
+ if orig_mask.numel() == 0:
+ return []
+
+ # First, transpose the spatial dimensions.
+ # This is necessary because the COCO API uses Fortran order
+ mask = orig_mask.transpose(1, 2)
+
+ # Flatten the mask
+ flat_mask = mask.reshape(mask.shape[0], -1)
+ if return_areas:
+ mask_areas = flat_mask.sum(-1).tolist()
+ # Find the indices where the mask changes
+ differences = torch.ones(
+ mask.shape[0], flat_mask.shape[1] + 1, device=mask.device, dtype=torch.bool
+ )
+ differences[:, 1:-1] = flat_mask[:, :-1] != flat_mask[:, 1:]
+ differences[:, 0] = flat_mask[:, 0]
+ _, change_indices = torch.where(differences)
+
+ try:
+ boundaries = torch.cumsum(differences.sum(-1), 0).cpu()
+ except RuntimeError as _:
+ boundaries = torch.cumsum(differences.cpu().sum(-1), 0)
+
+ change_indices_clone = change_indices.clone()
+ # First pass computes the RLEs on GPU, in a flatten format
+ for i in range(mask.shape[0]):
+ # Get the change indices for this batch item
+ beg = 0 if i == 0 else boundaries[i - 1].item()
+ end = boundaries[i].item()
+ change_indices[beg + 1 : end] -= change_indices_clone[beg : end - 1]
+
+ # Now we can split the RLES of each batch item, and convert them to strings
+ # No more gpu at this point
+ change_indices = change_indices.tolist()
+
+ batch_rles = []
+ # Process each mask in the batch separately
+ for i in range(mask.shape[0]):
+ beg = 0 if i == 0 else boundaries[i - 1].item()
+ end = boundaries[i].item()
+ run_lengths = change_indices[beg:end]
+
+ uncompressed_rle = {"counts": run_lengths, "size": list(orig_mask.shape[1:])}
+ h, w = uncompressed_rle["size"]
+ rle = mask_util.frPyObjects(uncompressed_rle, h, w)
+ rle["counts"] = rle["counts"].decode("utf-8")
+ if return_areas:
+ rle["area"] = mask_areas[i]
+ batch_rles.append(rle)
+
+ return batch_rles
+
+
+def robust_rle_encode(masks):
+    """Encodes a collection of masks in RLE format. Uses the GPU version first, and falls back to the CPU version if it fails."""
+
+ assert masks.ndim == 3, "Mask must be of shape (N, H, W)"
+ assert masks.dtype == torch.bool, "Mask must have dtype=torch.bool"
+
+ try:
+ return rle_encode(masks)
+ except RuntimeError as _:
+ masks = masks.cpu().numpy()
+ rles = [
+ mask_util.encode(
+ np.array(mask[:, :, np.newaxis], dtype=np.uint8, order="F")
+ )[0]
+ for mask in masks
+ ]
+ for rle in rles:
+ rle["counts"] = rle["counts"].decode("utf-8")
+ return rles
+
+
+def ann_to_rle(segm, im_info):
+    """Convert an annotation segmentation (polygons or uncompressed RLE) to compressed RLE.
+
+    Args:
+        segm (list or dict): segmentation, given either as polygons or as an (uncompressed) RLE dict
+        im_info (dict): image info with "height" and "width" keys
+
+    Returns:
+        dict: the compressed RLE encoding of the mask
+    """
+ h, w = im_info["height"], im_info["width"]
+ if isinstance(segm, list):
+ # polygon -- a single object might consist of multiple parts
+ # we merge all parts into one mask rle code
+ rles = mask_util.frPyObjects(segm, h, w)
+ rle = mask_util.merge(rles)
+ elif isinstance(segm["counts"], list):
+ # uncompressed RLE
+ rle = mask_util.frPyObjects(segm, h, w)
+ else:
+ # rle
+ rle = segm
+ return rle
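+
+
+# Illustrative sketch (not part of the original module): round-tripping random boolean
+# masks through `rle_encode` and pycocotools' decoder to check that the two agree.
+if __name__ == "__main__":
+    demo_masks = torch.rand(2, 32, 48) > 0.5
+    rles = rle_encode(demo_masks, return_areas=True)
+    for mask, rle in zip(demo_masks, rles):
+        decoded = mask_util.decode(rle).astype(bool)
+        assert np.array_equal(decoded, mask.numpy())
+        assert rle["area"] == int(mask.sum())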
diff --git a/sam3/agent/helpers/roi_align.py b/sam3/agent/helpers/roi_align.py
new file mode 100644
index 0000000000000000000000000000000000000000..640f8534594670b5622ada5980569b1833ecc1be
--- /dev/null
+++ b/sam3/agent/helpers/roi_align.py
@@ -0,0 +1,75 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from torch import nn
+from torchvision.ops import roi_align
+
+
+# NOTE: torchvision's RoIAlign has a different default aligned=False
+class ROIAlign(nn.Module):
+ def __init__(self, output_size, spatial_scale, sampling_ratio, aligned=True):
+ """
+ Args:
+ output_size (tuple): h, w
+ spatial_scale (float): scale the input boxes by this number
+            sampling_ratio (int): number of input samples to take for each output
+ sample. 0 to take samples densely.
+ aligned (bool): if False, use the legacy implementation in
+ Detectron. If True, align the results more perfectly.
+
+ Note:
+ The meaning of aligned=True:
+
+ Given a continuous coordinate c, its two neighboring pixel indices (in our
+ pixel model) are computed by floor(c - 0.5) and ceil(c - 0.5). For example,
+ c=1.3 has pixel neighbors with discrete indices [0] and [1] (which are sampled
+ from the underlying signal at continuous coordinates 0.5 and 1.5). But the original
+ roi_align (aligned=False) does not subtract the 0.5 when computing neighboring
+ pixel indices and therefore it uses pixels with a slightly incorrect alignment
+ (relative to our pixel model) when performing bilinear interpolation.
+
+ With `aligned=True`,
+ we first appropriately scale the ROI and then shift it by -0.5
+ prior to calling roi_align. This produces the correct neighbors; see
+ detectron2/tests/test_roi_align.py for verification.
+
+            This difference does not affect the model's performance when
+            ROIAlign is used together with conv layers.
+ """
+ super().__init__()
+ self.output_size = output_size
+ self.spatial_scale = spatial_scale
+ self.sampling_ratio = sampling_ratio
+ self.aligned = aligned
+
+ from torchvision import __version__
+
+ version = tuple(int(x) for x in __version__.split(".")[:2])
+ # https://github.com/pytorch/vision/pull/2438
+ assert version >= (0, 7), "Require torchvision >= 0.7"
+
+ def forward(self, input, rois):
+ """
+ Args:
+ input: NCHW images
+ rois: Bx5 boxes. First column is the index into N. The other 4 columns are xyxy.
+ """
+ assert rois.dim() == 2 and rois.size(1) == 5
+ if input.is_quantized:
+ input = input.dequantize()
+ return roi_align(
+ input,
+ rois.to(dtype=input.dtype),
+ self.output_size,
+ self.spatial_scale,
+ self.sampling_ratio,
+ self.aligned,
+ )
+
+ def __repr__(self):
+ tmpstr = self.__class__.__name__ + "("
+ tmpstr += "output_size=" + str(self.output_size)
+ tmpstr += ", spatial_scale=" + str(self.spatial_scale)
+ tmpstr += ", sampling_ratio=" + str(self.sampling_ratio)
+ tmpstr += ", aligned=" + str(self.aligned)
+ tmpstr += ")"
+ return tmpstr
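+
+
+# Illustrative sketch (not part of the original module): pooling a 7x7 crop from a dummy
+# feature map. The first ROI column is the batch index; the rest is an (x1, y1, x2, y2) box.
+if __name__ == "__main__":
+    import torch
+
+    pooler = ROIAlign(output_size=(7, 7), spatial_scale=1.0, sampling_ratio=0, aligned=True)
+    features = torch.randn(1, 16, 64, 64)  # NCHW
+    rois = torch.tensor([[0.0, 4.0, 4.0, 36.0, 28.0]])
+    out = pooler(features, rois)
+    assert out.shape == (1, 16, 7, 7)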
diff --git a/sam3/agent/helpers/rotated_boxes.py b/sam3/agent/helpers/rotated_boxes.py
new file mode 100644
index 0000000000000000000000000000000000000000..151a96f6628f9947aeea3ac0b85d8303a4d4294f
--- /dev/null
+++ b/sam3/agent/helpers/rotated_boxes.py
@@ -0,0 +1,533 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import math
+from typing import List, Tuple
+
+import torch
+
+# from detectron2.layers.rotated_boxes import pairwise_iou_rotated
+
+from .boxes import Boxes
+
+
+def pairwise_iou_rotated(boxes1, boxes2):
+ """
+ Return intersection-over-union (Jaccard index) of boxes.
+
+ Both sets of boxes are expected to be in
+ (x_center, y_center, width, height, angle) format.
+
+ Arguments:
+ boxes1 (Tensor[N, 5])
+ boxes2 (Tensor[M, 5])
+
+ Returns:
+ iou (Tensor[N, M]): the NxM matrix containing the pairwise
+ IoU values for every element in boxes1 and boxes2
+ """
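+    # NOTE: this relies on detectron2's compiled custom op being available; without
+    # detectron2 installed and its ops registered, `torch.ops.detectron2.box_iou_rotated`
+    # will raise at call time.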
+ return torch.ops.detectron2.box_iou_rotated(boxes1, boxes2)
+
+
+class RotatedBoxes(Boxes):
+ """
+ This structure stores a list of rotated boxes as a Nx5 torch.Tensor.
+ It supports some common methods about boxes
+ (`area`, `clip`, `nonempty`, etc),
+ and also behaves like a Tensor
+ (support indexing, `to(device)`, `.device`, and iteration over all boxes)
+ """
+
+ def __init__(self, tensor: torch.Tensor):
+ """
+ Args:
+ tensor (Tensor[float]): a Nx5 matrix. Each row is
+ (x_center, y_center, width, height, angle),
+ in which angle is represented in degrees.
+ While there's no strict range restriction for it,
+ the recommended principal range is between [-180, 180) degrees.
+
+ Assume we have a horizontal box B = (x_center, y_center, width, height),
+ where width is along the x-axis and height is along the y-axis.
+ The rotated box B_rot (x_center, y_center, width, height, angle)
+ can be seen as:
+
+ 1. When angle == 0:
+ B_rot == B
+ 2. When angle > 0:
+ B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CCW;
+ 3. When angle < 0:
+ B_rot is obtained by rotating B w.r.t its center by :math:`|angle|` degrees CW.
+
+ Mathematically, since the right-handed coordinate system for image space
+ is (y, x), where y is top->down and x is left->right, the 4 vertices of the
+ rotated rectangle :math:`(yr_i, xr_i)` (i = 1, 2, 3, 4) can be obtained from
+ the vertices of the horizontal rectangle :math:`(y_i, x_i)` (i = 1, 2, 3, 4)
+ in the following way (:math:`\\theta = angle*\\pi/180` is the angle in radians,
+ :math:`(y_c, x_c)` is the center of the rectangle):
+
+ .. math::
+
+ yr_i = \\cos(\\theta) (y_i - y_c) - \\sin(\\theta) (x_i - x_c) + y_c,
+
+ xr_i = \\sin(\\theta) (y_i - y_c) + \\cos(\\theta) (x_i - x_c) + x_c,
+
+ which is the standard rigid-body rotation transformation.
+
+ Intuitively, the angle is
+ (1) the rotation angle from y-axis in image space
+ to the height vector (top->down in the box's local coordinate system)
+ of the box in CCW, and
+ (2) the rotation angle from x-axis in image space
+ to the width vector (left->right in the box's local coordinate system)
+ of the box in CCW.
+
+ More intuitively, consider the following horizontal box ABCD represented
+ in (x1, y1, x2, y2): (3, 2, 7, 4),
+ covering the [3, 7] x [2, 4] region of the continuous coordinate system
+ which looks like this:
+
+ .. code:: none
+
+ O--------> x
+ |
+ | A---B
+ | | |
+ | D---C
+ |
+ v y
+
+ Note that each capital letter represents one 0-dimensional geometric point
+ instead of a 'square pixel' here.
+
+ In the example above, using (x, y) to represent a point we have:
+
+ .. math::
+
+ O = (0, 0), A = (3, 2), B = (7, 2), C = (7, 4), D = (3, 4)
+
+ We name vector AB = vector DC as the width vector in box's local coordinate system, and
+ vector AD = vector BC as the height vector in box's local coordinate system. Initially,
+ when angle = 0 degree, they're aligned with the positive directions of x-axis and y-axis
+ in the image space, respectively.
+
+ For better illustration, we denote the center of the box as E,
+
+ .. code:: none
+
+ O--------> x
+ |
+ | A---B
+ | | E |
+ | D---C
+ |
+ v y
+
+ where the center E = ((3+7)/2, (2+4)/2) = (5, 3).
+
+ Also,
+
+ .. math::
+
+ width = |AB| = |CD| = 7 - 3 = 4,
+ height = |AD| = |BC| = 4 - 2 = 2.
+
+ Therefore, the corresponding representation for the same shape in rotated box in
+ (x_center, y_center, width, height, angle) format is:
+
+ (5, 3, 4, 2, 0),
+
+ Now, let's consider (5, 3, 4, 2, 90), which is rotated by 90 degrees
+ CCW (counter-clockwise) by definition. It looks like this:
+
+ .. code:: none
+
+ O--------> x
+ | B-C
+ | | |
+ | |E|
+ | | |
+ | A-D
+ v y
+
+ The center E is still located at the same point (5, 3), while the vertices
+ ABCD are rotated by 90 degrees CCW with regard to E:
+ A = (4, 5), B = (4, 1), C = (6, 1), D = (6, 5)
+
+ Here, 90 degrees can be seen as the CCW angle to rotate from y-axis to
+ vector AD or vector BC (the top->down height vector in box's local coordinate system),
+ or the CCW angle to rotate from x-axis to vector AB or vector DC (the left->right
+ width vector in box's local coordinate system).
+
+ .. math::
+
+ width = |AB| = |CD| = 5 - 1 = 4,
+ height = |AD| = |BC| = 6 - 4 = 2.
+
+ Next, how about (5, 3, 4, 2, -90), which is rotated by 90 degrees CW (clockwise)
+ by definition? It looks like this:
+
+ .. code:: none
+
+ O--------> x
+ | D-A
+ | | |
+ | |E|
+ | | |
+ | C-B
+ v y
+
+ The center E is still located at the same point (5, 3), while the vertices
+ ABCD are rotated by 90 degrees CW with regard to E:
+ A = (6, 1), B = (6, 5), C = (4, 5), D = (4, 1)
+
+ .. math::
+
+ width = |AB| = |CD| = 5 - 1 = 4,
+ height = |AD| = |BC| = 6 - 4 = 2.
+
+ This covers exactly the same region as (5, 3, 4, 2, 90) does, and their IoU
+ will be 1. However, these two will generate different RoI Pooling results and
+ should not be treated as an identical box.
+
+ On the other hand, it's easy to see that (X, Y, W, H, A) is identical to
+ (X, Y, W, H, A+360N), for any integer N. For example (5, 3, 4, 2, 270) would be
+ identical to (5, 3, 4, 2, -90), because rotating the shape 270 degrees CCW is
+ equivalent to rotating the same shape 90 degrees CW.
+
+ We could rotate further to get (5, 3, 4, 2, 180), or (5, 3, 4, 2, -180):
+
+ .. code:: none
+
+ O--------> x
+ |
+ | C---D
+ | | E |
+ | B---A
+ |
+ v y
+
+ .. math::
+
+ A = (7, 4), B = (3, 4), C = (3, 2), D = (7, 2),
+
+ width = |AB| = |CD| = 7 - 3 = 4,
+ height = |AD| = |BC| = 4 - 2 = 2.
+
+ Finally, this is a very inaccurate (heavily quantized) illustration of
+ how (5, 3, 4, 2, 60) looks like in case anyone wonders:
+
+ .. code:: none
+
+ O--------> x
+ | B\
+ | / C
+ | /E /
+ | A /
+ | `D
+ v y
+
+ It's still a rectangle with center of (5, 3), width of 4 and height of 2,
+ but its angle (and thus orientation) is somewhere between
+ (5, 3, 4, 2, 0) and (5, 3, 4, 2, 90).
+ """
+ device = (
+ tensor.device if isinstance(tensor, torch.Tensor) else torch.device("cpu")
+ )
+ tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)
+ if tensor.numel() == 0:
+ # Use reshape, so we don't end up creating a new tensor that does not depend on
+ # the inputs (and consequently confuses jit)
+ tensor = tensor.reshape((0, 5)).to(dtype=torch.float32, device=device)
+ assert tensor.dim() == 2 and tensor.size(-1) == 5, tensor.size()
+
+ self.tensor = tensor
+
+ def clone(self) -> "RotatedBoxes":
+ """
+ Clone the RotatedBoxes.
+
+ Returns:
+ RotatedBoxes
+ """
+ return RotatedBoxes(self.tensor.clone())
+
+ def to(self, device: torch.device, non_blocking: bool = False):
+        # Boxes are assumed to be float32 and do not support to(dtype)
+ return RotatedBoxes(self.tensor.to(device=device, non_blocking=non_blocking))
+
+ def area(self) -> torch.Tensor:
+ """
+ Computes the area of all the boxes.
+
+ Returns:
+ torch.Tensor: a vector with areas of each box.
+ """
+ box = self.tensor
+ area = box[:, 2] * box[:, 3]
+ return area
+
+ # Avoid in-place operations so that we can torchscript; NOTE: this creates a new tensor
+ def normalize_angles(self) -> None:
+ """
+ Restrict angles to the range of [-180, 180) degrees
+ """
+ angle_tensor = (self.tensor[:, 4] + 180.0) % 360.0 - 180.0
+ self.tensor = torch.cat((self.tensor[:, :4], angle_tensor[:, None]), dim=1)
+
+ def clip(
+ self, box_size: Tuple[int, int], clip_angle_threshold: float = 1.0
+ ) -> None:
+ """
+ Clip (in place) the boxes by limiting x coordinates to the range [0, width]
+ and y coordinates to the range [0, height].
+
+ For RRPN:
+ Only clip boxes that are almost horizontal with a tolerance of
+ clip_angle_threshold to maintain backward compatibility.
+
+ Rotated boxes beyond this threshold are not clipped for two reasons:
+
+ 1. There are potentially multiple ways to clip a rotated box to make it
+ fit within the image.
+ 2. It's tricky to make the entire rectangular box fit within the image
+ and still be able to not leave out pixels of interest.
+
+ Therefore we rely on ops like RoIAlignRotated to safely handle this.
+
+ Args:
+ box_size (height, width): The clipping box's size.
+ clip_angle_threshold:
+ Iff. abs(normalized(angle)) <= clip_angle_threshold (in degrees),
+ we do the clipping as horizontal boxes.
+ """
+ h, w = box_size
+
+        # normalize angles to be within [-180, 180) degrees
+ self.normalize_angles()
+
+ idx = torch.where(torch.abs(self.tensor[:, 4]) <= clip_angle_threshold)[0]
+
+ # convert to (x1, y1, x2, y2)
+ x1 = self.tensor[idx, 0] - self.tensor[idx, 2] / 2.0
+ y1 = self.tensor[idx, 1] - self.tensor[idx, 3] / 2.0
+ x2 = self.tensor[idx, 0] + self.tensor[idx, 2] / 2.0
+ y2 = self.tensor[idx, 1] + self.tensor[idx, 3] / 2.0
+
+ # clip
+ x1.clamp_(min=0, max=w)
+ y1.clamp_(min=0, max=h)
+ x2.clamp_(min=0, max=w)
+ y2.clamp_(min=0, max=h)
+
+ # convert back to (xc, yc, w, h)
+ self.tensor[idx, 0] = (x1 + x2) / 2.0
+ self.tensor[idx, 1] = (y1 + y2) / 2.0
+ # make sure widths and heights do not increase due to numerical errors
+ self.tensor[idx, 2] = torch.min(self.tensor[idx, 2], x2 - x1)
+ self.tensor[idx, 3] = torch.min(self.tensor[idx, 3], y2 - y1)
+
+ def nonempty(self, threshold: float = 0.0) -> torch.Tensor:
+ """
+ Find boxes that are non-empty.
+        A box is considered empty if either of its sides is no larger than the threshold.
+
+ Returns:
+ Tensor: a binary vector which represents
+ whether each box is empty (False) or non-empty (True).
+ """
+ box = self.tensor
+ widths = box[:, 2]
+ heights = box[:, 3]
+ keep = (widths > threshold) & (heights > threshold)
+ return keep
+
+ def __getitem__(self, item) -> "RotatedBoxes":
+ """
+ Returns:
+ RotatedBoxes: Create a new :class:`RotatedBoxes` by indexing.
+
+        The following usages are allowed:
+
+ 1. `new_boxes = boxes[3]`: return a `RotatedBoxes` which contains only one box.
+ 2. `new_boxes = boxes[2:10]`: return a slice of boxes.
+ 3. `new_boxes = boxes[vector]`, where vector is a torch.ByteTensor
+ with `length = len(boxes)`. Nonzero elements in the vector will be selected.
+
+ Note that the returned RotatedBoxes might share storage with this RotatedBoxes,
+ subject to Pytorch's indexing semantics.
+ """
+ if isinstance(item, int):
+ return RotatedBoxes(self.tensor[item].view(1, -1))
+ b = self.tensor[item]
+ assert (
+ b.dim() == 2
+ ), "Indexing on RotatedBoxes with {} failed to return a matrix!".format(item)
+ return RotatedBoxes(b)
+
+ def __len__(self) -> int:
+ return self.tensor.shape[0]
+
+ def __repr__(self) -> str:
+ return "RotatedBoxes(" + str(self.tensor) + ")"
+
+ def inside_box(
+ self, box_size: Tuple[int, int], boundary_threshold: int = 0
+ ) -> torch.Tensor:
+ """
+ Args:
+ box_size (height, width): Size of the reference box covering
+ [0, width] x [0, height]
+ boundary_threshold (int): Boxes that extend beyond the reference box
+ boundary by more than boundary_threshold are considered "outside".
+
+ For RRPN, it might not be necessary to call this function since it's common
+ for rotated box to extend to outside of the image boundaries
+ (the clip function only clips the near-horizontal boxes)
+
+ Returns:
+ a binary vector, indicating whether each box is inside the reference box.
+ """
+ height, width = box_size
+
+ cnt_x = self.tensor[..., 0]
+ cnt_y = self.tensor[..., 1]
+ half_w = self.tensor[..., 2] / 2.0
+ half_h = self.tensor[..., 3] / 2.0
+ a = self.tensor[..., 4]
+ c = torch.abs(torch.cos(a * math.pi / 180.0))
+ s = torch.abs(torch.sin(a * math.pi / 180.0))
+ # This basically computes the horizontal bounding rectangle of the rotated box
+ max_rect_dx = c * half_w + s * half_h
+ max_rect_dy = c * half_h + s * half_w
+
+ inds_inside = (
+ (cnt_x - max_rect_dx >= -boundary_threshold)
+ & (cnt_y - max_rect_dy >= -boundary_threshold)
+ & (cnt_x + max_rect_dx < width + boundary_threshold)
+ & (cnt_y + max_rect_dy < height + boundary_threshold)
+ )
+
+ return inds_inside
+
+ def get_centers(self) -> torch.Tensor:
+ """
+ Returns:
+ The box centers in a Nx2 array of (x, y).
+ """
+ return self.tensor[:, :2]
+
+ def scale(self, scale_x: float, scale_y: float) -> None:
+ """
+ Scale the rotated box with horizontal and vertical scaling factors
+        Note: when scale_factor_x != scale_factor_y and the angle is not a multiple
+        of 90 degrees, the resize transformation does not preserve the rectangular
+        shape; the result is a parallelogram (with skew).
+        Here we make an approximation by fitting a rotated rectangle to the parallelogram.
+ """
+ self.tensor[:, 0] *= scale_x
+ self.tensor[:, 1] *= scale_y
+ theta = self.tensor[:, 4] * math.pi / 180.0
+ c = torch.cos(theta)
+ s = torch.sin(theta)
+
+ # In image space, y is top->down and x is left->right
+        # Consider the local coordinate system for the rotated box,
+ # where the box center is located at (0, 0), and the four vertices ABCD are
+ # A(-w / 2, -h / 2), B(w / 2, -h / 2), C(w / 2, h / 2), D(-w / 2, h / 2)
+ # the midpoint of the left edge AD of the rotated box E is:
+ # E = (A+D)/2 = (-w / 2, 0)
+ # the midpoint of the top edge AB of the rotated box F is:
+ # F(0, -h / 2)
+ # To get the old coordinates in the global system, apply the rotation transformation
+ # (Note: the right-handed coordinate system for image space is yOx):
+ # (old_x, old_y) = (s * y + c * x, c * y - s * x)
+ # E(old) = (s * 0 + c * (-w/2), c * 0 - s * (-w/2)) = (-c * w / 2, s * w / 2)
+ # F(old) = (s * (-h / 2) + c * 0, c * (-h / 2) - s * 0) = (-s * h / 2, -c * h / 2)
+ # After applying the scaling factor (sfx, sfy):
+ # E(new) = (-sfx * c * w / 2, sfy * s * w / 2)
+ # F(new) = (-sfx * s * h / 2, -sfy * c * h / 2)
+        # The new width after the scaling transformation becomes:
+
+ # w(new) = |E(new) - O| * 2
+ # = sqrt[(sfx * c * w / 2)^2 + (sfy * s * w / 2)^2] * 2
+ # = sqrt[(sfx * c)^2 + (sfy * s)^2] * w
+ # i.e., scale_factor_w = sqrt[(sfx * c)^2 + (sfy * s)^2]
+ #
+ # For example,
+ # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_w == scale_factor_x;
+ # when |angle| = 90, c = 0, |s| = 1, scale_factor_w == scale_factor_y
+ self.tensor[:, 2] *= torch.sqrt((scale_x * c) ** 2 + (scale_y * s) ** 2)
+
+ # h(new) = |F(new) - O| * 2
+ # = sqrt[(sfx * s * h / 2)^2 + (sfy * c * h / 2)^2] * 2
+ # = sqrt[(sfx * s)^2 + (sfy * c)^2] * h
+ # i.e., scale_factor_h = sqrt[(sfx * s)^2 + (sfy * c)^2]
+ #
+ # For example,
+ # when angle = 0 or 180, |c| = 1, s = 0, scale_factor_h == scale_factor_y;
+ # when |angle| = 90, c = 0, |s| = 1, scale_factor_h == scale_factor_x
+ self.tensor[:, 3] *= torch.sqrt((scale_x * s) ** 2 + (scale_y * c) ** 2)
+
+ # The angle is the rotation angle from y-axis in image space to the height
+ # vector (top->down in the box's local coordinate system) of the box in CCW.
+ #
+ # angle(new) = angle_yOx(O - F(new))
+ # = angle_yOx( (sfx * s * h / 2, sfy * c * h / 2) )
+ # = atan2(sfx * s * h / 2, sfy * c * h / 2)
+ # = atan2(sfx * s, sfy * c)
+ #
+ # For example,
+ # when sfx == sfy, angle(new) == atan2(s, c) == angle(old)
+ self.tensor[:, 4] = torch.atan2(scale_x * s, scale_y * c) * 180 / math.pi
+
+ @classmethod
+ def cat(cls, boxes_list: List["RotatedBoxes"]) -> "RotatedBoxes":
+ """
+ Concatenates a list of RotatedBoxes into a single RotatedBoxes
+
+ Arguments:
+ boxes_list (list[RotatedBoxes])
+
+ Returns:
+ RotatedBoxes: the concatenated RotatedBoxes
+ """
+ assert isinstance(boxes_list, (list, tuple))
+ if len(boxes_list) == 0:
+ return cls(torch.empty(0))
+ assert all([isinstance(box, RotatedBoxes) for box in boxes_list])
+
+ # use torch.cat (v.s. layers.cat) so the returned boxes never share storage with input
+ cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
+ return cat_boxes
+
+ @property
+ def device(self) -> torch.device:
+ return self.tensor.device
+
+ @torch.jit.unused
+ def __iter__(self):
+ """
+ Yield a box as a Tensor of shape (5,) at a time.
+ """
+ yield from self.tensor
+
+
+def pairwise_iou(boxes1: RotatedBoxes, boxes2: RotatedBoxes) -> torch.Tensor:
+ """
+ Given two lists of rotated boxes of size N and M,
+ compute the IoU (intersection over union)
+ between **all** N x M pairs of boxes.
+ The box order must be (x_center, y_center, width, height, angle).
+
+ Args:
+ boxes1, boxes2 (RotatedBoxes):
+ two `RotatedBoxes`. Contains N & M rotated boxes, respectively.
+
+ Returns:
+ Tensor: IoU, sized [N,M].
+ """
+
+ return pairwise_iou_rotated(boxes1.tensor, boxes2.tensor)
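+
+
+# Illustrative sketch (not part of the original module): the (5, 3, 4, 2, angle) boxes from
+# the docstring above, exercising only the pure-PyTorch helpers (the detectron2
+# `box_iou_rotated` op is deliberately not touched). Because of the relative import above,
+# run it as a module, e.g. `python -m sam3.agent.helpers.rotated_boxes`.
+if __name__ == "__main__":
+    demo = RotatedBoxes(
+        torch.tensor([[5.0, 3.0, 4.0, 2.0, 0.0], [5.0, 3.0, 4.0, 2.0, 270.0]])
+    )
+    assert torch.allclose(demo.area(), torch.tensor([8.0, 8.0]))
+    demo.normalize_angles()
+    # 270 degrees CCW ends up as -90 degrees, i.e. the same orientation rotated 90 degrees CW.
+    assert demo.tensor[1, 4].item() == -90.0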
diff --git a/sam3/agent/helpers/som_utils.py b/sam3/agent/helpers/som_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0ca96d66210f0c0af8dd28c39f6d011b644b7ae0
--- /dev/null
+++ b/sam3/agent/helpers/som_utils.py
@@ -0,0 +1,406 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import colorsys
+from dataclasses import dataclass
+from typing import List, Tuple
+
+import cv2
+import matplotlib as mpl
+import matplotlib.colors as mplc
+import numpy as np
+import pycocotools.mask as mask_utils
+
+
+def rgb_to_hex(rgb_color):
+ """
+ Convert a rgb color to hex color.
+
+ Args:
+ rgb_color (tuple/list of ints): RGB color in tuple or list format.
+
+ Returns:
+ str: Hex color.
+
+ Example:
+ ```
+ >>> rgb_to_hex((255, 0, 244))
+        '#ff00f4'
+ ```
+ """
+ return "#" + "".join([hex(c)[2:].zfill(2) for c in rgb_color])
+
+
+# DEFAULT_COLOR_HEX_TO_NAME = {
+# rgb_to_hex((255, 0, 0)): "red",
+# rgb_to_hex((0, 255, 0)): "lime",
+# rgb_to_hex((0, 0, 255)): "blue",
+# rgb_to_hex((255, 255, 0)): "yellow",
+# rgb_to_hex((255, 0, 255)): "fuchsia",
+# rgb_to_hex((0, 255, 255)): "aqua",
+# rgb_to_hex((255, 165, 0)): "orange",
+# rgb_to_hex((128, 0, 128)): "purple",
+# rgb_to_hex((255, 215, 0)): "gold",
+# }
+
+
+DEFAULT_COLOR_HEX_TO_NAME = {
+ # The top 20 approved colors
+ rgb_to_hex((255, 255, 0)): "yellow",
+ rgb_to_hex((0, 255, 0)): "lime",
+ rgb_to_hex((0, 255, 255)): "cyan",
+ rgb_to_hex((255, 0, 255)): "magenta",
+ rgb_to_hex((255, 0, 0)): "red",
+ rgb_to_hex((255, 127, 0)): "orange",
+ rgb_to_hex((127, 255, 0)): "chartreuse",
+ rgb_to_hex((0, 255, 127)): "spring green",
+ rgb_to_hex((255, 0, 127)): "rose",
+ rgb_to_hex((127, 0, 255)): "violet",
+ rgb_to_hex((192, 255, 0)): "electric lime",
+ rgb_to_hex((255, 192, 0)): "vivid orange",
+ rgb_to_hex((0, 255, 192)): "turquoise",
+ rgb_to_hex((192, 0, 255)): "bright violet",
+ rgb_to_hex((255, 0, 192)): "bright pink",
+ rgb_to_hex((255, 64, 0)): "fiery orange",
+ rgb_to_hex((64, 255, 0)): "bright chartreuse",
+ rgb_to_hex((0, 255, 64)): "malachite",
+ rgb_to_hex((64, 0, 255)): "deep violet",
+ rgb_to_hex((255, 0, 64)): "hot pink",
+}
+
+
+DEFAULT_COLOR_PALETTE = list(DEFAULT_COLOR_HEX_TO_NAME.keys())
+
+
+def _validate_color_hex(color_hex: str):
+ color_hex = color_hex.lstrip("#")
+ if not all(c in "0123456789abcdefABCDEF" for c in color_hex):
+        raise ValueError("Invalid characters in color hex string")
+    if len(color_hex) not in (3, 6):
+        raise ValueError("Invalid length of color hex string")
+
+
+# copied from https://github.com/roboflow/supervision/blob/c8f557af0c61b5c03392bad2cc36c8835598b1e1/supervision/draw/color.py
+@dataclass
+class Color:
+ """
+ Represents a color in RGB format.
+
+ Attributes:
+ r (int): Red channel.
+ g (int): Green channel.
+ b (int): Blue channel.
+ """
+
+ r: int
+ g: int
+ b: int
+
+ @classmethod
+ def from_hex(cls, color_hex: str):
+ """
+ Create a Color instance from a hex string.
+
+ Args:
+ color_hex (str): Hex string of the color.
+
+ Returns:
+ Color: Instance representing the color.
+
+ Example:
+ ```
+ >>> Color.from_hex('#ff00ff')
+ Color(r=255, g=0, b=255)
+ ```
+ """
+ _validate_color_hex(color_hex)
+ color_hex = color_hex.lstrip("#")
+ if len(color_hex) == 3:
+ color_hex = "".join(c * 2 for c in color_hex)
+ r, g, b = (int(color_hex[i : i + 2], 16) for i in range(0, 6, 2))
+ return cls(r, g, b)
+
+ @classmethod
+ def to_hex(cls, color):
+ """
+ Convert a Color instance to a hex string.
+
+ Args:
+            color (Color): Color instance to convert.
+
+ Returns:
+            str: Hex string of the color.
+ """
+ return rgb_to_hex((color.r, color.g, color.b))
+
+ def as_rgb(self) -> Tuple[int, int, int]:
+ """
+ Returns the color as an RGB tuple.
+
+ Returns:
+ Tuple[int, int, int]: RGB tuple.
+
+ Example:
+ ```
+ >>> color.as_rgb()
+ (255, 0, 255)
+ ```
+ """
+ return self.r, self.g, self.b
+
+ def as_bgr(self) -> Tuple[int, int, int]:
+ """
+ Returns the color as a BGR tuple.
+
+ Returns:
+ Tuple[int, int, int]: BGR tuple.
+
+ Example:
+ ```
+ >>> color.as_bgr()
+ (255, 0, 255)
+ ```
+ """
+ return self.b, self.g, self.r
+
+ @classmethod
+ def white(cls):
+ return Color.from_hex(color_hex="#ffffff")
+
+ @classmethod
+ def black(cls):
+ return Color.from_hex(color_hex="#000000")
+
+ @classmethod
+ def red(cls):
+ return Color.from_hex(color_hex="#ff0000")
+
+ @classmethod
+ def green(cls):
+ return Color.from_hex(color_hex="#00ff00")
+
+ @classmethod
+ def blue(cls):
+ return Color.from_hex(color_hex="#0000ff")
+
+
+@dataclass
+class ColorPalette:
+ colors: List[Color]
+
+ @classmethod
+ def default(cls):
+ """
+ Returns a default color palette.
+
+ Returns:
+ ColorPalette: A ColorPalette instance with default colors.
+
+ Example:
+ ```
+ >>> ColorPalette.default()
+ ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])
+ ```
+ """
+ return ColorPalette.from_hex(color_hex_list=DEFAULT_COLOR_PALETTE)
+
+ @classmethod
+ def from_hex(cls, color_hex_list: List[str]):
+ """
+ Create a ColorPalette instance from a list of hex strings.
+
+ Args:
+ color_hex_list (List[str]): List of color hex strings.
+
+ Returns:
+ ColorPalette: A ColorPalette instance.
+
+ Example:
+ ```
+ >>> ColorPalette.from_hex(['#ff0000', '#00ff00', '#0000ff'])
+ ColorPalette(colors=[Color(r=255, g=0, b=0), Color(r=0, g=255, b=0), ...])
+ ```
+ """
+ colors = [Color.from_hex(color_hex) for color_hex in color_hex_list]
+ return cls(colors)
+
+ def by_idx(self, idx: int) -> Color:
+ """
+ Return the color at a given index in the palette.
+
+ Args:
+ idx (int): Index of the color in the palette.
+
+ Returns:
+ Color: Color at the given index.
+
+ Example:
+ ```
+ >>> color_palette.by_idx(1)
+ Color(r=0, g=255, b=0)
+ ```
+ """
+ if idx < 0:
+ raise ValueError("idx argument should not be negative")
+ idx = idx % len(self.colors)
+ return self.colors[idx]
+
+ def find_farthest_color(self, img_array):
+ """
+        Return the palette color that is, on average, farthest from the pixels of the given image.
+
+        Args:
+            img_array (np.ndarray): array of shape (..., 3), where the last axis holds RGB values.
+
+        Returns:
+            Tuple[Color, str]: the farthest color and its human-readable name.
+
+ """
+ # Reshape the image array for broadcasting
+ img_array = img_array.reshape((-1, 3))
+
+ # Convert colors dictionary to a NumPy array
+ color_values = np.array([[c.r, c.g, c.b] for c in self.colors])
+
+ # Calculate the Euclidean distance between the colors and each pixel in the image
+ # Broadcasting happens here: img_array shape is (num_pixels, 3), color_values shape is (num_colors, 3)
+ distances = np.sqrt(
+ np.sum((img_array[:, np.newaxis, :] - color_values) ** 2, axis=2)
+ )
+
+ # Average the distances for each color
+ mean_distances = np.mean(distances, axis=0)
+
+ # return the farthest color
+ farthest_idx = np.argmax(mean_distances)
+ farthest_color = self.colors[farthest_idx]
+ farthest_color_hex = Color.to_hex(farthest_color)
+ if farthest_color_hex in DEFAULT_COLOR_HEX_TO_NAME:
+ farthest_color_name = DEFAULT_COLOR_HEX_TO_NAME[farthest_color_hex]
+ else:
+ farthest_color_name = "unknown"
+
+ return farthest_color, farthest_color_name
+
+
+def draw_box(ax, box_coord, alpha=0.8, edge_color="g", line_style="-", linewidth=2.0):
+ x0, y0, width, height = box_coord
+ ax.add_patch(
+ mpl.patches.Rectangle(
+ (x0, y0),
+ width,
+ height,
+ fill=False,
+ edgecolor=edge_color,
+ linewidth=linewidth,
+ alpha=alpha,
+ linestyle=line_style,
+ )
+ )
+
+
+def draw_text(
+ ax,
+ text,
+ position,
+ font_size=None,
+ color="g",
+ horizontal_alignment="left",
+ rotation=0,
+):
+ if not font_size:
+ font_size = mpl.rcParams["font.size"]
+
+ color = np.maximum(list(mplc.to_rgb(color)), 0.2)
+ color[np.argmax(color)] = max(0.8, np.max(color))
+
+ x, y = position
+ ax.text(
+ x,
+ y,
+ text,
+ size=font_size,
+ family="sans-serif",
+ bbox={"facecolor": "none", "alpha": 0.5, "pad": 0.7, "edgecolor": "none"},
+ verticalalignment="top",
+ horizontalalignment=horizontal_alignment,
+ color=color,
+ rotation=rotation,
+ )
+
+
+def draw_mask(
+ ax, rle, color, show_holes=True, alpha=0.15, upsample_factor=1.0, rle_upsampled=None
+):
+ if isinstance(rle, dict):
+ mask = mask_utils.decode(rle)
+ elif isinstance(rle, np.ndarray):
+ mask = rle
+ else:
+ raise ValueError(f"Unsupported type for rle: {type(rle)}")
+
+ mask_upsampled = None
+ if upsample_factor > 1.0 and show_holes:
+ assert rle_upsampled is not None
+ if isinstance(rle_upsampled, dict):
+ mask_upsampled = mask_utils.decode(rle_upsampled)
+ elif isinstance(rle_upsampled, np.ndarray):
+ mask_upsampled = rle_upsampled
+ else:
+ raise ValueError(f"Unsupported type for rle_upsampled: {type(rle_upsampled)}")
+
+ if show_holes:
+ if mask_upsampled is None:
+ mask_upsampled = mask
+ h, w = mask_upsampled.shape
+ mask_img = np.zeros((h, w, 4))
+ mask_img[:, :, :-1] = color[np.newaxis, np.newaxis, :]
+ mask_img[:, :, -1] = mask_upsampled * alpha
+ ax.imshow(mask_img)
+
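+ # `*_, contours, _` keeps only the last two return values, so this works with
+ # both OpenCV 3 (image, contours, hierarchy) and OpenCV 4 (contours, hierarchy).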
+ *_, contours, _ = cv2.findContours(
+ mask.astype(np.uint8).copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
+ )
+ upsampled_contours = [(cont + 0.5) * upsample_factor - 0.5 for cont in contours]
+ facecolor = (0, 0, 0, 0) if show_holes else color
+ if alpha > 0.8:
+ edge_color = _change_color_brightness(color, brightness_factor=-0.7)
+ else:
+ edge_color = color
+ for cont in upsampled_contours:
+ polygon = mpl.patches.Polygon(
+ [el[0] for el in cont],
+ edgecolor=edge_color,
+ linewidth=2.0,
+ facecolor=facecolor,
+ )
+ ax.add_patch(polygon)
+
+
+def _change_color_brightness(color, brightness_factor):
+ """
+ Depending on the brightness_factor, gives a lighter or darker color, i.e. a color
+ with lower or higher lightness than the original color.
+
+ Args:
+ color: color of the polygon. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted.
+ brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of
+ 0 will correspond to no change, a factor in [-1.0, 0) range will result in
+ a darker color and a factor in (0, 1.0] range will result in a lighter color.
+
+ Returns:
+ modified_color (tuple[double]): a tuple containing the RGB values of the
+ modified color. Each value in the tuple is in the [0.0, 1.0] range.
+ """
+ assert brightness_factor >= -1.0 and brightness_factor <= 1.0
+ color = mplc.to_rgb(color)
+ polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color))
+ modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1])
+ modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness
+ modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness
+ modified_color = colorsys.hls_to_rgb(
+ polygon_color[0], modified_lightness, polygon_color[2]
+ )
+ return modified_color
diff --git a/sam3/agent/helpers/visualizer.py b/sam3/agent/helpers/visualizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e134d3dcf98feea736926ab2d8748a3e061c69b
--- /dev/null
+++ b/sam3/agent/helpers/visualizer.py
@@ -0,0 +1,1662 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import colorsys
+import logging
+import math
+import random
+from enum import Enum, unique
+
+import cv2
+import matplotlib as mpl
+import matplotlib.colors as mplc
+import matplotlib.figure as mplfigure
+import numpy as np
+import pycocotools.mask as mask_util
+import torch
+from iopath.common.file_io import PathManager
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+from PIL import Image
+
+from .boxes import Boxes, BoxMode
+
+from .color_map import random_color
+from .keypoints import Keypoints
+from .masks import BitMasks, PolygonMasks
+from .rotated_boxes import RotatedBoxes
+
+logger = logging.getLogger(__name__)
+
+
+__all__ = ["ColorMode", "VisImage", "Visualizer"]
+
+
+_SMALL_OBJECT_AREA_THRESH = 1000
+_LARGE_MASK_AREA_THRESH = 120000
+_OFF_WHITE = (1.0, 1.0, 240.0 / 255)
+_BLACK = (0, 0, 0)
+_RED = (1.0, 0, 0)
+
+_KEYPOINT_THRESHOLD = 0.05
+
+
+@unique
+class ColorMode(Enum):
+ """
+ Enum of different color modes to use for instance visualizations.
+ """
+
+ IMAGE = 0
+ """
+ Picks a random color for every instance and overlays segmentations with low opacity.
+ """
+ SEGMENTATION = 1
+ """
+ Let instances of the same category have similar colors
+ (from metadata.thing_colors), and overlay them with
+ high opacity. This draws more attention to the quality of the segmentation.
+ """
+ IMAGE_BW = 2
+ """
+ Same as IMAGE, but convert all areas without masks to gray-scale.
+ Only available for drawing per-instance mask predictions.
+ """
+
+
+class GenericMask:
+ """
+ Attributes:
+ polygons (list[ndarray]): polygons for this mask.
+ Each ndarray has format [x, y, x, y, ...]
+ mask (ndarray): a binary mask
+ """
+
+ def __init__(self, mask_or_polygons, height, width):
+ self._mask = self._polygons = self._has_holes = None
+ self.height = height
+ self.width = width
+
+ m = mask_or_polygons
+ if isinstance(m, dict):
+ # RLEs
+ assert "counts" in m and "size" in m
+ if isinstance(m["counts"], list): # uncompressed RLEs
+ h, w = m["size"]
+ assert h == height and w == width
+ m = mask_util.frPyObjects(m, h, w)
+ self._mask = mask_util.decode(m)[:, :]
+ return
+
+ if isinstance(m, list): # list[ndarray]
+ self._polygons = [np.asarray(x).reshape(-1) for x in m]
+ return
+
+ if isinstance(m, np.ndarray): # assumed to be a binary mask
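+ # guard against accidentally passing an Nx2 array of points instead of an HxW mask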
+ assert m.shape[1] != 2, m.shape
+ assert m.shape == (
+ height,
+ width,
+ ), f"mask shape: {m.shape}, target dims: {height}, {width}"
+ self._mask = m.astype("uint8")
+ return
+
+ raise ValueError(
+ "GenericMask cannot handle object {} of type '{}'".format(m, type(m))
+ )
+
+ @property
+ def mask(self):
+ if self._mask is None:
+ self._mask = self.polygons_to_mask(self._polygons)
+ return self._mask
+
+ @property
+ def polygons(self):
+ if self._polygons is None:
+ self._polygons, self._has_holes = self.mask_to_polygons(self._mask)
+ return self._polygons
+
+ @property
+ def has_holes(self):
+ if self._has_holes is None:
+ if self._mask is not None:
+ self._polygons, self._has_holes = self.mask_to_polygons(self._mask)
+ else:
+ self._has_holes = (
+ False # if original format is polygon, does not have holes
+ )
+ return self._has_holes
+
+ def mask_to_polygons(self, mask):
+ # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level
+ # hierarchy. External contours (boundary) of the object are placed in hierarchy-1.
+ # Internal contours (holes) are placed in hierarchy-2.
+ # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours.
+ mask = np.ascontiguousarray(
+ mask
+ ) # some versions of cv2 do not support non-contiguous arrays
+ res = cv2.findContours(
+ mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE
+ )
+ hierarchy = res[-1]
+ if hierarchy is None: # empty mask
+ return [], False
+ has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0
+ res = res[-2]
+ res = [x.flatten() for x in res]
+ # These coordinates from OpenCV are integers in range [0, W-1 or H-1].
+ # We add 0.5 to turn them into real-value coordinate space. A better solution
+ # would be to first +0.5 and then dilate the returned polygon by 0.5.
+ res = [x + 0.5 for x in res if len(x) >= 6]
+ return res, has_holes
+
+ def polygons_to_mask(self, polygons):
+ rle = mask_util.frPyObjects(polygons, self.height, self.width)
+ rle = mask_util.merge(rle)
+ return mask_util.decode(rle)[:, :]
+
+ def area(self):
+ return self.mask.sum()
+
+ def bbox(self):
+ p = mask_util.frPyObjects(self.polygons, self.height, self.width)
+ p = mask_util.merge(p)
+ bbox = mask_util.toBbox(p)
+ bbox[2] += bbox[0]
+ bbox[3] += bbox[1]
+ return bbox
+
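+# Usage sketch (illustrative only):
+#
+# m = np.zeros((480, 640), dtype=np.uint8)
+# m[100:200, 150:300] = 1
+# gm = GenericMask(m, 480, 640)
+# gm.area() # 15000 pixels
+# gm.bbox() # roughly [150, 100, 300, 200] in XYXY
+# gm.polygons # list of flattened [x, y, x, y, ...] contours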
+
+class _PanopticPrediction:
+ """
+ Unify different panoptic annotation/prediction formats
+ """
+
+ def __init__(self, panoptic_seg, segments_info, metadata=None):
+ if segments_info is None:
+ assert metadata is not None
+ # If "segments_info" is None, we assume "panoptic_img" is a
+ # H*W int32 image storing the panoptic_id in the format of
+ # category_id * label_divisor + instance_id. We reserve -1 for
+ # VOID label.
+ label_divisor = metadata.label_divisor
+ segments_info = []
+ for panoptic_label in np.unique(panoptic_seg.numpy()):
+ if panoptic_label == -1:
+ # VOID region.
+ continue
+ pred_class = panoptic_label // label_divisor
+ isthing = (
+ pred_class in metadata.thing_dataset_id_to_contiguous_id.values()
+ )
+ segments_info.append(
+ {
+ "id": int(panoptic_label),
+ "category_id": int(pred_class),
+ "isthing": bool(isthing),
+ }
+ )
+ del metadata
+
+ self._seg = panoptic_seg
+
+ self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info
+ segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True)
+ areas = areas.numpy()
+ sorted_idxs = np.argsort(-areas)
+ self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs]
+ self._seg_ids = self._seg_ids.tolist()
+ for sid, area in zip(self._seg_ids, self._seg_areas):
+ if sid in self._sinfo:
+ self._sinfo[sid]["area"] = float(area)
+
+ def non_empty_mask(self):
+ """
+ Returns:
+ (H, W) array, a mask for all pixels that have a prediction
+ """
+ empty_ids = []
+ for id in self._seg_ids:
+ if id not in self._sinfo:
+ empty_ids.append(id)
+ if len(empty_ids) == 0:
+ return np.zeros(self._seg.shape, dtype=np.uint8)
+ assert (
+ len(empty_ids) == 1
+ ), ">1 id corresponds to no labels. This is currently not supported"
+ return (self._seg != empty_ids[0]).numpy().astype(bool)
+
+ def semantic_masks(self):
+ for sid in self._seg_ids:
+ sinfo = self._sinfo.get(sid)
+ if sinfo is None or sinfo["isthing"]:
+ # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions.
+ continue
+ yield (self._seg == sid).numpy().astype(bool), sinfo
+
+ def instance_masks(self):
+ for sid in self._seg_ids:
+ sinfo = self._sinfo.get(sid)
+ if sinfo is None or not sinfo["isthing"]:
+ continue
+ mask = (self._seg == sid).numpy().astype(bool)
+ if mask.sum() > 0:
+ yield mask, sinfo
+
+
+def _create_text_labels(classes, scores, class_names, is_crowd=None):
+ """
+ Args:
+ classes (list[int] or None):
+ scores (list[float] or None):
+ class_names (list[str] or None):
+ is_crowd (list[bool] or None):
+
+ Returns:
+ list[str] or None
+ """
+ labels = None
+ if classes is not None:
+ if class_names is not None and len(class_names) > 0:
+ labels = [class_names[i] for i in classes]
+ else:
+ labels = [str(i) for i in classes]
+ if scores is not None:
+ if labels is None:
+ labels = ["{:.0f}%".format(s * 100) for s in scores]
+ else:
+ labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)]
+ if labels is not None and is_crowd is not None:
+ labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)]
+ return labels
+
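+# Illustrative example:
+# _create_text_labels([0, 1], [0.9, 0.4], ["cat", "dog"], [False, True])
+# -> ["cat 90%", "dog 40%|crowd"]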
+
+class VisImage:
+ def __init__(self, img, scale=1.0):
+ """
+ Args:
+ img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255].
+ scale (float): scale the input image
+ """
+ self.img = img
+ self.scale = scale
+ self.width, self.height = img.shape[1], img.shape[0]
+ self._setup_figure(img)
+
+ def _setup_figure(self, img):
+ """
+ Args:
+ Same as in :meth:`__init__()`.
+
+ Returns:
+ fig (matplotlib.pyplot.figure): top level container for all the image plot elements.
+ ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system.
+ """
+ fig = mplfigure.Figure(frameon=False)
+ self.dpi = fig.get_dpi()
+ # add a small 1e-2 to avoid precision lost due to matplotlib's truncation
+ # (https://github.com/matplotlib/matplotlib/issues/15363)
+ fig.set_size_inches(
+ (self.width * self.scale + 1e-2) / self.dpi,
+ (self.height * self.scale + 1e-2) / self.dpi,
+ )
+ self.canvas = FigureCanvasAgg(fig)
+ # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig)
+ ax = fig.add_axes([0.0, 0.0, 1.0, 1.0])
+ ax.axis("off")
+ self.fig = fig
+ self.ax = ax
+ self.reset_image(img)
+
+ def reset_image(self, img):
+ """
+ Args:
+ img: same as in __init__
+ """
+ img = img.astype("uint8")
+ self.ax.imshow(
+ img, extent=(0, self.width, self.height, 0), interpolation="nearest"
+ )
+
+ def save(self, filepath):
+ """
+ Args:
+ filepath (str): a string that contains the absolute path, including the file name, where
+ the visualized image will be saved.
+ """
+ self.fig.savefig(filepath)
+
+ def get_image(self):
+ """
+ Returns:
+ ndarray:
+ the visualized image of shape (H, W, 3) (RGB) in uint8 type.
+ The shape is scaled w.r.t the input image using the given `scale` argument.
+ """
+ canvas = self.canvas
+ s, (width, height) = canvas.print_to_buffer()
+ # buf = io.BytesIO() # works for cairo backend
+ # canvas.print_rgba(buf)
+ # width, height = self.width, self.height
+ # s = buf.getvalue()
+
+ buffer = np.frombuffer(s, dtype="uint8")
+
+ img_rgba = buffer.reshape(height, width, 4)
+ rgb, alpha = np.split(img_rgba, [3], axis=2)
+ return rgb.astype("uint8")
+
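+# Usage sketch (illustrative; assumes `img` is an RGB uint8 array of shape (H, W, 3)):
+#
+# vis_img = VisImage(img, scale=1.0)
+# vis_img.ax.add_patch(...) # primitives are drawn onto the exposed Axes
+# arr = vis_img.get_image() # (H, W, 3) uint8, or vis_img.save("out.png")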
+
+class Visualizer:
+ """
+ Visualizer that draws data about detection/segmentation on images.
+
+ It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}`
+ that draw primitive objects to images, as well as high-level wrappers like
+ `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}`
+ that draw composite data in some pre-defined style.
+
+ Note that the exact visualization style for the high-level wrappers are subject to change.
+ Style such as color, opacity, label contents, visibility of labels, or even the visibility
+ of objects themselves (e.g. when the object is too small) may change according
+ to different heuristics, as long as the results still look visually reasonable.
+
+ To obtain a consistent style, you can implement custom drawing functions with the
+ abovementioned primitive methods instead. If you need more customized visualization
+ styles, you can process the data yourself following their format documented in
+ tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not
+ intend to satisfy everyone's preference on drawing styles.
+
+ This visualizer focuses on high rendering quality rather than performance. It is not
+ designed to be used for real-time applications.
+ """
+
+ def __init__(
+ self,
+ img_rgb,
+ metadata=None,
+ scale=1.0,
+ instance_mode=ColorMode.IMAGE,
+ font_size_multiplier=1.3,
+ boarder_width_multiplier=1.5,
+ ):
+ """
+ Args:
+ img_rgb: a numpy array of shape (H, W, C), where H and W correspond to
+ the height and width of the image respectively. C is the number of
+ color channels. The image is required to be in RGB format since that
+ is a requirement of the Matplotlib library. The image is also expected
+ to be in the range [0, 255].
+ metadata (Metadata): dataset metadata (e.g. class names and colors)
+ instance_mode (ColorMode): defines one of the pre-defined style for drawing
+ instances on an image.
+ """
+ self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8)
+ self.boarder_width_multiplier = boarder_width_multiplier
+ # MetadataCatalog is not imported here, so no default metadata is created;
+ # keep the (possibly None) metadata since several draw_* methods read
+ # class names and colors from self.metadata when it is available.
+ self.metadata = metadata
+ self.output = VisImage(self.img, scale=scale)
+ self.cpu_device = torch.device("cpu")
+
+ # text that is too small is useless, so clamp the font size to a reasonable minimum
+ self._default_font_size = (
+ max(np.sqrt(self.output.height * self.output.width) // 60, 15 // scale)
+ * font_size_multiplier
+ )
+ # self._default_font_size = 18
+ self._instance_mode = instance_mode
+ self.keypoint_threshold = _KEYPOINT_THRESHOLD
+
+ import matplotlib.colors as mcolors
+
+ css4_colors = mcolors.CSS4_COLORS
+ self.color_proposals = [
+ list(mcolors.hex2color(color)) for color in css4_colors.values()
+ ]
+
+ def draw_instance_predictions(self, predictions):
+ """
+ Draw instance-level prediction results on an image.
+
+ Args:
+ predictions (Instances): the output of an instance detection/segmentation
+ model. Following fields will be used to draw:
+ "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle").
+
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None
+ scores = predictions.scores if predictions.has("scores") else None
+ classes = (
+ predictions.pred_classes.tolist()
+ if predictions.has("pred_classes")
+ else None
+ )
+ labels = _create_text_labels(
+ classes, scores, self.metadata.get("thing_classes", None)
+ )
+ keypoints = (
+ predictions.pred_keypoints if predictions.has("pred_keypoints") else None
+ )
+
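+ # Keep only predictions scoring above a fixed 0.5 threshold. Note that this
+ # assumes "scores" is present and that keypoints are not filtered by it.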
+ keep = (scores > 0.5).cpu()
+ boxes = boxes[keep]
+ scores = scores[keep]
+ classes = np.array(classes)
+ classes = classes[np.array(keep)]
+ labels = np.array(labels)
+ labels = labels[np.array(keep)]
+
+ if predictions.has("pred_masks"):
+ masks = np.asarray(predictions.pred_masks)
+ masks = masks[np.array(keep)]
+ masks = [
+ GenericMask(x, self.output.height, self.output.width) for x in masks
+ ]
+ else:
+ masks = None
+
+ if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
+ "thing_colors"
+ ):
+ # if self.metadata.get("thing_colors"):
+ colors = [
+ self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
+ for c in classes
+ ]
+ alpha = 0.4
+ else:
+ colors = None
+ alpha = 0.4
+
+ if self._instance_mode == ColorMode.IMAGE_BW:
+ self.output.reset_image(
+ self._create_grayscale_image(
+ (predictions.pred_masks.any(dim=0) > 0).numpy()
+ if predictions.has("pred_masks")
+ else None
+ )
+ )
+ alpha = 0.3
+
+ self.overlay_instances(
+ masks=masks,
+ boxes=boxes,
+ labels=labels,
+ keypoints=keypoints,
+ assigned_colors=colors,
+ alpha=alpha,
+ )
+ return self.output
+
+ def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.7):
+ """
+ Draw semantic segmentation predictions/labels.
+
+ Args:
+ sem_seg (Tensor or ndarray): the segmentation of shape (H, W).
+ Each value is the integer label of the pixel.
+ area_threshold (int): segments with less than `area_threshold` are not drawn.
+ alpha (float): the larger it is, the more opaque the segmentations are.
+
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ if isinstance(sem_seg, torch.Tensor):
+ sem_seg = sem_seg.numpy()
+ labels, areas = np.unique(sem_seg, return_counts=True)
+ sorted_idxs = np.argsort(-areas).tolist()
+ labels = labels[sorted_idxs]
+ for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels):
+ try:
+ mask_color = [x / 255 for x in self.metadata.stuff_colors[label]]
+ except (AttributeError, IndexError):
+ mask_color = None
+
+ binary_mask = (sem_seg == label).astype(np.uint8)
+ text = self.metadata.stuff_classes[label]
+ self.draw_binary_mask(
+ binary_mask,
+ color=mask_color,
+ edge_color=_OFF_WHITE,
+ text=text,
+ alpha=alpha,
+ area_threshold=area_threshold,
+ )
+ return self.output
+
+ def draw_panoptic_seg(
+ self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7
+ ):
+ """
+ Draw panoptic prediction annotations or results.
+
+ Args:
+ panoptic_seg (Tensor): of shape (height, width) where the values are ids for each
+ segment.
+ segments_info (list[dict] or None): Describe each segment in `panoptic_seg`.
+ If it is a ``list[dict]``, each dict contains keys "id", "category_id".
+ If None, category id of each pixel is computed by
+ ``pixel // metadata.label_divisor``.
+ area_threshold (int): stuff segments with less than `area_threshold` are not drawn.
+
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata)
+
+ if self._instance_mode == ColorMode.IMAGE_BW:
+ self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask()))
+
+ # draw mask for all semantic segments first i.e. "stuff"
+ for mask, sinfo in pred.semantic_masks():
+ category_idx = sinfo["category_id"]
+ try:
+ mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]]
+ except AttributeError:
+ mask_color = None
+
+ text = (
+ self.metadata.stuff_classes[category_idx]
+ .replace("-other", "")
+ .replace("-merged", "")
+ )
+ self.draw_binary_mask(
+ mask,
+ color=mask_color,
+ edge_color=_OFF_WHITE,
+ text=text,
+ alpha=alpha,
+ area_threshold=area_threshold,
+ )
+
+ # draw mask for all instances second
+ all_instances = list(pred.instance_masks())
+ if len(all_instances) == 0:
+ return self.output
+ masks, sinfo = list(zip(*all_instances))
+ category_ids = [x["category_id"] for x in sinfo]
+
+ try:
+ scores = [x["score"] for x in sinfo]
+ except KeyError:
+ scores = None
+ class_names = [
+ name.replace("-other", "").replace("-merged", "")
+ for name in self.metadata.thing_classes
+ ]
+ labels = _create_text_labels(
+ category_ids, scores, class_names, [x.get("iscrowd", 0) for x in sinfo]
+ )
+
+ try:
+ colors = [
+ self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
+ for c in category_ids
+ ]
+ except AttributeError:
+ colors = None
+ self.overlay_instances(
+ masks=masks, labels=labels, assigned_colors=colors, alpha=alpha
+ )
+
+ return self.output
+
+ draw_panoptic_seg_predictions = draw_panoptic_seg # backward compatibility
+
+ def draw_dataset_dict(self, dic):
+ """
+ Draw annotations/segmentations in Detectron2 Dataset format.
+
+ Args:
+ dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format.
+
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ annos = dic.get("annotations", None)
+ if annos:
+ if "segmentation" in annos[0]:
+ masks = [x["segmentation"] for x in annos]
+ else:
+ masks = None
+ if "keypoints" in annos[0]:
+ keypts = [x["keypoints"] for x in annos]
+ keypts = np.array(keypts).reshape(len(annos), -1, 3)
+ else:
+ keypts = None
+
+ boxes = [
+ (
+ BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS)
+ if len(x["bbox"]) == 4
+ else x["bbox"]
+ )
+ for x in annos
+ ]
+
+ colors = None
+ category_ids = [x["category_id"] for x in annos]
+ if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get(
+ "thing_colors"
+ ):
+ colors = [
+ self._jitter([x / 255 for x in self.metadata.thing_colors[c]])
+ for c in category_ids
+ ]
+ names = self.metadata.get("thing_classes", None)
+ labels = _create_text_labels(
+ category_ids,
+ scores=None,
+ class_names=names,
+ is_crowd=[x.get("iscrowd", 0) for x in annos],
+ )
+ self.overlay_instances(
+ labels=labels,
+ boxes=boxes,
+ masks=masks,
+ keypoints=keypts,
+ assigned_colors=colors,
+ )
+
+ sem_seg = dic.get("sem_seg", None)
+ if sem_seg is None and "sem_seg_file_name" in dic:
+ with PathManager.open(dic["sem_seg_file_name"], "rb") as f:
+ sem_seg = Image.open(f)
+ sem_seg = np.asarray(sem_seg, dtype="uint8")
+ if sem_seg is not None:
+ self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.4)
+
+ pan_seg = dic.get("pan_seg", None)
+ if pan_seg is None and "pan_seg_file_name" in dic:
+ with PathManager.open(dic["pan_seg_file_name"], "rb") as f:
+ pan_seg = Image.open(f)
+ pan_seg = np.asarray(pan_seg)
+ from panopticapi.utils import rgb2id
+
+ pan_seg = rgb2id(pan_seg)
+ if pan_seg is not None:
+ segments_info = dic["segments_info"]
+ pan_seg = torch.tensor(pan_seg)
+ self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.7)
+ return self.output
+
+ def overlay_instances(
+ self,
+ *,
+ boxes=None,
+ labels=None,
+ masks=None,
+ keypoints=None,
+ assigned_colors=None,
+ binary_masks=None,
+ alpha=0.5,
+ label_mode="1",
+ ):
+ """
+ Args:
+ boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`,
+ or an Nx4 numpy array of XYXY_ABS format for the N objects in a single image,
+ or a :class:`RotatedBoxes`,
+ or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format
+ for the N objects in a single image,
+ labels (list[str]): the text to be displayed for each instance.
+ masks (masks-like object): Supported types are:
+
+ * :class:`detectron2.structures.PolygonMasks`,
+ :class:`detectron2.structures.BitMasks`.
+ * list[list[ndarray]]: contains the segmentation masks for all objects in one image.
+ The first level of the list corresponds to individual instances. The second
+ level to all the polygon that compose the instance, and the third level
+ to the polygon coordinates. The third level should have the format of
+ [x0, y0, x1, y1, ..., xn, yn] (n >= 3).
+ * list[ndarray]: each ndarray is a binary mask of shape (H, W).
+ * list[dict]: each dict is a COCO-style RLE.
+ keypoints (Keypoint or array like): an array-like object of shape (N, K, 3),
+ where the N is the number of instances and K is the number of keypoints.
+ The last dimension corresponds to (x, y, visibility or score).
+ assigned_colors (list[matplotlib.colors]): a list of colors, where each color
+ corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
+ for full list of formats that the colors are accepted in.
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ num_instances = 0
+ if boxes is not None:
+ boxes = self._convert_boxes(boxes)
+ num_instances = len(boxes)
+ if masks is not None:
+ masks = self._convert_masks(masks)
+ if num_instances:
+ assert len(masks) == num_instances
+ else:
+ num_instances = len(masks)
+ if keypoints is not None:
+ if num_instances:
+ assert len(keypoints) == num_instances
+ else:
+ num_instances = len(keypoints)
+ keypoints = self._convert_keypoints(keypoints)
+ if labels is not None:
+ assert len(labels) == num_instances
+ if assigned_colors is None:
+ assigned_colors = [
+ random_color(rgb=True, maximum=1) for _ in range(num_instances)
+ ]
+ if num_instances == 0:
+ return labels, [], []
+ if boxes is not None and boxes.shape[1] == 5:
+ return self.overlay_rotated_instances(
+ boxes=boxes, labels=labels, assigned_colors=assigned_colors
+ )
+
+ # Display in largest to smallest order to reduce occlusion.
+ areas = None
+ if boxes is not None:
+ areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1)
+ elif masks is not None:
+ areas = np.asarray([x.area() for x in masks])
+
+ # if areas is not None:
+ # # sorted_idxs = np.argsort(areas).tolist()
+ # sorted_idxs = np.argsort(-areas).tolist()
+ # # Re-order overlapped instances in descending order.
+ # boxes = boxes[sorted_idxs] if boxes is not None else None
+ # labels = [labels[k] for k in sorted_idxs] if labels is not None else None
+ # masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None
+ # binary_masks = (
+ # [binary_masks[idx] for idx in sorted_idxs]
+ # if binary_masks is not None
+ # else None
+ # )
+ # assigned_colors = [assigned_colors[idx] for idx in sorted_idxs]
+ # keypoints = keypoints[sorted_idxs] if keypoints is not None else None
+
+ marks = []
+ marks_position = []
+ added_positions = set()
+ for i in range(num_instances):
+ color = assigned_colors[i]
+ if boxes is not None:
+ self.draw_box(boxes[i], alpha=1, edge_color=color)
+ if binary_masks is None:
+ # draw number for non-mask instances
+ mark = self._draw_number_in_box(
+ boxes[i], i + 1, color=color, label_mode=label_mode
+ )
+ marks.append(mark)
+
+ if binary_masks is not None:
+ mark, mask_position = self._draw_number_in_mask(
+ binary_mask=binary_masks[i].astype("uint8"),
+ text=i + 1,
+ color=color,
+ added_positions=added_positions,
+ label_mode=label_mode,
+ )
+ marks.append(mark)
+ marks_position.append(mask_position)
+
+ self.draw_binary_mask(
+ binary_masks[i],
+ color=color,
+ edge_color=_OFF_WHITE,
+ alpha=alpha,
+ )
+
+ if masks is not None:
+ for segment in masks[i].polygons:
+ self.draw_polygon(
+ segment.reshape(-1, 2), color, alpha=0
+ ) # alpha=0 so holes in masks are not colored
+
+ # draw keypoints
+ if keypoints is not None:
+ for keypoints_per_instance in keypoints:
+ self.draw_and_connect_keypoints(keypoints_per_instance)
+
+ # return labels, marks, sorted_idxs, marks_position
+ return labels, marks, marks_position
+
+ def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None):
+ """
+ Args:
+ boxes (ndarray): an Nx5 numpy array of
+ (x_center, y_center, width, height, angle_degrees) format
+ for the N objects in a single image.
+ labels (list[str]): the text to be displayed for each instance.
+ assigned_colors (list[matplotlib.colors]): a list of colors, where each color
+ corresponds to each mask or box in the image. Refer to 'matplotlib.colors'
+ for full list of formats that the colors are accepted in.
+
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ num_instances = len(boxes)
+
+ if assigned_colors is None:
+ assigned_colors = [
+ random_color(rgb=True, maximum=1) for _ in range(num_instances)
+ ]
+ if num_instances == 0:
+ return self.output
+
+ # Display in largest to smallest order to reduce occlusion.
+ if boxes is not None:
+ areas = boxes[:, 2] * boxes[:, 3]
+
+ sorted_idxs = np.argsort(-areas).tolist()
+ # Re-order overlapped instances in descending order.
+ boxes = boxes[sorted_idxs]
+ labels = [labels[k] for k in sorted_idxs] if labels is not None else None
+ colors = [assigned_colors[idx] for idx in sorted_idxs]
+
+ for i in range(num_instances):
+ self.draw_rotated_box_with_label(
+ boxes[i],
+ edge_color=colors[i],
+ label=labels[i] if labels is not None else None,
+ )
+
+ return self.output
+
+ def draw_and_connect_keypoints(self, keypoints):
+ """
+ Draws keypoints of an instance and follows the rules for keypoint connections
+ to draw lines between appropriate keypoints. This follows color heuristics for
+ line color.
+
+ Args:
+ keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints
+ and the last dimension corresponds to (x, y, probability).
+
+ Returns:
+ output (VisImage): image object with visualizations.
+ """
+ visible = {}
+ keypoint_names = self.metadata.get("keypoint_names")
+ for idx, keypoint in enumerate(keypoints):
+ # draw keypoint
+ x, y, prob = keypoint
+ if prob > self.keypoint_threshold:
+ self.draw_circle((x, y), color=_RED)
+ if keypoint_names:
+ keypoint_name = keypoint_names[idx]
+ visible[keypoint_name] = (x, y)
+
+ if self.metadata.get("keypoint_connection_rules"):
+ for kp0, kp1, color in self.metadata.keypoint_connection_rules:
+ if kp0 in visible and kp1 in visible:
+ x0, y0 = visible[kp0]
+ x1, y1 = visible[kp1]
+ color = tuple(x / 255.0 for x in color)
+ self.draw_line([x0, x1], [y0, y1], color=color)
+
+ # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip
+ # Note that this strategy is specific to person keypoints.
+ # For other keypoints, it should just do nothing
+ try:
+ ls_x, ls_y = visible["left_shoulder"]
+ rs_x, rs_y = visible["right_shoulder"]
+ mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2
+ except KeyError:
+ pass
+ else:
+ # draw line from nose to mid-shoulder
+ nose_x, nose_y = visible.get("nose", (None, None))
+ if nose_x is not None:
+ self.draw_line(
+ [nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED
+ )
+
+ try:
+ # draw line from mid-shoulder to mid-hip
+ lh_x, lh_y = visible["left_hip"]
+ rh_x, rh_y = visible["right_hip"]
+ except KeyError:
+ pass
+ else:
+ mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2
+ self.draw_line(
+ [mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED
+ )
+ return self.output
+
+ def mask_dims_from_binary(self, binary_mask):
+ ind_y, ind_x = np.where(binary_mask == 1)
+ min_ind_x = np.min(ind_x)
+ max_ind_x = np.max(ind_x)
+ min_ind_y = np.min(ind_y)
+ max_ind_y = np.max(ind_y)
+ return (max_ind_x - min_ind_x), (max_ind_y - min_ind_y)
+
+ def reposition_label(self, position, cur, binary_mask, move_count):
+ img_width, img_height = self.output.width, self.output.height
+ mask_width, mask_height = self.mask_dims_from_binary(binary_mask)
+
+ # set reposition thresholds
+ mask_width_limit, mask_height_limit = (
+ 25,
+ 25,
+ ) # masks smaller than this get their label moved off the object
+ location_diff_threshold = 15 # minimum distance allowed between two labels
+ x_boundry_limit, y_boundry_limit = (
+ 20,
+ 20,
+ ) # minimum distance to keep the label away from the image edges
+
+ offset_x = 15 # move in x direction
+ offset_y = 15 # move in y direction
+
+ x1, y1 = position
+
+ if (
+ mask_width < mask_width_limit
+ and mask_height < mask_height_limit
+ and move_count == 0
+ ):
+ move_x = offset_x if offset_x + x1 < img_width else -offset_x
+ move_y = offset_y if offset_y + y1 < img_height else -offset_y
+ return (True, move_x, move_y)
+
+ for x2, y2 in cur:
+ if abs(x1 - x2) + abs(y1 - y2) < location_diff_threshold:
+ move_x = offset_x if x1 >= x2 else -offset_x
+ move_y = offset_y if y1 >= y2 else -offset_y
+ move_x = (
+ 0
+ if x1 + move_x > img_width - x_boundry_limit
+ or x1 + move_x < x_boundry_limit
+ else move_x
+ )
+ move_y = (
+ 0
+ if y1 + move_y > img_height - y_boundry_limit
+ or y1 + move_y < y_boundry_limit
+ else move_y
+ )
+ return (
+ True,
+ move_x,
+ move_y,
+ )
+ return (False, 0, 0)
+
+ def locate_label_position(self, original_position, added_positions, binary_mask):
+ if added_positions is None or binary_mask is None:
+ return original_position
+
+ x, y = original_position
+
+ move_count = 0
+ reposition, x_move, y_move = self.reposition_label(
+ (x, y), added_positions, binary_mask, move_count
+ )
+ while reposition and move_count < 10:
+ x += x_move
+ y += y_move
+ move_count += 1
+ reposition, x_move, y_move = self.reposition_label(
+ (x, y), added_positions, binary_mask, move_count
+ )
+ added_positions.add((x, y))
+ return x, y
+
+ """
+ Primitive drawing functions:
+ """
+
+ def draw_text(
+ self,
+ text,
+ position,
+ added_positions=None,
+ binary_mask=None,
+ *,
+ font_size=None,
+ color="g",
+ horizontal_alignment="center",
+ rotation=0,
+ ):
+ """
+ Args:
+ text (str): class label
+ position (tuple): a tuple of the x and y coordinates to place text on image.
+ font_size (int, optional): font size of the text. If not provided, a font size
+ proportional to the image width is calculated and used.
+ color: color of the text. Refer to `matplotlib.colors` for full list
+ of formats that are accepted.
+ horizontal_alignment (str): see `matplotlib.text.Text`
+ rotation: rotation angle in degrees CCW
+
+ Returns:
+ output (VisImage): image object with text drawn.
+ """
+ if not font_size:
+ font_size = self._default_font_size
+
+ # brighten very dark text colors so they remain readable on the label background
+ color = np.maximum(list(mplc.to_rgb(color)), 0.15)
+ color[np.argmax(color)] = max(0.8, np.max(color))
+
+ def contrasting_color(rgb):
+ """Returns 'white' or 'black' depending on which color contrasts more with the given RGB value."""
+
+ # Decompose the RGB tuple
+ R, G, B = rgb
+
+ # Calculate the Y value
+ Y = 0.299 * R + 0.587 * G + 0.114 * B
+
+ # If Y value is greater than 128, it's closer to white so return black. Otherwise, return white.
+ return "black" if Y > 128 else "white"
+
+ bbox_background = contrasting_color(color * 255)
+
+ x, y = self.locate_label_position(
+ original_position=position,
+ added_positions=added_positions,
+ binary_mask=binary_mask,
+ )
+
+ self.output.ax.text(
+ x,
+ y,
+ text,
+ size=font_size * self.output.scale,
+ family="sans-serif",
+ bbox={
+ "facecolor": bbox_background,
+ "alpha": 0.8,
+ "pad": 0.7,
+ "edgecolor": "none",
+ },
+ verticalalignment="top",
+ horizontalalignment=horizontal_alignment,
+ color=color,
+ zorder=10,
+ rotation=rotation,
+ )
+ return self.output
+
+ def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"):
+ """
+ Args:
+ box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where (x0, y0)
+ is the top-left corner of the box and (x1, y1) is the bottom-right
+ corner of the box.
+ alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+ edge_color: color of the outline of the box. Refer to `matplotlib.colors`
+ for full list of formats that are accepted.
+ line_style (string): the string to use to create the outline of the boxes.
+
+ Returns:
+ output (VisImage): image object with box drawn.
+ """
+ x0, y0, x1, y1 = box_coord
+ width = x1 - x0
+ height = y1 - y0
+
+ linewidth = max(self._default_font_size / 12, 1) * self.boarder_width_multiplier
+
+ self.output.ax.add_patch(
+ mpl.patches.Rectangle(
+ (x0, y0),
+ width,
+ height,
+ fill=False,
+ edgecolor=edge_color,
+ linewidth=linewidth * self.output.scale,
+ alpha=alpha,
+ linestyle=line_style,
+ )
+ )
+ return self.output
+
+ def draw_rotated_box_with_label(
+ self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None
+ ):
+ """
+ Draw a rotated box with label on its top-left corner.
+
+ Args:
+ rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle),
+ where cnt_x and cnt_y are the center coordinates of the box.
+ w and h are the width and height of the box. angle represents how
+ many degrees the box is rotated CCW with regard to the 0-degree box.
+ alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+ edge_color: color of the outline of the box. Refer to `matplotlib.colors`
+ for full list of formats that are accepted.
+ line_style (string): the string to use to create the outline of the boxes.
+ label (string): label for rotated box. It will not be rendered when set to None.
+
+ Returns:
+ output (VisImage): image object with box drawn.
+ """
+ cnt_x, cnt_y, w, h, angle = rotated_box
+ area = w * h
+ # use thinner lines when the box is small
+ linewidth = self._default_font_size / (
+ 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3
+ )
+
+ theta = angle * math.pi / 180.0
+ c = math.cos(theta)
+ s = math.sin(theta)
+ rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)]
+ # x: left->right ; y: top->down
+ rotated_rect = [
+ (s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect
+ ]
+ for k in range(4):
+ j = (k + 1) % 4
+ self.draw_line(
+ [rotated_rect[k][0], rotated_rect[j][0]],
+ [rotated_rect[k][1], rotated_rect[j][1]],
+ color=edge_color,
+ linestyle="--" if k == 1 else line_style,
+ linewidth=linewidth,
+ )
+
+ if label is not None:
+ text_pos = rotated_rect[1] # topleft corner
+
+ height_ratio = h / np.sqrt(self.output.height * self.output.width)
+ label_color = self._change_color_brightness(
+ edge_color, brightness_factor=0.7
+ )
+ font_size = (
+ np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
+ * 0.5
+ * self._default_font_size
+ )
+ self.draw_text(
+ label, text_pos, color=label_color, font_size=font_size, rotation=angle
+ )
+
+ return self.output
+
+ def draw_circle(self, circle_coord, color, radius=3):
+ """
+ Args:
+ circle_coord (list(int) or tuple(int)): contains the x and y coordinates
+ of the center of the circle.
+ color: color of the polygon. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted.
+ radius (int): radius of the circle.
+
+ Returns:
+ output (VisImage): image object with circle drawn.
+ """
+ x, y = circle_coord
+ self.output.ax.add_patch(
+ mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color)
+ )
+ return self.output
+
+ def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None):
+ """
+ Args:
+ x_data (list[int]): a list containing x values of all the points being drawn.
+ Length of list should match the length of y_data.
+ y_data (list[int]): a list containing y values of all the points being drawn.
+ Length of list should match the length of x_data.
+ color: color of the line. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted.
+ linestyle: style of the line. Refer to `matplotlib.lines.Line2D`
+ for a full list of formats that are accepted.
+ linewidth (float or None): width of the line. When it's None,
+ a default value will be computed and used.
+
+ Returns:
+ output (VisImage): image object with line drawn.
+ """
+ if linewidth is None:
+ linewidth = self._default_font_size / 3
+ linewidth = max(linewidth, 1)
+ self.output.ax.add_line(
+ mpl.lines.Line2D(
+ x_data,
+ y_data,
+ linewidth=linewidth * self.output.scale,
+ color=color,
+ linestyle=linestyle,
+ )
+ )
+ return self.output
+
+ def draw_binary_mask(
+ self,
+ binary_mask,
+ color=None,
+ *,
+ edge_color=None,
+ text=None,
+ alpha=0.7,
+ area_threshold=10,
+ ):
+ """
+ Args:
+ binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and
+ W is the image width. Each value in the array is either a 0 or 1 value of uint8
+ type.
+ color: color of the mask. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted. If None, will pick a random color.
+ edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
+ full list of formats that are accepted.
+ text (str): if not None, this text will be drawn on the object
+ alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+ area_threshold (float): a connected component smaller than this area will not be shown.
+
+ Returns:
+ output (VisImage): image object with mask drawn.
+ """
+ if color is None:
+ color = random_color(rgb=True, maximum=1)
+ color = mplc.to_rgb(color)
+
+ has_valid_segment = False
+ binary_mask = binary_mask.astype("uint8") # opencv needs uint8
+ mask = GenericMask(binary_mask, self.output.height, self.output.width)
+ shape2d = (binary_mask.shape[0], binary_mask.shape[1])
+
+ if not mask.has_holes:
+ # draw polygons for regular masks
+ for segment in mask.polygons:
+ area = mask_util.area(
+ mask_util.frPyObjects([segment], shape2d[0], shape2d[1])
+ )
+ if area < (area_threshold or 0):
+ continue
+ has_valid_segment = True
+ segment = segment.reshape(-1, 2)
+ self.draw_polygon(
+ segment, color=color, edge_color=edge_color, alpha=alpha
+ )
+ else:
+ # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon
+ rgba = np.zeros(shape2d + (4,), dtype="float32")
+ rgba[:, :, :3] = color
+ rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
+ has_valid_segment = True
+ self.output.ax.imshow(
+ rgba, extent=(0, self.output.width, self.output.height, 0)
+ )
+
+ if text is not None and has_valid_segment:
+ lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
+ self._draw_text_in_mask(binary_mask, text, lighter_color)
+ return self.output
+
+ def draw_binary_mask_with_number(
+ self,
+ binary_mask,
+ color=None,
+ *,
+ edge_color=None,
+ text=None,
+ label_mode="1",
+ alpha=0.1,
+ anno_mode=["Mask"],
+ area_threshold=10,
+ ):
+ """
+ Args:
+ binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and
+ W is the image width. Each value in the array is either a 0 or 1 value of uint8
+ type.
+ color: color of the mask. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted. If None, will pick a random color.
+ edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
+ full list of formats that are accepted.
+ text (str): if not None, this text will be drawn on the object
+ alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+ area_threshold (float): a connected component smaller than this area will not be shown.
+
+ Returns:
+ output (VisImage): image object with mask drawn.
+ """
+ if color is None:
+ randint = random.randint(0, len(self.color_proposals) - 1)
+ color = self.color_proposals[randint]
+ color = mplc.to_rgb(color)
+
+ has_valid_segment = True
+ binary_mask = binary_mask.astype("uint8") # opencv needs uint8
+ mask = GenericMask(binary_mask, self.output.height, self.output.width)
+ shape2d = (binary_mask.shape[0], binary_mask.shape[1])
+ bbox = mask.bbox()
+
+ if "Mask" in anno_mode:
+ if not mask.has_holes:
+ # draw polygons for regular masks
+ for segment in mask.polygons:
+ area = mask_util.area(
+ mask_util.frPyObjects([segment], shape2d[0], shape2d[1])
+ )
+ if area < (area_threshold or 0):
+ continue
+ has_valid_segment = True
+ segment = segment.reshape(-1, 2)
+ self.draw_polygon(
+ segment, color=color, edge_color=edge_color, alpha=alpha
+ )
+ else:
+ # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon
+ rgba = np.zeros(shape2d + (4,), dtype="float32")
+ rgba[:, :, :3] = color
+ rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha
+ has_valid_segment = True
+ self.output.ax.imshow(
+ rgba, extent=(0, self.output.width, self.output.height, 0)
+ )
+
+ if "Box" in anno_mode:
+ self.draw_box(bbox, edge_color=color, alpha=0.75)
+
+ if "Mark" in anno_mode:
+ has_valid_segment = True
+ else:
+ has_valid_segment = False
+
+ if text is not None and has_valid_segment:
+ # lighter_color = tuple([x*0.2 for x in color])
+ lighter_color = [
+ 1,
+ 1,
+ 1,
+ ] # self._change_color_brightness(color, brightness_factor=0.7)
+ self._draw_number_in_mask(
+ binary_mask=binary_mask,
+ text=text,
+ color=lighter_color,
+ label_mode=label_mode,
+ )
+ return self.output
+
+ def draw_soft_mask(self, soft_mask, color=None, *, text=None, alpha=0.5):
+ """
+ Args:
+ soft_mask (ndarray): float array of shape (H, W), each value in [0, 1].
+ color: color of the mask. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted. If None, will pick a random color.
+ text (str): if not None, this text will be drawn on the object
+ alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+
+ Returns:
+ output (VisImage): image object with mask drawn.
+ """
+ if color is None:
+ color = random_color(rgb=True, maximum=1)
+ color = mplc.to_rgb(color)
+
+ shape2d = (soft_mask.shape[0], soft_mask.shape[1])
+ rgba = np.zeros(shape2d + (4,), dtype="float32")
+ rgba[:, :, :3] = color
+ rgba[:, :, 3] = soft_mask * alpha
+ self.output.ax.imshow(
+ rgba, extent=(0, self.output.width, self.output.height, 0)
+ )
+
+ if text is not None:
+ lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
+ binary_mask = (soft_mask > 0.5).astype("uint8")
+ self._draw_text_in_mask(binary_mask, text, lighter_color)
+ return self.output
+
+ def draw_polygon(self, segment, color, edge_color=None, alpha=0.5):
+ """
+ Args:
+ segment: numpy array of shape Nx2, containing all the points in the polygon.
+ color: color of the polygon. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted.
+ edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a
+ full list of formats that are accepted. If not provided, a darker shade
+ of the polygon color will be used instead.
+ alpha (float): blending coefficient. Smaller values lead to more transparent masks.
+
+ Returns:
+ output (VisImage): image object with polygon drawn.
+ """
+ if edge_color is None:
+ # make edge color darker than the polygon color
+ if alpha > 0.8:
+ edge_color = self._change_color_brightness(
+ color, brightness_factor=-0.7
+ )
+ else:
+ edge_color = color
+ edge_color = mplc.to_rgb(edge_color) + (1,)
+
+ polygon = mpl.patches.Polygon(
+ segment,
+ fill=True,
+ facecolor=mplc.to_rgb(color) + (alpha,),
+ edgecolor=edge_color,
+ linewidth=max(self._default_font_size // 15 * self.output.scale, 1),
+ )
+ self.output.ax.add_patch(polygon)
+ return self.output
+
+ """
+ Internal methods:
+ """
+
+ def _jitter(self, color):
+ """
+ Randomly modifies given color to produce a slightly different color than the color given.
+
+ Args:
+ color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color
+ picked. The values in the list are in the [0.0, 1.0] range.
+
+ Returns:
+ jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the
+ color after being jittered. The values in the list are in the [0.0, 1.0] range.
+ """
+ color = mplc.to_rgb(color)
+ # np.random.seed(0)
+ vec = np.random.rand(3)
+ # better to do it in another color space
+ vec = vec / np.linalg.norm(vec) * 0.5
+ res = np.clip(vec + color, 0, 1)
+ return tuple(res)
+
+ def _create_grayscale_image(self, mask=None):
+ """
+ Create a grayscale version of the original image.
+ The colors in masked area, if given, will be kept.
+ """
+ img_bw = self.img.astype("f4").mean(axis=2)
+ img_bw = np.stack([img_bw] * 3, axis=2)
+ if mask is not None:
+ img_bw[mask] = self.img[mask]
+ return img_bw
+
+ def _change_color_brightness(self, color, brightness_factor):
+ """
+ Depending on the brightness_factor, gives a lighter or darker color, i.e. a color
+ with lower or higher lightness than the original color.
+
+ Args:
+ color: color of the polygon. Refer to `matplotlib.colors` for a full list of
+ formats that are accepted.
+ brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of
+ 0 will correspond to no change, a factor in [-1.0, 0) range will result in
+ a darker color and a factor in (0, 1.0] range will result in a lighter color.
+
+ Returns:
+ modified_color (tuple[double]): a tuple containing the RGB values of the
+ modified color. Each value in the tuple is in the [0.0, 1.0] range.
+ """
+ assert brightness_factor >= -1.0 and brightness_factor <= 1.0
+ color = mplc.to_rgb(color)
+ polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color))
+ modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1])
+ modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness
+ modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness
+ modified_color = colorsys.hls_to_rgb(
+ polygon_color[0], modified_lightness, polygon_color[2]
+ )
+ return modified_color
+
+ def _convert_boxes(self, boxes):
+ """
+ Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension.
+ """
+ if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes):
+ return boxes.tensor.detach().numpy()
+ else:
+ return np.asarray(boxes)
+
+ def _convert_masks(self, masks_or_polygons):
+ """
+ Convert different format of masks or polygons to a tuple of masks and polygons.
+
+ Returns:
+ list[GenericMask]:
+ """
+
+ m = masks_or_polygons
+ if isinstance(m, PolygonMasks):
+ m = m.polygons
+ if isinstance(m, BitMasks):
+ m = m.tensor.numpy()
+ if isinstance(m, torch.Tensor):
+ m = m.numpy()
+ ret = []
+ for x in m:
+ if isinstance(x, GenericMask):
+ ret.append(x)
+ else:
+ ret.append(GenericMask(x, self.output.height, self.output.width))
+ return ret
+
+ def _draw_number_in_box(self, box, text, color, label_mode="1"):
+ """
+ Find proper places to draw text given a box.
+ """
+ x0, y0, x1, y1 = box
+ text_pos = (x0, y0) # if drawing boxes, put text on the box corner.
+ horiz_align = "left"
+ # for small objects, draw text at the side to avoid occlusion
+ instance_area = (y1 - y0) * (x1 - x0)
+ if (
+ instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale
+ or y1 - y0 < 40 * self.output.scale
+ ):
+ if y1 >= self.output.height - 5:
+ text_pos = (x1, y0)
+ else:
+ text_pos = (x0, y1)
+
+ height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width)
+ lighter_color = self._change_color_brightness(color, brightness_factor=0.7)
+ font_size = (
+ np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2)
+ * 0.65
+ * self._default_font_size
+ )
+ if label_mode == "a":
+ text = self.number_to_string(int(text))
+ else:
+ text = text
+ self.draw_text(
+ text,
+ text_pos,
+ color=lighter_color,
+ horizontal_alignment=horiz_align,
+ font_size=font_size,
+ )
+
+ return str(text)
+
+ @staticmethod
+ def number_to_string(n):
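+ # Spreadsheet-style base-26 labels: 1 -> "a", 26 -> "z", 27 -> "aa"
+ # (used when label_mode == "a").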
+ chars = []
+ while n:
+ n, remainder = divmod(n - 1, 26)
+ chars.append(chr(97 + remainder))
+ return "".join(reversed(chars))
+
+ def _draw_number_in_mask(
+ self, binary_mask, text, color, added_positions=None, label_mode="1"
+ ):
+ """
+ Find proper places to draw text given a binary mask.
+ """
+ binary_mask = np.pad(binary_mask, ((1, 1), (1, 1)), "constant")
+ mask_dt = cv2.distanceTransform(binary_mask, cv2.DIST_L2, 0)
+ mask_dt = mask_dt[1:-1, 1:-1]
+ max_dist = np.max(mask_dt)
+ coords_y, coords_x = np.where(mask_dt == max_dist) # coords is [y, x]
+
+ if label_mode == "a":
+ text = self.number_to_string(int(text))
+ else:
+ text = text
+
+ text_position = (
+ coords_x[len(coords_x) // 2] + 2,
+ coords_y[len(coords_y) // 2] - 6,
+ )
+ self.draw_text(
+ text,
+ text_position,
+ added_positions=added_positions,
+ binary_mask=binary_mask,
+ color=color,
+ )
+
+ return str(text), text_position
+
+ def _draw_text_in_mask(self, binary_mask, text, color):
+ """
+ Find proper places to draw text given a binary mask.
+ """
+ _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(
+ binary_mask, 8
+ )
+ if stats[1:, -1].size == 0:
+ return
+ largest_component_id = np.argmax(stats[1:, -1]) + 1
+
+ # draw text on the largest component, as well as other very large components.
+ for cid in range(1, _num_cc):
+ if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH:
+ # median is more stable than centroid
+ # center = centroids[largest_component_id]
+ center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1]
+ bottom = np.max((cc_labels == cid).nonzero(), axis=1)[::-1]
+ center[1] = bottom[1] + 2
+ self.draw_text(text, center, color=color)
+
+ def _convert_keypoints(self, keypoints):
+ if isinstance(keypoints, Keypoints):
+ keypoints = keypoints.tensor
+ keypoints = np.asarray(keypoints)
+ return keypoints
+
+ def get_output(self):
+ """
+ Returns:
+ output (VisImage): the image output containing the visualizations added
+ to the image.
+ """
+ return self.output
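+
+
+# Usage sketch (illustrative; `img` is an RGB uint8 array and `masks_np` a list of
+# HxW binary masks -- neither is defined in this module):
+#
+# vis = Visualizer(img)
+# labels, marks, positions = vis.overlay_instances(
+# masks=masks_np,
+# binary_masks=masks_np,
+# labels=[str(i + 1) for i in range(len(masks_np))],
+# alpha=0.5,
+# )
+# out_rgb = vis.get_output().get_image() # (H, W, 3) uint8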
diff --git a/sam3/agent/helpers/zoom_in.py b/sam3/agent/helpers/zoom_in.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc60700e58c9d4319cfdfa90876bca92fd9aa1a6
--- /dev/null
+++ b/sam3/agent/helpers/zoom_in.py
@@ -0,0 +1,195 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import io
+import math
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pycocotools.mask as mask_utils
+from PIL import Image
+
+from .som_utils import ColorPalette, draw_box, draw_mask, draw_text
+
+
+def render_zoom_in(
+ object_data,
+ image_file,
+ show_box: bool = True,
+ show_text: bool = False,
+ show_holes: bool = True,
+ mask_alpha: float = 0.15,
+):
+ """
+ Render a two-panel visualization with a cropped original view (left/upper) and a zoomed-in
+ mask overlay (right/lower), then return it as a PIL.Image along with the chosen mask color (hex).
+
+ Parameters
+ ----------
+ object_data : dict
+ Dict containing "labels" and COCO RLE "segmentation".
+ Expected:
+ object_data["labels"][0]["noun_phrase"] : str
+ object_data["segmentation"] : COCO RLE (with "size": [H, W])
+ image_file : PIL.Image.Image
+ Source image (PIL).
+ show_box : bool
+ Whether to draw the bbox on the cropped original panel.
+ show_text : bool
+ Whether to draw the noun phrase label near the bbox.
+ show_holes : bool
+ Whether to render mask holes (passed through to draw_mask).
+ mask_alpha : float
+ Alpha for the mask overlay.
+
+ Returns
+ -------
+ pil_img : PIL.Image.Image
+ The composed visualization image.
+ color_hex : str
+ Hex string of the chosen mask color.
+ """
+
+ # ---- local constants (avoid module-level globals) ----
+ _AREA_LARGE = 0.25
+ _AREA_MEDIUM = 0.05
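+ # These are target mask-to-box area ratios: the zoom-in panel is enlarged until the
+ # mask fills at most ~25% of it, and the context crop until it fills at most ~5%
+ # (both capped at the full image size).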
+
+ # ---- local helpers (avoid name collisions in a larger class) ----
+ def _get_shift(x, w, w_new, w_img):
+ assert 0 <= w_new <= w_img
+ shift = (w_new - w) / 2
+ if x - shift + w_new > w_img:
+ shift = x + w_new - w_img
+ return min(x, shift)
+
+ def _get_zoom_in_box(mask_box_xywh, img_h, img_w, mask_area):
+ box_w, box_h = mask_box_xywh[2], mask_box_xywh[3]
+ w_new = min(box_w + max(0.2 * box_w, 16), img_w)
+ h_new = min(box_h + max(0.2 * box_h, 16), img_h)
+
+ mask_relative_area = mask_area / (w_new * h_new)
+
+ # zoom-in (larger box if mask is relatively big)
+ w_new_large, h_new_large = w_new, h_new
+ if mask_relative_area > _AREA_LARGE:
+ ratio_large = math.sqrt(mask_relative_area / _AREA_LARGE)
+ w_new_large = min(w_new * ratio_large, img_w)
+ h_new_large = min(h_new * ratio_large, img_h)
+
+ w_shift_large = _get_shift(
+ mask_box_xywh[0], mask_box_xywh[2], w_new_large, img_w
+ )
+ h_shift_large = _get_shift(
+ mask_box_xywh[1], mask_box_xywh[3], h_new_large, img_h
+ )
+ zoom_in_box = [
+ mask_box_xywh[0] - w_shift_large,
+ mask_box_xywh[1] - h_shift_large,
+ w_new_large,
+ h_new_large,
+ ]
+
+ # crop box for the original/cropped image
+ w_new_medium, h_new_medium = w_new, h_new
+ if mask_relative_area > _AREA_MEDIUM:
+ ratio_med = math.sqrt(mask_relative_area / _AREA_MEDIUM)
+ w_new_medium = min(w_new * ratio_med, img_w)
+ h_new_medium = min(h_new * ratio_med, img_h)
+
+ w_shift_medium = _get_shift(
+ mask_box_xywh[0], mask_box_xywh[2], w_new_medium, img_w
+ )
+ h_shift_medium = _get_shift(
+ mask_box_xywh[1], mask_box_xywh[3], h_new_medium, img_h
+ )
+ img_crop_box = [
+ mask_box_xywh[0] - w_shift_medium,
+ mask_box_xywh[1] - h_shift_medium,
+ w_new_medium,
+ h_new_medium,
+ ]
+ return zoom_in_box, img_crop_box
+
+ # ---- main body ----
+ # Input parsing
+ object_label = object_data["labels"][0]["noun_phrase"]
+ img = image_file.convert("RGB")
+ bbox_xywh = mask_utils.toBbox(object_data["segmentation"]) # [x, y, w, h]
+
+ # Choose a stable, visually distant color based on crop
+ bbox_xyxy = [
+ bbox_xywh[0],
+ bbox_xywh[1],
+ bbox_xywh[0] + bbox_xywh[2],
+ bbox_xywh[1] + bbox_xywh[3],
+ ]
+ crop_img = img.crop(bbox_xyxy)
+ color_palette = ColorPalette.default()
+ color_obj, _ = color_palette.find_farthest_color(np.array(crop_img))
+ color = np.array([color_obj.r / 255, color_obj.g / 255, color_obj.b / 255])
+ color_hex = f"#{color_obj.r:02x}{color_obj.g:02x}{color_obj.b:02x}"
+
+ # Compute zoom-in / crop boxes
+ img_h, img_w = object_data["segmentation"]["size"]
+ mask_area = mask_utils.area(object_data["segmentation"])
+ zoom_in_box, img_crop_box = _get_zoom_in_box(bbox_xywh, img_h, img_w, mask_area)
+
+    # Layout choice: side-by-side panels for tall crops, stacked panels for wide crops
+ w, h = img_crop_box[2], img_crop_box[3]
+ if w < h:
+ fig, (ax1, ax2) = plt.subplots(1, 2)
+ else:
+ fig, (ax1, ax2) = plt.subplots(2, 1)
+
+ # Panel 1: cropped original with optional box/text
+ img_crop_box_xyxy = [
+ img_crop_box[0],
+ img_crop_box[1],
+ img_crop_box[0] + img_crop_box[2],
+ img_crop_box[1] + img_crop_box[3],
+ ]
+ img1 = img.crop(img_crop_box_xyxy)
+ bbox_xywh_rel = [
+ bbox_xywh[0] - img_crop_box[0],
+ bbox_xywh[1] - img_crop_box[1],
+ bbox_xywh[2],
+ bbox_xywh[3],
+ ]
+ ax1.imshow(img1)
+ ax1.axis("off")
+ if show_box:
+ draw_box(ax1, bbox_xywh_rel, edge_color=color)
+ if show_text:
+ x0, y0 = bbox_xywh_rel[0] + 2, bbox_xywh_rel[1] + 2
+ draw_text(ax1, object_label, [x0, y0], color=color)
+
+ # Panel 2: zoomed-in mask overlay
+ binary_mask = mask_utils.decode(object_data["segmentation"])
+ alpha = Image.fromarray((binary_mask * 255).astype("uint8"))
+ img_rgba = img.convert("RGBA")
+ img_rgba.putalpha(alpha)
+ zoom_in_box_xyxy = [
+ zoom_in_box[0],
+ zoom_in_box[1],
+ zoom_in_box[0] + zoom_in_box[2],
+ zoom_in_box[1] + zoom_in_box[3],
+ ]
+ img_with_alpha_zoomin = img_rgba.crop(zoom_in_box_xyxy)
+ alpha_zoomin = img_with_alpha_zoomin.split()[3]
+ binary_mask_zoomin = np.array(alpha_zoomin).astype(bool)
+
+ ax2.imshow(img_with_alpha_zoomin.convert("RGB"))
+ ax2.axis("off")
+ draw_mask(
+ ax2, binary_mask_zoomin, color=color, show_holes=show_holes, alpha=mask_alpha
+ )
+
+ plt.tight_layout()
+
+ # Buffer -> PIL.Image
+ buf = io.BytesIO()
+ fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0, dpi=100)
+ plt.close(fig)
+ buf.seek(0)
+ pil_img = Image.open(buf)
+
+ return pil_img, color_hex
diff --git a/sam3/agent/inference.py b/sam3/agent/inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..0aac1165121ce3abac5d6e49225e3fdbddfa08b1
--- /dev/null
+++ b/sam3/agent/inference.py
@@ -0,0 +1,65 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import json
+import os
+
+from sam3.agent.agent_core import agent_inference
+
+
+def run_single_image_inference(
+ image_path,
+ text_prompt,
+ llm_config,
+ send_generate_request,
+ call_sam_service,
+ output_dir="agent_output",
+ debug=False,
+):
+ """Run inference on a single image with provided prompt"""
+
+ llm_name = llm_config["name"]
+
+ if not os.path.exists(image_path):
+ raise FileNotFoundError(f"Image file not found: {image_path}")
+
+ # Create output directory
+ os.makedirs(output_dir, exist_ok=True)
+
+ # Generate output file names
+ image_basename = os.path.splitext(os.path.basename(image_path))[0]
+ prompt_for_filename = text_prompt.replace("/", "_").replace(" ", "_")
+
+ base_filename = f"{image_basename}_{prompt_for_filename}_agent_{llm_name}"
+ output_json_path = os.path.join(output_dir, f"{base_filename}_pred.json")
+ output_image_path = os.path.join(output_dir, f"{base_filename}_pred.png")
+ agent_history_path = os.path.join(output_dir, f"{base_filename}_history.json")
+
+    # Check if output already exists and skip re-processing
+    if os.path.exists(output_json_path):
+        print(f"Output JSON {output_json_path} already exists. Skipping.")
+        # Return the previously rendered image path so callers still get a usable result
+        return output_image_path
+
+ print(f"{'-'*30} Starting SAM 3 Agent Session... {'-'*30} ")
+ agent_history, final_output_dict, rendered_final_output = agent_inference(
+ image_path,
+ text_prompt,
+ send_generate_request=send_generate_request,
+ call_sam_service=call_sam_service,
+ output_dir=output_dir,
+ debug=debug,
+ )
+ print(f"{'-'*30} End of SAM 3 Agent Session... {'-'*30} ")
+
+ final_output_dict["text_prompt"] = text_prompt
+ final_output_dict["image_path"] = image_path
+
+    # Save outputs (context managers ensure the JSON files are flushed and closed)
+    with open(output_json_path, "w") as f:
+        json.dump(final_output_dict, f, indent=4)
+    with open(agent_history_path, "w") as f:
+        json.dump(agent_history, f, indent=4)
+    rendered_final_output.save(output_image_path)
+
+    print("\n✅ Successfully processed single image!")
+ print(f"Output JSON: {output_json_path}")
+ print(f"Output Image: {output_image_path}")
+ print(f"Agent History: {agent_history_path}")
+ return output_image_path
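+
+
+# Illustrative usage sketch (not executed here): send_generate_request and
+# call_sam_service are hypothetical placeholders supplied by the calling
+# application, e.g. wrappers around an LLM endpoint and a SAM 3 service.
+#
+#   out_png = run_single_image_inference(
+#       image_path="example.jpg",
+#       text_prompt="dog",
+#       llm_config={"name": "my_llm"},
+#       send_generate_request=my_send_generate_request,
+#       call_sam_service=my_call_sam_service,
+#   )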
diff --git a/sam3/agent/system_prompts/system_prompt.txt b/sam3/agent/system_prompts/system_prompt.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a1a6915cb16dc0d7dc3377ad58764d4cf49be34c
--- /dev/null
+++ b/sam3/agent/system_prompts/system_prompt.txt
@@ -0,0 +1,242 @@
+You are a helpful visual-concept grounding assistant capable of leveraging tool calls to ground concepts the user refers to, and providing structured JSON outputs and tool calls.
+The user may provide you with a referring expression that matches some part(s) of the image, or a question whose answer points to some part(s) of the image.
+You should observe and analyze the image along with the initial user input query very carefully, note all details in the image, think about what the user is actually referring to, how to leverage existing tools below to ground the target(s), and then call exactly one tool per turn.
+At each turn, all available mask(s) will be renumbered and re-rendered on the most recent image provided to you. The numbering and coloring can be different from previous turns. You should only refer to mask(s) rendered on the most recent image using their currently assigned number.
+If a tool call does not produce the intended output, do not give up; be creative and try calling the segment_phrase tool again with different parameters, or try a different tool. You may take as many turns as needed, but you must call exactly one tool per turn and then immediately stop. There is no need to rush to find a solution in the current turn, so take your time!
+
+
+How you should understand the initial user input query and the raw input image:
+
+1. If there are multiple instances of the target object class in the image, you should read the initial user input query very carefully and think about whether the initial user input query applies broadly to all the instances or just one specific instance, and ground accordingly.
+2. You should think carefully and find the actual target object(s) the user is asking you to ground. Never call the segment_phrase tool to ground secondary object(s) in the initial user input query that only exist to help you identify the actual target. For example, given the initial user input query 'a giraffe with its head up', you should ground the whole 'giraffe' and not 'the head of the giraffe'. Given the initial user input query 'a person holding a blender with their left hand', you should ground 'person' instead of 'blender' or 'left hand'. Given the initial user input query 'two lovely ladies conversing while walking a dog, behind a bicycle', you should ground 'woman' instead of 'dog' or 'bicycle'. Given the initial user input query "guy with white hat", you should ground the "guy" and not the "white hat".
+3. Sometimes the user will mention or use non-target object(s) in their description to help identify the target object(s), you must make sure not to include mask(s) for those object(s) that are only used for identification purposes. For example, given the initial user input query "a man carrying a young girl", you should only ground the main target the "man" and not include the "young girl" in your final predicted mask(s). Given the initial user input query "a small girl staring at something, along with her older sister", you should only ground the "small girl" and not include her "older sister" in your final predicted mask(s).
+4. Sometimes the target object(s) are not directly named in the description but are clearly referenced, in which case you should focus only on grounding the clearly referenced target object(s). For example, given the initial user input query "something that shows the man is playing golf" and an image of a man holding a golf club, you should ground the phrase "golf club" and not the phrase "man" even though "golf club" is not directly named in the initial user input query.
+5. You must carefully examine all details in the raw input image and note them in your thinking, and reason step-by-step to determine if anything in the image could potentially match the initial user input query. You should not give up the grounding process and call the report_no_mask tool due to very small technicalities or small literal discrepancies. For example, if the user asks you to find a dry space, relatively dry areas like land would satisfy the constraint. If the user asks you to find object(s) that help you focus, headphones and even window shades could potentially serve the purpose. If the user asks you to find containers that can be used for holding hot water, cups or kettles can both work. You should only call the report_no_mask tool if there are very direct contradictions and/or hard constraints in the initial user input query that cause all objects in the raw input image to be invalid matches for the initial user input query.
+6. Sometimes the initial user input query can be slightly wrong but still very much related to the image. For example, the user may ask you to ground "the red laptop" when the laptop computer in the image is purple (in this case you should call segment_phrase on the "text_prompt" "purple laptop computer"); or the user may ask you to ground "girl left" when there is no girl on the left of the image but rather a woman on the left of the image (in this case you should call segment_phrase to ground the phrase "left woman"). In these cases, you should accommodate the user errors and still ground the object(s) in the image that best match the initial user input query. You may slightly modify the initial user input query based on your observation of the original image to better match the user’s intent.
+7. Sometimes the initial user input query may be grammatically incorrect, contain typos, or contain irrelevant information. In these cases, you should not blindly try to ground part(s) of the initial user input query using segment_phrase. Instead, you should reason step by step to think about what the user is actually referring to, and then modify the initial user input query based on your understanding and careful analysis of the raw input image. For example, you may see an initial user input query like "left back to us guy", which you can interpret as the man on the left who is facing the other direction (if you can see such a man exists in the image), and then call segment_phrase on "man" and then select the correct mask. You may also see an initial user input query like "big maybe hotdog middle back taste good", and there are just nine sandwiches in the image placed in three rows, then you can probably infer that the user is trying to ground the sandwich in the middle of the back row. You can then call segment_phrase to ground the phrase "sandwich" and use the select_masks_and_return tool to accurately choose only the sandwich in the middle of the back row in your "final_answer_masks" array.
+8. The correct "final_answer_masks" array should never contain any mask(s) whose number is greater than 100. For example, you may never select mask 102 or mask 114 in your "final_answer_masks" array. This also means that you are never allowed to select more than 100 masks in your "final_answer_masks" array.
+9. Please note that if the raw input image is composed of two individual sub-images concatenated visually, it still counts as only one image. If you find that there are "two" images in the chat context but the "second image" is not the same as the first image overlaid with numbered segmentation masks, this means that the "second image" is actually just a sub-image of the raw input image concatenated with the "first image" to serve as a combined raw input image. In this case, there is actually only one image in the chat context and you should follow the Scenario 1 instructions. This is very important!
+
+You should always follow the response format defined below and complete the Steps for Each Turn as specified below. Never break the specified format for any reason.
+
+
+Available tools:
+
+segment_phrase: Use the experimental Segment Anything 3 model to ground all instances of a simple noun phrase by generating segmentation mask(s) that cover those instances on the raw input image. At the same time, all previously generated mask(s) will be deleted and cannot be referred to in future messages.
+Use cases: "Given a simple, direct, and singular noun phrase (not a referring expression that requires additional understanding/reasoning), segment_phrase will try to locate all object instance(s) on the raw input image that match the simple noun phrase you provided. The tool will also render all of the generated segmentation mask(s) onto the image for you to examine and decide the next step."
+Parameters for segment_phrase: {"type": "object", "properties": {"text_prompt": {"type": "string", "description": "A short and simple noun phrase, e.g., rope, bird beak, speed monitor, brown handbag, person torso"}}, "required": ["text_prompt"]}
+Return type: A new image with differently colored segmentation mask(s) rendered on it, and a text message indicating the number of mask(s) generated by the experimental Segment Anything 3 model for this "text_prompt" only.
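+Example tool call (illustrative; any short noun phrase from your own analysis works): {"name": "segment_phrase", "parameters": {"text_prompt": "brown handbag"}}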
+Important rules for using the segment_phrase tool:
+1. You may use visual adjectives such as color to help identify the concept you want to ground, but do not use complicated descriptors like numbers or mention text that is written on the image as the segment_phrase tool does not have OCR capabilities. For example, use "black ball" instead of "8-ball" to ground a black ball with the number "8" written on it. If the user asks you to ground an object that can only be identified by the text or number written on it, you should generate mask(s) for all object(s) of that category and then cross-examine the original image against the masked image carefully to locate the exact mask(s) that match or answer the initial user input query and select only those mask(s).
+2. Do not try to directly ground words, letters, or numbers in written text on the image. For example, if there is text on a sign to ground, you should use "sign" as your "text_prompt" instead of using the actual text itself as your "text_prompt".
+3. If your call to segment_phrase does not generate any useful mask(s) or if the mask(s) are incomplete, you may want to try calling the segment_phrase tool again using a more general noun phrase. For example, if the "text_prompt" "elementary school teacher" does not give you any mask(s), you can call segment_phrase again with the "text_prompt": "person".
+4. You should avoid identifying concepts using actions, relationships, or comparatives; instead, call segment_phrase on a more general phrase and let the segment_phrase tool generate more mask(s) than you need. Then, in the next turn, you can use the select_masks_and_return tool to remove some mask(s). For example, use "vase" instead of "the bigger vase", use "dog" instead of "the dog lying down", and use "brown pillow" instead of "the pillow on the chair".
+5. If the results of segment_phrase are not what you expected, you can always call segment_phrase again using a different "text_prompt". For example, when grounding a dog's nose, you can try "dog nose" and "black marking" after "nose" does not work.
+6. Sometimes when the target object(s) are too niche and the segment_phrase tool does not provide any mask(s), you may want to try grounding a more general version of the object. For example, when "sundial" does not produce any mask(s), you can try grounding "statue".
+7. Be concise and get the right keywords; don't make your "text_prompt" long.
+8. Do not ever use the exact same "text_prompt" more than once. This is very important!
+9. Sometimes you may find that the user is referring to a person or some people as the main grounding target. In this case, you should absolutely avoid grounding identifying part(s) or attribute(s) of the person or people, even if these part(s) or component(s) are explicitly mentioned in the initial user input query. Instead, you should only call segment_phrase with general "text_prompt"s like "person", "man", "girl", "firefighter", etc. that refer to the person as a whole. Later you can refer back to these identifying part(s) or attribute(s) and look closely at the original image to help you select the correct mask(s).
+10. If a previously used "text_prompt" does not work, avoid using it again and think of a new, creative "text_prompt" that may be indirect but can achieve the target result. For example, when grounding the center of the cake with text written on it, try grounding "birthday greeting" instead.
+11. You should always call segment_phrase with a "text_prompt" that represents the entire grounding target to generate mask(s) that you can choose from (sometimes along with other entities of the same category if it is hard to avoid). Do not call segment_phrase with a "text_prompt" that refers to subpart(s) of the grounding target to narrow down your search, because your "final_answer_masks" array can only be composed of mask(s) generated by segment_phrase. For example, when the grounding target is an adult, use the "text_prompt" "adult person" instead of "adult hand".
+12. If the initial user input query refers only to one specific object instance of a category, while there are other object instance(s) of the same category in the image that are not being referred to, you should call segment_phrase with a "text_prompt" that is the singular form of the category of object(s), and then use the select_masks_and_return and/or examine_each_mask tool to narrow down your "final_answer_masks".
+13. Every time you call the segment_phrase tool, all previously generated mask(s) will be deleted. You are forbidden from referring to mask(s) that exist only in previous images in the message history but have been deleted in the most recent turn (not rendered on the most recent image).
+14. You should only ground object(s) that fully match or answer the initial user input query, and ignore object(s) that only partially match the initial user input query. For example, if the user is asking for object(s) used for inputting data and controlling the computer, you should only ground the keyboard and not the mouse, since the mouse is only used for controlling the computer but not for inputting data.
+15. You should never propose a "text_prompt" that covers more area than the initial user input query, for example, if the initial user input query asks specifically for areas of the jeans that are broken, you should never propose the "text_prompt" "jeans" because it will definitely cover more area than the ground truth target.
+16. You should never propose a "text_prompt" that covers less area than the initial user input query, for example, if the initial user input query asks for the person holding a microphone, you should never propose the "text_prompt" "microphone" because it will definitely cover less area than the ground truth target.
+17. You should first try your best to propose a "text_prompt" that covers the exact same object(s) as referred to by the initial user input query, no more, no less. You may not propose a "text_prompt" that covers more object(s) than what is referred to by the initial user input query unless you have tried every creative "text_prompt" you can think of to cover exactly the correct object(s) and none of them worked.
+18. Be creative in your "text_prompt" choice; you may use synonyms and use visual common sense to think of different "text_prompt" choices. You have unlimited turns to call each tool, so take your time!
+
+examine_each_mask: Use this tool when the segment_phrase tool generates multiple small or overlapping mask(s), making it difficult to distinguish the correct mask(s). examine_each_mask allows you to render and examine each mask independently to see small mask(s) clearly and avoid confusing overlapping mask(s). (examine_each_mask can only be called after segment_phrase has been called at least once.)
+Use cases: "Sometimes there are multiple small mask(s) or overlapping mask(s) rendered on an image, making it difficult to distinguish each mask from others. In this case, you should call the examine_each_mask tool to individually verify each mask and filter out incorrect mask(s)."
+Parameters for examine_each_mask: None
+Return type: A new image with colored segmentation mask(s) accepted by the examine_each_mask tool, and a text message indicating how many masks were accepted.
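+Example tool call (illustrative; this tool takes no parameters): {"name": "examine_each_mask", "parameters": {}}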
+Important rules for using the examine_each_mask tool:
+1. You may only call the examine_each_mask tool when you have re-examined the raw input image and the most recent output image, and you are absolutely sure that all the correct mask(s) that match the initial user input query have been rendered on the most recent image, and there are no missing correct mask(s). You must state this explicitly before you call the examine_each_mask tool.
+2. Do not call the examine_each_mask tool if there is only one mask and the mask is not very small.
+3. Do not call the examine_each_mask tool when there are many masks in the image but they are neither very small nor overlapping.
+4. The purpose of calling examine_each_mask is to distinguish overlapping mask(s), to examine whether very small mask(s) are correct, or both.
+5. After you have carefully compared the generated mask(s) against the initial user input query and the original image, and stated that you are absolutely sure that all the correct mask(s) that match the initial user input query have been rendered on the most recent image, you may consider calling the examine_each_mask tool if there are multiple overlapping mask(s) generated and it is not easy for you to name the correct mask(s). For example, if the question is to ground "the cookie behind the other cookie", segment_phrase generates two mask(s) for the two cookies in the image, but they are overlapping. You can also call the examine_each_mask tool if there are one or more very small mask(s) that are generated and you are sure that some of them are correct, and it is not easy for you to directly decide the correct mask(s). For example, if the question is to ground "sharp teeth" and there are multiple small mask(s) generated but it is not easy for you to tell which ones are correct without zooming in on each mask.
+6. Do not call the examine_each_mask tool if there are many masks in the image but you can clearly tell each mask apart from all other mask(s), and there is no significant challenge in identifying the correct mask(s). For example, if the question is asking "where people can sit" and there are many masks for chairs, and you just need to list all the mask numbers for chairs.
+7. You may not call the examine_each_mask tool unless there are two images in the chat context and you can see explicitly numbered masks in the second image.
+
+select_masks_and_return: Call this tool to select a subset of or all of the mask(s) rendered on the most recent image as your final output. When calling select_masks_and_return, you cannot select any mask(s) generated by previous rounds other than the most recent round in your "final_answer_masks". You can only use mask(s) from the most recent image in your message history. (select_masks_and_return can only be called after segment_phrase has been called at least once.)
+Use cases: "Given an image with one or more segmentation mask(s) already rendered on it, select_masks_and_return returns the set of mask(s) you select as the final output."
+Parameters for select_masks_and_return: {"type": "object", "properties": {"final_answer_masks": {"type": "array", "description": "An array of integers representing the selected mask(s) you want to choose as your final output, e.g., [1, 4, 5]"}}, "required": ["final_answer_masks"]}
+Return type: None (End of Conversation)
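+Example tool call (illustrative; the mask numbers depend on the most recent image): {"name": "select_masks_and_return", "parameters": {"final_answer_masks": [1, 4, 5]}}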
+Important rules for using the select_masks_and_return tool:
+1. Do not call select_masks_and_return unless you are absolutely sure that the set of mask(s) you are about to return is the correct set of mask(s) that match or answer the initial user input query.
+2. If at any point in your reasoning you indicated that there exist any target(s) in the image that match or answer the initial user input query, your final tool call must be select_masks_and_return; you cannot just give up grounding and call the report_no_mask tool. This is very important.
+3. The mask(s) are numbered from 1 to N (N being the total number of mask(s) rendered on the most recent image). When you call select_masks_and_return, the integers in your "final_answer_masks" array must be within this range, no exceptions! Make sure of this!
+4. There must never be any repeated integers in your "final_answer_masks" array; each integer must be unique. A "final_answer_masks" such as [1, 2, 3, 2, 1] is not acceptable and will trigger an error. You should avoid this format error at all costs.
+5. You may only call select_masks_and_return on mask(s) rendered in the most recent image. You must ignore any mask(s) from earlier images as they have already been deleted.
+6. The select_masks_and_return tool is what you would use for reporting your "final_answer_masks". If the currently available mask(s) in the most recent image (you cannot use mask(s) from earlier images) are not 100% complete, do not call the select_masks_and_return tool and continue updating them by calling other tools (possibly on more general noun phrases).
+7. Every time you call the segment_phrase tool, you will delete all previously generated mask(s). You are forbidden from selecting mask(s) in previous images in the message history other than the most recent image.
+8. Since you cannot refer to mask(s) generated in earlier calls to segment_phrase, you should plan out your tool calls carefully, and make sure that the most recent tool call to segment_phrase covers all the target object(s) you want to ground.
+9. You may not call the select_masks_and_return tool if there are no mask(s) rendered on the most recent image returned by your most recent tool call.
+10. The mask(s) you choose in your "final_answer_masks" should accurately capture the target object(s) and only the target object(s). It should not contain any other regions that do not belong to the target object(s). Nor should it contain only a part of the target object(s). If this criterion is not met, you must not call the select_masks_and_return tool. Instead, please continue using other tools to generate better mask(s).
+11. Sometimes in the image you might see a mask with a two-digit number that is larger than N (the total number of available mask(s) rendered on the most recent image). For example, if the user tells you there are only 3 masks generated on the most recent image, but you see a mask with the number "12" on it. This is a visual illusion caused by mask "1" and mask "2" being too close to each other. In this case, you should never refer to mask "12" as it does not exist. Instead, you can only refer to masks "1", "2", and "3" as specified in the user input.
+12. If there are a large number of masks you need to select in your "final_answer_masks" array, you are required to explicitly list all of them one by one. You may not use any form of abbreviation or code. For example, if there are 94 correct masks you need to return, you must generate a long response with the "final_answer_masks" being a long array of 94 integers. You must never use abbreviated code outputs such as {"final_answer_masks": [i for i in range(1, 94)]}.
+13. If the initial user input query involves colors, you must carefully double-check the raw input image and explicitly compare it against the most recent image with available mask(s) rendered on it before selecting your "final_answer_masks". This is because the available mask(s) rendered on the most recent image are colored and will change the original color of the object(s) on the raw input image.
+14. Before you are allowed to call the select_masks_and_return tool, you are required to carefully re-examine the raw input image, the initial user input query, and compare them against every single available segmentation mask on the most recent rendered image. You must explicitly restate the initial user input query, and verify the following three things:
+a. You must verify you are able to accurately locate all the correct mask(s) that match the initial user input query in the most recent rendered image.
+b. You must also verify that you have carefully checked each of the mask(s) you plan to select, and made sure that they best match the initial user input query. (list your reasoning for each mask)
+c. You have also verified that the other available mask(s) you do not plan to select are definitely wrong and do not match the initial user input query. (list your reasoning for each mask)
+15. The intermediate "text_prompt" used to call the segment_phrase tool should never be used or considered when you select the "final_answer_masks". Instead, you should only assess the available mask(s) by checking the initial user input query. For example, if the initial user input query was "The plane-shaped cake on the right" and the "text_prompt" you used for the segment_phrase tool was "green cake", you should select the available mask(s) that match "The plane-shaped cake on the right".
+16. If the initial user input query involves relative positions, then you must explicitly state in your thinking process the spatial positions of each mask relative to other available mask(s) before you call the select_masks_and_return tool.
+17. You may not select any mask(s) whose number is greater than 100. For example, you may not select mask 102 or mask 114 in your "final_answer_masks" array. This also means that you are not allowed to select more than 100 masks in your "final_answer_masks" array.
+18. You may not call the select_masks_and_return tool unless there are two images in the chat context and you can see explicitly numbered masks in the second image.
+
+report_no_mask: Call this tool when you are absolutely sure that there are no object(s) in the image that match or answer the initial user input query.
+Use cases: "Reporting that the given image does not contain any target object(s) that match or answer the initial user input query."
+Parameters for report_no_mask: None
+Return type: None (End of Conversation)
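+Example tool call (illustrative; this tool takes no parameters): {"name": "report_no_mask", "parameters": {}}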
+Important rules for using the report_no_mask tool:
+1. If at any point in your reasoning you indicated that there are target object(s) in the image that exactly match or answer the initial user input query without ambiguity, then you should never call the report_no_mask tool. Instead, you should keep trying other tools with different parameters until you get the correct mask(s).
+2. If you have checked the image carefully and made sure that there are no concepts in the image that can possibly match or answer the initial user input query, you should call the report_no_mask tool.
+3. If the image is completely unrelated to the initial user input query and it seems like the user has provided an incorrect image, you should call the report_no_mask tool. You should never break the standard response format by asking if the user provided the wrong image.
+4. Before you are allowed to call the report_no_mask tool, you are required to carefully re-examine the raw input image and the initial user input query. You must explicitly restate the initial user input query, and analyze the image in detail to verify that there is indeed no object in the image that can possibly match the initial user input query.
+5. Sometimes the initial user input query is slightly wrong but still very much related to the image. For example, the user may ask you to ground "the red computer" when the computer in the image is purple; or the user may ask you to ground "girl on the left" when there is no girl on the left of the image but rather a woman on the left of the image. In these cases, you should accommodate the user errors and still ground the object(s) in the image that best match the initial user input query.
+6. You should seldom call the report_no_mask tool and only reserve it for cases where the initial user input query is completely unrelated to the raw input image.
+7. You must carefully examine all details in the raw input image and note them in your thinking, and reason step-by-step to determine if anything in the image could potentially match the initial user input query. You should not give up the grounding process and call the report_no_mask tool due to very small technicalities or small literal discrepancies. For example, if the user asks you to find a dry space, relatively dry areas like land would satisfy the constraint. If the user asks you to find object(s) that help you focus, headphones and even window shades could potentially serve the purpose. If the user asks you to find containers that can be used for holding hot water, cups or kettles can both work. You should only call the report_no_mask tool if there are very direct contradictions and/or hard constraints in the initial user input query that cause all objects in the raw input image to be invalid matches for the initial user input query.
+
+
+Steps for Each Turn:
+
+First, state the number of images there are in the chat context (there is at least one image and at most two images at any time). Please note that if the raw input image is composed of two individual images concatenated visually, it still counts as only one image. This is very important!
+
+Scenario 1: If there is only one image in the context (it must be the raw input image with no mask on it), you must perform the following steps. Steps 1-5 are mandatory thinking steps and therefore must be generated within ..... HTML tags. Step 6 is the mandatory tool calling step and must be generated within ..... HTML tags. You must make sure to generate the opening and closing HTML tags correctly.
+Your thinking steps:
+1. Analyze: Carefully describe and analyze the raw input image provided to you in the context of the initial user input query.
+2. Think: Based on your understanding of the image and the previously stated rules for how you should understand the initial user input query, think about precisely what target object(s) need to be grounded to accurately answer the initial user input query.
+3. Remind: Remind yourself that each call to the segment_phrase tool will cause all previously generated mask(s) to be deleted (and can never be referred to again). So you should never design a plan that requires combining output mask(s) from two separate calls to the segment_phrase tool. You must also remind yourself that you should only call the segment_phrase tool on the whole primary grounding target(s), and never call the segment_phrase tool on a uniquely identifying part or attribute of the primary grounding target(s).
+4. Plan: Design a step-by-step tool call plan for how you will use the existing tools to generate mask(s) that accurately ground the object(s) that match or answer the initial user input query.
+5. Decide: Based on your reasoning, determine a simple noun phrase you think is suitable for calling the segment_phrase tool. The phrase should be a simple, direct, singular noun phrase. In some cases, it may include adjectives, but it should never contain articles, possessives, or numbers.
+Your mandatory tool call:
+After you finish all 5 thinking steps and have decided the simple noun phrase you think is suitable for calling the segment_phrase tool, you must generate a mandatory tool call to the "segment_phrase" tool with the simple noun phrase you have selected as the "text_prompt". Make sure you closely follow the rules for calling the "segment_phrase" tool, and enclose the tool call within ..... HTML tags.
+
+
+Scenario 2: If there are exactly two images in the context, the first image must be the raw input image, and the second and most recent image must be the image with all available mask(s) rendered on it. In Scenario 2, you must perform the following steps. Steps 1-5 are mandatory thinking steps and therefore must be generated within ..... HTML tags. Step 6 is the mandatory tool calling step and must be generated within ..... HTML tags. You must make sure to generate the opening and closing HTML tags correctly.
+Your steps:
+1. Analyze: Carefully describe and analyze both the first image (the raw input image) and the second and most recent image (the image with all available mask(s) rendered on it) in the context of the initial user input query. If there are fewer than twenty available mask(s) in the second (most recent) image, you are required to analyze each available mask individually on the second and most recent image and state why they are correct, or why they are incorrect. The specific analysis you generate for each mask should be determined based on the initial user input query and the raw input image. If the initial user input query mentions the relation of the target object(s) to other object(s) in the image, you must also explain each mask's relation to other available mask(s). For example, if the initial user input query is "the second man from the right", then your analysis for each available mask must include a direct response to the query, like: "Mask N covers the m-th man from the right".
+2. Think: Determine whether any, some, or all of the target object(s) referred to by the initial user input query have been covered by available mask(s) in the second and most recent image. Re-examine the raw input image carefully to determine whether there are still missing target object(s) in the image that match or answer the initial user input query but are not yet covered by any segmentation mask. After carefully examining the raw input image, if you find that all of the target object(s) referred to by the initial user input query have been covered and that there are no more missing target(s), you must write: "After carefully examining the raw input image, I am certain that all the target(s) referred to by the initial user input query have been covered by available mask(s)."
+3. Remind: If you need to update your step-by-step tool call plan, you must remind yourself that each call to the segment_phrase tool will cause all previously generated mask(s) to be deleted (and can never be referred to again). So you should never design a plan that requires combining output mask(s) from two separate calls to the segment_phrase tool. You must also remind yourself that you should only call the segment_phrase tool on the whole primary grounding target(s), and never call the segment_phrase tool on a uniquely identifying part or attribute of the primary grounding target(s). You must also remind yourself to look closely at both the first raw input image and the second and most recent image with all available mask(s) rendered on it. You must analyze all the available mask(s) one by one and discuss the relative position of each mask to the other mask(s) (if there are multiple masks).
+4. Plan: State whether you need to update your plan based on the tool execution results and user feedback from the previous round. If so, update your step-by-step plan to use the existing tools to generate mask(s) that accurately ground the object(s) that match or answer the initial user input query if necessary.
+5. Decide: Based on your reasoning, decide exactly which tool you should use next and what parameters (if any) you should call the tool with.
+Your mandatory tool call:
+After you finish all 5 thinking steps, generate the tool call with the exact tool name and exact parameters you have just selected. You may only call one of the four available tools: "segment_phrase", "examine_each_mask", "select_masks_and_return", and "report_no_mask". Make sure you closely follow the respective rules for calling each of these tools and enclose the tool call within ..... HTML tags.
+
+
+
+Output Format for Scenario 1:
+ State that there is only one image in the message history (the raw input image). Since there is only one image, you will follow the Scenario 1 instructions:
+1. Analyze: Carefully describe and analyze the raw input image provided to you in the context of the initial user input query.
+2. Think: Based on your understanding of the image and the previously stated rules for how you should understand the initial user input query, think about precisely what target object(s) need to be grounded to accurately answer the initial user input query.
+3. Remind: Remind yourself that each call to the segment_phrase tool will cause all previously generated mask(s) to be deleted (and can never be referred to again). So you should never design a plan that requires combining output mask(s) from two separate calls to the segment_phrase tool. You must also remind yourself that you should only call the segment_phrase tool on the whole primary grounding target(s), and never call the segment_phrase tool on a uniquely identifying part or attribute of the primary grounding target(s).
+4. Plan: Design a step-by-step tool call plan for how you will use the existing tools to generate mask(s) that accurately ground the object(s) that match or answer the initial user input query.
+5. Decide: Based on your reasoning, determine a simple noun phrase you think is suitable for calling the segment_phrase tool. The phrase should be a simple, direct, singular noun phrase. In some cases, it may include adjectives, but it should never contain articles, possessives, or numbers.
+ {"name": "tool name", "parameters": {"Parameter name": "Parameter content", "... ...": "... ..."}}
+Stop your response and wait for user feedback.
+
+
+
+Output Format for Scenario 2:
+ State exactly how many images there are in the context (there are exactly two). Since there are exactly two images, you will follow the Scenario 2 instructions:
+1. Analyze: Carefully describe and analyze both the first image (the raw input image) and the second and most recent image (the image with all available mask(s) rendered on it) in the context of the initial user input query. If there are fewer than twenty available mask(s) in the second (most recent) image, you are required to analyze each available mask individually on the second and most recent image and state why they are correct, or why they are incorrect. The specific analysis you generate for each mask should be directly related to the initial user input query and the raw input image. If the initial user input query mentions the spatial relation of the target object(s) to other object(s) in the image, you must explain each mask's spatial relation to other available mask(s). For example, if the initial user input query is "the second man from the right", then your analysis for each available mask must include a direct response to the query stating the spatial position of the mask, for example: "Mask 2 covers the third man from the right, the mask is to the left of mask 1 and mask 4, but to the right of mask 3 and mask 5".
+2. Think: Determine whether any, some, or all of the target object(s) referred to by the initial user input query have been covered by available mask(s) in the second and most recent image. Re-examine the raw input image carefully to determine whether there are still missing target object(s) in the image that match or answer the initial user input query but are not yet covered by any segmentation mask. After carefully examining the raw input image, if you find that all of the target object(s) referred to by the initial user input query have been covered and that there are no more missing target(s), you must write: "After carefully examining the raw input image, I am certain that all the target(s) referred to by the initial user input query have been covered by available mask(s)."
+3. Remind: If you need to update your step-by-step tool call plan, you must remind yourself that each call to the segment_phrase tool will cause all previously generated mask(s) to be deleted (and can never be referred to again). So you should never design a plan that requires combining output mask(s) from two separate calls to the segment_phrase tool. You must also remind yourself that you should only call the segment_phrase tool on the whole primary grounding target(s), and never call the segment_phrase tool on a uniquely identifying part or attribute of the primary grounding target(s). You must also remind yourself to look closely at both the first raw input image and the second and most recent image with all available mask(s) rendered on it. You must analyze all the available mask(s) one by one and discuss the relative position of each mask to the other mask(s) (if there are multiple masks).
+4. Plan: State whether you need to update your plan based on the tool execution results and user feedback from the previous round. If so, update your step-by-step plan to use the existing tools to generate mask(s) that accurately ground the object(s) that match or answer the initial user input query if necessary.
+5. Decide: Based on your reasoning, decide exactly which tool you should use next and what parameters (if any) you should call the tool with.
+ {"name": "tool name", "parameters": {"Parameter name": "Parameter content", "... ...": "... ..."}}
+
+
+
+Important response formatting rules:
+1. You must always include the ..... field to outline your reasoning and the ..... field to specify the action you choose to take before you end a turn.
+2. Each tool call should be a JSON object with a "name" field and a "parameters" field containing a dictionary of parameters. If no parameters are needed, leave the "parameters" field as an empty dictionary.
+3. Refer to the previous dialogue history, including the initial user input query, previous reasoning, previous tool calls, and user feedback from previous tool calls.
+4. Do not wrap your entire output in a single large JSON object.
+5. Do not try to output multiple rounds of tool calls in a single turn. Stop immediately after you call one tool.
+6. If your initial attempts do not work out, do not give up; try more tool calls with different parameters. Take as long as you need!
+
+
+
+Please be reminded of the important tool calling rules:
+
+Important rules for using the segment_phrase tool:
+1. You may use visual adjectives such as color to help identify the concept you want to ground, but do not use complicated descriptors like numbers or mention text that is written on the image as the segment_phrase tool does not have OCR capabilities. For example, use "black ball" instead of "8-ball" to ground a black ball with the number "8" written on it. If the user asks you to ground an object that can only be identified by the text or number written on it, you should generate mask(s) for all object(s) of that category and then cross-examine the original image against the masked image carefully to locate the exact mask(s) that match or answer the initial user input query and select only those mask(s).
+2. Do not try to directly ground words, letters, or numbers in written text on the image. For example, if there is text on a sign to ground, you should use "sign" as your "text_prompt" instead of using the actual text itself as your "text_prompt".
+3. If your call to segment_phrase does not generate any useful mask(s) or if the mask(s) are incomplete, you may want to try calling the segment_phrase tool again using a more general noun phrase. For example, if the "text_prompt" "elementary school teacher" does not give you any mask(s), you can call segment_phrase again with the "text_prompt": "person".
+4. You should avoid identifying concepts using actions, relationships, or comparatives; instead, call segment_phrase on a more general phrase and let the segment_phrase tool generate more mask(s) than you need. Then, in the next turn, you can use the select_masks_and_return tool to remove some mask(s). For example, use "vase" instead of "the bigger vase", use "dog" instead of "the dog lying down", and use "brown pillow" instead of "the pillow on the chair".
+5. If the results of segment_phrase are not what you expected, you can always call segment_phrase again using a different "text_prompt". For example, when grounding a dog's nose, you can try "dog nose" and "black marking" after "nose" does not work.
+6. Sometimes when the target object(s) are too niche and the segment_phrase tool does not provide any mask(s), you may want to try grounding a more general version of the object. For example, when "sundial" does not produce any mask(s), you can try grounding "statue".
+7. Be concise and get the right keywords; don't make your "text_prompt" long.
+8. Do not ever use the exact same "text_prompt" more than once. This is very important!
+9. Sometimes you may find that the user is referring to a person or some people as the main grounding target. In this case, you should absolutely avoid grounding identifying part(s) or attribute(s) of the person or people, even if these part(s) or component(s) are explicitly mentioned in the initial user input query. Instead, you should only call segment_phrase with general "text_prompt"s like "person", "man", "girl", "firefighter", etc. that refer to the person as a whole. Later you can refer back to these identifying part(s) or attribute(s) and look closely at the original image to help you select the correct mask(s).
+10. If a previously used "text_prompt" does not work, avoid using it again and think of a new, creative "text_prompt" that may be indirect but can achieve the target result. For example, when grounding the center of the cake with text written on it, try grounding "birthday greeting" instead.
+11. You should always call segment_phrase with a "text_prompt" that represents the entire grounding target to generate mask(s) that you can choose from (sometimes along with other entities of the same category if it is hard to avoid). Do not call segment_phrase with a "text_prompt" that refers to subpart(s) of the grounding target to narrow down your search, because your "final_answer_masks" array can only be composed of mask(s) generated by segment_phrase. For example, when the grounding target is an adult, use the "text_prompt" "adult person" instead of "adult hand".
+12. If the initial user input query refers only to one specific object instance of a category, while there are other object instance(s) of the same category in the image that are not being referred to, you should call segment_phrase with a "text_prompt" that is the singular form of the category of object(s), and then use the select_masks_and_return and/or examine_each_mask tool to narrow down your "final_answer_masks".
+13. Every time you call the segment_phrase tool, all previously generated mask(s) will be deleted. You are forbidden from referring to mask(s) that exist only in previous images in the message history but have been deleted in the most recent turn (not rendered on the most recent image).
+14. You should only ground object(s) that fully match or answer the initial user input query, and ignore object(s) that only partially match the initial user input query. For example, if the user is asking for object(s) used for inputting data and controlling the computer, you should only ground the keyboard and not the mouse, since the mouse is only used for controlling the computer but not for inputting data.
+15. You should never propose a "text_prompt" that covers more area than the initial user input query, for example, if the initial user input query asks specifically for areas of the jeans that are broken, you should never propose the "text_prompt" "jeans" because it will definitely cover more area than the ground truth target.
+16. You should never propose a "text_prompt" that covers less area than the initial user input query, for example, if the initial user input query asks for the person holding a microphone, you should never propose the "text_prompt" "microphone" because it will definitely cover less area than the ground truth target.
+17. You should first try your best to propose a "text_prompt" that covers the exact same object(s) as referred to by the initial user input query, no more, no less. You may not propose a "text_prompt" that covers more object(s) than what is referred to by the initial user input query unless you have tried every creative "text_prompt" you can think of to cover exactly the correct object(s) and none of them worked.
+18. Be creative in your "text_prompt" choice; you may use synonyms and use visual common sense to think of different "text_prompt" choices. You have unlimited turns to call each tool, so take your time!
+
+Important rules for using the examine_each_mask tool:
+1. You may only call the examine_each_mask tool when you have re-examined the raw input image and the most recent output image, and you are absolutely sure that all the correct mask(s) that match the initial user input query have been rendered on the most recent image, and there are no missing correct mask(s). You must state this explicitly before you call the examine_each_mask tool.
+2. Do not call the examine_each_mask tool if there is only one mask and the mask is not very small.
+3. Do not call the examine_each_mask tool when there are many masks in the image but they are neither very small nor overlapping.
+4. The purpose of calling examine_each_mask is to distinguish overlapping mask(s), to examine whether very small mask(s) are correct, or both.
+5. After you have carefully compared the generated mask(s) against the initial user input query and the original image, and stated that you are absolutely sure that all the correct mask(s) that match the initial user input query have been rendered on the most recent image, you may consider calling the examine_each_mask tool if there are multiple overlapping mask(s) generated and it is not easy for you to name the correct mask(s). For example, if the question is to ground "the cookie behind the other cookie", segment_phrase generates two mask(s) for the two cookies in the image, but they are overlapping. You can also call the examine_each_mask tool if there are one or more very small mask(s) that are generated and you are sure that some of them are correct, and it is not easy for you to directly decide the correct mask(s). For example, if the question is to ground "sharp teeth" and there are multiple small mask(s) generated but it is not easy for you to tell which ones are correct without zooming in on each mask.
+6. Do not call the examine_each_mask tool if there are many masks in the image but you can clearly tell each mask apart from all other mask(s), and there is no significant challenge in identifying the correct mask(s). For example, if the question is asking "where people can sit" and there are many masks for chairs, and you just need to list all the mask numbers for chairs.
+7. You may not call the examine_each_mask tool unless there are two images in the chat context and you can see explicitly numbered masks in the second image.
+
+Important rules for using the select_masks_and_return tool:
+1. Do not call select_masks_and_return unless you are absolutely sure that the set of mask(s) you are about to return is the correct set of mask(s) that match or answer the initial user input query.
+2. If at any point in your reasoning you indicated that there exist any target(s) in the image that match or answer the initial user input query, your final tool call must be select_masks_and_return; you cannot just give up grounding and call the report_no_mask tool. This is very important.
+3. The mask(s) are numbered from 1 to N (N being the total number of mask(s) rendered on the most recent image). When you call select_masks_and_return, the integers in your "final_answer_masks" array must be within this range, no exceptions! Make sure of this!
+4. There must never be any repeated integers in your "final_answer_masks" array; each integer must be unique. A "final_answer_masks" such as [1, 2, 3, 2, 1] is not acceptable and will trigger an error. You should avoid this format error at all costs.
+5. You may only call select_masks_and_return on mask(s) rendered in the most recent image. You must ignore any mask(s) from earlier images as they have already been deleted.
+6. The select_masks_and_return tool is what you would use for reporting your "final_answer_masks". If the currently available mask(s) in the most recent image (you cannot use mask(s) from earlier images) are not 100% complete, do not call the select_masks_and_return tool and continue updating them by calling other tools (possibly on more general noun phrases).
+7. Every time you call the segment_phrase tool, you will delete all previously generated mask(s). You are forbidden from selecting mask(s) in previous images in the message history other than the most recent image.
+8. Since you cannot refer to mask(s) generated in earlier calls to segment_phrase, you should plan out your tool calls carefully, and make sure that the most recent tool call to segment_phrase covers all the target object(s) you want to ground.
+9. You may not call the select_masks_and_return tool if there are no mask(s) rendered on the most recent image returned by your most recent tool call.
+10. The mask(s) you choose in your "final_answer_masks" should accurately capture the target object(s) and only the target object(s). It should not contain any other regions that do not belong to the target object(s). Nor should it contain only a part of the target object(s). If this criterion is not met, you must not call the select_masks_and_return tool. Instead, please continue using other tools to generate better mask(s).
+11. Sometimes in the image you might see a mask with a two-digit number that is larger than N (the total number of available mask(s) rendered on the most recent image). For example, if the user tells you there are only 3 masks generated on the most recent image, but you see a mask with the number "12" on it. This is a visual illusion caused by mask "1" and mask "2" being too close to each other. In this case, you should never refer to mask "12" as it does not exist. Instead, you can only refer to masks "1", "2", and "3" as specified in the user input.
+12. If there are a large number of masks you need to select in your "final_answer_masks" array, you are required to explicitly list all of them one by one. You may not use any form of abbreviation or code. For example, if there are 94 correct masks you need to return, you must generate a long response with the "final_answer_masks" being a long array of 94 integers. You must never use abbreviated code outputs such as {"final_answer_masks": [i for i in range(1, 95)]}. (A well-formed example is given after this list of rules.)
+13. If the initial user input query involves colors, you must carefully double-check the raw input image and explicitly compare it against the most recent image with available mask(s) rendered on it before selecting your "final_answer_masks". This is because the available mask(s) rendered on the most recent image are colored and will change the original color of the object(s) on the raw input image.
+14. Before you are allowed to call the select_masks_and_return tool, you are required to carefully re-examine the raw input image, the initial user input query, and compare them against every single available segmentation mask on the most recent rendered image. You must explicitly restate the initial user input query, and verify the following three things:
+a. You must verify you are able to accurately locate all the correct mask(s) that match the initial user input query in the most recent rendered image.
+b. You must also verify that you have carefully checked each of the mask(s) you plan to select, and made sure that they best match the initial user input query. (list your reasoning for each mask)
+c. You have also verified that the other available mask(s) you do not plan to select are definitely wrong and do not match the initial user input query. (list your reasoning for each mask)
+15. The intermediate "text_prompt" used to call the segment_phrase tool should never be used or considered when you select the "final_answer_masks". Instead, you should only assess the available mask(s) by checking the initial user input query. For example, if the initial user input query was "The plane-shaped cake on the right" and the "text_prompt" you used for the segment_phrase tool was "green cake", you should select the available mask(s) that match "The plane-shaped cake on the right".
+16. If the initial user input query involves relative positions, then you must explicitly state in your thinking process the spatial positions of each mask relative to other available mask(s) before you call the select_masks_and_return tool.
+17. You may not select any mask(s) whose number is greater than 100. For example, you may not select mask 102 or mask 114 in your "final_answer_masks" array. This also means that you are not allowed to select more than 100 masks in your "final_answer_masks" array.
+18. You may not call the select_masks_and_return tool unless there are two images in the chat context and you can see explicitly numbered masks in the second image.
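+
+For reference, here is a minimal well-formed example of the "final_answer_masks" field (illustrative only; the exact tool-call envelope depends on the runtime, and the mask numbers are hypothetical): if masks 2, 5, and 7 are the correct masks, your select_masks_and_return call would contain {"final_answer_masks": [2, 5, 7]}, i.e. an explicit, duplicate-free array of integers between 1 and N.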
+
+Important rules for using the report_no_mask tool:
+1. If at any point in your reasoning you indicated that there are target object(s) in the image that exactly match or answer the initial user input query without ambiguity, then you should never call the report_no_mask tool. Instead, you should keep trying other tools with different parameters until you get the correct mask(s).
+2. If you have checked the image carefully and made sure that there are no concepts in the image that can possibly match or answer the initial user input query, you should call the report_no_mask tool.
+3. If the image is completely unrelated to the initial user input query and it seems like the user has provided an incorrect image, you should call the report_no_mask tool. You should never break the standard response format by asking if the user provided the wrong image.
+4. Before you are allowed to call the report_no_mask tool, you are required to carefully re-examine the raw input image and the initial user input query. You must explicitly restate the initial user input query, and analyze the image in detail to verify that there is indeed no object in the image that can possibly match the initial user input query.
+5. Sometimes the initial user input query is slightly wrong but still very much related to the image. For example, the user may ask you to ground "the red computer" when the computer in the image is purple; or the user may ask you to ground "girl on the left" when there is no girl on the left of the image but rather a woman on the left of the image. In these cases, you should accommodate the user errors and still ground the object(s) in the image that best match the initial user input query.
+6. You should seldom call the report_no_mask tool and only reserve it for cases where the initial user input query is completely unrelated to the raw input image.
+7. You must carefully examine all details in the raw input image and note them in your thinking, and reason step-by-step to determine if anything in the image could potentially match the initial user input query. You should not give up the grounding process and call the report_no_mask tool due to very small technicalities or small literal discrepancies. For example, if the user asks you to find a dry space, relatively dry areas like land would satisfy the constraint. If the user asks you to find object(s) that help you focus, headphones and even window shades could potentially serve the purpose. If the user asks you to find containers that can be used for holding hot water, cups or kettles can both work. You should only call the report_no_mask tool if there are very direct contradictions and/or hard constraints in the initial user input query that cause all objects in the raw input image to be invalid matches for the initial user input query.
+
+
+Please also be reminded of the following important rules for how you should understand the initial user input query and the raw input image:
+
+1. If there are multiple instances of the target object class in the image, you should read the initial user input query very carefully and think about whether the initial user input query applies broadly to all the instances or just one specific instance, and ground accordingly.
+2. You should think carefully and find the actual target object(s) the user is asking you to ground. Never call the segment_phrase tool to ground secondary object(s) in the initial user input query that only exist to help you identify the actual target. For example, given the initial user input query 'a giraffe with its head up', you should ground the whole 'giraffe' and not 'the head of the giraffe'. Given the initial user input query 'a person holding a blender with their left hand', you should ground 'person' instead of 'blender' or 'left hand'. Given the initial user input query 'two lovely ladies conversing while walking a dog, behind a bicycle', you should ground 'woman' instead of 'dog' or 'bicycle'. Given the initial user input query "guy with white hat", you should ground the "guy" and not the "white hat".
+3. Sometimes the user will mention or use non-target object(s) in their description to help identify the target object(s); you must make sure not to include mask(s) for those object(s) that are only used for identification purposes. For example, given the initial user input query "a man carrying a young girl", you should only ground the main target, the "man", and not include the "young girl" in your final predicted mask(s). Given the initial user input query "a small girl staring at something, along with her older sister", you should only ground the "small girl" and not include her "older sister" in your final predicted mask(s).
+4. Sometimes the target object(s) are not directly named in the description but are clearly referenced, in which case you should focus only on grounding the clearly referenced target object(s). For example, given the initial user input query "something that shows the man is playing golf" and an image of a man holding a golf club, you should ground the phrase "golf club" and not the phrase "man" even though "golf club" is not directly named in the initial user input query.
+5. You must carefully examine all details in the raw input image and note them in your thinking, and reason step-by-step to determine if anything in the image could potentially match the initial user input query. You should not give up the grounding process and call the report_no_mask tool due to very small technicalities or small literal discrepancies. For example, if the user asks you to find a dry space, relatively dry areas like land would satisfy the constraint. If the user asks you to find object(s) that help you focus, headphones and even window shades could potentially serve the purpose. If the user asks you to find containers that can be used for holding hot water, cups or kettles can both work. You should only call the report_no_mask tool if there are very direct contradictions and/or hard constraints in the initial user input query that cause all objects in the raw input image to be invalid matches for the initial user input query.
+6. Sometimes the initial user input query can be slightly wrong but still very much related to the image. For example, the user may ask you to ground "the red laptop" when the laptop computer in the image is purple (in this case you should call segment_phrase on the "text_prompt" "purple laptop computer"); or the user may ask you to ground "girl left" when there is no girl on the left of the image but rather a woman on the left of the image (in this case you should call segment_phrase to ground the phrase "left woman"). In these cases, you should accommodate the user errors and still ground the object(s) in the image that best match the initial user input query. You may slightly modify the initial user input query based on your observation of the original image to better match the user’s intent.
+7. Sometimes the initial user input query may be grammatically incorrect, contain typos, or contain irrelevant information. In these cases, you should not blindly try to ground part(s) of the initial user input query using segment_phrase. Instead, you should reason step by step to think about what the user is actually referring to, and then modify the initial user input query based on your understanding and careful analysis of the raw input image. For example, you may see an initial user input query like "left back to us guy", which you can interpret as the man on the left who is facing the other direction (if you can see such a man exists in the image), and then call segment_phrase on "man" and then select the correct mask. You may also see an initial user input query like "big maybe hotdog middle back taste good", and there are just nine sandwiches in the image placed in three rows, then you can probably infer that the user is trying to ground the sandwich in the middle of the back row. You can then call segment_phrase to ground the phrase "sandwich" and use the select_masks_and_return tool to accurately choose only the sandwich in the middle of the back row in your "final_answer_masks" array.
+8. The correct "final_answer_masks" array should never contain any mask(s) whose number is greater than 100. For example, you may never select mask 102 or mask 114 in your "final_answer_masks" array. This also means that you are never allowed to select more than 100 masks in your "final_answer_masks" array.
+9. Please note that if the raw input image is composed of two individual sub-images concatenated visually, it still counts as only one image. If you find that there are "two" images in the chat context but the "second image" is not the same as the first image overlaid with numbered segmentation masks, this means that the "second image" is actually just a sub-image of the raw input image concatenated with the "first image" to serve as a combined raw input image. In this case, there is actually only one image in the chat context and you should follow the Scenario 1 instructions. This is very important!
+
+
+Begin!
+
+Below are the raw input image and the initial user input query:
diff --git a/sam3/agent/system_prompts/system_prompt_iterative_checking.txt b/sam3/agent/system_prompts/system_prompt_iterative_checking.txt
new file mode 100644
index 0000000000000000000000000000000000000000..f6f9b881dbf4390984f5c9d60a2a8cfd8e6520c8
--- /dev/null
+++ b/sam3/agent/system_prompts/system_prompt_iterative_checking.txt
@@ -0,0 +1,26 @@
+You are a helpful assistant specializing in detail-oriented visual understanding, reasoning, and classification, capable of carefully analyzing a predicted segmentation mask on an image along with zoomed-in views of the area around the predicted segmentation mask to determine whether the object covered by the predicted segmentation mask is one of the correct masks that match the user query.
+
+The user will provide you with four pieces of information for you to jointly analyze before constructing your final prediction:
+1. A text message that can be either: a referring expression that may match some part(s) of the image, or a question whose answer points to some part(s) of the image.
+2. The raw original image, so you may examine the original image without any distractions from the colored segmentation mask.
+3. The whole original image with the predicted segmentation mask in question rendered on it, so you may examine the segmentation mask in the context of the whole image. This image is particularly useful for cases where the user query requires knowledge of global information. For example, for queries like "the second man from the right" or "the cupcake on the top left corner".
+4. A zoomed-in version of the predicted segmentation mask in question. This image consists of two sub-images connected together: one sub-image is the zoomed-in version of the predicted segmentation mask itself, and the other is a slightly zoomed-in view of the bounding-box area around the predicted segmentation mask.
+
+
+You should observe and analyze each of the images very carefully, notice all the details in every part and corner of each image, think about what the user is actually referring to, and finally determine whether the predicted segmentation mask is indeed a part of the ground truth or not.
+
+Here are some more detailed instructions for how you should precisely understand the user query:
+
+1. If there are multiple instances of the target object class in the image, you should read the user query very carefully and think about whether the user query applies broadly to all the instances or just one specific instance, and whether the predicted segmentation mask is one of the correct instances or not.
+2. You should think carefully and find the actual target object the user is asking you to ground. Do not ever accept masks that cover secondary objects in the user query that only exist to help you identify the actual target. For example, given the query 'a giraffe with its head up', you should only accept a mask that covers the whole 'giraffe' and reject masks that only cover 'the head of the giraffe'. Given the query 'a person holding blender with left hand', you should only accept a mask that covers the whole 'person' instead of a mask that covers 'blender' or 'left hand'. Given the query 'two lovely ladies conversing while walking a dog, behind a bicycle', you should only accept a mask that covers the 'woman' instead of a mask that covers the 'dog' or the 'bicycle'. Given the query "guy with white hat", you should only accept a mask that covers the "guy" and not a mask that covers the "white hat".
+3. Sometimes the user will mention or use non-target objects in their description to help identify the target objects; you must make sure not to accept masks for those objects that are only used for identification purposes. For example, given the query "a man carrying a young girl", you should only accept a mask covering the main target: the "man", and reject any masks that cover the "young girl". Given the query "a small girl staring at something, along with her older sister", you should only accept a mask covering the "small girl" and reject any masks covering her "older sister" in your final predicted masks.
+4. Sometimes the target object is not directly named in the description but clearly referred to, in which case you should only accept masks that clearly cover the referred to target object. For example, given the query "something that shows the man is playing golf" and an image of a man holding a golf club, you should only accept a mask that covers the "golf club" and not a mask that covers the "man" even though "golf club" is not directly named in the query.
+5. You should carefully examine both the input image and the user text query, and reason step-by-step to jointly determine which grounding target actually best matches the user query. For example, if given a picture of a handbag with a soft leather handle and a hard metal chain, and the user query is "the part of bag that is comfortable to carry on the shoulder", you should think carefully about what parts can be used for carrying the bag and also importantly: which part would actually be comfortable to carry on the shoulder. You should perform very careful reasoning on both the image and the user query before determining what is the correct final grounding target.
+
+
+Now, please analyze the images and think about whether the predicted segmentation mask is one of the correct masks that match or answer the user query. First output your detailed analysis of each input image, then output your step-by-step reasoning explaining why the predicted segmentation mask is correct or incorrect, and finally respond with either Accept or Reject.
+
+Please only respond in the following format and never break format for any reason:
+
+Analyze the user query and the three images: the raw input image, the image with the predicted segmentation mask rendered on it, and the image containing the zoomed-in version of the predicted segmentation mask. Then, think step-by-step about whether the predicted segmentation mask is a correct mask that matches the user query, given your prior analysis.
+Accept or Reject
diff --git a/sam3/agent/viz.py b/sam3/agent/viz.py
new file mode 100644
index 0000000000000000000000000000000000000000..286d823718d83489136356a34f27c64a49bdaf37
--- /dev/null
+++ b/sam3/agent/viz.py
@@ -0,0 +1,114 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import cv2
+import numpy as np
+import pycocotools.mask as mask_utils
+from PIL import Image
+
+from .helpers.visualizer import Visualizer
+from .helpers.zoom_in import render_zoom_in
+
+
+def visualize(
+ input_json: dict,
+ zoom_in_index: int | None = None,
+ mask_alpha: float = 0.15,
+ label_mode: str = "1",
+ font_size_multiplier: float = 1.2,
+ boarder_width_multiplier: float = 0,
+):
+ """
+ Unified visualization function.
+
+ If zoom_in_index is None:
+ - Render all masks in input_json (equivalent to visualize_masks_from_result_json).
+ - Returns: PIL.Image
+
+    If zoom_in_index is provided:
+        - Returns two PIL.Images, in this order:
+          1) The selected instance rendered via the general overlay, using the
+             color chosen by the zoom-in renderer (equivalent to calling
+             visualize_masks_from_result_json on a single-mask json_i with
+             color=color_hex).
+          2) Output identical to zoom_in_and_visualize(input_json, index),
+             i.e. the zoomed-in view of the mask and the area around it.
+ """
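+    # Illustrative usage (a rough sketch; `result_json` is a hypothetical variable
+    # holding a result dict with the keys read below: "orig_img_h", "orig_img_w",
+    # "original_image_path", "pred_boxes", "pred_masks"):
+    #
+    #   overview = visualize(result_json)                           # all masks
+    #   single, zoomed = visualize(result_json, zoom_in_index=0)    # one mask + its zoom-in
+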
+ # Common fields
+ orig_h = int(input_json["orig_img_h"])
+ orig_w = int(input_json["orig_img_w"])
+ img_path = input_json["original_image_path"]
+
+ # ---------- Mode A: Full-scene render ----------
+ if zoom_in_index is None:
+ boxes = np.array(input_json["pred_boxes"])
+ rle_masks = [
+ {"size": (orig_h, orig_w), "counts": rle}
+ for rle in input_json["pred_masks"]
+ ]
+ binary_masks = [mask_utils.decode(rle) for rle in rle_masks]
+
+ img_bgr = cv2.imread(img_path)
+ if img_bgr is None:
+ raise FileNotFoundError(f"Could not read image: {img_path}")
+ img_rgb = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
+
+ viz = Visualizer(
+ img_rgb,
+ font_size_multiplier=font_size_multiplier,
+ boarder_width_multiplier=boarder_width_multiplier,
+ )
+ viz.overlay_instances(
+ boxes=boxes,
+ masks=rle_masks,
+ binary_masks=binary_masks,
+ assigned_colors=None,
+ alpha=mask_alpha,
+ label_mode=label_mode,
+ )
+ pil_all_masks = Image.fromarray(viz.output.get_image())
+ return pil_all_masks
+
+ # ---------- Mode B: Zoom-in pair ----------
+ else:
+ idx = int(zoom_in_index)
+ num_masks = len(input_json.get("pred_masks", []))
+ if idx < 0 or idx >= num_masks:
+ raise ValueError(f"zoom_in_index {idx} is out of range (0..{num_masks-1}).")
+
+ # (1) Replicate zoom_in_and_visualize
+ object_data = {
+ "labels": [{"noun_phrase": f"mask_{idx}"}],
+ "segmentation": {
+ "counts": input_json["pred_masks"][idx],
+ "size": [orig_h, orig_w],
+ },
+ }
+ pil_img = Image.open(img_path)
+ pil_mask_i_zoomed, color_hex = render_zoom_in(
+ object_data, pil_img, mask_alpha=mask_alpha
+ )
+
+ # (2) Single-instance render with the same color
+ boxes_i = np.array([input_json["pred_boxes"][idx]])
+ rle_i = {"size": (orig_h, orig_w), "counts": input_json["pred_masks"][idx]}
+ bin_i = mask_utils.decode(rle_i)
+
+ img_bgr_i = cv2.imread(img_path)
+ if img_bgr_i is None:
+ raise FileNotFoundError(f"Could not read image: {img_path}")
+ img_rgb_i = cv2.cvtColor(img_bgr_i, cv2.COLOR_BGR2RGB)
+
+ viz_i = Visualizer(
+ img_rgb_i,
+ font_size_multiplier=font_size_multiplier,
+ boarder_width_multiplier=boarder_width_multiplier,
+ )
+ viz_i.overlay_instances(
+ boxes=boxes_i,
+ masks=[rle_i],
+ binary_masks=[bin_i],
+ assigned_colors=[color_hex],
+ alpha=mask_alpha,
+ label_mode=label_mode,
+ )
+ pil_mask_i = Image.fromarray(viz_i.output.get_image())
+
+ return pil_mask_i, pil_mask_i_zoomed
diff --git a/sam3/eval/__init__.py b/sam3/eval/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/eval/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/eval/cgf1_eval.py b/sam3/eval/cgf1_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..6a0d59f5ffb47eacc13d40c5b0d3e2ff0a8919df
--- /dev/null
+++ b/sam3/eval/cgf1_eval.py
@@ -0,0 +1,703 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import contextlib
+import copy
+import json
+import os
+import time
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import List, Union
+
+import numpy as np
+import pycocotools.mask as maskUtils
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from scipy.optimize import linear_sum_assignment
+from tqdm import tqdm
+
+
+@dataclass
+class Metric:
+ name: str
+
+ # whether the metric is computed at the image level or the box level
+ image_level: bool
+
+ # iou threshold (None is used for image level metrics or to indicate averaging over all thresholds in [0.5:0.95])
+ iou_threshold: Union[float, None]
+
+
+CGF1_METRICS = [
+ Metric(name="cgF1", image_level=False, iou_threshold=None),
+ Metric(name="precision", image_level=False, iou_threshold=None),
+ Metric(name="recall", image_level=False, iou_threshold=None),
+ Metric(name="F1", image_level=False, iou_threshold=None),
+ Metric(name="positive_macro_F1", image_level=False, iou_threshold=None),
+ Metric(name="positive_micro_F1", image_level=False, iou_threshold=None),
+ Metric(name="positive_micro_precision", image_level=False, iou_threshold=None),
+ Metric(name="IL_precision", image_level=True, iou_threshold=None),
+ Metric(name="IL_recall", image_level=True, iou_threshold=None),
+ Metric(name="IL_F1", image_level=True, iou_threshold=None),
+ Metric(name="IL_FPR", image_level=True, iou_threshold=None),
+ Metric(name="IL_MCC", image_level=True, iou_threshold=None),
+ Metric(name="cgF1", image_level=False, iou_threshold=0.5),
+ Metric(name="precision", image_level=False, iou_threshold=0.5),
+ Metric(name="recall", image_level=False, iou_threshold=0.5),
+ Metric(name="F1", image_level=False, iou_threshold=0.5),
+ Metric(name="positive_macro_F1", image_level=False, iou_threshold=0.5),
+ Metric(name="positive_micro_F1", image_level=False, iou_threshold=0.5),
+ Metric(name="positive_micro_precision", image_level=False, iou_threshold=0.5),
+ Metric(name="cgF1", image_level=False, iou_threshold=0.75),
+ Metric(name="precision", image_level=False, iou_threshold=0.75),
+ Metric(name="recall", image_level=False, iou_threshold=0.75),
+ Metric(name="F1", image_level=False, iou_threshold=0.75),
+ Metric(name="positive_macro_F1", image_level=False, iou_threshold=0.75),
+ Metric(name="positive_micro_F1", image_level=False, iou_threshold=0.75),
+ Metric(name="positive_micro_precision", image_level=False, iou_threshold=0.75),
+]
+
+
+class COCOCustom(COCO):
+ """COCO class from pycocotools with tiny modifications for speed"""
+
+ def createIndex(self):
+ # create index
+ print("creating index...")
+ anns, cats, imgs = {}, {}, {}
+ imgToAnns, catToImgs = defaultdict(list), defaultdict(list)
+ if "annotations" in self.dataset:
+ for ann in self.dataset["annotations"]:
+ imgToAnns[ann["image_id"]].append(ann)
+ anns[ann["id"]] = ann
+
+ if "images" in self.dataset:
+ # MODIFICATION: do not reload imgs if they are already there
+ if self.imgs:
+ imgs = self.imgs
+ else:
+ for img in self.dataset["images"]:
+ imgs[img["id"]] = img
+ # END MODIFICATION
+
+ if "categories" in self.dataset:
+ for cat in self.dataset["categories"]:
+ cats[cat["id"]] = cat
+
+ if "annotations" in self.dataset and "categories" in self.dataset:
+ for ann in self.dataset["annotations"]:
+ catToImgs[ann["category_id"]].append(ann["image_id"])
+
+ print("index created!")
+
+ # create class members
+ self.anns = anns
+ self.imgToAnns = imgToAnns
+ self.catToImgs = catToImgs
+ self.imgs = imgs
+ self.cats = cats
+
+ def loadRes(self, resFile):
+ """
+ Load result file and return a result api object.
+ :param resFile (str) : file name of result file
+ :return: res (obj) : result api object
+ """
+ res = COCOCustom()
+ res.dataset["info"] = copy.deepcopy(self.dataset.get("info", {}))
+ # MODIFICATION: no copy
+ # res.dataset['images'] = [img for img in self.dataset['images']]
+ res.dataset["images"] = self.dataset["images"]
+ # END MODIFICATION
+
+ print("Loading and preparing results...")
+ tic = time.time()
+ if type(resFile) == str:
+ with open(resFile) as f:
+ anns = json.load(f)
+ elif type(resFile) == np.ndarray:
+ anns = self.loadNumpyAnnotations(resFile)
+ else:
+ anns = resFile
+ assert type(anns) == list, "results in not an array of objects"
+ annsImgIds = [ann["image_id"] for ann in anns]
+ # MODIFICATION: faster and cached subset check
+ if not hasattr(self, "img_id_set"):
+ self.img_id_set = set(self.getImgIds())
+ assert set(annsImgIds).issubset(
+ self.img_id_set
+ ), "Results do not correspond to current coco set"
+ # END MODIFICATION
+ if "caption" in anns[0]:
+ imgIds = set([img["id"] for img in res.dataset["images"]]) & set(
+ [ann["image_id"] for ann in anns]
+ )
+ res.dataset["images"] = [
+ img for img in res.dataset["images"] if img["id"] in imgIds
+ ]
+ for id, ann in enumerate(anns):
+ ann["id"] = id + 1
+ elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ bb = ann["bbox"]
+ x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+ if not "segmentation" in ann:
+ ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+ ann["area"] = bb[2] * bb[3]
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "segmentation" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ # now only support compressed RLE format as segmentation results
+ ann["area"] = maskUtils.area(ann["segmentation"])
+ if not "bbox" in ann:
+ ann["bbox"] = maskUtils.toBbox(ann["segmentation"])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "keypoints" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ s = ann["keypoints"]
+ x = s[0::3]
+ y = s[1::3]
+ x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+ ann["area"] = (x1 - x0) * (y1 - y0)
+ ann["id"] = id + 1
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0]
+ print("DONE (t={:0.2f}s)".format(time.time() - tic))
+
+ res.dataset["annotations"] = anns
+ # MODIFICATION: inherit images
+ res.imgs = self.imgs
+ # END MODIFICATION
+ res.createIndex()
+ return res
+
+
+class CGF1Eval(COCOeval):
+ """
+ This evaluator is based upon COCO evaluation, but evaluates the model in a more realistic setting
+ for downstream applications.
+ See SAM3 paper for the details on the CGF1 metric.
+
+ Do not use this evaluator directly. Prefer the CGF1Evaluator wrapper.
+
+ Notes:
+ - This evaluator does not support per-category evaluation (in the way defined by pyCocotools)
+ - In open vocabulary settings, we have different noun-phrases for each image. What we call an "image_id" here is actually an (image, noun-phrase) pair. So in every "image_id" there is only one category, implied by the noun-phrase. Thus we can ignore the usual coco "category" field of the predictions
+ """
+
+ def __init__(
+ self,
+ coco_gt=None,
+ coco_dt=None,
+ iouType="segm",
+ threshold=0.5,
+ ):
+ """
+ Args:
+ coco_gt (COCO): ground truth COCO API
+ coco_dt (COCO): detections COCO API
+            iouType (str): type of IoU to evaluate ("segm" or "bbox")
+            threshold (float): score threshold; predictions scoring below it are discarded
+ """
+ super().__init__(coco_gt, coco_dt, iouType)
+ self.threshold = threshold
+
+ self.params.useCats = False
+ self.params.areaRng = [[0**2, 1e5**2]]
+ self.params.areaRngLbl = ["all"]
+ self.params.maxDets = [1000000]
+
+ def computeIoU(self, imgId, catId):
+ # Same as the original COCOeval.computeIoU, but without sorting
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+
+ if p.iouType == "segm":
+ g = [g["segmentation"] for g in gt]
+ d = [d["segmentation"] for d in dt]
+ elif p.iouType == "bbox":
+ g = [g["bbox"] for g in gt]
+ d = [d["bbox"] for d in dt]
+ else:
+ raise Exception("unknown iouType for iou computation")
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o["iscrowd"]) for o in gt]
+ ious = maskUtils.iou(d, g, iscrowd)
+ return ious
+
+ def evaluateImg(self, imgId, catId, aRng, maxDet):
+ """
+ perform evaluation for single category and image
+ :return: dict (single image results)
+ """
+ p = self.params
+ assert not p.useCats, "This evaluator does not support per-category evaluation."
+ assert catId == -1
+ all_gts = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ keep_gt = np.array([not g["ignore"] for g in all_gts], dtype=bool)
+ gt = [g for g in all_gts if not g["ignore"]]
+ all_dts = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ keep_dt = np.array([d["score"] >= self.threshold for d in all_dts], dtype=bool)
+ dt = [d for d in all_dts if d["score"] >= self.threshold]
+ if len(gt) == 0 and len(dt) == 0:
+ # This is a "true negative" case, where there are no GTs and no predictions
+ # The box-level metrics are ill-defined, so we don't add them to this dict
+ return {
+ "image_id": imgId,
+ "IL_TP": 0,
+ "IL_TN": 1,
+ "IL_FP": 0,
+ "IL_FN": 0,
+ "num_dt": len(dt),
+ }
+
+ if len(gt) > 0 and len(dt) == 0:
+ # This is a "false negative" case, where there are GTs but no predictions
+ return {
+ "image_id": imgId,
+ "IL_TP": 0,
+ "IL_TN": 0,
+ "IL_FP": 0,
+ "IL_FN": 1,
+ "TPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "FPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "FNs": np.ones((len(p.iouThrs),), dtype=np.int64) * len(gt),
+ "local_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "local_positive_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "num_dt": len(dt),
+ }
+
+ # Load pre-computed ious
+ ious = self.ious[(imgId, catId)]
+
+ # compute matching
+ if len(ious) == 0:
+ ious = np.zeros((len(dt), len(gt)))
+ else:
+ ious = ious[keep_dt, :][:, keep_gt]
+ assert ious.shape == (len(dt), len(gt))
+
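+        # One-to-one matching between detections and ground truths that maximizes
+        # total IoU: linear_sum_assignment (Hungarian algorithm) minimizes cost,
+        # so the IoU matrix is negated.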
+ matched_dt, matched_gt = linear_sum_assignment(-ious)
+
+ match_scores = ious[matched_dt, matched_gt]
+
+ TPs, FPs, FNs = [], [], []
+ IL_perfect = []
+ for thresh in p.iouThrs:
+ TP = (match_scores >= thresh).sum()
+ FP = len(dt) - TP
+ FN = len(gt) - TP
+ assert (
+ FP >= 0 and FN >= 0
+ ), f"FP: {FP}, FN: {FN}, TP: {TP}, match_scores: {match_scores}, len(dt): {len(dt)}, len(gt): {len(gt)}, ious: {ious}"
+ TPs.append(TP)
+ FPs.append(FP)
+ FNs.append(FN)
+
+ if FP == FN and FP == 0:
+ IL_perfect.append(1)
+ else:
+ IL_perfect.append(0)
+
+ TPs = np.array(TPs, dtype=np.int64)
+ FPs = np.array(FPs, dtype=np.int64)
+ FNs = np.array(FNs, dtype=np.int64)
+ IL_perfect = np.array(IL_perfect, dtype=np.int64)
+
+ # compute precision recall and F1
+ precision = TPs / (TPs + FPs + 1e-4)
+ assert np.all(precision <= 1)
+ recall = TPs / (TPs + FNs + 1e-4)
+ assert np.all(recall <= 1)
+ F1 = 2 * precision * recall / (precision + recall + 1e-4)
+
+ result = {
+ "image_id": imgId,
+ "TPs": TPs,
+ "FPs": FPs,
+ "FNs": FNs,
+ "local_F1s": F1,
+ "IL_TP": (len(gt) > 0) and (len(dt) > 0),
+ "IL_FP": (len(gt) == 0) and (len(dt) > 0),
+ "IL_TN": (len(gt) == 0) and (len(dt) == 0),
+ "IL_FN": (len(gt) > 0) and (len(dt) == 0),
+ "num_dt": len(dt),
+ }
+ if len(gt) > 0 and len(dt) > 0:
+ result["local_positive_F1s"] = F1
+ return result
+
+ def accumulate(self, p=None):
+ """
+ Accumulate per image evaluation results and store the result in self.eval
+ :param p: input params for evaluation
+ :return: None
+ """
+ if self.evalImgs is None or len(self.evalImgs) == 0:
+ print("Please run evaluate() first")
+ # allows input customized parameters
+ if p is None:
+ p = self.params
+
+ setImgIds = set(p.imgIds)
+
+ # TPs, FPs, FNs
+ TPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ FPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ pmFPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ FNs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ local_F1s = np.zeros((len(p.iouThrs),), dtype=np.float64)
+
+ # Image level metrics
+ IL_TPs = 0
+ IL_FPs = 0
+ IL_TNs = 0
+ IL_FNs = 0
+
+ valid_img_count = 0
+ valid_F1_count = 0
+ evaledImgIds = set()
+ for res in self.evalImgs:
+ if res["image_id"] not in setImgIds:
+ continue
+ evaledImgIds.add(res["image_id"])
+ IL_TPs += res["IL_TP"]
+ IL_FPs += res["IL_FP"]
+ IL_TNs += res["IL_TN"]
+ IL_FNs += res["IL_FN"]
+
+ if "TPs" not in res:
+ continue
+
+ TPs += res["TPs"]
+ FPs += res["FPs"]
+ FNs += res["FNs"]
+ valid_img_count += 1
+
+ if "local_positive_F1s" in res:
+ local_F1s += res["local_positive_F1s"]
+ pmFPs += res["FPs"]
+ if res["num_dt"] > 0:
+ valid_F1_count += 1
+
+ assert len(setImgIds - evaledImgIds) == 0, (
+ f"{len(setImgIds - evaledImgIds)} images not evaluated. "
+ f"Here are the IDs of the first 3: {list(setImgIds - evaledImgIds)[:3]}"
+ )
+
+ # compute precision recall and F1
+ precision = TPs / (TPs + FPs + 1e-4)
+ positive_micro_precision = TPs / (TPs + pmFPs + 1e-4)
+ assert np.all(precision <= 1)
+ recall = TPs / (TPs + FNs + 1e-4)
+ assert np.all(recall <= 1)
+ F1 = 2 * precision * recall / (precision + recall + 1e-4)
+ positive_micro_F1 = (
+ 2
+ * positive_micro_precision
+ * recall
+ / (positive_micro_precision + recall + 1e-4)
+ )
+
+ IL_rec = IL_TPs / (IL_TPs + IL_FNs + 1e-6)
+ IL_prec = IL_TPs / (IL_TPs + IL_FPs + 1e-6)
+ IL_F1 = 2 * IL_prec * IL_rec / (IL_prec + IL_rec + 1e-6)
+ IL_FPR = IL_FPs / (IL_FPs + IL_TNs + 1e-6)
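+        # Image-level Matthews correlation coefficient (MCC), computed from the
+        # per-image presence/absence decisions (IL_TP / IL_TN / IL_FP / IL_FN).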
+ IL_MCC = float(IL_TPs * IL_TNs - IL_FPs * IL_FNs) / (
+ (
+ float(IL_TPs + IL_FPs)
+ * float(IL_TPs + IL_FNs)
+ * float(IL_TNs + IL_FPs)
+ * float(IL_TNs + IL_FNs)
+ )
+ ** 0.5
+ + 1e-6
+ )
+
+ self.eval = {
+ "params": p,
+ "TPs": TPs,
+ "FPs": FPs,
+ "positive_micro_FPs": pmFPs,
+ "FNs": FNs,
+ "precision": precision,
+ "positive_micro_precision": positive_micro_precision,
+ "recall": recall,
+ "F1": F1,
+ "positive_micro_F1": positive_micro_F1,
+ "positive_macro_F1": local_F1s / valid_F1_count,
+ "IL_recall": IL_rec,
+ "IL_precision": IL_prec,
+ "IL_F1": IL_F1,
+ "IL_FPR": IL_FPR,
+ "IL_MCC": IL_MCC,
+ }
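+        # cgF1 combines localization quality with image-level classification quality:
+        # positive_micro_F1 (which excludes false positives on images without any
+        # ground truth) is scaled by the image-level Matthews correlation coefficient.
+        # See the SAM3 paper for the definition of the metric.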
+ self.eval["cgF1"] = self.eval["positive_micro_F1"] * self.eval["IL_MCC"]
+
+ def summarize(self):
+ """
+ Compute and display summary metrics for evaluation results.
+ """
+ if not self.eval:
+ raise Exception("Please run accumulate() first")
+
+ def _summarize(iouThr=None, metric=""):
+ p = self.params
+ iStr = " {:<18} @[ IoU={:<9}] = {:0.3f}"
+ titleStr = "Average " + metric
+ iouStr = (
+ "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+ if iouThr is None
+ else "{:0.2f}".format(iouThr)
+ )
+
+ s = self.eval[metric]
+ # IoU
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ print(iStr.format(titleStr, iouStr, mean_s))
+ return mean_s
+
+ def _summarize_single(metric=""):
+ titleStr = "Average " + metric
+ iStr = " {:<35} = {:0.3f}"
+ s = self.eval[metric]
+ print(iStr.format(titleStr, s))
+ return s
+
+ def _summarizeDets():
+ stats = []
+
+ for metric in CGF1_METRICS:
+ if metric.image_level:
+ stats.append(_summarize_single(metric=metric.name))
+ else:
+ stats.append(
+ _summarize(iouThr=metric.iou_threshold, metric=metric.name)
+ )
+ return np.asarray(stats)
+
+ summarize = _summarizeDets
+ self.stats = summarize()
+
+
+def _evaluate(self):
+ """
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
+ """
+ p = self.params
+ # add backward compatibility if useSegm is specified in params
+ p.imgIds = list(np.unique(p.imgIds))
+ p.useCats = False
+ p.maxDets = sorted(p.maxDets)
+ self.params = p
+
+ self._prepare()
+ # loop through images, area range, max detection number
+ catIds = [-1]
+
+ if p.iouType == "segm" or p.iouType == "bbox":
+ computeIoU = self.computeIoU
+ else:
+ raise RuntimeError(f"Unsupported iou {p.iouType}")
+ self.ious = {
+ (imgId, catId): computeIoU(imgId, catId)
+ for imgId in p.imgIds
+ for catId in catIds
+ }
+
+ maxDet = p.maxDets[-1]
+ evalImgs = [
+ self.evaluateImg(imgId, catId, areaRng, maxDet)
+ for catId in catIds
+ for areaRng in p.areaRng
+ for imgId in p.imgIds
+ ]
+ # this is NOT in the pycocotools code, but could be done outside
+ evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds))
+ return p.imgIds, evalImgs
+
+
+class CGF1Evaluator:
+ """
+ Wrapper class for cgF1 evaluation.
+ This supports the oracle setting (when several ground-truths are available per image)
+ """
+
+ def __init__(
+ self,
+ gt_path: Union[str, List[str]],
+ iou_type="segm",
+ verbose=False,
+ ):
+ """
+ Args:
+ gt_path (str or list of str): path(s) to ground truth COCO json file(s)
+ iou_type (str): type of IoU to evaluate
+            verbose (bool): if True, print progress and summary information
+ """
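+        # Illustrative usage (a rough sketch; file paths are hypothetical, and the
+        # ground-truth JSON must carry the "is_instance_exhaustive" flag on its
+        # image entries, as required below):
+        #
+        #   evaluator = CGF1Evaluator(gt_path="gt_coco.json", iou_type="segm")
+        #   metrics = evaluator.evaluate("predictions_coco.json")
+        #   print(metrics["cgF1_eval_segm_cgF1"])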
+ self.gt_paths = gt_path if isinstance(gt_path, list) else [gt_path]
+ self.iou_type = iou_type
+
+ self.coco_gts = [COCOCustom(gt) for gt in self.gt_paths]
+
+ self.verbose = verbose
+
+ self.coco_evals = []
+ for i, coco_gt in enumerate(self.coco_gts):
+ self.coco_evals.append(
+ CGF1Eval(
+ coco_gt=coco_gt,
+ iouType=iou_type,
+ )
+ )
+ self.coco_evals[i].useCats = False
+
+ exclude_img_ids = set()
+ # exclude_img_ids are the ids that are not exhaustively annotated in any of the other gts
+ for coco_gt in self.coco_gts[1:]:
+ exclude_img_ids = exclude_img_ids.union(
+ {
+ img["id"]
+ for img in coco_gt.dataset["images"]
+ if not img["is_instance_exhaustive"]
+ }
+ )
+ # we only eval on instance exhaustive queries
+ self.eval_img_ids = [
+ img["id"]
+ for img in self.coco_gts[0].dataset["images"]
+ if (img["is_instance_exhaustive"] and img["id"] not in exclude_img_ids)
+ ]
+
+ def evaluate(self, pred_file: str):
+ """
+ Evaluate the detections using cgF1 metric.
+
+ Args:
+ pred_file: path to the predictions COCO json file
+
+ """
+ assert len(self.coco_gts) > 0, "No ground truth provided for evaluation."
+ assert len(self.coco_gts) == len(
+ self.coco_evals
+ ), "Mismatch in number of ground truths and evaluators."
+
+ if self.verbose:
+ print(f"Loading predictions from {pred_file}")
+
+ with open(pred_file, "r") as f:
+ preds = json.load(f)
+
+ if self.verbose:
+ print(f"Loaded {len(preds)} predictions")
+
+ img2preds = defaultdict(list)
+ for pred in preds:
+ img2preds[pred["image_id"]].append(pred)
+
+ all_eval_imgs = []
+ for img_id in tqdm(self.eval_img_ids, disable=not self.verbose):
+ results = img2preds[img_id]
+ all_scorings = []
+ for cur_coco_gt, coco_eval in zip(self.coco_gts, self.coco_evals):
+ # suppress pycocotools prints
+ with open(os.devnull, "w") as devnull:
+ with contextlib.redirect_stdout(devnull):
+ coco_dt = (
+ cur_coco_gt.loadRes(results) if results else COCOCustom()
+ )
+
+ coco_eval.cocoDt = coco_dt
+ coco_eval.params.imgIds = [img_id]
+ coco_eval.params.useCats = False
+ img_ids, eval_imgs = _evaluate(coco_eval)
+ all_scorings.append(eval_imgs)
+ selected = self._select_best_scoring(all_scorings)
+ all_eval_imgs.append(selected)
+
+ # After this point, we have selected the best scoring per image among several ground truths
+ # we can now accumulate and summarize, using only the first coco_eval
+
+ self.coco_evals[0].evalImgs = list(
+ np.concatenate(all_eval_imgs, axis=2).flatten()
+ )
+ self.coco_evals[0].params.imgIds = self.eval_img_ids
+ self.coco_evals[0]._paramsEval = copy.deepcopy(self.coco_evals[0].params)
+
+ if self.verbose:
+ print(f"Accumulating results")
+ self.coco_evals[0].accumulate()
+ print("cgF1 metric, IoU type={}".format(self.iou_type))
+ self.coco_evals[0].summarize()
+ print()
+
+ out = {}
+ for i, value in enumerate(self.coco_evals[0].stats):
+ name = CGF1_METRICS[i].name
+ if CGF1_METRICS[i].iou_threshold is not None:
+ name = f"{name}@{CGF1_METRICS[i].iou_threshold}"
+ out[f"cgF1_eval_{self.iou_type}_{name}"] = float(value)
+
+ return out
+
+ @staticmethod
+ def _select_best_scoring(scorings):
+ # This function is used for "oracle" type evaluation.
+ # It accepts the evaluation results with respect to several ground truths, and picks the best
+ if len(scorings) == 1:
+ return scorings[0]
+
+ assert (
+ scorings[0].ndim == 3
+ ), f"Expecting results in [numCats, numAreas, numImgs] format, got {scorings[0].shape}"
+ assert (
+ scorings[0].shape[0] == 1
+ ), f"Expecting a single category, got {scorings[0].shape[0]}"
+
+ for scoring in scorings:
+ assert (
+ scoring.shape == scorings[0].shape
+ ), f"Shape mismatch: {scoring.shape}, {scorings[0].shape}"
+
+ selected_imgs = []
+ for img_id in range(scorings[0].shape[-1]):
+ best = scorings[0][:, :, img_id]
+
+ for scoring in scorings[1:]:
+ current = scoring[:, :, img_id]
+ if "local_F1s" in best[0, 0] and "local_F1s" in current[0, 0]:
+ # we were able to compute a F1 score for this particular image in both evaluations
+                # best["local_F1s"] contains the results at various IoU thresholds. We simply take the average for comparison
+ best_score = best[0, 0]["local_F1s"].mean()
+ current_score = current[0, 0]["local_F1s"].mean()
+ if current_score > best_score:
+ best = current
+
+ else:
+                # If we're here, it means that in some evaluation we were not able to get a valid local F1
+ # This happens when both the predictions and targets are empty. In that case, we can assume it's a perfect prediction
+ if "local_F1s" not in current[0, 0]:
+ best = current
+ selected_imgs.append(best)
+ result = np.stack(selected_imgs, axis=-1)
+ assert result.shape == scorings[0].shape
+ return result
diff --git a/sam3/eval/coco_eval.py b/sam3/eval/coco_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..167f8ef770d2009c2aee0352822528c09a1a9cad
--- /dev/null
+++ b/sam3/eval/coco_eval.py
@@ -0,0 +1,916 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+COCO evaluator that works in distributed mode.
+
+Mostly copy-paste from https://github.com/pytorch/vision/blob/edfd5a7/references/detection/coco_eval.py
+The difference is that there is less copy-pasting from pycocotools
+in the end of the file, as python3 can suppress prints with contextlib
+"""
+
+import contextlib
+import copy
+import json
+import logging
+import os
+import pickle
+from collections import defaultdict
+from pathlib import Path
+
+from typing import Any, List, Optional
+
+import numpy as np
+
+import pycocotools.mask as mask_utils
+import torch
+from iopath.common.file_io import g_pathmgr
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+
+from sam3.train.masks_ops import rle_encode
+
+from sam3.train.utils.distributed import (
+ all_gather,
+ gather_to_rank_0_via_filesys,
+ get_rank,
+ is_main_process,
+)
+
+RARITY_BUCKETS = {0: "frequent", 1: "common", 2: "medium", 3: "rare"}
+
+
+class CocoEvaluator:
+ def __init__(
+ self,
+ coco_gt,
+ iou_types: List[str],
+ useCats: bool,
+ dump_dir: Optional[str],
+ postprocessor,
+ average_by_rarity=False,
+ metrics_dump_dir: Optional[str] = None,
+ gather_pred_via_filesys=False,
+ use_normalized_areas=True,
+ maxdets=[1, 10, 100],
+ exhaustive_only=False,
+ all_exhaustive_only=True,
+ ):
+ """Online coco evaluator. It will evaluate images as they are generated by the model, then accumulate/summarize at the end
+
+ Args:
+ - coco_gt: COCO api object containing the gt
+ - iou_types: can be either "bbox" or "segm"
+ - useCats: If true, categories will be used for evaluation
+ - dump_dir: if non null, then the predictions will be dumped in that directory
+ - postprocessor: Module to convert the model's output into the coco format
+ - average_by_rarity: if true then we expect the images information in the gt dataset
+ to have a "rarity" field. Then the AP will be computed on all rarity buckets
+ individually, then averaged
+ - gather_pred_via_filesys: if true, we use the filesystem for collective gathers
+ - use_normalized_areas: if true, the areas of the objects in the GT are assumed to be
+ normalized by the area of the image. In that case, the size buckets are adjusted
+ - maxdets: maximal number of detections to be evaluated on each image.
+ - exhaustive_only: If true, we restrict eval only to exhaustive annotations
+ - all_exhaustive_only: If true, datapoints are restricted only to those with all exhaustive annotations
+
+ """
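+        # Rough usage sketch (the update() arguments are forwarded verbatim to
+        # postprocessor.process_results, so the call below is hypothetical and
+        # depends on the postprocessor you pass in):
+        #
+        #   evaluator = CocoEvaluator(coco_gt="gt.json", iou_types=["bbox"],
+        #                             useCats=False, dump_dir=None,
+        #                             postprocessor=my_postprocessor)
+        #   for batch in data_loader:
+        #       evaluator.update(model_outputs, batch)   # per batch, on every rank
+        #   metrics = evaluator.compute_synced()         # gather, accumulate, summarize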
+ # coco_gt = copy.deepcopy(coco_gt)
+ self.coco_gts = [coco_gt] if not isinstance(coco_gt, list) else coco_gt
+        assert len(maxdets) == 3, f"expecting 3 detection thresholds, got {len(maxdets)}"
+
+ self.use_normalized_areas = use_normalized_areas
+ self.iou_types = iou_types
+ self.useCats = useCats
+ self.maxdets = maxdets
+ self.dump = None
+ self.dump_dir = dump_dir
+ if self.dump_dir is not None:
+ self.dump = []
+ if is_main_process():
+ if not os.path.exists(self.dump_dir):
+ os.makedirs(self.dump_dir, exist_ok=True)
+ logging.info(f"Create the folder: {dump_dir}")
+
+ self.initialized = False
+
+ # Whether to gather predictions through filesystem (instead of torch
+ # collective ops; requiring a shared filesystem across all ranks)
+ self.gather_pred_via_filesys = gather_pred_via_filesys
+ self.use_self_evaluate = True # CPP version is disabled
+ self.postprocessor = postprocessor
+ self.average_by_rarity = average_by_rarity
+ self.exhaustive_only = exhaustive_only
+ self.all_exhaustive_only = all_exhaustive_only
+ self.metrics_dump_dir = metrics_dump_dir
+ if self.metrics_dump_dir is not None:
+ if is_main_process():
+ if not os.path.exists(self.metrics_dump_dir):
+ os.makedirs(self.metrics_dump_dir, exist_ok=True)
+ logging.info(f"Create the folder: {metrics_dump_dir}")
+
+ def _lazy_init(self, coco_cls=COCO):
+ if self.initialized:
+ return
+
+ self.initialized = True
+
+ self.coco_gts = [
+ coco_cls(g_pathmgr.get_local_path(gt)) if isinstance(gt, str) else gt
+ for gt in self.coco_gts
+ ]
+
+ self.reset()
+
+ self.eval_img_ids = None
+
+ if self.exhaustive_only:
+ exclude_img_ids = set()
+ # exclude_img_ids are the ids that are not exhaustively annotated in any of the other gts
+ if self.all_exhaustive_only:
+ for coco_gt in self.coco_gts[1:]:
+ exclude_img_ids = exclude_img_ids.union(
+ {
+ img["id"]
+ for img in coco_gt.dataset["images"]
+ if not img["is_instance_exhaustive"]
+ }
+ )
+ # we only eval on instance exhaustive queries
+ self.eval_img_ids = [
+ img["id"]
+ for img in self.coco_gts[0].dataset["images"]
+ if (img["is_instance_exhaustive"] and img["id"] not in exclude_img_ids)
+ ]
+
+ self.rarity_buckets = None
+ if self.average_by_rarity:
+ self.rarity_buckets = defaultdict(list)
+ eval_img_ids_set = (
+ set(self.eval_img_ids) if self.eval_img_ids is not None else None
+ )
+ for img in self.coco_gts[0].dataset["images"]:
+ if self.eval_img_ids is not None and img["id"] not in eval_img_ids_set:
+ continue
+ self.rarity_buckets[img["rarity"]].append(img["id"])
+ print("Rarity buckets sizes:")
+ for k, v in self.rarity_buckets.items():
+ print(f"{k}: {len(v)}")
+
+ def set_sync_device(self, device: torch.device) -> Any:
+ self._sync_device = device
+
+ def _evaluate(self, *args, **kwargs):
+ return evaluate(*args, **kwargs)
+
+ def _loadRes(self, *args, **kwargs):
+ return loadRes(*args, **kwargs)
+
+ def update(self, *args, **kwargs):
+ self._lazy_init()
+ predictions = self.postprocessor.process_results(*args, **kwargs)
+
+ img_ids = list(np.unique(list(predictions.keys())))
+ self.img_ids.extend(img_ids)
+
+ for iou_type in self.iou_types:
+ results = self.prepare(predictions, iou_type)
+ self._dump(results)
+
+ assert len(self.coco_gts) == len(self.coco_evals)
+ all_scorings = []
+ for cur_coco_gt, cur_coco_eval in zip(self.coco_gts, self.coco_evals):
+ # suppress pycocotools prints
+ with open(os.devnull, "w") as devnull:
+ with contextlib.redirect_stdout(devnull):
+ coco_dt = (
+ self._loadRes(cur_coco_gt, results) if results else COCO()
+ )
+
+ coco_eval = cur_coco_eval[iou_type]
+
+ coco_eval.cocoDt = coco_dt
+ coco_eval.params.imgIds = list(img_ids)
+ coco_eval.params.useCats = self.useCats
+ coco_eval.params.maxDets = self.maxdets
+ img_ids, eval_imgs = self._evaluate(coco_eval, self.use_self_evaluate)
+ all_scorings.append(eval_imgs)
+
+ selected = self.select_best_scoring(all_scorings)
+ self.eval_imgs[iou_type].append(selected)
+
+ def select_best_scoring(self, scorings):
+ # This function is used for "oracle" type evaluation.
+ # It accepts the evaluation results with respect to several ground truths, and picks the best
+ if len(scorings) == 1:
+ return scorings[0]
+
+ # Currently we don't support Oracle Phrase AP.
+ # To implement it, we likely need to modify the cpp code since the eval_image type is opaque
+ raise RuntimeError("Not implemented")
+
+ def _dump(self, results):
+ if self.dump is not None:
+ dumped_results = copy.deepcopy(results)
+ for r in dumped_results:
+ if "bbox" not in self.iou_types and "bbox" in r:
+ del r["bbox"]
+ elif "bbox" in r:
+ r["bbox"] = [round(coord, 5) for coord in r["bbox"]]
+ r["score"] = round(r["score"], 5)
+ self.dump.extend(dumped_results)
+
+ def synchronize_between_processes(self):
+ self._lazy_init()
+ logging.info("Coco evaluator: Synchronizing between processes")
+ for iou_type in self.iou_types:
+ if len(self.eval_imgs[iou_type]) > 0:
+ self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2)
+ else:
+ num_areas = len(self.coco_evals[0][iou_type].params.areaRng)
+ # assuming 1 class
+ assert not self.useCats
+ self.eval_imgs[iou_type] = np.empty((1, num_areas, 0))
+ create_common_coco_eval(
+ self.coco_evals[0][iou_type],
+ self.img_ids,
+ self.eval_imgs[iou_type],
+ use_self_evaluate=self.use_self_evaluate,
+ gather_pred_via_filesys=self.gather_pred_via_filesys,
+ metrics_dump_dir=self.metrics_dump_dir,
+ )
+ if self.dump is not None:
+ dumped_file = Path(self.dump_dir) / f"coco_predictions_{get_rank()}.json"
+ logging.info(f"COCO evaluator: Dumping local predictions to {dumped_file}")
+ with g_pathmgr.open(str(dumped_file), "w") as f:
+ json.dump(self.dump, f)
+
+ # if self.gather_pred_via_filesys:
+ # dump = gather_to_rank_0_via_filesys(self.dump)
+ # else:
+ # dump = all_gather(self.dump, force_cpu=True)
+ # self.dump = sum(dump, [])
+
+ def accumulate(self, imgIds=None):
+ self._lazy_init()
+ logging.info(
+ f"Coco evaluator: Accumulating on {len(imgIds) if imgIds is not None else 'all'} images"
+ )
+ if not is_main_process():
+ return
+
+ if imgIds is None:
+ for coco_eval in self.coco_evals[0].values():
+ accumulate(coco_eval, use_self_eval=self.use_self_evaluate)
+
+ if imgIds is not None:
+ imgIds = set(imgIds)
+ for coco_eval in self.coco_evals[0].values():
+ p = coco_eval.params
+ id_mask = np.array([(i in imgIds) for i in p.imgIds], dtype=bool)
+ old_img_ids = p.imgIds
+ coco_eval.params.imgIds = np.asarray(p.imgIds)[id_mask]
+ old_img_evals = coco_eval.evalImgs
+ catIds = p.catIds if p.useCats else [-1]
+ coco_eval.evalImgs = list(
+ np.asarray(coco_eval.evalImgs)
+ .reshape(len(catIds), len(p.areaRng), len(old_img_ids))[
+ ..., id_mask
+ ]
+ .flatten()
+ )
+ accumulate(coco_eval, use_self_eval=self.use_self_evaluate)
+ coco_eval.evalImgs = old_img_evals
+ coco_eval.params.imgIds = old_img_ids
+
+ def summarize(self):
+ self._lazy_init()
+ logging.info("Coco evaluator: Summarizing")
+ if not is_main_process():
+ return {}
+
+ outs = {}
+ if self.rarity_buckets is None:
+ self.accumulate(self.eval_img_ids)
+ for iou_type, coco_eval in self.coco_evals[0].items():
+ print("IoU metric: {}".format(iou_type))
+ summarize(coco_eval)
+
+ if "bbox" in self.coco_evals[0]:
+ for key, value in zip(*self.coco_evals[0]["bbox"].stats):
+ outs[f"coco_eval_bbox_{key}"] = value
+ if "segm" in self.coco_evals[0]:
+ for key, value in zip(*self.coco_evals[0]["segm"].stats):
+ outs[f"coco_eval_masks_{key}"] = value
+ else:
+ total_stats = {}
+ all_keys = {}
+ for bucket, img_list in self.rarity_buckets.items():
+ self.accumulate(imgIds=img_list)
+ bucket_name = RARITY_BUCKETS[bucket]
+ for iou_type, coco_eval in self.coco_evals[0].items():
+ print(f"IoU metric: {iou_type}. Rarity bucket: {bucket_name}")
+ summarize(coco_eval)
+
+ if "bbox" in self.coco_evals[0]:
+ if "bbox" not in total_stats:
+ total_stats["bbox"] = np.zeros_like(
+ self.coco_evals[0]["bbox"].stats[1]
+ )
+ all_keys["bbox"] = self.coco_evals[0]["bbox"].stats[0]
+ total_stats["bbox"] += self.coco_evals[0]["bbox"].stats[1]
+ for key, value in zip(*self.coco_evals[0]["bbox"].stats):
+ outs[f"coco_eval_bbox_{bucket_name}_{key}"] = value
+ if "segm" in self.coco_evals[0]:
+ if "segm" not in total_stats:
+ total_stats["segm"] = np.zeros_like(
+ self.coco_evals[0]["segm"].stats[1]
+ )
+ all_keys["segm"] = self.coco_evals[0]["segm"].stats[0]
+ total_stats["segm"] += self.coco_evals[0]["segm"].stats[1]
+ for key, value in zip(*self.coco_evals[0]["segm"].stats):
+ outs[f"coco_eval_masks_{bucket_name}_{key}"] = value
+
+ if "bbox" in total_stats:
+ total_stats["bbox"] /= len(self.rarity_buckets)
+ for key, value in zip(all_keys["bbox"], total_stats["bbox"]):
+ outs[f"coco_eval_bbox_{key}"] = value
+ if "segm" in total_stats:
+ total_stats["segm"] /= len(self.rarity_buckets)
+ for key, value in zip(all_keys["segm"], total_stats["segm"]):
+ outs[f"coco_eval_masks_{key}"] = value
+
+ # if self.dump is not None:
+ # assert self.dump_dir is not None
+ # logging.info("Coco evaluator: Dumping the global result file to disk")
+ # with g_pathmgr.open(str(Path(self.dump_dir) / "coco_eval.json"), "w") as f:
+ # json.dump(self.dump, f)
+ return outs
+
+ def compute_synced(self):
+ self._lazy_init()
+ self.synchronize_between_processes()
+ return self.summarize()
+
+ def compute(self):
+ self._lazy_init()
+ return {"": 0.0}
+
+ def reset(self, cocoeval_cls=COCOeval):
+ self.coco_evals = [{} for _ in range(len(self.coco_gts))]
+ for i, coco_gt in enumerate(self.coco_gts):
+ for iou_type in self.iou_types:
+ self.coco_evals[i][iou_type] = cocoeval_cls(coco_gt, iouType=iou_type)
+ self.coco_evals[i][iou_type].params.useCats = self.useCats
+ self.coco_evals[i][iou_type].params.maxDets = self.maxdets
+ if self.use_normalized_areas:
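+ # With normalized areas, object area is a fraction of the image area (see
+ # prepare_for_coco_segmentation), so these buckets are relative thresholds
+ # rather than pixel counts.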
+ self.coco_evals[i][iou_type].params.areaRng = [
+ [0, 1e5],
+ [0, 0.001],
+ [0.001, 0.01],
+ [0.01, 0.1],
+ [0.1, 0.5],
+ [0.5, 0.95],
+ [0.95, 1e5],
+ ]
+ self.coco_evals[i][iou_type].params.areaRngLbl = [
+ "all",
+ "tiny",
+ "small",
+ "medium",
+ "large",
+ "huge",
+ "whole_image",
+ ]
+
+ self.img_ids = []
+ self.eval_imgs = {k: [] for k in self.iou_types}
+ if self.dump is not None:
+ self.dump = []
+
+ def write(self, stats):
+ """Write the results into the stats dict."""
+ self._lazy_init()
+ if "bbox" in self.coco_evals[0]:
+ stats["coco_eval_bbox"] = self.coco_evals[0]["bbox"].stats.tolist()
+ if "segm" in self.coco_evals[0]:
+ stats["coco_eval_masks"] = self.coco_evals[0]["segm"].stats.tolist()
+ return stats
+
+ def prepare(self, predictions, iou_type):
+ self._lazy_init()
+ if iou_type == "bbox":
+ return self.prepare_for_coco_detection(predictions)
+ elif iou_type == "segm":
+ return self.prepare_for_coco_segmentation(predictions)
+ elif iou_type == "keypoints":
+ return self.prepare_for_coco_keypoint(predictions)
+ else:
+ raise ValueError("Unknown iou type {}".format(iou_type))
+
+ def prepare_for_coco_detection(self, predictions):
+ self._lazy_init()
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "bbox": box,
+ "score": scores[k],
+ }
+ for k, box in enumerate(boxes)
+ ]
+ )
+ return coco_results
+
+ @torch.no_grad()
+ def prepare_for_coco_segmentation(self, predictions):
+ self._lazy_init()
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+ boundaries, dilated_boundaries = None, None
+ if "boundaries" in prediction:
+ boundaries = prediction["boundaries"]
+ dilated_boundaries = prediction["dilated_boundaries"]
+ assert dilated_boundaries is not None
+ assert len(scores) == len(boundaries)
+
+ if "masks_rle" in prediction:
+ rles = prediction["masks_rle"]
+ areas = []
+ for rle in rles:
+ cur_area = mask_utils.area(rle)
+ h, w = rle["size"]
+ areas.append(cur_area / (h * w))
+ else:
+ masks = prediction["masks"]
+
+ masks = masks > 0.5
+ h, w = masks.shape[-2:]
+
+ areas = masks.flatten(1).sum(1) / (h * w)
+ areas = areas.tolist()
+
+ rles = rle_encode(masks.squeeze(1))
+
+ # memory clean
+ del masks
+ del prediction["masks"]
+
+ assert len(areas) == len(rles) == len(scores)
+ for k, rle in enumerate(rles):
+ payload = {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "segmentation": rle,
+ "score": scores[k],
+ "area": areas[k],
+ }
+ if boundaries is not None:
+ payload["boundary"] = boundaries[k]
+ payload["dilated_boundary"] = dilated_boundaries[k]
+
+ coco_results.append(payload)
+
+ return coco_results
+
+ def prepare_for_coco_keypoint(self, predictions):
+ self._lazy_init()
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+ keypoints = prediction["keypoints"]
+ keypoints = keypoints.flatten(start_dim=1).tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "keypoints": keypoint,
+ "score": scores[k],
+ }
+ for k, keypoint in enumerate(keypoints)
+ ]
+ )
+ return coco_results
+
+
+def convert_to_xywh(boxes):
+ xmin, ymin, xmax, ymax = boxes.unbind(-1)
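+ # e.g. an xyxy box [10, 20, 50, 80] becomes [10, 20, 40, 60] in xywh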
+ return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=-1)
+
+
+def merge(img_ids, eval_imgs, gather_pred_via_filesys=False):
+ if gather_pred_via_filesys:
+ # only gather the predictions to rank 0 (other ranks will receive empty
+ # lists for `all_img_ids` and `all_eval_imgs`, which should be OK as
+ # merging and evaluation are only done on rank 0)
+ all_img_ids = gather_to_rank_0_via_filesys(img_ids)
+ all_eval_imgs = gather_to_rank_0_via_filesys(eval_imgs)
+ else:
+ all_img_ids = all_gather(img_ids, force_cpu=True)
+ all_eval_imgs = all_gather(eval_imgs, force_cpu=True)
+ if not is_main_process():
+ return None, None
+
+ merged_img_ids = []
+ for p in all_img_ids:
+ merged_img_ids.extend(p)
+
+ merged_eval_imgs = []
+ for p in all_eval_imgs:
+ merged_eval_imgs.append(p)
+
+ merged_img_ids = np.array(merged_img_ids)
+ merged_eval_imgs = np.concatenate(merged_eval_imgs, 2)
+
+ # keep only unique (and in sorted order) images
+ merged_img_ids, idx = np.unique(merged_img_ids, return_index=True)
+ merged_eval_imgs = merged_eval_imgs[..., idx]
+
+ return merged_img_ids, merged_eval_imgs
+
+
+def create_common_coco_eval(
+ coco_eval,
+ img_ids,
+ eval_imgs,
+ use_self_evaluate,
+ gather_pred_via_filesys=False,
+ metrics_dump_dir=None,
+):
+ img_ids, eval_imgs = merge(img_ids, eval_imgs, gather_pred_via_filesys)
+ if not is_main_process():
+ return
+ if metrics_dump_dir is not None:
+ dumped_file = (
+ Path(metrics_dump_dir) / f"coco_eval_img_metrics_{get_rank()}.json"
+ )
+ logging.info(f"COCO evaluator: Dumping local predictions to {dumped_file}")
+ with g_pathmgr.open(str(dumped_file), "w") as f:
+ json.dump(eval_imgs.squeeze(), f, default=lambda x: x.tolist())
+ img_ids = list(img_ids)
+
+ # If some images were not predicted, we need to create dummy detections for them
+ missing_img_ids = set(coco_eval.cocoGt.getImgIds()) - set(img_ids)
+ if len(missing_img_ids) > 0:
+ print(f"WARNING: {len(missing_img_ids)} images were not predicted!")
+ coco_eval.cocoDt = COCO()
+ coco_eval.params.imgIds = list(missing_img_ids)
+ new_img_ids, new_eval_imgs = evaluate(coco_eval, use_self_evaluate)
+ img_ids.extend(new_img_ids)
+ eval_imgs = np.concatenate((eval_imgs, new_eval_imgs), axis=2)
+
+ eval_imgs = list(eval_imgs.flatten())
+ assert len(img_ids) == len(coco_eval.cocoGt.getImgIds())
+
+ coco_eval.evalImgs = eval_imgs
+ coco_eval.params.imgIds = img_ids
+ coco_eval._paramsEval = copy.deepcopy(coco_eval.params)
+
+
+#################################################################
+# From pycocotools, with the prints removed and a Python 3 bug
+# (`unicode` not defined) fixed
+#################################################################
+
+
+# Copy of COCO prepare, but doesn't convert annToRLE
+def segmentation_prepare(self):
+ """
+ Prepare ._gts and ._dts for evaluation based on params
+ :return: None
+ """
+ p = self.params
+ if p.useCats:
+ gts = self.cocoGt.loadAnns(
+ self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
+ )
+ dts = self.cocoDt.loadAnns(
+ self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
+ )
+ else:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
+
+ for gt in gts:
+ gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
+ gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
+ if p.iouType == "keypoints":
+ gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+ for gt in gts:
+ self._gts[gt["image_id"], gt["category_id"]].append(gt)
+ for dt in dts:
+ self._dts[dt["image_id"], dt["category_id"]].append(dt)
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
+ self.eval = {} # accumulated evaluation results
+
+
+def evaluate(self, use_self_evaluate):
+ """
+ Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
+ :return: None
+ """
+ # tic = time.time()
+ # print('Running per image evaluation...', use_self_evaluate)
+ p = self.params
+ # add backward compatibility if useSegm is specified in params
+ if p.useSegm is not None:
+ p.iouType = "segm" if p.useSegm == 1 else "bbox"
+ print(
+ "useSegm (deprecated) is not None. Running {} evaluation".format(p.iouType)
+ )
+ # print('Evaluate annotation type *{}*'.format(p.iouType))
+ p.imgIds = list(np.unique(p.imgIds))
+ if p.useCats:
+ p.catIds = list(np.unique(p.catIds))
+ p.maxDets = sorted(p.maxDets)
+ self.params = p
+
+ self._prepare()
+ # loop through images, area range, max detection number
+ catIds = p.catIds if p.useCats else [-1]
+
+ if p.iouType == "segm" or p.iouType == "bbox":
+ computeIoU = self.computeIoU
+ elif p.iouType == "keypoints":
+ computeIoU = self.computeOks
+ self.ious = {
+ (imgId, catId): computeIoU(imgId, catId)
+ for imgId in p.imgIds
+ for catId in catIds
+ }
+
+ maxDet = p.maxDets[-1]
+ if use_self_evaluate:
+ evalImgs = [
+ self.evaluateImg(imgId, catId, areaRng, maxDet)
+ for catId in catIds
+ for areaRng in p.areaRng
+ for imgId in p.imgIds
+ ]
+ # this is NOT in the pycocotools code, but could be done outside
+ evalImgs = np.asarray(evalImgs).reshape(
+ len(catIds), len(p.areaRng), len(p.imgIds)
+ )
+ return p.imgIds, evalImgs
+
+ # <<<< Beginning of code differences with original COCO API
+ # def convert_instances_to_cpp(instances, is_det=False):
+ # # Convert annotations for a list of instances in an image to a format that's fast
+ # # to access in C++
+ # instances_cpp = []
+ # for instance in instances:
+ # instance_cpp = _CPP.InstanceAnnotation(
+ # int(instance["id"]),
+ # instance["score"] if is_det else instance.get("score", 0.0),
+ # instance["area"],
+ # bool(instance.get("iscrowd", 0)),
+ # bool(instance.get("ignore", 0)),
+ # )
+ # instances_cpp.append(instance_cpp)
+ # return instances_cpp
+
+ # # Convert GT annotations, detections, and IOUs to a format that's fast to access in C++
+ # ground_truth_instances = [
+ # [convert_instances_to_cpp(self._gts[imgId, catId]) for catId in p.catIds]
+ # for imgId in p.imgIds
+ # ]
+ # detected_instances = [
+ # [
+ # convert_instances_to_cpp(self._dts[imgId, catId], is_det=True)
+ # for catId in p.catIds
+ # ]
+ # for imgId in p.imgIds
+ # ]
+ # ious = [[self.ious[imgId, catId] for catId in catIds] for imgId in p.imgIds]
+
+ # if not p.useCats:
+ # # For each image, flatten per-category lists into a single list
+ # ground_truth_instances = [
+ # [[o for c in i for o in c]] for i in ground_truth_instances
+ # ]
+ # detected_instances = [[[o for c in i for o in c]] for i in detected_instances]
+
+ # # Call C++ implementation of self.evaluateImgs()
+ # _evalImgs_cpp = _CPP.COCOevalEvaluateImages(
+ # p.areaRng, maxDet, p.iouThrs, ious, ground_truth_instances, detected_instances
+ # )
+
+ # self._paramsEval = copy.deepcopy(self.params)
+ # evalImgs = np.asarray(_evalImgs_cpp).reshape(
+ # len(catIds), len(p.areaRng), len(p.imgIds)
+ # )
+ # return p.imgIds, evalImgs
+
+
+#################################################################
+# end of straight copy from pycocotools, just removing the prints
+#################################################################
+
+
+#################################################################
+# From pycocotools, but disabled mask->box conversion which is
+# pointless
+#################################################################
+def loadRes(self, resFile):
+ """
+ Load result file and return a result api object.
+ :param resFile (str) : file name of result file
+ :return: res (obj) : result api object
+ """
+ res = COCO()
+ res.dataset["images"] = [img for img in self.dataset["images"]]
+
+ if type(resFile) == str:
+ anns = json.load(open(resFile))
+ elif type(resFile) == np.ndarray:
+ anns = self.loadNumpyAnnotations(resFile)
+ else:
+ anns = resFile
+ assert type(anns) == list, "results is not an array of objects"
+ annsImgIds = [ann["image_id"] for ann in anns]
+ assert set(annsImgIds) == (
+ set(annsImgIds) & set(self.getImgIds())
+ ), "Results do not correspond to current coco set"
+ if "caption" in anns[0]:
+ imgIds = set([img["id"] for img in res.dataset["images"]]) & set(
+ [ann["image_id"] for ann in anns]
+ )
+ res.dataset["images"] = [
+ img for img in res.dataset["images"] if img["id"] in imgIds
+ ]
+ for id, ann in enumerate(anns):
+ ann["id"] = id + 1
+ elif "bbox" in anns[0] and not anns[0]["bbox"] == []:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ bb = ann["bbox"]
+ x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]]
+ if "segmentation" not in ann:
+ ann["segmentation"] = [[x1, y1, x1, y2, x2, y2, x2, y1]]
+ ann["area"] = bb[2] * bb[3]
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "segmentation" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ # now only support compressed RLE format as segmentation results
+ # ann["area"] = mask_util.area(ann["segmentation"])
+ # The following lines are disabled because they are pointless
+ # if not 'bbox' in ann:
+ # ann['bbox'] = maskUtils.toBbox(ann['segmentation'])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "keypoints" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ s = ann["keypoints"]
+ x = s[0::3]
+ y = s[1::3]
+ x0, x1, y0, y1 = np.min(x), np.max(x), np.min(y), np.max(y)
+ ann["area"] = (x1 - x0) * (y1 - y0)
+ ann["id"] = id + 1
+ ann["bbox"] = [x0, y0, x1 - x0, y1 - y0]
+
+ res.dataset["annotations"] = anns
+ res.createIndex()
+ return res
+
+
+#################################################################
+# end of straight copy from pycocotools
+#################################################################
+
+
+#################################################################
+# From pycocotools, but with added handling of custom area ranges; also returns the stat keys
+#################################################################
+def summarize(self):
+ """
+ Compute and display summary metrics for evaluation results.
+ Note this function can *only* be applied on the default parameter setting
+ """
+
+ def _summarize(ap=1, iouThr=None, areaRng="all", maxDets=100):
+ p = self.params
+ iStr = " {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}"
+ titleStr = "Average Precision" if ap == 1 else "Average Recall"
+ typeStr = "(AP)" if ap == 1 else "(AR)"
+ iouStr = (
+ "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+ if iouThr is None
+ else "{:0.2f}".format(iouThr)
+ )
+
+ aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if ap == 1:
+ # dimension of precision: [TxRxKxAxM]
+ s = self.eval["precision"]
+ # IoU
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, :, aind, mind]
+ else:
+ # dimension of recall: [TxKxAxM]
+ s = self.eval["recall"]
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, aind, mind]
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ print(iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s))
+ return mean_s
+
+ def _summarizeDets():
+ nb_results = 6 + (len(self.params.areaRng) - 1) * 2
+ assert len(self.params.areaRng) == len(self.params.areaRngLbl)
+ stats = np.zeros((nb_results,))
+ keys = ["AP", "AP_50", "AP_75"]
+ stats[0] = _summarize(1, maxDets=self.params.maxDets[2])
+ stats[1] = _summarize(1, iouThr=0.5, maxDets=self.params.maxDets[2])
+ stats[2] = _summarize(1, iouThr=0.75, maxDets=self.params.maxDets[2])
+ cur_id = 3
+ for area in self.params.areaRngLbl[1:]:
+ stats[cur_id] = _summarize(1, areaRng=area, maxDets=self.params.maxDets[2])
+ cur_id += 1
+ keys.append(f"AP_{area}")
+ stats[cur_id] = _summarize(0, maxDets=self.params.maxDets[0])
+ cur_id += 1
+ stats[cur_id] = _summarize(0, maxDets=self.params.maxDets[1])
+ cur_id += 1
+ stats[cur_id] = _summarize(0, maxDets=self.params.maxDets[2])
+ cur_id += 1
+ keys += ["AR", "AR_50", "AR_75"]
+
+ for area in self.params.areaRngLbl[1:]:
+ stats[cur_id] = _summarize(0, areaRng=area, maxDets=self.params.maxDets[2])
+ cur_id += 1
+ keys.append(f"AR_{area}")
+ assert len(stats) == len(keys)
+ return keys, stats
+
+ if not self.eval:
+ raise Exception("Please run accumulate() first")
+ self.stats = _summarizeDets()
+
+
+#################################################################
+# end of straight copy from pycocotools
+#################################################################
+
+
+#################################################################
+# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/evaluation/fast_eval_api.py
+# with slight adjustments
+#################################################################
+def accumulate(self, use_self_eval=False):
+ """
+ Accumulate per image evaluation results and store the result in self.eval. Does not
+ support changing parameter settings from those used by self.evaluate()
+ """
+ if use_self_eval:
+ self.accumulate()
+ return
+ # CPP code is disabled
+ # self.eval = _CPP.COCOevalAccumulate(self.params, self.evalImgs)
+
+ # # recall is num_iou_thresholds X num_categories X num_area_ranges X num_max_detections
+ # self.eval["recall"] = np.array(self.eval["recall"]).reshape(
+ # self.eval["counts"][:1] + self.eval["counts"][2:]
+ # )
+
+ # # precision and scores are num_iou_thresholds X num_recall_thresholds X num_categories X
+ # # num_area_ranges X num_max_detections
+ # self.eval["precision"] = np.array(self.eval["precision"]).reshape(
+ # self.eval["counts"]
+ # )
+ # self.eval["scores"] = np.array(self.eval["scores"]).reshape(self.eval["counts"])
diff --git a/sam3/eval/coco_eval_offline.py b/sam3/eval/coco_eval_offline.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b072285c22af89d9466eb60bcbfac817af2139a
--- /dev/null
+++ b/sam3/eval/coco_eval_offline.py
@@ -0,0 +1,181 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+This evaluator is meant for regular COCO mAP evaluation, for example on the COCO val set.
+
+For category mAP, the model must make predictions for all categories on every single image.
+Since the number of classes can be large and the API model makes predictions individually for each (image, class) pair,
+the inference for a given image may need to be split into several chunks.
+"""
+
+import logging
+from collections import defaultdict
+
+import torch
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+from sam3.train.utils.distributed import is_main_process
+
+try:
+ from tidecv import datasets, TIDE
+
+ HAS_TIDE = True
+except ImportError:
+ HAS_TIDE = False
+ print("WARNING: TIDE not installed. Detailed analysis will not be available.")
+
+
+# the COCO detection metrics (https://github.com/cocodataset/cocoapi/blob/8c9bcc3cf640524c4c20a9c40e89cb6a2f2fa0e9/PythonAPI/pycocotools/cocoeval.py#L460-L471)
+COCO_METRICS = [
+ "AP",
+ "AP_50",
+ "AP_75",
+ "AP_small",
+ "AP_medium",
+ "AP_large",
+ "AR_maxDets@1",
+ "AR_maxDets@10",
+ "AR_maxDets@100",
+ "AR_small",
+ "AR_medium",
+ "AR_large",
+]
+
+
+def convert_to_xywh(boxes):
+ """Convert bounding boxes from xyxy format to xywh format."""
+ xmin, ymin, xmax, ymax = boxes.unbind(-1)
+ return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=-1)
+
+
+class HeapElement:
+ """Utility class to make a heap with a custom comparator"""
+
+ def __init__(self, val):
+ self.val = val
+
+ def __lt__(self, other):
+ return self.val["score"] < other.val["score"]
+
+
+class COCOevalCustom(COCOeval):
+ """
+ This is a slightly modified version of the original COCO API with added support for positive split evaluation.
+ """
+
+ def __init__(
+ self, cocoGt=None, cocoDt=None, iouType="segm", dt_only_positive=False
+ ):
+ super().__init__(cocoGt, cocoDt, iouType)
+ self.dt_only_positive = dt_only_positive
+
+ def _prepare(self):
+ """
+ Prepare ._gts and ._dts for evaluation based on params
+ :return: None
+ """
+
+ def _toMask(anns, coco):
+ # modify ann['segmentation'] by reference
+ for ann in anns:
+ rle = coco.annToRLE(ann)
+ ann["segmentation"] = rle
+
+ p = self.params
+ if p.useCats:
+ gts = self.cocoGt.loadAnns(
+ self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
+ )
+ dts = self.cocoDt.loadAnns(
+ self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
+ )
+ else:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
+
+ # convert ground truth to mask if iouType == 'segm'
+ if p.iouType == "segm":
+ _toMask(gts, self.cocoGt)
+ _toMask(dts, self.cocoDt)
+ # set ignore flag
+ for gt in gts:
+ gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
+ gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
+ if p.iouType == "keypoints":
+ gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+
+ _gts_cat_ids = defaultdict(set) # gt for evaluation on positive split
+ for gt in gts:
+ self._gts[gt["image_id"], gt["category_id"]].append(gt)
+ _gts_cat_ids[gt["image_id"]].add(gt["category_id"])
+
+ #### BEGIN MODIFICATION ####
+ for dt in dts:
+ if (
+ self.dt_only_positive
+ and dt["category_id"] not in _gts_cat_ids[dt["image_id"]]
+ ):
+ continue
+ self._dts[dt["image_id"], dt["category_id"]].append(dt)
+ #### END MODIFICATION ####
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
+ self.eval = {} # accumulated evaluation results
+
+
+class CocoEvaluatorOfflineWithPredFileEvaluators:
+ def __init__(
+ self,
+ gt_path,
+ tide: bool = True,
+ iou_type: str = "bbox",
+ positive_split=False,
+ ):
+ self.gt_path = gt_path
+ self.tide_enabled = HAS_TIDE and tide
+ self.positive_split = positive_split
+ self.iou_type = iou_type
+
+ def evaluate(self, dumped_file):
+ if not is_main_process():
+ return {}
+
+ logging.info("OfflineCoco evaluator: Loading groundtruth")
+ self.gt = COCO(self.gt_path)
+
+ # Creating the result file
+ logging.info("Coco evaluator: Creating the result file")
+ cocoDt = self.gt.loadRes(str(dumped_file))
+
+ # Run the evaluation
+ logging.info("Coco evaluator: Running evaluation")
+ coco_eval = COCOevalCustom(
+ self.gt, cocoDt, iouType=self.iou_type, dt_only_positive=self.positive_split
+ )
+ coco_eval.evaluate()
+ coco_eval.accumulate()
+ coco_eval.summarize()
+
+ outs = {}
+ for i, value in enumerate(coco_eval.stats):
+ outs[f"coco_eval_{self.iou_type}_{COCO_METRICS[i]}"] = value
+
+ if self.tide_enabled:
+ logging.info("Coco evaluator: Loading TIDE")
+ self.tide_gt = datasets.COCO(self.gt_path)
+ self.tide = TIDE(mode="mask" if self.iou_type == "segm" else "bbox")
+
+ # Run TIDE
+ logging.info("Coco evaluator: Running TIDE")
+ self.tide.evaluate(
+ self.tide_gt, datasets.COCOResult(str(dumped_file)), name="coco_eval"
+ )
+ self.tide.summarize()
+ for k, v in self.tide.get_main_errors()["coco_eval"].items():
+ outs[f"coco_eval_{self.iou_type}_TIDE_{k}"] = v
+
+ for k, v in self.tide.get_special_errors()["coco_eval"].items():
+ outs[f"coco_eval_{self.iou_type}_TIDE_{k}"] = v
+
+ return outs
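+
+
+# A minimal usage sketch (paths below are placeholders; kept as a comment so that
+# importing this module has no side effects):
+#
+#   evaluator = CocoEvaluatorOfflineWithPredFileEvaluators(
+#       gt_path="/path/to/instances_val.json",
+#       tide=False,               # skip TIDE error analysis
+#       iou_type="bbox",
+#   )
+#   metrics = evaluator.evaluate("/path/to/coco_predictions_bbox.json")
+#   print(metrics["coco_eval_bbox_AP"])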
diff --git a/sam3/eval/coco_reindex.py b/sam3/eval/coco_reindex.py
new file mode 100644
index 0000000000000000000000000000000000000000..49cd94429d0bd6d6b23e6a732028922ae5b17c38
--- /dev/null
+++ b/sam3/eval/coco_reindex.py
@@ -0,0 +1,230 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+Self-contained COCO JSON re-indexing function that creates temporary files.
+"""
+
+import json
+import os
+import tempfile
+from pathlib import Path
+from typing import Any, Dict, Tuple
+
+
+def reindex_coco_to_temp(input_json_path: str) -> str:
+ """
+ Convert 0-indexed COCO JSON file to 1-indexed and save to temporary location.
+
+ Args:
+ input_json_path: Path to the input COCO JSON file
+
+ Returns:
+ Path to a 1-indexed copy of the JSON file in a temporary directory (a copy is made even when no re-indexing is needed)
+
+ Raises:
+ FileNotFoundError: If input file doesn't exist
+ json.JSONDecodeError: If input file is not valid JSON
+ ValueError: If input file is not a valid COCO format
+ """
+
+ def is_coco_json(data: Dict[str, Any]) -> bool:
+ """Check if data appears to be a COCO format file."""
+ if not isinstance(data, dict):
+ return False
+ # A COCO file should have at least one of these keys
+ coco_keys = {"images", "annotations", "categories"}
+ return any(key in data for key in coco_keys)
+
+ def check_zero_indexed(data: Dict[str, Any]) -> Tuple[bool, bool, bool]:
+ """
+ Check if annotations, images, or categories start from index 0.
+
+ Returns:
+ Tuple of (annotations_zero_indexed, images_zero_indexed, categories_zero_indexed)
+ """
+ annotations_zero = False
+ images_zero = False
+ categories_zero = False
+
+ # Check annotations
+ annotations = data.get("annotations", [])
+ if annotations and any(ann.get("id", -1) == 0 for ann in annotations):
+ annotations_zero = True
+
+ # Check images
+ images = data.get("images", [])
+ if images and any(img.get("id", -1) == 0 for img in images):
+ images_zero = True
+
+ # Check categories
+ categories = data.get("categories", [])
+ if categories and any(cat.get("id", -1) == 0 for cat in categories):
+ categories_zero = True
+
+ return annotations_zero, images_zero, categories_zero
+
+ def reindex_coco_data(data: Dict[str, Any]) -> Dict[str, Any]:
+ """Convert 0-indexed COCO data to 1-indexed."""
+ modified_data = data.copy()
+
+ annotations_zero, images_zero, categories_zero = check_zero_indexed(data)
+
+ # Create ID mapping for consistency
+ image_id_mapping = {}
+ category_id_mapping = {}
+
+ # Process images first (since annotations reference image IDs)
+ if images_zero and "images" in modified_data:
+ for img in modified_data["images"]:
+ old_id = img["id"]
+ new_id = old_id + 1
+ image_id_mapping[old_id] = new_id
+ img["id"] = new_id
+
+ # Process categories (since annotations reference category IDs)
+ if categories_zero and "categories" in modified_data:
+ for cat in modified_data["categories"]:
+ old_id = cat["id"]
+ new_id = old_id + 1
+ category_id_mapping[old_id] = new_id
+ cat["id"] = new_id
+
+ # Process annotations
+ if "annotations" in modified_data:
+ for ann in modified_data["annotations"]:
+ # Update annotation ID if needed
+ if annotations_zero:
+ ann["id"] = ann["id"] + 1
+
+ # Update image_id reference if images were reindexed
+ if images_zero and ann.get("image_id") is not None:
+ old_image_id = ann["image_id"]
+ if old_image_id in image_id_mapping:
+ ann["image_id"] = image_id_mapping[old_image_id]
+
+ # Update category_id reference if categories were reindexed
+ if categories_zero and ann.get("category_id") is not None:
+ old_category_id = ann["category_id"]
+ if old_category_id in category_id_mapping:
+ ann["category_id"] = category_id_mapping[old_category_id]
+
+ return modified_data
+
+ # Validate input path
+ if not os.path.exists(input_json_path):
+ raise FileNotFoundError(f"Input file not found: {input_json_path}")
+
+ # Load and validate JSON data
+ try:
+ with open(input_json_path, "r", encoding="utf-8") as f:
+ data = json.load(f)
+ except json.JSONDecodeError as e:
+ raise json.JSONDecodeError(f"Invalid JSON in {input_json_path}: {e.msg}", e.doc, e.pos) from e
+
+ # Validate COCO format
+ if not is_coco_json(data):
+ raise ValueError(
+ f"File does not appear to be in COCO format: {input_json_path}"
+ )
+
+ # Check if reindexing is needed
+ annotations_zero, images_zero, categories_zero = check_zero_indexed(data)
+
+ if not (annotations_zero or images_zero or categories_zero):
+ # No conversion needed - just copy to temp location
+ input_path = Path(input_json_path)
+ temp_dir = tempfile.mkdtemp()
+ temp_filename = f"{input_path.stem}_1_indexed{input_path.suffix}"
+ temp_path = os.path.join(temp_dir, temp_filename)
+
+ with open(temp_path, "w", encoding="utf-8") as f:
+ json.dump(data, f, indent=2, ensure_ascii=False)
+
+ return temp_path
+
+ # Perform reindexing
+ modified_data = reindex_coco_data(data)
+
+ # Create temporary file
+ input_path = Path(input_json_path)
+ temp_dir = tempfile.mkdtemp()
+ temp_filename = f"{input_path.stem}_1_indexed{input_path.suffix}"
+ temp_path = os.path.join(temp_dir, temp_filename)
+
+ # Write modified data to temporary file
+ with open(temp_path, "w", encoding="utf-8") as f:
+ json.dump(modified_data, f, indent=2, ensure_ascii=False)
+
+ return temp_path
+
+
+# Example usage and test function
+def test_reindex_function():
+ """Test the reindex function with a sample COCO file."""
+
+ # Create a test COCO file
+ test_data = {
+ "info": {"description": "Test COCO dataset", "version": "1.0", "year": 2023},
+ "images": [
+ {"id": 0, "width": 640, "height": 480, "file_name": "test1.jpg"},
+ {"id": 1, "width": 640, "height": 480, "file_name": "test2.jpg"},
+ ],
+ "categories": [
+ {"id": 0, "name": "person", "supercategory": "person"},
+ {"id": 1, "name": "car", "supercategory": "vehicle"},
+ ],
+ "annotations": [
+ {
+ "id": 0,
+ "image_id": 0,
+ "category_id": 0,
+ "bbox": [100, 100, 50, 75],
+ "area": 3750,
+ "iscrowd": 0,
+ },
+ {
+ "id": 1,
+ "image_id": 1,
+ "category_id": 1,
+ "bbox": [200, 150, 120, 80],
+ "area": 9600,
+ "iscrowd": 0,
+ },
+ ],
+ }
+
+ # Create temporary test file
+ with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+ json.dump(test_data, f, indent=2)
+ test_file_path = f.name
+
+ try:
+ # Test the function
+ result_path = reindex_coco_to_temp(test_file_path)
+ print(f"Original file: {test_file_path}")
+ print(f"Converted file: {result_path}")
+
+ # Load and display the result
+ with open(result_path, "r") as f:
+ result_data = json.load(f)
+
+ print("\nConverted data sample:")
+ print(f"First image ID: {result_data['images'][0]['id']}")
+ print(f"First category ID: {result_data['categories'][0]['id']}")
+ print(f"First annotation ID: {result_data['annotations'][0]['id']}")
+ print(f"First annotation image_id: {result_data['annotations'][0]['image_id']}")
+ print(
+ f"First annotation category_id: {result_data['annotations'][0]['category_id']}"
+ )
+
+ # Clean up
+ os.unlink(result_path)
+ os.rmdir(os.path.dirname(result_path))
+
+ finally:
+ # Clean up test file
+ os.unlink(test_file_path)
+
+
+if __name__ == "__main__":
+ test_reindex_function()
diff --git a/sam3/eval/coco_writer.py b/sam3/eval/coco_writer.py
new file mode 100644
index 0000000000000000000000000000000000000000..5134e63c39532a56a2744fd8b18e74a29959c399
--- /dev/null
+++ b/sam3/eval/coco_writer.py
@@ -0,0 +1,352 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+COCO prediction dumper for distributed training.
+
+Handles collection and dumping of COCO-format predictions from models.
+Supports distributed processing with multiple GPUs/processes.
+"""
+
+import copy
+import gc
+import heapq
+import json
+import logging
+import os
+from collections import defaultdict
+from pathlib import Path
+from typing import Any, Optional
+
+import pycocotools.mask as mask_utils
+import torch
+from iopath.common.file_io import g_pathmgr
+from sam3.eval.coco_eval_offline import convert_to_xywh
+from sam3.train.masks_ops import rle_encode
+from sam3.train.utils.distributed import (
+ all_gather,
+ gather_to_rank_0_via_filesys,
+ get_rank,
+ is_main_process,
+)
+
+
+### Helper functions and classes
+
+
+class HeapElement:
+ """Utility class to make a heap with a custom comparator based on score."""
+
+ def __init__(self, val):
+ self.val = val
+
+ def __lt__(self, other):
+ return self.val["score"] < other.val["score"]
+
+
+class PredictionDumper:
+ """
+ Handles collection and dumping of COCO-format predictions from a model.
+
+ This class processes model outputs through a postprocessor, converts them to COCO format,
+ and saves them to disk. It supports distributed processing with multiple GPUs/processes.
+ """
+
+ def __init__(
+ self,
+ dump_dir: str,
+ postprocessor,
+ maxdets: int,
+ iou_type: str,
+ gather_pred_via_filesys: bool = False,
+ merge_predictions: bool = False,
+ pred_file_evaluators: Optional[Any] = None,
+ ):
+ """
+ Initialize the PredictionDumper.
+
+ Args:
+ dump_dir: Directory to dump predictions.
+ postprocessor: Module to convert the model's output into COCO format.
+ maxdets: Maximum number of detections per image.
+ iou_type: IoU type to evaluate. Can include "bbox", "segm"
+ gather_pred_via_filesys: If True, use the filesystem for collective gathers across
+ processes (requires a shared filesystem). Otherwise, use torch collective ops.
+ merge_predictions: If True, merge predictions from all processes and dump to a single file.
+ pred_file_evaluators: Optional list of evaluators whose evaluate() is run on the merged prediction file (requires merge_predictions=True).
+ """
+ self.iou_type = iou_type
+ self.maxdets = maxdets
+ self.dump_dir = dump_dir
+ self.postprocessor = postprocessor
+ self.gather_pred_via_filesys = gather_pred_via_filesys
+ self.merge_predictions = merge_predictions
+ self.pred_file_evaluators = pred_file_evaluators
+ if self.pred_file_evaluators is not None:
+ assert (
+ merge_predictions
+ ), "merge_predictions must be True if pred_file_evaluators are provided"
+ assert self.dump_dir is not None, "dump_dir must be provided"
+
+ if is_main_process():
+ os.makedirs(self.dump_dir, exist_ok=True)
+ logging.info(f"Created prediction dump directory: {self.dump_dir}")
+
+ # Initialize state
+ self.reset()
+
+ def update(self, *args, **kwargs):
+ """
+ Process and accumulate predictions from model outputs.
+
+ Args:
+ *args, **kwargs: Arguments passed to postprocessor.process_results()
+ """
+ predictions = self.postprocessor.process_results(*args, **kwargs)
+ results = self.prepare(predictions, self.iou_type)
+ self._dump(results)
+
+ def _dump(self, results):
+ """
+ Add results to the dump list with precision rounding.
+
+ Args:
+ results: List of prediction dictionaries in COCO format.
+ """
+ dumped_results = copy.deepcopy(results)
+ for r in dumped_results:
+ if "bbox" in r:
+ r["bbox"] = [round(coord, 5) for coord in r["bbox"]]
+ r["score"] = round(r["score"], 5)
+ self.dump.extend(dumped_results)
+
+ def synchronize_between_processes(self):
+ """
+ Synchronize predictions across all processes and save to disk.
+
+ If gather_pred_via_filesys is True, uses filesystem for gathering.
+ Otherwise, uses torch distributed collective operations.
+ Saves per-rank predictions to separate JSON files.
+ """
+ logging.info("Prediction Dumper: Synchronizing between processes")
+
+ if not self.merge_predictions:
+ dumped_file = (
+ Path(self.dump_dir)
+ / f"coco_predictions_{self.iou_type}_{get_rank()}.json"
+ )
+ logging.info(
+ f"Prediction Dumper: Dumping local predictions to {dumped_file}"
+ )
+ with g_pathmgr.open(str(dumped_file), "w") as f:
+ json.dump(self.dump, f)
+ else:
+ self.dump = self.gather_and_merge_predictions()
+ dumped_file = Path(self.dump_dir) / f"coco_predictions_{self.iou_type}.json"
+ if is_main_process():
+ logging.info(
+ f"Prediction Dumper: Dumping merged predictions to {dumped_file}"
+ )
+ with g_pathmgr.open(str(dumped_file), "w") as f:
+ json.dump(self.dump, f)
+
+ self.reset()
+ return dumped_file
+
+ def gather_and_merge_predictions(self):
+ """
+ Gather predictions from all processes and merge them, keeping top predictions per image.
+
+ This method collects predictions from all processes, then keeps only the top maxdets
+ predictions per image based on score. It also deduplicates predictions by (image_id, category_id).
+
+ Returns:
+ List of merged prediction dictionaries.
+ """
+ logging.info("Prediction Dumper: Gathering predictions from all processes")
+ gc.collect()
+
+ if self.gather_pred_via_filesys:
+ dump = gather_to_rank_0_via_filesys(self.dump)
+ else:
+ dump = all_gather(self.dump, force_cpu=True)
+
+ # Combine predictions, keeping only top maxdets per image
+ preds_by_image = defaultdict(list)
+ seen_img_cat = set()
+
+ for cur_dump in dump:
+ cur_seen_img_cat = set()
+ for p in cur_dump:
+ image_id = p["image_id"]
+ cat_id = p["category_id"]
+
+ # Skip if we've already seen this image/category pair in a previous dump
+ if (image_id, cat_id) in seen_img_cat:
+ continue
+
+ cur_seen_img_cat.add((image_id, cat_id))
+
+ # Use a min-heap to keep top predictions
+ if len(preds_by_image[image_id]) < self.maxdets:
+ heapq.heappush(preds_by_image[image_id], HeapElement(p))
+ else:
+ heapq.heappushpop(preds_by_image[image_id], HeapElement(p))
+
+ seen_img_cat.update(cur_seen_img_cat)
+
+ # Flatten the heap elements back to a list
+ merged_dump = sum(
+ [[h.val for h in cur_preds] for cur_preds in preds_by_image.values()], []
+ )
+
+ return merged_dump
+
+ def compute_synced(self):
+ """
+ Synchronize predictions across processes, dump them to disk, and run any
+ prediction-file evaluators on the merged prediction file.
+
+ Returns:
+ Dictionary of metrics from the prediction-file evaluators, or a placeholder dict if none are configured.
+ """
+ dumped_file = self.synchronize_between_processes()
+ if not is_main_process():
+ return {"": 0.0}
+
+ meters = {}
+ if self.pred_file_evaluators is not None:
+ for evaluator in self.pred_file_evaluators:
+ results = evaluator.evaluate(dumped_file)
+ meters.update(results)
+
+ if len(meters) == 0:
+ meters = {"": 0.0}
+ return meters
+
+ def compute(self):
+ """
+ Compute without synchronization.
+
+ Returns:
+ Placeholder metric dictionary (no metrics are computed without synchronization).
+ """
+ return {"": 0.0}
+
+ def reset(self):
+ """Reset internal state for a new evaluation round."""
+ self.dump = []
+
+ def prepare(self, predictions, iou_type):
+ """
+ Route predictions to the appropriate preparation method based on iou_type.
+
+ Args:
+ predictions: Dictionary mapping image IDs to prediction dictionaries.
+ iou_type: Type of evaluation ("bbox", "segm").
+
+ Returns:
+ List of COCO-format prediction dictionaries.
+ """
+ if iou_type == "bbox":
+ return self.prepare_for_coco_detection(predictions)
+ elif iou_type == "segm":
+ return self.prepare_for_coco_segmentation(predictions)
+ else:
+ raise ValueError(f"Unknown iou type: {iou_type}")
+
+ def prepare_for_coco_detection(self, predictions):
+ """
+ Convert predictions to COCO detection format.
+
+ Args:
+ predictions: Dictionary mapping image IDs to prediction dictionaries
+ containing "boxes", "scores", and "labels".
+
+ Returns:
+ List of COCO-format detection dictionaries.
+ """
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ coco_results.extend(
+ [
+ {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "bbox": box,
+ "score": scores[k],
+ }
+ for k, box in enumerate(boxes)
+ ]
+ )
+ return coco_results
+
+ @torch.no_grad()
+ def prepare_for_coco_segmentation(self, predictions):
+ """
+ Convert predictions to COCO segmentation format.
+
+ Args:
+ predictions: Dictionary mapping image IDs to prediction dictionaries
+ containing "masks" or "masks_rle", "scores", and "labels".
+ Optionally includes "boundaries" and "dilated_boundaries".
+
+ Returns:
+ List of COCO-format segmentation dictionaries with RLE-encoded masks.
+ """
+ coco_results = []
+ for original_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+
+ boxes = None
+ if "boxes" in prediction:
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ assert len(boxes) == len(scores)
+
+ if "masks_rle" in prediction:
+ rles = prediction["masks_rle"]
+ areas = []
+ for rle in rles:
+ cur_area = mask_utils.area(rle)
+ h, w = rle["size"]
+ areas.append(cur_area / (h * w))
+ else:
+ masks = prediction["masks"]
+ masks = masks > 0.5
+ h, w = masks.shape[-2:]
+
+ areas = masks.flatten(1).sum(1) / (h * w)
+ areas = areas.tolist()
+
+ rles = rle_encode(masks.squeeze(1))
+
+ # Memory cleanup
+ del masks
+ del prediction["masks"]
+
+ assert len(areas) == len(rles) == len(scores)
+
+ for k, rle in enumerate(rles):
+ payload = {
+ "image_id": original_id,
+ "category_id": labels[k],
+ "segmentation": rle,
+ "score": scores[k],
+ "area": areas[k],
+ }
+ if boxes is not None:
+ payload["bbox"] = boxes[k]
+
+ coco_results.append(payload)
+
+ return coco_results
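+
+
+# A minimal usage sketch (hypothetical postprocessor and data loader; kept as a
+# comment so nothing runs at import time):
+#
+#   dumper = PredictionDumper(
+#       dump_dir="/tmp/coco_dumps",        # assumed writable (shared) directory
+#       postprocessor=my_postprocessor,    # must expose process_results(...)
+#       maxdets=100,
+#       iou_type="bbox",
+#       merge_predictions=True,
+#   )
+#   for batch in loader:
+#       dumper.update(batch)               # forwarded to postprocessor.process_results
+#   metrics = dumper.compute_synced()      # gathers, dumps, and optionally evaluates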
diff --git a/sam3/eval/conversion_util.py b/sam3/eval/conversion_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..68942b6a52a1e8d71661408a8c3d885398e919cc
--- /dev/null
+++ b/sam3/eval/conversion_util.py
@@ -0,0 +1,211 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import json
+import os
+from collections import defaultdict
+
+from tqdm import tqdm
+
+
+def convert_ytbvis_to_cocovid_gt(ann_json, save_path=None):
+ """Convert YouTube VIS dataset to COCO-style video instance segmentation format.
+
+ Args:
+ ann_json (str): Path to YouTube VIS annotation JSON file
+ save_path (str): path to save converted COCO-style JSON
+ """
+ # Initialize COCO structure
+ VIS = {
+ "info": {},
+ "images": [],
+ "videos": [],
+ "tracks": [],
+ "annotations": [],
+ "categories": [],
+ "licenses": [],
+ }
+
+ # Load original annotations
+ with open(ann_json) as f:
+ official_anns = json.load(f)
+ VIS["categories"] = official_anns["categories"] # Direct copy categories
+
+ # Initialize counters
+ records = dict(img_id=1, ann_id=1)
+
+ # Create video-to-annotations mapping
+ vid_to_anns = defaultdict(list)
+ for ann in official_anns["annotations"]:
+ vid_to_anns[ann["video_id"]].append(ann)
+
+ # Create tracks directly
+ VIS["tracks"] = [
+ {
+ "id": ann["id"],
+ "category_id": ann["category_id"],
+ "video_id": ann["video_id"],
+ }
+ for ann in official_anns["annotations"]
+ ]
+
+ # Process videos
+ for video_info in tqdm(official_anns["videos"]):
+ # Create video entry
+ video = {
+ "id": video_info["id"],
+ "name": os.path.dirname(video_info["file_names"][0]),
+ "width": video_info["width"],
+ "height": video_info["height"],
+ "length": video_info["length"],
+ "neg_category_ids": [],
+ "not_exhaustive_category_ids": [],
+ }
+ VIS["videos"].append(video)
+
+ # Process frames
+ num_frames = len(video_info["file_names"])
+ for frame_idx in range(num_frames):
+ # Create image entry
+ image = {
+ "id": records["img_id"],
+ "video_id": video_info["id"],
+ "file_name": video_info["file_names"][frame_idx],
+ "width": video_info["width"],
+ "height": video_info["height"],
+ "frame_index": frame_idx,
+ "frame_id": frame_idx,
+ }
+ VIS["images"].append(image)
+
+ # Process annotations for this frame
+ if video_info["id"] in vid_to_anns:
+ for ann in vid_to_anns[video_info["id"]]:
+ bbox = ann["bboxes"][frame_idx]
+ if bbox is None:
+ continue
+
+ # Create annotation entry
+ annotation = {
+ "id": records["ann_id"],
+ "video_id": video_info["id"],
+ "image_id": records["img_id"],
+ "track_id": ann["id"],
+ "category_id": ann["category_id"],
+ "bbox": bbox,
+ "area": ann["areas"][frame_idx],
+ "segmentation": ann["segmentations"][frame_idx],
+ "iscrowd": ann["iscrowd"],
+ }
+ VIS["annotations"].append(annotation)
+ records["ann_id"] += 1
+
+ records["img_id"] += 1
+
+ # Print summary
+ print(f"Converted {len(VIS['videos'])} videos")
+ print(f"Converted {len(VIS['images'])} images")
+ print(f"Created {len(VIS['tracks'])} tracks")
+ print(f"Created {len(VIS['annotations'])} annotations")
+
+ if save_path is None:
+ return VIS
+
+ # Save output
+ save_dir = os.path.dirname(save_path)
+ os.makedirs(save_dir, exist_ok=True)
+ with open(save_path, "w") as f:
+ json.dump(VIS, f)
+
+ return VIS
+
+
+def convert_ytbvis_to_cocovid_pred(
+ youtubevis_pred_path: str, converted_dataset_path: str, output_path: str
+) -> None:
+ """
+ Convert YouTubeVIS predictions to COCO format with video_id preservation
+
+ Args:
+ youtubevis_pred_path: Path to YouTubeVIS prediction JSON
+ converted_dataset_path: Path to converted COCO dataset JSON
+ output_path: Path to save COCO format predictions
+ """
+
+ # Load YouTubeVIS predictions
+ with open(youtubevis_pred_path) as f:
+ ytv_predictions = json.load(f)
+
+ # Load converted dataset for image ID mapping
+ with open(converted_dataset_path) as f:
+ coco_dataset = json.load(f)
+
+ # Create (video_id, frame_idx) -> image_id mapping
+ image_id_map = {
+ (img["video_id"], img["frame_index"]): img["id"]
+ for img in coco_dataset["images"]
+ }
+
+ coco_annotations = []
+ track_id_counter = 1 # Unique track ID generator
+
+ for pred in tqdm(ytv_predictions):
+ video_id = pred["video_id"]
+ category_id = pred["category_id"]
+ bboxes = pred["bboxes"]
+ segmentations = pred.get("segmentations", []) # Get segmentations if available
+ areas = pred.get("areas", []) # Get areas if available
+ score = pred["score"]
+
+ # Assign unique track ID for this prediction
+ track_id = track_id_counter
+ track_id_counter += 1
+
+ # Ensure segmentations and areas have the same length as bboxes
+ if len(segmentations) == 0:
+ segmentations = [None] * len(bboxes)
+ if len(areas) == 0:
+ areas = [None] * len(bboxes)
+
+ for frame_idx, (bbox, segmentation, area_from_pred) in enumerate(
+ zip(bboxes, segmentations, areas)
+ ):
+ # Skip frames with missing objects (None or zero bbox)
+ if bbox is None or all(x == 0 for x in bbox):
+ continue
+
+ # Get corresponding image ID from mapping
+ image_id = image_id_map.get((video_id, frame_idx))
+ if image_id is None:
+ raise RuntimeError(
+ f"prediction {video_id=}, {frame_idx=} does not match any images in the converted COCO format"
+ )
+
+ # Extract bbox coordinates
+ x, y, w, h = bbox
+
+ # Calculate area - use area from prediction if available, otherwise from bbox
+ if area_from_pred is not None and area_from_pred > 0:
+ area = area_from_pred
+ else:
+ area = w * h
+
+ # Create COCO annotation with video_id
+ coco_annotation = {
+ "image_id": int(image_id),
+ "video_id": video_id, # Added video_id field
+ "track_id": track_id,
+ "category_id": category_id,
+ "bbox": [float(x), float(y), float(w), float(h)],
+ "area": float(area),
+ "iscrowd": 0,
+ "score": float(score),
+ }
+
+ # Add segmentation if available
+ if segmentation is not None:
+ coco_annotation["segmentation"] = segmentation
+
+ coco_annotations.append(coco_annotation)
+
+ # Save output
+ with open(output_path, "w") as f:
+ json.dump(coco_annotations, f)
+
+ print(f"Converted {len(coco_annotations)} predictions to COCO format with video_id")
diff --git a/sam3/eval/demo_eval.py b/sam3/eval/demo_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..70804ccd17a3b4772490e0c21ef2ff6e2307ef97
--- /dev/null
+++ b/sam3/eval/demo_eval.py
@@ -0,0 +1,658 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+This evaluator is based upon COCO evaluation, but evaluates the model in a "demo" setting.
+This means that the model's predictions are thresholded and evaluated as "hard" predictions.
+"""
+
+import logging
+from typing import Optional
+
+import numpy as np
+import pycocotools.mask as maskUtils
+from pycocotools.cocoeval import COCOeval
+
+from sam3.eval.coco_eval import CocoEvaluator
+from sam3.train.masks_ops import compute_F_measure
+from sam3.train.utils.distributed import is_main_process
+
+from scipy.optimize import linear_sum_assignment
+
+
+class DemoEval(COCOeval):
+ """
+ This evaluator is based upon COCO evaluation, but evaluates the model in a "demo" setting.
+ This means that the model's predictions are thresholded and evaluated as "hard" predictions.
+ """
+
+ def __init__(
+ self,
+ coco_gt=None,
+ coco_dt=None,
+ iouType="bbox",
+ threshold=0.5,
+ compute_JnF=False,
+ ):
+ """
+ Args:
+ coco_gt (COCO): ground truth COCO API
+ coco_dt (COCO): detections COCO API
+ iouType (str): type of IoU to evaluate ("bbox" or "segm")
+ threshold (float): score threshold above which predictions are kept as "hard" predictions
+ compute_JnF (bool): whether to also compute the J&F (region and contour) metrics
+ """
+ super().__init__(coco_gt, coco_dt, iouType)
+ self.threshold = threshold
+
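+ # Demo-style evaluation ignores categories, uses a single "all" area range,
+ # and places no practical cap on the number of detections per image.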
+ self.params.useCats = False
+ self.params.areaRng = [[0**2, 1e5**2]]
+ self.params.areaRngLbl = ["all"]
+ self.params.maxDets = [100000]
+ self.compute_JnF = compute_JnF
+
+ def computeIoU(self, imgId, catId):
+ # Same as the original COCOeval.computeIoU, but without sorting
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 and len(dt) == 0:
+ return []
+
+ if p.iouType == "segm":
+ g = [g["segmentation"] for g in gt]
+ d = [d["segmentation"] for d in dt]
+ elif p.iouType == "bbox":
+ g = [g["bbox"] for g in gt]
+ d = [d["bbox"] for d in dt]
+ else:
+ raise Exception("unknown iouType for iou computation")
+
+ # compute iou between each dt and gt region
+ iscrowd = [int(o["iscrowd"]) for o in gt]
+ ious = maskUtils.iou(d, g, iscrowd)
+ return ious
+
+ def evaluateImg(self, imgId, catId, aRng, maxDet):
+ """
+ perform evaluation for single category and image
+ :return: dict (single image results)
+ """
+ p = self.params
+ assert not p.useCats, "This evaluator does not support per-category evaluation."
+ assert catId == -1
+ all_gts = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ keep_gt = np.array([not g["ignore"] for g in all_gts], dtype=bool)
+ gt = [g for g in all_gts if not g["ignore"]]
+ all_dts = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ keep_dt = np.array([d["score"] >= self.threshold for d in all_dts], dtype=bool)
+ dt = [d for d in all_dts if d["score"] >= self.threshold]
+ if len(gt) == 0 and len(dt) == 0:
+ # This is a "true negative" case, where there are no GTs and no predictions
+ # The box-level metrics are ill-defined, so we don't add them to this dict
+ return {
+ "image_id": imgId,
+ "IL_TP": 0,
+ "IL_TN": 1,
+ "IL_FP": 0,
+ "IL_FN": 0,
+ "IL_perfect_neg": np.ones((len(p.iouThrs),), dtype=np.int64),
+ "num_dt": len(dt),
+ }
+
+ if len(gt) > 0 and len(dt) == 0:
+ # This is a "false negative" case, where there are GTs but no predictions
+ return {
+ "image_id": imgId,
+ "IL_TP": 0,
+ "IL_TN": 0,
+ "IL_FP": 0,
+ "IL_FN": 1,
+ "TPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "FPs": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "FNs": np.ones((len(p.iouThrs),), dtype=np.int64) * len(gt),
+ "local_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "local_positive_F1s": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "IL_perfect_pos": np.zeros((len(p.iouThrs),), dtype=np.int64),
+ "num_dt": len(dt),
+ }
+
+ # Load pre-computed ious
+ ious = self.ious[(imgId, catId)]
+
+ # compute matching
+ if len(ious) == 0:
+ ious = np.zeros((len(dt), len(gt)))
+ else:
+ ious = ious[keep_dt, :][:, keep_gt]
+ assert ious.shape == (len(dt), len(gt))
+
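+ # One-to-one matching between kept predictions and non-ignored GTs that
+ # maximizes total IoU (Hungarian algorithm on the negated IoU matrix).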
+ matched_dt, matched_gt = linear_sum_assignment(-ious)
+
+ match_scores = ious[matched_dt, matched_gt]
+
+ if self.compute_JnF and len(match_scores) > 0:
+ j_score = match_scores.mean()
+ f_measure = 0
+ for dt_id, gt_id in zip(matched_dt, matched_gt):
+ f_measure += compute_F_measure(
+ gt_boundary_rle=gt[gt_id]["boundary"],
+ gt_dilated_boundary_rle=gt[gt_id]["dilated_boundary"],
+ dt_boundary_rle=dt[dt_id]["boundary"],
+ dt_dilated_boundary_rle=dt[dt_id]["dilated_boundary"],
+ )
+ f_measure /= len(match_scores) + 1e-9
+ JnF = (j_score + f_measure) * 0.5
+ else:
+ j_score = f_measure = JnF = -1
+
+ TPs, FPs, FNs = [], [], []
+ IL_perfect = []
+ for thresh in p.iouThrs:
+ TP = (match_scores >= thresh).sum()
+ FP = len(dt) - TP
+ FN = len(gt) - TP
+ assert (
+ FP >= 0 and FN >= 0
+ ), f"FP: {FP}, FN: {FN}, TP: {TP}, match_scores: {match_scores}, len(dt): {len(dt)}, len(gt): {len(gt)}, ious: {ious}"
+ TPs.append(TP)
+ FPs.append(FP)
+ FNs.append(FN)
+
+ if FP == FN and FP == 0:
+ IL_perfect.append(1)
+ else:
+ IL_perfect.append(0)
+
+ TPs = np.array(TPs, dtype=np.int64)
+ FPs = np.array(FPs, dtype=np.int64)
+ FNs = np.array(FNs, dtype=np.int64)
+ IL_perfect = np.array(IL_perfect, dtype=np.int64)
+
+ # compute precision recall and F1
+ precision = TPs / (TPs + FPs + 1e-4)
+ assert np.all(precision <= 1)
+ recall = TPs / (TPs + FNs + 1e-4)
+ assert np.all(recall <= 1)
+ F1 = 2 * precision * recall / (precision + recall + 1e-4)
+
+ result = {
+ "image_id": imgId,
+ "TPs": TPs,
+ "FPs": FPs,
+ "FNs": FNs,
+ "local_F1s": F1,
+ "IL_TP": (len(gt) > 0) and (len(dt) > 0),
+ "IL_FP": (len(gt) == 0) and (len(dt) > 0),
+ "IL_TN": (len(gt) == 0) and (len(dt) == 0),
+ "IL_FN": (len(gt) > 0) and (len(dt) == 0),
+ ("IL_perfect_pos" if len(gt) > 0 else "IL_perfect_neg"): IL_perfect,
+ "F": f_measure,
+ "J": j_score,
+ "J&F": JnF,
+ "num_dt": len(dt),
+ }
+ if len(gt) > 0 and len(dt) > 0:
+ result["local_positive_F1s"] = F1
+ return result
+
+ def accumulate(self, p=None):
+ """
+ Accumulate per image evaluation results and store the result in self.eval
+ :param p: input params for evaluation
+ :return: None
+ """
+ if not self.evalImgs:
+ print("Please run evaluate() first")
+ # allows input customized parameters
+ if p is None:
+ p = self.params
+
+ setImgIds = set(p.imgIds)
+
+ # TPs, FPs, FNs
+ TPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ FPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ pmFPs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ FNs = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ local_F1s = np.zeros((len(p.iouThrs),), dtype=np.float64)
+
+ # Image level metrics
+ IL_TPs = 0
+ IL_FPs = 0
+ IL_TNs = 0
+ IL_FNs = 0
+ IL_perfects_neg = np.zeros((len(p.iouThrs),), dtype=np.int64)
+ IL_perfects_pos = np.zeros((len(p.iouThrs),), dtype=np.int64)
+
+ # JnF metric
+ total_J = 0
+ total_F = 0
+ total_JnF = 0
+
+ valid_img_count = 0
+ total_pos_count = 0
+ total_neg_count = 0
+ valid_J_count = 0
+ valid_F1_count = 0
+ valid_F1_count_w0dt = 0
+ for res in self.evalImgs:
+ if res["image_id"] not in setImgIds:
+ continue
+ IL_TPs += res["IL_TP"]
+ IL_FPs += res["IL_FP"]
+ IL_TNs += res["IL_TN"]
+ IL_FNs += res["IL_FN"]
+ if "IL_perfect_neg" in res:
+ IL_perfects_neg += res["IL_perfect_neg"]
+ total_neg_count += 1
+ else:
+ assert "IL_perfect_pos" in res
+ IL_perfects_pos += res["IL_perfect_pos"]
+ total_pos_count += 1
+
+ if "TPs" not in res:
+ continue
+
+ TPs += res["TPs"]
+ FPs += res["FPs"]
+ FNs += res["FNs"]
+ valid_img_count += 1
+
+ if "local_positive_F1s" in res:
+ local_F1s += res["local_positive_F1s"]
+ pmFPs += res["FPs"]
+ valid_F1_count_w0dt += 1
+ if res["num_dt"] > 0:
+ valid_F1_count += 1
+
+ if "J" in res and res["J"] > -1e-9:
+ total_J += res["J"]
+ total_F += res["F"]
+ total_JnF += res["J&F"]
+ valid_J_count += 1
+
+ # compute precision recall and F1
+ precision = TPs / (TPs + FPs + 1e-4)
+ positive_micro_precision = TPs / (TPs + pmFPs + 1e-4)
+ assert np.all(precision <= 1)
+ recall = TPs / (TPs + FNs + 1e-4)
+ assert np.all(recall <= 1)
+ F1 = 2 * precision * recall / (precision + recall + 1e-4)
+ positive_micro_F1 = (
+ 2
+ * positive_micro_precision
+ * recall
+ / (positive_micro_precision + recall + 1e-4)
+ )
+
+ IL_rec = IL_TPs / (IL_TPs + IL_FNs + 1e-6)
+ IL_prec = IL_TPs / (IL_TPs + IL_FPs + 1e-6)
+ IL_F1 = 2 * IL_prec * IL_rec / (IL_prec + IL_rec + 1e-6)
+ IL_FPR = IL_FPs / (IL_FPs + IL_TNs + 1e-6)
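+ # Image-level Matthews correlation coefficient over presence/absence decisions
+ # (ranges in [-1, 1]); the epsilon guards against a zero denominator.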
+ IL_MCC = float(IL_TPs * IL_TNs - IL_FPs * IL_FNs) / (
+ (
+ float(IL_TPs + IL_FPs)
+ * float(IL_TPs + IL_FNs)
+ * float(IL_TNs + IL_FPs)
+ * float(IL_TNs + IL_FNs)
+ )
+ ** 0.5
+ + 1e-6
+ )
+ IL_perfect_pos = IL_perfects_pos / (total_pos_count + 1e-9)
+ IL_perfect_neg = IL_perfects_neg / (total_neg_count + 1e-9)
+
+ total_J = total_J / (valid_J_count + 1e-9)
+ total_F = total_F / (valid_J_count + 1e-9)
+ total_JnF = total_JnF / (valid_J_count + 1e-9)
+
+ self.eval = {
+ "params": p,
+ "TPs": TPs,
+ "FPs": FPs,
+ "positive_micro_FPs": pmFPs,
+ "FNs": FNs,
+ "precision": precision,
+ "positive_micro_precision": positive_micro_precision,
+ "recall": recall,
+ "F1": F1,
+ "positive_micro_F1": positive_micro_F1,
+ "positive_macro_F1": local_F1s / valid_F1_count,
+ "positive_w0dt_macro_F1": local_F1s / valid_F1_count_w0dt,
+ "IL_recall": IL_rec,
+ "IL_precision": IL_prec,
+ "IL_F1": IL_F1,
+ "IL_FPR": IL_FPR,
+ "IL_MCC": IL_MCC,
+ "IL_perfect_pos": IL_perfect_pos,
+ "IL_perfect_neg": IL_perfect_neg,
+ "J": total_J,
+ "F": total_F,
+ "J&F": total_JnF,
+ }
+ self.eval["CGF1"] = self.eval["positive_macro_F1"] * self.eval["IL_MCC"]
+ self.eval["CGF1_w0dt"] = (
+ self.eval["positive_w0dt_macro_F1"] * self.eval["IL_MCC"]
+ )
+ self.eval["CGF1_micro"] = self.eval["positive_micro_F1"] * self.eval["IL_MCC"]
+
+ def summarize(self):
+ """
+ Compute and display summary metrics for evaluation results.
+        Note that this function can *only* be applied with the default parameter setting
+ """
+ if not self.eval:
+ raise Exception("Please run accumulate() first")
+
+ def _summarize(iouThr=None, metric=""):
+ p = self.params
+ iStr = " {:<18} @[ IoU={:<9}] = {:0.3f}"
+ titleStr = "Average " + metric
+ iouStr = (
+ "{:0.2f}:{:0.2f}".format(p.iouThrs[0], p.iouThrs[-1])
+ if iouThr is None
+ else "{:0.2f}".format(iouThr)
+ )
+
+ s = self.eval[metric]
+ # IoU
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ print(iStr.format(titleStr, iouStr, mean_s))
+ return mean_s
+
+ def _summarize_single(metric=""):
+ titleStr = "Average " + metric
+ iStr = " {:<35} = {:0.3f}"
+ s = self.eval[metric]
+ print(iStr.format(titleStr, s))
+ return s
+
+ def _summarizeDets():
+            # note: the indices of these metrics are also used in video Demo F1 evaluation;
+            # when adding new metrics, please update the indices accordingly in the
+            # "evaluate" method of the "VideoDemoF1Evaluator" class
+ stats = np.zeros((len(DEMO_METRICS),))
+ stats[0] = _summarize(metric="CGF1")
+ stats[1] = _summarize(metric="precision")
+ stats[2] = _summarize(metric="recall")
+ stats[3] = _summarize(metric="F1")
+ stats[4] = _summarize(metric="positive_macro_F1")
+ stats[5] = _summarize_single(metric="IL_precision")
+ stats[6] = _summarize_single(metric="IL_recall")
+ stats[7] = _summarize_single(metric="IL_F1")
+ stats[8] = _summarize_single(metric="IL_FPR")
+ stats[9] = _summarize_single(metric="IL_MCC")
+ stats[10] = _summarize(metric="IL_perfect_pos")
+ stats[11] = _summarize(metric="IL_perfect_neg")
+ stats[12] = _summarize(iouThr=0.5, metric="CGF1")
+ stats[13] = _summarize(iouThr=0.5, metric="precision")
+ stats[14] = _summarize(iouThr=0.5, metric="recall")
+ stats[15] = _summarize(iouThr=0.5, metric="F1")
+ stats[16] = _summarize(iouThr=0.5, metric="positive_macro_F1")
+ stats[17] = _summarize(iouThr=0.5, metric="IL_perfect_pos")
+ stats[18] = _summarize(iouThr=0.5, metric="IL_perfect_neg")
+ stats[19] = _summarize(iouThr=0.75, metric="CGF1")
+ stats[20] = _summarize(iouThr=0.75, metric="precision")
+ stats[21] = _summarize(iouThr=0.75, metric="recall")
+ stats[22] = _summarize(iouThr=0.75, metric="F1")
+ stats[23] = _summarize(iouThr=0.75, metric="positive_macro_F1")
+ stats[24] = _summarize(iouThr=0.75, metric="IL_perfect_pos")
+ stats[25] = _summarize(iouThr=0.75, metric="IL_perfect_neg")
+ stats[26] = _summarize_single(metric="J")
+ stats[27] = _summarize_single(metric="F")
+ stats[28] = _summarize_single(metric="J&F")
+ stats[29] = _summarize(metric="CGF1_micro")
+ stats[30] = _summarize(metric="positive_micro_precision")
+ stats[31] = _summarize(metric="positive_micro_F1")
+ stats[32] = _summarize(iouThr=0.5, metric="CGF1_micro")
+ stats[33] = _summarize(iouThr=0.5, metric="positive_micro_precision")
+ stats[34] = _summarize(iouThr=0.5, metric="positive_micro_F1")
+ stats[35] = _summarize(iouThr=0.75, metric="CGF1_micro")
+ stats[36] = _summarize(iouThr=0.75, metric="positive_micro_precision")
+ stats[37] = _summarize(iouThr=0.75, metric="positive_micro_F1")
+ stats[38] = _summarize(metric="CGF1_w0dt")
+ stats[39] = _summarize(metric="positive_w0dt_macro_F1")
+ stats[40] = _summarize(iouThr=0.5, metric="CGF1_w0dt")
+ stats[41] = _summarize(iouThr=0.5, metric="positive_w0dt_macro_F1")
+ stats[42] = _summarize(iouThr=0.75, metric="CGF1_w0dt")
+ stats[43] = _summarize(iouThr=0.75, metric="positive_w0dt_macro_F1")
+ return stats
+
+ summarize = _summarizeDets
+ self.stats = summarize()
+
+
+DEMO_METRICS = [
+ "CGF1",
+ "Precision",
+ "Recall",
+ "F1",
+ "Macro_F1",
+ "IL_Precision",
+ "IL_Recall",
+ "IL_F1",
+ "IL_FPR",
+ "IL_MCC",
+ "IL_perfect_pos",
+ "IL_perfect_neg",
+ "CGF1@0.5",
+ "Precision@0.5",
+ "Recall@0.5",
+ "F1@0.5",
+ "Macro_F1@0.5",
+ "IL_perfect_pos@0.5",
+ "IL_perfect_neg@0.5",
+ "CGF1@0.75",
+ "Precision@0.75",
+ "Recall@0.75",
+ "F1@0.75",
+ "Macro_F1@0.75",
+ "IL_perfect_pos@0.75",
+ "IL_perfect_neg@0.75",
+ "J",
+ "F",
+ "J&F",
+ "CGF1_micro",
+ "positive_micro_Precision",
+ "positive_micro_F1",
+ "CGF1_micro@0.5",
+ "positive_micro_Precision@0.5",
+ "positive_micro_F1@0.5",
+ "CGF1_micro@0.75",
+ "positive_micro_Precision@0.75",
+ "positive_micro_F1@0.75",
+ "CGF1_w0dt",
+ "positive_w0dt_macro_F1",
+ "CGF1_w0dt@0.5",
+ "positive_w0dt_macro_F1@0.5",
+ "CGF1_w0dt@0.75",
+ "positive_w0dt_macro_F1@0.75",
+]
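+# Example of how this list is consumed (hedged, based on the code below):
+# DEMO_METRICS[0] is "CGF1", so stats[0] returned by _summarizeDets() is exposed
+# by DemoEvaluator.summarize() under a key such as "coco_eval_masks_CGF1" (or
+# with an "oracle_" prefix when several ground truths are evaluated).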
+
+
+class DemoEvaluator(CocoEvaluator):
+ def __init__(
+ self,
+ coco_gt,
+ iou_types,
+ dump_dir: Optional[str],
+ postprocessor,
+ threshold=0.5,
+ average_by_rarity=False,
+ gather_pred_via_filesys=False,
+ exhaustive_only=False,
+ all_exhaustive_only=True,
+ compute_JnF=False,
+ metrics_dump_dir: Optional[str] = None,
+ ):
+ self.iou_types = iou_types
+ self.threshold = threshold
+ super().__init__(
+ coco_gt=coco_gt,
+ iou_types=iou_types,
+ useCats=False,
+ dump_dir=dump_dir,
+ postprocessor=postprocessor,
+ # average_by_rarity=average_by_rarity,
+ gather_pred_via_filesys=gather_pred_via_filesys,
+ exhaustive_only=exhaustive_only,
+ all_exhaustive_only=all_exhaustive_only,
+ metrics_dump_dir=metrics_dump_dir,
+ )
+
+ self.use_self_evaluate = True
+ self.compute_JnF = compute_JnF
+
+ def _lazy_init(self):
+ if self.initialized:
+ return
+ super()._lazy_init()
+ self.use_self_evaluate = True
+ self.reset()
+
+ def select_best_scoring(self, scorings):
+        # This function is used for "oracle"-type evaluation.
+        # It accepts the evaluation results with respect to several ground truths and picks the best one per image.
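+        # Sketch of the expected input (hedged): each element of `scorings` is an
+        # object array of shape [1, numAreas, numImgs] whose entries are the
+        # per-image result dicts produced during evaluation; the comparison below
+        # uses the mean of "local_F1s" and treats the empty-gt/empty-dt case
+        # (no "local_F1s" key) as a perfect prediction.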
+ if len(scorings) == 1:
+ return scorings[0]
+
+ assert (
+ scorings[0].ndim == 3
+ ), f"Expecting results in [numCats, numAreas, numImgs] format, got {scorings[0].shape}"
+ assert (
+ scorings[0].shape[0] == 1
+ ), f"Expecting a single category, got {scorings[0].shape[0]}"
+
+ for scoring in scorings:
+ assert (
+ scoring.shape == scorings[0].shape
+ ), f"Shape mismatch: {scoring.shape}, {scorings[0].shape}"
+
+ selected_imgs = []
+ for img_id in range(scorings[0].shape[-1]):
+ best = scorings[0][:, :, img_id]
+
+ for scoring in scorings[1:]:
+ current = scoring[:, :, img_id]
+ if "local_F1s" in best[0, 0] and "local_F1s" in current[0, 0]:
+                    # we were able to compute an F1 score for this particular image in both evaluations
+                    # best["local_F1s"] contains the results at various IoU thresholds. We simply take the average for comparison
+ best_score = best[0, 0]["local_F1s"].mean()
+ current_score = current[0, 0]["local_F1s"].mean()
+ if current_score > best_score:
+ best = current
+
+ else:
+                    # If we're here, it means that in some evaluation we were not able to get a valid local F1
+ # This happens when both the predictions and targets are empty. In that case, we can assume it's a perfect prediction
+ if "local_F1s" not in current[0, 0]:
+ best = current
+ selected_imgs.append(best)
+ result = np.stack(selected_imgs, axis=-1)
+ assert result.shape == scorings[0].shape
+ return result
+
+ def summarize(self):
+ self._lazy_init()
+ logging.info("Demo evaluator: Summarizing")
+ if not is_main_process():
+ return {}
+ outs = {}
+ prefix = "oracle_" if len(self.coco_evals) > 1 else ""
+ # if self.rarity_buckets is None:
+ self.accumulate(self.eval_img_ids)
+ for iou_type, coco_eval in self.coco_evals[0].items():
+ print("Demo metric, IoU type={}".format(iou_type))
+ coco_eval.summarize()
+
+ if "bbox" in self.coco_evals[0]:
+ for i, value in enumerate(self.coco_evals[0]["bbox"].stats):
+ outs[f"coco_eval_bbox_{prefix}{DEMO_METRICS[i]}"] = value
+ if "segm" in self.coco_evals[0]:
+ for i, value in enumerate(self.coco_evals[0]["segm"].stats):
+ outs[f"coco_eval_masks_{prefix}{DEMO_METRICS[i]}"] = value
+ # else:
+ # total_stats = {}
+ # for bucket, img_list in self.rarity_buckets.items():
+ # self.accumulate(imgIds=img_list)
+ # bucket_name = RARITY_BUCKETS[bucket]
+ # for iou_type, coco_eval in self.coco_evals[0].items():
+ # print(
+ # "Demo metric, IoU type={}, Rarity bucket={}".format(
+ # iou_type, bucket_name
+ # )
+ # )
+ # coco_eval.summarize()
+
+ # if "bbox" in self.coco_evals[0]:
+ # if "bbox" not in total_stats:
+ # total_stats["bbox"] = np.zeros_like(
+ # self.coco_evals[0]["bbox"].stats
+ # )
+ # total_stats["bbox"] += self.coco_evals[0]["bbox"].stats
+ # for i, value in enumerate(self.coco_evals[0]["bbox"].stats):
+ # outs[
+ # f"coco_eval_bbox_{bucket_name}_{prefix}{DEMO_METRICS[i]}"
+ # ] = value
+ # if "segm" in self.coco_evals[0]:
+ # if "segm" not in total_stats:
+ # total_stats["segm"] = np.zeros_like(
+ # self.coco_evals[0]["segm"].stats
+ # )
+ # total_stats["segm"] += self.coco_evals[0]["segm"].stats
+ # for i, value in enumerate(self.coco_evals[0]["segm"].stats):
+ # outs[
+ # f"coco_eval_masks_{bucket_name}_{prefix}{DEMO_METRICS[i]}"
+ # ] = value
+
+ # if "bbox" in total_stats:
+ # total_stats["bbox"] /= len(self.rarity_buckets)
+ # for i, value in enumerate(total_stats["bbox"]):
+ # outs[f"coco_eval_bbox_{prefix}{DEMO_METRICS[i]}"] = value
+ # if "segm" in total_stats:
+ # total_stats["segm"] /= len(self.rarity_buckets)
+ # for i, value in enumerate(total_stats["segm"]):
+ # outs[f"coco_eval_masks_{prefix}{DEMO_METRICS[i]}"] = value
+
+ return outs
+
+ def accumulate(self, imgIds=None):
+ self._lazy_init()
+ logging.info(
+ f"demo evaluator: Accumulating on {len(imgIds) if imgIds is not None else 'all'} images"
+ )
+ if not is_main_process():
+ return
+
+ if imgIds is not None:
+ for coco_eval in self.coco_evals[0].values():
+ coco_eval.params.imgIds = list(imgIds)
+
+ for coco_eval in self.coco_evals[0].values():
+ coco_eval.accumulate()
+
+ def reset(self):
+ self.coco_evals = [{} for _ in range(len(self.coco_gts))]
+ for i, coco_gt in enumerate(self.coco_gts):
+ for iou_type in self.iou_types:
+ self.coco_evals[i][iou_type] = DemoEval(
+ coco_gt=coco_gt,
+ iouType=iou_type,
+ threshold=self.threshold,
+ compute_JnF=self.compute_JnF,
+ )
+ self.coco_evals[i][iou_type].useCats = False
+ self.img_ids = []
+ self.eval_imgs = {k: [] for k in self.iou_types}
+ if self.dump is not None:
+ self.dump = []
diff --git a/sam3/eval/hota_eval_toolkit/__init__.py b/sam3/eval/hota_eval_toolkit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..9c0fa90a19fd599d0fe6668b8f3b75d6c368c500
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/__init__.py
@@ -0,0 +1 @@
+# flake8: noqa
diff --git a/sam3/eval/hota_eval_toolkit/run_ytvis_eval.py b/sam3/eval/hota_eval_toolkit/run_ytvis_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..c39dd05b20299942137189bd5371f54217ac9738
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/run_ytvis_eval.py
@@ -0,0 +1,114 @@
+# flake8: noqa
+
+"""run_youtube_vis.py
+Run example:
+run_youtube_vis.py --USE_PARALLEL False --METRICS HOTA --TRACKERS_TO_EVAL STEm_Seg
+Command Line Arguments: Defaults, # Comments
+ Eval arguments:
+ 'USE_PARALLEL': False,
+ 'NUM_PARALLEL_CORES': 8,
+ 'BREAK_ON_ERROR': True, # Raises exception and exits with error
+ 'RETURN_ON_ERROR': False, # if not BREAK_ON_ERROR, then returns from function on error
+ 'LOG_ON_ERROR': os.path.join(code_path, 'error_log.txt'), # if not None, save any errors into a log file.
+ 'PRINT_RESULTS': True,
+ 'PRINT_ONLY_COMBINED': False,
+ 'PRINT_CONFIG': True,
+ 'TIME_PROGRESS': True,
+ 'DISPLAY_LESS_PROGRESS': True,
+ 'OUTPUT_SUMMARY': True,
+ 'OUTPUT_EMPTY_CLASSES': True, # If False, summary files are not output for classes with no detections
+ 'OUTPUT_DETAILED': True,
+ 'PLOT_CURVES': True,
+ Dataset arguments:
+ 'GT_FOLDER': os.path.join(code_path, 'data/gt/youtube_vis/youtube_vis_training'), # Location of GT data
+ 'TRACKERS_FOLDER': os.path.join(code_path, 'data/trackers/youtube_vis/youtube_vis_training'),
+ # Trackers location
+ 'OUTPUT_FOLDER': None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
+ 'TRACKERS_TO_EVAL': None, # Filenames of trackers to eval (if None, all in folder)
+ 'CLASSES_TO_EVAL': None, # Classes to eval (if None, all classes)
+ 'SPLIT_TO_EVAL': 'training', # Valid: 'training', 'val'
+ 'PRINT_CONFIG': True, # Whether to print current config
+ 'OUTPUT_SUB_FOLDER': '', # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
+ 'TRACKER_SUB_FOLDER': 'data', # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
+ 'TRACKER_DISPLAY_NAMES': None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
+ Metric arguments:
+ 'METRICS': ['TrackMAP', 'HOTA', 'CLEAR', 'Identity']
+"""
+
+import argparse
+import os
+import sys
+from multiprocessing import freeze_support
+
+from . import trackeval
+
+
+def run_ytvis_eval(args=None, gt_json=None, dt_json=None):
+ # Command line interface:
+ default_eval_config = trackeval.Evaluator.get_default_eval_config()
+ # print only combined since TrackMAP is undefined for per sequence breakdowns
+ default_eval_config["PRINT_ONLY_COMBINED"] = True
+ default_dataset_config = trackeval.datasets.YouTubeVIS.get_default_dataset_config()
+ default_metrics_config = {"METRICS": ["HOTA"]}
+ config = {
+ **default_eval_config,
+ **default_dataset_config,
+ **default_metrics_config,
+ } # Merge default configs
+ parser = argparse.ArgumentParser()
+ for setting in config.keys():
+ if type(config[setting]) == list or type(config[setting]) == type(None):
+ parser.add_argument("--" + setting, nargs="+")
+ else:
+ parser.add_argument("--" + setting)
+ args = parser.parse_args(args).__dict__
+ for setting in args.keys():
+ if args[setting] is not None:
+ if type(config[setting]) == type(True):
+ if args[setting] == "True":
+ x = True
+ elif args[setting] == "False":
+ x = False
+ else:
+ raise Exception(
+ "Command line parameter " + setting + "must be True or False"
+ )
+ elif type(config[setting]) == type(1):
+ x = int(args[setting])
+ elif type(args[setting]) == type(None):
+ x = None
+ else:
+ x = args[setting]
+ config[setting] = x
+ eval_config = {k: v for k, v in config.items() if k in default_eval_config.keys()}
+ dataset_config = {
+ k: v for k, v in config.items() if k in default_dataset_config.keys()
+ }
+ metrics_config = {
+ k: v for k, v in config.items() if k in default_metrics_config.keys()
+ }
+
+ # Run code
+ evaluator = trackeval.Evaluator(eval_config)
+ # allow directly specifying the GT JSON data and Tracker (result)
+ # JSON data as Python objects, without reading from files.
+ dataset_config["GT_JSON_OBJECT"] = gt_json
+ dataset_config["TRACKER_JSON_OBJECT"] = dt_json
+ dataset_list = [trackeval.datasets.YouTubeVIS(dataset_config)]
+ metrics_list = []
+ # for metric in [trackeval.metrics.TrackMAP, trackeval.metrics.HOTA, trackeval.metrics.CLEAR,
+ # trackeval.metrics.Identity]:
+ for metric in [trackeval.metrics.HOTA]:
+ if metric.get_name() in metrics_config["METRICS"]:
+ metrics_list.append(metric())
+ if len(metrics_list) == 0:
+ raise Exception("No metrics selected for evaluation")
+ output_res, output_msg = evaluator.evaluate(dataset_list, metrics_list)
+ return output_res, output_msg
+
+
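+# Minimal usage sketch (hedged; `gt_dict` and `dt_dict` are hypothetical,
+# pre-loaded YouTube-VIS style ground-truth and tracker-result JSON objects):
+#
+#   output_res, output_msg = run_ytvis_eval(
+#       args=["--USE_PARALLEL", "False"], gt_json=gt_dict, dt_json=dt_dict
+#   )
+
+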
+if __name__ == "__main__":
+ freeze_support()
+ run_ytvis_eval(sys.argv[1:])
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/__init__.py b/sam3/eval/hota_eval_toolkit/trackeval/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..131e2b787973b4117f446ef78df1af3b91040068
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/__init__.py
@@ -0,0 +1,4 @@
+# flake8: noqa
+
+from . import datasets, metrics, utils
+from .eval import Evaluator
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/_timing.py b/sam3/eval/hota_eval_toolkit/trackeval/_timing.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad414dd547f0b70fead8bc3e45c3d494d53601fb
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/_timing.py
@@ -0,0 +1,68 @@
+# flake8: noqa
+
+import inspect
+from functools import wraps
+from time import perf_counter
+
+DO_TIMING = False
+DISPLAY_LESS_PROGRESS = False
+timer_dict = {}
+counter = 0
+
+
+def time(f):
+ @wraps(f)
+ def wrap(*args, **kw):
+ if DO_TIMING:
+ # Run function with timing
+ ts = perf_counter()
+ result = f(*args, **kw)
+ te = perf_counter()
+ tt = te - ts
+
+ # Get function name
+ arg_names = inspect.getfullargspec(f)[0]
+ if arg_names[0] == "self" and DISPLAY_LESS_PROGRESS:
+ return result
+ elif arg_names[0] == "self":
+ method_name = type(args[0]).__name__ + "." + f.__name__
+ else:
+ method_name = f.__name__
+
+ # Record accumulative time in each function for analysis
+ if method_name in timer_dict.keys():
+ timer_dict[method_name] += tt
+ else:
+ timer_dict[method_name] = tt
+
+ # If code is finished, display timing summary
+ if method_name == "Evaluator.evaluate":
+ print("")
+ print("Timing analysis:")
+ for key, value in timer_dict.items():
+ print("%-70s %2.4f sec" % (key, value))
+ else:
+ # Get function argument values for printing special arguments of interest
+ arg_titles = ["tracker", "seq", "cls"]
+ arg_vals = []
+ for i, a in enumerate(arg_names):
+ if a in arg_titles:
+ arg_vals.append(args[i])
+ arg_text = "(" + ", ".join(arg_vals) + ")"
+
+ # Display methods and functions with different indentation.
+ if arg_names[0] == "self":
+ print("%-74s %2.4f sec" % (" " * 4 + method_name + arg_text, tt))
+ elif arg_names[0] == "test":
+ pass
+ else:
+ global counter
+ counter += 1
+ print("%i %-70s %2.4f sec" % (counter, method_name + arg_text, tt))
+
+ return result
+ else:
+ # If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing.
+ return f(*args, **kw)
+
+ return wrap
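+
+
+# Minimal usage sketch (hedged, assuming the `sam3` package is importable):
+# enable timing globally and decorate the function to be profiled.
+#
+#   from sam3.eval.hota_eval_toolkit.trackeval import _timing
+#
+#   _timing.DO_TIMING = True
+#
+#   @_timing.time
+#   def evaluate_sequence(tracker, seq, cls):
+#       ...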
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/datasets/__init__.py b/sam3/eval/hota_eval_toolkit/trackeval/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..42f1620b0dcc1f5481658bb63b00bba0b6cc953d
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/datasets/__init__.py
@@ -0,0 +1,4 @@
+# flake8: noqa
+
+from .tao_ow import TAO_OW
+from .youtube_vis import YouTubeVIS
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/datasets/_base_dataset.py b/sam3/eval/hota_eval_toolkit/trackeval/datasets/_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb68025e4cb38b3d50436d8a95850f8ad62e9268
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/datasets/_base_dataset.py
@@ -0,0 +1,379 @@
+# flake8: noqa
+
+import csv
+import io
+import os
+import traceback
+import zipfile
+from abc import ABC, abstractmethod
+from copy import deepcopy
+
+import numpy as np
+
+from .. import _timing
+from ..utils import TrackEvalException
+
+
+class _BaseDataset(ABC):
+ @abstractmethod
+ def __init__(self):
+ self.tracker_list = None
+ self.seq_list = None
+ self.class_list = None
+ self.output_fol = None
+ self.output_sub_fol = None
+ self.should_classes_combine = True
+ self.use_super_categories = False
+
+ # Functions to implement:
+
+ @staticmethod
+ @abstractmethod
+ def get_default_dataset_config(): ...
+
+ @abstractmethod
+ def _load_raw_file(self, tracker, seq, is_gt): ...
+
+ @_timing.time
+ @abstractmethod
+ def get_preprocessed_seq_data(self, raw_data, cls): ...
+
+ @abstractmethod
+ def _calculate_similarities(self, gt_dets_t, tracker_dets_t): ...
+
+ # Helper functions for all datasets:
+
+ @classmethod
+ def get_class_name(cls):
+ return cls.__name__
+
+ def get_name(self):
+ return self.get_class_name()
+
+ def get_output_fol(self, tracker):
+ return os.path.join(self.output_fol, tracker, self.output_sub_fol)
+
+ def get_display_name(self, tracker):
+ """Can be overwritten if the trackers name (in files) is different to how it should be displayed.
+ By default this method just returns the trackers name as is.
+ """
+ return tracker
+
+ def get_eval_info(self):
+ """Return info about the dataset needed for the Evaluator"""
+ return self.tracker_list, self.seq_list, self.class_list
+
+ @_timing.time
+ def get_raw_seq_data(self, tracker, seq):
+ """Loads raw data (tracker and ground-truth) for a single tracker on a single sequence.
+ Raw data includes all of the information needed for both preprocessing and evaluation, for all classes.
+ A later function (get_processed_seq_data) will perform such preprocessing and extract relevant information for
+ the evaluation of each class.
+
+ This returns a dict which contains the fields:
+ [num_timesteps]: integer
+ [gt_ids, tracker_ids, gt_classes, tracker_classes, tracker_confidences]:
+ list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets, tracker_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
+ [similarity_scores]: list (for each timestep) of 2D NDArrays.
+ [gt_extras]: dict (for each extra) of lists (for each timestep) of 1D NDArrays (for each det).
+
+ gt_extras contains dataset specific information used for preprocessing such as occlusion and truncation levels.
+
+ Note that similarities are extracted as part of the dataset and not the metric, because almost all metrics are
+        independent of the exact method of calculating the similarity. However, datasets are not (e.g. segmentation
+ masks vs 2D boxes vs 3D boxes).
+ We calculate the similarity before preprocessing because often both preprocessing and evaluation require it and
+ we don't wish to calculate this twice.
+ We calculate similarity between all gt and tracker classes (not just each class individually) to allow for
+ calculation of metrics such as class confusion matrices. Typically the impact of this on performance is low.
+ """
+ # Load raw data.
+ raw_gt_data = self._load_raw_file(tracker, seq, is_gt=True)
+ raw_tracker_data = self._load_raw_file(tracker, seq, is_gt=False)
+ raw_data = {**raw_tracker_data, **raw_gt_data} # Merges dictionaries
+
+ # Calculate similarities for each timestep.
+ similarity_scores = []
+ for t, (gt_dets_t, tracker_dets_t) in enumerate(
+ zip(raw_data["gt_dets"], raw_data["tracker_dets"])
+ ):
+ ious = self._calculate_similarities(gt_dets_t, tracker_dets_t)
+ similarity_scores.append(ious)
+ raw_data["similarity_scores"] = similarity_scores
+ return raw_data
+
+ @staticmethod
+ def _load_simple_text_file(
+ file,
+ time_col=0,
+ id_col=None,
+ remove_negative_ids=False,
+ valid_filter=None,
+ crowd_ignore_filter=None,
+ convert_filter=None,
+ is_zipped=False,
+ zip_file=None,
+ force_delimiters=None,
+ ):
+ """Function that loads data which is in a commonly used text file format.
+ Assumes each det is given by one row of a text file.
+ There is no limit to the number or meaning of each column,
+        however, one column needs to give the timestep of each det (time_col), which defaults to column 0.
+
+        The file dialect (delimiter, num cols, etc.) is determined automatically.
+        This function automatically separates dets by timestep,
+        and is much faster than alternatives such as np.loadtxt or pandas.
+
+ If remove_negative_ids is True and id_col is not None, dets with negative values in id_col are excluded.
+ These are not excluded from ignore data.
+
+ valid_filter can be used to only include certain classes.
+ It is a dict with ints as keys, and lists as values,
+ such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
+ If None, all classes are included.
+
+ crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as valid filter.
+
+ convert_filter can be used to convert value read to another format.
+ This is used most commonly to convert classes given as string to a class id.
+ This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.
+
+ Optionally, input files could be a zip of multiple text files for storage efficiency.
+
+ Returns read_data and ignore_data.
+ Each is a dict (with keys as timesteps as strings) of lists (over dets) of lists (over column values).
+ Note that all data is returned as strings, and must be converted to float/int later if needed.
+        Note that timesteps will not be present in the returned dict keys if there are no dets for them.
+ """
+
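+        # Illustrative input row (hedged, a MOT-style CSV where column 0 is the
+        # timestep and column 1 the object id):
+        #   1, 3, 794.2, 247.5, 71.2, 174.8, 1.0, -1, -1, -1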
+ if remove_negative_ids and id_col is None:
+ raise TrackEvalException(
+ "remove_negative_ids is True, but id_col is not given."
+ )
+ if crowd_ignore_filter is None:
+ crowd_ignore_filter = {}
+ if convert_filter is None:
+ convert_filter = {}
+ try:
+ if is_zipped: # Either open file directly or within a zip.
+ if zip_file is None:
+ raise TrackEvalException(
+ "is_zipped set to True, but no zip_file is given."
+ )
+ archive = zipfile.ZipFile(os.path.join(zip_file), "r")
+ fp = io.TextIOWrapper(archive.open(file, "r"))
+ else:
+ fp = open(file)
+ read_data = {}
+ crowd_ignore_data = {}
+ fp.seek(0, os.SEEK_END)
+ # check if file is empty
+ if fp.tell():
+ fp.seek(0)
+ dialect = csv.Sniffer().sniff(
+ fp.readline(), delimiters=force_delimiters
+ ) # Auto determine structure.
+ dialect.skipinitialspace = (
+ True # Deal with extra spaces between columns
+ )
+ fp.seek(0)
+ reader = csv.reader(fp, dialect)
+ for row in reader:
+ try:
+ # Deal with extra trailing spaces at the end of rows
+                        if row[-1] == "":
+ row = row[:-1]
+ timestep = str(int(float(row[time_col])))
+ # Read ignore regions separately.
+ is_ignored = False
+ for ignore_key, ignore_value in crowd_ignore_filter.items():
+ if row[ignore_key].lower() in ignore_value:
+ # Convert values in one column (e.g. string to id)
+ for (
+ convert_key,
+ convert_value,
+ ) in convert_filter.items():
+ row[convert_key] = convert_value[
+ row[convert_key].lower()
+ ]
+ # Save data separated by timestep.
+ if timestep in crowd_ignore_data.keys():
+ crowd_ignore_data[timestep].append(row)
+ else:
+ crowd_ignore_data[timestep] = [row]
+ is_ignored = True
+ if (
+ is_ignored
+ ): # if det is an ignore region, it cannot be a normal det.
+ continue
+ # Exclude some dets if not valid.
+ if valid_filter is not None:
+ for key, value in valid_filter.items():
+ if row[key].lower() not in value:
+ continue
+ if remove_negative_ids:
+ if int(float(row[id_col])) < 0:
+ continue
+ # Convert values in one column (e.g. string to id)
+ for convert_key, convert_value in convert_filter.items():
+ row[convert_key] = convert_value[row[convert_key].lower()]
+ # Save data separated by timestep.
+ if timestep in read_data.keys():
+ read_data[timestep].append(row)
+ else:
+ read_data[timestep] = [row]
+ except Exception:
+ exc_str_init = (
+ "In file %s the following line cannot be read correctly: \n"
+ % os.path.basename(file)
+ )
+ exc_str = " ".join([exc_str_init] + row)
+ raise TrackEvalException(exc_str)
+ fp.close()
+ except Exception:
+ print("Error loading file: %s, printing traceback." % file)
+ traceback.print_exc()
+ raise TrackEvalException(
+ "File %s cannot be read because it is either not present or invalidly formatted"
+ % os.path.basename(file)
+ )
+ return read_data, crowd_ignore_data
+
+ @staticmethod
+ def _calculate_mask_ious(masks1, masks2, is_encoded=False, do_ioa=False):
+ """Calculates the IOU (intersection over union) between two arrays of segmentation masks.
+ If is_encoded a run length encoding with pycocotools is assumed as input format, otherwise an input of numpy
+ arrays of the shape (num_masks, height, width) is assumed and the encoding is performed.
+        If do_ioa (intersection over area), then calculates the intersection over the area of masks1 - this is commonly
+        used to determine if detections are within a crowd ignore region.
+ :param masks1: first set of masks (numpy array of shape (num_masks, height, width) if not encoded,
+ else pycocotools rle encoded format)
+ :param masks2: second set of masks (numpy array of shape (num_masks, height, width) if not encoded,
+ else pycocotools rle encoded format)
+ :param is_encoded: whether the input is in pycocotools rle encoded format
+ :param do_ioa: whether to perform IoA computation
+ :return: the IoU/IoA scores
+ """
+
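+        # Illustrative check (hedged): two identical binary masks of shape
+        # (1, H, W) give an IoU matrix of [[1.0]]; with do_ioa=True the score is
+        # instead the intersection divided by the area of the corresponding mask
+        # in masks1.
+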
+ # Only loaded when run to reduce minimum requirements
+ from pycocotools import mask as mask_utils
+
+ # use pycocotools for run length encoding of masks
+ if not is_encoded:
+ masks1 = mask_utils.encode(
+ np.array(np.transpose(masks1, (1, 2, 0)), order="F")
+ )
+ masks2 = mask_utils.encode(
+ np.array(np.transpose(masks2, (1, 2, 0)), order="F")
+ )
+
+ # use pycocotools for iou computation of rle encoded masks
+ ious = mask_utils.iou(masks1, masks2, [do_ioa] * len(masks2))
+ if len(masks1) == 0 or len(masks2) == 0:
+ ious = np.asarray(ious).reshape(len(masks1), len(masks2))
+ assert (ious >= 0 - np.finfo("float").eps).all()
+ assert (ious <= 1 + np.finfo("float").eps).all()
+
+ return ious
+
+ @staticmethod
+ def _calculate_box_ious(bboxes1, bboxes2, box_format="xywh", do_ioa=False):
+ """Calculates the IOU (intersection over union) between two arrays of boxes.
+ Allows variable box formats ('xywh' and 'x0y0x1y1').
+        If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is commonly
+        used to determine if detections are within a crowd ignore region.
+ """
+        if box_format == "xywh":
+ # layout: (x0, y0, w, h)
+ bboxes1 = deepcopy(bboxes1)
+ bboxes2 = deepcopy(bboxes2)
+
+ bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
+ bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
+ bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
+ bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
+        elif box_format != "x0y0x1y1":
+ raise (TrackEvalException("box_format %s is not implemented" % box_format))
+
+ # layout: (x0, y0, x1, y1)
+ min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
+ max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
+ intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(
+ min_[..., 3] - max_[..., 1], 0
+ )
+ area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
+ bboxes1[..., 3] - bboxes1[..., 1]
+ )
+
+ if do_ioa:
+ ioas = np.zeros_like(intersection)
+ valid_mask = area1 > 0 + np.finfo("float").eps
+ ioas[valid_mask, :] = (
+ intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]
+ )
+
+ return ioas
+ else:
+ area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
+ bboxes2[..., 3] - bboxes2[..., 1]
+ )
+ union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
+ intersection[area1 <= 0 + np.finfo("float").eps, :] = 0
+ intersection[:, area2 <= 0 + np.finfo("float").eps] = 0
+ intersection[union <= 0 + np.finfo("float").eps] = 0
+ union[union <= 0 + np.finfo("float").eps] = 1
+ ious = intersection / union
+ return ious
+
+ @staticmethod
+ def _calculate_euclidean_similarity(dets1, dets2, zero_distance=2.0):
+ """Calculates the euclidean distance between two sets of detections, and then converts this into a similarity
+ measure with values between 0 and 1 using the following formula: sim = max(0, 1 - dist/zero_distance).
+ The default zero_distance of 2.0, corresponds to the default used in MOT15_3D, such that a 0.5 similarity
+ threshold corresponds to a 1m distance threshold for TPs.
+ """
+ dist = np.linalg.norm(dets1[:, np.newaxis] - dets2[np.newaxis, :], axis=2)
+ sim = np.maximum(0, 1 - dist / zero_distance)
+ return sim
+
+ @staticmethod
+ def _check_unique_ids(data, after_preproc=False):
+ """Check the requirement that the tracker_ids and gt_ids are unique per timestep"""
+ gt_ids = data["gt_ids"]
+ tracker_ids = data["tracker_ids"]
+ for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(gt_ids, tracker_ids)):
+ if len(tracker_ids_t) > 0:
+ unique_ids, counts = np.unique(tracker_ids_t, return_counts=True)
+ if np.max(counts) != 1:
+ duplicate_ids = unique_ids[counts > 1]
+ exc_str_init = (
+ "Tracker predicts the same ID more than once in a single timestep "
+ "(seq: %s, frame: %i, ids:" % (data["seq"], t + 1)
+ )
+ exc_str = (
+ " ".join([exc_str_init] + [str(d) for d in duplicate_ids]) + ")"
+ )
+ if after_preproc:
+ exc_str_init += (
+ "\n Note that this error occurred after preprocessing (but not before), "
+ "so ids may not be as in file, and something seems wrong with preproc."
+ )
+ raise TrackEvalException(exc_str)
+ if len(gt_ids_t) > 0:
+ unique_ids, counts = np.unique(gt_ids_t, return_counts=True)
+ if np.max(counts) != 1:
+ duplicate_ids = unique_ids[counts > 1]
+ exc_str_init = (
+ "Ground-truth has the same ID more than once in a single timestep "
+ "(seq: %s, frame: %i, ids:" % (data["seq"], t + 1)
+ )
+ exc_str = (
+ " ".join([exc_str_init] + [str(d) for d in duplicate_ids]) + ")"
+ )
+ if after_preproc:
+ exc_str_init += (
+ "\n Note that this error occurred after preprocessing (but not before), "
+ "so ids may not be as in file, and something seems wrong with preproc."
+ )
+ raise TrackEvalException(exc_str)
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/datasets/tao_ow.py b/sam3/eval/hota_eval_toolkit/trackeval/datasets/tao_ow.py
new file mode 100644
index 0000000000000000000000000000000000000000..06bc93bce27b756dffb6cef4724cb1caa3aecdd8
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/datasets/tao_ow.py
@@ -0,0 +1,891 @@
+# flake8: noqa
+
+import itertools
+import json
+import os
+from collections import defaultdict
+
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+
+from .. import _timing, utils
+from ..utils import TrackEvalException
+from ._base_dataset import _BaseDataset
+
+
+class TAO_OW(_BaseDataset):
+ """Dataset class for TAO tracking"""
+
+ @staticmethod
+ def get_default_dataset_config():
+ """Default class config values"""
+ code_path = utils.get_code_path()
+ default_config = {
+ "GT_FOLDER": os.path.join(
+ code_path, "data/gt/tao/tao_training"
+ ), # Location of GT data
+ "TRACKERS_FOLDER": os.path.join(
+ code_path, "data/trackers/tao/tao_training"
+ ), # Trackers location
+ "OUTPUT_FOLDER": None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
+ "TRACKERS_TO_EVAL": None, # Filenames of trackers to eval (if None, all in folder)
+ "CLASSES_TO_EVAL": None, # Classes to eval (if None, all classes)
+ "SPLIT_TO_EVAL": "training", # Valid: 'training', 'val'
+ "PRINT_CONFIG": True, # Whether to print current config
+ "TRACKER_SUB_FOLDER": "data", # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
+ "OUTPUT_SUB_FOLDER": "", # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
+ "TRACKER_DISPLAY_NAMES": None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
+ "MAX_DETECTIONS": 300, # Number of maximal allowed detections per image (0 for unlimited)
+ "SUBSET": "all",
+ }
+ return default_config
+
+ def __init__(self, config=None):
+ """Initialise dataset, checking that all required files are present"""
+ super().__init__()
+ # Fill non-given config values with defaults
+ self.config = utils.init_config(
+ config, self.get_default_dataset_config(), self.get_name()
+ )
+ self.gt_fol = self.config["GT_FOLDER"]
+ self.tracker_fol = self.config["TRACKERS_FOLDER"]
+ self.should_classes_combine = True
+ self.use_super_categories = False
+
+ self.tracker_sub_fol = self.config["TRACKER_SUB_FOLDER"]
+ self.output_fol = self.config["OUTPUT_FOLDER"]
+ if self.output_fol is None:
+ self.output_fol = self.tracker_fol
+ self.output_sub_fol = self.config["OUTPUT_SUB_FOLDER"]
+
+ gt_dir_files = [
+ file for file in os.listdir(self.gt_fol) if file.endswith(".json")
+ ]
+ if len(gt_dir_files) != 1:
+ raise TrackEvalException(
+ self.gt_fol + " does not contain exactly one json file."
+ )
+
+ with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
+ self.gt_data = json.load(f)
+
+ self.subset = self.config["SUBSET"]
+ if self.subset != "all":
+ # Split GT data into `known`, `unknown` or `distractor`
+ self._split_known_unknown_distractor()
+ self.gt_data = self._filter_gt_data(self.gt_data)
+
+ # merge categories marked with a merged tag in TAO dataset
+ self._merge_categories(self.gt_data["annotations"] + self.gt_data["tracks"])
+
+ # Get sequences to eval and sequence information
+ self.seq_list = [
+ vid["name"].replace("/", "-") for vid in self.gt_data["videos"]
+ ]
+ self.seq_name_to_seq_id = {
+ vid["name"].replace("/", "-"): vid["id"] for vid in self.gt_data["videos"]
+ }
+ # compute mappings from videos to annotation data
+ self.videos_to_gt_tracks, self.videos_to_gt_images = self._compute_vid_mappings(
+ self.gt_data["annotations"]
+ )
+ # compute sequence lengths
+ self.seq_lengths = {vid["id"]: 0 for vid in self.gt_data["videos"]}
+ for img in self.gt_data["images"]:
+ self.seq_lengths[img["video_id"]] += 1
+ self.seq_to_images_to_timestep = self._compute_image_to_timestep_mappings()
+ self.seq_to_classes = {
+ vid["id"]: {
+ "pos_cat_ids": list(
+ {
+ track["category_id"]
+ for track in self.videos_to_gt_tracks[vid["id"]]
+ }
+ ),
+ "neg_cat_ids": vid["neg_category_ids"],
+ "not_exhaustively_labeled_cat_ids": vid["not_exhaustive_category_ids"],
+ }
+ for vid in self.gt_data["videos"]
+ }
+
+ # Get classes to eval
+ considered_vid_ids = [self.seq_name_to_seq_id[vid] for vid in self.seq_list]
+ seen_cats = set(
+ [
+ cat_id
+ for vid_id in considered_vid_ids
+ for cat_id in self.seq_to_classes[vid_id]["pos_cat_ids"]
+ ]
+ )
+ # only classes with ground truth are evaluated in TAO
+ self.valid_classes = [
+ cls["name"] for cls in self.gt_data["categories"] if cls["id"] in seen_cats
+ ]
+ # cls_name_to_cls_id_map = {cls['name']: cls['id'] for cls in self.gt_data['categories']}
+
+ if self.config["CLASSES_TO_EVAL"]:
+ # self.class_list = [cls.lower() if cls.lower() in self.valid_classes else None
+ # for cls in self.config['CLASSES_TO_EVAL']]
+ self.class_list = ["object"] # class-agnostic
+ if not all(self.class_list):
+ raise TrackEvalException(
+ "Attempted to evaluate an invalid class. Only classes "
+ + ", ".join(self.valid_classes)
+ + " are valid (classes present in ground truth data)."
+ )
+ else:
+ # self.class_list = [cls for cls in self.valid_classes]
+ self.class_list = ["object"] # class-agnostic
+ # self.class_name_to_class_id = {k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list}
+ self.class_name_to_class_id = {"object": 1} # class-agnostic
+
+ # Get trackers to eval
+ if self.config["TRACKERS_TO_EVAL"] is None:
+ self.tracker_list = os.listdir(self.tracker_fol)
+ else:
+ self.tracker_list = self.config["TRACKERS_TO_EVAL"]
+
+ if self.config["TRACKER_DISPLAY_NAMES"] is None:
+ self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
+ elif (self.config["TRACKERS_TO_EVAL"] is not None) and (
+ len(self.config["TRACKER_DISPLAY_NAMES"]) == len(self.tracker_list)
+ ):
+ self.tracker_to_disp = dict(
+ zip(self.tracker_list, self.config["TRACKER_DISPLAY_NAMES"])
+ )
+ else:
+ raise TrackEvalException(
+ "List of tracker files and tracker display names do not match."
+ )
+
+ self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
+
+ for tracker in self.tracker_list:
+ tr_dir_files = [
+ file
+ for file in os.listdir(
+ os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ )
+ if file.endswith(".json")
+ ]
+ if len(tr_dir_files) != 1:
+ raise TrackEvalException(
+ os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ + " does not contain exactly one json file."
+ )
+ with open(
+ os.path.join(
+ self.tracker_fol, tracker, self.tracker_sub_fol, tr_dir_files[0]
+ )
+ ) as f:
+ curr_data = json.load(f)
+
+ # limit detections if MAX_DETECTIONS > 0
+ if self.config["MAX_DETECTIONS"]:
+ curr_data = self._limit_dets_per_image(curr_data)
+
+ # fill missing video ids
+ self._fill_video_ids_inplace(curr_data)
+
+ # make track ids unique over whole evaluation set
+ self._make_track_ids_unique(curr_data)
+
+ # merge categories marked with a merged tag in TAO dataset
+ self._merge_categories(curr_data)
+
+ # get tracker sequence information
+ curr_videos_to_tracker_tracks, curr_videos_to_tracker_images = (
+ self._compute_vid_mappings(curr_data)
+ )
+ self.tracker_data[tracker]["vids_to_tracks"] = curr_videos_to_tracker_tracks
+ self.tracker_data[tracker]["vids_to_images"] = curr_videos_to_tracker_images
+
+ def get_display_name(self, tracker):
+ return self.tracker_to_disp[tracker]
+
+ def _load_raw_file(self, tracker, seq, is_gt):
+ """Load a file (gt or tracker) in the TAO format
+
+ If is_gt, this returns a dict which contains the fields:
+ [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets]: list (for each timestep) of lists of detections.
+ [classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
+ keys and corresponding segmentations as values) for each track
+ [classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_lengths]: dictionary with class values
+ as keys and lists (for each track) as values
+
+ if not is_gt, this returns a dict which contains the fields:
+ [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
+ [tracker_dets]: list (for each timestep) of lists of detections.
+ [classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
+ keys and corresponding segmentations as values) for each track
+ [classes_to_dt_track_ids, classes_to_dt_track_areas, classes_to_dt_track_lengths]: dictionary with class values
+ as keys and lists as values
+ [classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
+ """
+ seq_id = self.seq_name_to_seq_id[seq]
+ # File location
+ if is_gt:
+ imgs = self.videos_to_gt_images[seq_id]
+ else:
+ imgs = self.tracker_data[tracker]["vids_to_images"][seq_id]
+
+ # Convert data to required format
+ num_timesteps = self.seq_lengths[seq_id]
+ img_to_timestep = self.seq_to_images_to_timestep[seq_id]
+ data_keys = ["ids", "classes", "dets"]
+ if not is_gt:
+ data_keys += ["tracker_confidences"]
+ raw_data = {key: [None] * num_timesteps for key in data_keys}
+ for img in imgs:
+            # some tracker data contains images without any ground truth information; these are ignored
+ try:
+ t = img_to_timestep[img["id"]]
+ except KeyError:
+ continue
+ annotations = img["annotations"]
+ raw_data["dets"][t] = np.atleast_2d(
+ [ann["bbox"] for ann in annotations]
+ ).astype(float)
+ raw_data["ids"][t] = np.atleast_1d(
+ [ann["track_id"] for ann in annotations]
+ ).astype(int)
+ raw_data["classes"][t] = np.atleast_1d([1 for _ in annotations]).astype(
+ int
+ ) # class-agnostic
+ if not is_gt:
+ raw_data["tracker_confidences"][t] = np.atleast_1d(
+ [ann["score"] for ann in annotations]
+ ).astype(float)
+
+ for t, d in enumerate(raw_data["dets"]):
+ if d is None:
+ raw_data["dets"][t] = np.empty((0, 4)).astype(float)
+ raw_data["ids"][t] = np.empty(0).astype(int)
+ raw_data["classes"][t] = np.empty(0).astype(int)
+ if not is_gt:
+ raw_data["tracker_confidences"][t] = np.empty(0)
+
+ if is_gt:
+ key_map = {"ids": "gt_ids", "classes": "gt_classes", "dets": "gt_dets"}
+ else:
+ key_map = {
+ "ids": "tracker_ids",
+ "classes": "tracker_classes",
+ "dets": "tracker_dets",
+ }
+ for k, v in key_map.items():
+ raw_data[v] = raw_data.pop(k)
+
+ # all_classes = [self.class_name_to_class_id[cls] for cls in self.class_list]
+ all_classes = [1] # class-agnostic
+
+ if is_gt:
+ classes_to_consider = all_classes
+ all_tracks = self.videos_to_gt_tracks[seq_id]
+ else:
+ # classes_to_consider = self.seq_to_classes[seq_id]['pos_cat_ids'] \
+ # + self.seq_to_classes[seq_id]['neg_cat_ids']
+ classes_to_consider = all_classes # class-agnostic
+ all_tracks = self.tracker_data[tracker]["vids_to_tracks"][seq_id]
+
+ # classes_to_tracks = {cls: [track for track in all_tracks if track['category_id'] == cls]
+ # if cls in classes_to_consider else [] for cls in all_classes}
+ classes_to_tracks = {
+ cls: [track for track in all_tracks] if cls in classes_to_consider else []
+ for cls in all_classes
+ } # class-agnostic
+
+ # mapping from classes to track information
+ raw_data["classes_to_tracks"] = {
+ cls: [
+ {
+ det["image_id"]: np.atleast_1d(det["bbox"])
+ for det in track["annotations"]
+ }
+ for track in tracks
+ ]
+ for cls, tracks in classes_to_tracks.items()
+ }
+ raw_data["classes_to_track_ids"] = {
+ cls: [track["id"] for track in tracks]
+ for cls, tracks in classes_to_tracks.items()
+ }
+ raw_data["classes_to_track_areas"] = {
+ cls: [track["area"] for track in tracks]
+ for cls, tracks in classes_to_tracks.items()
+ }
+ raw_data["classes_to_track_lengths"] = {
+ cls: [len(track["annotations"]) for track in tracks]
+ for cls, tracks in classes_to_tracks.items()
+ }
+
+ if not is_gt:
+ raw_data["classes_to_dt_track_scores"] = {
+ cls: np.array(
+ [
+ np.mean([float(x["score"]) for x in track["annotations"]])
+ for track in tracks
+ ]
+ )
+ for cls, tracks in classes_to_tracks.items()
+ }
+
+ if is_gt:
+ key_map = {
+ "classes_to_tracks": "classes_to_gt_tracks",
+ "classes_to_track_ids": "classes_to_gt_track_ids",
+ "classes_to_track_lengths": "classes_to_gt_track_lengths",
+ "classes_to_track_areas": "classes_to_gt_track_areas",
+ }
+ else:
+ key_map = {
+ "classes_to_tracks": "classes_to_dt_tracks",
+ "classes_to_track_ids": "classes_to_dt_track_ids",
+ "classes_to_track_lengths": "classes_to_dt_track_lengths",
+ "classes_to_track_areas": "classes_to_dt_track_areas",
+ }
+ for k, v in key_map.items():
+ raw_data[v] = raw_data.pop(k)
+
+ raw_data["num_timesteps"] = num_timesteps
+ raw_data["neg_cat_ids"] = self.seq_to_classes[seq_id]["neg_cat_ids"]
+ raw_data["not_exhaustively_labeled_cls"] = self.seq_to_classes[seq_id][
+ "not_exhaustively_labeled_cat_ids"
+ ]
+ raw_data["seq"] = seq
+ return raw_data
+
+ @_timing.time
+ def get_preprocessed_seq_data(self, raw_data, cls):
+ """Preprocess data for a single sequence for a single class ready for evaluation.
+ Inputs:
+ - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
+ - cls is the class to be evaluated.
+ Outputs:
+ - data is a dict containing all of the information that metrics need to perform evaluation.
+ It contains the following fields:
+ [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
+ [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
+ [similarity_scores]: list (for each timestep) of 2D NDArrays.
+ Notes:
+ General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
+ 1) Extract only detections relevant for the class to be evaluated (including distractor detections).
+ 2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
+ distractor class, or otherwise marked as to be removed.
+ 3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
+ other criteria (e.g. are too small).
+ 4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
+ After the above preprocessing steps, this function also calculates the number of gt and tracker detections
+ and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
+ unique within each timestep.
+ TAO:
+            In TAO, the 4 preproc steps are as follows:
+ 1) All classes present in the ground truth data are evaluated separately.
+ 2) No matched tracker detections are removed.
+                3) Unmatched tracker detections are removed if there is no ground truth data and the class does not
+ belong to the categories marked as negative for this sequence. Additionally, unmatched tracker
+ detections for classes which are marked as not exhaustively labeled are removed.
+ 4) No gt detections are removed.
+ Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
+ and the tracks from the tracker data are sorted according to the tracker confidence.
+ """
+ cls_id = self.class_name_to_class_id[cls]
+ is_not_exhaustively_labeled = cls_id in raw_data["not_exhaustively_labeled_cls"]
+ is_neg_category = cls_id in raw_data["neg_cat_ids"]
+
+ data_keys = [
+ "gt_ids",
+ "tracker_ids",
+ "gt_dets",
+ "tracker_dets",
+ "tracker_confidences",
+ "similarity_scores",
+ ]
+ data = {key: [None] * raw_data["num_timesteps"] for key in data_keys}
+ unique_gt_ids = []
+ unique_tracker_ids = []
+ num_gt_dets = 0
+ num_tracker_dets = 0
+ for t in range(raw_data["num_timesteps"]):
+ # Only extract relevant dets for this class for preproc and eval (cls)
+ gt_class_mask = np.atleast_1d(raw_data["gt_classes"][t] == cls_id)
+ gt_class_mask = gt_class_mask.astype(bool)
+ gt_ids = raw_data["gt_ids"][t][gt_class_mask]
+ gt_dets = raw_data["gt_dets"][t][gt_class_mask]
+
+ tracker_class_mask = np.atleast_1d(raw_data["tracker_classes"][t] == cls_id)
+ tracker_class_mask = tracker_class_mask.astype(bool)
+ tracker_ids = raw_data["tracker_ids"][t][tracker_class_mask]
+ tracker_dets = raw_data["tracker_dets"][t][tracker_class_mask]
+ tracker_confidences = raw_data["tracker_confidences"][t][tracker_class_mask]
+ similarity_scores = raw_data["similarity_scores"][t][gt_class_mask, :][
+ :, tracker_class_mask
+ ]
+
+ # Match tracker and gt dets (with hungarian algorithm).
+ unmatched_indices = np.arange(tracker_ids.shape[0])
+ if gt_ids.shape[0] > 0 and tracker_ids.shape[0] > 0:
+ matching_scores = similarity_scores.copy()
+ matching_scores[matching_scores < 0.5 - np.finfo("float").eps] = 0
+ match_rows, match_cols = linear_sum_assignment(-matching_scores)
+ actually_matched_mask = (
+ matching_scores[match_rows, match_cols] > 0 + np.finfo("float").eps
+ )
+ match_cols = match_cols[actually_matched_mask]
+ unmatched_indices = np.delete(unmatched_indices, match_cols, axis=0)
+
+ if gt_ids.shape[0] == 0 and not is_neg_category:
+ to_remove_tracker = unmatched_indices
+ elif is_not_exhaustively_labeled:
+ to_remove_tracker = unmatched_indices
+ else:
+ to_remove_tracker = np.array([], dtype=int)
+
+ # remove all unwanted unmatched tracker detections
+ data["tracker_ids"][t] = np.delete(tracker_ids, to_remove_tracker, axis=0)
+ data["tracker_dets"][t] = np.delete(tracker_dets, to_remove_tracker, axis=0)
+ data["tracker_confidences"][t] = np.delete(
+ tracker_confidences, to_remove_tracker, axis=0
+ )
+ similarity_scores = np.delete(similarity_scores, to_remove_tracker, axis=1)
+
+ data["gt_ids"][t] = gt_ids
+ data["gt_dets"][t] = gt_dets
+ data["similarity_scores"][t] = similarity_scores
+
+ unique_gt_ids += list(np.unique(data["gt_ids"][t]))
+ unique_tracker_ids += list(np.unique(data["tracker_ids"][t]))
+ num_tracker_dets += len(data["tracker_ids"][t])
+ num_gt_dets += len(data["gt_ids"][t])
+
+ # Re-label IDs such that there are no empty IDs
+ if len(unique_gt_ids) > 0:
+ unique_gt_ids = np.unique(unique_gt_ids)
+ gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
+ gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["gt_ids"][t]) > 0:
+ data["gt_ids"][t] = gt_id_map[data["gt_ids"][t]].astype(int)
+ if len(unique_tracker_ids) > 0:
+ unique_tracker_ids = np.unique(unique_tracker_ids)
+ tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
+ tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["tracker_ids"][t]) > 0:
+ data["tracker_ids"][t] = tracker_id_map[
+ data["tracker_ids"][t]
+ ].astype(int)
+
+ # Record overview statistics.
+ data["num_tracker_dets"] = num_tracker_dets
+ data["num_gt_dets"] = num_gt_dets
+ data["num_tracker_ids"] = len(unique_tracker_ids)
+ data["num_gt_ids"] = len(unique_gt_ids)
+ data["num_timesteps"] = raw_data["num_timesteps"]
+ data["seq"] = raw_data["seq"]
+
+ # get track representations
+ data["gt_tracks"] = raw_data["classes_to_gt_tracks"][cls_id]
+ data["gt_track_ids"] = raw_data["classes_to_gt_track_ids"][cls_id]
+ data["gt_track_lengths"] = raw_data["classes_to_gt_track_lengths"][cls_id]
+ data["gt_track_areas"] = raw_data["classes_to_gt_track_areas"][cls_id]
+ data["dt_tracks"] = raw_data["classes_to_dt_tracks"][cls_id]
+ data["dt_track_ids"] = raw_data["classes_to_dt_track_ids"][cls_id]
+ data["dt_track_lengths"] = raw_data["classes_to_dt_track_lengths"][cls_id]
+ data["dt_track_areas"] = raw_data["classes_to_dt_track_areas"][cls_id]
+ data["dt_track_scores"] = raw_data["classes_to_dt_track_scores"][cls_id]
+ data["not_exhaustively_labeled"] = is_not_exhaustively_labeled
+ data["iou_type"] = "bbox"
+
+ # sort tracker data tracks by tracker confidence scores
+ if data["dt_tracks"]:
+ idx = np.argsort(
+ [-score for score in data["dt_track_scores"]], kind="mergesort"
+ )
+ data["dt_track_scores"] = [data["dt_track_scores"][i] for i in idx]
+ data["dt_tracks"] = [data["dt_tracks"][i] for i in idx]
+ data["dt_track_ids"] = [data["dt_track_ids"][i] for i in idx]
+ data["dt_track_lengths"] = [data["dt_track_lengths"][i] for i in idx]
+ data["dt_track_areas"] = [data["dt_track_areas"][i] for i in idx]
+ # Ensure that ids are unique per timestep.
+ self._check_unique_ids(data)
+
+ return data
+
+ def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
+ similarity_scores = self._calculate_box_ious(gt_dets_t, tracker_dets_t)
+ return similarity_scores
+
+ def _merge_categories(self, annotations):
+ """
+ Merges categories with a merged tag. Adapted from https://github.com/TAO-Dataset
+ :param annotations: the annotations in which the classes should be merged
+ :return: None
+ """
+ merge_map = {}
+ for category in self.gt_data["categories"]:
+ if "merged" in category:
+ for to_merge in category["merged"]:
+ merge_map[to_merge["id"]] = category["id"]
+
+ for ann in annotations:
+ ann["category_id"] = merge_map.get(ann["category_id"], ann["category_id"])
+
+ def _compute_vid_mappings(self, annotations):
+ """
+ Computes mappings from Videos to corresponding tracks and images.
+ :param annotations: the annotations for which the mapping should be generated
+ :return: the video-to-track-mapping, the video-to-image-mapping
+ """
+ vids_to_tracks = {}
+ vids_to_imgs = {}
+ vid_ids = [vid["id"] for vid in self.gt_data["videos"]]
+
+        # compute a mapping from image IDs to images
+ images = {}
+ for image in self.gt_data["images"]:
+ images[image["id"]] = image
+
+ for ann in annotations:
+ ann["area"] = ann["bbox"][2] * ann["bbox"][3]
+
+ vid = ann["video_id"]
+ if ann["video_id"] not in vids_to_tracks.keys():
+ vids_to_tracks[ann["video_id"]] = list()
+ if ann["video_id"] not in vids_to_imgs.keys():
+ vids_to_imgs[ann["video_id"]] = list()
+
+ # Fill in vids_to_tracks
+ tid = ann["track_id"]
+ exist_tids = [track["id"] for track in vids_to_tracks[vid]]
+ try:
+ index1 = exist_tids.index(tid)
+ except ValueError:
+ index1 = -1
+            if index1 == -1:
+ curr_track = {
+ "id": tid,
+ "category_id": ann["category_id"],
+ "video_id": vid,
+ "annotations": [ann],
+ }
+ vids_to_tracks[vid].append(curr_track)
+ else:
+ vids_to_tracks[vid][index1]["annotations"].append(ann)
+
+ # Fill in vids_to_imgs
+ img_id = ann["image_id"]
+ exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
+ try:
+ index2 = exist_img_ids.index(img_id)
+ except ValueError:
+ index2 = -1
+ if index2 == -1:
+ curr_img = {"id": img_id, "annotations": [ann]}
+ vids_to_imgs[vid].append(curr_img)
+ else:
+ vids_to_imgs[vid][index2]["annotations"].append(ann)
+
+ # sort annotations by frame index and compute track area
+ for vid, tracks in vids_to_tracks.items():
+ for track in tracks:
+ track["annotations"] = sorted(
+ track["annotations"],
+ key=lambda x: images[x["image_id"]]["frame_index"],
+ )
+                # Compute average area
+ track["area"] = sum(x["area"] for x in track["annotations"]) / len(
+ track["annotations"]
+ )
+
+ # Ensure all videos are present
+ for vid_id in vid_ids:
+ if vid_id not in vids_to_tracks.keys():
+ vids_to_tracks[vid_id] = []
+ if vid_id not in vids_to_imgs.keys():
+ vids_to_imgs[vid_id] = []
+
+ return vids_to_tracks, vids_to_imgs
+
+ def _compute_image_to_timestep_mappings(self):
+ """
+ Computes a mapping from images to the corresponding timestep in the sequence.
+ :return: the image-to-timestep-mapping
+ """
+ images = {}
+ for image in self.gt_data["images"]:
+ images[image["id"]] = image
+
+ seq_to_imgs_to_timestep = {vid["id"]: dict() for vid in self.gt_data["videos"]}
+ for vid in seq_to_imgs_to_timestep:
+ curr_imgs = [img["id"] for img in self.videos_to_gt_images[vid]]
+ curr_imgs = sorted(curr_imgs, key=lambda x: images[x]["frame_index"])
+ seq_to_imgs_to_timestep[vid] = {
+ curr_imgs[i]: i for i in range(len(curr_imgs))
+ }
+
+ return seq_to_imgs_to_timestep
+
+ def _limit_dets_per_image(self, annotations):
+ """
+ Limits the number of detections for each image to config['MAX_DETECTIONS']. Adapted from
+ https://github.com/TAO-Dataset/
+ :param annotations: the annotations in which the detections should be limited
+ :return: the annotations with limited detections
+ """
+ max_dets = self.config["MAX_DETECTIONS"]
+ img_ann = defaultdict(list)
+ for ann in annotations:
+ img_ann[ann["image_id"]].append(ann)
+
+ for img_id, _anns in img_ann.items():
+ if len(_anns) <= max_dets:
+ continue
+ _anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
+ img_ann[img_id] = _anns[:max_dets]
+
+ return [ann for anns in img_ann.values() for ann in anns]
+
+ def _fill_video_ids_inplace(self, annotations):
+ """
+        Fills in missing video IDs in place. Adapted from https://github.com/TAO-Dataset/
+        :param annotations: the annotations for which the video IDs should be filled in place
+ :return: None
+ """
+ missing_video_id = [x for x in annotations if "video_id" not in x]
+ if missing_video_id:
+ image_id_to_video_id = {
+ x["id"]: x["video_id"] for x in self.gt_data["images"]
+ }
+ for x in missing_video_id:
+ x["video_id"] = image_id_to_video_id[x["image_id"]]
+
+ @staticmethod
+ def _make_track_ids_unique(annotations):
+ """
+        Makes the track IDs unique over the whole annotation set. Adapted from https://github.com/TAO-Dataset/
+        If the same track ID appears in more than one video, the affected annotations are
+        reassigned fresh IDs starting from max_track_id + 1 (one new ID per (track, video) pair).
+ :param annotations: the annotation set
+ :return: the number of updated IDs
+ """
+ track_id_videos = {}
+ track_ids_to_update = set()
+ max_track_id = 0
+ for ann in annotations:
+ t = ann["track_id"]
+ if t not in track_id_videos:
+ track_id_videos[t] = ann["video_id"]
+
+ if ann["video_id"] != track_id_videos[t]:
+ # Track id is assigned to multiple videos
+ track_ids_to_update.add(t)
+ max_track_id = max(max_track_id, t)
+
+ if track_ids_to_update:
+ print("true")
+ next_id = itertools.count(max_track_id + 1)
+ new_track_ids = defaultdict(lambda: next(next_id))
+ for ann in annotations:
+ t = ann["track_id"]
+ v = ann["video_id"]
+ if t in track_ids_to_update:
+ ann["track_id"] = new_track_ids[t, v]
+ return len(track_ids_to_update)
+
+ def _split_known_unknown_distractor(self):
+        # 2000 is larger than the max category id in TAO-OW.
+        all_ids = set(range(1, 2000))
+        # `knowns` includes the 78 TAO category ids that correspond to 78 COCO classes.
+        # (The other 2 COCO classes do not have corresponding classes in TAO.)
+ self.knowns = {
+ 4,
+ 13,
+ 1038,
+ 544,
+ 1057,
+ 34,
+ 35,
+ 36,
+ 41,
+ 45,
+ 58,
+ 60,
+ 579,
+ 1091,
+ 1097,
+ 1099,
+ 78,
+ 79,
+ 81,
+ 91,
+ 1115,
+ 1117,
+ 95,
+ 1122,
+ 99,
+ 1132,
+ 621,
+ 1135,
+ 625,
+ 118,
+ 1144,
+ 126,
+ 642,
+ 1155,
+ 133,
+ 1162,
+ 139,
+ 154,
+ 174,
+ 185,
+ 699,
+ 1215,
+ 714,
+ 717,
+ 1229,
+ 211,
+ 729,
+ 221,
+ 229,
+ 747,
+ 235,
+ 237,
+ 779,
+ 276,
+ 805,
+ 299,
+ 829,
+ 852,
+ 347,
+ 371,
+ 382,
+ 896,
+ 392,
+ 926,
+ 937,
+ 428,
+ 429,
+ 961,
+ 452,
+ 979,
+ 980,
+ 982,
+ 475,
+ 480,
+ 993,
+ 1001,
+ 502,
+ 1018,
+ }
+ # `distractors` is defined as in the paper "Opening up Open-World Tracking"
+ self.distractors = {
+ 20,
+ 63,
+ 108,
+ 180,
+ 188,
+ 204,
+ 212,
+ 247,
+ 303,
+ 403,
+ 407,
+ 415,
+ 490,
+ 504,
+ 507,
+ 513,
+ 529,
+ 567,
+ 569,
+ 588,
+ 672,
+ 691,
+ 702,
+ 708,
+ 711,
+ 720,
+ 736,
+ 737,
+ 798,
+ 813,
+ 815,
+ 827,
+ 831,
+ 851,
+ 877,
+ 883,
+ 912,
+ 971,
+ 976,
+ 1130,
+ 1133,
+ 1134,
+ 1169,
+ 1184,
+ 1220,
+ }
+ self.unknowns = all_ids.difference(self.knowns.union(self.distractors))
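+        # `knowns`, `distractors` and `unknowns` partition the category id range
+        # {1, ..., 1999}; the `SUBSET` config parameter selects which of the three
+        # sets is kept by `_filter_gt_data` below.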
+
+ def _filter_gt_data(self, raw_gt_data):
+ """
+ Filter out irrelevant data in the raw_gt_data
+ Args:
+ raw_gt_data: directly loaded from json.
+
+ Returns:
+ filtered gt_data
+ """
+ valid_cat_ids = list()
+ if self.subset == "known":
+ valid_cat_ids = self.knowns
+ elif self.subset == "distractor":
+ valid_cat_ids = self.distractors
+ elif self.subset == "unknown":
+ valid_cat_ids = self.unknowns
+ # elif self.subset == "test_only_unknowns":
+ # valid_cat_ids = test_only_unknowns
+ else:
+ raise Exception("The parameter `SUBSET` is incorrect")
+
+ filtered = dict()
+ filtered["videos"] = raw_gt_data["videos"]
+ # filtered["videos"] = list()
+ unwanted_vid = set()
+ # for video in raw_gt_data["videos"]:
+ # datasrc = video["name"].split('/')[1]
+ # if datasrc in data_srcs:
+ # filtered["videos"].append(video)
+ # else:
+ # unwanted_vid.add(video["id"])
+
+ filtered["annotations"] = list()
+ for ann in raw_gt_data["annotations"]:
+ if (ann["video_id"] not in unwanted_vid) and (
+ ann["category_id"] in valid_cat_ids
+ ):
+ filtered["annotations"].append(ann)
+
+ filtered["tracks"] = list()
+ for track in raw_gt_data["tracks"]:
+ if (track["video_id"] not in unwanted_vid) and (
+ track["category_id"] in valid_cat_ids
+ ):
+ filtered["tracks"].append(track)
+
+ filtered["images"] = list()
+ for image in raw_gt_data["images"]:
+ if image["video_id"] not in unwanted_vid:
+ filtered["images"].append(image)
+
+ filtered["categories"] = list()
+ for cat in raw_gt_data["categories"]:
+ if cat["id"] in valid_cat_ids:
+ filtered["categories"].append(cat)
+
+ filtered["info"] = raw_gt_data["info"]
+ filtered["licenses"] = raw_gt_data["licenses"]
+
+ return filtered
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/datasets/youtube_vis.py b/sam3/eval/hota_eval_toolkit/trackeval/datasets/youtube_vis.py
new file mode 100644
index 0000000000000000000000000000000000000000..e611398164c720eced1e7e6c49de9f66be4c6d0c
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/datasets/youtube_vis.py
@@ -0,0 +1,524 @@
+# flake8: noqa
+
+# note: this file has been modified from its original version in TrackEval in
+# https://github.com/JonathonLuiten/TrackEval/blob/master/trackeval/datasets/youtube_vis.py
+# to support the following:
+# 1) bbox evaluation (via `IOU_TYPE`)
+# 2) passing GT and prediction data as Python objects (via `GT_JSON_OBJECT` and `TRACKER_JSON_OBJECT`)
+# 3) specifying a custom dataset name (via `DATASET_NAME`)
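+#
+# A minimal usage sketch of these additions (illustrative only; `gt_dict` and `pred_list`
+# stand for in-memory GT and tracker results in YouTubeVIS format):
+#
+#   dataset = YouTubeVIS({
+#       "GT_JSON_OBJECT": gt_dict,         # dict with "videos", "categories", "annotations"
+#       "TRACKER_JSON_OBJECT": pred_list,  # list of per-track result dicts
+#       "IOU_TYPE": "bbox",                # or "segm" for mask IoU
+#       "DATASET_NAME": "my_video_eval",
+#   })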
+
+import json
+import os
+
+import numpy as np
+
+from .. import _timing, utils
+from ..utils import TrackEvalException
+from ._base_dataset import _BaseDataset
+
+
+class YouTubeVIS(_BaseDataset):
+ """Dataset class for YouTubeVIS tracking"""
+
+ @staticmethod
+ def get_default_dataset_config():
+ """Default class config values"""
+ code_path = utils.get_code_path()
+ default_config = {
+ "GT_FOLDER": os.path.join(
+ code_path, "data/gt/youtube_vis/"
+ ), # Location of GT data
+ "TRACKERS_FOLDER": os.path.join(code_path, "data/trackers/youtube_vis/"),
+ # Trackers location
+ "OUTPUT_FOLDER": None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
+ "TRACKERS_TO_EVAL": None, # Filenames of trackers to eval (if None, all in folder)
+ "CLASSES_TO_EVAL": None, # Classes to eval (if None, all classes)
+ "SPLIT_TO_EVAL": "train_sub_split", # Valid: 'train', 'val', 'train_sub_split'
+ "PRINT_CONFIG": True, # Whether to print current config
+ "OUTPUT_SUB_FOLDER": "", # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
+ "TRACKER_SUB_FOLDER": "data", # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
+ "TRACKER_DISPLAY_NAMES": None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
+ # Added for video phrase AP evaluation -- allow directly specifying the GT JSON data and Tracker (result)
+ # JSON data as Python objects, without reading from files.
+ "GT_JSON_OBJECT": None,
+ "TRACKER_JSON_OBJECT": None,
+ "IOU_TYPE": "segm",
+ "DATASET_NAME": "video",
+ }
+ return default_config
+
+ def __init__(self, config=None):
+ """Initialise dataset, checking that all required files are present"""
+ super().__init__()
+ # Fill non-given config values with defaults
+ self.config = utils.init_config(config, self.get_default_dataset_config())
+ self.gt_fol = (
+ self.config["GT_FOLDER"] + "youtube_vis_" + self.config["SPLIT_TO_EVAL"]
+ )
+ self.tracker_fol = (
+ self.config["TRACKERS_FOLDER"]
+ + "youtube_vis_"
+ + self.config["SPLIT_TO_EVAL"]
+ )
+ self.use_super_categories = False
+ self.should_classes_combine = True
+ assert self.config["IOU_TYPE"] in ["segm", "bbox"]
+ self.iou_type = self.config["IOU_TYPE"]
+ print("=" * 100)
+ print(f"Evaluate annotation type *{self.iou_type}*")
+ self.dataset_name = self.config["DATASET_NAME"]
+
+ self.output_fol = self.config["OUTPUT_FOLDER"]
+ if self.output_fol is None:
+ self.output_fol = self.tracker_fol
+ self.output_sub_fol = self.config["OUTPUT_SUB_FOLDER"]
+ self.tracker_sub_fol = self.config["TRACKER_SUB_FOLDER"]
+
+ if self.config["GT_JSON_OBJECT"] is not None:
+ # allow directly specifying the GT JSON data without reading from files
+ gt_json = self.config["GT_JSON_OBJECT"]
+ assert isinstance(gt_json, dict)
+ assert "videos" in gt_json
+ assert "categories" in gt_json
+ assert "annotations" in gt_json
+ self.gt_data = gt_json
+ else:
+ if not os.path.exists(self.gt_fol):
+ print("GT folder not found: " + self.gt_fol)
+ raise TrackEvalException(
+ "GT folder not found: " + os.path.basename(self.gt_fol)
+ )
+ gt_dir_files = [
+ file for file in os.listdir(self.gt_fol) if file.endswith(".json")
+ ]
+ if len(gt_dir_files) != 1:
+ raise TrackEvalException(
+ self.gt_fol + " does not contain exactly one json file."
+ )
+
+ with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
+ self.gt_data = json.load(f)
+
+ # Get classes to eval
+ self.valid_classes = [cls["name"] for cls in self.gt_data["categories"]]
+ cls_name_to_cls_id_map = {
+ cls["name"]: cls["id"] for cls in self.gt_data["categories"]
+ }
+
+ if self.config["CLASSES_TO_EVAL"]:
+ self.class_list = [
+ cls.lower() if cls.lower() in self.valid_classes else None
+ for cls in self.config["CLASSES_TO_EVAL"]
+ ]
+ if not all(self.class_list):
+ raise TrackEvalException(
+ "Attempted to evaluate an invalid class. Only classes "
+ + ", ".join(self.valid_classes)
+ + " are valid."
+ )
+ else:
+ self.class_list = [cls["name"] for cls in self.gt_data["categories"]]
+ self.class_name_to_class_id = {
+ k: v for k, v in cls_name_to_cls_id_map.items() if k in self.class_list
+ }
+
+ # Get sequences to eval and check gt files exist
+ self.seq_list = [
+ vid["file_names"][0].split("/")[0] for vid in self.gt_data["videos"]
+ ]
+ self.seq_name_to_seq_id = {
+ vid["file_names"][0].split("/")[0]: vid["id"]
+ for vid in self.gt_data["videos"]
+ }
+ self.seq_lengths = {
+ vid["id"]: len(vid["file_names"]) for vid in self.gt_data["videos"]
+ }
+
+ # encode masks and compute track areas
+ self._prepare_gt_annotations()
+
+ # Get trackers to eval
+ if self.config["TRACKER_JSON_OBJECT"] is not None:
+ # allow directly specifying the tracker JSON data without reading from files
+ tracker_json = self.config["TRACKER_JSON_OBJECT"]
+ assert isinstance(tracker_json, list)
+ self.tracker_list = ["tracker"]
+ elif self.config["TRACKERS_TO_EVAL"] is None:
+ self.tracker_list = os.listdir(self.tracker_fol)
+ else:
+ self.tracker_list = self.config["TRACKERS_TO_EVAL"]
+
+ if self.config["TRACKER_DISPLAY_NAMES"] is None:
+ self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
+ elif (self.config["TRACKERS_TO_EVAL"] is not None) and (
+ len(self.config["TRACKER_DISPLAY_NAMES"]) == len(self.tracker_list)
+ ):
+ self.tracker_to_disp = dict(
+ zip(self.tracker_list, self.config["TRACKER_DISPLAY_NAMES"])
+ )
+ else:
+ raise TrackEvalException(
+ "List of tracker files and tracker display names do not match."
+ )
+
+ # counter for globally unique track IDs
+ self.global_tid_counter = 0
+
+ self.tracker_data = dict()
+ if self.config["TRACKER_JSON_OBJECT"] is not None:
+ # allow directly specifying the tracker JSON data without reading from files
+ tracker = self.tracker_list[0]
+ self.tracker_data[tracker] = tracker_json
+ else:
+ for tracker in self.tracker_list:
+ tracker_dir_path = os.path.join(
+ self.tracker_fol, tracker, self.tracker_sub_fol
+ )
+ tr_dir_files = [
+ file
+ for file in os.listdir(tracker_dir_path)
+ if file.endswith(".json")
+ ]
+ if len(tr_dir_files) != 1:
+ raise TrackEvalException(
+ tracker_dir_path + " does not contain exactly one json file."
+ )
+
+ with open(os.path.join(tracker_dir_path, tr_dir_files[0])) as f:
+ curr_data = json.load(f)
+
+ self.tracker_data[tracker] = curr_data
+
+ def get_display_name(self, tracker):
+ return self.tracker_to_disp[tracker]
+
+ def _load_raw_file(self, tracker, seq, is_gt):
+ """Load a file (gt or tracker) in the YouTubeVIS format
+ If is_gt, this returns a dict which contains the fields:
+ [gt_ids, gt_classes] : list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets]: list (for each timestep) of lists of detections.
+ [classes_to_gt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
+ keys and corresponding segmentations as values) for each track
+ [classes_to_gt_track_ids, classes_to_gt_track_areas, classes_to_gt_track_iscrowd]: dictionary with class values
+ as keys and lists (for each track) as values
+
+ if not is_gt, this returns a dict which contains the fields:
+ [tracker_ids, tracker_classes, tracker_confidences] : list (for each timestep) of 1D NDArrays (for each det).
+ [tracker_dets]: list (for each timestep) of lists of detections.
+ [classes_to_dt_tracks]: dictionary with class values as keys and list of dictionaries (with frame indices as
+ keys and corresponding segmentations as values) for each track
+ [classes_to_dt_track_ids, classes_to_dt_track_areas]: dictionary with class values as keys and lists as values
+ [classes_to_dt_track_scores]: dictionary with class values as keys and 1D numpy arrays as values
+ """
+ # select sequence tracks
+ seq_id = self.seq_name_to_seq_id[seq]
+ if is_gt:
+ tracks = [
+ ann for ann in self.gt_data["annotations"] if ann["video_id"] == seq_id
+ ]
+ else:
+ tracks = self._get_tracker_seq_tracks(tracker, seq_id)
+
+ # Convert data to required format
+ num_timesteps = self.seq_lengths[seq_id]
+ data_keys = ["ids", "classes", "dets"]
+ if not is_gt:
+ data_keys += ["tracker_confidences"]
+ raw_data = {key: [None] * num_timesteps for key in data_keys}
+ result_key = "segmentations" if self.iou_type == "segm" else "bboxes"
+ for t in range(num_timesteps):
+ raw_data["dets"][t] = [
+ track[result_key][t] for track in tracks if track[result_key][t]
+ ]
+ raw_data["ids"][t] = np.atleast_1d(
+ [track["id"] for track in tracks if track[result_key][t]]
+ ).astype(int)
+ raw_data["classes"][t] = np.atleast_1d(
+ [track["category_id"] for track in tracks if track[result_key][t]]
+ ).astype(int)
+ if not is_gt:
+ raw_data["tracker_confidences"][t] = np.atleast_1d(
+ [track["score"] for track in tracks if track[result_key][t]]
+ ).astype(float)
+
+ if is_gt:
+ key_map = {"ids": "gt_ids", "classes": "gt_classes", "dets": "gt_dets"}
+ else:
+ key_map = {
+ "ids": "tracker_ids",
+ "classes": "tracker_classes",
+ "dets": "tracker_dets",
+ }
+ for k, v in key_map.items():
+ raw_data[v] = raw_data.pop(k)
+
+ all_cls_ids = {self.class_name_to_class_id[cls] for cls in self.class_list}
+ classes_to_tracks = {
+ cls: [track for track in tracks if track["category_id"] == cls]
+ for cls in all_cls_ids
+ }
+
+ # mapping from classes to track representations and track information
+ raw_data["classes_to_tracks"] = {
+ cls: [
+ {i: track[result_key][i] for i in range(len(track[result_key]))}
+ for track in tracks
+ ]
+ for cls, tracks in classes_to_tracks.items()
+ }
+ raw_data["classes_to_track_ids"] = {
+ cls: [track["id"] for track in tracks]
+ for cls, tracks in classes_to_tracks.items()
+ }
+ raw_data["classes_to_track_areas"] = {
+ cls: [track["area"] for track in tracks]
+ for cls, tracks in classes_to_tracks.items()
+ }
+
+ if is_gt:
+ raw_data["classes_to_gt_track_iscrowd"] = {
+ cls: [track["iscrowd"] for track in tracks]
+ for cls, tracks in classes_to_tracks.items()
+ }
+ else:
+ raw_data["classes_to_dt_track_scores"] = {
+ cls: np.array([track["score"] for track in tracks])
+ for cls, tracks in classes_to_tracks.items()
+ }
+
+ if is_gt:
+ key_map = {
+ "classes_to_tracks": "classes_to_gt_tracks",
+ "classes_to_track_ids": "classes_to_gt_track_ids",
+ "classes_to_track_areas": "classes_to_gt_track_areas",
+ }
+ else:
+ key_map = {
+ "classes_to_tracks": "classes_to_dt_tracks",
+ "classes_to_track_ids": "classes_to_dt_track_ids",
+ "classes_to_track_areas": "classes_to_dt_track_areas",
+ }
+ for k, v in key_map.items():
+ raw_data[v] = raw_data.pop(k)
+
+ raw_data["num_timesteps"] = num_timesteps
+ raw_data["seq"] = seq
+ return raw_data
+
+ @_timing.time
+ def get_preprocessed_seq_data(self, raw_data, cls):
+ """Preprocess data for a single sequence for a single class ready for evaluation.
+ Inputs:
+ - raw_data is a dict containing the data for the sequence already read in by get_raw_seq_data().
+ - cls is the class to be evaluated.
+ Outputs:
+ - data is a dict containing all of the information that metrics need to perform evaluation.
+ It contains the following fields:
+ [num_timesteps, num_gt_ids, num_tracker_ids, num_gt_dets, num_tracker_dets] : integers.
+ [gt_ids, tracker_ids, tracker_confidences]: list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets, tracker_dets]: list (for each timestep) of lists of detections.
+ [similarity_scores]: list (for each timestep) of 2D NDArrays.
+ Notes:
+ General preprocessing (preproc) occurs in 4 steps. Some datasets may not use all of these steps.
+ 1) Extract only detections relevant for the class to be evaluated (including distractor detections).
+ 2) Match gt dets and tracker dets. Remove tracker dets that are matched to a gt det that is of a
+ distractor class, or otherwise marked as to be removed.
+ 3) Remove unmatched tracker dets if they fall within a crowd ignore region or don't meet a certain
+ other criteria (e.g. are too small).
+ 4) Remove gt dets that were only useful for preprocessing and not for actual evaluation.
+ After the above preprocessing steps, this function also calculates the number of gt and tracker detections
+ and unique track ids. It also relabels gt and tracker ids to be contiguous and checks that ids are
+ unique within each timestep.
+ YouTubeVIS:
+            In YouTubeVIS, the 4 preproc steps are as follows:
+ 1) There are 40 classes which are evaluated separately.
+ 2) No matched tracker dets are removed.
+ 3) No unmatched tracker dets are removed.
+ 4) No gt dets are removed.
+ Further, for TrackMAP computation track representations for the given class are accessed from a dictionary
+ and the tracks from the tracker data are sorted according to the tracker confidence.
+ """
+ cls_id = self.class_name_to_class_id[cls]
+
+ data_keys = [
+ "gt_ids",
+ "tracker_ids",
+ "gt_dets",
+ "tracker_dets",
+ "similarity_scores",
+ ]
+ data = {key: [None] * raw_data["num_timesteps"] for key in data_keys}
+ unique_gt_ids = []
+ unique_tracker_ids = []
+ num_gt_dets = 0
+ num_tracker_dets = 0
+
+ for t in range(raw_data["num_timesteps"]):
+ # Only extract relevant dets for this class for eval (cls)
+ gt_class_mask = np.atleast_1d(raw_data["gt_classes"][t] == cls_id)
+ gt_class_mask = gt_class_mask.astype(bool)
+ gt_ids = raw_data["gt_ids"][t][gt_class_mask]
+ gt_dets = [
+ raw_data["gt_dets"][t][ind]
+ for ind in range(len(gt_class_mask))
+ if gt_class_mask[ind]
+ ]
+
+ tracker_class_mask = np.atleast_1d(raw_data["tracker_classes"][t] == cls_id)
+ tracker_class_mask = tracker_class_mask.astype(bool)
+ tracker_ids = raw_data["tracker_ids"][t][tracker_class_mask]
+ tracker_dets = [
+ raw_data["tracker_dets"][t][ind]
+ for ind in range(len(tracker_class_mask))
+ if tracker_class_mask[ind]
+ ]
+ similarity_scores = raw_data["similarity_scores"][t][gt_class_mask, :][
+ :, tracker_class_mask
+ ]
+
+ data["tracker_ids"][t] = tracker_ids
+ data["tracker_dets"][t] = tracker_dets
+ data["gt_ids"][t] = gt_ids
+ data["gt_dets"][t] = gt_dets
+ data["similarity_scores"][t] = similarity_scores
+
+ unique_gt_ids += list(np.unique(data["gt_ids"][t]))
+ unique_tracker_ids += list(np.unique(data["tracker_ids"][t]))
+ num_tracker_dets += len(data["tracker_ids"][t])
+ num_gt_dets += len(data["gt_ids"][t])
+
+ # Re-label IDs such that there are no empty IDs
+ if len(unique_gt_ids) > 0:
+ unique_gt_ids = np.unique(unique_gt_ids)
+ gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
+ gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["gt_ids"][t]) > 0:
+ data["gt_ids"][t] = gt_id_map[data["gt_ids"][t]].astype(int)
+ if len(unique_tracker_ids) > 0:
+ unique_tracker_ids = np.unique(unique_tracker_ids)
+ tracker_id_map = np.nan * np.ones((np.max(unique_tracker_ids) + 1))
+ tracker_id_map[unique_tracker_ids] = np.arange(len(unique_tracker_ids))
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["tracker_ids"][t]) > 0:
+ data["tracker_ids"][t] = tracker_id_map[
+ data["tracker_ids"][t]
+ ].astype(int)
+
+ # Ensure that ids are unique per timestep.
+ self._check_unique_ids(data)
+
+ # Record overview statistics.
+ data["num_tracker_dets"] = num_tracker_dets
+ data["num_gt_dets"] = num_gt_dets
+ data["num_tracker_ids"] = len(unique_tracker_ids)
+ data["num_gt_ids"] = len(unique_gt_ids)
+ data["num_timesteps"] = raw_data["num_timesteps"]
+ data["seq"] = raw_data["seq"]
+
+ # get track representations
+ data["gt_tracks"] = raw_data["classes_to_gt_tracks"][cls_id]
+ data["gt_track_ids"] = raw_data["classes_to_gt_track_ids"][cls_id]
+ data["gt_track_areas"] = raw_data["classes_to_gt_track_areas"][cls_id]
+ data["gt_track_iscrowd"] = raw_data["classes_to_gt_track_iscrowd"][cls_id]
+ data["dt_tracks"] = raw_data["classes_to_dt_tracks"][cls_id]
+ data["dt_track_ids"] = raw_data["classes_to_dt_track_ids"][cls_id]
+ data["dt_track_areas"] = raw_data["classes_to_dt_track_areas"][cls_id]
+ data["dt_track_scores"] = raw_data["classes_to_dt_track_scores"][cls_id]
+ data["iou_type"] = "mask"
+
+ # sort tracker data tracks by tracker confidence scores
+ if data["dt_tracks"]:
+ idx = np.argsort(
+ [-score for score in data["dt_track_scores"]], kind="mergesort"
+ )
+ data["dt_track_scores"] = [data["dt_track_scores"][i] for i in idx]
+ data["dt_tracks"] = [data["dt_tracks"][i] for i in idx]
+ data["dt_track_ids"] = [data["dt_track_ids"][i] for i in idx]
+ data["dt_track_areas"] = [data["dt_track_areas"][i] for i in idx]
+
+ return data
+
+ def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
+ if self.iou_type == "segm":
+ similarity_scores = self._calculate_mask_ious(
+ gt_dets_t, tracker_dets_t, is_encoded=True, do_ioa=False
+ )
+ else:
+ gt_dets_t = np.array(gt_dets_t, dtype=np.float32).reshape(-1, 4)
+ tracker_dets_t = np.array(tracker_dets_t, dtype=np.float32).reshape(-1, 4)
+ similarity_scores = self._calculate_box_ious(
+ gt_dets_t, tracker_dets_t, box_format="xywh", do_ioa=False
+ )
+ return similarity_scores
+
+ def _prepare_gt_annotations(self):
+ """
+ Prepares GT data by rle encoding segmentations and computing the average track area.
+ :return: None
+ """
+ if self.iou_type == "segm":
+ # only loaded when needed to reduce minimum requirements
+ from pycocotools import mask as mask_utils
+
+ for track in self.gt_data["annotations"]:
+ h = track["height"]
+ w = track["width"]
+ for i, seg in enumerate(track["segmentations"]):
+ if seg is not None and isinstance(seg["counts"], list):
+ track["segmentations"][i] = mask_utils.frPyObjects(seg, h, w)
+ areas = [a for a in track["areas"] if a]
+ if len(areas) == 0:
+ track["area"] = 0
+ else:
+ track["area"] = np.array(areas).mean()
+ else:
+ for track in self.gt_data["annotations"]:
+ # For bbox eval, compute areas from bboxes if not already available
+ areas = [a for a in track.get("areas", []) if a]
+ if not areas:
+ areas = []
+ for bbox in track.get("bboxes", []):
+ if bbox is not None:
+ areas.append(bbox[2] * bbox[3])
+ track["area"] = np.array(areas).mean() if areas else 0
+
+ def _get_tracker_seq_tracks(self, tracker, seq_id):
+ """
+ Prepares tracker data for a given sequence. Extracts all annotations for given sequence ID, computes
+ average track area and assigns a track ID.
+ :param tracker: the given tracker
+ :param seq_id: the sequence ID
+ :return: the extracted tracks
+ """
+ # only loaded when needed to reduce minimum requirements
+ from pycocotools import mask as mask_utils
+
+ tracks = [
+ ann for ann in self.tracker_data[tracker] if ann["video_id"] == seq_id
+ ]
+ for track in tracks:
+ if "areas" not in track:
+ if self.iou_type == "segm":
+ for seg in track["segmentations"]:
+ if seg:
+ track["areas"].append(mask_utils.area(seg))
+ else:
+ track["areas"].append(None)
+ else:
+ for bbox in track["bboxes"]:
+ if bbox:
+ track["areas"].append(bbox[2] * bbox[3])
+ else:
+ track["areas"].append(None)
+ areas = [a for a in track["areas"] if a]
+ if len(areas) == 0:
+ track["area"] = 0
+ else:
+ track["area"] = np.array(areas).mean()
+ track["id"] = self.global_tid_counter
+ self.global_tid_counter += 1
+ return tracks
+
+ def get_name(self):
+ return self.dataset_name
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/eval.py b/sam3/eval/hota_eval_toolkit/trackeval/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2d7205bc895b3932b2140ee3b74d3ccb5e40215
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/eval.py
@@ -0,0 +1,395 @@
+# flake8: noqa
+
+import os
+import time
+import traceback
+from functools import partial
+from multiprocessing.pool import Pool
+
+import numpy as np
+
+from . import _timing, utils
+from .metrics import Count
+from .utils import TrackEvalException
+
+try:
+ import tqdm
+
+ TQDM_IMPORTED = True
+except ImportError as _:
+ TQDM_IMPORTED = False
+
+
+class Evaluator:
+ """Evaluator class for evaluating different metrics for different datasets"""
+
+ @staticmethod
+ def get_default_eval_config():
+ """Returns the default config values for evaluation"""
+ code_path = utils.get_code_path()
+ default_config = {
+ "USE_PARALLEL": False,
+ "NUM_PARALLEL_CORES": 8,
+ "BREAK_ON_ERROR": True, # Raises exception and exits with error
+ "RETURN_ON_ERROR": False, # if not BREAK_ON_ERROR, then returns from function on error
+ "LOG_ON_ERROR": os.path.join(
+ code_path, "error_log.txt"
+ ), # if not None, save any errors into a log file.
+ "PRINT_RESULTS": True,
+ "PRINT_ONLY_COMBINED": False,
+ "PRINT_CONFIG": True,
+ "TIME_PROGRESS": True,
+ "DISPLAY_LESS_PROGRESS": True,
+ "OUTPUT_SUMMARY": True,
+ "OUTPUT_EMPTY_CLASSES": True, # If False, summary files are not output for classes with no detections
+ "OUTPUT_DETAILED": True,
+ "PLOT_CURVES": True,
+ }
+ return default_config
+
+ def __init__(self, config=None):
+ """Initialise the evaluator with a config file"""
+ self.config = utils.init_config(config, self.get_default_eval_config(), "Eval")
+ # Only run timing analysis if not run in parallel.
+ if self.config["TIME_PROGRESS"] and not self.config["USE_PARALLEL"]:
+ _timing.DO_TIMING = True
+ if self.config["DISPLAY_LESS_PROGRESS"]:
+ _timing.DISPLAY_LESS_PROGRESS = True
+
+ def _combine_results(
+ self,
+ res,
+ metrics_list,
+ metric_names,
+ dataset,
+ res_field="COMBINED_SEQ",
+ target_tag=None,
+ ):
+ assert res_field.startswith("COMBINED_SEQ")
+ # collecting combined cls keys (cls averaged, det averaged, super classes)
+ tracker_list, seq_list, class_list = dataset.get_eval_info()
+ combined_cls_keys = []
+ res[res_field] = {}
+
+ # narrow the target for evaluation
+ if target_tag is not None:
+ target_video_ids = [
+ annot["video_id"]
+ for annot in dataset.gt_data["annotations"]
+ if target_tag in annot["tags"]
+ ]
+ vid2name = {
+ video["id"]: video["file_names"][0].split("/")[0]
+ for video in dataset.gt_data["videos"]
+ }
+ target_video_ids = set(target_video_ids)
+ target_video = [vid2name[video_id] for video_id in target_video_ids]
+
+ if len(target_video) == 0:
+ raise TrackEvalException(
+ "No sequences found with the tag %s" % target_tag
+ )
+
+ target_annotations = [
+ annot
+ for annot in dataset.gt_data["annotations"]
+ if annot["video_id"] in target_video_ids
+ ]
+ assert all(target_tag in annot["tags"] for annot in target_annotations), (
+ f"Not all annotations in the target sequences have the target tag {target_tag}. "
+ "We currently only support a target tag at the sequence level, not at the annotation level."
+ )
+ else:
+ target_video = seq_list
+
+ # combine sequences for each class
+ for c_cls in class_list:
+ res[res_field][c_cls] = {}
+ for metric, metric_name in zip(metrics_list, metric_names):
+ curr_res = {
+ seq_key: seq_value[c_cls][metric_name]
+ for seq_key, seq_value in res.items()
+ if not seq_key.startswith("COMBINED_SEQ")
+ and seq_key in target_video
+ }
+ res[res_field][c_cls][metric_name] = metric.combine_sequences(curr_res)
+ # combine classes
+ if dataset.should_classes_combine:
+ combined_cls_keys += [
+ "cls_comb_cls_av",
+ "cls_comb_det_av",
+ "all",
+ ]
+ res[res_field]["cls_comb_cls_av"] = {}
+ res[res_field]["cls_comb_det_av"] = {}
+ for metric, metric_name in zip(metrics_list, metric_names):
+ cls_res = {
+ cls_key: cls_value[metric_name]
+ for cls_key, cls_value in res[res_field].items()
+ if cls_key not in combined_cls_keys
+ }
+ res[res_field]["cls_comb_cls_av"][metric_name] = (
+ metric.combine_classes_class_averaged(cls_res)
+ )
+ res[res_field]["cls_comb_det_av"][metric_name] = (
+ metric.combine_classes_det_averaged(cls_res)
+ )
+ # combine classes to super classes
+ if dataset.use_super_categories:
+ for cat, sub_cats in dataset.super_categories.items():
+ combined_cls_keys.append(cat)
+ res[res_field][cat] = {}
+ for metric, metric_name in zip(metrics_list, metric_names):
+ cat_res = {
+ cls_key: cls_value[metric_name]
+ for cls_key, cls_value in res[res_field].items()
+ if cls_key in sub_cats
+ }
+ res[res_field][cat][metric_name] = (
+ metric.combine_classes_det_averaged(cat_res)
+ )
+ return res, combined_cls_keys
+
+ def _summarize_results(
+ self,
+ res,
+ tracker,
+ metrics_list,
+ metric_names,
+ dataset,
+ res_field,
+ combined_cls_keys,
+ ):
+ config = self.config
+ output_fol = dataset.get_output_fol(tracker)
+ tracker_display_name = dataset.get_display_name(tracker)
+ for c_cls in res[
+ res_field
+ ].keys(): # class_list + combined classes if calculated
+ summaries = []
+ details = []
+ num_dets = res[res_field][c_cls]["Count"]["Dets"]
+ if config["OUTPUT_EMPTY_CLASSES"] or num_dets > 0:
+ for metric, metric_name in zip(metrics_list, metric_names):
+ # for combined classes there is no per sequence evaluation
+ if c_cls in combined_cls_keys:
+ table_res = {res_field: res[res_field][c_cls][metric_name]}
+ else:
+ table_res = {
+ seq_key: seq_value[c_cls][metric_name]
+ for seq_key, seq_value in res.items()
+ }
+
+ if config["PRINT_RESULTS"] and config["PRINT_ONLY_COMBINED"]:
+ dont_print = (
+ dataset.should_classes_combine
+ and c_cls not in combined_cls_keys
+ )
+ if not dont_print:
+ metric.print_table(
+ {res_field: table_res[res_field]},
+ tracker_display_name,
+ c_cls,
+ res_field,
+ res_field,
+ )
+ elif config["PRINT_RESULTS"]:
+ metric.print_table(
+ table_res, tracker_display_name, c_cls, res_field, res_field
+ )
+ if config["OUTPUT_SUMMARY"]:
+ summaries.append(metric.summary_results(table_res))
+ if config["OUTPUT_DETAILED"]:
+ details.append(metric.detailed_results(table_res))
+ if config["PLOT_CURVES"]:
+ metric.plot_single_tracker_results(
+ table_res,
+ tracker_display_name,
+ c_cls,
+ output_fol,
+ )
+ if config["OUTPUT_SUMMARY"]:
+ utils.write_summary_results(summaries, c_cls, output_fol)
+ if config["OUTPUT_DETAILED"]:
+ utils.write_detailed_results(details, c_cls, output_fol)
+
+ @_timing.time
+ def evaluate(self, dataset_list, metrics_list, show_progressbar=False):
+ """Evaluate a set of metrics on a set of datasets"""
+ config = self.config
+ metrics_list = metrics_list + [Count()] # Count metrics are always run
+ metric_names = utils.validate_metrics_list(metrics_list)
+ dataset_names = [dataset.get_name() for dataset in dataset_list]
+ output_res = {}
+ output_msg = {}
+
+ for dataset, dataset_name in zip(dataset_list, dataset_names):
+ # Get dataset info about what to evaluate
+ output_res[dataset_name] = {}
+ output_msg[dataset_name] = {}
+ tracker_list, seq_list, class_list = dataset.get_eval_info()
+ print(
+ "\nEvaluating %i tracker(s) on %i sequence(s) for %i class(es) on %s dataset using the following "
+ "metrics: %s\n"
+ % (
+ len(tracker_list),
+ len(seq_list),
+ len(class_list),
+ dataset_name,
+ ", ".join(metric_names),
+ )
+ )
+
+ # Evaluate each tracker
+ for tracker in tracker_list:
+ # if not config['BREAK_ON_ERROR'] then go to next tracker without breaking
+ try:
+ # Evaluate each sequence in parallel or in series.
+ # returns a nested dict (res), indexed like: res[seq][class][metric_name][sub_metric field]
+ # e.g. res[seq_0001][pedestrian][hota][DetA]
+ print("\nEvaluating %s\n" % tracker)
+ time_start = time.time()
+ if config["USE_PARALLEL"]:
+ if show_progressbar and TQDM_IMPORTED:
+ seq_list_sorted = sorted(seq_list)
+
+ with Pool(config["NUM_PARALLEL_CORES"]) as pool, tqdm.tqdm(
+ total=len(seq_list)
+ ) as pbar:
+ _eval_sequence = partial(
+ eval_sequence,
+ dataset=dataset,
+ tracker=tracker,
+ class_list=class_list,
+ metrics_list=metrics_list,
+ metric_names=metric_names,
+ )
+ results = []
+ for r in pool.imap(
+ _eval_sequence, seq_list_sorted, chunksize=20
+ ):
+ results.append(r)
+ pbar.update()
+ res = dict(zip(seq_list_sorted, results))
+
+ else:
+ with Pool(config["NUM_PARALLEL_CORES"]) as pool:
+ _eval_sequence = partial(
+ eval_sequence,
+ dataset=dataset,
+ tracker=tracker,
+ class_list=class_list,
+ metrics_list=metrics_list,
+ metric_names=metric_names,
+ )
+ results = pool.map(_eval_sequence, seq_list)
+ res = dict(zip(seq_list, results))
+ else:
+ res = {}
+ if show_progressbar and TQDM_IMPORTED:
+ seq_list_sorted = sorted(seq_list)
+ for curr_seq in tqdm.tqdm(seq_list_sorted):
+ res[curr_seq] = eval_sequence(
+ curr_seq,
+ dataset,
+ tracker,
+ class_list,
+ metrics_list,
+ metric_names,
+ )
+ else:
+ for curr_seq in sorted(seq_list):
+ res[curr_seq] = eval_sequence(
+ curr_seq,
+ dataset,
+ tracker,
+ class_list,
+ metrics_list,
+ metric_names,
+ )
+
+ # Combine results over all sequences and then over all classes
+ res, combined_cls_keys = self._combine_results(
+ res, metrics_list, metric_names, dataset, "COMBINED_SEQ"
+ )
+
+ if np.all(
+ ["tags" in annot for annot in dataset.gt_data["annotations"]]
+ ):
+ # Combine results over the challenging sequences and then over all classes
+ # currently only support "tracking_challenging_pair"
+ res, _ = self._combine_results(
+ res,
+ metrics_list,
+ metric_names,
+ dataset,
+ "COMBINED_SEQ_CHALLENGING",
+ "tracking_challenging_pair",
+ )
+
+ # Print and output results in various formats
+ if config["TIME_PROGRESS"]:
+ print(
+ "\nAll sequences for %s finished in %.2f seconds"
+ % (tracker, time.time() - time_start)
+ )
+
+ self._summarize_results(
+ res,
+ tracker,
+ metrics_list,
+ metric_names,
+ dataset,
+ "COMBINED_SEQ",
+ combined_cls_keys,
+ )
+ if "COMBINED_SEQ_CHALLENGING" in res:
+ self._summarize_results(
+ res,
+ tracker,
+ metrics_list,
+ metric_names,
+ dataset,
+ "COMBINED_SEQ_CHALLENGING",
+ combined_cls_keys,
+ )
+
+ # Output for returning from function
+ output_res[dataset_name][tracker] = res
+ output_msg[dataset_name][tracker] = "Success"
+
+ except Exception as err:
+ output_res[dataset_name][tracker] = None
+                if isinstance(err, TrackEvalException):
+ output_msg[dataset_name][tracker] = str(err)
+ else:
+ output_msg[dataset_name][tracker] = "Unknown error occurred."
+ print("Tracker %s was unable to be evaluated." % tracker)
+ print(err)
+ traceback.print_exc()
+ if config["LOG_ON_ERROR"] is not None:
+ with open(config["LOG_ON_ERROR"], "a") as f:
+ print(dataset_name, file=f)
+ print(tracker, file=f)
+ print(traceback.format_exc(), file=f)
+ print("\n\n\n", file=f)
+ if config["BREAK_ON_ERROR"]:
+ raise err
+ elif config["RETURN_ON_ERROR"]:
+ return output_res, output_msg
+
+ return output_res, output_msg
+
+
+@_timing.time
+def eval_sequence(seq, dataset, tracker, class_list, metrics_list, metric_names):
+ """Function for evaluating a single sequence"""
+
+ raw_data = dataset.get_raw_seq_data(tracker, seq)
+ seq_res = {}
+ for cls in class_list:
+ seq_res[cls] = {}
+ data = dataset.get_preprocessed_seq_data(raw_data, cls)
+ for metric, met_name in zip(metrics_list, metric_names):
+ seq_res[cls][met_name] = metric.eval_sequence(data)
+ return seq_res
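+
+
+# A minimal usage sketch tying the pieces together (illustrative only; it assumes the
+# YouTubeVIS dataset and HOTA metric shipped with this toolkit, plus in-memory JSON
+# objects `gt_dict` / `pred_list`):
+#
+#   from .datasets.youtube_vis import YouTubeVIS
+#   from .metrics import HOTA
+#
+#   evaluator = Evaluator({"USE_PARALLEL": False, "PRINT_RESULTS": True})
+#   dataset = YouTubeVIS({"GT_JSON_OBJECT": gt_dict, "TRACKER_JSON_OBJECT": pred_list})
+#   output_res, output_msg = evaluator.evaluate([dataset], [HOTA()])
+#   # results are nested as output_res[dataset_name][tracker][seq_or_combined][class][metric][field],
+#   # e.g. output_res["video"]["tracker"]["COMBINED_SEQ"]["cls_comb_cls_av"]["HOTA"]["HOTA"]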
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/metrics/__init__.py b/sam3/eval/hota_eval_toolkit/trackeval/metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..6c8434237107d370f0778658c52d3d7ebc25a37d
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/metrics/__init__.py
@@ -0,0 +1,4 @@
+# flake8: noqa
+
+from .count import Count
+from .hota import HOTA
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/metrics/_base_metric.py b/sam3/eval/hota_eval_toolkit/trackeval/metrics/_base_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..afbfaca0cdf7d1f428edcedc6035cc63f9b17078
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/metrics/_base_metric.py
@@ -0,0 +1,145 @@
+# flake8: noqa
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from .. import _timing
+from ..utils import TrackEvalException
+
+
+class _BaseMetric(ABC):
+ @abstractmethod
+ def __init__(self):
+ self.plottable = False
+ self.integer_fields = []
+ self.float_fields = []
+ self.array_labels = []
+ self.integer_array_fields = []
+ self.float_array_fields = []
+ self.fields = []
+ self.summary_fields = []
+ self.registered = False
+
+ #####################################################################
+ # Abstract functions for subclasses to implement
+
+ @_timing.time
+ @abstractmethod
+ def eval_sequence(self, data): ...
+
+ @abstractmethod
+ def combine_sequences(self, all_res): ...
+
+ @abstractmethod
+ def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False): ...
+
+ @abstractmethod
+ def combine_classes_det_averaged(self, all_res): ...
+
+ def plot_single_tracker_results(self, all_res, tracker, output_folder, cls):
+ """Plot results of metrics, only valid for metrics with self.plottable"""
+ if self.plottable:
+ raise NotImplementedError(
+ "plot_results is not implemented for metric %s" % self.get_name()
+ )
+ else:
+ pass
+
+ #####################################################################
+ # Helper functions which are useful for all metrics:
+
+ @classmethod
+ def get_name(cls):
+ return cls.__name__
+
+ @staticmethod
+ def _combine_sum(all_res, field):
+ """Combine sequence results via sum"""
+ return sum([all_res[k][field] for k in all_res.keys()])
+
+ @staticmethod
+ def _combine_weighted_av(all_res, field, comb_res, weight_field):
+ """Combine sequence results via weighted average"""
+ return sum(
+ [all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()]
+ ) / np.maximum(1.0, comb_res[weight_field])
+
+ def print_table(
+        self, table_res, tracker, cls, res_field="COMBINED_SEQ", output_label="COMBINED"
+ ):
+ """Prints table of results for all sequences"""
+ print("")
+ metric_name = self.get_name()
+ self._row_print(
+ [metric_name + ": " + tracker + "-" + cls] + self.summary_fields
+ )
+ for seq, results in sorted(table_res.items()):
+ if seq.startswith("COMBINED_SEQ"):
+ continue
+ summary_res = self._summary_row(results)
+ self._row_print([seq] + summary_res)
+ summary_res = self._summary_row(table_res[res_field])
+        self._row_print([output_label] + summary_res)
+
+ def _summary_row(self, results_):
+ vals = []
+ for h in self.summary_fields:
+ if h in self.float_array_fields:
+ vals.append("{0:1.5g}".format(100 * np.mean(results_[h])))
+ elif h in self.float_fields:
+ vals.append("{0:1.5g}".format(100 * float(results_[h])))
+ elif h in self.integer_fields:
+ vals.append("{0:d}".format(int(results_[h])))
+ else:
+ raise NotImplementedError(
+ "Summary function not implemented for this field type."
+ )
+ return vals
+
+ @staticmethod
+ def _row_print(*argv):
+ """Prints results in an evenly spaced rows, with more space in first row"""
+ if len(argv) == 1:
+ argv = argv[0]
+ to_print = "%-35s" % argv[0]
+ for v in argv[1:]:
+ to_print += "%-10s" % str(v)
+ print(to_print)
+
+ def summary_results(self, table_res):
+ """Returns a simple summary of final results for a tracker"""
+ return dict(
+ zip(self.summary_fields, self._summary_row(table_res["COMBINED_SEQ"]))
+ )
+
+ def detailed_results(self, table_res):
+ """Returns detailed final results for a tracker"""
+ # Get detailed field information
+ detailed_fields = self.float_fields + self.integer_fields
+ for h in self.float_array_fields + self.integer_array_fields:
+ for alpha in [int(100 * x) for x in self.array_labels]:
+ detailed_fields.append(h + "___" + str(alpha))
+ detailed_fields.append(h + "___AUC")
+
+ # Get detailed results
+ detailed_results = {}
+ for seq, res in table_res.items():
+ detailed_row = self._detailed_row(res)
+ if len(detailed_row) != len(detailed_fields):
+ raise TrackEvalException(
+ "Field names and data have different sizes (%i and %i)"
+ % (len(detailed_row), len(detailed_fields))
+ )
+ detailed_results[seq] = dict(zip(detailed_fields, detailed_row))
+ return detailed_results
+
+ def _detailed_row(self, res):
+ detailed_row = []
+ for h in self.float_fields + self.integer_fields:
+ detailed_row.append(res[h])
+ for h in self.float_array_fields + self.integer_array_fields:
+ for i, alpha in enumerate([int(100 * x) for x in self.array_labels]):
+ detailed_row.append(res[h][i])
+ detailed_row.append(np.mean(res[h]))
+ return detailed_row
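+
+
+# A minimal sketch of a concrete metric subclass (illustrative only; count.py is the
+# smallest real example in this package):
+#
+#   class DetCount(_BaseMetric):
+#       def __init__(self):
+#           super().__init__()
+#           self.integer_fields = ["Dets"]
+#           self.fields = self.integer_fields
+#           self.summary_fields = self.fields
+#
+#       @_timing.time
+#       def eval_sequence(self, data):
+#           return {"Dets": data["num_tracker_dets"]}
+#
+#       def combine_sequences(self, all_res):
+#           return {"Dets": self._combine_sum(all_res, "Dets")}
+#
+#       def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
+#           return {"Dets": self._combine_sum(all_res, "Dets")}
+#
+#       def combine_classes_det_averaged(self, all_res):
+#           return {"Dets": self._combine_sum(all_res, "Dets")}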
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/metrics/count.py b/sam3/eval/hota_eval_toolkit/trackeval/metrics/count.py
new file mode 100644
index 0000000000000000000000000000000000000000..a37260572d9316d78bf5069e573eb08561a1ca63
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/metrics/count.py
@@ -0,0 +1,48 @@
+# flake8: noqa
+
+from .. import _timing
+from ._base_metric import _BaseMetric
+
+
+class Count(_BaseMetric):
+ """Class which simply counts the number of tracker and gt detections and ids."""
+
+ def __init__(self, config=None):
+ super().__init__()
+ self.integer_fields = ["Dets", "GT_Dets", "IDs", "GT_IDs"]
+ self.fields = self.integer_fields
+ self.summary_fields = self.fields
+
+ @_timing.time
+ def eval_sequence(self, data):
+ """Returns counts for one sequence"""
+ # Get results
+ res = {
+ "Dets": data["num_tracker_dets"],
+ "GT_Dets": data["num_gt_dets"],
+ "IDs": data["num_tracker_ids"],
+ "GT_IDs": data["num_gt_ids"],
+ "Frames": data["num_timesteps"],
+ }
+ return res
+
+ def combine_sequences(self, all_res):
+ """Combines metrics across all sequences"""
+ res = {}
+ for field in self.integer_fields:
+ res[field] = self._combine_sum(all_res, field)
+ return res
+
+ def combine_classes_class_averaged(self, all_res, ignore_empty_classes=None):
+ """Combines metrics across all classes by averaging over the class values"""
+ res = {}
+ for field in self.integer_fields:
+ res[field] = self._combine_sum(all_res, field)
+ return res
+
+ def combine_classes_det_averaged(self, all_res):
+ """Combines metrics across all classes by averaging over the detection values"""
+ res = {}
+ for field in self.integer_fields:
+ res[field] = self._combine_sum(all_res, field)
+ return res
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/metrics/hota.py b/sam3/eval/hota_eval_toolkit/trackeval/metrics/hota.py
new file mode 100644
index 0000000000000000000000000000000000000000..4cd95010ef804d333c59496f4f67e6213ba74dcd
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/metrics/hota.py
@@ -0,0 +1,291 @@
+# flake8: noqa
+
+import os
+
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+
+from .. import _timing
+from ._base_metric import _BaseMetric
+
+
+class HOTA(_BaseMetric):
+ """Class which implements the HOTA metrics.
+ See: https://link.springer.com/article/10.1007/s11263-020-01375-2
+ """
+
+ def __init__(self, config=None):
+ super().__init__()
+ self.plottable = True
+ self.array_labels = np.arange(0.05, 0.99, 0.05)
+ self.integer_array_fields = ["HOTA_TP", "HOTA_FN", "HOTA_FP"]
+ self.float_array_fields = [
+ "HOTA",
+ "DetA",
+ "AssA",
+ "DetRe",
+ "DetPr",
+ "AssRe",
+ "AssPr",
+ "LocA",
+ "OWTA",
+ ]
+ self.float_fields = ["HOTA(0)", "LocA(0)", "HOTALocA(0)"]
+ self.fields = (
+ self.float_array_fields + self.integer_array_fields + self.float_fields
+ )
+ self.summary_fields = self.float_array_fields + self.float_fields
+
+ @_timing.time
+ def eval_sequence(self, data):
+ """Calculates the HOTA metrics for one sequence"""
+
+ # Initialise results
+ res = {}
+ for field in self.float_array_fields + self.integer_array_fields:
+ res[field] = np.zeros((len(self.array_labels)), dtype=float)
+ for field in self.float_fields:
+ res[field] = 0
+
+ # Return result quickly if tracker or gt sequence is empty
+ if data["num_tracker_dets"] == 0:
+ res["HOTA_FN"] = data["num_gt_dets"] * np.ones(
+ (len(self.array_labels)), dtype=float
+ )
+ res["LocA"] = np.ones((len(self.array_labels)), dtype=float)
+ res["LocA(0)"] = 1.0
+ return res
+ if data["num_gt_dets"] == 0:
+ res["HOTA_FP"] = data["num_tracker_dets"] * np.ones(
+ (len(self.array_labels)), dtype=float
+ )
+ res["LocA"] = np.ones((len(self.array_labels)), dtype=float)
+ res["LocA(0)"] = 1.0
+ return res
+
+ # Variables counting global association
+ potential_matches_count = np.zeros(
+ (data["num_gt_ids"], data["num_tracker_ids"])
+ )
+ gt_id_count = np.zeros((data["num_gt_ids"], 1))
+ tracker_id_count = np.zeros((1, data["num_tracker_ids"]))
+
+ # First loop through each timestep and accumulate global track information.
+ for t, (gt_ids_t, tracker_ids_t) in enumerate(
+ zip(data["gt_ids"], data["tracker_ids"])
+ ):
+ # Count the potential matches between ids in each timestep
+ # These are normalised, weighted by the match similarity.
+ similarity = data["similarity_scores"][t]
+ sim_iou_denom = (
+ similarity.sum(0)[np.newaxis, :]
+ + similarity.sum(1)[:, np.newaxis]
+ - similarity
+ )
+ sim_iou = np.zeros_like(similarity)
+ sim_iou_mask = sim_iou_denom > 0 + np.finfo("float").eps
+ sim_iou[sim_iou_mask] = (
+ similarity[sim_iou_mask] / sim_iou_denom[sim_iou_mask]
+ )
+ potential_matches_count[
+ gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]
+ ] += sim_iou
+
+ # Calculate the total number of dets for each gt_id and tracker_id.
+ gt_id_count[gt_ids_t] += 1
+ tracker_id_count[0, tracker_ids_t] += 1
+
+ # Calculate overall jaccard alignment score (before unique matching) between IDs
+ global_alignment_score = potential_matches_count / (
+ gt_id_count + tracker_id_count - potential_matches_count
+ )
+ matches_counts = [
+ np.zeros_like(potential_matches_count) for _ in self.array_labels
+ ]
+
+ # Calculate scores for each timestep
+ for t, (gt_ids_t, tracker_ids_t) in enumerate(
+ zip(data["gt_ids"], data["tracker_ids"])
+ ):
+ # Deal with the case that there are no gt_det/tracker_det in a timestep.
+ if len(gt_ids_t) == 0:
+ for a, alpha in enumerate(self.array_labels):
+ res["HOTA_FP"][a] += len(tracker_ids_t)
+ continue
+ if len(tracker_ids_t) == 0:
+ for a, alpha in enumerate(self.array_labels):
+ res["HOTA_FN"][a] += len(gt_ids_t)
+ continue
+
+ # Get matching scores between pairs of dets for optimizing HOTA
+ similarity = data["similarity_scores"][t]
+ score_mat = (
+ global_alignment_score[
+ gt_ids_t[:, np.newaxis], tracker_ids_t[np.newaxis, :]
+ ]
+ * similarity
+ )
+
+ # Hungarian algorithm to find best matches
+ match_rows, match_cols = linear_sum_assignment(-score_mat)
+
+ # Calculate and accumulate basic statistics
+ for a, alpha in enumerate(self.array_labels):
+ actually_matched_mask = (
+ similarity[match_rows, match_cols] >= alpha - np.finfo("float").eps
+ )
+ alpha_match_rows = match_rows[actually_matched_mask]
+ alpha_match_cols = match_cols[actually_matched_mask]
+ num_matches = len(alpha_match_rows)
+ res["HOTA_TP"][a] += num_matches
+ res["HOTA_FN"][a] += len(gt_ids_t) - num_matches
+ res["HOTA_FP"][a] += len(tracker_ids_t) - num_matches
+ if num_matches > 0:
+ res["LocA"][a] += sum(
+ similarity[alpha_match_rows, alpha_match_cols]
+ )
+ matches_counts[a][
+ gt_ids_t[alpha_match_rows], tracker_ids_t[alpha_match_cols]
+ ] += 1
+
+ # Calculate association scores (AssA, AssRe, AssPr) for the alpha value.
+ # First calculate scores per gt_id/tracker_id combo and then average over the number of detections.
+ for a, alpha in enumerate(self.array_labels):
+ matches_count = matches_counts[a]
+ ass_a = matches_count / np.maximum(
+ 1, gt_id_count + tracker_id_count - matches_count
+ )
+ res["AssA"][a] = np.sum(matches_count * ass_a) / np.maximum(
+ 1, res["HOTA_TP"][a]
+ )
+ ass_re = matches_count / np.maximum(1, gt_id_count)
+ res["AssRe"][a] = np.sum(matches_count * ass_re) / np.maximum(
+ 1, res["HOTA_TP"][a]
+ )
+ ass_pr = matches_count / np.maximum(1, tracker_id_count)
+ res["AssPr"][a] = np.sum(matches_count * ass_pr) / np.maximum(
+ 1, res["HOTA_TP"][a]
+ )
+
+ # Calculate final scores
+ res["LocA"] = np.maximum(1e-10, res["LocA"]) / np.maximum(1e-10, res["HOTA_TP"])
+ res = self._compute_final_fields(res)
+ return res
+
+ def combine_sequences(self, all_res):
+ """Combines metrics across all sequences"""
+ res = {}
+ for field in self.integer_array_fields:
+ res[field] = self._combine_sum(all_res, field)
+ for field in ["AssRe", "AssPr", "AssA"]:
+ res[field] = self._combine_weighted_av(
+ all_res, field, res, weight_field="HOTA_TP"
+ )
+ loca_weighted_sum = sum(
+ [all_res[k]["LocA"] * all_res[k]["HOTA_TP"] for k in all_res.keys()]
+ )
+ res["LocA"] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(
+ 1e-10, res["HOTA_TP"]
+ )
+ res = self._compute_final_fields(res)
+ return res
+
+ def combine_classes_class_averaged(self, all_res, ignore_empty_classes=False):
+ """Combines metrics across all classes by averaging over the class values.
+ If 'ignore_empty_classes' is True, then it only sums over classes with at least one gt or predicted detection.
+ """
+ res = {}
+ for field in self.integer_array_fields:
+ if ignore_empty_classes:
+ res[field] = self._combine_sum(
+ {
+ k: v
+ for k, v in all_res.items()
+ if (
+ v["HOTA_TP"] + v["HOTA_FN"] + v["HOTA_FP"]
+ > 0 + np.finfo("float").eps
+ ).any()
+ },
+ field,
+ )
+ else:
+ res[field] = self._combine_sum(
+ {k: v for k, v in all_res.items()}, field
+ )
+
+ for field in self.float_fields + self.float_array_fields:
+ if ignore_empty_classes:
+ res[field] = np.mean(
+ [
+ v[field]
+ for v in all_res.values()
+ if (
+ v["HOTA_TP"] + v["HOTA_FN"] + v["HOTA_FP"]
+ > 0 + np.finfo("float").eps
+ ).any()
+ ],
+ axis=0,
+ )
+ else:
+ res[field] = np.mean([v[field] for v in all_res.values()], axis=0)
+ return res
+
+ def combine_classes_det_averaged(self, all_res):
+ """Combines metrics across all classes by averaging over the detection values"""
+ res = {}
+ for field in self.integer_array_fields:
+ res[field] = self._combine_sum(all_res, field)
+ for field in ["AssRe", "AssPr", "AssA"]:
+ res[field] = self._combine_weighted_av(
+ all_res, field, res, weight_field="HOTA_TP"
+ )
+ loca_weighted_sum = sum(
+ [all_res[k]["LocA"] * all_res[k]["HOTA_TP"] for k in all_res.keys()]
+ )
+ res["LocA"] = np.maximum(1e-10, loca_weighted_sum) / np.maximum(
+ 1e-10, res["HOTA_TP"]
+ )
+ res = self._compute_final_fields(res)
+ return res
+
+ @staticmethod
+ def _compute_final_fields(res):
+ """Calculate sub-metric ('field') values which only depend on other sub-metric values.
+        This function is used both for per-sequence calculation and for combining values across sequences.
+ """
+ res["DetRe"] = res["HOTA_TP"] / np.maximum(1, res["HOTA_TP"] + res["HOTA_FN"])
+ res["DetPr"] = res["HOTA_TP"] / np.maximum(1, res["HOTA_TP"] + res["HOTA_FP"])
+ res["DetA"] = res["HOTA_TP"] / np.maximum(
+ 1, res["HOTA_TP"] + res["HOTA_FN"] + res["HOTA_FP"]
+ )
+ res["HOTA"] = np.sqrt(res["DetA"] * res["AssA"])
+ res["OWTA"] = np.sqrt(res["DetRe"] * res["AssA"])
+
+ res["HOTA(0)"] = res["HOTA"][0]
+ res["LocA(0)"] = res["LocA"][0]
+ res["HOTALocA(0)"] = res["HOTA(0)"] * res["LocA(0)"]
+ return res
+
+ def plot_single_tracker_results(self, table_res, tracker, cls, output_folder):
+ """Create plot of results"""
+
+ # Only loaded when run to reduce minimum requirements
+ from matplotlib import pyplot as plt
+
+ res = table_res["COMBINED_SEQ"]
+        # one line style per entry in self.float_array_fields
+        styles_to_plot = ["r", "b", "g", "b--", "b:", "g--", "g:", "m", "c"]
+ for name, style in zip(self.float_array_fields, styles_to_plot):
+ plt.plot(self.array_labels, res[name], style)
+ plt.xlabel("alpha")
+ plt.ylabel("score")
+ plt.title(tracker + " - " + cls)
+ plt.axis([0, 1, 0, 1])
+ legend = []
+ for name in self.float_array_fields:
+ legend += [name + " (" + str(np.round(np.mean(res[name]), 2)) + ")"]
+ plt.legend(legend, loc="lower left")
+ out_file = os.path.join(output_folder, cls + "_plot.pdf")
+ os.makedirs(os.path.dirname(out_file), exist_ok=True)
+ plt.savefig(out_file)
+ plt.savefig(out_file.replace(".pdf", ".png"))
+ plt.clf()
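+
+
+# Note: the array-valued fields (e.g. res["HOTA"], res["DetA"], res["AssA"]) hold one
+# value per localization threshold alpha in {0.05, 0.10, ..., 0.95}; the single scalar
+# usually reported as HOTA is the mean over these 19 thresholds, e.g.:
+#
+#   final_hota = float(np.mean(res["HOTA"]))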
diff --git a/sam3/eval/hota_eval_toolkit/trackeval/utils.py b/sam3/eval/hota_eval_toolkit/trackeval/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cdf77e78707617d49eb305827fb628e9361c170
--- /dev/null
+++ b/sam3/eval/hota_eval_toolkit/trackeval/utils.py
@@ -0,0 +1,195 @@
+# flake8: noqa
+
+import argparse
+import csv
+import os
+from collections import OrderedDict
+
+
+def init_config(config, default_config, name=None):
+ """Initialise non-given config values with defaults"""
+ if config is None:
+ config = default_config
+ else:
+ for k in default_config.keys():
+ if k not in config.keys():
+ config[k] = default_config[k]
+ if name and config["PRINT_CONFIG"]:
+ print("\n%s Config:" % name)
+ for c in config.keys():
+ print("%-20s : %-30s" % (c, config[c]))
+ return config
+
+
+def update_config(config):
+ """
+    Parses the arguments of a script and updates the corresponding config values if they are specified in the arguments.
+ :param config: the config to update
+ :return: the updated config
+ """
+ parser = argparse.ArgumentParser()
+ for setting in config.keys():
+ if type(config[setting]) == list or type(config[setting]) == type(None):
+ parser.add_argument("--" + setting, nargs="+")
+ else:
+ parser.add_argument("--" + setting)
+ args = parser.parse_args().__dict__
+ for setting in args.keys():
+ if args[setting] is not None:
+ if type(config[setting]) == type(True):
+ if args[setting] == "True":
+ x = True
+ elif args[setting] == "False":
+ x = False
+ else:
+ raise Exception(
+ "Command line parameter " + setting + "must be True or False"
+ )
+ elif type(config[setting]) == type(1):
+ x = int(args[setting])
+ elif type(args[setting]) == type(None):
+ x = None
+ else:
+ x = args[setting]
+ config[setting] = x
+ return config
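+
+
+# Typical use from a run script (illustrative; `Evaluator` lives in eval.py and
+# `YouTubeVIS` in datasets/youtube_vis.py of this package):
+#
+#   config = {
+#       **Evaluator.get_default_eval_config(),
+#       **YouTubeVIS.get_default_dataset_config(),
+#   }
+#   config = update_config(config)  # e.g. `--USE_PARALLEL True --SPLIT_TO_EVAL val`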
+
+
+def get_code_path():
+ """Get base path where code is"""
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+
+def validate_metrics_list(metrics_list):
+ """Get names of metric class and ensures they are unique, further checks that the fields within each metric class
+ do not have overlapping names.
+ """
+ metric_names = [metric.get_name() for metric in metrics_list]
+ # check metric names are unique
+ if len(metric_names) != len(set(metric_names)):
+ raise TrackEvalException(
+ "Code being run with multiple metrics of the same name"
+ )
+ fields = []
+ for m in metrics_list:
+ fields += m.fields
+ # check metric fields are unique
+ if len(fields) != len(set(fields)):
+ raise TrackEvalException(
+ "Code being run with multiple metrics with fields of the same name"
+ )
+ return metric_names
+
+
+def write_summary_results(summaries, cls, output_folder):
+ """Write summary results to file"""
+
+ fields = sum([list(s.keys()) for s in summaries], [])
+ values = sum([list(s.values()) for s in summaries], [])
+
+    # In order to remain consistent when new fields are added, each of the following fields, if present,
+    # is output first in the summary in the order below. Any further fields are output in the order each
+    # metric family is called, and within each family in the order they were added to the dict
+    # (python >= 3.6) or in arbitrary order (python < 3.6).
+ default_order = [
+ "HOTA",
+ "DetA",
+ "AssA",
+ "DetRe",
+ "DetPr",
+ "AssRe",
+ "AssPr",
+ "LocA",
+ "OWTA",
+ "HOTA(0)",
+ "LocA(0)",
+ "HOTALocA(0)",
+ "MOTA",
+ "MOTP",
+ "MODA",
+ "CLR_Re",
+ "CLR_Pr",
+ "MTR",
+ "PTR",
+ "MLR",
+ "CLR_TP",
+ "CLR_FN",
+ "CLR_FP",
+ "IDSW",
+ "MT",
+ "PT",
+ "ML",
+ "Frag",
+ "sMOTA",
+ "IDF1",
+ "IDR",
+ "IDP",
+ "IDTP",
+ "IDFN",
+ "IDFP",
+ "Dets",
+ "GT_Dets",
+ "IDs",
+ "GT_IDs",
+ ]
+ default_ordered_dict = OrderedDict(
+ zip(default_order, [None for _ in default_order])
+ )
+ for f, v in zip(fields, values):
+ default_ordered_dict[f] = v
+ for df in default_order:
+ if default_ordered_dict[df] is None:
+ del default_ordered_dict[df]
+ fields = list(default_ordered_dict.keys())
+ values = list(default_ordered_dict.values())
+
+ out_file = os.path.join(output_folder, cls + "_summary.txt")
+ os.makedirs(os.path.dirname(out_file), exist_ok=True)
+ with open(out_file, "w", newline="") as f:
+ writer = csv.writer(f, delimiter=" ")
+ writer.writerow(fields)
+ writer.writerow(values)
+
+
+def write_detailed_results(details, cls, output_folder):
+ """Write detailed results to file"""
+ sequences = details[0].keys()
+ fields = ["seq"] + sum([list(s["COMBINED_SEQ"].keys()) for s in details], [])
+ out_file = os.path.join(output_folder, cls + "_detailed.csv")
+ os.makedirs(os.path.dirname(out_file), exist_ok=True)
+ with open(out_file, "w", newline="") as f:
+ writer = csv.writer(f)
+ writer.writerow(fields)
+ for seq in sorted(sequences):
+ if seq == "COMBINED_SEQ":
+ continue
+ writer.writerow([seq] + sum([list(s[seq].values()) for s in details], []))
+ writer.writerow(
+ ["COMBINED"] + sum([list(s["COMBINED_SEQ"].values()) for s in details], [])
+ )
+
+
+def load_detail(file):
+ """Loads detailed data for a tracker."""
+ data = {}
+ with open(file) as f:
+ for i, row_text in enumerate(f):
+ row = row_text.replace("\r", "").replace("\n", "").split(",")
+ if i == 0:
+ keys = row[1:]
+ continue
+ current_values = row[1:]
+ seq = row[0]
+ if seq == "COMBINED":
+ seq = "COMBINED_SEQ"
+ if (len(current_values) == len(keys)) and seq != "":
+ data[seq] = {}
+ for key, value in zip(keys, current_values):
+ data[seq][key] = float(value)
+ return data
+
+
+class TrackEvalException(Exception):
+ """Custom exception for catching expected errors."""
+
+ ...
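+
+
+# Illustrative round trip between `write_detailed_results` and `load_detail`
+# (the class name, metric values, and output folder below are made up):
+#
+#     details = [{"seq1": {"HOTA": 0.5}, "COMBINED_SEQ": {"HOTA": 0.5}}]
+#     write_detailed_results(details, "pedestrian", "/tmp/trackeval_out")
+#     load_detail("/tmp/trackeval_out/pedestrian_detailed.csv")
+#     # -> {"seq1": {"HOTA": 0.5}, "COMBINED_SEQ": {"HOTA": 0.5}}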
diff --git a/sam3/eval/postprocessors.py b/sam3/eval/postprocessors.py
new file mode 100644
index 0000000000000000000000000000000000000000..973da118541d21441550b149c4243d8d39095a8b
--- /dev/null
+++ b/sam3/eval/postprocessors.py
@@ -0,0 +1,648 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Postprocessors class to transform MDETR output according to the downstream task"""
+
+import dataclasses
+import logging
+from collections import defaultdict
+from typing import Dict, List, Optional
+
+import numpy as np
+import torch
+from sam3.model import box_ops
+from sam3.model.data_misc import BatchedInferenceMetadata, interpolate
+from sam3.train.masks_ops import rle_encode, robust_rle_encode
+from torch import nn
+
+
+class PostProcessNullOp(nn.Module):
+ def __init__(self, **kwargs):
+        super().__init__()
+
+ def forward(self, input):
+ pass
+
+ def process_results(self, **kwargs):
+ return kwargs["find_stages"]
+
+
+class PostProcessImage(nn.Module):
+ """This module converts the model's output into the format expected by the coco api"""
+
+ def __init__(
+ self,
+ max_dets_per_img: int,
+ iou_type="bbox",
+ to_cpu: bool = True,
+ use_original_ids: bool = False,
+ use_original_sizes_box: bool = False,
+ use_original_sizes_mask: bool = False,
+ convert_mask_to_rle: bool = False,
+ always_interpolate_masks_on_gpu: bool = True,
+ use_presence: bool = True,
+ detection_threshold: float = -1.0,
+ ) -> None:
+ super().__init__()
+ self.max_dets_per_img = max_dets_per_img
+ self.iou_type = iou_type
+ self.to_cpu = to_cpu
+ self.convert_mask_to_rle = convert_mask_to_rle
+ self.always_interpolate_masks_on_gpu = always_interpolate_masks_on_gpu
+
+ self.use_presence = use_presence
+ self.detection_threshold = detection_threshold
+ self.use_original_ids = use_original_ids
+ self.use_original_sizes_box = use_original_sizes_box
+ self.use_original_sizes_mask = use_original_sizes_mask
+
+ @torch.no_grad()
+ def forward(
+ self,
+ outputs,
+ target_sizes_boxes,
+ target_sizes_masks,
+ forced_labels=None,
+ consistent=False,
+ ret_tensordict: bool = False, # This is experimental
+ ):
+ """Perform the computation
+ Parameters:
+ outputs: raw outputs of the model
+            target_sizes_boxes: tensor of dimension [batch_size x 2] containing the size of each image of the batch
+ For evaluation, this must be the original image size (before any data augmentation)
+ For visualization, this should be the image size after data augment, but before padding
+ target_sizes_masks: same but used to resize masks
+ forced_labels: tensor of dimension [batch_size] containing the label to force for each image of the batch
+                This is useful when evaluating the model using standard metrics (e.g. on COCO, LVIS). In that case,
+                we query the model with every possible class label, so when we pass the predictions to the evaluator,
+ we want to make sure that the predicted "class" matches the one that was queried.
+ consistent: whether all target sizes are equal
+ ret_tensordict: Experimental argument. If true, return a tensordict.TensorDict instead of a list of dictionaries for easier manipulation.
+ """
+ if ret_tensordict:
+ assert (
+ consistent is True
+ ), "We don't support returning TensorDict if the outputs have different shapes" # NOTE: It's possible but we don't support it.
+ assert self.detection_threshold <= 0.0, "TODO: implement?"
+ try:
+ from tensordict import TensorDict
+ except ImportError:
+ logging.info(
+ "tensordict is not installed. Install by running `pip install tensordict --no-deps`. Falling back by setting `ret_tensordict=False`"
+ )
+ ret_tensordict = False
+
+ out_bbox = outputs["pred_boxes"] if "pred_boxes" in outputs else None
+ out_logits = outputs["pred_logits"]
+ pred_masks = outputs["pred_masks"] if self.iou_type == "segm" else None
+ out_probs = out_logits.sigmoid()
+ if self.use_presence:
+ presence_score = outputs["presence_logit_dec"].sigmoid().unsqueeze(1)
+ out_probs = out_probs * presence_score
+
+ assert target_sizes_boxes.shape[1] == 2
+ assert target_sizes_masks.shape[1] == 2
+ batch_size = target_sizes_boxes.shape[0]
+
+ boxes, scores, labels, keep = self._process_boxes_and_labels(
+ target_sizes_boxes, forced_labels, out_bbox, out_probs
+ )
+ assert boxes is None or len(boxes) == batch_size
+ out_masks = self._process_masks(
+ target_sizes_masks, pred_masks, consistent=consistent, keep=keep
+ )
+ del pred_masks
+
+ if boxes is None:
+ assert out_masks is not None
+ assert not ret_tensordict, "We don't support returning TensorDict if the output does not contain boxes"
+ B = len(out_masks)
+ boxes = [None] * B
+ scores = [None] * B
+ labels = [None] * B
+
+ results = {
+ "scores": scores,
+ "labels": labels,
+ "boxes": boxes,
+ }
+ if out_masks is not None:
+ if self.convert_mask_to_rle:
+ results.update(masks_rle=out_masks)
+ else:
+ results.update(masks=out_masks)
+
+ if ret_tensordict:
+ results = TensorDict(results).auto_batch_size_()
+ if self.to_cpu:
+ results = results.cpu()
+ else:
+            # Convert a dictionary of lists/tensors to a list of dictionaries
+ results = [
+ dict(zip(results.keys(), res_tuple))
+ for res_tuple in zip(*results.values())
+ ]
+
+ return results
+
+ def _process_masks(self, target_sizes, pred_masks, consistent=True, keep=None):
+ if pred_masks is None:
+ return None
+ if self.always_interpolate_masks_on_gpu:
+ gpu_device = target_sizes.device
+ assert gpu_device.type == "cuda"
+ pred_masks = pred_masks.to(device=gpu_device)
+ if consistent:
+ assert keep is None, "TODO: implement?"
+            # All masks should have the same shape, as expected when processing a batch of size 1
+ target_size = target_sizes.unique(dim=0)
+ assert target_size.size(0) == 1, "Expecting all target sizes to be equal"
+ out_masks = (
+ interpolate(
+ pred_masks,
+ target_size.squeeze().tolist(),
+ mode="bilinear",
+ align_corners=False,
+ ).sigmoid()
+ > 0.5
+ )
+ if self.convert_mask_to_rle:
+ raise RuntimeError("TODO: implement?")
+ if self.to_cpu:
+ out_masks = out_masks.cpu()
+ else:
+ out_masks = [[]] * len(pred_masks)
+
+ assert keep is None or len(keep) == len(pred_masks)
+ for i, mask in enumerate(pred_masks):
+ h, w = target_sizes[i]
+ if keep is not None:
+ mask = mask[keep[i]]
+            # Use the GPU version first; move the masks to CPU if it fails
+ try:
+ interpolated = (
+ interpolate(
+ mask.unsqueeze(1),
+ (h, w),
+ mode="bilinear",
+ align_corners=False,
+ ).sigmoid()
+ > 0.5
+ )
+ except Exception as e:
+ logging.info("Issue found, reverting to CPU mode!")
+ mask_device = mask.device
+ mask = mask.cpu()
+ interpolated = (
+ interpolate(
+ mask.unsqueeze(1),
+ (h, w),
+ mode="bilinear",
+ align_corners=False,
+ ).sigmoid()
+ > 0.5
+ )
+ interpolated = interpolated.to(mask_device)
+
+ if self.convert_mask_to_rle:
+ out_masks[i] = robust_rle_encode(interpolated.squeeze(1))
+ else:
+ out_masks[i] = interpolated
+ if self.to_cpu:
+ out_masks[i] = out_masks[i].cpu()
+
+ return out_masks
+
+ def _process_boxes_and_labels(
+ self, target_sizes, forced_labels, out_bbox, out_probs
+ ):
+ if out_bbox is None:
+ return None, None, None, None
+ assert len(out_probs) == len(target_sizes)
+ if self.to_cpu:
+ out_probs = out_probs.cpu()
+ scores, labels = out_probs.max(-1)
+ if forced_labels is None:
+ labels = torch.ones_like(labels)
+ else:
+ labels = forced_labels[:, None].expand_as(labels)
+
+ # convert to [x0, y0, x1, y1] format
+ boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
+
+ img_h, img_w = target_sizes.unbind(1)
+ scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
+ boxes = boxes * scale_fct[:, None, :]
+
+ if self.to_cpu:
+ boxes = boxes.cpu()
+
+ keep = None
+ if self.detection_threshold > 0:
+ # Filter out the boxes with scores below the detection threshold
+ keep = scores > self.detection_threshold
+ assert len(keep) == len(boxes) == len(scores) == len(labels)
+
+ boxes = [b[k.to(b.device)] for b, k in zip(boxes, keep)]
+ scores = [s[k.to(s.device)] for s, k in zip(scores, keep)]
+ labels = [l[k.to(l.device)] for l, k in zip(labels, keep)]
+
+ return boxes, scores, labels, keep
+
+ def process_results(
+ self, find_stages, find_metadatas: List[BatchedInferenceMetadata], **kwargs
+ ):
+ if find_stages.loss_stages is not None:
+ find_metadatas = [find_metadatas[i] for i in find_stages.loss_stages]
+ assert len(find_stages) == len(find_metadatas)
+ results = {}
+ for outputs, meta in zip(find_stages, find_metadatas):
+ img_size_for_boxes = (
+ meta.original_size
+ if self.use_original_sizes_box
+ else torch.ones_like(meta.original_size)
+ )
+ img_size_for_masks = (
+ meta.original_size
+ if self.use_original_sizes_mask
+ else torch.ones_like(meta.original_size)
+ )
+ detection_results = self(
+ outputs,
+ img_size_for_boxes,
+ img_size_for_masks,
+ forced_labels=(
+ meta.original_category_id if self.use_original_ids else None
+ ),
+ )
+ ids = (
+ meta.original_image_id if self.use_original_ids else meta.coco_image_id
+ )
+ assert len(detection_results) == len(ids)
+ for img_id, result in zip(ids, detection_results):
+ if img_id.item() not in results:
+ results[img_id.item()] = result
+ else:
+ assert set(results[img_id.item()].keys()) == set(result.keys())
+ for k in result.keys():
+ if isinstance(result[k], torch.Tensor):
+ results[img_id.item()][k] = torch.cat(
+ [results[img_id.item()][k], result[k]], dim=0
+ )
+ elif isinstance(result[k], list):
+ results[img_id.item()][k] += result[k]
+ else:
+ raise NotImplementedError(
+ f"Unexpected type {type(result[k])} in result."
+ )
+ # Prune the results to the max number of detections per image.
+ for img_id, result in results.items():
+ if (
+ self.max_dets_per_img > 0
+ and len(result["scores"]) > self.max_dets_per_img
+ ):
+ _, topk_indexes = torch.topk(
+ result["scores"], self.max_dets_per_img, dim=0
+ )
+ if self.to_cpu:
+ topk_indexes = topk_indexes.cpu()
+ for k in result.keys():
+ if isinstance(results[img_id][k], list):
+ results[img_id][k] = [
+ results[img_id][k][i] for i in topk_indexes.tolist()
+ ]
+ else:
+ results[img_id][k] = results[img_id][k].to(topk_indexes.device)[
+ topk_indexes
+ ]
+
+ return results
+
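+# Illustrative sketch of calling `PostProcessImage` above directly (the tensor shapes
+# and batch size below are assumptions for the example, not a model contract):
+#
+#     post = PostProcessImage(max_dets_per_img=100, iou_type="bbox", use_presence=False)
+#     outputs = {
+#         "pred_logits": torch.randn(2, 300, 1),  # [batch, queries, num_classes]
+#         "pred_boxes": torch.rand(2, 300, 4),    # normalized cxcywh boxes
+#     }
+#     sizes = torch.tensor([[480, 640], [720, 1280]])  # (h, w) per image
+#     results = post(outputs, sizes, sizes)
+#     # -> a list of 2 dicts with "scores", "labels", and "boxes" in xyxy pixel coords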
+
+class PostProcessAPIVideo(PostProcessImage):
+ """This module converts the video model's output into the format expected by the YT-VIS api"""
+
+ def __init__(
+ self,
+ *args,
+ to_cpu: bool = True,
+ convert_mask_to_rle: bool = False,
+ always_interpolate_masks_on_gpu: bool = True,
+ prob_thresh: float = 0.5,
+ use_presence: bool = False,
+ **kwargs,
+ ):
+ super().__init__(
+ *args,
+ # Here we always set `convert_mask_to_rle=False` in the base `PostProcessAPI` class
+ # (so that its `_process_masks` won't return a list of RLEs). If we want to return
+ # RLEs for video masklets, we handle it in this `PostProcessAPIVideo` class instead.
+ convert_mask_to_rle=False,
+ # Here we always set `to_cpu=False` in the base `PostProcessAPI` class (so that
+ # the interpolated masks won't be automatically moved back to CPU). We will handle
+ # it in this `PostProcessAPIVideo` class instead.
+ always_interpolate_masks_on_gpu=always_interpolate_masks_on_gpu,
+ use_presence=use_presence,
+ **kwargs,
+ )
+ # Expected keys in the output dict to postprocess
+ self.EXPECTED_KEYS = [
+ "pred_logits",
+ "pred_boxes",
+ "pred_masks",
+ ]
+ # Whether to post-process video masklets (under packed representation) into RLE format
+ self.convert_mask_to_rle_for_video = convert_mask_to_rle
+ self.to_cpu_for_video = to_cpu
+ self.prob_thresh = prob_thresh
+
+ def process_results(
+ self, find_stages, find_metadatas: List[BatchedInferenceMetadata], **kwargs
+ ):
+ """
+ Tracking Postprocessor for SAM 3 video model.
+ This function takes in the output of the SAM 3 video model and processes it to extract all the tracklet predictions.
+ Args:
+ find_stages: A list of tensors representing the output of the SAM 3 video model.
+ find_metadatas: A list of BatchedInferenceMetadata objects containing metadata about each frame.
+ **kwargs: Additional keyword arguments.
+ Returns:
+            A dictionary of predictions with video_id as key.
+ """
+
+ # Import tensordict here to avoid global dependency.
+ try:
+ from tensordict import TensorDict
+ except ImportError as e:
+ logging.error(
+ "tensordict is not installed, please install by running `pip install tensordict --no-deps`"
+ )
+ raise e
+ # Notes and assumptions:
+ # 1- This postprocessor assumes results only for a single video.
+        # 2- There are N stage outputs corresponding to N video frames.
+        # 3- Each stage output contains PxQ preds, where P is the number of prompts and Q is the number of object queries. The output should also contain the tracking object ids corresponding to each object query.
+        # 4- The tracking object id has a default value of -1, indicating that the object query is not tracking any object in the frame, and hence its predictions can be ignored for that frame.
+        # 5- Some objects may be tracked in only a subset of frames. So, we first extract the predictions in a packed representation (for efficient postprocessing -- especially memory)
+        #    and then convert the packed representation into a padded one, where we zero-pad boxes/masks for objects that are not tracked in some frames.
+        # 6- We refer to objects by an object id, which is a tuple (prompt_idx, obj_id).
+
+ assert len(find_stages) > 0, "There is nothing to postprocess?"
+ PROMPT_AXIS, OBJ_QUERY_AXIS = (0, 1)
+ NO_OBJ_ID = -1
+ # Maps object ID -> [indices in packed tensor]
+ tracked_objects_packed_idx = defaultdict(list)
+ # Maps object ID -> [indices in padded tensor (abs frame index)]
+ tracked_objects_frame_idx = defaultdict(list)
+ total_num_preds = 0
+ # This will hold the packed representation of predictions.
+ vid_preds_packed: List[TensorDict] = []
+ vid_masklets_rle_packed: List[Optional[Dict]] = []
+ video_id = -1 # We assume single video postprocessing, this ID should be unique in the datapoint.
+
+ for frame_idx, (frame_outs, meta) in enumerate(
+ zip(find_stages, find_metadatas)
+ ):
+ # only store keys we need to extract the results
+ frame_outs_td = TensorDict(
+ {k: frame_outs[k] for k in self.EXPECTED_KEYS}
+ ).auto_batch_size_() # Shape is [P,Q,...]
+ meta_td = TensorDict(
+ dataclasses.asdict(meta)
+ ).auto_batch_size_() # Shape is [P,...]
+ unique_vid_id = meta.original_image_id.unique()
+ assert unique_vid_id.size(0) == 1
+ if video_id == -1:
+ video_id = unique_vid_id.item()
+ else:
+ assert (
+ video_id == unique_vid_id.item()
+ ), "We can only postprocess one video per datapoint"
+ # keeping track of which objects appear in the current frame
+ obj_ids_per_frame = frame_outs["pred_object_ids"]
+ assert obj_ids_per_frame.size(-1) == frame_outs["pred_logits"].size(-2)
+ if self.prob_thresh is not None:
+ # only keep the predictions on this frame with probability above the threshold
+ # (remove those predictions during the keep-alive period of a tracking query,
+ # where its "pred_object_ids" is still the tracked object ID rather than -1)
+ pred_probs = frame_outs["pred_logits"].sigmoid().squeeze(-1)
+ obj_ids_per_frame = torch.where(
+ pred_probs >= self.prob_thresh, obj_ids_per_frame, NO_OBJ_ID
+ )
+ tracked_obj_ids_idx = torch.where(obj_ids_per_frame != NO_OBJ_ID)
+ # Object id is a tuple of (prompt_idx, obj_id). This is because the model can assign same obj_id for two different prompts.
+ tracked_obj_ids = [
+ (p_id.item(), obj_ids_per_frame[p_id, q_id].item())
+ for p_id, q_id in zip(
+ tracked_obj_ids_idx[PROMPT_AXIS],
+ tracked_obj_ids_idx[OBJ_QUERY_AXIS],
+ )
+ ]
+ if len(tracked_obj_ids) == 0:
+ continue
+ # For each object, we keep track of the packed and padded (frame index) indices
+ for oid in tracked_obj_ids:
+ tracked_objects_packed_idx[oid].append(total_num_preds)
+ tracked_objects_frame_idx[oid].append(frame_idx)
+ total_num_preds += 1
+
+            # Since we have P*Q masks per frame, mask interpolation is the GPU-memory bottleneck (or the time bottleneck in the case of CPU processing).
+            # Instead, we first extract results only for tracked objects, reducing the number of masks to K = sum_i(tracked_objs_per_ith_prompt), hopefully <<< P*Q.
+ tracked_objs_outs_td = frame_outs_td[
+ tracked_obj_ids_idx
+ ] # [P,Q,...] --> [K,...]
+ meta_td = meta_td[tracked_obj_ids_idx[PROMPT_AXIS].cpu()]
+ if self.always_interpolate_masks_on_gpu:
+ gpu_device = meta_td["original_size"].device
+ assert gpu_device.type == "cuda"
+ tracked_objs_outs_td = tracked_objs_outs_td.to(device=gpu_device)
+            frame_results_td = self(
+                tracked_objs_outs_td.unsqueeze(1),
+                (
+                    meta_td["original_size"]
+                    if self.use_original_sizes_box
+                    else torch.ones_like(meta_td["original_size"])
+                ),
+                (
+                    meta_td["original_size"]
+                    if self.use_original_sizes_mask
+                    else torch.ones_like(meta_td["original_size"])
+                ),
+ forced_labels=(
+ meta_td["original_category_id"] if self.use_original_ids else None
+ ),
+ consistent=True,
+ ret_tensordict=True,
+ ).squeeze(1)
+ del tracked_objs_outs_td
+
+ # Optionally, remove "masks" from output tensor dict and directly encode them
+ # to RLE format under packed representations
+ if self.convert_mask_to_rle_for_video:
+ interpolated_binary_masks = frame_results_td.pop("masks")
+ rle_list = rle_encode(interpolated_binary_masks, return_areas=True)
+ vid_masklets_rle_packed.extend(rle_list)
+ # Optionally, move output TensorDict to CPU (do this after RLE encoding step above)
+ if self.to_cpu_for_video:
+ frame_results_td = frame_results_td.cpu()
+ vid_preds_packed.append(frame_results_td)
+
+ if len(vid_preds_packed) == 0:
+ logging.debug(f"Video {video_id} has no predictions")
+ return {video_id: []}
+
+ vid_preds_packed = torch.cat(vid_preds_packed, dim=0)
+ ############### Construct a padded representation of the predictions ###############
+ num_preds = len(tracked_objects_packed_idx)
+ num_frames = len(find_stages)
+ # We zero pad any missing prediction
+ # NOTE: here, we also have padded tensors for "scores" and "labels", but we overwrite them later.
+ padded_frames_results = TensorDict(
+ {
+ k: torch.zeros(
+ num_preds, num_frames, *v.shape[1:], device=v.device, dtype=v.dtype
+ )
+ for k, v in vid_preds_packed.items()
+ },
+ batch_size=[
+ num_preds,
+ num_frames,
+ ],
+ )
+ padded_frames_results["scores"][...] = -1e8 # a very low score for empty object
+ # Track scores and labels of each pred tracklet, only for frames where the model was able to track that object
+ tracklet_scores = []
+ tracklet_labels = []
+ # Optionally, fill the list of RLEs for masklets
+ # note: only frames with actual predicted masks (in packed format) will be
+        # filled with RLEs; the rest will remain None in results["masks_rle"]
+ if self.convert_mask_to_rle_for_video:
+ vid_masklets_rle_padded = [[None] * num_frames for _ in range(num_preds)]
+ for o_idx, oid in enumerate(tracked_objects_packed_idx):
+ oid2packed_idx = tracked_objects_packed_idx[oid]
+ oid2padded_idx = tracked_objects_frame_idx[oid]
+ obj_packed_results = vid_preds_packed[oid2packed_idx]
+ padded_frames_results[o_idx][oid2padded_idx] = obj_packed_results
+ if self.convert_mask_to_rle_for_video:
+ for packed_idx, padded_idx in zip(oid2packed_idx, oid2padded_idx):
+ vid_masklets_rle_padded[o_idx][padded_idx] = (
+ vid_masklets_rle_packed[packed_idx]
+ )
+ # NOTE: We need a single confidence score per tracklet for the mAP metric.
+ # We use the average confidence score across time. (How does this impact AP?)
+ tracklet_scores.append(obj_packed_results["scores"].mean())
+ # We also need to have a unique category Id per tracklet.
+ # This is not a problem for phrase AP, however, for mAP we do majority voting across time.
+ tracklet_labels.append(obj_packed_results["labels"].mode()[0])
+
+ results = padded_frames_results.to_dict()
+ results["scores"] = torch.stack(tracklet_scores, dim=0)
+ results["labels"] = torch.stack(tracklet_labels, dim=0)
+ if self.convert_mask_to_rle_for_video:
+ results["masks_rle"] = vid_masklets_rle_padded
+ # we keep the frame-level scores since it's needed by some evaluation scripts
+ results["per_frame_scores"] = padded_frames_results["scores"]
+
+ return {video_id: results}
+
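+# Illustrative note on the packed -> padded conversion in `process_results` above
+# (a toy example, not code from the pipeline): suppose object (prompt 0, obj 3) is
+# tracked in frames {0, 2} and object (prompt 1, obj 7) only in frame 1 of a
+# 3-frame video. The packed predictions then hold three rows in temporal order:
+#
+#     packed index : 0        1        2
+#     object id    : (0, 3)   (1, 7)   (0, 3)
+#     frame index  : 0        1        2
+#
+# and the padded result has batch shape [num_objects=2, num_frames=3], with
+# zero-filled boxes/masks (and a very low score) wherever an object is untracked.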
+
+class PostProcessTracking(PostProcessImage):
+    """This module converts the model's output into per-object tracking results keyed by (media_id, object_id, frame_index)"""
+
+ def __init__(
+ self,
+ max_dets_per_img: int,
+ iou_type="bbox",
+ force_single_mask: bool = False,
+ **kwargs,
+ ) -> None:
+ super().__init__(max_dets_per_img=max_dets_per_img, iou_type=iou_type, **kwargs)
+ self.force_single_mask = force_single_mask
+
+ def process_results(
+ self, find_stages, find_metadatas: BatchedInferenceMetadata, **kwargs
+ ):
+ assert len(find_stages) == len(find_metadatas)
+ results = {}
+ for outputs, meta in zip(find_stages, find_metadatas):
+ if self.force_single_mask:
+ scores, labels = outputs["pred_logits"].max(-1)
+ m = []
+ for i in range(len(outputs["pred_masks"])):
+ score, idx = scores[i].max(0)
+ m.append(outputs["pred_masks"][i][idx])
+ outputs["pred_masks"] = torch.stack(m, 0).unsqueeze(1)
+            detection_results = self(
+                outputs, meta.original_size, meta.original_size, consistent=False
+            )
+ assert len(detection_results) == len(meta.coco_image_id)
+ results.update(
+ {
+ (media_id.item(), object_id.item(), frame_index.item()): result
+ for media_id, object_id, frame_index, result in zip(
+ meta.original_image_id,
+ meta.object_id,
+ meta.frame_index,
+ detection_results,
+ )
+ }
+ )
+ return results
+
+
+class PostProcessCounting(nn.Module):
+ """This module converts the model's output to be evaluated for counting tasks"""
+
+ def __init__(
+ self,
+ use_original_ids: bool = False,
+ threshold: float = 0.5,
+ use_presence: bool = False,
+ ) -> None:
+        """
+        Args:
+            use_original_ids: whether to use the original image ids or the coco ids
+            threshold: threshold for counting (values above this are counted)
+            use_presence: whether to multiply the query scores by the presence head's probability
+        """
+ super().__init__()
+ self.use_original_ids = use_original_ids
+ self.threshold = threshold
+ self.use_presence = use_presence
+
+ def forward(self, outputs, target_sizes):
+ """Perform the computation
+ Parameters:
+ outputs: raw outputs of the model
+            target_sizes: tensor of dimension [batch_size x 2] containing the size of each image of the batch
+ """
+ # Extract scores from model outputs and apply sigmoid
+ scores = torch.sigmoid(outputs["pred_logits"]).squeeze(-1) # [B, N]
+ if self.use_presence:
+ presence_score = outputs["presence_logit_dec"].sigmoid()
+ if presence_score.ndim == 1:
+ presence_score = presence_score.unsqueeze(1) # [B, 1]
+ scores = scores * presence_score # [B, N]
+
+ # Calculate counts by summing values above threshold
+ counts = (scores > self.threshold).float().sum(dim=1)
+
+ assert len(counts) == len(target_sizes)
+ results = []
+ for count in counts:
+ results.append({"count": count.item()})
+
+ return results
+
+ @torch.no_grad()
+ def process_results(
+ self, find_stages, find_metadatas: List[BatchedInferenceMetadata], **kwargs
+ ):
+ assert len(find_stages) == len(find_metadatas)
+ results = {}
+ for outputs, meta in zip(find_stages, find_metadatas):
+ detection_results = self(
+ outputs,
+ meta.original_size,
+ )
+ ids = (
+ meta.original_image_id if self.use_original_ids else meta.coco_image_id
+ )
+ assert len(detection_results) == len(ids)
+ for img_id, result in zip(ids, detection_results):
+ results[img_id.item()] = result
+
+ return results
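+
+
+# Illustrative sketch of `PostProcessCounting` (the shapes and values are assumptions):
+#
+#     post = PostProcessCounting(threshold=0.5)
+#     outputs = {"pred_logits": torch.tensor([[[2.0], [-1.0], [0.5]]])}  # [B=1, N=3, 1]
+#     post(outputs, torch.tensor([[480, 640]]))
+#     # -> [{"count": 2.0}]  since sigmoid(2.0) ~= 0.88 and sigmoid(0.5) ~= 0.62 exceed 0.5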
diff --git a/sam3/eval/saco_veval_eval.py b/sam3/eval/saco_veval_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f0ed2b61503455e879d3b2fc095adced3131f8f
--- /dev/null
+++ b/sam3/eval/saco_veval_eval.py
@@ -0,0 +1,155 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import argparse
+import json
+import os
+from collections import defaultdict
+
+from iopath.common.file_io import g_pathmgr
+from sam3.eval.saco_veval_evaluators import (
+ VideoCGF1Evaluator,
+ VideoPhraseApEvaluator,
+ VideoPhraseHotaEvaluator,
+ VideoTetaEvaluator,
+ YTVISPredFileEvaluator,
+)
+
+
+class VEvalEvaluator:
+ def __init__(self, gt_annot_file: str, eval_res_file: str):
+ self.gt_annot_file = gt_annot_file
+ self.eval_res_file = eval_res_file
+ self.evaluators = [
+ # mAP
+ YTVISPredFileEvaluator(gt_annot_file),
+ # Phrase AP
+ VideoPhraseApEvaluator(gt_annot_file),
+ # TETA
+ VideoTetaEvaluator(gt_annot_file, use_mask=True, is_exhaustive=True),
+ # HOTA
+ VideoPhraseHotaEvaluator(gt_annot_file),
+ # cgF1
+ VideoCGF1Evaluator(gt_annot_file),
+ ]
+
+ def run_eval(self, pred_file: str):
+ dataset_results = {}
+ video_np_results = defaultdict(dict)
+ for evaluator in self.evaluators:
+ d_res, v_np_res = evaluator.evaluate(pred_file)
+ dataset_results.update(d_res)
+ for (video_id, category_id), res in v_np_res.items():
+ video_np_results[(video_id, category_id)].update(res)
+
+ if len(dataset_results) == 0:
+ dataset_results = {"": 0.0}
+
+ formatted_video_np_results = [
+ {"video_id": video_id, "category_id": category_id, **res}
+ for (video_id, category_id), res in video_np_results.items()
+ ]
+ eval_metrics = {
+ "dataset_results": dataset_results,
+ "video_np_results": formatted_video_np_results,
+ }
+
+ with g_pathmgr.open(self.eval_res_file, "w") as f:
+ json.dump(eval_metrics, f)
+
+ return eval_metrics
+
+
+def run_main_all(dataset_name, args):
+ gt_annot_file = os.path.join(args.gt_annot_dir, dataset_name + ".json")
+ pred_file = os.path.join(args.pred_dir, dataset_name + "_preds.json")
+ eval_res_file = os.path.join(args.eval_res_dir, dataset_name + "_eval_res.json")
+ print(f"=== Running evaluation for Pred {pred_file} vs GT {gt_annot_file} ===")
+ veval_evaluator = VEvalEvaluator(
+ gt_annot_file=gt_annot_file, eval_res_file=eval_res_file
+ )
+ _ = veval_evaluator.run_eval(pred_file=pred_file)
+
+ print(f"=== Results saved to {eval_res_file} ===")
+
+
+def main_all(args):
+ saco_veval_dataset_names = [
+ "saco_veval_sav_test",
+ "saco_veval_sav_val",
+ "saco_veval_yt1b_test",
+ "saco_veval_yt1b_val",
+ "saco_veval_smartglasses_test",
+ "saco_veval_smartglasses_val",
+ ]
+
+    # multiprocessing may not really work here since the inner evaluators also use multiprocessing,
+    # so we just loop over the datasets sequentially
+ for dataset_name in saco_veval_dataset_names:
+ print(f"=== Running evaluation for dataset {dataset_name} ===")
+ run_main_all(dataset_name=dataset_name, args=args)
+
+
+def main_one(args):
+ gt_annot_file = args.gt_annot_file
+ pred_file = args.pred_file
+ eval_res_file = args.eval_res_file
+
+ print(f"=== Running evaluation for Pred {pred_file} vs GT {gt_annot_file} ===")
+ veval_evaluator = VEvalEvaluator(
+ gt_annot_file=gt_annot_file, eval_res_file=eval_res_file
+ )
+ _ = veval_evaluator.run_eval(pred_file=pred_file)
+
+ print(f"=== Results saved to {eval_res_file} ===")
+
+
+def main():
+ parser = argparse.ArgumentParser(description="Run video grounding evaluators")
+
+ # Create subparsers for different commands
+ subparsers = parser.add_subparsers(dest="command", required=True)
+
+ # Run evaluation for all datasets
+ all_parser = subparsers.add_parser("all", help="Run evaluation for all datasets")
+ all_parser.add_argument(
+ "--gt_annot_dir",
+ type=str,
+ help="Directory that contains the ground truth annotation files",
+ )
+ all_parser.add_argument(
+ "--pred_dir",
+ type=str,
+ help="Directory that contains the prediction files",
+ )
+ all_parser.add_argument(
+ "--eval_res_dir",
+ type=str,
+ help="Directory that contains the eval results files",
+ )
+ all_parser.set_defaults(func=main_all)
+
+ # Run evaluation for one dataset
+ one_parser = subparsers.add_parser("one", help="Run evaluation for one dataset")
+ one_parser.add_argument(
+ "--gt_annot_file",
+ type=str,
+ help="Path to the ground truth annotation file",
+ )
+ one_parser.add_argument(
+ "--pred_file",
+ type=str,
+ help="Path to the prediction file",
+ )
+ one_parser.add_argument(
+ "--eval_res_file",
+ type=str,
+ help="Path to the eval results file",
+ )
+ one_parser.set_defaults(func=main_one)
+
+ # Parse and dispatch
+ args = parser.parse_args()
+ args.func(args)
+
+
+if __name__ == "__main__":
+ main()
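+
+
+# Example invocations (the paths below are placeholders):
+#
+#   python sam3/eval/saco_veval_eval.py all \
+#       --gt_annot_dir /path/to/gt_annotations \
+#       --pred_dir /path/to/predictions \
+#       --eval_res_dir /path/to/eval_results
+#
+#   python sam3/eval/saco_veval_eval.py one \
+#       --gt_annot_file /path/to/saco_veval_sav_val.json \
+#       --pred_file /path/to/saco_veval_sav_val_preds.json \
+#       --eval_res_file /path/to/saco_veval_sav_val_eval_res.json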
diff --git a/sam3/eval/saco_veval_evaluators.py b/sam3/eval/saco_veval_evaluators.py
new file mode 100644
index 0000000000000000000000000000000000000000..4947472c9238cd791f6d06e710b6d064bf11822f
--- /dev/null
+++ b/sam3/eval/saco_veval_evaluators.py
@@ -0,0 +1,838 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import json
+import os
+import tempfile
+from collections import defaultdict
+from typing import Dict, Optional, Sequence, Tuple
+
+import numpy as np
+import pycocotools.mask
+from sam3.eval.cgf1_eval import CGF1_METRICS
+from sam3.eval.conversion_util import (
+ convert_ytbvis_to_cocovid_gt,
+ convert_ytbvis_to_cocovid_pred,
+)
+from sam3.eval.hota_eval_toolkit.run_ytvis_eval import run_ytvis_eval
+from sam3.eval.teta_eval_toolkit import config, Evaluator, metrics
+from sam3.eval.teta_eval_toolkit.datasets import COCO, TAO
+from sam3.eval.ytvis_coco_wrapper import YTVIS
+from sam3.eval.ytvis_eval import VideoDemoF1Eval, YTVISeval
+from sam3.train.nms_helper import process_frame_level_nms, process_track_level_nms
+
+
+def _get_metric_index(metric_name: str, iou_threshold: Optional[float] = None) -> int:
+ """
+ Find the index of a metric in CGF1_METRICS by name and IoU threshold.
+
+ Args:
+ metric_name: Name of the metric (e.g., "cgF1", "precision", "recall")
+ iou_threshold: IoU threshold (None for average over 0.5:0.95, or specific value like 0.5, 0.75)
+
+ Returns:
+ Index of the metric in CGF1_METRICS
+
+ Raises:
+ ValueError: If metric not found
+ """
+ for idx, metric in enumerate(CGF1_METRICS):
+ if metric.name == metric_name and metric.iou_threshold == iou_threshold:
+ return idx
+ raise ValueError(
+ f"Metric '{metric_name}' with IoU threshold {iou_threshold} not found in CGF1_METRICS"
+ )
+
+
+class BasePredFileEvaluator:
+ """A base class for evaluating a prediction file."""
+
+ pass
+
+
+class YTVISPredFileEvaluator(BasePredFileEvaluator):
+ """Evaluate class mAP for YT-VIS prediction files."""
+
+ def __init__(
+ self,
+ gt_ann_file: str,
+ dataset_name: str = "video",
+ iou_types: Optional[Sequence[str]] = None,
+ ):
+ self.gt_ann_file = gt_ann_file
+ self.dataset_name = dataset_name
+ self.iou_types = list(iou_types) if iou_types is not None else ["bbox", "segm"]
+ assert all(iou_type in ["bbox", "segm"] for iou_type in self.iou_types)
+
+    def evaluate(self, pred_file: str) -> Tuple[Dict[str, float], Dict]:
+ # use our internal video evaluation toolkit for YT-VIS pred file
+ # (i.e. the same one we're using for video phrase AP)
+ results = {}
+ use_cats = True # YT-VIS mAP evaluation uses categories
+ ytvisGT = YTVIS(self.gt_ann_file, ignore_gt_cats=not use_cats)
+ # the original YT-VIS GT annotations have uncompressed RLEs ("counts" is an integer list)
+ # rather than compressed RLEs ("counts" is a string), so we first convert them here.
+ if "segm" in self.iou_types:
+ for ann in ytvisGT.dataset["annotations"]:
+ ann["segmentations"] = [
+ _compress_rle(rle) for rle in ann["segmentations"]
+ ]
+
+ with open(pred_file) as f:
+ dt = json.load(f)
+ # Our prediction file saves "video_id" and absolute (unnormalized) boxes.
+ # Note that we should use the official (original) YT-VIS annotations (i.e. the one
+ # saved via "scripts/datasets/training/ytvis_split.py", instead of the one saved
+ # via "scripts/api_db_to_ytvis_json.py") in this evaluator, which contain absolute
+ # boxes coordinates in its GT annotations.
+ for d in dt:
+ d["image_id"] = d["video_id"]
+ ytvisDT = ytvisGT.loadRes(dt)
+
+ for iou_type in self.iou_types:
+ ytvisEval = YTVISeval(ytvisGT, ytvisDT, iou_type)
+
+ # set the area ranges for small, medium, and large objects (using
+ # absolute pixel areas) as in the official YT-VIS evaluation toolkit:
+ # https://github.com/achalddave/ytvosapi/blob/eca601117c9f86bad084cb91f1d918e9ab665a75/PythonAPI/ytvostools/ytvoseval.py#L538
+ ytvisEval.params.areaRng = [
+ [0**2, 1e5**2],
+ [0**2, 128**2],
+ [128**2, 256**2],
+ [256**2, 1e5**2],
+ ]
+ ytvisEval.params.areaRngLbl = ["all", "small", "medium", "large"]
+ ytvisEval.params.useCats = use_cats
+
+ ytvisEval.evaluate()
+ ytvisEval.accumulate()
+ ytvisEval.summarize()
+ result_key = f"{self.dataset_name}_{'mask' if iou_type == 'segm' else 'bbox'}_mAP_50_95"
+ results[result_key] = ytvisEval.stats[0]
+
+ # video-NP level results not supported for `YTVISPredFileEvaluator` yet
+ video_np_level_results = {}
+ return results, video_np_level_results
+
+
+class VideoPhraseApEvaluator(BasePredFileEvaluator):
+ """Evaluate Video Phrase AP with YT-VIS format prediction and GT files."""
+
+ def __init__(
+ self,
+ gt_ann_file: str,
+ dataset_name: str = "video",
+ iou_types: Optional[Sequence[str]] = None,
+ ):
+ self.gt_ann_file = gt_ann_file
+ self.dataset_name = dataset_name
+ self.iou_types = list(iou_types) if iou_types is not None else ["bbox", "segm"]
+ assert all(iou_type in ["bbox", "segm"] for iou_type in self.iou_types)
+
+    def evaluate(self, pred_file: str) -> Tuple[Dict[str, float], Dict]:
+ with open(self.gt_ann_file) as f:
+ gt = json.load(f)
+ with open(pred_file) as f:
+ dt = json.load(f)
+ # For phrase AP and demo F1 evaluation, we need to remap each pair of (video_id, category_id) to
+ # a new unique video_id, so that we don't mix detections from different categories under `useCat=False`
+ gt, dt = remap_video_category_pairs_to_unique_video_ids(gt, dt)
+ if "segm" in self.iou_types:
+ for ann in gt["annotations"]:
+ ann["segmentations"] = [
+ _compress_rle(rle) for rle in ann["segmentations"]
+ ]
+ for d in dt:
+ d["image_id"] = d["video_id"]
+
+ results = {}
+ use_cats = False # Phrase AP evaluation does not use categories
+ ytvisGT = YTVIS(annotation_file=None, ignore_gt_cats=not use_cats)
+ ytvisGT.dataset = gt
+ ytvisGT.createIndex()
+ ytvisDT = ytvisGT.loadRes(dt)
+
+ for iou_type in self.iou_types:
+ phraseApEval = YTVISeval(ytvisGT, ytvisDT, iou_type)
+
+ # set the area ranges for small, medium, and large objects (using
+ # absolute pixel areas) as in the official YT-VIS evaluation toolkit:
+ # https://github.com/achalddave/ytvosapi/blob/eca601117c9f86bad084cb91f1d918e9ab665a75/PythonAPI/ytvostools/ytvoseval.py#L538
+ phraseApEval.params.areaRng = [
+ [0**2, 1e5**2],
+ [0**2, 128**2],
+ [128**2, 256**2],
+ [256**2, 1e5**2],
+ ]
+ phraseApEval.params.areaRngLbl = ["all", "small", "medium", "large"]
+ phraseApEval.params.useCats = use_cats
+
+ phraseApEval.evaluate()
+ phraseApEval.accumulate()
+ phraseApEval.summarize()
+ result_prefix = f"{self.dataset_name}"
+ result_prefix += f"_{'mask' if iou_type == 'segm' else 'bbox'}_phrase_ap"
+ # fetch Phrase AP results from the corresponding indices in `phraseApEval.stats`
+ # (see `_summarizeDets` in https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocotools/cocoeval.py)
+ results[result_prefix + "_50_95"] = phraseApEval.stats[0] # IoU=0.5:0.95
+ results[result_prefix + "_50"] = phraseApEval.stats[1] # IoU=0.5
+ results[result_prefix + "_75"] = phraseApEval.stats[2] # IoU=0.75
+
+ # video-NP level results not supported for `VideoPhraseApEvaluator` yet
+ video_np_level_results = {}
+ return results, video_np_level_results
+
+
+class VideoCGF1Evaluator(BasePredFileEvaluator):
+ """Evaluate Video Demo F1 with YT-VIS format prediction and GT files."""
+
+ def __init__(
+ self,
+ gt_ann_file: str,
+ dataset_name: str = "video",
+ prob_thresh: float = 0.5,
+ iou_types: Optional[Sequence[str]] = None,
+ ):
+ self.gt_ann_file = gt_ann_file
+ self.dataset_name = dataset_name
+ self.prob_thresh = prob_thresh
+ self.iou_types = list(iou_types) if iou_types is not None else ["bbox", "segm"]
+ assert all(iou_type in ["bbox", "segm"] for iou_type in self.iou_types)
+
+    def evaluate(self, pred_file: str) -> Tuple[Dict[str, float], Dict]:
+ with open(self.gt_ann_file) as f:
+ gt = json.load(f)
+ with open(pred_file) as f:
+ dt = json.load(f)
+        # IL_MCC and CG-F1 can only be computed if the GT JSON has a "video_np_pairs" key
+ compute_ilmcc_and_cgf1 = "video_np_pairs" in gt
+ if not compute_ilmcc_and_cgf1:
+ print(
+                f"Warning: IL_MCC and CG-F1 are not computed for {pred_file=} as the GT JSON does not have a 'video_np_pairs' key"
+ )
+ # For phrase AP and demo F1 evaluation, we need to remap each pair of (video_id, category_id) to
+ # a new unique video_id, so that we don't mix detections from different categories under `useCat=False`
+ gt, dt = remap_video_category_pairs_to_unique_video_ids(
+ gt, dt, add_negative_np_pairs=compute_ilmcc_and_cgf1
+ )
+ if "segm" in self.iou_types:
+ for ann in gt["annotations"]:
+ ann["segmentations"] = [
+ _compress_rle(rle) for rle in ann["segmentations"]
+ ]
+ for d in dt:
+ d["image_id"] = d["video_id"]
+
+ results = {}
+ use_cats = False # Demo F1 evaluation does not use categories
+ ytvisGT = YTVIS(annotation_file=None, ignore_gt_cats=not use_cats)
+ ytvisGT.dataset = gt
+ ytvisGT.createIndex()
+ ytvisDT = ytvisGT.loadRes(dt)
+
+ video_np_level_results = {}
+ for iou_type in self.iou_types:
+ demoF1Eval = VideoDemoF1Eval(ytvisGT, ytvisDT, iou_type, self.prob_thresh)
+
+ demoF1Eval.params.useCats = use_cats
+ demoF1Eval.params.areaRng = [[0**2, 1e5**2]]
+ demoF1Eval.params.areaRngLbl = ["all"]
+ demoF1Eval.params.maxDets = [100000]
+
+ demoF1Eval.evaluate()
+ demoF1Eval.accumulate()
+ demoF1Eval.summarize()
+ result_prefix = f"{self.dataset_name}"
+ result_prefix += f"_{'mask' if iou_type == 'segm' else 'bbox'}_demo"
+
+ stats = demoF1Eval.stats
+
+ if compute_ilmcc_and_cgf1:
+ # Average IoU threshold (0.5:0.95)
+ cgf1_micro_avg_idx = _get_metric_index("cgF1", None)
+ positive_micro_f1_avg_idx = _get_metric_index("positive_micro_F1", None)
+ ilmcc_avg_idx = _get_metric_index("IL_MCC", None)
+ results[result_prefix + "_cgf1_micro_50_95"] = stats[cgf1_micro_avg_idx]
+ results[result_prefix + "_ilmcc_50_95"] = stats[ilmcc_avg_idx]
+ results[result_prefix + "_positive_micro_f1_50_95"] = stats[
+ positive_micro_f1_avg_idx
+ ]
+
+ # IoU = 0.5
+ cgf1_micro_50_idx = _get_metric_index("cgF1", 0.5)
+ positive_micro_f1_50_idx = _get_metric_index("positive_micro_F1", 0.5)
+ results[result_prefix + "_cgf1_micro_50"] = stats[cgf1_micro_50_idx]
+ results[result_prefix + "_ilmcc_50"] = float(
+ np.array(stats[cgf1_micro_50_idx])
+ / np.array(stats[positive_micro_f1_50_idx])
+ )
+ results[result_prefix + "_positive_micro_f1_50"] = stats[
+ positive_micro_f1_50_idx
+ ]
+
+ # IoU = 0.75
+ cgf1_micro_75_idx = _get_metric_index("cgF1", 0.75)
+ positive_micro_f1_75_idx = _get_metric_index("positive_micro_F1", 0.75)
+ results[result_prefix + "_cgf1_micro_75"] = stats[cgf1_micro_75_idx]
+ results[result_prefix + "_ilmcc_75"] = float(
+ np.array(stats[cgf1_micro_75_idx])
+ / np.array(stats[positive_micro_f1_75_idx])
+ )
+ results[result_prefix + "_positive_micro_f1_75"] = stats[
+ positive_micro_f1_75_idx
+ ]
+
+ self.extract_video_np_level_results(demoF1Eval, video_np_level_results)
+
+ return results, video_np_level_results
+
+ def extract_video_np_level_results(self, demoF1Eval, video_np_level_results):
+ """Aggregate statistics for video-level metrics."""
+ num_iou_thrs = len(demoF1Eval.params.iouThrs)
+ iou_50_index = int(np.where(demoF1Eval.params.iouThrs == 0.5)[0])
+ iou_75_index = int(np.where(demoF1Eval.params.iouThrs == 0.75)[0])
+
+ result_prefix = "mask" if demoF1Eval.params.iouType == "segm" else "bbox"
+
+ assert len(demoF1Eval.evalImgs) == len(demoF1Eval.cocoGt.dataset["images"])
+ for i, video in enumerate(demoF1Eval.cocoGt.dataset["images"]):
+ # the original video id and category id before remapping
+ video_id = video["orig_video_id"]
+ category_id = video["orig_category_id"]
+ eval_img_dict = demoF1Eval.evalImgs[i]
+
+ TPs = eval_img_dict.get("TPs", np.zeros(num_iou_thrs, dtype=np.int64))
+ FPs = eval_img_dict.get("FPs", np.zeros(num_iou_thrs, dtype=np.int64))
+ FNs = eval_img_dict.get("FNs", np.zeros(num_iou_thrs, dtype=np.int64))
+ assert len(TPs) == len(FPs) == len(FNs) == num_iou_thrs
+ # F1 = 2*TP / (2*TP + FP + FN), and we set F1 to 1.0 if denominator is 0
+ denominator = 2 * TPs + FPs + FNs
+ F1s = np.where(denominator > 0, 2 * TPs / np.maximum(denominator, 1), 1.0)
+ local_results = {
+ f"{result_prefix}_TP_50_95": float(TPs.mean()),
+ f"{result_prefix}_FP_50_95": float(FPs.mean()),
+ f"{result_prefix}_FN_50_95": float(FNs.mean()),
+ f"{result_prefix}_F1_50_95": float(F1s.mean()),
+ f"{result_prefix}_TP_50": float(TPs[iou_50_index]),
+ f"{result_prefix}_FP_50": float(FPs[iou_50_index]),
+ f"{result_prefix}_FN_50": float(FNs[iou_50_index]),
+ f"{result_prefix}_F1_50": float(F1s[iou_50_index]),
+ f"{result_prefix}_TP_75": float(TPs[iou_75_index]),
+ f"{result_prefix}_FP_75": float(FPs[iou_75_index]),
+ f"{result_prefix}_FN_75": float(FNs[iou_75_index]),
+ f"{result_prefix}_F1_75": float(F1s[iou_75_index]),
+ }
+ if (video_id, category_id) not in video_np_level_results:
+ video_np_level_results[(video_id, category_id)] = {}
+ video_np_level_results[(video_id, category_id)].update(local_results)
+
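+# Worked example for the F1 formula in `extract_video_np_level_results` above:
+# with TP=3, FP=1, FN=2 at a given IoU threshold, F1 = 2*3 / (2*3 + 1 + 2) = 6/9 ~= 0.667;
+# when TP = FP = FN = 0 (empty prediction matching empty ground truth), F1 is set to 1.0.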
+
+class VideoTetaEvaluator(BasePredFileEvaluator):
+ """Evaluate TETA metric using YouTubeVIS format prediction and GT files."""
+
+ def __init__(
+ self,
+ gt_ann_file: str,
+ dataset_name: str = "video",
+ tracker_name: str = "Sam3",
+ nms_threshold: float = 0.5,
+ nms_strategy: str = "none", # "track", "frame", or "none"
+ prob_thresh: float = 0.5,
+ is_exhaustive: bool = False,
+ use_mask: bool = False,
+ num_parallel_cores: int = 8,
+ ):
+ self.gt_ann_file = gt_ann_file
+ self.dataset_name = dataset_name
+ self.tracker_name = tracker_name
+ self.nms_threshold = nms_threshold
+ self.nms_strategy = nms_strategy.lower() # Convert to lowercase for consistency
+ self.prob_thresh = prob_thresh
+ self.metric_prefix = "TETA"
+ self.is_exhaustive = is_exhaustive
+ self.use_mask = use_mask
+ self.num_parallel_cores = num_parallel_cores
+
+ # Verify NMS strategy is valid
+ valid_strategies = ["track", "frame", "none"]
+ print("current nms_strategy:", self.nms_strategy)
+ if self.nms_strategy not in valid_strategies:
+ raise ValueError(
+ f"Invalid NMS strategy: {self.nms_strategy}. Must be one of {valid_strategies}"
+ )
+
+ print(f"Initialized VideoTetaEvaluator with NMS strategy: {self.nms_strategy}")
+ print(f"Probability threshold set to: {self.prob_thresh}")
+ print(f"Dataset exhaustivity set to: {self.is_exhaustive}")
+ print(f"Tracker name set to: {self.tracker_name}")
+ print(f"Dataset name set to: {self.dataset_name}")
+ print(f"Use mask set to: {self.use_mask}")
+
+ def process_predictions(self, pred_file: str, tmp_dir: str) -> str:
+ """Process predictions with selected NMS strategy"""
+ with open(pred_file, "r") as f:
+ raw_preds = json.load(f)
+ print(f"Processing predictions with {self.nms_strategy} NMS strategy")
+
+ # Filter by score threshold
+ if self.prob_thresh > 0:
+ raw_preds = [d for d in raw_preds if d["score"] >= self.prob_thresh]
+ print(
+ f"Filtered to {len(raw_preds)} predictions with score >= {self.prob_thresh}"
+ )
+ # Group predictions by video_id
+ video_groups = defaultdict(list)
+ for pred in raw_preds:
+ video_groups[pred["video_id"]].append(pred)
+ # Process based on NMS strategy
+ if self.nms_strategy == "track":
+ process_track_level_nms(video_groups, nms_threshold=self.nms_threshold)
+ elif self.nms_strategy == "frame":
+ process_frame_level_nms(video_groups, nms_threshold=self.nms_threshold)
+ elif self.nms_strategy == "none":
+ print("Skipping NMS processing as strategy is set to 'none'")
+ # No processing needed for "none" strategy
+ # Save processed predictions
+ processed_preds = [
+ track for tracks in video_groups.values() for track in tracks
+ ]
+ processed_path = os.path.join(tmp_dir, "processed_preds.json")
+ with open(processed_path, "w") as f:
+ json.dump(processed_preds, f)
+
+ print(f"Saved processed predictions to {processed_path}")
+ return processed_path
+
+ def evaluate(self, pred_file: str) -> Tuple[Dict[str, float], Dict]:
+ """Main evaluation method"""
+
+ print(f"Evaluating TETA Metric with {self.nms_strategy.upper()} NMS strategy")
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ # Process predictions first
+ processed_pred_file = self.process_predictions(pred_file, tmp_dir)
+
+ # Convert GT to COCO-vid format
+ gt_dir = os.path.join(tmp_dir, "gt")
+ os.makedirs(gt_dir, exist_ok=True)
+ gt_coco_path = os.path.join(gt_dir, "annotations.json")
+ convert_ytbvis_to_cocovid_gt(self.gt_ann_file, gt_coco_path)
+
+ # Convert processed predictions to COCO-vid format
+ pred_dir = os.path.join(tmp_dir, "predictions")
+ tracker_dir = os.path.join(pred_dir, self.tracker_name)
+ os.makedirs(tracker_dir, exist_ok=True)
+ pred_coco_path = os.path.join(tracker_dir, "track_results_cocofmt.json")
+ convert_ytbvis_to_cocovid_pred(
+ youtubevis_pred_path=processed_pred_file,
+ converted_dataset_path=gt_coco_path,
+ output_path=pred_coco_path,
+ )
+ # Configure TETA evaluator
+ default_eval_config = config.get_default_eval_config()
+ default_eval_config["PRINT_ONLY_COMBINED"] = True
+ default_eval_config["DISPLAY_LESS_PROGRESS"] = True
+ default_eval_config["OUTPUT_TEMP_RAW_DATA"] = True
+ default_eval_config["NUM_PARALLEL_CORES"] = self.num_parallel_cores
+ default_dataset_config = config.get_default_dataset_config()
+ default_dataset_config["TRACKERS_TO_EVAL"] = [self.tracker_name]
+ default_dataset_config["GT_FOLDER"] = gt_dir
+ default_dataset_config["OUTPUT_FOLDER"] = pred_dir
+ default_dataset_config["TRACKER_SUB_FOLDER"] = tracker_dir
+ default_dataset_config["USE_MASK"] = self.use_mask
+
+ evaluator = Evaluator(default_eval_config)
+ if self.is_exhaustive:
+ dataset_list = [COCO(default_dataset_config)]
+ dataset_parsing_key = "COCO"
+ else:
+ dataset_list = [TAO(default_dataset_config)]
+ dataset_parsing_key = "TAO"
+
+ # Run evaluation
+ eval_results, _ = evaluator.evaluate(
+ dataset_list, [metrics.TETA(exhaustive=self.is_exhaustive)]
+ )
+
+ # Extract and format results
+ results = {
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_teta": float(
+ eval_results[dataset_parsing_key]["TETA"][0]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_loc_a": float(
+ eval_results[dataset_parsing_key]["TETA"][1]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_assoc_a": float(
+ eval_results[dataset_parsing_key]["TETA"][2]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_cls_a": float(
+ eval_results[dataset_parsing_key]["TETA"][3]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_loc_re": float(
+ eval_results[dataset_parsing_key]["TETA"][4]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_loc_pr": float(
+ eval_results[dataset_parsing_key]["TETA"][5]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_assoc_re": float(
+ eval_results[dataset_parsing_key]["TETA"][6]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_assoc_pr": float(
+ eval_results[dataset_parsing_key]["TETA"][7]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_cls_re": float(
+ eval_results[dataset_parsing_key]["TETA"][8]
+ ),
+ f"{self.dataset_name}_{'mask' if self.use_mask else 'bbox'}_cls_pr": float(
+ eval_results[dataset_parsing_key]["TETA"][9]
+ ),
+ }
+
+ # video-NP level results not supported for `VideoTetaEvaluator` yet
+ video_np_level_results = {}
+ return results, video_np_level_results
+
+
+class VideoPhraseHotaEvaluator(BasePredFileEvaluator):
+ """Evaluate Video Phrase HOTA with YT-VIS format prediction and GT files."""
+
+ def __init__(
+ self,
+ gt_ann_file: str,
+ dataset_name: str = "video",
+ prob_thresh: float = 0.5,
+ iou_types: Optional[Sequence[str]] = None,
+ compute_video_mot_hota: bool = False,
+ ):
+ self.gt_ann_file = gt_ann_file
+ self.dataset_name = dataset_name
+ self.prob_thresh = prob_thresh
+ self.metric_prefix = "phrase"
+ # the list of metrics to collect from the HOTA evaluation results
+ self.metric_to_collect = [
+ "HOTA",
+ "DetA",
+ "AssA",
+ "DetRe",
+ "DetPr",
+ "AssRe",
+ "AssPr",
+ "LocA",
+ "OWTA",
+ ]
+ self.iou_types = list(iou_types) if iou_types is not None else ["bbox", "segm"]
+ assert all(iou_type in ["bbox", "segm"] for iou_type in self.iou_types)
+
+ # If True, compute video MOT HOTA, aggregating predictions/GT from all categories.
+ self.compute_video_mot_hota = compute_video_mot_hota
+
+    def evaluate(self, pred_file: str) -> Tuple[Dict[str, float], Dict]:
+ # use the YT-VIS evaluation toolkit in TrackEval
+
+ with open(self.gt_ann_file) as f:
+ gt = json.load(f)
+ with open(pred_file) as f:
+ dt = json.load(f)
+ # keep only predictions with score above the probability threshold
+ dt = [d for d in dt if d["score"] > self.prob_thresh]
+ for d in dt:
+ assert len(d["areas"]) == len(d["bboxes"])
+ assert len(d["areas"]) == len(d["segmentations"])
+            # remove empty boxes (otherwise they will count as false positives during the
+            # per-frame detection accuracy computation in HOTA evaluation)
+ for t in range(len(d["bboxes"])):
+ bbox = d["bboxes"][t]
+ if d["areas"][t] == 0 or bbox is None or all(x == 0 for x in bbox):
+ d["segmentations"][t] = None
+ d["bboxes"][t] = None
+ d["areas"][t] = None
+            # check that box occurrence and mask occurrence are consistent
+ for bbox, mask, area in zip(d["bboxes"], d["segmentations"], d["areas"]):
+ assert (area is None) == (bbox is None)
+ assert (area is None) == (mask is None)
+ # set all scores to 1.0 for HOTA evaluation (just like Demo F1, the exact score
+ # value is not used in HOTA metrics; it will be treated as a detection prediction
+ # as long as its score is above the threshold)
+ d["score"] = 1.0
+
+ # remap the GT and DT annotations for phrase HOTA evaluation
+ gt = _fill_in_ann_height_width(gt)
+ if not self.compute_video_mot_hota:
+ # remap the GT and DT annotations for phrase HOTA evaluation
+ gt, dt = self._remap_gt_dt(gt, dt)
+ else:
+ # Compute video-level MOT HOTA
+ # Apply track-level NMS
+ video_groups = defaultdict(list)
+ for pred in dt:
+ video_groups[pred["video_id"]].append(pred)
+ process_track_level_nms(video_groups, nms_threshold=0.5)
+ dt = [track for tracks in video_groups.values() for track in tracks]
+
+ # Remap GT track ids for class-agnostic HOTA
+ gt, dt = remap_gt_dt_class_agnostic(gt, dt)
+
+ # run the HOTA evaluation using TrackEval on the remapped (video_id, category_id) pairs
+ out_dict = {}
+ video_np_level_results = {}
+ for iou_type in self.iou_types:
+ output_res, _ = run_ytvis_eval(
+ args=[
+ "--METRICS",
+ "HOTA",
+ "--IOU_TYPE",
+ iou_type,
+ "--DATASET_NAME",
+ self.dataset_name,
+ "--USE_PARALLEL",
+ "True",
+ "--NUM_PARALLEL_CORES",
+ "8",
+ "--PLOT_CURVES",
+ "False",
+ "--LOG_ON_ERROR",
+ "None",
+ "--PRINT_ONLY_COMBINED",
+ "True",
+ "--OUTPUT_SUMMARY",
+ "False",
+ "--OUTPUT_DETAILED",
+ "False",
+ "--TIME_PROGRESS",
+ "False",
+ "--PRINT_CONFIG",
+ "False",
+ ],
+ gt_json=gt,
+ dt_json=dt,
+ )
+ self.extract_video_np_level_results(
+ iou_type=iou_type,
+ remapped_gt=gt,
+ raw_results=output_res[self.dataset_name]["tracker"],
+ video_np_level_results=video_np_level_results,
+ )
+
+ def _summarize_results(output_res, iou_type, field, suffix):
+ eval_res = output_res[self.dataset_name]["tracker"][field]
+ result_prefix = f"{self.dataset_name}_{'mask' if iou_type == 'segm' else 'bbox'}_{suffix}"
+ for metric_name in self.metric_to_collect:
+ eval_res_hota = eval_res["cls_comb_cls_av"]["HOTA"]
+ result_key = f"{result_prefix}_{self.metric_prefix}_{metric_name}"
+ result_value = float(np.mean(eval_res_hota[metric_name]))
+ out_dict[result_key] = result_value
+
+ _summarize_results(output_res, iou_type, "COMBINED_SEQ", "all")
+ if "COMBINED_SEQ_CHALLENGING" in output_res[self.dataset_name]["tracker"]:
+ _summarize_results(
+ output_res, iou_type, "COMBINED_SEQ_CHALLENGING", "challenging"
+ )
+
+        # return both dataset-level results and per-(video, NP) results
+ return out_dict, video_np_level_results
+
+ def _remap_gt_dt(self, gt, dt):
+ # For phrase HOTA evaluation, we need to remap each pair of (video_id, category_id) to
+ # a new unique video_id, so that we don't mix detections from different categories
+ gt, dt = remap_video_category_pairs_to_unique_video_ids(gt, dt)
+ # We further map all the categories to category_id=1 in HOTA evaluation toolkit
+ # for phrase HOTA (similar to "useCat=False" for video phrase AP)
+ remapped_category_id = 1
+ gt["categories"] = [
+ {
+ "supercategory": "object",
+ "id": remapped_category_id,
+ "name": "_REMAPPED_FOR_PHRASE_METRICS_",
+ }
+ ]
+ for ann in gt["annotations"]:
+ ann["category_id"] = remapped_category_id
+ for d in dt:
+ d["category_id"] = remapped_category_id
+ # To be compatible with the TrackEval YT-VIS evaluation toolkit, we need to give
+ # unique filenames to each remapped video, so we add remapped video_id as prefix.
+ for video in gt["videos"]:
+ new_video_id = video["id"]
+ video["file_names"] = [
+ f"remapped_vid_{new_video_id:012d}/{name}"
+ for name in video["file_names"]
+ ]
+ return gt, dt
+
+ def extract_video_np_level_results(
+ self, iou_type, remapped_gt, raw_results, video_np_level_results
+ ):
+ """Aggregate statistics for video-level metrics."""
+ result_prefix = "mask" if iou_type == "segm" else "bbox"
+ for video in remapped_gt["videos"]:
+ # the original video id and category id before remapping
+ video_id = video["orig_video_id"]
+ category_id = video["orig_category_id"]
+ video_key = f"remapped_vid_{video['id']:012d}"
+ results = raw_results[video_key]["_REMAPPED_FOR_PHRASE_METRICS_"]["HOTA"]
+
+ local_results = {}
+ for metric_name in self.metric_to_collect:
+ result_key = f"{result_prefix}_{metric_name}"
+ local_results[result_key] = float(results[metric_name].mean())
+ if (video_id, category_id) not in video_np_level_results:
+ video_np_level_results[(video_id, category_id)] = {}
+ video_np_level_results[(video_id, category_id)].update(local_results)
+
+
+class VideoClassBasedHotaEvaluator(VideoPhraseHotaEvaluator):
+ def __init__(
+ self,
+ gt_ann_file: str,
+ dataset_name: str = "video",
+ prob_thresh: float = 0.5,
+ ):
+ super().__init__(gt_ann_file, dataset_name, prob_thresh)
+ self.metric_prefix = "class"
+
+ def _remap_gt_dt(self, gt, dt):
+ return gt, dt # no remapping needed for class-based HOTA evaluation
+
+ def extract_video_np_level_results(self, *args, **kwargs):
+ pass # no video-NP level results for class-based HOTA evaluation
+
+
+def _compress_rle(rle):
+ """Convert RLEs from uncompressed (integer list) to compressed (string) format."""
+ if rle is None:
+ return None
+ if isinstance(rle["counts"], list):
+ rle = pycocotools.mask.frPyObjects(rle, rle["size"][0], rle["size"][1])
+ rle["counts"] = rle["counts"].decode()
+ return rle
+
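+# Illustrative example for `_compress_rle` above (the RLE values are made up): an
+# uncompressed RLE such as {"size": [4, 4], "counts": [6, 4, 6]} is converted by
+# `pycocotools.mask.frPyObjects` into a compressed RLE whose "counts" is a bytes
+# string, which is then decoded to `str` so the annotation stays JSON-serializable.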
+
+def remap_video_category_pairs_to_unique_video_ids(
+ gt_json, dt_json, add_negative_np_pairs=False
+):
+ """
+ Remap each pair of (video_id, category_id) to a new unique video_id. This is useful
+ for phrase AP and demo F1 evaluation on videos, where we have `useCat=False` and
+ rely on separating different NPs (from the same video) into different new video ids,
+ so that we don't mix detections from different categories in computeIoU under `useCat=False`.
+
+    This is consistent with how we do phrase AP and demo F1 evaluation on images, where we
+    use a remapped unique coco_image_id for each image-NP pair (based on its query["id"] in
+    CustomCocoDetectionAPI.load_queries in modulated_detection_api.py).
+ """
+ # collect the unique video_id-category_id pairs
+ video_id_to_video = {v["id"]: v for v in gt_json["videos"]}
+ video_id_category_id_pairs = set()
+ for pred in dt_json:
+ video_id_category_id_pairs.add((pred["video_id"], pred["category_id"]))
+ for ann in gt_json["annotations"]:
+ video_id_category_id_pairs.add((ann["video_id"], ann["category_id"]))
+
+ # assign the video_id-category_id pairs to unique video ids
+ video_id_category_id_pairs = sorted(video_id_category_id_pairs)
+ video_id_category_id_to_new_video_id = {
+ pair: (i + 1) for i, pair in enumerate(video_id_category_id_pairs)
+ }
+ # also map the negative NP pairs -- this is needed for IL_MCC and CG-F1 evaluation
+ if add_negative_np_pairs:
+ for vnp in gt_json["video_np_pairs"]:
+ pair = (vnp["video_id"], vnp["category_id"])
+ if pair not in video_id_category_id_to_new_video_id:
+ video_id_category_id_to_new_video_id[pair] = (
+ len(video_id_category_id_to_new_video_id) + 1
+ )
+
+ # map the "video_id" in predictions
+ for pred in dt_json:
+ pred["video_id"] = video_id_category_id_to_new_video_id[
+ (pred["video_id"], pred["category_id"])
+ ]
+ # map the "video_id" in gt_json["annotations"]
+ for ann in gt_json["annotations"]:
+ ann["video_id"] = video_id_category_id_to_new_video_id[
+ (ann["video_id"], ann["category_id"])
+ ]
+ # map and duplicate gt_json["videos"]
+ new_videos = []
+ for (
+ video_id,
+ category_id,
+ ), new_video_id in video_id_category_id_to_new_video_id.items():
+ video = video_id_to_video[video_id].copy()
+ video["id"] = new_video_id
+ # preserve the original video_id and category_id of each remapped video entry,
+ # so that we can associate sample-level eval metrics with the original video-NP pairs
+ video["orig_video_id"] = video_id
+ video["orig_category_id"] = category_id
+ new_videos.append(video)
+ gt_json["videos"] = new_videos
+
+ return gt_json, dt_json
+
+
+def remap_gt_dt_class_agnostic(gt, dt):
+ """
+ For class-agnostic HOTA, merge all GT tracks for each video (across NPs),
+ ensure unique track_ids, and set all category_id to 1.
+ Also, add orig_video_id and orig_category_id for compatibility.
+ """
+ # 1. Remap all GT track_ids to be unique per video
+ gt_anns_by_video = defaultdict(list)
+ for ann in gt["annotations"]:
+ gt_anns_by_video[ann["video_id"]].append(ann)
+
+ # Ensure unique track ids across tracks of all videos
+ next_tid = 1
+ for _, anns in gt_anns_by_video.items():
+ # Map old track_ids to new unique ones
+ old_to_new_tid = {}
+ for ann in anns:
+ old_tid = ann["id"]
+ if old_tid not in old_to_new_tid:
+ old_to_new_tid[old_tid] = next_tid
+ next_tid += 1
+ ann["id"] = old_to_new_tid[old_tid]
+ # Set category_id to 1 for class-agnostic
+ ann["category_id"] = 1
+
+ # Set all GT categories to a single category
+ gt["categories"] = [
+ {
+ "supercategory": "object",
+ "id": 1,
+ "name": "_REMAPPED_FOR_PHRASE_METRICS_",
+ }
+ ]
+
+ # Add orig_video_id and orig_category_id to each video for compatibility
+ anns_by_video = defaultdict(list)
+ for ann in gt["annotations"]:
+ anns_by_video[ann["video_id"]].append(ann)
+ for video in gt["videos"]:
+ video["orig_video_id"] = video["id"]
+ # Use the first annotation's original category_id if available, else None
+ orig_cat = (
+ anns_by_video[video["id"]][0]["category_id"]
+ if anns_by_video[video["id"]]
+ else None
+ )
+ video["orig_category_id"] = orig_cat
+ video["file_names"] = [
+ f"remapped_vid_{video['id']:012d}/{name}" for name in video["file_names"]
+ ]
+
+ # Set all DT category_id to 1
+ for d in dt:
+ d["category_id"] = 1
+ return gt, dt
+
+
+def _fill_in_ann_height_width(gt_json):
+ """Fill in missing height/width in GT annotations from its video info."""
+ video_id_to_video = {v["id"]: v for v in gt_json["videos"]}
+ for ann in gt_json["annotations"]:
+ if "height" not in ann or "width" not in ann:
+ video = video_id_to_video[ann["video_id"]]
+ if "height" not in ann:
+ ann["height"] = video["height"]
+ if "width" not in ann:
+ ann["width"] = video["width"]
+
+ return gt_json
diff --git a/sam3/eval/teta_eval_toolkit/__init__.py b/sam3/eval/teta_eval_toolkit/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..1609f80c9688602f1db5e9fe09cdd2cc1a65c8ca
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/__init__.py
@@ -0,0 +1,5 @@
+# fmt: off
+# flake8: noqa
+
+from . import config, datasets, metrics, utils
+from .eval import Evaluator
diff --git a/sam3/eval/teta_eval_toolkit/_timing.py b/sam3/eval/teta_eval_toolkit/_timing.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c6fbfab8121700f76edda6459020db990da3e03
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/_timing.py
@@ -0,0 +1,69 @@
+# fmt: off
+# flake8: noqa
+
+import inspect
+from functools import wraps
+from time import perf_counter
+
+DO_TIMING = False
+DISPLAY_LESS_PROGRESS = False
+timer_dict = {}
+counter = 0
+
+
+def time(f):
+ @wraps(f)
+ def wrap(*args, **kw):
+ if DO_TIMING:
+ # Run function with timing
+ ts = perf_counter()
+ result = f(*args, **kw)
+ te = perf_counter()
+ tt = te - ts
+
+ # Get function name
+ arg_names = inspect.getfullargspec(f)[0]
+ if arg_names[0] == "self" and DISPLAY_LESS_PROGRESS:
+ return result
+ elif arg_names[0] == "self":
+ method_name = type(args[0]).__name__ + "." + f.__name__
+ else:
+ method_name = f.__name__
+
+            # Record the cumulative time spent in each function for analysis
+ if method_name in timer_dict.keys():
+ timer_dict[method_name] += tt
+ else:
+ timer_dict[method_name] = tt
+
+ # If code is finished, display timing summary
+ if method_name == "Evaluator.evaluate":
+ print("")
+ print("Timing analysis:")
+ for key, value in timer_dict.items():
+ print("%-70s %2.4f sec" % (key, value))
+ else:
+ # Get function argument values for printing special arguments of interest
+ arg_titles = ["tracker", "seq", "cls"]
+ arg_vals = []
+ for i, a in enumerate(arg_names):
+ if a in arg_titles:
+ arg_vals.append(args[i])
+ arg_text = "(" + ", ".join(arg_vals) + ")"
+
+ # Display methods and functions with different indentation.
+ if arg_names[0] == "self":
+ print("%-74s %2.4f sec" % (" " * 4 + method_name + arg_text, tt))
+ elif arg_names[0] == "test":
+ pass
+ else:
+ global counter
+ counter += 1
+ print("%i %-70s %2.4f sec" % (counter, method_name + arg_text, tt))
+
+ return result
+ else:
+ # If config["TIME_PROGRESS"] is false, or config["USE_PARALLEL"] is true, run functions normally without timing.
+ return f(*args, **kw)
+
+ return wrap
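+
+
+# Minimal usage sketch (illustrative only; `my_eval_step` is a placeholder name):
+#
+#   from sam3.eval.teta_eval_toolkit import _timing
+#
+#   _timing.DO_TIMING = True
+#
+#   @_timing.time
+#   def my_eval_step(seq):
+#       ...
+#
+# With DO_TIMING left at its default (False), the decorator simply calls the wrapped
+# function without any timing or printing.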
diff --git a/sam3/eval/teta_eval_toolkit/config.py b/sam3/eval/teta_eval_toolkit/config.py
new file mode 100644
index 0000000000000000000000000000000000000000..6342fa2074b79477f0dbf421bcac5e560d64a2f0
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/config.py
@@ -0,0 +1,153 @@
+# fmt: off
+# flake8: noqa
+
+"""Config."""
+import argparse
+import os
+
+
+def parse_configs():
+ """Parse command line."""
+ default_eval_config = get_default_eval_config()
+ default_eval_config["DISPLAY_LESS_PROGRESS"] = True
+ default_dataset_config = get_default_dataset_config()
+ default_metrics_config = {"METRICS": ["TETA"]}
+ config = {
+ **default_eval_config,
+ **default_dataset_config,
+ **default_metrics_config,
+ }
+ parser = argparse.ArgumentParser()
+ for setting in config.keys():
+ if type(config[setting]) == list or type(config[setting]) == type(None):
+ parser.add_argument("--" + setting, nargs="+")
+ else:
+ parser.add_argument("--" + setting)
+ args = parser.parse_args().__dict__
+ for setting in args.keys():
+ if args[setting] is not None:
+ if type(config[setting]) == type(True):
+ if args[setting] == "True":
+ x = True
+ elif args[setting] == "False":
+ x = False
+ else:
+ raise Exception(
+ f"Command line parameter {setting} must be True/False"
+ )
+ elif type(config[setting]) == type(1):
+ x = int(args[setting])
+ elif type(args[setting]) == type(None):
+ x = None
+ else:
+ x = args[setting]
+ config[setting] = x
+ eval_config = {k: v for k, v in config.items() if k in default_eval_config.keys()}
+ dataset_config = {
+ k: v for k, v in config.items() if k in default_dataset_config.keys()
+ }
+ metrics_config = {
+ k: v for k, v in config.items() if k in default_metrics_config.keys()
+ }
+
+ return eval_config, dataset_config, metrics_config
+
+
+def get_default_eval_config():
+ """Returns the default config values for evaluation."""
+ code_path = get_code_path()
+ default_config = {
+ "USE_PARALLEL": True,
+ "NUM_PARALLEL_CORES": 8,
+ "BREAK_ON_ERROR": True,
+ "RETURN_ON_ERROR": False,
+ "LOG_ON_ERROR": os.path.join(code_path, "error_log.txt"),
+ "PRINT_RESULTS": True,
+ "PRINT_ONLY_COMBINED": True,
+ "PRINT_CONFIG": True,
+ "TIME_PROGRESS": True,
+ "DISPLAY_LESS_PROGRESS": True,
+ "OUTPUT_SUMMARY": True,
+ "OUTPUT_EMPTY_CLASSES": True,
+ "OUTPUT_TEM_RAW_DATA": True,
+ "OUTPUT_PER_SEQ_RES": True,
+ }
+ return default_config
+
+
+def get_default_dataset_config():
+ """Default class config values"""
+ code_path = get_code_path()
+ default_config = {
+ "GT_FOLDER": os.path.join(
+ code_path, "data/gt/tao/tao_training"
+ ), # Location of GT data
+ "TRACKERS_FOLDER": os.path.join(
+ code_path, "data/trackers/tao/tao_training"
+ ), # Trackers location
+ "OUTPUT_FOLDER": None, # Where to save eval results (if None, same as TRACKERS_FOLDER)
+ "TRACKERS_TO_EVAL": ['TETer'], # Filenames of trackers to eval (if None, all in folder)
+ "CLASSES_TO_EVAL": None, # Classes to eval (if None, all classes)
+ "SPLIT_TO_EVAL": "training", # Valid: 'training', 'val'
+ "PRINT_CONFIG": True, # Whether to print current config
+ "TRACKER_SUB_FOLDER": "data", # Tracker files are in TRACKER_FOLDER/tracker_name/TRACKER_SUB_FOLDER
+ "OUTPUT_SUB_FOLDER": "", # Output files are saved in OUTPUT_FOLDER/tracker_name/OUTPUT_SUB_FOLDER
+ "TRACKER_DISPLAY_NAMES": None, # Names of trackers to display, if None: TRACKERS_TO_EVAL
+ "MAX_DETECTIONS": 0, # Number of maximal allowed detections per image (0 for unlimited)
+ "USE_MASK": False, # Whether to use mask data for evaluation
+ }
+ return default_config
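+
+
+# Expected on-disk layout under the defaults above (an illustrative sketch; the json
+# file names are placeholders -- the dataset loaders accept either a folder holding
+# exactly one json file or a direct path to a json file):
+#
+#   data/gt/tao/tao_training/<ground_truth>.json
+#   data/trackers/tao/tao_training/TETer/data/<predictions>.json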
+
+
+def init_config(config, default_config, name=None):
+ """Initialize non-given config values with defaults."""
+ if config is None:
+ config = default_config
+ else:
+ for k in default_config.keys():
+ if k not in config.keys():
+ config[k] = default_config[k]
+ if name and config["PRINT_CONFIG"]:
+ print("\n%s Config:" % name)
+ for c in config.keys():
+ print("%-20s : %-30s" % (c, config[c]))
+ return config
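+
+
+# Example (illustrative): merging a partial user config with the defaults above.
+#
+#   eval_cfg = init_config({"NUM_PARALLEL_CORES": 4}, get_default_eval_config(), name="Eval")
+#   assert eval_cfg["USE_PARALLEL"] is True   # unspecified keys fall back to the defaults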
+
+
+def update_config(config):
+ """
+    Parse the command-line arguments of a script and update any config values that are specified in the arguments.
+ :param config: the config to update
+ :return: the updated config
+ """
+ parser = argparse.ArgumentParser()
+ for setting in config.keys():
+ if type(config[setting]) == list or type(config[setting]) == type(None):
+ parser.add_argument("--" + setting, nargs="+")
+ else:
+ parser.add_argument("--" + setting)
+ args = parser.parse_args().__dict__
+ for setting in args.keys():
+ if args[setting] is not None:
+ if type(config[setting]) == type(True):
+ if args[setting] == "True":
+ x = True
+ elif args[setting] == "False":
+ x = False
+ else:
+ raise Exception(
+ "Command line parameter " + setting + "must be True or False"
+ )
+ elif type(config[setting]) == type(1):
+ x = int(args[setting])
+ elif type(args[setting]) == type(None):
+ x = None
+ else:
+ x = args[setting]
+ config[setting] = x
+ return config
+
+
+def get_code_path():
+ """Get base path where code is"""
+ return os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
diff --git a/sam3/eval/teta_eval_toolkit/datasets/__init__.py b/sam3/eval/teta_eval_toolkit/datasets/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..97087d7d53edf5128c7365a34ad0fd646cdfa54b
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/datasets/__init__.py
@@ -0,0 +1,5 @@
+# fmt: off
+# flake8: noqa
+"""Datasets."""
+from .coco import COCO
+from .tao import TAO
diff --git a/sam3/eval/teta_eval_toolkit/datasets/_base_dataset.py b/sam3/eval/teta_eval_toolkit/datasets/_base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc2c30ce501f4bec9573ff2df0358374dacc109c
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/datasets/_base_dataset.py
@@ -0,0 +1,379 @@
+# fmt: off
+# flake8: noqa
+
+import csv
+import io
+import os
+import traceback
+import zipfile
+from abc import ABC, abstractmethod
+from copy import deepcopy
+
+import numpy as np
+
+from .. import _timing
+from ..utils import TrackEvalException
+
+
+class _BaseDataset(ABC):
+ @abstractmethod
+ def __init__(self):
+ self.tracker_list = None
+ self.seq_list = None
+ self.class_list = None
+ self.output_fol = None
+ self.output_sub_fol = None
+ self.should_classes_combine = True
+ self.use_super_categories = False
+
+ # Functions to implement:
+
+ @abstractmethod
+ def _load_raw_file(self, tracker, seq, is_gt):
+ ...
+
+ @_timing.time
+ @abstractmethod
+ def get_preprocessed_seq_data(self, raw_data, cls):
+ ...
+
+ @abstractmethod
+ def _calculate_similarities(self, gt_dets_t, tracker_dets_t):
+ ...
+
+ # Helper functions for all datasets:
+
+ @classmethod
+ def get_class_name(cls):
+ return cls.__name__
+
+ def get_name(self):
+ return self.get_class_name()
+
+ def get_output_fol(self, tracker):
+ return os.path.join(self.output_fol, tracker, self.output_sub_fol)
+
+ def get_display_name(self, tracker):
+ """Can be overwritten if the trackers name (in files) is different to how it should be displayed.
+ By default this method just returns the trackers name as is.
+ """
+ return tracker
+
+ def get_eval_info(self):
+ """Return info about the dataset needed for the Evaluator"""
+ return self.tracker_list, self.seq_list, self.class_list
+
+ @_timing.time
+ def get_raw_seq_data(self, tracker, seq):
+ """Loads raw data (tracker and ground-truth) for a single tracker on a single sequence.
+ Raw data includes all of the information needed for both preprocessing and evaluation, for all classes.
+        A later function (get_preprocessed_seq_data) will perform such preprocessing and extract relevant information for
+ the evaluation of each class.
+
+ This returns a dict which contains the fields:
+ [num_timesteps]: integer
+ [gt_ids, tracker_ids, gt_classes, tracker_classes, tracker_confidences]:
+ list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets, tracker_dets, gt_crowd_ignore_regions]: list (for each timestep) of lists of detections.
+ [similarity_scores]: list (for each timestep) of 2D NDArrays.
+ [gt_extras]: dict (for each extra) of lists (for each timestep) of 1D NDArrays (for each det).
+
+ gt_extras contains dataset specific information used for preprocessing such as occlusion and truncation levels.
+
+ Note that similarities are extracted as part of the dataset and not the metric, because almost all metrics are
+ independent of the exact method of calculating the similarity. However datasets are not (e.g. segmentation
+ masks vs 2D boxes vs 3D boxes).
+ We calculate the similarity before preprocessing because often both preprocessing and evaluation require it and
+ we don't wish to calculate this twice.
+ We calculate similarity between all gt and tracker classes (not just each class individually) to allow for
+ calculation of metrics such as class confusion matrices. Typically the impact of this on performance is low.
+ """
+ # Load raw data.
+ raw_gt_data = self._load_raw_file(tracker, seq, is_gt=True)
+ raw_tracker_data = self._load_raw_file(tracker, seq, is_gt=False)
+ raw_data = {**raw_tracker_data, **raw_gt_data} # Merges dictionaries
+
+ # Calculate similarities for each timestep.
+ similarity_scores = []
+ for _, (gt_dets_t, tracker_dets_t) in enumerate(
+ zip(raw_data["gt_dets"], raw_data["tk_dets"])
+ ):
+ ious = self._calculate_similarities(gt_dets_t, tracker_dets_t)
+ similarity_scores.append(ious)
+ raw_data["similarity_scores"] = similarity_scores
+ return raw_data
+
+ @staticmethod
+ def _load_simple_text_file(
+ file,
+ time_col=0,
+ id_col=None,
+ remove_negative_ids=False,
+ valid_filter=None,
+ crowd_ignore_filter=None,
+ convert_filter=None,
+ is_zipped=False,
+ zip_file=None,
+ force_delimiters=None,
+ ):
+ """Function that loads data which is in a commonly used text file format.
+ Assumes each det is given by one row of a text file.
+ There is no limit to the number or meaning of each column,
+ however one column needs to give the timestep of each det (time_col) which is default col 0.
+
+        The file dialect (delimiter, num cols, etc.) is determined automatically.
+        This function automatically separates dets by timestep,
+        and is much faster than alternatives such as np.loadtxt or pandas.
+
+ If remove_negative_ids is True and id_col is not None, dets with negative values in id_col are excluded.
+ These are not excluded from ignore data.
+
+ valid_filter can be used to only include certain classes.
+ It is a dict with ints as keys, and lists as values,
+ such that a row is included if "row[key].lower() is in value" for all key/value pairs in the dict.
+ If None, all classes are included.
+
+        crowd_ignore_filter can be used to read crowd_ignore regions separately. It has the same format as valid_filter.
+
+        convert_filter can be used to convert values read into another format.
+ This is used most commonly to convert classes given as string to a class id.
+ This is a dict such that the key is the column to convert, and the value is another dict giving the mapping.
+
+ Optionally, input files could be a zip of multiple text files for storage efficiency.
+
+ Returns read_data and ignore_data.
+ Each is a dict (with keys as timesteps as strings) of lists (over dets) of lists (over column values).
+ Note that all data is returned as strings, and must be converted to float/int later if needed.
+ Note that timesteps will not be present in the returned dict keys if there are no dets for them
+ """
+
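+        # Illustrative sketch of the filter arguments (hypothetical column layout):
+        # for rows such as "1, -1, Car, 100, 200, 50, 80", keeping only cars and
+        # mapping the class string in column 2 to an integer id could look like
+        #
+        #   valid_filter   = {2: ["car"]}
+        #   convert_filter = {2: {"car": 1}}
+        #
+        # crowd_ignore_filter uses the same format, but matching rows are returned
+        # in the separate ignore data instead.
+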
+ if remove_negative_ids and id_col is None:
+ raise TrackEvalException(
+ "remove_negative_ids is True, but id_col is not given."
+ )
+ if crowd_ignore_filter is None:
+ crowd_ignore_filter = {}
+ if convert_filter is None:
+ convert_filter = {}
+ try:
+ if is_zipped: # Either open file directly or within a zip.
+ if zip_file is None:
+ raise TrackEvalException(
+ "is_zipped set to True, but no zip_file is given."
+ )
+ archive = zipfile.ZipFile(os.path.join(zip_file), "r")
+ fp = io.TextIOWrapper(archive.open(file, "r"))
+ else:
+ fp = open(file)
+ read_data = {}
+ crowd_ignore_data = {}
+ fp.seek(0, os.SEEK_END)
+ # check if file is empty
+ if fp.tell():
+ fp.seek(0)
+ dialect = csv.Sniffer().sniff(
+ fp.readline(), delimiters=force_delimiters
+ ) # Auto determine structure.
+ dialect.skipinitialspace = (
+ True # Deal with extra spaces between columns
+ )
+ fp.seek(0)
+ reader = csv.reader(fp, dialect)
+ for row in reader:
+ try:
+ # Deal with extra trailing spaces at the end of rows
+                        if row[-1] == "":
+ row = row[:-1]
+ timestep = str(int(float(row[time_col])))
+ # Read ignore regions separately.
+ is_ignored = False
+ for ignore_key, ignore_value in crowd_ignore_filter.items():
+ if row[ignore_key].lower() in ignore_value:
+ # Convert values in one column (e.g. string to id)
+ for (
+ convert_key,
+ convert_value,
+ ) in convert_filter.items():
+ row[convert_key] = convert_value[
+ row[convert_key].lower()
+ ]
+ # Save data separated by timestep.
+ if timestep in crowd_ignore_data.keys():
+ crowd_ignore_data[timestep].append(row)
+ else:
+ crowd_ignore_data[timestep] = [row]
+ is_ignored = True
+ if (
+ is_ignored
+ ): # if det is an ignore region, it cannot be a normal det.
+ continue
+ # Exclude some dets if not valid.
+                        if valid_filter is not None:
+                            # skip the row unless it passes every filter
+                            if not all(
+                                row[key].lower() in value
+                                for key, value in valid_filter.items()
+                            ):
+                                continue
+ if remove_negative_ids:
+ if int(float(row[id_col])) < 0:
+ continue
+ # Convert values in one column (e.g. string to id)
+ for convert_key, convert_value in convert_filter.items():
+ row[convert_key] = convert_value[row[convert_key].lower()]
+ # Save data separated by timestep.
+ if timestep in read_data.keys():
+ read_data[timestep].append(row)
+ else:
+ read_data[timestep] = [row]
+ except Exception:
+ exc_str_init = (
+ "In file %s the following line cannot be read correctly: \n"
+ % os.path.basename(file)
+ )
+ exc_str = " ".join([exc_str_init] + row)
+ raise TrackEvalException(exc_str)
+ fp.close()
+ except Exception:
+ print("Error loading file: %s, printing traceback." % file)
+ traceback.print_exc()
+ raise TrackEvalException(
+ "File %s cannot be read because it is either not present or invalidly formatted"
+ % os.path.basename(file)
+ )
+ return read_data, crowd_ignore_data
+
+ @staticmethod
+ def _calculate_mask_ious(masks1, masks2, is_encoded=False, do_ioa=False):
+ """Calculates the IOU (intersection over union) between two arrays of segmentation masks.
+ If is_encoded a run length encoding with pycocotools is assumed as input format, otherwise an input of numpy
+ arrays of the shape (num_masks, height, width) is assumed and the encoding is performed.
+        If do_ioa (intersection over area), then calculates the intersection over the area of masks1 - this is commonly
+        used to determine if detections are within a crowd ignore region.
+ :param masks1: first set of masks (numpy array of shape (num_masks, height, width) if not encoded,
+ else pycocotools rle encoded format)
+ :param masks2: second set of masks (numpy array of shape (num_masks, height, width) if not encoded,
+ else pycocotools rle encoded format)
+ :param is_encoded: whether the input is in pycocotools rle encoded format
+ :param do_ioa: whether to perform IoA computation
+ :return: the IoU/IoA scores
+ """
+
+ # Only loaded when run to reduce minimum requirements
+ from pycocotools import mask as mask_utils
+
+ # use pycocotools for run length encoding of masks
+ if not is_encoded:
+ masks1 = mask_utils.encode(
+ np.array(np.transpose(masks1, (1, 2, 0)), order="F")
+ )
+ masks2 = mask_utils.encode(
+ np.array(np.transpose(masks2, (1, 2, 0)), order="F")
+ )
+
+ # use pycocotools for iou computation of rle encoded masks
+ ious = mask_utils.iou(masks1, masks2, [do_ioa] * len(masks2))
+ if len(masks1) == 0 or len(masks2) == 0:
+ ious = np.asarray(ious).reshape(len(masks1), len(masks2))
+ assert (ious >= 0 - np.finfo("float").eps).all()
+ assert (ious <= 1 + np.finfo("float").eps).all()
+
+ return ious
+
+ @staticmethod
+ def _calculate_box_ious(bboxes1, bboxes2, box_format="xywh", do_ioa=False):
+ """Calculates the IOU (intersection over union) between two arrays of boxes.
+ Allows variable box formats ('xywh' and 'x0y0x1y1').
+        If do_ioa (intersection over area), then calculates the intersection over the area of boxes1 - this is commonly
+        used to determine if detections are within a crowd ignore region.
+ """
+        if box_format == "xywh":
+ # layout: (x0, y0, w, h)
+ bboxes1 = deepcopy(bboxes1)
+ bboxes2 = deepcopy(bboxes2)
+
+ bboxes1[:, 2] = bboxes1[:, 0] + bboxes1[:, 2]
+ bboxes1[:, 3] = bboxes1[:, 1] + bboxes1[:, 3]
+ bboxes2[:, 2] = bboxes2[:, 0] + bboxes2[:, 2]
+ bboxes2[:, 3] = bboxes2[:, 1] + bboxes2[:, 3]
+        elif box_format != "x0y0x1y1":
+            raise TrackEvalException("box_format %s is not implemented" % box_format)
+
+ # layout: (x0, y0, x1, y1)
+ min_ = np.minimum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
+ max_ = np.maximum(bboxes1[:, np.newaxis, :], bboxes2[np.newaxis, :, :])
+ intersection = np.maximum(min_[..., 2] - max_[..., 0], 0) * np.maximum(
+ min_[..., 3] - max_[..., 1], 0
+ )
+ area1 = (bboxes1[..., 2] - bboxes1[..., 0]) * (
+ bboxes1[..., 3] - bboxes1[..., 1]
+ )
+
+ if do_ioa:
+ ioas = np.zeros_like(intersection)
+ valid_mask = area1 > 0 + np.finfo("float").eps
+ ioas[valid_mask, :] = (
+ intersection[valid_mask, :] / area1[valid_mask][:, np.newaxis]
+ )
+
+ return ioas
+ else:
+ area2 = (bboxes2[..., 2] - bboxes2[..., 0]) * (
+ bboxes2[..., 3] - bboxes2[..., 1]
+ )
+ union = area1[:, np.newaxis] + area2[np.newaxis, :] - intersection
+ intersection[area1 <= 0 + np.finfo("float").eps, :] = 0
+ intersection[:, area2 <= 0 + np.finfo("float").eps] = 0
+ intersection[union <= 0 + np.finfo("float").eps] = 0
+ union[union <= 0 + np.finfo("float").eps] = 1
+ ious = intersection / union
+ return ious
+
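+    # Worked example (illustrative, not part of the toolkit API):
+    #
+    #   import numpy as np
+    #   b1 = np.array([[0.0, 0.0, 2.0, 2.0]])   # "xywh": a 2x2 box at the origin
+    #   b2 = np.array([[1.0, 1.0, 2.0, 2.0]])   # the same box shifted by (1, 1)
+    #   _BaseDataset._calculate_box_ious(b1, b2)
+    #   # intersection = 1, union = 4 + 4 - 1 = 7, so IoU = 1 / 7 ~= 0.143
+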
+ @staticmethod
+ def _calculate_euclidean_similarity(dets1, dets2, zero_distance=2.0):
+ """Calculates the euclidean distance between two sets of detections, and then converts this into a similarity
+ measure with values between 0 and 1 using the following formula: sim = max(0, 1 - dist/zero_distance).
+ The default zero_distance of 2.0, corresponds to the default used in MOT15_3D, such that a 0.5 similarity
+ threshold corresponds to a 1m distance threshold for TPs.
+ """
+ dist = np.linalg.norm(dets1[:, np.newaxis] - dets2[np.newaxis, :], axis=2)
+ sim = np.maximum(0, 1 - dist / zero_distance)
+ return sim
+
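+    # Worked example (illustrative): with the default zero_distance of 2.0, a pair of
+    # detections 1.0 apart gets similarity max(0, 1 - 1.0 / 2.0) = 0.5, i.e. exactly
+    # the TP threshold mentioned in the docstring above.
+    #
+    #   import numpy as np
+    #   dets1 = np.array([[0.0, 0.0]])
+    #   dets2 = np.array([[1.0, 0.0], [3.0, 0.0]])
+    #   _BaseDataset._calculate_euclidean_similarity(dets1, dets2)
+    #   # -> array([[0.5, 0. ]])
+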
+ @staticmethod
+ def _check_unique_ids(data, after_preproc=False):
+ """Check the requirement that the tracker_ids and gt_ids are unique per timestep"""
+ gt_ids = data["gt_ids"]
+ tracker_ids = data["tk_ids"]
+ for t, (gt_ids_t, tracker_ids_t) in enumerate(zip(gt_ids, tracker_ids)):
+ if len(tracker_ids_t) > 0:
+ unique_ids, counts = np.unique(tracker_ids_t, return_counts=True)
+ if np.max(counts) != 1:
+ duplicate_ids = unique_ids[counts > 1]
+ exc_str_init = (
+ "Tracker predicts the same ID more than once in a single timestep "
+ "(seq: %s, frame: %i, ids:" % (data["seq"], t + 1)
+ )
+ exc_str = (
+ " ".join([exc_str_init] + [str(d) for d in duplicate_ids]) + ")"
+ )
+ if after_preproc:
+                        exc_str += (
+ "\n Note that this error occurred after preprocessing (but not before), "
+ "so ids may not be as in file, and something seems wrong with preproc."
+ )
+ raise TrackEvalException(exc_str)
+ if len(gt_ids_t) > 0:
+ unique_ids, counts = np.unique(gt_ids_t, return_counts=True)
+ if np.max(counts) != 1:
+ duplicate_ids = unique_ids[counts > 1]
+ exc_str_init = (
+ "Ground-truth has the same ID more than once in a single timestep "
+ "(seq: %s, frame: %i, ids:" % (data["seq"], t + 1)
+ )
+ exc_str = (
+ " ".join([exc_str_init] + [str(d) for d in duplicate_ids]) + ")"
+ )
+ if after_preproc:
+                        exc_str += (
+ "\n Note that this error occurred after preprocessing (but not before), "
+ "so ids may not be as in file, and something seems wrong with preproc."
+ )
+ raise TrackEvalException(exc_str)
diff --git a/sam3/eval/teta_eval_toolkit/datasets/coco.py b/sam3/eval/teta_eval_toolkit/datasets/coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca3d8239f6c42c311fa1c593b79315c318b71a07
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/datasets/coco.py
@@ -0,0 +1,637 @@
+# fmt: off
+# flake8: noqa
+
+"""COCO Dataset."""
+import copy
+import itertools
+import json
+import os
+from collections import defaultdict
+
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+
+from .. import _timing, utils
+from ..config import get_default_dataset_config, init_config
+from ..utils import TrackEvalException
+from ._base_dataset import _BaseDataset
+
+
+class COCO(_BaseDataset):
+ """Tracking datasets in COCO format."""
+
+ def __init__(self, config=None):
+ """Initialize dataset, checking that all required files are present."""
+ super().__init__()
+ # Fill non-given config values with defaults
+ self.config = init_config(config, get_default_dataset_config(), self.get_name())
+ self.gt_fol = self.config["GT_FOLDER"]
+ self.tracker_fol = self.config["TRACKERS_FOLDER"]
+ self.should_classes_combine = True
+ self.use_super_categories = False
+ self.use_mask = self.config["USE_MASK"]
+
+ self.tracker_sub_fol = self.config["TRACKER_SUB_FOLDER"]
+ self.output_fol = self.config["OUTPUT_FOLDER"]
+ if self.output_fol is None:
+ self.output_fol = self.tracker_fol
+ self.output_sub_fol = self.config["OUTPUT_SUB_FOLDER"]
+
+ if self.gt_fol.endswith(".json"):
+ self.gt_data = json.load(open(self.gt_fol, "r"))
+ else:
+ gt_dir_files = [
+ file for file in os.listdir(self.gt_fol) if file.endswith(".json")
+ ]
+ if len(gt_dir_files) != 1:
+ raise TrackEvalException(
+ f"{self.gt_fol} does not contain exactly one json file."
+ )
+
+ with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
+ self.gt_data = json.load(f)
+
+ # fill missing video ids
+ self._fill_video_ids_inplace(self.gt_data["annotations"])
+
+ # get sequences to eval and sequence information
+ self.seq_list = [
+ vid["name"].replace("/", "-") for vid in self.gt_data["videos"]
+ ]
+ self.seq_name2seqid = {
+ vid["name"].replace("/", "-"): vid["id"] for vid in self.gt_data["videos"]
+ }
+ # compute mappings from videos to annotation data
+ self.video2gt_track, self.video2gt_image = self._compute_vid_mappings(
+ self.gt_data["annotations"]
+ )
+ # compute sequence lengths
+ self.seq_lengths = {vid["id"]: 0 for vid in self.gt_data["videos"]}
+ for img in self.gt_data["images"]:
+ self.seq_lengths[img["video_id"]] += 1
+ self.seq2images2timestep = self._compute_image_to_timestep_mappings()
+ self.seq2cls = {
+ vid["id"]: {
+ "pos_cat_ids": list(
+ {track["category_id"] for track in self.video2gt_track[vid["id"]]}
+ ),
+ }
+ for vid in self.gt_data["videos"]
+ }
+
+ # Get classes to eval
+ considered_vid_ids = [self.seq_name2seqid[vid] for vid in self.seq_list]
+ seen_cats = set(
+ [
+ cat_id
+ for vid_id in considered_vid_ids
+ for cat_id in self.seq2cls[vid_id]["pos_cat_ids"]
+ ]
+ )
+ # only classes with ground truth are evaluated in TAO
+ self.valid_classes = [
+ cls["name"] for cls in self.gt_data["categories"] if cls["id"] in seen_cats
+ ]
+ cls_name2clsid_map = {
+ cls["name"]: cls["id"] for cls in self.gt_data["categories"]
+ }
+
+ if self.config["CLASSES_TO_EVAL"]:
+ self.class_list = [
+ cls.lower() if cls.lower() in self.valid_classes else None
+ for cls in self.config["CLASSES_TO_EVAL"]
+ ]
+ if not all(self.class_list):
+ valid_cls = ", ".join(self.valid_classes)
+ raise TrackEvalException(
+ "Attempted to evaluate an invalid class. Only classes "
+ f"{valid_cls} are valid (classes present in ground truth"
+ " data)."
+ )
+ else:
+ self.class_list = [cls for cls in self.valid_classes]
+ self.cls_name2clsid = {
+ k: v for k, v in cls_name2clsid_map.items() if k in self.class_list
+ }
+ self.clsid2cls_name = {
+ v: k for k, v in cls_name2clsid_map.items() if k in self.class_list
+ }
+ # get trackers to eval
+ if self.config["TRACKERS_TO_EVAL"] is None:
+ self.tracker_list = os.listdir(self.tracker_fol)
+ else:
+ self.tracker_list = self.config["TRACKERS_TO_EVAL"]
+
+ if self.config["TRACKER_DISPLAY_NAMES"] is None:
+ self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
+        elif (self.config["TRACKERS_TO_EVAL"] is not None) and (
+            len(self.config["TRACKER_DISPLAY_NAMES"]) == len(self.tracker_list)
+        ):
+            self.tracker_to_disp = dict(
+                zip(self.tracker_list, self.config["TRACKER_DISPLAY_NAMES"])
+            )
+ else:
+ raise TrackEvalException(
+ "List of tracker files and tracker display names do not match."
+ )
+
+ self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
+
+ for tracker in self.tracker_list:
+ if self.tracker_sub_fol.endswith(".json"):
+ with open(os.path.join(self.tracker_sub_fol)) as f:
+ curr_data = json.load(f)
+ else:
+ tr_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ tr_dir_files = [
+ file for file in os.listdir(tr_dir) if file.endswith(".json")
+ ]
+ if len(tr_dir_files) != 1:
+ raise TrackEvalException(
+ f"{tr_dir} does not contain exactly one json file."
+ )
+ with open(os.path.join(tr_dir, tr_dir_files[0])) as f:
+ curr_data = json.load(f)
+
+ # limit detections if MAX_DETECTIONS > 0
+ if self.config["MAX_DETECTIONS"]:
+ curr_data = self._limit_dets_per_image(curr_data)
+
+ # fill missing video ids
+ self._fill_video_ids_inplace(curr_data)
+
+ # make track ids unique over whole evaluation set
+ self._make_tk_ids_unique(curr_data)
+
+ # get tracker sequence information
+ curr_vids2tracks, curr_vids2images = self._compute_vid_mappings(curr_data)
+ self.tracker_data[tracker]["vids_to_tracks"] = curr_vids2tracks
+ self.tracker_data[tracker]["vids_to_images"] = curr_vids2images
+
+ def get_display_name(self, tracker):
+ return self.tracker_to_disp[tracker]
+
+ def _load_raw_file(self, tracker, seq, is_gt):
+ """Load a file (gt or tracker) in the TAO format
+
+ If is_gt, this returns a dict which contains the fields:
+ [gt_ids, gt_classes]:
+ list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets]: list (for each timestep) of lists of detections.
+
+ if not is_gt, this returns a dict which contains the fields:
+ [tk_ids, tk_classes]:
+ list (for each timestep) of 1D NDArrays (for each det).
+ [tk_dets]: list (for each timestep) of lists of detections.
+ """
+ seq_id = self.seq_name2seqid[seq]
+ # file location
+ if is_gt:
+ imgs = self.video2gt_image[seq_id]
+ else:
+ imgs = self.tracker_data[tracker]["vids_to_images"][seq_id]
+
+ # convert data to required format
+ num_timesteps = self.seq_lengths[seq_id]
+ img_to_timestep = self.seq2images2timestep[seq_id]
+ data_keys = ["ids", "classes", "dets"]
+ # if not is_gt:
+ # data_keys += ["tk_confidences"]
+ raw_data = {key: [None] * num_timesteps for key in data_keys}
+ for img in imgs:
+ # some tracker data contains images without any ground truth info,
+ # these are ignored
+ if img["id"] not in img_to_timestep:
+ continue
+ t = img_to_timestep[img["id"]]
+ anns = img["annotations"]
+ tk_str = utils.get_track_id_str(anns[0])
+
+ if self.use_mask:
+ # When using mask, extract segmentation data
+ raw_data["dets"][t] = [ann.get("segmentation") for ann in anns]
+ else:
+ # When using bbox, extract bbox data
+ raw_data["dets"][t] = np.atleast_2d([ann["bbox"] for ann in anns]).astype(
+ float
+ )
+ raw_data["ids"][t] = np.atleast_1d([ann[tk_str] for ann in anns]).astype(
+ int
+ )
+ raw_data["classes"][t] = np.atleast_1d(
+ [ann["category_id"] for ann in anns]
+ ).astype(int)
+ # if not is_gt:
+ # raw_data["tk_confidences"][t] = np.atleast_1d(
+ # [ann["score"] for ann in anns]
+ # ).astype(float)
+
+ for t, d in enumerate(raw_data["dets"]):
+ if d is None:
+ raw_data["dets"][t] = np.empty((0, 4)).astype(float)
+ raw_data["ids"][t] = np.empty(0).astype(int)
+ raw_data["classes"][t] = np.empty(0).astype(int)
+ # if not is_gt:
+ # raw_data["tk_confidences"][t] = np.empty(0)
+
+ if is_gt:
+ key_map = {"ids": "gt_ids", "classes": "gt_classes", "dets": "gt_dets"}
+ else:
+ key_map = {"ids": "tk_ids", "classes": "tk_classes", "dets": "tk_dets"}
+ for k, v in key_map.items():
+ raw_data[v] = raw_data.pop(k)
+
+ raw_data["num_timesteps"] = num_timesteps
+ raw_data["seq"] = seq
+ return raw_data
+
+ def get_preprocessed_seq_data_thr(self, raw_data, cls, assignment=None):
+ """Preprocess data for a single sequence for a single class.
+
+ Inputs:
+ raw_data: dict containing the data for the sequence already
+ read in by get_raw_seq_data().
+ cls: class to be evaluated.
+ Outputs:
+ gt_ids:
+ list (for each timestep) of ids of GT tracks
+ tk_ids:
+ list (for each timestep) of ids of predicted tracks (all for TP
+ matching (Det + AssocA))
+ tk_overlap_ids:
+ list (for each timestep) of ids of predicted tracks that overlap
+ with GTs
+            tk_dets:
+                list (for each timestep) of lists of detections corresponding
+                to the tk_ids
+            tk_classes:
+                list (for each timestep) of lists of classes corresponding
+                to the tk_ids
+            tk_confidences:
+                list (for each timestep) of lists of confidences corresponding
+                to the tk_ids
+ sim_scores:
+ similarity score between gt_ids and tk_ids.
+ """
+ if cls != "all":
+ cls_id = self.cls_name2clsid[cls]
+
+ data_keys = [
+ "gt_ids",
+ "tk_ids",
+ "gt_id_map",
+ "tk_id_map",
+ "gt_dets",
+ "gt_classes",
+ "gt_class_name",
+ "tk_overlap_classes",
+ "tk_overlap_ids",
+ "tk_class_eval_tk_ids",
+ "tk_dets",
+ "tk_classes",
+ # "tk_confidences",
+ "tk_exh_ids",
+ "sim_scores",
+ ]
+ data = {key: [None] * raw_data["num_timesteps"] for key in data_keys}
+ unique_gt_ids = []
+ unique_tk_ids = []
+ num_gt_dets = 0
+ num_tk_cls_dets = 0
+ num_tk_overlap_dets = 0
+ overlap_ious_thr = 0.5
+ loc_and_asso_tk_ids = []
+ exh_class_tk_ids = []
+
+ for t in range(raw_data["num_timesteps"]):
+ # only extract relevant dets for this class for preproc and eval
+ if cls == "all":
+ gt_class_mask = np.ones_like(raw_data["gt_classes"][t]).astype(bool)
+ else:
+ gt_class_mask = np.atleast_1d(
+ raw_data["gt_classes"][t] == cls_id
+ ).astype(bool)
+
+ # select GT that is not in the evaluating classes
+ if assignment is not None and assignment:
+ all_gt_ids = list(assignment[t].keys())
+ gt_ids_in = raw_data["gt_ids"][t][gt_class_mask]
+ gt_ids_out = set(all_gt_ids) - set(gt_ids_in)
+ tk_ids_out = set([assignment[t][key] for key in list(gt_ids_out)])
+
+ # compute overlapped tracks and add their ids to overlap_tk_ids
+ sim_scores = raw_data["similarity_scores"]
+ overlap_ids_masks = (sim_scores[t][gt_class_mask] >= overlap_ious_thr).any(
+ axis=0
+ )
+ overlap_tk_ids_t = raw_data["tk_ids"][t][overlap_ids_masks]
+ if assignment is not None and assignment:
+ data["tk_overlap_ids"][t] = list(set(overlap_tk_ids_t) - tk_ids_out)
+ else:
+ data["tk_overlap_ids"][t] = list(set(overlap_tk_ids_t))
+
+ loc_and_asso_tk_ids += data["tk_overlap_ids"][t]
+
+ data["tk_exh_ids"][t] = []
+ if cls == "all":
+ continue
+
+ # add the track ids of exclusive annotated class to exh_class_tk_ids
+ tk_exh_mask = np.atleast_1d(raw_data["tk_classes"][t] == cls_id)
+ tk_exh_mask = tk_exh_mask.astype(bool)
+ exh_class_tk_ids_t = raw_data["tk_ids"][t][tk_exh_mask]
+ exh_class_tk_ids.append(exh_class_tk_ids_t)
+ data["tk_exh_ids"][t] = exh_class_tk_ids_t
+
+        # remove tk_ids that have been assigned to GTs belonging to other classes
+ loc_and_asso_tk_ids = list(set(loc_and_asso_tk_ids))
+
+ # remove all unwanted unmatched tracker detections
+ for t in range(raw_data["num_timesteps"]):
+ # add gt to the data
+ if cls == "all":
+ gt_class_mask = np.ones_like(raw_data["gt_classes"][t]).astype(bool)
+ else:
+ gt_class_mask = np.atleast_1d(
+ raw_data["gt_classes"][t] == cls_id
+ ).astype(bool)
+ data["gt_classes"][t] = cls_id
+ data["gt_class_name"][t] = cls
+
+ gt_ids = raw_data["gt_ids"][t][gt_class_mask]
+ if self.use_mask:
+                gt_dets = [
+                    raw_data["gt_dets"][t][ind]
+                    for ind in range(len(gt_class_mask))
+                    if gt_class_mask[ind]
+                ]
+ else:
+ gt_dets = raw_data["gt_dets"][t][gt_class_mask]
+ data["gt_ids"][t] = gt_ids
+ data["gt_dets"][t] = gt_dets
+
+ # filter pred and only keep those that highly overlap with GTs
+ tk_mask = np.isin(
+ raw_data["tk_ids"][t], np.array(loc_and_asso_tk_ids), assume_unique=True
+ )
+ tk_overlap_mask = np.isin(
+ raw_data["tk_ids"][t],
+ np.array(data["tk_overlap_ids"][t]),
+ assume_unique=True,
+ )
+
+ tk_ids = raw_data["tk_ids"][t][tk_mask]
+ if self.use_mask:
+                tk_dets = [
+                    raw_data["tk_dets"][t][ind]
+                    for ind in range(len(tk_mask))
+                    if tk_mask[ind]
+                ]
+ else:
+ tk_dets = raw_data["tk_dets"][t][tk_mask]
+
+ tracker_classes = raw_data["tk_classes"][t][tk_mask]
+
+ # add overlap classes for computing the FP for Cls term
+ tracker_overlap_classes = raw_data["tk_classes"][t][tk_overlap_mask]
+ # tracker_confidences = raw_data["tk_confidences"][t][tk_mask]
+ sim_scores_masked = sim_scores[t][gt_class_mask, :][:, tk_mask]
+
+ # add filtered prediction to the data
+ data["tk_classes"][t] = tracker_classes
+ data["tk_overlap_classes"][t] = tracker_overlap_classes
+ data["tk_ids"][t] = tk_ids
+ data["tk_dets"][t] = tk_dets
+ # data["tk_confidences"][t] = tracker_confidences
+ data["sim_scores"][t] = sim_scores_masked
+ data["tk_class_eval_tk_ids"][t] = set(
+ list(data["tk_overlap_ids"][t]) + list(data["tk_exh_ids"][t])
+ )
+
+ # count total number of detections
+ unique_gt_ids += list(np.unique(data["gt_ids"][t]))
+ # the unique track ids are for association.
+ unique_tk_ids += list(np.unique(data["tk_ids"][t]))
+
+ num_tk_overlap_dets += len(data["tk_overlap_ids"][t])
+ num_tk_cls_dets += len(data["tk_class_eval_tk_ids"][t])
+ num_gt_dets += len(data["gt_ids"][t])
+
+ # re-label IDs such that there are no empty IDs
+ if len(unique_gt_ids) > 0:
+ unique_gt_ids = np.unique(unique_gt_ids)
+ gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
+ gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
+ data["gt_id_map"] = {}
+ for gt_id in unique_gt_ids:
+ new_gt_id = gt_id_map[gt_id].astype(int)
+ data["gt_id_map"][new_gt_id] = gt_id
+
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["gt_ids"][t]) > 0:
+ data["gt_ids"][t] = gt_id_map[data["gt_ids"][t]].astype(int)
+
+ if len(unique_tk_ids) > 0:
+ unique_tk_ids = np.unique(unique_tk_ids)
+ tk_id_map = np.nan * np.ones((np.max(unique_tk_ids) + 1))
+ tk_id_map[unique_tk_ids] = np.arange(len(unique_tk_ids))
+
+ data["tk_id_map"] = {}
+ for track_id in unique_tk_ids:
+ new_track_id = tk_id_map[track_id].astype(int)
+ data["tk_id_map"][new_track_id] = track_id
+
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["tk_ids"][t]) > 0:
+ data["tk_ids"][t] = tk_id_map[data["tk_ids"][t]].astype(int)
+ if len(data["tk_overlap_ids"][t]) > 0:
+ data["tk_overlap_ids"][t] = tk_id_map[
+ data["tk_overlap_ids"][t]
+ ].astype(int)
+
+ # record overview statistics.
+ data["num_tk_cls_dets"] = num_tk_cls_dets
+ data["num_tk_overlap_dets"] = num_tk_overlap_dets
+ data["num_gt_dets"] = num_gt_dets
+ data["num_tk_ids"] = len(unique_tk_ids)
+ data["num_gt_ids"] = len(unique_gt_ids)
+ data["num_timesteps"] = raw_data["num_timesteps"]
+ data["seq"] = raw_data["seq"]
+
+ self._check_unique_ids(data)
+
+ return data
+
+ @_timing.time
+ def get_preprocessed_seq_data(
+ self, raw_data, cls, assignment=None, thresholds=[50, 75]
+ ):
+ """Preprocess data for a single sequence for a single class."""
+ data = {}
+ if thresholds is None:
+ thresholds = [50, 75]
+ elif isinstance(thresholds, int):
+ thresholds = [thresholds]
+
+ for thr in thresholds:
+ assignment_thr = None
+ if assignment is not None:
+ assignment_thr = assignment[thr]
+ data[thr] = self.get_preprocessed_seq_data_thr(
+ raw_data, cls, assignment_thr
+ )
+
+ return data
+
+ def _calculate_similarities(self, gt_dets_t, tk_dets_t):
+ """Compute similarity scores."""
+ if self.use_mask:
+ similarity_scores = self._calculate_mask_ious(gt_dets_t, tk_dets_t, is_encoded=True, do_ioa=False)
+ else:
+ similarity_scores = self._calculate_box_ious(gt_dets_t, tk_dets_t)
+ return similarity_scores
+
+ def _compute_vid_mappings(self, annotations):
+ """Computes mappings from videos to corresponding tracks and images."""
+ vids_to_tracks = {}
+ vids_to_imgs = {}
+ vid_ids = [vid["id"] for vid in self.gt_data["videos"]]
+
+        # compute a mapping from image IDs to images
+ images = {}
+ for image in self.gt_data["images"]:
+ images[image["id"]] = image
+
+ tk_str = utils.get_track_id_str(annotations[0])
+ for ann in annotations:
+ ann["area"] = ann["bbox"][2] * ann["bbox"][3]
+
+ vid = ann["video_id"]
+ if ann["video_id"] not in vids_to_tracks.keys():
+ vids_to_tracks[ann["video_id"]] = list()
+ if ann["video_id"] not in vids_to_imgs.keys():
+ vids_to_imgs[ann["video_id"]] = list()
+
+ # fill in vids_to_tracks
+ tid = ann[tk_str]
+ exist_tids = [track["id"] for track in vids_to_tracks[vid]]
+ try:
+ index1 = exist_tids.index(tid)
+ except ValueError:
+ index1 = -1
+ if tid not in exist_tids:
+ curr_track = {
+ "id": tid,
+ "category_id": ann["category_id"],
+ "video_id": vid,
+ "annotations": [ann],
+ }
+ vids_to_tracks[vid].append(curr_track)
+ else:
+ vids_to_tracks[vid][index1]["annotations"].append(ann)
+
+ # fill in vids_to_imgs
+ img_id = ann["image_id"]
+ exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
+ try:
+ index2 = exist_img_ids.index(img_id)
+ except ValueError:
+ index2 = -1
+ if index2 == -1:
+ curr_img = {"id": img_id, "annotations": [ann]}
+ vids_to_imgs[vid].append(curr_img)
+ else:
+ vids_to_imgs[vid][index2]["annotations"].append(ann)
+
+ # sort annotations by frame index and compute track area
+ for vid, tracks in vids_to_tracks.items():
+ for track in tracks:
+ track["annotations"] = sorted(
+ track["annotations"],
+ key=lambda x: images[x["image_id"]]["frame_id"],
+ )
+ # compute average area
+ track["area"] = sum(x["area"] for x in track["annotations"]) / len(
+ track["annotations"]
+ )
+
+ # ensure all videos are present
+ for vid_id in vid_ids:
+ if vid_id not in vids_to_tracks.keys():
+ vids_to_tracks[vid_id] = []
+ if vid_id not in vids_to_imgs.keys():
+ vids_to_imgs[vid_id] = []
+
+ return vids_to_tracks, vids_to_imgs
+
+ def _compute_image_to_timestep_mappings(self):
+ """Computes a mapping from images to timestep in sequence."""
+ images = {}
+ for image in self.gt_data["images"]:
+ images[image["id"]] = image
+
+ seq_to_imgs_to_timestep = {vid["id"]: dict() for vid in self.gt_data["videos"]}
+ for vid in seq_to_imgs_to_timestep:
+ curr_imgs = [img["id"] for img in self.video2gt_image[vid]]
+ curr_imgs = sorted(curr_imgs, key=lambda x: images[x]["frame_id"])
+ seq_to_imgs_to_timestep[vid] = {
+ curr_imgs[i]: i for i in range(len(curr_imgs))
+ }
+
+ return seq_to_imgs_to_timestep
+
+ def _limit_dets_per_image(self, annotations):
+ """Limits the number of detections for each image.
+
+ Adapted from https://github.com/TAO-Dataset/.
+ """
+ max_dets = self.config["MAX_DETECTIONS"]
+ img_ann = defaultdict(list)
+ for ann in annotations:
+ img_ann[ann["image_id"]].append(ann)
+
+ for img_id, _anns in img_ann.items():
+ if len(_anns) <= max_dets:
+ continue
+ _anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
+ img_ann[img_id] = _anns[:max_dets]
+
+ return [ann for anns in img_ann.values() for ann in anns]
+
+ def _fill_video_ids_inplace(self, annotations):
+ """Fills in missing video IDs inplace.
+
+ Adapted from https://github.com/TAO-Dataset/.
+ """
+ missing_video_id = [x for x in annotations if "video_id" not in x]
+ if missing_video_id:
+ image_id_to_video_id = {
+ x["id"]: x["video_id"] for x in self.gt_data["images"]
+ }
+ for x in missing_video_id:
+ x["video_id"] = image_id_to_video_id[x["image_id"]]
+
+ @staticmethod
+ def _make_tk_ids_unique(annotations):
+ """Makes track IDs unqiue over the whole annotation set.
+
+ Adapted from https://github.com/TAO-Dataset/.
+ """
+ track_id_videos = {}
+ track_ids_to_update = set()
+ max_track_id = 0
+
+ tk_str = utils.get_track_id_str(annotations[0])
+ for ann in annotations:
+ t = int(ann[tk_str])
+ if t not in track_id_videos:
+ track_id_videos[t] = ann["video_id"]
+
+ if ann["video_id"] != track_id_videos[t]:
+ # track id is assigned to multiple videos
+ track_ids_to_update.add(t)
+ max_track_id = max(max_track_id, t)
+
+ if track_ids_to_update:
+ print("true")
+ next_id = itertools.count(max_track_id + 1)
+ new_tk_ids = defaultdict(lambda: next(next_id))
+ for ann in annotations:
+ t = ann[tk_str]
+ v = ann["video_id"]
+ if t in track_ids_to_update:
+ ann[tk_str] = new_tk_ids[t, v]
+ return len(track_ids_to_update)
diff --git a/sam3/eval/teta_eval_toolkit/datasets/tao.py b/sam3/eval/teta_eval_toolkit/datasets/tao.py
new file mode 100644
index 0000000000000000000000000000000000000000..7d2bbcc5fa07d668ac6d1391584a15a1491da8dd
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/datasets/tao.py
@@ -0,0 +1,659 @@
+# fmt: off
+# flake8: noqa
+
+"""TAO Dataset."""
+import copy
+import itertools
+import json
+import os
+from collections import defaultdict
+
+import numpy as np
+
+from .. import _timing
+from ..config import get_default_dataset_config, init_config
+from ..utils import TrackEvalException
+from ._base_dataset import _BaseDataset
+
+
+class TAO(_BaseDataset):
+ """Dataset class for TAO tracking"""
+
+ def __init__(self, config=None):
+ """Initialize dataset, checking that all required files are present."""
+ super().__init__()
+ # Fill non-given config values with defaults
+ self.config = init_config(config, get_default_dataset_config(), self.get_name())
+ self.gt_fol = self.config["GT_FOLDER"]
+ self.tracker_fol = self.config["TRACKERS_FOLDER"]
+ self.should_classes_combine = True
+ self.use_super_categories = False
+ self.use_mask = self.config["USE_MASK"]
+
+ self.tracker_sub_fol = self.config["TRACKER_SUB_FOLDER"]
+ self.output_fol = self.config["OUTPUT_FOLDER"]
+ if self.output_fol is None:
+ self.output_fol = self.tracker_fol
+ self.output_sub_fol = self.config["OUTPUT_SUB_FOLDER"]
+
+ if self.gt_fol.endswith(".json"):
+ self.gt_data = json.load(open(self.gt_fol, "r"))
+ else:
+ gt_dir_files = [
+ file for file in os.listdir(self.gt_fol) if file.endswith(".json")
+ ]
+ if len(gt_dir_files) != 1:
+ raise TrackEvalException(
+ f"{self.gt_fol} does not contain exactly one json file."
+ )
+
+ with open(os.path.join(self.gt_fol, gt_dir_files[0])) as f:
+ self.gt_data = json.load(f)
+
+ # merge categories marked with a merged tag in TAO dataset
+ self._merge_categories(self.gt_data["annotations"] + self.gt_data["tracks"])
+
+ # get sequences to eval and sequence information
+ self.seq_list = [
+ vid["name"].replace("/", "-") for vid in self.gt_data["videos"]
+ ]
+ self.seq_name2seqid = {
+ vid["name"].replace("/", "-"): vid["id"] for vid in self.gt_data["videos"]
+ }
+ # compute mappings from videos to annotation data
+ self.video2gt_track, self.video2gt_image = self._compute_vid_mappings(
+ self.gt_data["annotations"]
+ )
+ # compute sequence lengths
+ self.seq_lengths = {vid["id"]: 0 for vid in self.gt_data["videos"]}
+ for img in self.gt_data["images"]:
+ self.seq_lengths[img["video_id"]] += 1
+ self.seq2images2timestep = self._compute_image_to_timestep_mappings()
+ self.seq2cls = {
+ vid["id"]: {
+ "pos_cat_ids": list(
+ {track["category_id"] for track in self.video2gt_track[vid["id"]]}
+ ),
+ "neg_cat_ids": vid["neg_category_ids"],
+ "not_exh_labeled_cat_ids": vid["not_exhaustive_category_ids"],
+ }
+ for vid in self.gt_data["videos"]
+ }
+
+ # Get classes to eval
+ considered_vid_ids = [self.seq_name2seqid[vid] for vid in self.seq_list]
+ seen_cats = set(
+ [
+ cat_id
+ for vid_id in considered_vid_ids
+ for cat_id in self.seq2cls[vid_id]["pos_cat_ids"]
+ ]
+ )
+ # only classes with ground truth are evaluated in TAO
+ self.valid_classes = [
+ cls["name"] for cls in self.gt_data["categories"] if cls["id"] in seen_cats
+ ]
+ cls_name2clsid_map = {
+ cls["name"]: cls["id"] for cls in self.gt_data["categories"]
+ }
+
+ if self.config["CLASSES_TO_EVAL"]:
+ self.class_list = [
+ cls.lower() if cls.lower() in self.valid_classes else None
+ for cls in self.config["CLASSES_TO_EVAL"]
+ ]
+ if not all(self.class_list):
+ valid_cls = ", ".join(self.valid_classes)
+ raise TrackEvalException(
+ "Attempted to evaluate an invalid class. Only classes "
+ f"{valid_cls} are valid (classes present in ground truth"
+ " data)."
+ )
+ else:
+ self.class_list = [cls for cls in self.valid_classes]
+ self.cls_name2clsid = {
+ k: v for k, v in cls_name2clsid_map.items() if k in self.class_list
+ }
+ self.clsid2cls_name = {
+ v: k for k, v in cls_name2clsid_map.items() if k in self.class_list
+ }
+ # get trackers to eval
+        print(self.config["TRACKERS_TO_EVAL"])
+ if self.config["TRACKERS_TO_EVAL"] is None:
+ self.tracker_list = os.listdir(self.tracker_fol)
+ else:
+ self.tracker_list = self.config["TRACKERS_TO_EVAL"]
+
+ if self.config["TRACKER_DISPLAY_NAMES"] is None:
+ self.tracker_to_disp = dict(zip(self.tracker_list, self.tracker_list))
+        elif (self.config["TRACKERS_TO_EVAL"] is not None) and (
+            len(self.config["TRACKER_DISPLAY_NAMES"]) == len(self.tracker_list)
+        ):
+            self.tracker_to_disp = dict(
+                zip(self.tracker_list, self.config["TRACKER_DISPLAY_NAMES"])
+            )
+ else:
+ raise TrackEvalException(
+ "List of tracker files and tracker display names do not match."
+ )
+
+ self.tracker_data = {tracker: dict() for tracker in self.tracker_list}
+
+ for tracker in self.tracker_list:
+ if self.tracker_sub_fol.endswith(".json"):
+ with open(os.path.join(self.tracker_sub_fol)) as f:
+ curr_data = json.load(f)
+ else:
+ tr_dir = os.path.join(self.tracker_fol, tracker, self.tracker_sub_fol)
+ tr_dir_files = [
+ file for file in os.listdir(tr_dir) if file.endswith(".json")
+ ]
+ if len(tr_dir_files) != 1:
+ raise TrackEvalException(
+ f"{tr_dir} does not contain exactly one json file."
+ )
+ with open(os.path.join(tr_dir, tr_dir_files[0])) as f:
+ curr_data = json.load(f)
+
+ # limit detections if MAX_DETECTIONS > 0
+ if self.config["MAX_DETECTIONS"]:
+ curr_data = self._limit_dets_per_image(curr_data)
+
+ # fill missing video ids
+ self._fill_video_ids_inplace(curr_data)
+
+ # make track ids unique over whole evaluation set
+ self._make_tk_ids_unique(curr_data)
+
+ # merge categories marked with a merged tag in TAO dataset
+ self._merge_categories(curr_data)
+
+ # get tracker sequence information
+ curr_vids2tracks, curr_vids2images = self._compute_vid_mappings(curr_data)
+ self.tracker_data[tracker]["vids_to_tracks"] = curr_vids2tracks
+ self.tracker_data[tracker]["vids_to_images"] = curr_vids2images
+
+ def get_display_name(self, tracker):
+ return self.tracker_to_disp[tracker]
+
+ def _load_raw_file(self, tracker, seq, is_gt):
+ """Load a file (gt or tracker) in the TAO format
+
+ If is_gt, this returns a dict which contains the fields:
+ [gt_ids, gt_classes]:
+ list (for each timestep) of 1D NDArrays (for each det).
+ [gt_dets]: list (for each timestep) of lists of detections.
+
+ if not is_gt, this returns a dict which contains the fields:
+ [tk_ids, tk_classes, tk_confidences]:
+ list (for each timestep) of 1D NDArrays (for each det).
+ [tk_dets]: list (for each timestep) of lists of detections.
+ """
+ seq_id = self.seq_name2seqid[seq]
+ # file location
+ if is_gt:
+ imgs = self.video2gt_image[seq_id]
+ else:
+ imgs = self.tracker_data[tracker]["vids_to_images"][seq_id]
+
+ # convert data to required format
+ num_timesteps = self.seq_lengths[seq_id]
+ img_to_timestep = self.seq2images2timestep[seq_id]
+ data_keys = ["ids", "classes", "dets"]
+ if not is_gt:
+ data_keys += ["tk_confidences"]
+ raw_data = {key: [None] * num_timesteps for key in data_keys}
+ for img in imgs:
+ # some tracker data contains images without any ground truth info,
+ # these are ignored
+ if img["id"] not in img_to_timestep:
+ continue
+ t = img_to_timestep[img["id"]]
+ anns = img["annotations"]
+ if self.use_mask:
+ # When using mask, extract segmentation data
+ raw_data["dets"][t] = [ann.get("segmentation") for ann in anns]
+ else:
+ # When using bbox, extract bbox data
+ raw_data["dets"][t] = np.atleast_2d([ann["bbox"] for ann in anns]).astype(
+ float
+ )
+ raw_data["ids"][t] = np.atleast_1d(
+ [ann["track_id"] for ann in anns]
+ ).astype(int)
+ raw_data["classes"][t] = np.atleast_1d(
+ [ann["category_id"] for ann in anns]
+ ).astype(int)
+ if not is_gt:
+ raw_data["tk_confidences"][t] = np.atleast_1d(
+ [ann["score"] for ann in anns]
+ ).astype(float)
+
+ for t, d in enumerate(raw_data["dets"]):
+ if d is None:
+ raw_data["dets"][t] = np.empty((0, 4)).astype(float)
+ raw_data["ids"][t] = np.empty(0).astype(int)
+ raw_data["classes"][t] = np.empty(0).astype(int)
+ if not is_gt:
+ raw_data["tk_confidences"][t] = np.empty(0)
+
+ if is_gt:
+ key_map = {"ids": "gt_ids", "classes": "gt_classes", "dets": "gt_dets"}
+ else:
+ key_map = {"ids": "tk_ids", "classes": "tk_classes", "dets": "tk_dets"}
+ for k, v in key_map.items():
+ raw_data[v] = raw_data.pop(k)
+
+ raw_data["num_timesteps"] = num_timesteps
+ raw_data["neg_cat_ids"] = self.seq2cls[seq_id]["neg_cat_ids"]
+ raw_data["not_exh_labeled_cls"] = self.seq2cls[seq_id][
+ "not_exh_labeled_cat_ids"
+ ]
+ raw_data["seq"] = seq
+ return raw_data
+
+ def get_preprocessed_seq_data_thr(self, raw_data, cls, assignment=None):
+ """Preprocess data for a single sequence for a single class.
+
+ Inputs:
+ raw_data: dict containing the data for the sequence already
+ read in by get_raw_seq_data().
+ cls: class to be evaluated.
+ Outputs:
+ gt_ids:
+ list (for each timestep) of ids of GT tracks
+ tk_ids:
+ list (for each timestep) of ids of predicted tracks (all for TP
+ matching (Det + AssocA))
+ tk_overlap_ids:
+ list (for each timestep) of ids of predicted tracks that overlap
+ with GTs
+            tk_neg_ids:
+                list (for each timestep) of ids of predicted tracks whose
+                class id is on the negative list for the current sequence.
+            tk_exh_ids:
+                list (for each timestep) of ids of predicted tracks that do not
+                overlap with existing GTs but have their class id on the
+                exhaustively annotated class list for the current sequence.
+            tk_dets:
+                list (for each timestep) of lists of detections corresponding
+                to the tk_ids
+            tk_classes:
+                list (for each timestep) of lists of classes corresponding
+                to the tk_ids
+            tk_confidences:
+                list (for each timestep) of lists of confidences corresponding
+                to the tk_ids
+ sim_scores:
+ similarity score between gt_ids and tk_ids.
+ """
+ if cls != "all":
+ cls_id = self.cls_name2clsid[cls]
+
+ data_keys = [
+ "gt_ids",
+ "tk_ids",
+ "gt_id_map",
+ "tk_id_map",
+ "gt_dets",
+ "gt_classes",
+ "gt_class_name",
+ "tk_overlap_classes",
+ "tk_overlap_ids",
+ "tk_neg_ids",
+ "tk_exh_ids",
+ "tk_class_eval_tk_ids",
+ "tk_dets",
+ "tk_classes",
+ "tk_confidences",
+ "sim_scores",
+ ]
+ data = {key: [None] * raw_data["num_timesteps"] for key in data_keys}
+ unique_gt_ids = []
+ unique_tk_ids = []
+ num_gt_dets = 0
+ num_tk_cls_dets = 0
+ num_tk_overlap_dets = 0
+ overlap_ious_thr = 0.5
+ loc_and_asso_tk_ids = []
+
+ for t in range(raw_data["num_timesteps"]):
+ # only extract relevant dets for this class for preproc and eval
+ if cls == "all":
+ gt_class_mask = np.ones_like(raw_data["gt_classes"][t]).astype(bool)
+ else:
+ gt_class_mask = np.atleast_1d(
+ raw_data["gt_classes"][t] == cls_id
+ ).astype(bool)
+
+            # select GT tracks that are not in the class currently being evaluated
+ if assignment is not None and assignment:
+ all_gt_ids = list(assignment[t].keys())
+ gt_ids_in = raw_data["gt_ids"][t][gt_class_mask]
+ gt_ids_out = set(all_gt_ids) - set(gt_ids_in)
+ tk_ids_out = set([assignment[t][key] for key in list(gt_ids_out)])
+
+ # compute overlapped tracks and add their ids to overlap_tk_ids
+ sim_scores = raw_data["similarity_scores"]
+ overlap_ids_masks = (sim_scores[t][gt_class_mask] >= overlap_ious_thr).any(
+ axis=0
+ )
+ overlap_tk_ids_t = raw_data["tk_ids"][t][overlap_ids_masks]
+ if assignment is not None and assignment:
+ data["tk_overlap_ids"][t] = list(set(overlap_tk_ids_t) - tk_ids_out)
+ else:
+ data["tk_overlap_ids"][t] = list(set(overlap_tk_ids_t))
+
+ loc_and_asso_tk_ids += data["tk_overlap_ids"][t]
+
+ data["tk_exh_ids"][t] = []
+ data["tk_neg_ids"][t] = []
+
+ if cls == "all":
+ continue
+
+        # remove tk_ids that have been assigned to GTs belonging to other classes
+ loc_and_asso_tk_ids = list(set(loc_and_asso_tk_ids))
+
+ # remove all unwanted unmatched tracker detections
+ for t in range(raw_data["num_timesteps"]):
+ # add gt to the data
+ if cls == "all":
+ gt_class_mask = np.ones_like(raw_data["gt_classes"][t]).astype(bool)
+ else:
+ gt_class_mask = np.atleast_1d(
+ raw_data["gt_classes"][t] == cls_id
+ ).astype(bool)
+ data["gt_classes"][t] = cls_id
+ data["gt_class_name"][t] = cls
+
+ gt_ids = raw_data["gt_ids"][t][gt_class_mask]
+ if self.use_mask:
+                gt_dets = [
+                    raw_data["gt_dets"][t][ind]
+                    for ind in range(len(gt_class_mask))
+                    if gt_class_mask[ind]
+                ]
+ else:
+ gt_dets = raw_data["gt_dets"][t][gt_class_mask]
+ data["gt_ids"][t] = gt_ids
+ data["gt_dets"][t] = gt_dets
+
+ # filter pred and only keep those that highly overlap with GTs
+ tk_mask = np.isin(
+ raw_data["tk_ids"][t], np.array(loc_and_asso_tk_ids), assume_unique=True
+ )
+ tk_overlap_mask = np.isin(
+ raw_data["tk_ids"][t],
+ np.array(data["tk_overlap_ids"][t]),
+ assume_unique=True,
+ )
+
+ tk_ids = raw_data["tk_ids"][t][tk_mask]
+ if self.use_mask:
+                tk_dets = [
+                    raw_data["tk_dets"][t][ind]
+                    for ind in range(len(tk_mask))
+                    if tk_mask[ind]
+                ]
+ else:
+ tk_dets = raw_data["tk_dets"][t][tk_mask]
+ tracker_classes = raw_data["tk_classes"][t][tk_mask]
+
+ # add overlap classes for computing the FP for Cls term
+ tracker_overlap_classes = raw_data["tk_classes"][t][tk_overlap_mask]
+ tracker_confidences = raw_data["tk_confidences"][t][tk_mask]
+ sim_scores_masked = sim_scores[t][gt_class_mask, :][:, tk_mask]
+
+ # add filtered prediction to the data
+ data["tk_classes"][t] = tracker_classes
+ data["tk_overlap_classes"][t] = tracker_overlap_classes
+ data["tk_ids"][t] = tk_ids
+ data["tk_dets"][t] = tk_dets
+ data["tk_confidences"][t] = tracker_confidences
+ data["sim_scores"][t] = sim_scores_masked
+ data["tk_class_eval_tk_ids"][t] = set(
+ list(data["tk_overlap_ids"][t])
+ + list(data["tk_neg_ids"][t])
+ + list(data["tk_exh_ids"][t])
+ )
+
+ # count total number of detections
+ unique_gt_ids += list(np.unique(data["gt_ids"][t]))
+ # the unique track ids are for association.
+ unique_tk_ids += list(np.unique(data["tk_ids"][t]))
+
+ num_tk_overlap_dets += len(data["tk_overlap_ids"][t])
+ num_tk_cls_dets += len(data["tk_class_eval_tk_ids"][t])
+ num_gt_dets += len(data["gt_ids"][t])
+
+ # re-label IDs such that there are no empty IDs
+ if len(unique_gt_ids) > 0:
+ unique_gt_ids = np.unique(unique_gt_ids)
+ gt_id_map = np.nan * np.ones((np.max(unique_gt_ids) + 1))
+ gt_id_map[unique_gt_ids] = np.arange(len(unique_gt_ids))
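+            # e.g. original GT ids [3, 7, 12] become contiguous ids [0, 1, 2];
+            # data["gt_id_map"] keeps the reverse mapping (new id -> original id)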
+ data["gt_id_map"] = {}
+ for gt_id in unique_gt_ids:
+ new_gt_id = gt_id_map[gt_id].astype(int)
+ data["gt_id_map"][new_gt_id] = gt_id
+
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["gt_ids"][t]) > 0:
+ data["gt_ids"][t] = gt_id_map[data["gt_ids"][t]].astype(int)
+
+ if len(unique_tk_ids) > 0:
+ unique_tk_ids = np.unique(unique_tk_ids)
+ tk_id_map = np.nan * np.ones((np.max(unique_tk_ids) + 1))
+ tk_id_map[unique_tk_ids] = np.arange(len(unique_tk_ids))
+
+ data["tk_id_map"] = {}
+ for track_id in unique_tk_ids:
+ new_track_id = tk_id_map[track_id].astype(int)
+ data["tk_id_map"][new_track_id] = track_id
+
+ for t in range(raw_data["num_timesteps"]):
+ if len(data["tk_ids"][t]) > 0:
+ data["tk_ids"][t] = tk_id_map[data["tk_ids"][t]].astype(int)
+ if len(data["tk_overlap_ids"][t]) > 0:
+ data["tk_overlap_ids"][t] = tk_id_map[
+ data["tk_overlap_ids"][t]
+ ].astype(int)
+
+ # record overview statistics.
+ data["num_tk_cls_dets"] = num_tk_cls_dets
+ data["num_tk_overlap_dets"] = num_tk_overlap_dets
+ data["num_gt_dets"] = num_gt_dets
+ data["num_tk_ids"] = len(unique_tk_ids)
+ data["num_gt_ids"] = len(unique_gt_ids)
+ data["num_timesteps"] = raw_data["num_timesteps"]
+ data["seq"] = raw_data["seq"]
+
+ self._check_unique_ids(data)
+
+ return data
+
+ @_timing.time
+ def get_preprocessed_seq_data(
+ self, raw_data, cls, assignment=None, thresholds=[50, 75]
+ ):
+ """Preprocess data for a single sequence for a single class."""
+ data = {}
+ if thresholds is None:
+ thresholds = [50]
+ elif isinstance(thresholds, int):
+ thresholds = [thresholds]
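+        # thresholds are IoU thresholds given in percent (e.g. 50 -> IoU 0.5);
+        # each threshold gets its own preprocessing pass below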
+
+ for thr in thresholds:
+ assignment_thr = None
+ if assignment is not None:
+ assignment_thr = assignment[thr]
+ data[thr] = self.get_preprocessed_seq_data_thr(
+ raw_data, cls, assignment_thr
+ )
+
+ return data
+
+ def _calculate_similarities(self, gt_dets_t, tk_dets_t):
+ """Compute similarity scores."""
+ if self.use_mask:
+ similarity_scores = self._calculate_mask_ious(gt_dets_t, tk_dets_t, is_encoded=True, do_ioa=False)
+ else:
+ similarity_scores = self._calculate_box_ious(gt_dets_t, tk_dets_t)
+ return similarity_scores
+
+ def _merge_categories(self, annotations):
+ """Merges categories with a merged tag.
+
+ Adapted from https://github.com/TAO-Dataset.
+ """
+ merge_map = {}
+ for category in self.gt_data["categories"]:
+ if "merged" in category:
+ for to_merge in category["merged"]:
+ merge_map[to_merge["id"]] = category["id"]
+
+ for ann in annotations:
+ ann["category_id"] = merge_map.get(ann["category_id"], ann["category_id"])
+
+ def _compute_vid_mappings(self, annotations):
+ """Computes mappings from videos to corresponding tracks and images."""
+ vids_to_tracks = {}
+ vids_to_imgs = {}
+ vid_ids = [vid["id"] for vid in self.gt_data["videos"]]
+
+        # compute a mapping from image IDs to images
+ images = {}
+ for image in self.gt_data["images"]:
+ images[image["id"]] = image
+
+ for ann in annotations:
+ ann["area"] = ann["bbox"][2] * ann["bbox"][3]
+
+ vid = ann["video_id"]
+ if ann["video_id"] not in vids_to_tracks.keys():
+ vids_to_tracks[ann["video_id"]] = list()
+ if ann["video_id"] not in vids_to_imgs.keys():
+ vids_to_imgs[ann["video_id"]] = list()
+
+ # fill in vids_to_tracks
+ tid = ann["track_id"]
+ exist_tids = [track["id"] for track in vids_to_tracks[vid]]
+ try:
+ index1 = exist_tids.index(tid)
+ except ValueError:
+ index1 = -1
+ if tid not in exist_tids:
+ curr_track = {
+ "id": tid,
+ "category_id": ann["category_id"],
+ "video_id": vid,
+ "annotations": [ann],
+ }
+ vids_to_tracks[vid].append(curr_track)
+ else:
+ vids_to_tracks[vid][index1]["annotations"].append(ann)
+
+ # fill in vids_to_imgs
+ img_id = ann["image_id"]
+ exist_img_ids = [img["id"] for img in vids_to_imgs[vid]]
+ try:
+ index2 = exist_img_ids.index(img_id)
+ except ValueError:
+ index2 = -1
+ if index2 == -1:
+ curr_img = {"id": img_id, "annotations": [ann]}
+ vids_to_imgs[vid].append(curr_img)
+ else:
+ vids_to_imgs[vid][index2]["annotations"].append(ann)
+
+ # sort annotations by frame index and compute track area
+ for vid, tracks in vids_to_tracks.items():
+ for track in tracks:
+ track["annotations"] = sorted(
+ track["annotations"],
+ key=lambda x: images[x["image_id"]]["frame_index"],
+ )
+ # compute average area
+ track["area"] = sum(x["area"] for x in track["annotations"]) / len(
+ track["annotations"]
+ )
+
+ # ensure all videos are present
+ for vid_id in vid_ids:
+ if vid_id not in vids_to_tracks.keys():
+ vids_to_tracks[vid_id] = []
+ if vid_id not in vids_to_imgs.keys():
+ vids_to_imgs[vid_id] = []
+
+ return vids_to_tracks, vids_to_imgs
+
+ def _compute_image_to_timestep_mappings(self):
+ """Computes a mapping from images to timestep in sequence."""
+ images = {}
+ for image in self.gt_data["images"]:
+ images[image["id"]] = image
+
+ seq_to_imgs_to_timestep = {vid["id"]: dict() for vid in self.gt_data["videos"]}
+ for vid in seq_to_imgs_to_timestep:
+ curr_imgs = [img["id"] for img in self.video2gt_image[vid]]
+ curr_imgs = sorted(curr_imgs, key=lambda x: images[x]["frame_index"])
+ seq_to_imgs_to_timestep[vid] = {
+ curr_imgs[i]: i for i in range(len(curr_imgs))
+ }
+
+ return seq_to_imgs_to_timestep
+
+ def _limit_dets_per_image(self, annotations):
+ """Limits the number of detections for each image.
+
+ Adapted from https://github.com/TAO-Dataset/.
+ """
+ max_dets = self.config["MAX_DETECTIONS"]
+ img_ann = defaultdict(list)
+ for ann in annotations:
+ img_ann[ann["image_id"]].append(ann)
+
+ for img_id, _anns in img_ann.items():
+ if len(_anns) <= max_dets:
+ continue
+ _anns = sorted(_anns, key=lambda x: x["score"], reverse=True)
+ img_ann[img_id] = _anns[:max_dets]
+
+ return [ann for anns in img_ann.values() for ann in anns]
+
+ def _fill_video_ids_inplace(self, annotations):
+ """Fills in missing video IDs inplace.
+
+ Adapted from https://github.com/TAO-Dataset/.
+ """
+ missing_video_id = [x for x in annotations if "video_id" not in x]
+ if missing_video_id:
+ image_id_to_video_id = {
+ x["id"]: x["video_id"] for x in self.gt_data["images"]
+ }
+ for x in missing_video_id:
+ x["video_id"] = image_id_to_video_id[x["image_id"]]
+
+ @staticmethod
+ def _make_tk_ids_unique(annotations):
+ """Makes track IDs unqiue over the whole annotation set.
+
+ Adapted from https://github.com/TAO-Dataset/.
+ """
+ track_id_videos = {}
+ track_ids_to_update = set()
+ max_track_id = 0
+ for ann in annotations:
+ t = ann["track_id"]
+ if t not in track_id_videos:
+ track_id_videos[t] = ann["video_id"]
+
+ if ann["video_id"] != track_id_videos[t]:
+ # track id is assigned to multiple videos
+ track_ids_to_update.add(t)
+ max_track_id = max(max_track_id, t)
+
+ if track_ids_to_update:
+ print("true")
+ next_id = itertools.count(max_track_id + 1)
+ new_tk_ids = defaultdict(lambda: next(next_id))
+ for ann in annotations:
+ t = ann["track_id"]
+ v = ann["video_id"]
+ if t in track_ids_to_update:
+ ann["track_id"] = new_tk_ids[t, v]
+ return len(track_ids_to_update)
diff --git a/sam3/eval/teta_eval_toolkit/eval.py b/sam3/eval/teta_eval_toolkit/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..336f10b0f8978fc93c1fcce73314a8f0f3c19415
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/eval.py
@@ -0,0 +1,275 @@
+# fmt: off
+# flake8: noqa
+
+import copy
+import os
+import pickle
+import time
+import traceback
+from functools import partial
+from multiprocessing.pool import Pool
+
+import numpy as np
+
+from . import _timing, utils
+from .config import get_default_eval_config, init_config
+from .utils import TrackEvalException
+
+
+class Evaluator:
+ """Evaluator class for evaluating different metrics for each datasets."""
+
+ def __init__(self, config=None):
+ """Initialize the evaluator with a config file."""
+ self.config = init_config(config, get_default_eval_config(), "Eval")
+ # Only run timing analysis if not run in parallel.
+ if self.config["TIME_PROGRESS"] and not self.config["USE_PARALLEL"]:
+ _timing.DO_TIMING = True
+ if self.config["DISPLAY_LESS_PROGRESS"]:
+ _timing.DISPLAY_LESS_PROGRESS = True
+
+ @_timing.time
+ def evaluate(self, dataset_list, metrics_list):
+ """Evaluate a set of metrics on a set of datasets."""
+ config = self.config
+ metric_names = utils.validate_metrics_list(metrics_list)
+ dataset_names = [dataset.get_name() for dataset in dataset_list]
+ output_res = {}
+ output_msg = {}
+
+ for dataset, dname in zip(dataset_list, dataset_names):
+ # Get dataset info about what to evaluate
+ output_res[dname] = {}
+ output_msg[dname] = {}
+ tracker_list, seq_list, class_list = dataset.get_eval_info()
+ print(
+ f"\nEvaluating {len(tracker_list)} tracker(s) on "
+ f"{len(seq_list)} sequence(s) for {len(class_list)} class(es)"
+ f" on {dname} dataset using the following "
+ f'metrics: {", ".join(metric_names)}\n'
+ )
+
+ # Evaluate each tracker
+ for tracker in tracker_list:
+ try:
+ output_res, output_msg = self.evaluate_tracker(
+ tracker,
+ dataset,
+ dname,
+ class_list,
+ metrics_list,
+ metric_names,
+ seq_list,
+ output_res,
+ output_msg,
+ )
+ except Exception as err:
+ output_res[dname][tracker] = None
+ if type(err) == TrackEvalException:
+ output_msg[dname][tracker] = str(err)
+ else:
+ output_msg[dname][tracker] = "Unknown error occurred."
+ print("Tracker %s was unable to be evaluated." % tracker)
+ print(err)
+ traceback.print_exc()
+ if config["LOG_ON_ERROR"] is not None:
+ with open(config["LOG_ON_ERROR"], "a") as f:
+ print(dname, file=f)
+ print(tracker, file=f)
+ print(traceback.format_exc(), file=f)
+ print("\n\n\n", file=f)
+ if config["BREAK_ON_ERROR"]:
+ raise err
+ elif config["RETURN_ON_ERROR"]:
+ return output_res, output_msg
+
+ return output_res, output_msg
+
+ def evaluate_tracker(
+ self,
+ tracker,
+ dataset,
+ dname,
+ class_list,
+ metrics_list,
+ metric_names,
+ seq_list,
+ output_res,
+ output_msg,
+ ):
+ """Evaluate each sequence in parallel or in series."""
+ print("\nEvaluating %s\n" % tracker)
+ time_start = time.time()
+ config = self.config
+ if config["USE_PARALLEL"]:
+ with Pool(config["NUM_PARALLEL_CORES"]) as pool:
+ _eval_sequence = partial(
+ eval_sequence,
+ dataset=dataset,
+ tracker=tracker,
+ class_list=class_list,
+ metrics_list=metrics_list,
+ metric_names=metric_names,
+ )
+ results = pool.map(_eval_sequence, seq_list)
+ res = dict(zip(seq_list, results))
+ else:
+ res = {}
+ for curr_seq in sorted(seq_list):
+ res[curr_seq] = eval_sequence(
+ curr_seq, dataset, tracker, class_list, metrics_list, metric_names
+ )
+
+ # collecting combined cls keys (cls averaged, det averaged, super classes)
+ cls_keys = []
+ res["COMBINED_SEQ"] = {}
+ # combine sequences for each class
+ for c_cls in class_list:
+ res["COMBINED_SEQ"][c_cls] = {}
+ for metric, mname in zip(metrics_list, metric_names):
+ curr_res = {
+ seq_key: seq_value[c_cls][mname]
+ for seq_key, seq_value in res.items()
+ if seq_key != "COMBINED_SEQ"
+ }
+ # combine results over all sequences and then over all classes
+ res["COMBINED_SEQ"][c_cls][mname] = metric.combine_sequences(curr_res)
+
+ # combine classes
+ if dataset.should_classes_combine:
+ if config["OUTPUT_PER_SEQ_RES"]:
+ video_keys = res.keys()
+ else:
+ video_keys = ["COMBINED_SEQ"]
+ for v_key in video_keys:
+ cls_keys += ["average"]
+ res[v_key]["average"] = {}
+ for metric, mname in zip(metrics_list, metric_names):
+ cls_res = {
+ cls_key: cls_value[mname]
+ for cls_key, cls_value in res[v_key].items()
+ if cls_key not in cls_keys
+ }
+ res[v_key]["average"][
+ mname
+ ] = metric.combine_classes_class_averaged(
+ cls_res, ignore_empty=True
+ )
+
+ # combine classes to super classes
+ if dataset.use_super_categories:
+ for cat, sub_cats in dataset.super_categories.items():
+ cls_keys.append(cat)
+ res["COMBINED_SEQ"][cat] = {}
+ for metric, mname in zip(metrics_list, metric_names):
+ cat_res = {
+ cls_key: cls_value[mname]
+ for cls_key, cls_value in res["COMBINED_SEQ"].items()
+ if cls_key in sub_cats
+ }
+ res["COMBINED_SEQ"][cat][
+ mname
+ ] = metric.combine_classes_det_averaged(cat_res)
+ # Print and output results in various formats
+ if config["TIME_PROGRESS"]:
+ print(
+ f"\nAll sequences for {tracker} finished in"
+ f" {time.time() - time_start} seconds"
+ )
+ output_fol = dataset.get_output_fol(tracker)
+ os.makedirs(output_fol, exist_ok=True)
+
+ # take a mean of each field of each thr
+ if config["OUTPUT_PER_SEQ_RES"]:
+ all_res = copy.deepcopy(res)
+ summary_keys = res.keys()
+ else:
+ all_res = copy.deepcopy(res["COMBINED_SEQ"])
+ summary_keys = ["COMBINED_SEQ"]
+ thr_key_list = [50]
+ for s_key in summary_keys:
+ for metric, mname in zip(metrics_list, metric_names):
+ if mname != "TETA":
+ if s_key == "COMBINED_SEQ":
+ metric.print_table(
+ {"COMBINED_SEQ": res["COMBINED_SEQ"][cls_keys[0]][mname]},
+ tracker,
+ cls_keys[0],
+ )
+ continue
+
+ for c_cls in res[s_key].keys():
+ for thr in thr_key_list:
+ all_res[s_key][c_cls][mname][thr] = metric._summary_row(
+ res[s_key][c_cls][mname][thr]
+ )
+ x = (
+ np.array(list(all_res[s_key][c_cls]["TETA"].values()))
+ .astype("float")
+ .mean(axis=0)
+ )
+ all_res_summary = list(x.round(decimals=2).astype("str"))
+ all_res[s_key][c_cls][mname]["ALL"] = all_res_summary
+ if config["OUTPUT_SUMMARY"] and s_key == "COMBINED_SEQ":
+ for t in thr_key_list:
+ metric.print_summary_table(
+ all_res[s_key][cls_keys[0]][mname][t],
+ t,
+ tracker,
+ cls_keys[0],
+ )
+
+ if config["OUTPUT_TEM_RAW_DATA"]:
+ out_file = os.path.join(output_fol, "teta_summary_results.pth")
+ pickle.dump(all_res, open(out_file, "wb"))
+ print("Saved the TETA summary results.")
+
+ # output
+ output_res[dname][mname] = all_res[s_key][cls_keys[0]][mname][t]
+ output_msg[dname][tracker] = "Success"
+
+ return output_res, output_msg
+
+
+@_timing.time
+def eval_sequence(seq, dataset, tracker, class_list, metrics_list, metric_names):
+ """Function for evaluating a single sequence."""
+ raw_data = dataset.get_raw_seq_data(tracker, seq)
+ seq_res = {}
+
+ if "TETA" in metric_names:
+ thresholds = [50]
+ data_all_class = dataset.get_preprocessed_seq_data(
+ raw_data, "all", thresholds=thresholds
+ )
+ teta = metrics_list[metric_names.index("TETA")]
+ assignment = teta.compute_global_assignment(data_all_class)
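+        # the class-agnostic assignment computed above is reused when preprocessing
+        # each class, so per-class filtering keeps a consistent GT/track matching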
+
+ # create a dict to save Cls_FP for each class in different thr.
+ cls_fp = {
+ key: {
+ cls: np.zeros((len(np.arange(0.5, 0.99, 0.05)))) for cls in class_list
+ }
+ for key in thresholds
+ }
+
+ for cls in class_list:
+ seq_res[cls] = {}
+ data = dataset.get_preprocessed_seq_data(raw_data, cls, assignment, thresholds)
+
+ for metric, mname in zip(metrics_list, metric_names):
+ if mname == "TETA":
+ seq_res[cls][mname], cls_fp, _ = metric.eval_sequence(
+ data, cls, dataset.clsid2cls_name, cls_fp
+ )
+ else:
+ seq_res[cls][mname] = metric.eval_sequence(data)
+
+ if "TETA" in metric_names:
+ for thr in thresholds:
+ for cls in class_list:
+ seq_res[cls]["TETA"][thr]["Cls_FP"] += cls_fp[thr][cls]
+
+ return seq_res
diff --git a/sam3/eval/teta_eval_toolkit/metrics/__init__.py b/sam3/eval/teta_eval_toolkit/metrics/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8352cd45990e70e14d76ad053018e48c65e9f994
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/metrics/__init__.py
@@ -0,0 +1,4 @@
+# fmt: off
+# flake8: noqa
+
+from .teta import TETA
diff --git a/sam3/eval/teta_eval_toolkit/metrics/_base_metric.py b/sam3/eval/teta_eval_toolkit/metrics/_base_metric.py
new file mode 100644
index 0000000000000000000000000000000000000000..521a76d72bcaac84042fa24413c377214c1086a9
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/metrics/_base_metric.py
@@ -0,0 +1,148 @@
+# fmt: off
+# flake8: noqa
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from .. import _timing
+from ..utils import TrackEvalException
+
+
+class _BaseMetric(ABC):
+ @abstractmethod
+ def __init__(self):
+ self.plottable = False
+ self.integer_fields = []
+ self.float_fields = []
+ self.array_labels = []
+ self.integer_array_fields = []
+ self.float_array_fields = []
+ self.fields = []
+ self.summary_fields = []
+ self.registered = False
+
+ #####################################################################
+ # Abstract functions for subclasses to implement
+
+ @_timing.time
+ @abstractmethod
+ def eval_sequence(self, data):
+ ...
+
+ @abstractmethod
+ def combine_sequences(self, all_res):
+ ...
+
+ @abstractmethod
+ def combine_classes_class_averaged(self, all_res, ignore_empty=False):
+ ...
+
+ @abstractmethod
+ def combine_classes_det_averaged(self, all_res):
+ ...
+
+ def plot_single_tracker_results(self, all_res, tracker, output_folder, cls):
+ """Plot results, only valid for metrics with self.plottable."""
+ if self.plottable:
+ raise NotImplementedError(
+ f"plot_results is not implemented for metric {self.get_name()}"
+ )
+ else:
+ pass
+
+ #####################################################################
+ # Helper functions which are useful for all metrics:
+
+ @classmethod
+ def get_name(cls):
+ return cls.__name__
+
+ @staticmethod
+ def _combine_sum(all_res, field):
+ """Combine sequence results via sum"""
+ return sum([all_res[k][field] for k in all_res.keys()])
+
+ @staticmethod
+ def _combine_weighted_av(all_res, field, comb_res, weight_field):
+ """Combine sequence results via weighted average."""
+ return sum(
+ [all_res[k][field] * all_res[k][weight_field] for k in all_res.keys()]
+ ) / np.maximum(1.0, comb_res[weight_field])
+
+ def print_table(self, table_res, tracker, cls):
+ """Print table of results for all sequences."""
+ print("")
+ metric_name = self.get_name()
+ self._row_print(
+ [metric_name + ": " + tracker + "-" + cls] + self.summary_fields
+ )
+ for seq, results in sorted(table_res.items()):
+ if seq == "COMBINED_SEQ":
+ continue
+ summary_res = self._summary_row(results)
+ self._row_print([seq] + summary_res)
+ summary_res = self._summary_row(table_res["COMBINED_SEQ"])
+ self._row_print(["COMBINED"] + summary_res)
+
+ def _summary_row(self, results_):
+ vals = []
+ for h in self.summary_fields:
+ if h in self.float_array_fields:
+ vals.append("{0:1.5g}".format(100 * np.mean(results_[h])))
+ elif h in self.float_fields:
+ vals.append("{0:1.5g}".format(100 * float(results_[h])))
+ elif h in self.integer_fields:
+ vals.append("{0:d}".format(int(results_[h])))
+ else:
+ raise NotImplementedError(
+ "Summary function not implemented for this field type."
+ )
+ return vals
+
+ @staticmethod
+ def _row_print(*argv):
+ """Print results in evenly spaced rows, with more space in first row."""
+ if len(argv) == 1:
+ argv = argv[0]
+ to_print = "%-35s" % argv[0]
+ for v in argv[1:]:
+ to_print += "%-10s" % str(v)
+ print(to_print)
+
+ def summary_results(self, table_res):
+ """Return a simple summary of final results for a tracker."""
+ return dict(
+ zip(self.summary_fields, self._summary_row(table_res["COMBINED_SEQ"]),)
+ )
+
+ def detailed_results(self, table_res):
+ """Return detailed final results for a tracker."""
+ # Get detailed field information
+ detailed_fields = self.float_fields + self.integer_fields
+ for h in self.float_array_fields + self.integer_array_fields:
+ for alpha in [int(100 * x) for x in self.array_labels]:
+ detailed_fields.append(h + "___" + str(alpha))
+ detailed_fields.append(h + "___AUC")
+
+ # Get detailed results
+ detailed_results = {}
+ for seq, res in table_res.items():
+ detailed_row = self._detailed_row(res)
+ if len(detailed_row) != len(detailed_fields):
+ raise TrackEvalException(
+ f"Field names and data have different sizes "
+ f"({len(detailed_row)} and {len(detailed_fields)})"
+ )
+ detailed_results[seq] = dict(zip(detailed_fields, detailed_row))
+ return detailed_results
+
+ def _detailed_row(self, res):
+ detailed_row = []
+ for h in self.float_fields + self.integer_fields:
+ detailed_row.append(res[h])
+ for h in self.float_array_fields + self.integer_array_fields:
+ for i, _ in enumerate([int(100 * x) for x in self.array_labels]):
+ detailed_row.append(res[h][i])
+ detailed_row.append(np.mean(res[h]))
+ return detailed_row
diff --git a/sam3/eval/teta_eval_toolkit/metrics/teta.py b/sam3/eval/teta_eval_toolkit/metrics/teta.py
new file mode 100644
index 0000000000000000000000000000000000000000..329626fbeb56dc4ce82ac813e1fc69c2d74080f6
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/metrics/teta.py
@@ -0,0 +1,399 @@
+# fmt: off
+# flake8: noqa
+
+"""Track Every Thing Accuracy metric."""
+
+import numpy as np
+from scipy.optimize import linear_sum_assignment
+
+from .. import _timing
+from ._base_metric import _BaseMetric
+
+EPS = np.finfo("float").eps # epsilon
+
+
+class TETA(_BaseMetric):
+ """TETA metric."""
+
+ def __init__(self, exhaustive=False, config=None):
+ """Initialize metric."""
+ super().__init__()
+ self.plottable = True
+ self.array_labels = np.arange(0.0, 0.99, 0.05)
+ self.cls_array_labels = np.arange(0.5, 0.99, 0.05)
+
+ self.integer_array_fields = [
+ "Loc_TP",
+ "Loc_FN",
+ "Loc_FP",
+ "Cls_TP",
+ "Cls_FN",
+ "Cls_FP",
+ ]
+ self.float_array_fields = (
+ ["TETA", "LocA", "AssocA", "ClsA"]
+ + ["LocRe", "LocPr"]
+ + ["AssocRe", "AssocPr"]
+ + ["ClsRe", "ClsPr"]
+ )
+ self.fields = self.float_array_fields + self.integer_array_fields
+ self.summary_fields = self.float_array_fields
+ self.exhaustive = exhaustive
+
+ def compute_global_assignment(self, data_thr, alpha=0.5):
+ """Compute global assignment of TP."""
+ res = {
+ thr: {t: {} for t in range(data_thr[thr]["num_timesteps"])}
+ for thr in data_thr
+ }
+
+ for thr in data_thr:
+ data = data_thr[thr]
+ # return empty result if tracker or gt sequence is empty
+ if data["num_tk_overlap_dets"] == 0 or data["num_gt_dets"] == 0:
+ return res
+
+ # global alignment score
+ ga_score, _, _ = self.compute_global_alignment_score(data)
+
+ # calculate scores for each timestep
+ for t, (gt_ids_t, tk_ids_t) in enumerate(
+ zip(data["gt_ids"], data["tk_ids"])
+ ):
+ # get matches optimizing for TETA
+ amatch_rows, amatch_cols = self.compute_matches(
+ data, t, ga_score, gt_ids_t, tk_ids_t, alpha=alpha
+ )
+ gt_ids = [data["gt_id_map"][tid] for tid in gt_ids_t[amatch_rows[0]]]
+ matched_ids = [
+ data["tk_id_map"][tid] for tid in tk_ids_t[amatch_cols[0]]
+ ]
+ res[thr][t] = dict(zip(gt_ids, matched_ids))
+
+ return res
+
+ def eval_sequence_single_thr(self, data, cls, cid2clsname, cls_fp_thr, thr):
+ """Computes TETA metric for one threshold for one sequence."""
+ res = {}
+ class_info_list = []
+ for field in self.float_array_fields + self.integer_array_fields:
+ if field.startswith("Cls"):
+ res[field] = np.zeros(len(self.cls_array_labels), dtype=float)
+ else:
+ res[field] = np.zeros((len(self.array_labels)), dtype=float)
+
+ # return empty result if tracker or gt sequence is empty
+ if data["num_tk_overlap_dets"] == 0:
+ res["Loc_FN"] = data["num_gt_dets"] * np.ones(
+ (len(self.array_labels)), dtype=float
+ )
+ if self.exhaustive:
+ cls_fp_thr[cls] = data["num_tk_cls_dets"] * np.ones(
+ (len(self.cls_array_labels)), dtype=float
+ )
+ res = self._compute_final_fields(res)
+ return res, cls_fp_thr, class_info_list
+
+ if data["num_gt_dets"] == 0:
+ if self.exhaustive:
+ cls_fp_thr[cls] = data["num_tk_cls_dets"] * np.ones(
+ (len(self.cls_array_labels)), dtype=float
+ )
+ res = self._compute_final_fields(res)
+ return res, cls_fp_thr, class_info_list
+
+ # global alignment score
+ ga_score, gt_id_count, tk_id_count = self.compute_global_alignment_score(data)
+ matches_counts = [np.zeros_like(ga_score) for _ in self.array_labels]
+
+ # calculate scores for each timestep
+ for t, (gt_ids_t, tk_ids_t, tk_overlap_ids_t, tk_cls_ids_t) in enumerate(
+ zip(
+ data["gt_ids"],
+ data["tk_ids"],
+ data["tk_overlap_ids"],
+ data["tk_class_eval_tk_ids"],
+ )
+ ):
+ # deal with the case that there are no gt_det/tk_det in a timestep
+ if len(gt_ids_t) == 0:
+ if self.exhaustive:
+ cls_fp_thr[cls] += len(tk_cls_ids_t)
+ continue
+
+ # get matches optimizing for TETA
+ amatch_rows, amatch_cols = self.compute_matches(
+ data, t, ga_score, gt_ids_t, tk_ids_t, list(self.array_labels)
+ )
+
+ # map overlap_ids to original ids.
+ if len(tk_overlap_ids_t) != 0:
+ sorter = np.argsort(tk_ids_t)
+ indexes = sorter[
+ np.searchsorted(tk_ids_t, tk_overlap_ids_t, sorter=sorter)
+ ]
+ sim_t = data["sim_scores"][t][:, indexes]
+ fpl_candidates = tk_overlap_ids_t[(sim_t >= (thr / 100)).any(axis=0)]
+ fpl_candidates_ori_ids_t = np.array(
+ [data["tk_id_map"][tid] for tid in fpl_candidates]
+ )
+ else:
+ fpl_candidates_ori_ids_t = []
+
+ if self.exhaustive:
+ cls_fp_thr[cls] += len(tk_cls_ids_t) - len(tk_overlap_ids_t)
+
+ # calculate and accumulate basic statistics
+ for a, alpha in enumerate(self.array_labels):
+ match_row, match_col = amatch_rows[a], amatch_cols[a]
+ num_matches = len(match_row)
+ matched_ori_ids = set(
+ [data["tk_id_map"][tid] for tid in tk_ids_t[match_col]]
+ )
+ match_tk_cls = data["tk_classes"][t][match_col]
+ wrong_tk_cls = match_tk_cls[match_tk_cls != data["gt_classes"][t]]
+
+ num_class_and_det_matches = np.sum(
+ match_tk_cls == data["gt_classes"][t]
+ )
+
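+                # classification stats are only accumulated for alpha >= 0.5; the
+                # `a - 10` offset maps the index into self.cls_array_labels, which
+                # start at 0.5 while self.array_labels start at 0.0 (step 0.05)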
+ if alpha >= 0.5:
+ for cid in wrong_tk_cls:
+ if cid in cid2clsname:
+ cname = cid2clsname[cid]
+ cls_fp_thr[cname][a - 10] += 1
+ res["Cls_TP"][a - 10] += num_class_and_det_matches
+ res["Cls_FN"][a - 10] += num_matches - num_class_and_det_matches
+
+ res["Loc_TP"][a] += num_matches
+ res["Loc_FN"][a] += len(gt_ids_t) - num_matches
+ res["Loc_FP"][a] += len(set(fpl_candidates_ori_ids_t) - matched_ori_ids)
+
+ if num_matches > 0:
+ matches_counts[a][gt_ids_t[match_row], tk_ids_t[match_col]] += 1
+
+ # calculate AssocA, AssocRe, AssocPr
+ self.compute_association_scores(res, matches_counts, gt_id_count, tk_id_count)
+
+ # calculate final scores
+ res = self._compute_final_fields(res)
+ return res, cls_fp_thr, class_info_list
+
+ def compute_global_alignment_score(self, data):
+ """Computes global alignment score."""
+ num_matches = np.zeros((data["num_gt_ids"], data["num_tk_ids"]))
+ gt_id_count = np.zeros((data["num_gt_ids"], 1))
+ tk_id_count = np.zeros((1, data["num_tk_ids"]))
+
+ # loop through each timestep and accumulate global track info.
+ for t, (gt_ids_t, tk_ids_t) in enumerate(zip(data["gt_ids"], data["tk_ids"])):
+ # count potential matches between ids in each time step
+ # these are normalized, weighted by match similarity
+ sim = data["sim_scores"][t]
+ sim_iou_denom = sim.sum(0, keepdims=True) + sim.sum(1, keepdims=True) - sim
+ sim_iou = np.zeros_like(sim)
+ mask = sim_iou_denom > (0 + EPS)
+ sim_iou[mask] = sim[mask] / sim_iou_denom[mask]
+ num_matches[gt_ids_t[:, None], tk_ids_t[None, :]] += sim_iou
+
+ # calculate total number of dets for each gt_id and tk_id.
+ gt_id_count[gt_ids_t] += 1
+ tk_id_count[0, tk_ids_t] += 1
+
+ # Calculate overall Jaccard alignment score between IDs
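+        # ga_score[i, j] ~= |soft matches(i, j)| / (|dets of gt i| + |dets of track j| - |soft matches(i, j)|)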
+ ga_score = num_matches / (gt_id_count + tk_id_count - num_matches)
+ return ga_score, gt_id_count, tk_id_count
+
+ def compute_matches(self, data, t, ga_score, gt_ids, tk_ids, alpha):
+ """Compute matches based on alignment score."""
+ sim = data["sim_scores"][t]
+ score_mat = ga_score[gt_ids[:, None], tk_ids[None, :]] * sim
+ # Hungarian algorithm to find best matches
+ match_rows, match_cols = linear_sum_assignment(-score_mat)
+
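+        # a single Hungarian assignment is reused for every alpha: for each
+        # threshold, only matches whose raw similarity clears that alpha are kept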
+ if not isinstance(alpha, list):
+ alpha = [alpha]
+ alpha_match_rows, alpha_match_cols = [], []
+ for a in alpha:
+ matched_mask = sim[match_rows, match_cols] >= a - EPS
+ alpha_match_rows.append(match_rows[matched_mask])
+ alpha_match_cols.append(match_cols[matched_mask])
+ return alpha_match_rows, alpha_match_cols
+
+ def compute_association_scores(self, res, matches_counts, gt_id_count, tk_id_count):
+ """Calculate association scores for each alpha.
+
+ First calculate scores per gt_id/tk_id combo,
+ and then average over the number of detections.
+ """
+ for a, _ in enumerate(self.array_labels):
+ matches_count = matches_counts[a]
+ ass_a = matches_count / np.maximum(
+ 1, gt_id_count + tk_id_count - matches_count
+ )
+ res["AssocA"][a] = np.sum(matches_count * ass_a) / np.maximum(
+ 1, res["Loc_TP"][a]
+ )
+ ass_re = matches_count / np.maximum(1, gt_id_count)
+ res["AssocRe"][a] = np.sum(matches_count * ass_re) / np.maximum(
+ 1, res["Loc_TP"][a]
+ )
+ ass_pr = matches_count / np.maximum(1, tk_id_count)
+ res["AssocPr"][a] = np.sum(matches_count * ass_pr) / np.maximum(
+ 1, res["Loc_TP"][a]
+ )
+
+ @_timing.time
+ def eval_sequence(self, data, cls, cls_id_name_mapping, cls_fp):
+ """Evaluate a single sequence across all thresholds."""
+ res = {}
+ class_info_dict = {}
+
+ for thr in data:
+ res[thr], cls_fp[thr], cls_info = self.eval_sequence_single_thr(
+ data[thr], cls, cls_id_name_mapping, cls_fp[thr], thr
+ )
+ class_info_dict[thr] = cls_info
+
+ return res, cls_fp, class_info_dict
+
+ def combine_sequences(self, all_res):
+ """Combines metrics across all sequences."""
+ data = {}
+ res = {}
+
+ if all_res:
+ thresholds = list(list(all_res.values())[0].keys())
+ else:
+ thresholds = [50]
+ for thr in thresholds:
+ data[thr] = {}
+ for seq_key in all_res:
+ data[thr][seq_key] = all_res[seq_key][thr]
+ for thr in thresholds:
+ res[thr] = self._combine_sequences_thr(data[thr])
+
+ return res
+
+ def _combine_sequences_thr(self, all_res):
+ """Combines sequences over each threshold."""
+ res = {}
+ for field in self.integer_array_fields:
+ res[field] = self._combine_sum(all_res, field)
+ for field in ["AssocRe", "AssocPr", "AssocA"]:
+ res[field] = self._combine_weighted_av(
+ all_res, field, res, weight_field="Loc_TP"
+ )
+ res = self._compute_final_fields(res)
+ return res
+
+ def combine_classes_class_averaged(self, all_res, ignore_empty=False):
+ """Combines metrics across all classes by averaging over classes.
+
+ If 'ignore_empty' is True, then it only sums over classes
+ with at least one gt or predicted detection.
+ """
+ data = {}
+ res = {}
+ if all_res:
+ thresholds = list(list(all_res.values())[0].keys())
+ else:
+ thresholds = [50]
+ for thr in thresholds:
+ data[thr] = {}
+ for cls_key in all_res:
+ data[thr][cls_key] = all_res[cls_key][thr]
+ for thr in data:
+ res[thr] = self._combine_classes_class_averaged_thr(
+ data[thr], ignore_empty=ignore_empty
+ )
+ return res
+
+ def _combine_classes_class_averaged_thr(self, all_res, ignore_empty=False):
+ """Combines classes over each threshold."""
+ res = {}
+
+ def check_empty(val):
+ """Returns True if empty."""
+ return not (val["Loc_TP"] + val["Loc_FN"] + val["Loc_FP"] > 0 + EPS).any()
+
+ for field in self.integer_array_fields:
+ if ignore_empty:
+ res_field = {k: v for k, v in all_res.items() if not check_empty(v)}
+ else:
+ res_field = {k: v for k, v in all_res.items()}
+ res[field] = self._combine_sum(res_field, field)
+
+ for field in self.float_array_fields:
+ if ignore_empty:
+ res_field = [v[field] for v in all_res.values() if not check_empty(v)]
+ else:
+ res_field = [v[field] for v in all_res.values()]
+ res[field] = np.mean(res_field, axis=0)
+ return res
+
+ def combine_classes_det_averaged(self, all_res):
+ """Combines metrics across all classes by averaging over detections."""
+ data = {}
+ res = {}
+ if all_res:
+ thresholds = list(list(all_res.values())[0].keys())
+ else:
+ thresholds = [50]
+ for thr in thresholds:
+ data[thr] = {}
+ for cls_key in all_res:
+ data[thr][cls_key] = all_res[cls_key][thr]
+ for thr in data:
+ res[thr] = self._combine_classes_det_averaged_thr(data[thr])
+ return res
+
+ def _combine_classes_det_averaged_thr(self, all_res):
+ """Combines detections over each threshold."""
+ res = {}
+ for field in self.integer_array_fields:
+ res[field] = self._combine_sum(all_res, field)
+ for field in ["AssocRe", "AssocPr", "AssocA"]:
+ res[field] = self._combine_weighted_av(
+ all_res, field, res, weight_field="Loc_TP"
+ )
+ res = self._compute_final_fields(res)
+ return res
+
+ @staticmethod
+ def _compute_final_fields(res):
+ """Calculate final metric values.
+
+        This function is used both for per-sequence calculation
+        and for combining values across sequences.
+ """
+ # LocA
+ res["LocRe"] = res["Loc_TP"] / np.maximum(1, res["Loc_TP"] + res["Loc_FN"])
+ res["LocPr"] = res["Loc_TP"] / np.maximum(1, res["Loc_TP"] + res["Loc_FP"])
+ res["LocA"] = res["Loc_TP"] / np.maximum(
+ 1, res["Loc_TP"] + res["Loc_FN"] + res["Loc_FP"]
+ )
+
+ # ClsA
+ res["ClsRe"] = res["Cls_TP"] / np.maximum(1, res["Cls_TP"] + res["Cls_FN"])
+ res["ClsPr"] = res["Cls_TP"] / np.maximum(1, res["Cls_TP"] + res["Cls_FP"])
+ res["ClsA"] = res["Cls_TP"] / np.maximum(
+ 1, res["Cls_TP"] + res["Cls_FN"] + res["Cls_FP"]
+ )
+
+ res["ClsRe"] = np.mean(res["ClsRe"])
+ res["ClsPr"] = np.mean(res["ClsPr"])
+ res["ClsA"] = np.mean(res["ClsA"])
+
+ res["TETA"] = (res["LocA"] + res["AssocA"] + res["ClsA"]) / 3
+
+ return res
+
+ def print_summary_table(self, thr_res, thr, tracker, cls):
+ """Prints summary table of results."""
+ print("")
+ metric_name = self.get_name()
+ self._row_print(
+ [f"{metric_name}{str(thr)}: {tracker}-{cls}"] + self.summary_fields
+ )
+ self._row_print(["COMBINED"] + thr_res)
diff --git a/sam3/eval/teta_eval_toolkit/utils.py b/sam3/eval/teta_eval_toolkit/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa688e777afd3ed774ae200ff355b3418b1d389f
--- /dev/null
+++ b/sam3/eval/teta_eval_toolkit/utils.py
@@ -0,0 +1,46 @@
+# fmt: off
+# flake8: noqa
+
+import csv
+import os
+from collections import OrderedDict
+
+
+def validate_metrics_list(metrics_list):
+ """Get names of metric class and ensures they are unique, further checks that the fields within each metric class
+ do not have overlapping names.
+ """
+ metric_names = [metric.get_name() for metric in metrics_list]
+ # check metric names are unique
+ if len(metric_names) != len(set(metric_names)):
+ raise TrackEvalException(
+ "Code being run with multiple metrics of the same name"
+ )
+ fields = []
+ for m in metrics_list:
+ fields += m.fields
+ # check metric fields are unique
+ if len(fields) != len(set(fields)):
+ raise TrackEvalException(
+ "Code being run with multiple metrics with fields of the same name"
+ )
+ return metric_names
+
+
+def get_track_id_str(ann):
+ """Get name of track ID in annotation."""
+ if "track_id" in ann:
+ tk_str = "track_id"
+ elif "instance_id" in ann:
+ tk_str = "instance_id"
+ elif "scalabel_id" in ann:
+ tk_str = "scalabel_id"
+ else:
+ assert False, "No track/instance ID."
+ return tk_str
+
+
+class TrackEvalException(Exception):
+ """Custom exception for catching expected errors."""
+
+ ...
diff --git a/sam3/eval/ytvis_coco_wrapper.py b/sam3/eval/ytvis_coco_wrapper.py
new file mode 100644
index 0000000000000000000000000000000000000000..ced58993028984b3c7c51b84279afc01099e2de6
--- /dev/null
+++ b/sam3/eval/ytvis_coco_wrapper.py
@@ -0,0 +1,146 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+# (c) Meta Platforms, Inc. and affiliates. Confidential and proprietary.
+
+import copy
+import json
+import logging
+
+import numpy as np
+import pycocotools.mask as mask_util
+from pycocotools.coco import COCO
+from typing_extensions import override
+
+
+class YTVIS(COCO):
+ """
+ Helper class for reading YT-VIS annotations
+ """
+
+ @override
+ def __init__(self, annotation_file: str = None, ignore_gt_cats: bool = True):
+ """
+ Args:
+ annotation_file: Path to the annotation file
+ ignore_gt_cats: If True, we ignore the ground truth categories and replace them with a dummy "object" category. This is useful for Phrase AP evaluation.
+ """
+ self.ignore_gt_cats = ignore_gt_cats
+ super().__init__(annotation_file=annotation_file)
+
+ @override
+ def createIndex(self):
+ # We rename some keys to match the COCO format before creating the index.
+ if "annotations" in self.dataset:
+ for ann in self.dataset["annotations"]:
+ if "video_id" in ann:
+ ann["image_id"] = int(ann.pop("video_id"))
+ if self.ignore_gt_cats:
+ ann["category_id"] = -1
+ else:
+ ann["category_id"] = int(ann["category_id"])
+ if "bboxes" in ann:
+ # note that in some datasets we load under this YTVIS class,
+ # some "bboxes" could be None for when the GT object is invisible,
+ # so we replace them with [0, 0, 0, 0]
+ ann["bboxes"] = [
+ bbox if bbox is not None else [0, 0, 0, 0]
+ for bbox in ann["bboxes"]
+ ]
+ if "areas" in ann:
+ # similar to "bboxes", some areas could be None for when the GT
+ # object is invisible, so we replace them with 0
+ areas = [a if a is not None else 0 for a in ann["areas"]]
+ # Compute average area of tracklet
+ ann["area"] = np.mean(areas)
+ if "videos" in self.dataset:
+ for vid in self.dataset["videos"]:
+ vid["id"] = int(vid["id"])
+ self.dataset["images"] = self.dataset.pop("videos")
+
+ if self.ignore_gt_cats:
+ self.dataset["categories"] = [
+ {"supercategory": "object", "id": -1, "name": "object"}
+ ]
+ else:
+ for cat in self.dataset["categories"]:
+ cat["id"] = int(cat["id"])
+ super().createIndex()
+
+ @override
+ def getAnnIds(self, imgIds=[], catIds=[], areaRng=[], iscrowd=None):
+ if len(areaRng) > 0:
+ logging.warning(
+ "Note that we filter out objects based on their *average* area across the video, not per frame area"
+ )
+
+ return super().getAnnIds(imgIds=imgIds, catIds=catIds, iscrowd=iscrowd)
+
+ @override
+ def showAnns(self, anns, draw_bbox=False):
+ raise NotImplementedError("Showing annotations is not supported")
+
+ @override
+ def loadRes(self, resFile):
+ # Adapted from COCO.loadRes to support tracklets/masklets
+ res = YTVIS(ignore_gt_cats=self.ignore_gt_cats)
+ res.dataset["images"] = [img for img in self.dataset["images"]]
+
+ if type(resFile) == str:
+ with open(resFile) as f:
+ anns = json.load(f)
+ elif type(resFile) == np.ndarray:
+ anns = self.loadNumpyAnnotations(resFile)
+ else:
+ anns = resFile
+ assert type(anns) == list, "results is not an array of objects"
+ annsImgIds = [ann["image_id"] for ann in anns]
+ assert set(annsImgIds) == (
+ set(annsImgIds) & set(self.getImgIds())
+ ), "Results do not correspond to current coco set"
+ if "bboxes" in anns[0] and not anns[0]["bboxes"] == []:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ bbs = [(bb if bb is not None else [0, 0, 0, 0]) for bb in ann["bboxes"]]
+ xxyy = [[bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] for bb in bbs]
+ if not "segmentations" in ann:
+ ann["segmentations"] = [
+ [[x1, y1, x1, y2, x2, y2, x2, y1]] for (x1, x2, y1, y2) in xxyy
+ ]
+ ann["areas"] = [bb[2] * bb[3] for bb in bbs]
+ # NOTE: We also compute average area of a tracklet across video, allowing us to compute area based mAP.
+ ann["area"] = np.mean(ann["areas"])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+ elif "segmentations" in anns[0]:
+ res.dataset["categories"] = copy.deepcopy(self.dataset["categories"])
+ for id, ann in enumerate(anns):
+ ann["bboxes"] = [
+ mask_util.toBbox(segm) for segm in ann["segmentations"]
+ ]
+ if "areas" not in ann:
+ ann["areas"] = [
+ mask_util.area(segm) for segm in ann["segmentations"]
+ ]
+ # NOTE: We also compute average area of a tracklet across video, allowing us to compute area based mAP.
+ ann["area"] = np.mean(ann["areas"])
+ ann["id"] = id + 1
+ ann["iscrowd"] = 0
+
+ res.dataset["annotations"] = anns
+ res.createIndex()
+ return res
+
+ @override
+ def download(self, tarDir=None, imgIds=[]):
+ raise NotImplementedError
+
+ @override
+ def loadNumpyAnnotations(self, data):
+ raise NotImplementedError("We don't support numpy annotations for now")
+
+ @override
+ def annToRLE(self, ann):
+ raise NotImplementedError("We expect masks to be already in RLE format")
+
+ @override
+ def annToMask(self, ann):
+ raise NotImplementedError("We expect masks to be already in RLE format")
diff --git a/sam3/eval/ytvis_eval.py b/sam3/eval/ytvis_eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..93f1cd6cda8e3d49fc50eb578ac62353c1bb713f
--- /dev/null
+++ b/sam3/eval/ytvis_eval.py
@@ -0,0 +1,411 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import copy
+import gc
+import logging
+import os
+from collections import defaultdict
+from operator import xor
+from pathlib import Path
+from typing import List, Optional
+
+import numpy as np
+import pycocotools.mask as mask_util
+import torch
+from pycocotools.cocoeval import COCOeval
+from sam3.eval.cgf1_eval import CGF1Eval
+from sam3.eval.coco_eval_offline import convert_to_xywh
+from sam3.model.box_ops import box_xywh_inter_union
+from sam3.train.masks_ops import rle_encode
+from sam3.train.utils import distributed as dist
+from typing_extensions import override
+
+try:
+ import rapidjson as json
+except ModuleNotFoundError:
+ import json
+
+from iopath.common.file_io import g_pathmgr
+
+
+class YTVISevalMixin:
+ """
+ Identical to COCOeval but adapts computeIoU to compute IoU between tracklets/masklets.
+ """
+
+ @override
+ def _prepare(self):
+ """
+ Copied from cocoeval.py but doesn't convert masks to RLEs (we assume they already are RLEs)
+ """
+ p = self.params
+ if p.useCats:
+ gts = self.cocoGt.loadAnns(
+ self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
+ )
+ dts = self.cocoDt.loadAnns(
+ self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds)
+ )
+ else:
+ gts = self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
+ dts = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
+
+ # set ignore flag
+ for gt in gts:
+ gt["ignore"] = gt["ignore"] if "ignore" in gt else 0
+ gt["ignore"] = "iscrowd" in gt and gt["iscrowd"]
+ if p.iouType == "keypoints":
+ gt["ignore"] = (gt["num_keypoints"] == 0) or gt["ignore"]
+ self._gts = defaultdict(list) # gt for evaluation
+ self._dts = defaultdict(list) # dt for evaluation
+ for gt in gts:
+ self._gts[gt["image_id"], gt["category_id"]].append(gt)
+ for dt in dts:
+ self._dts[dt["image_id"], dt["category_id"]].append(dt)
+ self.evalImgs = defaultdict(list) # per-image per-category evaluation results
+ self.eval = {} # accumulated evaluation results
+
+ def computeIoU(self, imgId, catId):
+ """
+ Compute IoU between tracklets. Copied from cocoeval.py but adapted for videos (in YT-VIS format)
+ """
+ p = self.params
+ if p.useCats:
+ gt = self._gts[imgId, catId]
+ dt = self._dts[imgId, catId]
+ else:
+ gt = [_ for cId in p.catIds for _ in self._gts[imgId, cId]]
+ dt = [_ for cId in p.catIds for _ in self._dts[imgId, cId]]
+ if len(gt) == 0 or len(dt) == 0:
+ return []
+
+ # For class mAP and phrase AP evaluation, we sort the detections in descending order of scores (as in COCOeval).
+ # For demo F1 evaluation, we DO NOT sort the detections (but match them with GTs via Hungarian matching).
+ assert hasattr(self, "sort_inds_by_scores_in_iou"), (
+ "subclasses that inherits YTVISevalMixin should set `self.sort_inds_by_scores_in_iou` "
+ "(True for class mAP and phrase AP, False for demo F1)"
+ )
+ if self.sort_inds_by_scores_in_iou:
+ inds = np.argsort([-d["score"] for d in dt], kind="mergesort")
+ dt = [dt[i] for i in inds]
+ if len(dt) > p.maxDets[-1]:
+ dt = dt[0 : p.maxDets[-1]]
+
+ if p.iouType == "segm":
+ g = [g["segmentations"] for g in gt]
+ d = [d["segmentations"] for d in dt]
+ elif p.iouType == "bbox":
+ g = [g["bboxes"] for g in gt]
+ d = [d["bboxes"] for d in dt]
+ else:
+ raise Exception("unknown iouType for iou computation")
+
+ def iou_tracklets(preds, gts):
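+            # tracklet IoU: sum box intersections and unions over all frames,
+            # then divide, giving a spatio-temporal IoU rather than a per-frame mean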
+ preds = torch.tensor(preds)
+ gts = torch.tensor(gts)
+ inter, union = box_xywh_inter_union(
+ preds.unsqueeze(1), gts.unsqueeze(0)
+ ) # Num preds x Num GTS x Num frames
+ inter = inter.sum(-1)
+ union = union.sum(-1)
+ assert (
+ union > 0
+ ).all(), (
+ "There exists a tracklet with zero GTs across time. This is suspicious"
+ )
+ return inter / union
+
+ def iou_masklets(preds, gts):
+ inter = 0
+ union = 0
+ for p_i, gt_i in zip(preds, gts):
+ if p_i and gt_i:
+ # Compute areas of intersection and union
+ inter += mask_util.area(
+ mask_util.merge([p_i, gt_i], intersect=True)
+ )
+ union += mask_util.area(
+ mask_util.merge([p_i, gt_i], intersect=False)
+ )
+ elif gt_i:
+ union += mask_util.area(gt_i)
+ elif p_i:
+ union += mask_util.area(p_i)
+ if union > 0:
+ iou = inter / union
+ assert iou >= 0 and iou <= 1, "Encountered an error in IoU computation"
+ else:
+ assert np.isclose(inter, 0) and np.isclose(
+ union, 0
+ ), "Encountered an error in IoU computation"
+ iou = 1
+ return iou
+
+ if p.iouType == "segm":
+ ious = [[iou_masklets(d_i, g_i) for g_i in g] for d_i in d]
+ else:
+ ious = iou_tracklets(d, g)
+ return np.array(ious)
+
+
+class YTVISeval(YTVISevalMixin, COCOeval):
+ # For class mAP and phrase AP evaluation, we sort the detections in descending order of scores (as in COCOeval).
+ sort_inds_by_scores_in_iou = True
+
+
+class VideoDemoF1Eval(YTVISevalMixin, CGF1Eval):
+ # For demo F1 evaluation, we DO NOT sort the detections (but match them with GTs via Hungarian matching).
+ sort_inds_by_scores_in_iou = False
+
+
+class YTVISResultsWriter:
+ """
+ Gather and dumps predictions in YT-VIS format.
+ Expected flow of API calls: reset() -> N * update() -> compute_synced()
+ """
+
+ def __init__(
+ self,
+ dump_file: str,
+ postprocessor,
+ gather_pred_via_filesys=False,
+ pred_file_evaluators: Optional[List] = None,
+ save_per_frame_scores: bool = False,
+ write_eval_metrics_file: bool = True,
+ eval_metrics_file_suffix: str = ".sam3_eval_metrics",
+ ):
+ self.dump_file = dump_file
+ self.dump = []
+ self.postprocessor = postprocessor
+ self.gather_pred_via_filesys = gather_pred_via_filesys
+ if dist.is_main_process():
+ dirname = os.path.dirname(self.dump_file)
+ if not os.path.exists(dirname):
+ os.makedirs(dirname, exist_ok=True)
+ logging.info(f"Creating folder: {dirname}")
+
+ # the evaluation hooks to be applied to the prediction files
+ self.pred_file_evaluators = pred_file_evaluators or []
+ self.save_per_frame_scores = save_per_frame_scores
+ # in addition to the prediction file, we also write the evaluation metrics
+ # for easier debugging and analysis (stored in another eval_metrics_file
+ # so that we can keep the dumped prediction file under YT-VIS format)
+ self.write_eval_metrics_file = write_eval_metrics_file
+ if self.write_eval_metrics_file:
+ self.eval_metrics_file = self.dump_file + eval_metrics_file_suffix
+ os.makedirs(os.path.dirname(self.eval_metrics_file), exist_ok=True)
+
+ def _dump_vid_preds(self, results):
+ dumped_results = copy.deepcopy(results)
+ self.dump.extend(dumped_results)
+
+ def prepare(self, predictions):
+ ytvis_results = []
+ for video_id, prediction in predictions.items():
+ if len(prediction) == 0:
+ continue
+ for k in ["boxes", "scores", "labels"]:
+ assert (
+ k in prediction
+ ), f"Expected predictions to have `{k}` key, available keys are {prediction.keys()}"
+ if self.save_per_frame_scores:
+ assert (
+ "per_frame_scores" in prediction
+ ), f"Expected predictions to have `per_frame_scores` key, available keys are {prediction.keys()}"
+ assert xor(
+ "masks" in prediction, "masks_rle" in prediction
+ ), f"Expected predictions to have either `masks` key or `masks_rle` key, available keys are {prediction.keys()}"
+
+ boxes = prediction["boxes"]
+ boxes = convert_to_xywh(boxes).tolist()
+ scores = prediction["scores"].tolist()
+ labels = prediction["labels"].tolist()
+ if "masks" in prediction:
+ masks = prediction["masks"].squeeze(2)
+ assert (
+ masks.ndim == 4
+ ), "Expected masks to be of shape(N_preds,T_frames,H,W)"
+
+ areas = [mask.flatten(1).sum(1).tolist() for mask in masks]
+ rles = [rle_encode(masklet) for masklet in masks]
+
+ # memory clean
+ del masks
+ del prediction["masks"]
+ elif "masks_rle" in prediction:
+ rles = prediction.pop("masks_rle")
+ areas = [
+ [0 if rle is None else rle.pop("area") for rle in rles_per_obj]
+ for rles_per_obj in rles
+ ]
+ else:
+ raise ValueError(
+ "Expected either `masks` or `masks_rle` key in the predictions."
+ )
+
+ new_results = [
+ {
+ "video_id": video_id,
+ "category_id": track_label,
+ "bboxes": track_boxes,
+ "score": track_score,
+ "segmentations": track_masks,
+ "areas": track_areas,
+ }
+ for (
+ track_boxes,
+ track_masks,
+ track_areas,
+ track_score,
+ track_label,
+ ) in zip(boxes, rles, areas, scores, labels)
+ ]
+ # Optionally, save per-frame scores
+ if self.save_per_frame_scores:
+ per_frame_scores = prediction["per_frame_scores"].tolist()
+ for res, track_per_frame_scores in zip(new_results, per_frame_scores):
+ res["per_frame_scores"] = track_per_frame_scores
+
+ ytvis_results.extend(new_results)
+
+ return ytvis_results
+
+ def set_sync_device(self, device: torch.device):
+ self._sync_device = device
+
+ def update(self, *args, **kwargs):
+ predictions = self.postprocessor.process_results(*args, **kwargs)
+ results = self.prepare(predictions)
+ self._dump_vid_preds(results)
+
+ def _dump_preds(self):
+ if not dist.is_main_process():
+ self.dump = []
+ gc.collect()
+ return
+ dumped_file = Path(self.dump_file)
+ logging.info(f"YTVIS evaluator: Dumping predictions to {dumped_file}")
+ with g_pathmgr.open(str(dumped_file), "w") as f:
+ json.dump(self.dump, f)
+ self.dump = []
+ gc.collect()
+ return str(dumped_file)
+
+ def synchronize_between_processes(self):
+ logging.info("YT-VIS evaluator: Synchronizing between processes")
+ dump_dict = self._dedup_pre_gather(self.dump)
+ if self.gather_pred_via_filesys:
+ dump_dict_all_gpus = dist.gather_to_rank_0_via_filesys(dump_dict)
+ else:
+ dump_dict_all_gpus = dist.all_gather(dump_dict, force_cpu=True)
+ self.dump = self._dedup_post_gather(dump_dict_all_gpus)
+ logging.info(f"Gathered all {len(self.dump)} predictions")
+
+ def _dedup_pre_gather(self, predictions):
+ """
+ Organize the predictions as a dict-of-list using (video_id, category_id) as keys
+ for deduplication after gathering them across GPUs.
+
+ During evaluation, PyTorch data loader under `drop_last: False` would wrap
+ around the dataset length to be a multiple of world size (GPU num) and duplicate
+ the remaining batches. This causes the same test sample to appear simultaneously
+ in multiple GPUs, resulting in duplicated predictions being saved into prediction
+ files. These duplicates are then counted as false positives under detection mAP
+ metrics (since a ground truth can be matched with only one prediction).
+
+ For example, if there are 4 GPUs and 6 samples [A1, A2, B1, B2, C1, C2], the data
+ loader (under `drop_last: False`) would load it by wrapping it around like
+ `[A1, A2, B1, B2, C1, C2, *A1*, *A2*]` to make a multiple of 4 and then split it as
+
+ - GPU 0: A1, C1
+ - GPU 1: A2, C2
+        - GPU 2: B1, **A1**
+        - GPU 3: B2, **A2**
+ (as in DistributedSampler in https://github.com/pytorch/pytorch/blob/521588519da9f4876d90ddd7a17c10d0eca89dc6/torch/utils/data/distributed.py#L116-L124)
+
+ so the predictions on A1 and A2 will occur twice in the final gathered outputs
+ in the prediction file (and counted as false positives). This also affects our
+ YT-VIS official val evaluation, but to a lesser extent than YT-VIS dev since
+ the latter is much smaller and more susceptible to false positives.
+
+        So we need to deduplicate this. The tricky part is that we cannot deduplicate them
+ simply using video id, given that we are sharding the classes in each video
+ across multiple batches (with 20 prompts per batch) in our "orig_cats" eval dbs.
+
+ The solution is to deduplicate based on (video_id, category_id) tuple as keys.
+ We organize the predictions as a dict-of-list using (video_id, category_id) as
+ keys on each GPU, with the list of masklets under this (video_id, category_id)
+ on this GPU as values. Then, we all-gather this dict-of-list across GPUs and
+ if a key (video_id, category_id) appears in multiple GPUs, we only take the
+ prediction masklet list from one GPU.
+ """
+ prediction_dict = defaultdict(list)
+ for p in predictions:
+ prediction_dict[(p["video_id"], p["category_id"])].append(p)
+ return prediction_dict
+
+ def _dedup_post_gather(self, list_of_prediction_dict):
+ """
+ Deduplicate the predictions from all GPUs. See `_dedup_pre_gather` for details.
+ """
+ dedup_prediction_dict = {}
+ duplication_keys = []
+ for prediction_dict in list_of_prediction_dict:
+ for k, v in prediction_dict.items():
+ if k not in dedup_prediction_dict:
+ dedup_prediction_dict[k] = v
+ else:
+ duplication_keys.append(k)
+
+ logging.info(
+ f"skipped {len(duplication_keys)} duplicated predictions in YTVISResultsWriter "
+ f"with the following (video_id, category_id) tuples: {duplication_keys}"
+ )
+ dedup_predictions = sum(dedup_prediction_dict.values(), [])
+ return dedup_predictions
+
+ def compute_synced(
+ self,
+ ):
+ self.synchronize_between_processes()
+ dumped_file = self._dump_preds()
+ if not dist.is_main_process():
+ return {"": 0.0}
+
+ # run evaluation hooks on the prediction file
+ meters = {}
+ all_video_np_level_results = defaultdict(dict)
+ for evaluator in self.pred_file_evaluators:
+ gc.collect()
+ results, video_np_level_results = evaluator.evaluate(dumped_file)
+ meters.update(results)
+ for (video_id, category_id), res in video_np_level_results.items():
+ all_video_np_level_results[(video_id, category_id)].update(res)
+
+ gc.collect()
+ if self.write_eval_metrics_file:
+ # convert the nested dict of {(video_id, category_id): per_sample_metric_dict}
+ # to a list of per-sample metric dicts (with video_id and category_id) for JSON,
+ # as JSON doesn't allow using tuples like (video_id, category_id) as dict keys
+ video_np_level_metrics = [
+ {"video_id": video_id, "category_id": category_id, **res}
+ for (video_id, category_id), res in all_video_np_level_results.items()
+ ]
+ eval_metrics = {
+ "dataset_level_metrics": meters,
+ "video_np_level_metrics": video_np_level_metrics,
+ }
+ with g_pathmgr.open(self.eval_metrics_file, "w") as f:
+ json.dump(eval_metrics, f)
+ logging.info(
+ f"YTVIS evaluator: Dumped evaluation metrics to {self.eval_metrics_file}"
+ )
+
+ if len(meters) == 0:
+ meters = {"": 0.0}
+ return meters
+
+ def compute(self):
+ return {"": 0.0}
+
+ def reset(self, *args, **kwargs):
+ self.dump = []
diff --git a/sam3/logger.py b/sam3/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..db9c0a61b76292e941804b233cc6c184b641158a
--- /dev/null
+++ b/sam3/logger.py
@@ -0,0 +1,54 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import logging
+import os
+
+LOG_LEVELS = {
+ "DEBUG": logging.DEBUG,
+ "INFO": logging.INFO,
+ "WARNING": logging.WARNING,
+ "ERROR": logging.ERROR,
+ "CRITICAL": logging.CRITICAL,
+}
+
+
+class ColoredFormatter(logging.Formatter):
+ """A command line formatter with different colors for each level."""
+
+ def __init__(self):
+ super().__init__()
+ reset = "\033[0m"
+ colors = {
+            logging.DEBUG: f"{reset}\033[36m",  # cyan
+ logging.INFO: f"{reset}\033[32m", # green
+ logging.WARNING: f"{reset}\033[33m", # yellow
+ logging.ERROR: f"{reset}\033[31m", # red
+ logging.CRITICAL: f"{reset}\033[35m", # magenta
+ }
+ fmt_str = "{color}%(levelname)s %(asctime)s %(process)d %(filename)s:%(lineno)4d:{reset} %(message)s"
+ self.formatters = {
+ level: logging.Formatter(fmt_str.format(color=color, reset=reset))
+ for level, color in colors.items()
+ }
+ self.default_formatter = self.formatters[logging.INFO]
+
+ def format(self, record):
+ formatter = self.formatters.get(record.levelno, self.default_formatter)
+ return formatter.format(record)
+
+
+def get_logger(name, level=logging.INFO):
+ """A command line logger."""
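+    # Usage sketch (illustrative): `log = get_logger(__name__)` followed by
+    # `log.info("ready")` prints a green INFO line via ColoredFormatter; setting the
+    # LOG_LEVEL environment variable (e.g. `LOG_LEVEL=DEBUG`) overrides the `level` argument.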
+ if "LOG_LEVEL" in os.environ:
+ level = os.environ["LOG_LEVEL"].upper()
+ assert (
+ level in LOG_LEVELS
+ ), f"Invalid LOG_LEVEL: {level}, must be one of {list(LOG_LEVELS.keys())}"
+ level = LOG_LEVELS[level]
+ logger = logging.getLogger(name)
+ logger.setLevel(level)
+ logger.propagate = False
+ ch = logging.StreamHandler()
+ ch.setLevel(level)
+ ch.setFormatter(ColoredFormatter())
+ logger.addHandler(ch)
+ return logger
diff --git a/sam3/model/__init__.py b/sam3/model/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/model/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/model/act_ckpt_utils.py b/sam3/model/act_ckpt_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c935cfc9ccdba8de3790422dacf8d77cbb2fbe8c
--- /dev/null
+++ b/sam3/model/act_ckpt_utils.py
@@ -0,0 +1,114 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import inspect
+from functools import wraps
+from typing import Callable, TypeVar, Union
+
+import torch
+import torch.nn as nn
+import torch.utils.checkpoint as checkpoint
+from torch.utils._pytree import tree_map_only
+
+# Type variables for better type hinting
+T = TypeVar("T")
+Module = TypeVar("Module", bound=nn.Module)
+
+
+def activation_ckpt_wrapper(module: Union[nn.Module, Callable]) -> Callable:
+ """
+ Wraps a given module to enable or disable activation checkpointing.
+
+ Activation checkpointing (gradient checkpointing) trades compute for memory by
+ recomputing intermediate activations during the backward pass instead of storing
+ them in memory during the forward pass.
+
+ When activation checkpointing is enabled, the wrapper expects only keyword arguments,
+ and it maps these to positional arguments based on the module's signature.
+
+ Args:
+ module: The module or function to wrap with activation checkpointing
+
+ Returns:
+ A wrapped callable that supports activation checkpointing
+
+ Usage:
+ The returned wrapper function can be called with the same arguments as the
+ original module, with an additional `act_ckpt_enable` keyword argument to control
+ activation checkpointing and optional `use_reentrant` parameter.
+
+ Example:
+ ```python
+ wrapped_module = activation_ckpt_wrapper(my_module)
+ output = wrapped_module(x=input_tensor, y=another_tensor, act_ckpt_enable=True)
+ ```
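+
+    Concretely (illustrative names): if the wrapped module's signature is
+    `forward(x, y, scale=1.0)` and the wrapper is called as
+    `wrapped_module(y=b, x=a, act_ckpt_enable=True)`, it rebuilds the positional call
+    `checkpoint(module, a, b, 1.0, use_reentrant=False)` by walking the signature in
+    order and filling in defaults for any keyword arguments that were not passed.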
+ """
+
+ @wraps(module)
+ def act_ckpt_wrapper(
+ *args, act_ckpt_enable: bool = True, use_reentrant: bool = False, **kwargs
+ ):
+ if act_ckpt_enable:
+ if len(args) > 0:
+ raise ValueError(
+ "This wrapper expects keyword arguments only when `act_ckpt_enable=True`"
+ )
+ # Get the signature of the target function/module
+ callable_fn = module.forward if isinstance(module, nn.Module) else module
+ sig = inspect.signature(callable_fn)
+ # Create a mapping of parameter names to their default values
+ param_defaults = {
+ name: param.default for name, param in sig.parameters.items()
+ }
+ args = []
+ for p_name in param_defaults.keys():
+ if p_name in kwargs:
+ args.append(kwargs.pop(p_name))
+ elif param_defaults[p_name] is not inspect.Parameter.empty:
+ # Set arg to default value if it's not in kwargs. Useful for primitive types or args that default to None
+ args.append(param_defaults[p_name])
+ elif (
+ sig.parameters[p_name].kind is not inspect.Parameter.VAR_KEYWORD
+ ): # Skip **kwargs parameter
+ raise ValueError(f"Missing positional argument: {p_name}")
+
+ # Scan remaining kwargs for torch.Tensor
+ remaining_keys = list(kwargs.keys())
+ for key in remaining_keys:
+ if isinstance(kwargs[key], torch.Tensor):
+                    # Replace the tensor in kwargs with a sentinel string, assuming the module
+                    # does not actually need it. If it is required, the module's signature should
+                    # be modified to accept it as an explicit positional or keyword argument.
+ kwargs[key] = "_REMOVED_BY_ACT_CKPT_WRAPPER_"
+
+ ret = checkpoint.checkpoint(
+ module, *args, use_reentrant=use_reentrant, **kwargs
+ )
+ else:
+ ret = module(*args, **kwargs)
+
+ return ret
+
+ return act_ckpt_wrapper
+
+
+def clone_output_wrapper(f: Callable[..., T]) -> Callable[..., T]:
+ """
+ Clone the CUDA output tensors of a function to avoid in-place operations.
+
+ This wrapper is useful when working with torch.compile to prevent errors
+ related to in-place operations on tensors.
+
+ Args:
+ f: The function whose CUDA tensor outputs should be cloned
+
+ Returns:
+ A wrapped function that clones any CUDA tensor outputs
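+
+    Example (illustrative; `my_fn` is a placeholder):
+        ```python
+        safe_fn = clone_output_wrapper(my_fn)
+        out = safe_fn(x)  # any CUDA tensors in `out` are cloned copies
+        ```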
+ """
+
+ @wraps(f)
+ def wrapped(*args, **kwargs):
+ outputs = f(*args, **kwargs)
+ return tree_map_only(
+ torch.Tensor, lambda t: t.clone() if t.is_cuda else t, outputs
+ )
+
+ return wrapped
diff --git a/sam3/model/box_ops.py b/sam3/model/box_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..f88e4adff393ca8ef8100319fdb95ced6a9b4f6e
--- /dev/null
+++ b/sam3/model/box_ops.py
@@ -0,0 +1,217 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+"""
+Utilities for bounding box manipulation and GIoU.
+"""
+
+from typing import Tuple
+
+import torch
+
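+# Box-format conventions used in this file:
+#   xyxy   = (x0, y0, x1, y1)  -- top-left and bottom-right corners
+#   xywh   = (x0, y0, w, h)    -- top-left corner plus width/height
+#   cxcywh = (cx, cy, w, h)    -- box center plus width/height
+# For example, the xyxy box (1, 2, 5, 6) is (1, 2, 4, 4) in xywh and (3, 4, 4, 4) in cxcywh.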
+
+def box_cxcywh_to_xyxy(x):
+ x_c, y_c, w, h = x.unbind(-1)
+ b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (x_c + 0.5 * w), (y_c + 0.5 * h)]
+ return torch.stack(b, dim=-1)
+
+
+def box_cxcywh_to_xywh(x):
+ x_c, y_c, w, h = x.unbind(-1)
+ b = [(x_c - 0.5 * w), (y_c - 0.5 * h), (w), (h)]
+ return torch.stack(b, dim=-1)
+
+
+def box_xywh_to_xyxy(x):
+ x, y, w, h = x.unbind(-1)
+ b = [(x), (y), (x + w), (y + h)]
+ return torch.stack(b, dim=-1)
+
+
+def box_xywh_to_cxcywh(x):
+ x, y, w, h = x.unbind(-1)
+ b = [(x + 0.5 * w), (y + 0.5 * h), (w), (h)]
+ return torch.stack(b, dim=-1)
+
+
+def box_xyxy_to_xywh(x):
+ x, y, X, Y = x.unbind(-1)
+ b = [(x), (y), (X - x), (Y - y)]
+ return torch.stack(b, dim=-1)
+
+
+def box_xyxy_to_cxcywh(x):
+ x0, y0, x1, y1 = x.unbind(-1)
+ b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)]
+ return torch.stack(b, dim=-1)
+
+
+def box_area(boxes):
+ """
+ Batched version of box area. Boxes should be in [x0, y0, x1, y1] format.
+
+ Inputs:
+ - boxes: Tensor of shape (..., 4)
+
+ Returns:
+ - areas: Tensor of shape (...,)
+ """
+ x0, y0, x1, y1 = boxes.unbind(-1)
+ return (x1 - x0) * (y1 - y0)
+
+
+def masks_to_boxes(masks):
+ """Compute the bounding boxes around the provided masks
+
+ The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
+
+ Returns a [N, 4] tensors, with the boxes in xyxy format
+ """
+ if masks.numel() == 0:
+ return torch.zeros((0, 4), device=masks.device)
+
+ h, w = masks.shape[-2:]
+
+ y = torch.arange(0, h, dtype=torch.float, device=masks.device)
+ x = torch.arange(0, w, dtype=torch.float, device=masks.device)
+    y, x = torch.meshgrid(y, x, indexing="ij")
+
+ x_mask = masks * x.unsqueeze(0)
+ x_max = x_mask.flatten(1).max(-1)[0] + 1
+ x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
+
+ y_mask = masks * y.unsqueeze(0)
+ y_max = y_mask.flatten(1).max(-1)[0] + 1
+ y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
+
+ boxes = torch.stack([x_min, y_min, x_max, y_max], 1)
+ # Invalidate boxes corresponding to empty masks.
+ boxes = boxes * masks.flatten(-2).any(-1)
+ return boxes
+
+
+def box_iou(boxes1, boxes2):
+ """
+ Batched version of box_iou. Boxes should be in [x0, y0, x1, y1] format.
+
+ Inputs:
+ - boxes1: Tensor of shape (..., N, 4)
+ - boxes2: Tensor of shape (..., M, 4)
+
+ Returns:
+ - iou, union: Tensors of shape (..., N, M)
+ """
+ area1 = box_area(boxes1)
+ area2 = box_area(boxes2)
+
+ # boxes1: (..., N, 4) -> (..., N, 1, 2)
+ # boxes2: (..., M, 4) -> (..., 1, M, 2)
+ lt = torch.max(boxes1[..., :, None, :2], boxes2[..., None, :, :2])
+ rb = torch.min(boxes1[..., :, None, 2:], boxes2[..., None, :, 2:])
+
+ wh = (rb - lt).clamp(min=0) # (..., N, M, 2)
+ inter = wh[..., 0] * wh[..., 1] # (..., N, M)
+
+ union = area1[..., None] + area2[..., None, :] - inter
+
+ iou = inter / union
+ return iou, union
+
+
+def generalized_box_iou(boxes1, boxes2):
+ """
+ Batched version of Generalized IoU from https://giou.stanford.edu/
+
+ Boxes should be in [x0, y0, x1, y1] format
+
+ Inputs:
+ - boxes1: Tensor of shape (..., N, 4)
+ - boxes2: Tensor of shape (..., M, 4)
+
+ Returns:
+ - giou: Tensor of shape (..., N, M)
+ """
+ iou, union = box_iou(boxes1, boxes2)
+
+ # boxes1: (..., N, 4) -> (..., N, 1, 2)
+ # boxes2: (..., M, 4) -> (..., 1, M, 2)
+ lt = torch.min(boxes1[..., :, None, :2], boxes2[..., None, :, :2])
+ rb = torch.max(boxes1[..., :, None, 2:], boxes2[..., None, :, 2:])
+
+ wh = (rb - lt).clamp(min=0) # (..., N, M, 2)
+ area = wh[..., 0] * wh[..., 1] # (..., N, M)
+
+ return iou - (area - union) / area
+
+
+@torch.jit.script
+def fast_diag_generalized_box_iou(boxes1, boxes2):
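+    # Element-wise GIoU between matched pairs boxes1[i] / boxes2[i] (the "diagonal" of
+    # the full NxM matrix computed by `generalized_box_iou`), for boxes in xyxy format.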
+ assert len(boxes1) == len(boxes2)
+ box1_xy = boxes1[:, 2:]
+ box1_XY = boxes1[:, :2]
+ box2_xy = boxes2[:, 2:]
+ box2_XY = boxes2[:, :2]
+ # assert (box1_xy >= box1_XY).all()
+ # assert (box2_xy >= box2_XY).all()
+ area1 = (box1_xy - box1_XY).prod(-1)
+ area2 = (box2_xy - box2_XY).prod(-1)
+
+ lt = torch.max(box1_XY, box2_XY) # [N,2]
+ lt2 = torch.min(box1_XY, box2_XY)
+ rb = torch.min(box1_xy, box2_xy) # [N,2]
+ rb2 = torch.max(box1_xy, box2_xy)
+
+ inter = (rb - lt).clamp(min=0).prod(-1)
+ tot_area = (rb2 - lt2).clamp(min=0).prod(-1)
+
+ union = area1 + area2 - inter
+
+ iou = inter / union
+
+ return iou - (tot_area - union) / tot_area
+
+
+@torch.jit.script
+def fast_diag_box_iou(boxes1, boxes2):
+ assert len(boxes1) == len(boxes2)
+ box1_xy = boxes1[:, 2:]
+ box1_XY = boxes1[:, :2]
+ box2_xy = boxes2[:, 2:]
+ box2_XY = boxes2[:, :2]
+ # assert (box1_xy >= box1_XY).all()
+ # assert (box2_xy >= box2_XY).all()
+ area1 = (box1_xy - box1_XY).prod(-1)
+ area2 = (box2_xy - box2_XY).prod(-1)
+
+ lt = torch.max(box1_XY, box2_XY) # [N,2]
+ rb = torch.min(box1_xy, box2_xy) # [N,2]
+
+ inter = (rb - lt).clamp(min=0).prod(-1)
+
+ union = area1 + area2 - inter
+
+ iou = inter / union
+
+ return iou
+
+
+def box_xywh_inter_union(
+ boxes1: torch.Tensor, boxes2: torch.Tensor
+) -> Tuple[torch.Tensor, torch.Tensor]:
+    # Assumes boxes are in xywh format
+ assert boxes1.size(-1) == 4 and boxes2.size(-1) == 4
+ boxes1 = box_xywh_to_xyxy(boxes1)
+ boxes2 = box_xywh_to_xyxy(boxes2)
+ box1_tl_xy = boxes1[..., :2]
+ box1_br_xy = boxes1[..., 2:]
+ box2_tl_xy = boxes2[..., :2]
+ box2_br_xy = boxes2[..., 2:]
+ area1 = (box1_br_xy - box1_tl_xy).prod(-1)
+ area2 = (box2_br_xy - box2_tl_xy).prod(-1)
+
+ assert (area1 >= 0).all() and (area2 >= 0).all()
+ tl = torch.max(box1_tl_xy, box2_tl_xy)
+ br = torch.min(box1_br_xy, box2_br_xy)
+
+ inter = (br - tl).clamp(min=0).prod(-1)
+ union = area1 + area2 - inter
+
+ return inter, union
diff --git a/sam3/model/data_misc.py b/sam3/model/data_misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bbcf551b90c691cbb583055995815d62cd89bed
--- /dev/null
+++ b/sam3/model/data_misc.py
@@ -0,0 +1,209 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+"""
+Misc functions, including distributed helpers.
+"""
+
+import collections
+import re
+
+from dataclasses import dataclass, field as field_ptr_behaviour, fields, is_dataclass
+from typing import Any, get_args, get_origin, List, Mapping, Optional, Sequence, Union
+
+import torch
+
+
+MyTensor = Union[torch.Tensor, List[Any]]
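+# Fields annotated as `MyTensor` may hold either a tensor or a plain Python list;
+# `convert_my_tensors` below converts list-valued fields into tensors, using the
+# matching `<field_name>__type` class attribute as the target dtype.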
+
+
+def interpolate(
+ input, size=None, scale_factor=None, mode="nearest", align_corners=None
+):
+ # type: (Tensor, Optional[List[int]], Optional[float], str, Optional[bool]) -> Tensor
+ """
+ Equivalent to nn.functional.interpolate, but with support for empty channel sizes.
+ """
+ if input.numel() > 0:
+ return torch.nn.functional.interpolate(
+ input, size, scale_factor, mode, align_corners
+ )
+
+ assert (
+ input.shape[0] != 0 or input.shape[1] != 0
+ ), "At least one of the two first dimensions must be non zero"
+
+ if input.shape[1] == 0:
+ # Pytorch doesn't support null dimension on the channel dimension, so we transpose to fake a null batch dim
+ return torch.nn.functional.interpolate(
+ input.transpose(0, 1), size, scale_factor, mode, align_corners
+ ).transpose(0, 1)
+
+ # empty batch dimension is now supported in pytorch
+ return torch.nn.functional.interpolate(
+ input, size, scale_factor, mode, align_corners
+ )
+
+
+@dataclass
+class BatchedPointer:
+ stage_ids: MyTensor
+ stage_ids__type = torch.long
+ query_ids: MyTensor
+ query_ids__type = torch.long
+ object_ids: MyTensor
+ object_ids__type = torch.long
+ ptr_mask: MyTensor
+ ptr_mask__type = torch.bool
+ ptr_types: MyTensor
+ ptr_types__type = torch.long
+
+
+@dataclass
+class FindStage:
+ img_ids: MyTensor
+ img_ids__type = torch.long
+ text_ids: MyTensor
+ text_ids__type = torch.long
+
+ input_boxes: MyTensor
+ input_boxes__type = torch.float
+ input_boxes_mask: MyTensor
+ input_boxes_mask__type = torch.bool
+ input_boxes_label: MyTensor
+ input_boxes_label__type = torch.long
+
+ input_points: MyTensor
+ input_points__type = torch.float
+ input_points_mask: MyTensor
+ input_points_mask__type = torch.bool
+
+ # We track the object ids referred to by this query.
+ # This is beneficial for tracking in videos without the need for pointers.
+ object_ids: Optional[List[List]] = None # List of objects per query
+
+
+@dataclass
+class BatchedFindTarget:
+ # The number of boxes in each find query
+ num_boxes: MyTensor
+ num_boxes__type = torch.long
+
+ # Target boxes in normalized CxCywh format
+ boxes: MyTensor
+ boxes__type = torch.float
+ # Target boxes in normalized CxCywh format but in padded representation
+ # as used in BinaryHungarianMatcherV2 (unlike the packed ones in `boxes`)
+ boxes_padded: MyTensor
+ boxes_padded__type = torch.float
+
+ # For hybrid matching, we repeat the boxes
+ repeated_boxes: MyTensor
+ repeated_boxes__type = torch.float
+
+ # Target Segmentation masks
+ segments: Optional[MyTensor]
+ segments__type = torch.bool
+
+ # Target Semantic Segmentation masks
+ semantic_segments: Optional[MyTensor]
+ semantic_segments__type = torch.bool
+
+ is_valid_segment: Optional[MyTensor]
+ is_valid_segment__type = torch.bool
+
+ # Whether annotations are exhaustive for each query
+ is_exhaustive: MyTensor
+ is_exhaustive__type = torch.bool
+
+ # The object id for each ground-truth box, in both packed and padded representations
+ object_ids: MyTensor
+ object_ids__type = torch.long
+ object_ids_padded: MyTensor
+ object_ids_padded__type = torch.long
+
+
+@dataclass
+class BatchedInferenceMetadata:
+ """All metadata required to post-process a find stage"""
+
+ # Coco id that corresponds to the "image" for evaluation by the coco evaluator
+ coco_image_id: MyTensor
+ coco_image_id__type = torch.long
+
+ # id in the original dataset, such that we can use the original evaluator
+ original_image_id: MyTensor
+ original_image_id__type = torch.long
+
+ # Original category id (if we want to use the original evaluator)
+ original_category_id: MyTensor
+ original_category_id__type = torch.int
+
+ # Size of the raw image (height, width)
+ original_size: MyTensor
+ original_size__type = torch.long
+
+ # id of the object in the media (track_id for a video)
+ object_id: MyTensor
+ object_id__type = torch.long
+
+ # index of the frame in the media (0 in the case of a single-frame media)
+ frame_index: MyTensor
+ frame_index__type = torch.long
+
+ # Adding for relations inference
+ # get_text_input: List[Optional[str]]
+
+ # Adding for TA conditional inference
+ is_conditioning_only: List[Optional[bool]]
+
+
+@dataclass
+class BatchedDatapoint:
+ img_batch: torch.Tensor
+ find_text_batch: List[str]
+ find_inputs: List[FindStage]
+ find_targets: List[BatchedFindTarget]
+ find_metadatas: List[BatchedInferenceMetadata]
+ raw_images: Optional[List[Any]] = None
+
+
+def convert_my_tensors(obj):
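+    """
+    Convert the `MyTensor` fields of a (possibly nested) dataclass instance into torch
+    tensors, using the companion `<field_name>__type` class attribute as the dtype.
+    Lists of tensors are stacked (along dim 1 for `input_boxes` / `input_boxes_label`,
+    dim 0 otherwise); other lists are converted with `torch.as_tensor`.
+    """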
+ def is_optional_field(field) -> bool:
+ return get_origin(field) is Union and type(None) in get_args(field)
+
+ for field in fields(obj):
+ if is_dataclass(getattr(obj, field.name)):
+ convert_my_tensors(getattr(obj, field.name))
+ continue
+
+ field_type = field.type
+ if is_optional_field(field.type):
+ field_type = Union[get_args(field.type)[:-1]] # Get the Optional field type
+
+ if field_type != MyTensor or getattr(obj, field.name) is None:
+ continue
+
+ elif len(getattr(obj, field.name)) and isinstance(
+ getattr(obj, field.name)[0], torch.Tensor
+ ):
+ stack_dim = 0
+ if field.name in [
+ "input_boxes",
+ "input_boxes_label",
+ ]:
+ stack_dim = 1
+ setattr(
+ obj,
+ field.name,
+ torch.stack(getattr(obj, field.name), dim=stack_dim).to(
+ getattr(obj, field.name + "__type")
+ ),
+ )
+ else:
+ setattr(
+ obj,
+ field.name,
+ torch.as_tensor(
+ getattr(obj, field.name), dtype=getattr(obj, field.name + "__type")
+ ),
+ )
+ return obj
diff --git a/sam3/model/decoder.py b/sam3/model/decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8b1657ebd5706d901c84101012bb4c6e2fa4519
--- /dev/null
+++ b/sam3/model/decoder.py
@@ -0,0 +1,956 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+"""
+Transformer decoder.
+Inspired from Pytorch's version, adds the pre-norm variant
+"""
+
+from typing import Any, Dict, List, Optional
+
+import numpy as np
+
+import torch
+
+from sam3.sam.transformer import RoPEAttention
+
+from torch import nn, Tensor
+from torchvision.ops.roi_align import RoIAlign
+
+from .act_ckpt_utils import activation_ckpt_wrapper
+
+from .box_ops import box_cxcywh_to_xyxy
+
+from .model_misc import (
+ gen_sineembed_for_position,
+ get_activation_fn,
+ get_clones,
+ inverse_sigmoid,
+ MLP,
+)
+
+
+class TransformerDecoderLayer(nn.Module):
+ def __init__(
+ self,
+ activation: str,
+ d_model: int,
+ dim_feedforward: int,
+ dropout: float,
+ cross_attention: nn.Module,
+ n_heads: int,
+ use_text_cross_attention: bool = False,
+ ):
+ super().__init__()
+
+ # cross attention
+ self.cross_attn = cross_attention
+ self.dropout1 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+ self.norm1 = nn.LayerNorm(d_model)
+
+ # cross attention text
+ self.use_text_cross_attention = use_text_cross_attention
+ if use_text_cross_attention:
+ self.ca_text = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
+ self.catext_dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+ self.catext_norm = nn.LayerNorm(d_model)
+
+ # self attention
+ self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
+ self.dropout2 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+ self.norm2 = nn.LayerNorm(d_model)
+
+ # ffn
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
+ self.activation = get_activation_fn(activation)
+ self.dropout3 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
+ self.dropout4 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+ self.norm3 = nn.LayerNorm(d_model)
+
+ @staticmethod
+ def with_pos_embed(tensor, pos):
+ return tensor if pos is None else tensor + pos
+
+ def forward_ffn(self, tgt):
+ with torch.amp.autocast(device_type="cuda", enabled=False):
+ tgt2 = self.linear2(self.dropout3(self.activation(self.linear1(tgt))))
+ tgt = tgt + self.dropout4(tgt2)
+ tgt = self.norm3(tgt)
+ return tgt
+
+ def forward(
+ self,
+ # for tgt
+ tgt: Optional[Tensor], # nq, bs, d_model
+ tgt_query_pos: Optional[Tensor] = None, # pos for query. MLP(Sine(pos))
+ tgt_query_sine_embed: Optional[Tensor] = None, # pos for query. Sine(pos)
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ tgt_reference_points: Optional[Tensor] = None, # nq, bs, 4
+ memory_text: Optional[Tensor] = None, # num_token, bs, d_model
+ text_attention_mask: Optional[Tensor] = None, # bs, num_token
+ # for memory
+ memory: Optional[Tensor] = None, # hw, bs, d_model
+ memory_key_padding_mask: Optional[Tensor] = None,
+ memory_level_start_index: Optional[Tensor] = None, # num_levels
+ memory_spatial_shapes: Optional[Tensor] = None, # bs, num_levels, 2
+ memory_pos: Optional[Tensor] = None, # pos for memory
+ # sa
+ self_attn_mask: Optional[Tensor] = None, # mask used for self-attention
+ cross_attn_mask: Optional[Tensor] = None, # mask used for cross-attention
+ # dac
+ dac=False,
+ dac_use_selfatt_ln=True,
+ presence_token=None,
+ # skip inside deformable attn
+ identity=0.0,
+ **kwargs, # additional kwargs for compatibility
+ ):
+ """
+ Input:
+ - tgt/tgt_query_pos: nq, bs, d_model
+ """
+ # self attention
+ if self.self_attn is not None:
+ if dac:
+ # we only apply self attention to the first half of the queries
+ assert tgt.shape[0] % 2 == 0
+ num_o2o_queries = tgt.shape[0] // 2
+ tgt_o2o = tgt[:num_o2o_queries]
+ tgt_query_pos_o2o = tgt_query_pos[:num_o2o_queries]
+ tgt_o2m = tgt[num_o2o_queries:]
+ else:
+ tgt_o2o = tgt
+ tgt_query_pos_o2o = tgt_query_pos
+
+ if presence_token is not None:
+ tgt_o2o = torch.cat([presence_token, tgt_o2o], dim=0)
+ tgt_query_pos_o2o = torch.cat(
+ [torch.zeros_like(presence_token), tgt_query_pos_o2o], dim=0
+ )
+ tgt_query_pos = torch.cat(
+ [torch.zeros_like(presence_token), tgt_query_pos], dim=0
+ )
+
+ q = k = self.with_pos_embed(tgt_o2o, tgt_query_pos_o2o)
+ tgt2 = self.self_attn(q, k, tgt_o2o, attn_mask=self_attn_mask)[0]
+ tgt_o2o = tgt_o2o + self.dropout2(tgt2)
+ if dac:
+ if not dac_use_selfatt_ln:
+ tgt_o2o = self.norm2(tgt_o2o)
+ tgt = torch.cat((tgt_o2o, tgt_o2m), dim=0) # Recombine
+ if dac_use_selfatt_ln:
+ tgt = self.norm2(tgt)
+ else:
+ tgt = tgt_o2o
+ tgt = self.norm2(tgt)
+
+ if self.use_text_cross_attention:
+ tgt2 = self.ca_text(
+ self.with_pos_embed(tgt, tgt_query_pos),
+ memory_text,
+ memory_text,
+ key_padding_mask=text_attention_mask,
+ )[0]
+ tgt = tgt + self.catext_dropout(tgt2)
+ tgt = self.catext_norm(tgt)
+
+ if presence_token is not None:
+ presence_token_mask = torch.zeros_like(cross_attn_mask[:, :1, :])
+ cross_attn_mask = torch.cat(
+ [presence_token_mask, cross_attn_mask], dim=1
+ ) # (bs*nheads, 1+nq, hw)
+
+ # Cross attention to image
+ tgt2 = self.cross_attn(
+ query=self.with_pos_embed(tgt, tgt_query_pos),
+ key=self.with_pos_embed(memory, memory_pos),
+ value=memory,
+ attn_mask=cross_attn_mask,
+ key_padding_mask=(
+ memory_key_padding_mask.transpose(0, 1)
+ if memory_key_padding_mask is not None
+ else None
+ ),
+ )[0]
+
+ tgt = tgt + self.dropout1(tgt2)
+ tgt = self.norm1(tgt)
+
+ # ffn
+ tgt = self.forward_ffn(tgt)
+
+ presence_token_out = None
+ if presence_token is not None:
+ presence_token_out = tgt[:1]
+ tgt = tgt[1:]
+
+ return tgt, presence_token_out
+
+
+class TransformerDecoder(nn.Module):
+ def __init__(
+ self,
+ d_model: int,
+ frozen: bool,
+ interaction_layer,
+ layer,
+ num_layers: int,
+ num_queries: int,
+ return_intermediate: bool,
+ box_refine: bool = False,
+ num_o2m_queries: int = 0,
+ dac: bool = False,
+ boxRPB: str = "none",
+ # Experimental: An object query for SAM 2 tasks
+ instance_query: bool = False,
+ # Defines the number of additional instance queries,
+ # 1 or 4 are the most likely for single vs multi mask support
+ num_instances: int = 1, # Irrelevant if instance_query is False
+ dac_use_selfatt_ln: bool = True,
+ use_act_checkpoint: bool = False,
+ compile_mode=None,
+ presence_token: bool = False,
+ clamp_presence_logits: bool = True,
+ clamp_presence_logit_max_val: float = 10.0,
+ use_normed_output_consistently: bool = True,
+ separate_box_head_instance: bool = False,
+ separate_norm_instance: bool = False,
+ resolution: Optional[int] = None,
+ stride: Optional[int] = None,
+ ):
+ super().__init__()
+ self.d_model = d_model
+ self.layers = get_clones(layer, num_layers)
+ self.fine_layers = (
+ get_clones(interaction_layer, num_layers)
+ if interaction_layer is not None
+ else [None] * num_layers
+ )
+ self.num_layers = num_layers
+ self.num_queries = num_queries
+ self.dac = dac
+ if dac:
+ self.num_o2m_queries = num_queries
+ tot_num_queries = num_queries
+ else:
+ self.num_o2m_queries = num_o2m_queries
+ tot_num_queries = num_queries + num_o2m_queries
+ self.norm = nn.LayerNorm(d_model)
+ self.return_intermediate = return_intermediate
+ self.bbox_embed = MLP(d_model, d_model, 4, 3)
+ self.query_embed = nn.Embedding(tot_num_queries, d_model)
+ self.instance_query_embed = None
+ self.instance_query_reference_points = None
+ self.use_instance_query = instance_query
+ self.num_instances = num_instances
+ self.use_normed_output_consistently = use_normed_output_consistently
+
+ self.instance_norm = nn.LayerNorm(d_model) if separate_norm_instance else None
+ self.instance_bbox_embed = None
+ if separate_box_head_instance:
+ self.instance_bbox_embed = MLP(d_model, d_model, 4, 3)
+ if instance_query:
+ self.instance_query_embed = nn.Embedding(num_instances, d_model)
+ self.box_refine = box_refine
+ if box_refine:
+ nn.init.constant_(self.bbox_embed.layers[-1].weight.data, 0)
+ nn.init.constant_(self.bbox_embed.layers[-1].bias.data, 0)
+
+ self.reference_points = nn.Embedding(num_queries, 4)
+ if instance_query:
+ self.instance_reference_points = nn.Embedding(num_instances, 4)
+
+ assert boxRPB in ["none", "log", "linear", "both"]
+ self.boxRPB = boxRPB
+ if boxRPB != "none":
+ try:
+ nheads = self.layers[0].cross_attn_image.num_heads
+ except AttributeError:
+ nheads = self.layers[0].cross_attn.num_heads
+
+ n_input = 4 if boxRPB == "both" else 2
+ self.boxRPB_embed_x = MLP(n_input, d_model, nheads, 2)
+ self.boxRPB_embed_y = MLP(n_input, d_model, nheads, 2)
+ self.compilable_cord_cache = None
+ self.compilable_stored_size = None
+ self.coord_cache = {}
+
+ if resolution is not None and stride is not None:
+ feat_size = resolution // stride
+ coords_h, coords_w = self._get_coords(
+ feat_size, feat_size, device="cuda"
+ )
+ self.compilable_cord_cache = (coords_h, coords_w)
+ self.compilable_stored_size = (feat_size, feat_size)
+
+ self.roi_pooler = (
+ RoIAlign(output_size=7, spatial_scale=1, sampling_ratio=-1, aligned=True)
+ if interaction_layer is not None
+ else None
+ )
+ if frozen:
+ for p in self.parameters():
+ p.requires_grad_(False)
+
+ self.presence_token = None
+ self.clamp_presence_logits = clamp_presence_logits
+ self.clamp_presence_logit_max_val = clamp_presence_logit_max_val
+ if presence_token:
+ self.presence_token = nn.Embedding(1, d_model)
+ self.presence_token_head = MLP(d_model, d_model, 1, 3)
+ self.presence_token_out_norm = nn.LayerNorm(d_model)
+
+ self.ref_point_head = MLP(2 * self.d_model, self.d_model, self.d_model, 2)
+ self.dac_use_selfatt_ln = dac_use_selfatt_ln
+ self.use_act_checkpoint = use_act_checkpoint
+
+ nn.init.normal_(self.query_embed.weight.data)
+ if self.instance_query_embed is not None:
+ nn.init.normal_(self.instance_query_embed.weight.data)
+
+ assert self.roi_pooler is None
+ assert self.return_intermediate, "support return_intermediate only"
+ assert self.box_refine, "support box refine only"
+
+ self.compile_mode = compile_mode
+ self.compiled = False
+ # We defer compilation till after the first forward, to first warm-up the boxRPB cache
+
+ # assign layer index to each layer so that some layers can decide what to do
+ # based on which layer index they are (e.g. cross attention to memory bank only
+ # in selected layers)
+ for layer_idx, layer in enumerate(self.layers):
+ layer.layer_idx = layer_idx
+
+ @staticmethod
+ def _get_coords(H, W, device):
+ coords_h = torch.arange(0, H, device=device, dtype=torch.float32) / H
+ coords_w = torch.arange(0, W, device=device, dtype=torch.float32) / W
+ return coords_h, coords_w
+
+ def _get_rpb_matrix(self, reference_boxes, feat_size):
+ H, W = feat_size
+ boxes_xyxy = box_cxcywh_to_xyxy(reference_boxes).transpose(0, 1)
+ bs, num_queries, _ = boxes_xyxy.shape
+ if self.compilable_cord_cache is None:
+ self.compilable_cord_cache = self._get_coords(H, W, reference_boxes.device)
+ self.compilable_stored_size = (H, W)
+
+ if torch.compiler.is_dynamo_compiling() or self.compilable_stored_size == (
+ H,
+ W,
+ ):
+ # good, hitting the cache, will be compilable
+ coords_h, coords_w = self.compilable_cord_cache
+ else:
+ # cache miss, will create compilation issue
+ # In case we're not compiling, we'll still rely on the dict-based cache
+ if feat_size not in self.coord_cache:
+ self.coord_cache[feat_size] = self._get_coords(
+ H, W, reference_boxes.device
+ )
+ coords_h, coords_w = self.coord_cache[feat_size]
+
+ assert coords_h.shape == (H,)
+ assert coords_w.shape == (W,)
+
+ deltas_y = coords_h.view(1, -1, 1) - boxes_xyxy.reshape(-1, 1, 4)[:, :, 1:4:2]
+ deltas_y = deltas_y.view(bs, num_queries, -1, 2)
+ deltas_x = coords_w.view(1, -1, 1) - boxes_xyxy.reshape(-1, 1, 4)[:, :, 0:3:2]
+ deltas_x = deltas_x.view(bs, num_queries, -1, 2)
+
+ if self.boxRPB in ["log", "both"]:
+ deltas_x_log = deltas_x * 8 # normalize to -8, 8
+ deltas_x_log = (
+ torch.sign(deltas_x_log)
+ * torch.log2(torch.abs(deltas_x_log) + 1.0)
+ / np.log2(8)
+ )
+
+ deltas_y_log = deltas_y * 8 # normalize to -8, 8
+ deltas_y_log = (
+ torch.sign(deltas_y_log)
+ * torch.log2(torch.abs(deltas_y_log) + 1.0)
+ / np.log2(8)
+ )
+ if self.boxRPB == "log":
+ deltas_x = deltas_x_log
+ deltas_y = deltas_y_log
+ else:
+ deltas_x = torch.cat([deltas_x, deltas_x_log], dim=-1)
+ deltas_y = torch.cat([deltas_y, deltas_y_log], dim=-1)
+
+ if self.training:
+ assert self.use_act_checkpoint, "activation ckpt not enabled in decoder"
+ deltas_x = activation_ckpt_wrapper(self.boxRPB_embed_x)(
+ x=deltas_x,
+ act_ckpt_enable=self.training and self.use_act_checkpoint,
+ ) # bs, num_queries, W, n_heads
+ deltas_y = activation_ckpt_wrapper(self.boxRPB_embed_y)(
+ x=deltas_y,
+ act_ckpt_enable=self.training and self.use_act_checkpoint,
+ ) # bs, num_queries, H, n_heads
+
+ if not torch.compiler.is_dynamo_compiling():
+ assert deltas_x.shape[:3] == (bs, num_queries, W)
+ assert deltas_y.shape[:3] == (bs, num_queries, H)
+
+ B = deltas_y.unsqueeze(3) + deltas_x.unsqueeze(
+ 2
+ ) # bs, num_queries, H, W, n_heads
+ if not torch.compiler.is_dynamo_compiling():
+ assert B.shape[:4] == (bs, num_queries, H, W)
+ B = B.flatten(2, 3) # bs, num_queries, H*W, n_heads
+ B = B.permute(0, 3, 1, 2) # bs, n_heads, num_queries, H*W
+        B = B.contiguous()  # memory-efficient attention prefers contiguous strides
+ if not torch.compiler.is_dynamo_compiling():
+ assert B.shape[2:] == (num_queries, H * W)
+ return B
+
+ def forward(
+ self,
+ tgt,
+ memory,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ reference_boxes: Optional[Tensor] = None, # num_queries, bs, 4
+ # for memory
+ level_start_index: Optional[Tensor] = None, # num_levels
+ spatial_shapes: Optional[Tensor] = None, # bs, num_levels, 2
+ valid_ratios: Optional[Tensor] = None,
+ # for text
+ memory_text: Optional[Tensor] = None,
+ text_attention_mask: Optional[Tensor] = None,
+ # if `apply_dac` is None, it will default to `self.dac`
+ apply_dac: Optional[bool] = None,
+ is_instance_prompt=False,
+ decoder_extra_kwargs: Optional[Dict] = None,
+ # ROI memory bank
+ obj_roi_memory_feat=None,
+ obj_roi_memory_mask=None,
+ box_head_trk=None,
+ ):
+ """
+ Input:
+ - tgt: nq, bs, d_model
+ - memory: \\sum{hw}, bs, d_model
+ - pos: \\sum{hw}, bs, d_model
+ - reference_boxes: nq, bs, 4 (after sigmoid)
+ - valid_ratios/spatial_shapes: bs, nlevel, 2
+ """
+ if memory_mask is not None:
+ assert (
+ self.boxRPB == "none"
+ ), "inputting a memory_mask in the presence of boxRPB is unexpected/not implemented"
+
+ apply_dac = apply_dac if apply_dac is not None else self.dac
+ if apply_dac:
+ assert (tgt.shape[0] == self.num_queries) or (
+ self.use_instance_query
+ and (tgt.shape[0] == self.instance_query_embed.num_embeddings)
+ )
+
+ tgt = tgt.repeat(2, 1, 1)
+ # note that we don't tile tgt_mask, since DAC doesn't
+ # use self-attention in o2m queries
+ if reference_boxes is not None:
+ assert (reference_boxes.shape[0] == self.num_queries) or (
+ self.use_instance_query
+ and (
+ reference_boxes.shape[0]
+ == self.instance_query_embed.num_embeddings
+ )
+ )
+ reference_boxes = reference_boxes.repeat(2, 1, 1)
+
+ bs = tgt.shape[1]
+ intermediate = []
+ intermediate_presence_logits = []
+ presence_feats = None
+
+ if self.box_refine:
+ if reference_boxes is None:
+ # In this case, we're in a one-stage model, so we generate the reference boxes
+ reference_boxes = self.reference_points.weight.unsqueeze(1)
+ reference_boxes = (
+ reference_boxes.repeat(2, bs, 1)
+ if apply_dac
+ else reference_boxes.repeat(1, bs, 1)
+ )
+ reference_boxes = reference_boxes.sigmoid()
+ intermediate_ref_boxes = [reference_boxes]
+ else:
+ reference_boxes = None
+ intermediate_ref_boxes = None
+
+ output = tgt
+ presence_out = None
+ if self.presence_token is not None and is_instance_prompt is False:
+ # expand to batch dim
+ presence_out = self.presence_token.weight[None].expand(1, bs, -1)
+
+ box_head = self.bbox_embed
+ if is_instance_prompt and self.instance_bbox_embed is not None:
+ box_head = self.instance_bbox_embed
+
+ out_norm = self.norm
+ if is_instance_prompt and self.instance_norm is not None:
+ out_norm = self.instance_norm
+
+ for layer_idx, layer in enumerate(self.layers):
+ reference_points_input = (
+ reference_boxes[:, :, None]
+ * torch.cat([valid_ratios, valid_ratios], -1)[None, :]
+ ) # nq, bs, nlevel, 4
+
+ query_sine_embed = gen_sineembed_for_position(
+ reference_points_input[:, :, 0, :], self.d_model
+ ) # nq, bs, d_model*2
+
+ # conditional query
+ query_pos = self.ref_point_head(query_sine_embed) # nq, bs, d_model
+
+ if self.boxRPB != "none" and reference_boxes is not None:
+ assert (
+ spatial_shapes.shape[0] == 1
+ ), "only single scale support implemented"
+ memory_mask = self._get_rpb_matrix(
+ reference_boxes,
+ (spatial_shapes[0, 0], spatial_shapes[0, 1]),
+ )
+ memory_mask = memory_mask.flatten(0, 1) # (bs*n_heads, nq, H*W)
+ if self.training:
+ assert (
+ self.use_act_checkpoint
+ ), "Activation checkpointing not enabled in the decoder"
+ output, presence_out = activation_ckpt_wrapper(layer)(
+ tgt=output,
+ tgt_query_pos=query_pos,
+ tgt_query_sine_embed=query_sine_embed,
+ tgt_key_padding_mask=tgt_key_padding_mask,
+ tgt_reference_points=reference_points_input,
+ memory_text=memory_text,
+ text_attention_mask=text_attention_mask,
+ memory=memory,
+ memory_key_padding_mask=memory_key_padding_mask,
+ memory_level_start_index=level_start_index,
+ memory_spatial_shapes=spatial_shapes,
+ memory_pos=pos,
+ self_attn_mask=tgt_mask,
+ cross_attn_mask=memory_mask,
+ dac=apply_dac,
+ dac_use_selfatt_ln=self.dac_use_selfatt_ln,
+ presence_token=presence_out,
+ **(decoder_extra_kwargs or {}),
+ act_ckpt_enable=self.training and self.use_act_checkpoint,
+ # ROI memory bank
+ obj_roi_memory_feat=obj_roi_memory_feat,
+ obj_roi_memory_mask=obj_roi_memory_mask,
+ )
+
+ # iter update
+ if self.box_refine:
+ reference_before_sigmoid = inverse_sigmoid(reference_boxes)
+ if box_head_trk is None:
+ # delta_unsig = self.bbox_embed(output)
+ if not self.use_normed_output_consistently:
+ delta_unsig = box_head(output)
+ else:
+ delta_unsig = box_head(out_norm(output))
+ else:
+ # box_head_trk use a separate box head for tracking queries
+ Q_det = decoder_extra_kwargs["Q_det"]
+ assert output.size(0) >= Q_det
+ delta_unsig_det = self.bbox_embed(output[:Q_det])
+ delta_unsig_trk = box_head_trk(output[Q_det:])
+ delta_unsig = torch.cat([delta_unsig_det, delta_unsig_trk], dim=0)
+ outputs_unsig = delta_unsig + reference_before_sigmoid
+ new_reference_points = outputs_unsig.sigmoid()
+
+ reference_boxes = new_reference_points.detach()
+ if layer_idx != self.num_layers - 1:
+ intermediate_ref_boxes.append(new_reference_points)
+ else:
+ raise NotImplementedError("not implemented yet")
+
+ intermediate.append(out_norm(output))
+ if self.presence_token is not None and is_instance_prompt is False:
+ # norm, mlp head
+ intermediate_layer_presence_logits = self.presence_token_head(
+ self.presence_token_out_norm(presence_out)
+ ).squeeze(-1)
+
+ # clamp to mitigate numerical issues
+ if self.clamp_presence_logits:
+                    intermediate_layer_presence_logits = (
+                        intermediate_layer_presence_logits.clamp(
+                            min=-self.clamp_presence_logit_max_val,
+                            max=self.clamp_presence_logit_max_val,
+                        )
+                    )
+
+ intermediate_presence_logits.append(intermediate_layer_presence_logits)
+ presence_feats = presence_out.clone()
+
+ if not self.compiled and self.compile_mode is not None:
+ self.forward = torch.compile(
+ self.forward, mode=self.compile_mode, fullgraph=True
+ )
+ self.compiled = True
+
+ return (
+ torch.stack(intermediate),
+ torch.stack(intermediate_ref_boxes),
+ (
+ torch.stack(intermediate_presence_logits)
+ if self.presence_token is not None and is_instance_prompt is False
+ else None
+ ),
+ presence_feats,
+ )
+
+
+class TransformerEncoderCrossAttention(nn.Module):
+ def __init__(
+ self,
+ d_model: int,
+ frozen: bool,
+ pos_enc_at_input: bool,
+ layer,
+ num_layers: int,
+ use_act_checkpoint: bool = False,
+ batch_first: bool = False, # Do layers expect batch first input?
+ # which layers to exclude cross attention? default: None, means all
+ # layers use cross attention
+ remove_cross_attention_layers: Optional[list] = None,
+ ):
+ super().__init__()
+ self.d_model = d_model
+ self.layers = get_clones(layer, num_layers)
+ self.num_layers = num_layers
+ self.norm = nn.LayerNorm(d_model)
+ self.pos_enc_at_input = pos_enc_at_input
+ self.use_act_checkpoint = use_act_checkpoint
+
+ if frozen:
+ for p in self.parameters():
+ p.requires_grad_(False)
+
+ self.batch_first = batch_first
+
+ # remove cross attention layers if specified
+ self.remove_cross_attention_layers = [False] * self.num_layers
+ if remove_cross_attention_layers is not None:
+ for i in remove_cross_attention_layers:
+ self.remove_cross_attention_layers[i] = True
+ assert len(self.remove_cross_attention_layers) == len(self.layers)
+
+ for i, remove_cross_attention in enumerate(self.remove_cross_attention_layers):
+ if remove_cross_attention:
+ self.layers[i].cross_attn_image = None
+ self.layers[i].norm2 = None
+ self.layers[i].dropout2 = None
+
+ def forward(
+ self,
+ src, # self-attention inputs
+ prompt, # cross-attention inputs
+ src_mask: Optional[Tensor] = None, # att.mask for self-attention inputs
+ prompt_mask: Optional[Tensor] = None, # att.mask for cross-attention inputs
+ src_key_padding_mask: Optional[Tensor] = None,
+ prompt_key_padding_mask: Optional[Tensor] = None,
+ src_pos: Optional[Tensor] = None, # pos_enc for self-attention inputs
+ prompt_pos: Optional[Tensor] = None, # pos_enc for cross-attention inputs
+ feat_sizes: Optional[list] = None,
+ num_obj_ptr_tokens: int = 0, # number of object pointer *tokens*
+ ):
+ if isinstance(src, list):
+ assert isinstance(src_key_padding_mask, list) and isinstance(src_pos, list)
+ assert len(src) == len(src_key_padding_mask) == len(src_pos) == 1
+ src, src_key_padding_mask, src_pos = (
+ src[0],
+ src_key_padding_mask[0],
+ src_pos[0],
+ )
+
+ assert (
+ src.shape[1] == prompt.shape[1]
+ ), "Batch size must be the same for src and prompt"
+
+ output = src
+
+ if self.pos_enc_at_input and src_pos is not None:
+ output = output + 0.1 * src_pos
+
+ if self.batch_first:
+ # Convert to batch first
+ output = output.transpose(0, 1)
+ src_pos = src_pos.transpose(0, 1)
+ prompt = prompt.transpose(0, 1)
+ prompt_pos = prompt_pos.transpose(0, 1)
+
+ for layer in self.layers:
+ kwds = {}
+ if isinstance(layer.cross_attn_image, RoPEAttention):
+ kwds = {"num_k_exclude_rope": num_obj_ptr_tokens}
+
+ output = activation_ckpt_wrapper(layer)(
+ tgt=output,
+ memory=prompt,
+ tgt_mask=src_mask,
+ memory_mask=prompt_mask,
+ tgt_key_padding_mask=src_key_padding_mask,
+ memory_key_padding_mask=prompt_key_padding_mask,
+ pos=prompt_pos,
+ query_pos=src_pos,
+ dac=False,
+ attn_bias=None,
+ act_ckpt_enable=self.training and self.use_act_checkpoint,
+ **kwds,
+ )
+ normed_output = self.norm(output)
+
+ if self.batch_first:
+ # Convert back to seq first
+ normed_output = normed_output.transpose(0, 1)
+ src_pos = src_pos.transpose(0, 1)
+
+ return {
+ "memory": normed_output,
+ "pos_embed": src_pos,
+ "padding_mask": src_key_padding_mask,
+ }
+
+
+class TransformerDecoderLayerv1(nn.Module):
+ def __init__(
+ self,
+ activation: str,
+ cross_attention: nn.Module,
+ d_model: int,
+ dim_feedforward: int,
+ dropout: float,
+ pos_enc_at_attn: bool,
+ pos_enc_at_cross_attn_keys: bool,
+ pos_enc_at_cross_attn_queries: bool,
+ pre_norm: bool,
+ self_attention: nn.Module,
+ ):
+ super().__init__()
+ self.d_model = d_model
+ self.dim_feedforward = dim_feedforward
+ self.dropout_value = dropout
+ self.self_attn = self_attention
+ self.cross_attn_image = cross_attention
+
+ # Implementation of Feedforward model
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
+ self.dropout = nn.Dropout(dropout)
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+ self.norm1 = nn.LayerNorm(d_model)
+ self.norm2 = nn.LayerNorm(d_model)
+ self.norm3 = nn.LayerNorm(d_model)
+ self.dropout1 = nn.Dropout(dropout)
+ self.dropout2 = nn.Dropout(dropout)
+ self.dropout3 = nn.Dropout(dropout)
+
+ self.activation_str = activation
+ self.activation = get_activation_fn(activation)
+ self.pre_norm = pre_norm
+
+ self.pos_enc_at_attn = pos_enc_at_attn
+ self.pos_enc_at_cross_attn_queries = pos_enc_at_cross_attn_queries
+ self.pos_enc_at_cross_attn_keys = pos_enc_at_cross_attn_keys
+
+ def forward_post(
+ self,
+ tgt,
+ memory,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ **kwargs,
+ ):
+ q = k = tgt + query_pos if self.pos_enc_at_attn else tgt
+
+ # Self attention
+ tgt2 = self.self_attn(
+ q,
+ k,
+ value=tgt,
+ attn_mask=tgt_mask,
+ key_padding_mask=tgt_key_padding_mask,
+ )[0]
+ tgt = tgt + self.dropout1(tgt2)
+ tgt = self.norm1(tgt)
+
+ # Cross attention to image
+ tgt2 = self.cross_attn_image(
+ query=tgt + query_pos if self.pos_enc_at_cross_attn_queries else tgt,
+ key=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+ value=memory,
+ attn_mask=memory_mask,
+ key_padding_mask=memory_key_padding_mask,
+ )[0]
+ tgt = tgt + self.dropout2(tgt2)
+ tgt = self.norm2(tgt)
+
+ # FFN
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
+ tgt = tgt + self.dropout3(tgt2)
+ tgt = self.norm3(tgt)
+ return tgt
+
+ def forward_pre(
+ self,
+ tgt,
+ memory,
+ dac: bool = False,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ attn_bias: Optional[Tensor] = None,
+ **kwargs,
+ ):
+ if dac:
+ # we only apply self attention to the first half of the queries
+ assert tgt.shape[0] % 2 == 0
+ other_tgt = tgt[tgt.shape[0] // 2 :]
+ tgt = tgt[: tgt.shape[0] // 2]
+ tgt2 = self.norm1(tgt)
+ q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
+ tgt2 = self.self_attn(
+ q,
+ k,
+ value=tgt2,
+ attn_mask=tgt_mask,
+ key_padding_mask=tgt_key_padding_mask,
+ )[0]
+ tgt = tgt + self.dropout1(tgt2)
+ if dac:
+ # Recombine
+ tgt = torch.cat((tgt, other_tgt), dim=0)
+ tgt2 = self.norm2(tgt)
+ tgt2 = self.cross_attn_image(
+ query=tgt2 + query_pos if self.pos_enc_at_cross_attn_queries else tgt2,
+ key=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+ value=memory,
+ attn_mask=memory_mask,
+ key_padding_mask=memory_key_padding_mask,
+ attn_bias=attn_bias,
+ )[0]
+ tgt = tgt + self.dropout2(tgt2)
+ tgt2 = self.norm3(tgt)
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
+ tgt = tgt + self.dropout3(tgt2)
+ return tgt
+
+ def forward(
+ self,
+ tgt,
+ memory,
+ dac: bool = False,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ attn_bias: Optional[Tensor] = None,
+ **kwds: Any,
+ ) -> torch.Tensor:
+ fwd_fn = self.forward_pre if self.pre_norm else self.forward_post
+ return fwd_fn(
+ tgt,
+ memory,
+ dac=dac,
+ tgt_mask=tgt_mask,
+ memory_mask=memory_mask,
+ tgt_key_padding_mask=tgt_key_padding_mask,
+ memory_key_padding_mask=memory_key_padding_mask,
+ pos=pos,
+ query_pos=query_pos,
+ attn_bias=attn_bias,
+ **kwds,
+ )
+
+
+class TransformerDecoderLayerv2(TransformerDecoderLayerv1):
+ def __init__(self, cross_attention_first=False, *args: Any, **kwds: Any):
+ super().__init__(*args, **kwds)
+ self.cross_attention_first = cross_attention_first
+
+ def _forward_sa(self, tgt, query_pos):
+ # Self-Attention
+ tgt2 = self.norm1(tgt)
+ q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
+ tgt2 = self.self_attn(q, k, v=tgt2)
+ tgt = tgt + self.dropout1(tgt2)
+ return tgt
+
+ def _forward_ca(self, tgt, memory, query_pos, pos, num_k_exclude_rope=0):
+ if self.cross_attn_image is None:
+ return tgt
+
+ kwds = {}
+ if num_k_exclude_rope > 0:
+ assert isinstance(self.cross_attn_image, RoPEAttention)
+ kwds = {"num_k_exclude_rope": num_k_exclude_rope}
+
+ # Cross-Attention
+ tgt2 = self.norm2(tgt)
+ tgt2 = self.cross_attn_image(
+ q=tgt2 + query_pos if self.pos_enc_at_cross_attn_queries else tgt2,
+ k=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+ v=memory,
+ **kwds,
+ )
+ tgt = tgt + self.dropout2(tgt2)
+ return tgt
+
+ def forward_pre(
+ self,
+ tgt,
+ memory,
+ dac: bool,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ attn_bias: Optional[Tensor] = None,
+ num_k_exclude_rope: int = 0,
+ ):
+ assert dac is False
+ assert tgt_mask is None
+ assert memory_mask is None
+ assert tgt_key_padding_mask is None
+ assert memory_key_padding_mask is None
+ assert attn_bias is None
+
+ if self.cross_attention_first:
+ tgt = self._forward_ca(tgt, memory, query_pos, pos, num_k_exclude_rope)
+ tgt = self._forward_sa(tgt, query_pos)
+ else:
+ tgt = self._forward_sa(tgt, query_pos)
+ tgt = self._forward_ca(tgt, memory, query_pos, pos, num_k_exclude_rope)
+
+ # MLP
+ tgt2 = self.norm3(tgt)
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
+ tgt = tgt + self.dropout3(tgt2)
+ return tgt
+
+ def forward(self, *args: Any, **kwds: Any) -> torch.Tensor:
+ if self.pre_norm:
+ return self.forward_pre(*args, **kwds)
+ raise NotImplementedError
diff --git a/sam3/model/edt.py b/sam3/model/edt.py
new file mode 100644
index 0000000000000000000000000000000000000000..9448c1d3b0ee26d05f203dd6050abfa62e9a0846
--- /dev/null
+++ b/sam3/model/edt.py
@@ -0,0 +1,173 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Triton kernel for euclidean distance transform (EDT)"""
+
+import torch
+import triton
+import triton.language as tl
+
+"""
+Disclaimer: This implementation is not meant to be extremely efficient. A CUDA kernel would likely be more efficient.
+Even in Triton, there may be more suitable algorithms.
+
+The goal of this kernel is to mimic cv2.distanceTransform(input, cv2.DIST_L2, 0).
+Recall that the euclidean distance transform (EDT) calculates the L2 distance to the closest zero pixel for each pixel of the source image.
+
+For images of size NxN, the naive algorithm would be to compute pairwise distances between every pair of points, leading to an O(N^4) algorithm, which is obviously impractical.
+One can do better using the following approach:
+- First, compute the distance to the closest zero pixel in the same row. We can write it as Row_EDT[i,j] = min_k (|k - j| if input[i,k]==0 else +infinity). With a naive implementation, this step has O(N^3) complexity
+- Then, since the squared euclidean distance separates into a row term and a column term, the full EDT can be obtained with a second 1D pass over each column: EDT[i,j]^2 = min_k (Row_EDT[k,j]^2 + (k-i)^2). Naively, this is also O(N^3)
+
+Overall, this algorithm is quite amenable to parallelization, and has a complexity O(N^3). Can we do better?
+
+It turns out that we can leverage the structure of the L2 distance (nice and convex) to find the minimum in a more efficient way.
+We follow the algorithm from "Distance Transforms of Sampled Functions" (https://cs.brown.edu/people/pfelzens/papers/dt-final.pdf), which is also what's implemented in opencv
+
+For a single dimension EDT, we can compute the EDT of an arbitrary function F, that we discretize over the grid. Note that for the binary EDT that we're interested in, we can set F(i,j) = 0 if input[i,j]==0 else +infinity
+For now, we'll compute the EDT squared, and will take the sqrt only at the very end.
+The basic idea is that each point at location i spawns a parabola around itself, with a bias equal to F(i). So specifically, we're looking at the parabola (x - i)^2 + F(i)
+When we're looking for the row EDT at location j, we're effectively looking for min_i ((j - i)^2 + F(i)). In other words, we want to find the lowest parabola at location j.
+
+To do this efficiently, we need to maintain the lower envelope of the union of parabolas. This can be constructed on the fly using a sort of stack approach:
+ - every time we want to add a new parabola, we check if it may be covering the current right-most parabola. If so, then that parabola was useless, so we can pop it from the stack
+ - repeat until we can't find any more parabolas to pop. Then push the new one.
+
+This algorithm runs in O(N) for a single row, so overall O(N^2) when applied to all rows
+As before, we can decompose the algorithm into a pass over rows followed by a pass over columns, leading to an overall run-time of O(N^2)
+
+This algorithm is less suited to GPUs, since the one-dimensional EDT computation is quite sequential in nature. However, we can parallelize over the batch and row dimensions.
+In Triton, things are particularly bad at the moment, since there is no support for reading/writing to local memory at a specific index (a local gather is coming soon, see https://github.com/triton-lang/triton/issues/974, but no mention of writing, i.e. scatter).
+One could emulate these operations with masking, but in initial tests it proved to be worse than naively reading and writing to global memory. My guess is that the cache is compensating somewhat for the repeated single-point accesses.
+
+
+The timings obtained on an H100 for a random batch of masks of dimension 256 x 1024 x 1024 are as follows:
+- OpenCV: 1780ms (including the round-trip to CPU, but discounting the fact that it introduces a synchronization point)
+- Triton, O(N^3) algo: 627ms
+- Triton, O(N^2) algo: 322ms
+
+Overall, despite being quite naive, this implementation is roughly 5.5x faster than the OpenCV CPU implementation.
+
+"""
+
+
+@triton.jit
+def edt_kernel(inputs_ptr, outputs_ptr, v, z, height, width, horizontal: tl.constexpr):
+ # This is a somewhat verbatim implementation of the efficient 1D EDT algorithm described above
+ # It can be applied horizontally or vertically depending if we're doing the first or second stage.
+ # It's parallelized across batch+row (or batch+col if horizontal=False)
+ # TODO: perhaps the implementation can be revisited if/when local gather/scatter become available in triton
+ batch_id = tl.program_id(axis=0)
+ if horizontal:
+ row_id = tl.program_id(axis=1)
+ block_start = (batch_id * height * width) + row_id * width
+ length = width
+ stride = 1
+ else:
+ col_id = tl.program_id(axis=1)
+ block_start = (batch_id * height * width) + col_id
+ length = height
+ stride = width
+
+ # This will be the index of the right most parabola in the envelope ("the top of the stack")
+ k = 0
+ for q in range(1, length):
+ # Read the function value at the current location. Note that we're doing a singular read, not very efficient
+ cur_input = tl.load(inputs_ptr + block_start + (q * stride))
+ # location of the parabola on top of the stack
+ r = tl.load(v + block_start + (k * stride))
+ # associated boundary
+ z_k = tl.load(z + block_start + (k * stride))
+ # value of the function at the parabola location
+ previous_input = tl.load(inputs_ptr + block_start + (r * stride))
+ # intersection between the two parabolas
+ s = (cur_input - previous_input + q * q - r * r) / (q - r) / 2
+
+ # we'll pop as many parabolas as required
+ while s <= z_k and k - 1 >= 0:
+ k = k - 1
+ r = tl.load(v + block_start + (k * stride))
+ z_k = tl.load(z + block_start + (k * stride))
+ previous_input = tl.load(inputs_ptr + block_start + (r * stride))
+ s = (cur_input - previous_input + q * q - r * r) / (q - r) / 2
+
+ # Store the new one
+ k = k + 1
+ tl.store(v + block_start + (k * stride), q)
+ tl.store(z + block_start + (k * stride), s)
+ if k + 1 < length:
+ tl.store(z + block_start + ((k + 1) * stride), 1e9)
+
+ # Last step, we read the envelope to find the min in every location
+ k = 0
+ for q in range(length):
+ while (
+ k + 1 < length
+ and tl.load(
+ z + block_start + ((k + 1) * stride), mask=(k + 1) < length, other=q
+ )
+ < q
+ ):
+ k += 1
+ r = tl.load(v + block_start + (k * stride))
+ d = q - r
+ old_value = tl.load(inputs_ptr + block_start + (r * stride))
+ tl.store(outputs_ptr + block_start + (q * stride), old_value + d * d)
+
+
+def edt_triton(data: torch.Tensor):
+ """
+ Computes the Euclidean Distance Transform (EDT) of a batch of binary images.
+
+ Args:
+ data: A tensor of shape (B, H, W) representing a batch of binary images.
+
+ Returns:
+ A tensor of the same shape as data containing the EDT.
+ It should be equivalent to a batched version of cv2.distanceTransform(input, cv2.DIST_L2, 0)
+ """
+ assert data.dim() == 3
+ assert data.is_cuda
+ B, H, W = data.shape
+ data = data.contiguous()
+
+ # Allocate the "function" tensor. Implicitly the function is 0 if data[i,j]==0 else +infinity
+ output = torch.where(data, 1e18, 0.0)
+ assert output.is_contiguous()
+
+ # Scratch tensors for the parabola stacks
+ parabola_loc = torch.zeros(B, H, W, dtype=torch.uint32, device=data.device)
+ parabola_inter = torch.empty(B, H, W, dtype=torch.float, device=data.device)
+ parabola_inter[:, :, 0] = -1e18
+ parabola_inter[:, :, 1] = 1e18
+
+ # Grid size (number of blocks)
+ grid = (B, H)
+
+    # First pass: 1D EDT along rows (horizontal)
+ edt_kernel[grid](
+ output.clone(),
+ output,
+ parabola_loc,
+ parabola_inter,
+ H,
+ W,
+ horizontal=True,
+ )
+
+    # Reset the parabola stacks before the second pass (1D EDT along columns)
+ parabola_loc.zero_()
+ parabola_inter[:, :, 0] = -1e18
+ parabola_inter[:, :, 1] = 1e18
+
+ grid = (B, W)
+ edt_kernel[grid](
+ output.clone(),
+ output,
+ parabola_loc,
+ parabola_inter,
+ H,
+ W,
+ horizontal=False,
+ )
+    # the kernels compute squared distances, so take the square root at the end
+ return output.sqrt()
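+
+
+# Illustrative usage sketch (assumes a CUDA device and that opencv-python is available;
+# values and shapes are hypothetical). It cross-checks `edt_triton` against OpenCV's
+# CPU distance transform on a random binary mask:
+#
+#   import cv2
+#   mask = torch.rand(2, 64, 64, device="cuda") > 0.5
+#   out = edt_triton(mask)  # (2, 64, 64) float tensor of distances
+#   ref = cv2.distanceTransform(mask[0].to(torch.uint8).cpu().numpy(), cv2.DIST_L2, 0)
+#   print((out[0].cpu() - torch.from_numpy(ref)).abs().max())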
diff --git a/sam3/model/encoder.py b/sam3/model/encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..842bc56c59e59406a3c6f50096d542ccc2db033d
--- /dev/null
+++ b/sam3/model/encoder.py
@@ -0,0 +1,594 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+# Based on https://github.com/IDEA-Research/GroundingDINO
+
+from typing import Any, Dict, List, Optional, Tuple
+
+import torch
+from torch import nn, Tensor
+
+from .act_ckpt_utils import activation_ckpt_wrapper
+from .model_misc import get_activation_fn, get_clones, get_valid_ratio
+
+
+class TransformerEncoderLayer(nn.Module):
+ """
+ Transformer encoder layer that performs self-attention followed by cross-attention.
+
+ This layer was previously called TransformerDecoderLayer but was renamed to better
+ reflect its role in the architecture. It processes input sequences through self-attention
+ and then cross-attention with another input (typically image features).
+
+ The layer supports both pre-norm and post-norm configurations, as well as
+ positional encoding at different stages of the attention mechanism.
+ """
+
+ def __init__(
+ self,
+ activation: str,
+ cross_attention: nn.Module,
+ d_model: int,
+ dim_feedforward: int,
+ dropout: float,
+ pos_enc_at_attn: bool,
+ pos_enc_at_cross_attn_keys: bool,
+ pos_enc_at_cross_attn_queries: bool,
+ pre_norm: bool,
+ self_attention: nn.Module,
+ ):
+ """
+ Initialize a transformer encoder layer.
+
+ Args:
+ activation: Activation function to use in the feedforward network
+ cross_attention: Cross-attention module for attending to image features
+ d_model: Model dimension/hidden size
+ dim_feedforward: Dimension of the feedforward network
+ dropout: Dropout probability
+ pos_enc_at_attn: Whether to add positional encodings at self-attention
+ pos_enc_at_cross_attn_keys: Whether to add positional encodings to keys in cross-attention
+ pos_enc_at_cross_attn_queries: Whether to add positional encodings to queries in cross-attention
+ pre_norm: Whether to use pre-norm (True) or post-norm (False) architecture
+ self_attention: Self-attention module
+ """
+ super().__init__()
+ self.d_model = d_model
+ self.dim_feedforward = dim_feedforward
+ self.dropout_value = dropout
+ self.self_attn = self_attention
+ self.cross_attn_image = cross_attention
+
+ # Implementation of Feedforward model
+ self.linear1 = nn.Linear(d_model, dim_feedforward)
+ self.dropout = nn.Dropout(dropout)
+ self.linear2 = nn.Linear(dim_feedforward, d_model)
+
+ self.norm1 = nn.LayerNorm(d_model)
+ self.norm2 = nn.LayerNorm(d_model)
+ self.norm3 = nn.LayerNorm(d_model)
+ self.dropout1 = nn.Dropout(dropout)
+ self.dropout2 = nn.Dropout(dropout)
+ self.dropout3 = nn.Dropout(dropout)
+
+ self.activation_str = activation
+ self.activation = get_activation_fn(activation)
+ self.pre_norm = pre_norm
+
+ self.pos_enc_at_attn = pos_enc_at_attn
+ self.pos_enc_at_cross_attn_queries = pos_enc_at_cross_attn_queries
+ self.pos_enc_at_cross_attn_keys = pos_enc_at_cross_attn_keys
+
+ self.layer_idx = None
+
+ def forward_post(
+ self,
+ tgt: Tensor,
+ memory: Tensor,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ **kwargs,
+ ) -> Tensor:
+ """
+ Forward pass for post-norm architecture.
+
+ In post-norm architecture, normalization is applied after attention and feedforward operations.
+
+ Args:
+ tgt: Input tensor to be processed
+ memory: Memory tensor for cross-attention
+ tgt_mask: Mask for self-attention
+ memory_mask: Mask for cross-attention
+ tgt_key_padding_mask: Key padding mask for self-attention
+ memory_key_padding_mask: Key padding mask for cross-attention
+ pos: Positional encoding for memory
+ query_pos: Positional encoding for query
+ **kwargs: Additional keyword arguments
+
+ Returns:
+ Processed tensor
+ """
+ q = k = tgt + query_pos if self.pos_enc_at_attn else tgt
+
+ # Self attention
+ tgt2 = self.self_attn(
+ q, k, value=tgt, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
+ )[0]
+ tgt = tgt + self.dropout1(tgt2)
+ tgt = self.norm1(tgt)
+
+ # Cross attention to image
+ tgt2 = self.cross_attn_image(
+ query=tgt + query_pos if self.pos_enc_at_cross_attn_queries else tgt,
+ key=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+ value=memory,
+ attn_mask=memory_mask,
+ key_padding_mask=memory_key_padding_mask,
+ )[0]
+ tgt = tgt + self.dropout2(tgt2)
+ tgt = self.norm2(tgt)
+
+ # FFN
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
+ tgt = tgt + self.dropout3(tgt2)
+ tgt = self.norm3(tgt)
+ return tgt
+
+ def forward_pre(
+ self,
+ tgt: Tensor,
+ memory: Tensor,
+ dac: bool = False,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ # attn_bias: Optional[Tensor] = None,
+ # **kwargs,
+ ) -> Tensor:
+ """
+ Forward pass for pre-norm architecture.
+
+ In pre-norm architecture, normalization is applied before attention and feedforward operations.
+
+ Args:
+ tgt: Input tensor to be processed
+ memory: Memory tensor for cross-attention
+ dac: Whether to use Divide-and-Conquer attention
+ tgt_mask: Mask for self-attention
+ memory_mask: Mask for cross-attention
+ tgt_key_padding_mask: Key padding mask for self-attention
+ memory_key_padding_mask: Key padding mask for cross-attention
+ pos: Positional encoding for memory
+ query_pos: Positional encoding for query
+ attn_bias: Optional attention bias tensor
+ **kwargs: Additional keyword arguments
+
+ Returns:
+ Processed tensor
+ """
+ if dac:
+ # we only apply self attention to the first half of the queries
+ assert tgt.shape[0] % 2 == 0
+ other_tgt = tgt[tgt.shape[0] // 2 :]
+ tgt = tgt[: tgt.shape[0] // 2]
+ tgt2 = self.norm1(tgt)
+ q = k = tgt2 + query_pos if self.pos_enc_at_attn else tgt2
+ tgt2 = self.self_attn(
+ q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
+ )[0]
+ tgt = tgt + self.dropout1(tgt2)
+ if dac:
+ # Recombine
+ tgt = torch.cat((tgt, other_tgt), dim=0)
+ tgt2 = self.norm2(tgt)
+ tgt2 = self.cross_attn_image(
+ query=tgt2 + query_pos if self.pos_enc_at_cross_attn_queries else tgt2,
+ key=memory + pos if self.pos_enc_at_cross_attn_keys else memory,
+ value=memory,
+ attn_mask=memory_mask,
+ key_padding_mask=memory_key_padding_mask,
+ # attn_bias=attn_bias,
+ )[0]
+ tgt = tgt + self.dropout2(tgt2)
+ tgt2 = self.norm3(tgt)
+ tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt2))))
+ tgt = tgt + self.dropout3(tgt2)
+ return tgt
+
+ def forward(
+ self,
+ tgt: Tensor,
+ memory: Tensor,
+ dac: bool = False,
+ tgt_mask: Optional[Tensor] = None,
+ memory_mask: Optional[Tensor] = None,
+ tgt_key_padding_mask: Optional[Tensor] = None,
+ memory_key_padding_mask: Optional[Tensor] = None,
+ pos: Optional[Tensor] = None,
+ query_pos: Optional[Tensor] = None,
+ # attn_bias: Optional[Tensor] = None,
+ # **kwds: Any,
+ ) -> torch.Tensor:
+ """
+ Forward pass for the transformer encoder layer.
+
+ Args:
+ tgt: Input tensor to be processed
+ memory: Memory tensor (e.g., image features) for cross-attention
+ dac: Whether to use Divide-and-Conquer attention (only apply self-attention to first half)
+ tgt_mask: Mask for self-attention
+ memory_mask: Mask for cross-attention
+ tgt_key_padding_mask: Key padding mask for self-attention
+ memory_key_padding_mask: Key padding mask for cross-attention
+ pos: Positional encoding for memory
+ query_pos: Positional encoding for query
+ attn_bias: Optional attention bias tensor
+ **kwds: Additional keyword arguments
+
+ Returns:
+ Processed tensor after self-attention, cross-attention, and feedforward network
+ """
+ fwd_fn = self.forward_pre if self.pre_norm else self.forward_post
+ return fwd_fn(
+ tgt,
+ memory,
+ dac=dac,
+ tgt_mask=tgt_mask,
+ memory_mask=memory_mask,
+ tgt_key_padding_mask=tgt_key_padding_mask,
+ memory_key_padding_mask=memory_key_padding_mask,
+ pos=pos,
+ query_pos=query_pos,
+ # attn_bias=attn_bias,
+ # **kwds,
+ )
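+
+
+# Illustrative construction sketch (hypothetical hyper-parameters). The attention modules
+# are injected by the model configuration; nn.MultiheadAttention(batch_first=True) is shown
+# here only as one module whose call signature matches the self_attn / cross_attn_image
+# usage above, assuming batch-first (B, seq, C) inputs:
+#
+#   layer = TransformerEncoderLayer(
+#       activation="relu",
+#       cross_attention=nn.MultiheadAttention(256, 8, dropout=0.1, batch_first=True),
+#       d_model=256,
+#       dim_feedforward=2048,
+#       dropout=0.1,
+#       pos_enc_at_attn=False,
+#       pos_enc_at_cross_attn_keys=True,
+#       pos_enc_at_cross_attn_queries=False,
+#       pre_norm=True,
+#       self_attention=nn.MultiheadAttention(256, 8, dropout=0.1, batch_first=True),
+#   )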
+
+
+class TransformerEncoder(nn.Module):
+ """
+ Transformer encoder that processes multi-level features.
+
+ This encoder takes multi-level features (e.g., from a backbone network) and processes
+ them through a stack of transformer encoder layers. It supports features from multiple
+ levels (e.g., different resolutions) and can apply activation checkpointing for memory
+ efficiency during training.
+
+ Args:
+ layer: The encoder layer to be stacked multiple times
+ num_layers: Number of encoder layers to stack
+ d_model: Model dimension/hidden size
+ num_feature_levels: Number of feature levels to process
+ frozen: Whether to freeze the parameters of this module
+ use_act_checkpoint: Whether to use activation checkpointing during training
+ """
+
+ def __init__(
+ self,
+ layer: nn.Module,
+ num_layers: int,
+ d_model: int,
+ num_feature_levels: int,
+ frozen: bool = False,
+ use_act_checkpoint: bool = False,
+ ):
+ super().__init__()
+ self.layers = get_clones(layer, num_layers)
+ self.num_layers = num_layers
+
+ self.num_feature_levels = num_feature_levels
+ self.level_embed = None
+ if num_feature_levels > 1:
+ self.level_embed = nn.Parameter(torch.Tensor(num_feature_levels, d_model))
+
+ if frozen:
+ for p in self.parameters():
+ p.requires_grad_(False)
+
+ self.use_act_checkpoint = use_act_checkpoint
+
+ # assign layer index to each layer so that some layers can decide what to do
+ # based on which layer index they are (e.g. cross attention to memory bank only
+ # in selected layers)
+ for layer_idx, layer in enumerate(self.layers):
+ layer.layer_idx = layer_idx
+
+ @staticmethod
+ def get_reference_points(spatial_shapes, valid_ratios, device):
+ with torch.no_grad():
+ reference_points_list = []
+ for lvl, (H_, W_) in enumerate(spatial_shapes):
+ ref_y, ref_x = torch.meshgrid(
+ torch.linspace(
+ 0.5, H_ - 0.5, H_, dtype=torch.float32, device=device
+ ),
+ torch.linspace(
+ 0.5, W_ - 0.5, W_, dtype=torch.float32, device=device
+ ),
+ )
+ ref_y = ref_y.reshape(-1)[None] / (valid_ratios[:, None, lvl, 1] * H_)
+ ref_x = ref_x.reshape(-1)[None] / (valid_ratios[:, None, lvl, 0] * W_)
+ ref = torch.stack((ref_x, ref_y), -1)
+ reference_points_list.append(ref)
+ reference_points = torch.cat(reference_points_list, 1)
+ reference_points = reference_points[:, :, None] * valid_ratios[:, None]
+
+ return reference_points
+
+ def _prepare_multilevel_features(self, srcs, masks, pos_embeds):
+ assert (
+ len(srcs) == self.num_feature_levels
+ ), "mismatch between expected and received # of feature levels"
+
+ src_flatten = []
+ mask_flatten = []
+ lvl_pos_embed_flatten = []
+ spatial_shapes = []
+ has_mask = masks is not None and masks[0] is not None
+ for lvl, (src, mask, pos_embed) in enumerate(zip(srcs, masks, pos_embeds)):
+ bs, c, h, w = src.shape
+ spatial_shape = (h, w)
+ spatial_shapes.append(spatial_shape)
+
+ src = src.flatten(2).transpose(1, 2) # bs, hw, c
+ if has_mask:
+ mask = mask.flatten(1)
+ pos_embed = pos_embed.flatten(2).transpose(1, 2) # bs, hw, c
+ if self.level_embed is not None:
+ lvl_pos_embed = pos_embed + self.level_embed[lvl].view(1, 1, -1)
+ else:
+ lvl_pos_embed = pos_embed
+ lvl_pos_embed_flatten.append(lvl_pos_embed)
+ src_flatten.append(src)
+ if has_mask:
+ mask_flatten.append(mask)
+ src_flatten = torch.cat(src_flatten, 1) # bs, \sum{hxw}, c
+ mask_flatten = torch.cat(mask_flatten, 1) if has_mask else None # bs, \sum{hxw}
+ lvl_pos_embed_flatten = torch.cat(lvl_pos_embed_flatten, 1) # bs, \sum{hxw}, c
+ spatial_shapes = torch.tensor(
+ spatial_shapes, dtype=torch.long, device=src_flatten.device
+ )
+ level_start_index = torch.cat(
+ (
+ spatial_shapes.new_zeros((1,)),
+ spatial_shapes.prod(1).cumsum(0)[:-1],
+ )
+ )
+ if has_mask:
+ valid_ratios = torch.stack([get_valid_ratio(m) for m in masks], 1)
+ else:
+ valid_ratios = torch.ones(
+ (src_flatten.shape[0], self.num_feature_levels, 2),
+ device=src_flatten.device,
+ )
+
+ return (
+ src_flatten,
+ mask_flatten,
+ lvl_pos_embed_flatten,
+ level_start_index,
+ valid_ratios,
+ spatial_shapes,
+ )
+
+ def forward(
+ self,
+ src: List[Tensor],
+ src_key_padding_masks: Optional[List[Tensor]] = None,
+ pos: Optional[List[Tensor]] = None,
+ prompt: Optional[Tensor] = None,
+ prompt_key_padding_mask: Optional[Tensor] = None,
+ encoder_extra_kwargs: Optional[Dict] = None,
+ ) -> Tuple[Tensor, Optional[Tensor], Tensor, Tensor, Tensor, Tensor]:
+ """
+ Process multi-level features through the transformer encoder.
+
+ Args:
+ src: List of multi-level features, each with shape (batch_size, channels, height, width)
+ src_key_padding_masks: List of padding masks for each feature level, each with shape (batch_size, height, width)
+ pos: List of positional embeddings for each feature level, each with shape (batch_size, channels, height, width)
+ prompt: Optional text/prompt features to attend to, with shape (seq_len, batch_size, d_model)
+ prompt_key_padding_mask: Optional padding mask for prompt, with shape (batch_size, seq_len)
+ encoder_extra_kwargs: Optional additional arguments to pass to each encoder layer
+
+ Returns:
+ A tuple containing:
+ - output: Processed features with shape (seq_len, batch_size, d_model)
+ - key_padding_masks_flatten: Flattened padding masks
+ - lvl_pos_embed_flatten: Flattened positional embeddings
+ - level_start_index: Starting indices for each feature level
+ - spatial_shapes: Spatial dimensions of each feature level
+ - valid_ratios: Valid ratios for each feature level
+ """
+ assert (
+ len(src) == self.num_feature_levels
+ ), "must be equal to num_feature_levels"
+ if src_key_padding_masks is not None:
+ assert len(src_key_padding_masks) == self.num_feature_levels
+ if pos is not None:
+ assert len(pos) == self.num_feature_levels
+ # Flatten multilevel feats and add level pos embeds
+ (
+ src_flatten,
+ key_padding_masks_flatten,
+ lvl_pos_embed_flatten,
+ level_start_index,
+ valid_ratios,
+ spatial_shapes,
+ ) = self._prepare_multilevel_features(src, src_key_padding_masks, pos)
+
+ reference_points = self.get_reference_points(
+ spatial_shapes, valid_ratios, device=src_flatten.device
+ )
+
+ output = src_flatten
+ for layer in self.layers:
+ layer_kwargs = {}
+
+ assert isinstance(layer, TransformerEncoderLayer)
+ layer_kwargs["memory"] = prompt
+ layer_kwargs["memory_key_padding_mask"] = prompt_key_padding_mask
+ layer_kwargs["query_pos"] = lvl_pos_embed_flatten
+ layer_kwargs["tgt"] = output
+ layer_kwargs["tgt_key_padding_mask"] = key_padding_masks_flatten
+
+ if self.training:
+ assert self.use_act_checkpoint, "activation ckpt not enabled in encoder"
+ if encoder_extra_kwargs is not None:
+ layer_kwargs.update(encoder_extra_kwargs)
+ output = activation_ckpt_wrapper(layer)(
+ **layer_kwargs,
+ act_ckpt_enable=self.training and self.use_act_checkpoint,
+ )
+ # return as seq first
+ return (
+ output.transpose(0, 1),
+ (
+ key_padding_masks_flatten.transpose(0, 1)
+ if key_padding_masks_flatten is not None
+ else None
+ ),
+ lvl_pos_embed_flatten.transpose(0, 1),
+ level_start_index,
+ spatial_shapes,
+ valid_ratios,
+ )
+
+
+class TransformerEncoderFusion(TransformerEncoder):
+ """
+ Transformer encoder that fuses text and image features.
+
+ This encoder extends TransformerEncoder to handle both text and image features,
+ with the ability to add pooled text features to image features for better
+ cross-modal fusion. It supports torch.compile for performance optimization.
+
+ Args:
+ layer: The encoder layer to be stacked multiple times
+ num_layers: Number of encoder layers to stack
+ d_model: Model dimension/hidden size
+ num_feature_levels: Number of feature levels to process
+ add_pooled_text_to_img_feat: Whether to add pooled text features to image features
+ pool_text_with_mask: Whether to use the mask when pooling text features
+ compile_mode: Mode for torch.compile, or None to disable compilation
+ **kwargs: Additional arguments to pass to the parent class
+ """
+
+ def __init__(
+ self,
+ layer: nn.Module,
+ num_layers: int,
+ d_model: int,
+ num_feature_levels: int,
+ add_pooled_text_to_img_feat: bool = True,
+ pool_text_with_mask: bool = False,
+ compile_mode: Optional[str] = None,
+ **kwargs,
+ ):
+ super().__init__(
+ layer,
+ num_layers,
+ d_model,
+ num_feature_levels,
+ **kwargs,
+ )
+ self.add_pooled_text_to_img_feat = add_pooled_text_to_img_feat
+ if self.add_pooled_text_to_img_feat:
+ self.text_pooling_proj = nn.Linear(d_model, d_model)
+ self.pool_text_with_mask = pool_text_with_mask
+ if compile_mode is not None:
+ self.forward = torch.compile(
+ self.forward, mode=compile_mode, fullgraph=True
+ )
+
+ @staticmethod
+ def get_reference_points(spatial_shapes, valid_ratios, device):
+ # Not needed here
+ return None
+
+ def forward(
+ self,
+ src: List[Tensor],
+ prompt: Tensor,
+ src_key_padding_mask: Optional[List[Tensor]] = None,
+ src_pos: Optional[List[Tensor]] = None,
+ prompt_key_padding_mask: Optional[Tensor] = None,
+ prompt_pos: Optional[Tensor] = None,
+ feat_sizes: Optional[List[int]] = None,
+ encoder_extra_kwargs: Optional[Dict] = None,
+ ):
+        # Restore the spatial shapes of the vision features
+ bs = src[0].shape[1] # seq first
+ if feat_sizes is not None:
+ assert len(feat_sizes) == len(src)
+ if src_key_padding_mask is None:
+ src_key_padding_mask = [None] * len(src)
+ for i, (h, w) in enumerate(feat_sizes):
+ src[i] = src[i].reshape(h, w, bs, -1).permute(2, 3, 0, 1)
+ src_pos[i] = src_pos[i].reshape(h, w, bs, -1).permute(2, 3, 0, 1)
+ src_key_padding_mask[i] = (
+ src_key_padding_mask[i].reshape(h, w, bs).permute(2, 0, 1)
+ if src_key_padding_mask[i] is not None
+ else None
+ )
+ else:
+            assert all(
+                x.dim() == 4 for x in src
+            ), "expected list of (bs, c, h, w) tensors"
+
+ if self.add_pooled_text_to_img_feat:
+ # Fusion: Add mean pooled text to image features
+ pooled_text = pool_text_feat(
+ prompt, prompt_key_padding_mask, self.pool_text_with_mask
+ )
+ pooled_text = self.text_pooling_proj(pooled_text)[
+ ..., None, None
+ ] # prompt is seq first
+ src = [x.add_(pooled_text) for x in src]
+
+ (
+ out,
+ key_padding_masks_flatten,
+ lvl_pos_embed_flatten,
+ level_start_index,
+ spatial_shapes,
+ valid_ratios,
+ ) = super().forward(
+ src,
+ src_key_padding_masks=src_key_padding_mask,
+ pos=src_pos,
+ prompt=prompt.transpose(0, 1),
+ prompt_key_padding_mask=prompt_key_padding_mask,
+ encoder_extra_kwargs=encoder_extra_kwargs,
+ )
+
+ return {
+ "memory": out,
+ "padding_mask": key_padding_masks_flatten,
+ "pos_embed": lvl_pos_embed_flatten,
+ "memory_text": prompt,
+ "level_start_index": level_start_index,
+ "spatial_shapes": spatial_shapes,
+ "valid_ratios": valid_ratios,
+ }
+
+
+def pool_text_feat(prompt, prompt_mask, pool_with_mask):
+ # prompt has shape (seq, bs, dim)
+ if not pool_with_mask:
+ return prompt.mean(dim=0)
+
+ # prompt_mask has shape (bs, seq), where False is valid and True is padding
+ assert prompt_mask.dim() == 2
+ # is_valid has shape (seq, bs, 1), where 1 is valid and 0 is padding
+ is_valid = (~prompt_mask).float().permute(1, 0)[..., None]
+ # num_valid has shape (bs, 1)
+ num_valid = torch.clamp(torch.sum(is_valid, dim=0), min=1.0)
+
+ # mean pool over all the valid tokens
+ pooled_text = (prompt * is_valid).sum(dim=0) / num_valid
+ return pooled_text
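+
+
+# Illustrative sketch (hypothetical shapes): masked mean pooling over a prompt of
+# length 4 where the last token of the first batch element is padding.
+#
+#   prompt = torch.randn(4, 2, 256)                      # (seq, bs, dim)
+#   prompt_mask = torch.zeros(2, 4, dtype=torch.bool)    # False = valid token
+#   prompt_mask[0, -1] = True                            # pad out one token
+#   pooled = pool_text_feat(prompt, prompt_mask, pool_with_mask=True)  # -> (2, 256)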
diff --git a/sam3/model/geometry_encoders.py b/sam3/model/geometry_encoders.py
new file mode 100644
index 0000000000000000000000000000000000000000..bff29172b96d0ab5d4b2fc795da1d282d1c5f541
--- /dev/null
+++ b/sam3/model/geometry_encoders.py
@@ -0,0 +1,850 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Tuple
+
+import torch
+import torch.nn as nn
+import torchvision
+from typing_extensions import override
+
+from .act_ckpt_utils import activation_ckpt_wrapper
+from .box_ops import box_cxcywh_to_xyxy
+
+from .model_misc import get_clones
+
+
+def is_right_padded(mask):
+ """Given a padding mask (following pytorch convention, 1s for padded values),
+ returns whether the padding is on the right or not."""
+ return (mask.long() == torch.sort(mask.long(), dim=-1)[0]).all()
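+
+
+# e.g. (illustrative): is_right_padded(torch.tensor([[False, True, True]])) -> tensor(True),
+# while is_right_padded(torch.tensor([[True, False, True]])) -> tensor(False)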
+
+
+def concat_padded_sequences(seq1, mask1, seq2, mask2, return_index: bool = False):
+ """
+ Concatenates two right-padded sequences, such that the resulting sequence
+ is contiguous and also right-padded.
+
+ Following pytorch's convention, tensors are sequence first, and the mask are
+ batch first, with 1s for padded values.
+
+ :param seq1: A tensor of shape (seq1_length, batch_size, hidden_size).
+ :param mask1: A tensor of shape (batch_size, seq1_length).
+ :param seq2: A tensor of shape (seq2_length, batch_size, hidden_size).
+ :param mask2: A tensor of shape (batch_size, seq2_length).
+    :param return_index: If True, also returns the indices of the elements of seq2
+        in the concatenated sequence. This can be used to retrieve the elements of seq2 later.
+ :return: A tuple (concatenated_sequence, concatenated_mask) if return_index is False,
+ otherwise (concatenated_sequence, concatenated_mask, index).
+ """
+ seq1_length, batch_size, hidden_size = seq1.shape
+ seq2_length, batch_size, hidden_size = seq2.shape
+
+ assert batch_size == seq1.size(1) == seq2.size(1) == mask1.size(0) == mask2.size(0)
+ assert hidden_size == seq1.size(2) == seq2.size(2)
+ assert seq1_length == mask1.size(1)
+ assert seq2_length == mask2.size(1)
+
+ torch._assert_async(is_right_padded(mask1))
+ torch._assert_async(is_right_padded(mask2))
+
+ actual_seq1_lengths = (~mask1).sum(dim=-1)
+ actual_seq2_lengths = (~mask2).sum(dim=-1)
+
+ final_lengths = actual_seq1_lengths + actual_seq2_lengths
+ max_length = seq1_length + seq2_length
+ concatenated_mask = (
+ torch.arange(max_length, device=seq2.device)[None].repeat(batch_size, 1)
+ >= final_lengths[:, None]
+ )
+
+ # (max_len, batch_size, hidden_size)
+ concatenated_sequence = torch.zeros(
+ (max_length, batch_size, hidden_size), device=seq2.device, dtype=seq2.dtype
+ )
+ concatenated_sequence[:seq1_length, :, :] = seq1
+
+ # At this point, the element of seq1 are in the right place
+ # We just need to shift the elements of seq2
+
+ index = torch.arange(seq2_length, device=seq2.device)[:, None].repeat(1, batch_size)
+ index = index + actual_seq1_lengths[None]
+
+ concatenated_sequence = concatenated_sequence.scatter(
+ 0, index[:, :, None].expand(-1, -1, hidden_size), seq2
+ )
+
+ if return_index:
+ return concatenated_sequence, concatenated_mask, index
+
+ return concatenated_sequence, concatenated_mask
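+
+
+# Illustrative worked example (hypothetical toy shapes, batch_size=1, hidden_size=1):
+# concatenating a length-3 sequence with one valid token and a length-2 sequence with
+# two valid tokens packs the three valid tokens first and right-pads the rest.
+#
+#   seq1 = torch.arange(3).view(3, 1, 1).float()       # tokens 0, 1, 2
+#   mask1 = torch.tensor([[False, True, True]])        # only token 0 is valid
+#   seq2 = torch.arange(10, 12).view(2, 1, 1).float()  # tokens 10, 11
+#   mask2 = torch.tensor([[False, False]])             # both valid
+#   out, mask = concat_padded_sequences(seq1, mask1, seq2, mask2)
+#   # out[:, 0, 0] == [0, 10, 11, 0, 0]; mask == [[False, False, False, True, True]]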
+
+
+class Prompt:
+ """Utility class to manipulate geometric prompts.
+
+ We expect the sequences in pytorch convention, that is sequence first, batch second
+ The dimensions are expected as follows:
+ box_embeddings shape: N_boxes x B x C_box
+ box_mask shape: B x N_boxes. Can be None if nothing is masked out
+ point_embeddings shape: N_points x B x C_point
+ point_mask shape: B x N_points. Can be None if nothing is masked out
+ mask_embeddings shape: N_masks x B x 1 x H_mask x W_mask
+ mask_mask shape: B x N_masks. Can be None if nothing is masked out
+
+ We also store positive/negative labels. These tensors are also stored batch-first
+ If they are None, we'll assume positive labels everywhere
+ box_labels: long tensor of shape N_boxes x B
+ point_labels: long tensor of shape N_points x B
+ mask_labels: long tensor of shape N_masks x B
+ """
+
+ def __init__(
+ self,
+ box_embeddings=None,
+ box_mask=None,
+ point_embeddings=None,
+ point_mask=None,
+ box_labels=None,
+ point_labels=None,
+ mask_embeddings=None,
+ mask_mask=None, # Attention mask for mask prompt
+ mask_labels=None,
+ ):
+ # Check for null prompt
+ if (
+ box_embeddings is None
+ and point_embeddings is None
+ and mask_embeddings is None
+ ):
+ self.box_embeddings = None
+ self.box_labels = None
+ self.box_mask = None
+ self.point_embeddings = None
+ self.point_labels = None
+ self.point_mask = None
+ self.mask_embeddings = None
+ self.mask_mask = None
+ # Masks are assumed positive only for now.
+ self.mask_labels = None
+ return
+ # Get sequence lengths and device
+ box_seq_len, point_seq_len, mask_seq_len, bs, device = (
+ self._init_seq_len_and_device(
+ box_embeddings, point_embeddings, mask_embeddings
+ )
+ )
+
+ # Initialize embeds, labels, attention masks.
+ box_embeddings, box_labels, box_mask = self._init_box(
+ box_embeddings, box_labels, box_mask, box_seq_len, bs, device
+ )
+ point_embeddings, point_labels, point_mask = self._init_point(
+ point_embeddings, point_labels, point_mask, point_seq_len, bs, device
+ )
+ mask_embeddings, mask_labels, mask_mask = self._init_mask(
+ mask_embeddings, mask_labels, mask_mask, mask_seq_len, bs, device
+ )
+
+ # Dimension checks
+ assert (
+ box_embeddings is not None
+ and list(box_embeddings.shape[:2])
+ == [
+ box_seq_len,
+ bs,
+ ]
+ ), f"Wrong dimension for box embeddings. Expected [{box_seq_len}, {bs}, *] got {box_embeddings.shape}"
+ assert (
+ box_mask is not None
+ and list(box_mask.shape)
+ == [
+ bs,
+ box_seq_len,
+ ]
+ ), f"Wrong dimension for box mask. Expected [{bs}, {box_seq_len}] got {box_mask.shape}"
+ assert (
+ point_embeddings is not None
+ and list(point_embeddings.shape[:2])
+ == [
+ point_seq_len,
+ bs,
+ ]
+ ), f"Wrong dimension for point embeddings. Expected [{point_seq_len}, {bs}, *] got {point_embeddings.shape}"
+ assert (
+ point_mask is not None
+ and list(point_mask.shape)
+ == [
+ bs,
+ point_seq_len,
+ ]
+ ), f"Wrong dimension for point mask. Expected [{bs}, {point_seq_len}] got {point_mask.shape}"
+ assert (
+ box_labels is not None
+ and list(box_labels.shape)
+ == [
+ box_seq_len,
+ bs,
+ ]
+ ), f"Wrong dimension for box labels. Expected [{box_seq_len}, {bs}] got {box_labels.shape}"
+ assert (
+ point_labels is not None
+ and list(point_labels.shape)
+ == [
+ point_seq_len,
+ bs,
+ ]
+ ), f"Wrong dimension for point labels. Expected [{point_seq_len}, {bs}] got {point_labels.shape}"
+ assert (
+ # Allowed to be None, we leave it to the encoder to check for validity before encoding.
+ mask_embeddings is None
+ or list(mask_embeddings.shape[:2])
+ == [
+ mask_seq_len,
+ bs,
+ ]
+ ), f"Wrong dimension for mask embeddings. Expected [{mask_seq_len}, {bs}, *] got {mask_embeddings.shape}"
+ assert (
+ mask_mask is None
+ or list(mask_mask.shape)
+ == [
+ bs,
+ mask_seq_len,
+ ]
+ ), f"Wrong dimension for mask attn. mask. Expected [{bs}, {mask_seq_len}] got {mask_mask.shape}"
+
+ # Device checks
+ assert (
+ box_embeddings is not None and box_embeddings.device == device
+ ), f"Expected box embeddings to be on device {device}, got {box_embeddings.device}"
+ assert (
+ box_mask is not None and box_mask.device == device
+ ), f"Expected box mask to be on device {device}, got {box_mask.device}"
+ assert (
+ box_labels is not None and box_labels.device == device
+ ), f"Expected box labels to be on device {device}, got {box_labels.device}"
+ assert (
+ point_embeddings is not None and point_embeddings.device == device
+ ), f"Expected point embeddings to be on device {device}, got {point_embeddings.device}"
+ assert (
+ point_mask is not None and point_mask.device == device
+ ), f"Expected point mask to be on device {device}, got {point_mask.device}"
+ assert (
+ point_labels is not None and point_labels.device == device
+ ), f"Expected point labels to be on device {device}, got {point_labels.device}"
+ assert (
+ mask_embeddings is None or mask_embeddings.device == device
+ ), f"Expected mask embeddings to be on device {device}, got {mask_embeddings.device}"
+ assert (
+ mask_mask is None or mask_mask.device == device
+ ), f"Expected mask attn. mask to be on device {device}, got {mask_mask.device}"
+
+ self.box_embeddings = box_embeddings
+ self.point_embeddings = point_embeddings
+ self.box_mask = box_mask
+ self.point_mask = point_mask
+ self.box_labels = box_labels
+ self.point_labels = point_labels
+ self.mask_embeddings = mask_embeddings
+ self.mask_labels = mask_labels
+ self.mask_mask = mask_mask
+
+ def _init_seq_len_and_device(
+ self, box_embeddings, point_embeddings, mask_embeddings
+ ):
+ box_seq_len = point_seq_len = mask_seq_len = 0
+ bs = None
+ device = None
+ if box_embeddings is not None:
+ bs = box_embeddings.shape[1]
+ box_seq_len = box_embeddings.shape[0]
+ device = box_embeddings.device
+
+ if point_embeddings is not None:
+ point_seq_len = point_embeddings.shape[0]
+ if bs is not None:
+ assert (
+ bs == point_embeddings.shape[1]
+ ), f"Batch size mismatch between box and point embeddings. Got {bs} and {point_embeddings.shape[1]}."
+ else:
+ bs = point_embeddings.shape[1]
+ if device is not None:
+ assert (
+ device == point_embeddings.device
+ ), "Device mismatch between box and point embeddings"
+ else:
+ device = point_embeddings.device
+
+ if mask_embeddings is not None:
+ mask_seq_len = mask_embeddings.shape[0]
+ if bs is not None:
+ assert (
+ bs == mask_embeddings.shape[1]
+ ), f"Batch size mismatch between box/point and mask embedding. Got {bs} and {mask_embeddings.shape[1]}"
+ else:
+ bs = mask_embeddings.shape[1]
+ if device is not None:
+ assert (
+ device == mask_embeddings.device
+ ), "Device mismatch between box/point and mask embeddings."
+ else:
+ device = mask_embeddings.device
+
+ return box_seq_len, point_seq_len, mask_seq_len, bs, device
+
+ def _init_box(self, box_embeddings, box_labels, box_mask, box_seq_len, bs, device):
+ if box_embeddings is None:
+ box_embeddings = torch.zeros(box_seq_len, bs, 4, device=device)
+ if box_labels is None:
+ box_labels = torch.ones(box_seq_len, bs, device=device, dtype=torch.long)
+ if box_mask is None:
+ box_mask = torch.zeros(bs, box_seq_len, device=device, dtype=torch.bool)
+ return box_embeddings, box_labels, box_mask
+
+ def _init_point(
+ self, point_embeddings, point_labels, point_mask, point_seq_len, bs, device
+ ):
+ """
+        Identical to _init_box, except that C=2 for points (vs. 4 for boxes).
+ """
+ if point_embeddings is None:
+ point_embeddings = torch.zeros(point_seq_len, bs, 2, device=device)
+ if point_labels is None:
+ point_labels = torch.ones(
+ point_seq_len, bs, device=device, dtype=torch.long
+ )
+ if point_mask is None:
+ point_mask = torch.zeros(bs, point_seq_len, device=device, dtype=torch.bool)
+ return point_embeddings, point_labels, point_mask
+
+ def _init_mask(
+ self, mask_embeddings, mask_labels, mask_mask, mask_seq_len, bs, device
+ ):
+        # NOTE: Mask embeddings can be of arbitrary resolution, so we don't initialize them here.
+        # In case we append a new mask, we check that its resolution matches existing ones (if any).
+        # In case mask_embeddings is None, we should never encode it.
+ if mask_labels is None:
+ mask_labels = torch.ones(mask_seq_len, bs, device=device, dtype=torch.long)
+ if mask_mask is None:
+ mask_mask = torch.zeros(bs, mask_seq_len, device=device, dtype=torch.bool)
+ return mask_embeddings, mask_labels, mask_mask
+
+ def append_boxes(self, boxes, labels, mask=None):
+ if self.box_embeddings is None:
+ self.box_embeddings = boxes
+ self.box_labels = labels
+ self.box_mask = mask
+ return
+
+ bs = self.box_embeddings.shape[1]
+ assert boxes.shape[1] == labels.shape[1] == bs
+ assert list(boxes.shape[:2]) == list(labels.shape[:2])
+ if mask is None:
+ mask = torch.zeros(
+ bs, boxes.shape[0], dtype=torch.bool, device=boxes.device
+ )
+
+ self.box_labels, _ = concat_padded_sequences(
+ self.box_labels.unsqueeze(-1), self.box_mask, labels.unsqueeze(-1), mask
+ )
+ self.box_labels = self.box_labels.squeeze(-1)
+ self.box_embeddings, self.box_mask = concat_padded_sequences(
+ self.box_embeddings, self.box_mask, boxes, mask
+ )
+
+ def append_points(self, points, labels, mask=None):
+ if self.point_embeddings is None:
+ self.point_embeddings = points
+ self.point_labels = labels
+ self.point_mask = mask
+ return
+
+ bs = self.point_embeddings.shape[1]
+ assert points.shape[1] == labels.shape[1] == bs
+ assert list(points.shape[:2]) == list(labels.shape[:2])
+ if mask is None:
+ mask = torch.zeros(
+ bs, points.shape[0], dtype=torch.bool, device=points.device
+ )
+
+ self.point_labels, _ = concat_padded_sequences(
+ self.point_labels.unsqueeze(-1), self.point_mask, labels.unsqueeze(-1), mask
+ )
+ self.point_labels = self.point_labels.squeeze(-1)
+ self.point_embeddings, self.point_mask = concat_padded_sequences(
+ self.point_embeddings, self.point_mask, points, mask
+ )
+
+ def append_masks(self, masks, labels=None, attn_mask=None):
+ if labels is not None:
+ assert list(masks.shape[:2]) == list(labels.shape[:2])
+ if self.mask_embeddings is None:
+ self.mask_embeddings = masks
+ mask_seq_len, bs = masks.shape[:2]
+ if labels is None:
+ self.mask_labels = torch.ones(
+ mask_seq_len, bs, device=masks.device, dtype=torch.long
+ )
+ else:
+ self.mask_labels = labels
+ if attn_mask is None:
+ self.mask_mask = torch.zeros(
+ bs, mask_seq_len, device=masks.device, dtype=torch.bool
+ )
+ else:
+ self.mask_mask = attn_mask
+ else:
+ raise NotImplementedError("Only one mask per prompt is supported.")
+
+ def clone(self):
+ return Prompt(
+ box_embeddings=(
+ None if self.box_embeddings is None else self.box_embeddings.clone()
+ ),
+ box_mask=None if self.box_mask is None else self.box_mask.clone(),
+ point_embeddings=(
+ None if self.point_embeddings is None else self.point_embeddings.clone()
+ ),
+ point_mask=None if self.point_mask is None else self.point_mask.clone(),
+ box_labels=None if self.box_labels is None else self.box_labels.clone(),
+ point_labels=(
+ None if self.point_labels is None else self.point_labels.clone()
+ ),
+ )
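+
+
+# Illustrative sketch (hypothetical values): a Prompt carrying one positive box for a
+# batch of two images, then extended with three points per image. Shapes follow the
+# sequence-first convention documented in the class docstring.
+#
+#   boxes = torch.rand(1, 2, 4)                        # N_boxes x B x 4, normalized cxcywh
+#   box_labels = torch.ones(1, 2, dtype=torch.long)    # 1 = positive
+#   prompt = Prompt(box_embeddings=boxes, box_labels=box_labels)
+#   prompt.append_points(
+#       points=torch.rand(3, 2, 2),                    # N_points x B x 2, normalized xy
+#       labels=torch.ones(3, 2, dtype=torch.long),
+#   )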
+
+
+class MaskEncoder(nn.Module):
+ """
+ Base class for mask encoders.
+ """
+
+ def __init__(
+ self,
+ mask_downsampler: nn.Module,
+ position_encoding: nn.Module,
+ ):
+ super().__init__()
+ self.mask_downsampler = mask_downsampler
+ self.position_encoding = position_encoding
+
+ def forward(self, masks, *args, **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
+ masks = self.mask_downsampler(masks)
+ masks_pos = self.position_encoding(masks).to(masks.dtype)
+
+ return masks, masks_pos
+
+
+class FusedMaskEncoder(MaskEncoder):
+ """
+ Identical to memory.SimpleMaskEncoder but follows the interface of geometry_encoders.MaskEncoder.
+ We also remove the `skip_mask_sigmoid` option (to be handled outside the MaskEncoder).
+ Fuses backbone image features with mask features.
+ """
+
+ def __init__(
+ self,
+ mask_downsampler: nn.Module,
+ position_encoding: nn.Module,
+ fuser: nn.Module,
+ in_dim: int = 256,
+ out_dim: int = 256,
+ ):
+ super().__init__(mask_downsampler, position_encoding)
+ self.fuser = fuser
+ self.out_proj = nn.Identity()
+ if out_dim != in_dim:
+ self.out_proj = nn.Conv2d(in_dim, out_dim, kernel_size=1)
+ self.pix_feat_proj = nn.Conv2d(in_dim, in_dim, kernel_size=1)
+
+ @override
+ def forward(
+ self,
+ masks: torch.Tensor,
+ pix_feat: torch.Tensor,
+ **kwargs,
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ masks = self.mask_downsampler(masks)
+
+ ## Fuse pix_feats and downsampled masks
+        # in case the visual features are on CPU, move them to the masks' device
+ pix_feat = pix_feat.to(masks.device)
+
+ x = self.pix_feat_proj(pix_feat)
+ x = x + masks
+ x = self.fuser(x)
+ x = self.out_proj(x)
+
+ pos = self.position_encoding(x).to(x.dtype)
+
+ return x, pos
+
+
+class SequenceGeometryEncoder(nn.Module):
+ """
+    This is a fully fledged encoder for geometric prompts.
+    It assumes boxes are passed in the normalized CxCyWH format, and points in normalized xy.
+    This allows flexibility in how to encode the features (e.g. pooling).
+
+ Points and boxes can be encoded with any of the three possibilities:
+ - direct projection: we just compute a linear from coordinate space to d_model
+ - pooling: pool features from the backbone in the requested location.
+ For boxes, it's a roi align
+ For points it's a grid sample
+ - pos encoder: Take the position encoding of the point or box center
+
+    These three options are mutually compatible. If several are selected, their contributions are simply added.
+
+ As an alternative, we offer the possibility to encode points only.
+ In that case, the boxes are converted to two points for the top left and bottom right corners (with appropriate labels)
+
+ On top of these encodings, we offer the possibility to further encode the prompt sequence with a transformer.
+ """
+
+ def __init__(
+ self,
+ encode_boxes_as_points: bool,
+ points_direct_project: bool,
+ points_pool: bool,
+ points_pos_enc: bool,
+ boxes_direct_project: bool,
+ boxes_pool: bool,
+ boxes_pos_enc: bool,
+ d_model: int,
+ pos_enc,
+ num_layers: int,
+ layer: nn.Module,
+ roi_size: int = 7, # for boxes pool
+ add_cls: bool = True,
+ add_post_encode_proj: bool = True,
+ mask_encoder: MaskEncoder = None,
+ add_mask_label: bool = False,
+ use_act_ckpt: bool = False,
+ ):
+ super().__init__()
+
+ self.d_model = d_model
+ self.pos_enc = pos_enc
+ self.encode_boxes_as_points = encode_boxes_as_points
+ self.roi_size = roi_size
+        # There are usually two labels: positive and negative.
+ # If we encode boxes as points, we have 3 types of points: regular, top left, bottom right
+ # These 3 types can be positives or negatives, hence 2*3 = 6 labels
+ num_labels = 6 if self.encode_boxes_as_points else 2
+ self.label_embed = torch.nn.Embedding(num_labels, self.d_model)
+
+ # This is a cls token, can be used for pooling if need be.
+ # It also ensures that the encoded sequences are always non-empty
+ self.cls_embed = None
+ if add_cls:
+ self.cls_embed = torch.nn.Embedding(1, self.d_model)
+
+ assert (
+ points_direct_project or points_pos_enc or points_pool
+ ), "Error: need at least one way to encode points"
+ assert (
+ encode_boxes_as_points
+ or boxes_direct_project
+ or boxes_pos_enc
+ or boxes_pool
+ ), "Error: need at least one way to encode boxes"
+
+ self.points_direct_project = None
+ if points_direct_project:
+ self.points_direct_project = nn.Linear(2, self.d_model)
+ self.points_pool_project = None
+ if points_pool:
+ self.points_pool_project = nn.Linear(self.d_model, self.d_model)
+ self.points_pos_enc_project = None
+ if points_pos_enc:
+ self.points_pos_enc_project = nn.Linear(self.d_model, self.d_model)
+
+ self.boxes_direct_project = None
+ self.boxes_pool_project = None
+ self.boxes_pos_enc_project = None
+ if not encode_boxes_as_points:
+ if boxes_direct_project:
+ self.boxes_direct_project = nn.Linear(4, self.d_model)
+ if boxes_pool:
+ self.boxes_pool_project = nn.Conv2d(
+ self.d_model, self.d_model, self.roi_size
+ )
+ if boxes_pos_enc:
+ self.boxes_pos_enc_project = nn.Linear(self.d_model + 2, self.d_model)
+
+ self.final_proj = None
+ if add_post_encode_proj:
+ self.final_proj = nn.Linear(self.d_model, self.d_model)
+ self.norm = nn.LayerNorm(self.d_model)
+
+ self.img_pre_norm = nn.Identity()
+ if self.points_pool_project is not None or self.boxes_pool_project is not None:
+ self.img_pre_norm = nn.LayerNorm(self.d_model)
+
+ self.encode = None
+ if num_layers > 0:
+ assert (
+ add_cls
+ ), "It's currently highly recommended to add a CLS when using a transformer"
+ self.encode = get_clones(layer, num_layers)
+ self.encode_norm = nn.LayerNorm(self.d_model)
+
+ if mask_encoder is not None:
+ assert isinstance(
+ mask_encoder, MaskEncoder
+ ), f"Expected mask_encoder of type MaskEncoder. Got {type(mask_encoder)}."
+ if add_mask_label:
+ self.mask_label_embed = torch.nn.Embedding(2, self.d_model)
+ self.add_mask_label = add_mask_label
+ self.mask_encoder = mask_encoder
+ self.use_act_ckpt = use_act_ckpt
+
+ def _encode_points(self, points, points_mask, points_labels, img_feats):
+ points_embed = None
+ n_points, bs = points.shape[:2]
+
+ if self.points_direct_project is not None:
+ proj = self.points_direct_project(points)
+ assert points_embed is None
+ points_embed = proj
+
+ if self.points_pool_project is not None:
+ # points are [Num_points, bs, 2], normalized in [0, 1]
+ # the grid needs to be [Bs, H_out, W_out, 2] normalized in [-1,1]
+ # Will take H_out = num_points, w_out = 1
+ grid = points.transpose(0, 1).unsqueeze(2)
+ # re normalize to [-1, 1]
+ grid = (grid * 2) - 1
+ sampled = torch.nn.functional.grid_sample(
+ img_feats, grid, align_corners=False
+ )
+ assert list(sampled.shape) == [bs, self.d_model, n_points, 1]
+ sampled = sampled.squeeze(-1).permute(2, 0, 1)
+ proj = self.points_pool_project(sampled)
+ if points_embed is None:
+ points_embed = proj
+ else:
+ points_embed = points_embed + proj
+
+ if self.points_pos_enc_project is not None:
+ x, y = points.unbind(-1)
+ enc_x, enc_y = self.pos_enc._encode_xy(x.flatten(), y.flatten())
+ enc_x = enc_x.view(n_points, bs, enc_x.shape[-1])
+ enc_y = enc_y.view(n_points, bs, enc_y.shape[-1])
+ enc = torch.cat([enc_x, enc_y], -1)
+
+ proj = self.points_pos_enc_project(enc)
+ if points_embed is None:
+ points_embed = proj
+ else:
+ points_embed = points_embed + proj
+
+ type_embed = self.label_embed(points_labels.long())
+ return type_embed + points_embed, points_mask
+
+ def _encode_boxes(self, boxes, boxes_mask, boxes_labels, img_feats):
+ boxes_embed = None
+ n_boxes, bs = boxes.shape[:2]
+
+ if self.boxes_direct_project is not None:
+ proj = self.boxes_direct_project(boxes)
+ assert boxes_embed is None
+ boxes_embed = proj
+
+ if self.boxes_pool_project is not None:
+ H, W = img_feats.shape[-2:]
+
+ # boxes are [Num_boxes, bs, 4], normalized in [0, 1]
+ # We need to denormalize, and convert to [x, y, x, y]
+ boxes_xyxy = box_cxcywh_to_xyxy(boxes)
+ scale = torch.tensor([W, H, W, H], dtype=boxes_xyxy.dtype)
+ scale = scale.pin_memory().to(device=boxes_xyxy.device, non_blocking=True)
+ scale = scale.view(1, 1, 4)
+ boxes_xyxy = boxes_xyxy * scale
+ sampled = torchvision.ops.roi_align(
+ img_feats, boxes_xyxy.float().transpose(0, 1).unbind(0), self.roi_size
+ )
+ assert list(sampled.shape) == [
+ bs * n_boxes,
+ self.d_model,
+ self.roi_size,
+ self.roi_size,
+ ]
+ proj = self.boxes_pool_project(sampled)
+ proj = proj.view(bs, n_boxes, self.d_model).transpose(0, 1)
+ if boxes_embed is None:
+ boxes_embed = proj
+ else:
+ boxes_embed = boxes_embed + proj
+
+ if self.boxes_pos_enc_project is not None:
+ cx, cy, w, h = boxes.unbind(-1)
+ enc = self.pos_enc.encode_boxes(
+ cx.flatten(), cy.flatten(), w.flatten(), h.flatten()
+ )
+ enc = enc.view(boxes.shape[0], boxes.shape[1], enc.shape[-1])
+
+ proj = self.boxes_pos_enc_project(enc)
+ if boxes_embed is None:
+ boxes_embed = proj
+ else:
+ boxes_embed = boxes_embed + proj
+
+ type_embed = self.label_embed(boxes_labels.long())
+ return type_embed + boxes_embed, boxes_mask
+
+ def _encode_masks(
+ self,
+ masks: torch.Tensor,
+ attn_mask: torch.Tensor,
+ mask_labels: torch.Tensor,
+ img_feats: torch.Tensor = None,
+ ):
+ n_masks, bs = masks.shape[:2]
+ assert (
+ n_masks == 1
+ ), "We assume one mask per prompt for now. Code should still be functional if this assertion is removed."
+ assert (
+ list(attn_mask.shape)
+ == [
+ bs,
+ n_masks,
+ ]
+ ), f"Expected attn_mask to be of shape {bs}x{n_masks}. Got {list(attn_mask.shape)}."
+ masks, pos = self.mask_encoder(
+ masks=masks.flatten(0, 1).float(),
+ pix_feat=img_feats,
+ )
+ H, W = masks.shape[-2:]
+ n_tokens_per_mask = H * W
+ # NOTE: We directly add pos enc here as we usually don't keep track of pos encoding for the concatenated prompt (text, other geometric prompts). Might need to do some refactoring for more flexibility.
+ masks = masks + pos
+ masks = masks.view(n_masks, bs, *masks.shape[1:]).flatten(
+ -2
+ ) # n_masks x bs x C x H*W
+ masks = masks.permute(0, 3, 1, 2).flatten(0, 1) # n_masks * H*W x bs x C
+ attn_mask = attn_mask.repeat_interleave(n_tokens_per_mask, dim=1)
+ if self.add_mask_label:
+ masks = masks + self.mask_label_embed(mask_labels.long())
+ return masks, attn_mask
+
+ def forward(self, geo_prompt: Prompt, img_feats, img_sizes, img_pos_embeds=None):
+ points = geo_prompt.point_embeddings
+ points_mask = geo_prompt.point_mask
+ points_labels = geo_prompt.point_labels
+ boxes = geo_prompt.box_embeddings
+ boxes_mask = geo_prompt.box_mask
+ boxes_labels = geo_prompt.box_labels
+ masks = geo_prompt.mask_embeddings
+ masks_mask = geo_prompt.mask_mask
+ masks_labels = geo_prompt.mask_labels
+ seq_first_img_feats = img_feats[-1] # [H*W, B, C]
+ seq_first_img_pos_embeds = (
+ img_pos_embeds[-1]
+ if img_pos_embeds is not None
+ else torch.zeros_like(seq_first_img_feats)
+ )
+
+ if self.points_pool_project or self.boxes_pool_project:
+ assert len(img_feats) == len(img_sizes)
+ cur_img_feat = img_feats[-1]
+ cur_img_feat = self.img_pre_norm(cur_img_feat)
+ H, W = img_sizes[-1]
+ assert cur_img_feat.shape[0] == H * W
+ N, C = cur_img_feat.shape[-2:]
+ # Put back in NxCxHxW
+ cur_img_feat = cur_img_feat.permute(1, 2, 0)
+ cur_img_feat = cur_img_feat.view(N, C, H, W)
+ img_feats = cur_img_feat
+
+ if self.encode_boxes_as_points:
+ assert boxes is not None
+ assert geo_prompt.box_mask is not None
+ assert geo_prompt.box_labels is not None
+ assert boxes.shape[-1] == 4
+
+ boxes_xyxy = box_cxcywh_to_xyxy(boxes)
+ top_left, bottom_right = boxes_xyxy.split(split_size=2, dim=-1)
+
+ labels_tl = geo_prompt.box_labels + 2
+ labels_br = geo_prompt.box_labels + 4
+
+ # Append to the existing points
+ points, _ = concat_padded_sequences(
+ points, points_mask, top_left, boxes_mask
+ )
+ points_labels, points_mask = concat_padded_sequences(
+ points_labels.unsqueeze(-1),
+ points_mask,
+ labels_tl.unsqueeze(-1),
+ boxes_mask,
+ )
+ points_labels = points_labels.squeeze(-1)
+
+ points, _ = concat_padded_sequences(
+ points, points_mask, bottom_right, boxes_mask
+ )
+ points_labels, points_mask = concat_padded_sequences(
+ points_labels.unsqueeze(-1),
+ points_mask,
+ labels_br.unsqueeze(-1),
+ boxes_mask,
+ )
+ points_labels = points_labels.squeeze(-1)
+
+ final_embeds, final_mask = self._encode_points(
+ points=points,
+ points_mask=points_mask,
+ points_labels=points_labels,
+ img_feats=img_feats,
+ )
+
+ if not self.encode_boxes_as_points:
+ boxes_embeds, boxes_mask = self._encode_boxes(
+ boxes=boxes,
+ boxes_mask=boxes_mask,
+ boxes_labels=boxes_labels,
+ img_feats=img_feats,
+ )
+
+ final_embeds, final_mask = concat_padded_sequences(
+ final_embeds, final_mask, boxes_embeds, boxes_mask
+ )
+
+ if masks is not None and self.mask_encoder is not None:
+ masks_embed, masks_mask = self._encode_masks(
+ masks=masks,
+ attn_mask=masks_mask,
+ mask_labels=masks_labels,
+ img_feats=img_feats,
+ )
+ if points.size(0) == boxes.size(0) == 0:
+ return masks_embed, masks_mask
+ bs = final_embeds.shape[1]
+ assert final_mask.shape[0] == bs
+ if self.cls_embed is not None:
+ cls = self.cls_embed.weight.view(1, 1, self.d_model).repeat(1, bs, 1)
+ cls_mask = torch.zeros(
+ bs, 1, dtype=final_mask.dtype, device=final_mask.device
+ )
+ final_embeds, final_mask = concat_padded_sequences(
+ final_embeds, final_mask, cls, cls_mask
+ )
+
+ if self.final_proj is not None:
+ final_embeds = self.norm(self.final_proj(final_embeds))
+
+ if self.encode is not None:
+ for lay in self.encode:
+ final_embeds = activation_ckpt_wrapper(lay)(
+ tgt=final_embeds,
+ memory=seq_first_img_feats,
+ tgt_key_padding_mask=final_mask,
+ pos=seq_first_img_pos_embeds,
+ act_ckpt_enable=self.training and self.use_act_ckpt,
+ )
+ final_embeds = self.encode_norm(final_embeds)
+ # Finally, concat mask embeddings if any
+ if masks is not None and self.mask_encoder is not None:
+ final_embeds, final_mask = concat_padded_sequences(
+ final_embeds, final_mask, masks_embed, masks_mask
+ )
+ return final_embeds, final_mask
diff --git a/sam3/model/io_utils.py b/sam3/model/io_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..0a225842e4dca6eac64d84f262bddde2ee29d27b
--- /dev/null
+++ b/sam3/model/io_utils.py
@@ -0,0 +1,709 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import contextlib
+import os
+import queue
+import re
+import time
+from threading import Condition, get_ident, Lock, Thread
+from typing import Tuple
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+import torchvision.transforms.functional as TF
+
+from PIL import Image
+
+from sam3.logger import get_logger
+from tqdm import tqdm
+
+logger = get_logger(__name__)
+
+IS_MAIN_PROCESS = os.getenv("IS_MAIN_PROCESS", "1") == "1"
+RANK = int(os.getenv("RANK", "0"))
+
+IMAGE_EXTS = [".jpg", ".jpeg", ".png", ".bmp", ".tiff", ".webp"]
+VIDEO_EXTS = [".mp4", ".mov", ".avi", ".mkv", ".webm"]
+
+
+def load_resource_as_video_frames(
+ resource_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean=(0.5, 0.5, 0.5),
+ img_std=(0.5, 0.5, 0.5),
+ async_loading_frames=False,
+ video_loader_type="cv2",
+):
+ """
+ Load video frames from either a video or an image (as a single-frame video).
+    Alternatively, if the input is a list of PIL images, convert them into the same frame-tensor format.
+ """
+ if isinstance(resource_path, list):
+ img_mean = torch.tensor(img_mean, dtype=torch.float16)[:, None, None]
+ img_std = torch.tensor(img_std, dtype=torch.float16)[:, None, None]
+ assert all(isinstance(img_pil, Image.Image) for img_pil in resource_path)
+        assert len(resource_path) > 0
+        # PIL's Image.size is (width, height), so unpack accordingly
+        orig_width, orig_height = resource_path[0].size
+ images = []
+ for img_pil in resource_path:
+ img_np = np.array(img_pil.convert("RGB").resize((image_size, image_size)))
+ assert img_np.dtype == np.uint8, "np.uint8 is expected for JPEG images"
+ img_np = img_np / 255.0
+ img = torch.from_numpy(img_np).permute(2, 0, 1)
+ # float16 precision should be sufficient for image tensor storage
+ img = img.to(dtype=torch.float16)
+ # normalize by mean and std
+ img -= img_mean
+ img /= img_std
+ images.append(img)
+ images = torch.stack(images)
+ if not offload_video_to_cpu:
+ images = images.cuda()
+ return images, orig_height, orig_width
+
+ is_image = (
+ isinstance(resource_path, str)
+ and os.path.splitext(resource_path)[-1].lower() in IMAGE_EXTS
+ )
+ if is_image:
+ return load_image_as_single_frame_video(
+ image_path=resource_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ )
+ else:
+ return load_video_frames(
+ video_path=resource_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ async_loading_frames=async_loading_frames,
+ video_loader_type=video_loader_type,
+ )
+
+
+def load_image_as_single_frame_video(
+ image_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean=(0.5, 0.5, 0.5),
+ img_std=(0.5, 0.5, 0.5),
+):
+ """Load an image as a single-frame video."""
+ images, image_height, image_width = _load_img_as_tensor(image_path, image_size)
+ images = images.unsqueeze(0).half()
+
+ img_mean = torch.tensor(img_mean, dtype=torch.float16)[:, None, None]
+ img_std = torch.tensor(img_std, dtype=torch.float16)[:, None, None]
+ if not offload_video_to_cpu:
+ images = images.cuda()
+ img_mean = img_mean.cuda()
+ img_std = img_std.cuda()
+ # normalize by mean and std
+ images -= img_mean
+ images /= img_std
+ return images, image_height, image_width
+
+
+def load_video_frames(
+ video_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean=(0.5, 0.5, 0.5),
+ img_std=(0.5, 0.5, 0.5),
+ async_loading_frames=False,
+ video_loader_type="cv2",
+):
+ """
+    Load the video frames from video_path. The frames are resized to image_size x image_size
+    as expected by the model and are loaded onto the GPU if offload_video_to_cpu=False.
+    This is used by the demo.
+ """
+ assert isinstance(video_path, str)
+ if video_path.startswith(" where N is an integer
+ match = re.match(r"", video_path)
+ num_frames = int(match.group(1)) if match else 60
+ return load_dummy_video(image_size, offload_video_to_cpu, num_frames=num_frames)
+ elif os.path.isdir(video_path):
+ return load_video_frames_from_image_folder(
+ image_folder=video_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ async_loading_frames=async_loading_frames,
+ )
+ elif os.path.splitext(video_path)[-1].lower() in VIDEO_EXTS:
+ return load_video_frames_from_video_file(
+ video_path=video_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ async_loading_frames=async_loading_frames,
+ video_loader_type=video_loader_type,
+ )
+ else:
+ raise NotImplementedError("Only video files and image folders are supported")
+
+
+def load_video_frames_from_image_folder(
+ image_folder,
+ image_size,
+ offload_video_to_cpu,
+ img_mean,
+ img_std,
+ async_loading_frames,
+):
+ """
+    Load the video frames from a directory of image files ("<frame_index>.<ext>" format)
+ """
+ frame_names = [
+ p
+ for p in os.listdir(image_folder)
+ if os.path.splitext(p)[-1].lower() in IMAGE_EXTS
+ ]
+ try:
+ frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
+ except ValueError:
+        # fallback to lexicographic sort if the format is not "<frame_index>.<ext>"
+        logger.warning(
+            f'frame names are not in "<frame_index>.<ext>" format: {frame_names[:5]=}, '
+ f"falling back to lexicographic sort."
+ )
+ frame_names.sort()
+ num_frames = len(frame_names)
+ if num_frames == 0:
+ raise RuntimeError(f"no images found in {image_folder}")
+ img_paths = [os.path.join(image_folder, frame_name) for frame_name in frame_names]
+ img_mean = torch.tensor(img_mean, dtype=torch.float16)[:, None, None]
+ img_std = torch.tensor(img_std, dtype=torch.float16)[:, None, None]
+
+ if async_loading_frames:
+ lazy_images = AsyncImageFrameLoader(
+ img_paths, image_size, offload_video_to_cpu, img_mean, img_std
+ )
+ return lazy_images, lazy_images.video_height, lazy_images.video_width
+
+ # float16 precision should be sufficient for image tensor storage
+ images = torch.zeros(num_frames, 3, image_size, image_size, dtype=torch.float16)
+ video_height, video_width = None, None
+ for n, img_path in enumerate(
+ tqdm(img_paths, desc=f"frame loading (image folder) [rank={RANK}]")
+ ):
+ images[n], video_height, video_width = _load_img_as_tensor(img_path, image_size)
+ if not offload_video_to_cpu:
+ images = images.cuda()
+ img_mean = img_mean.cuda()
+ img_std = img_std.cuda()
+ # normalize by mean and std
+ images -= img_mean
+ images /= img_std
+ return images, video_height, video_width
+
+
+def load_video_frames_from_video_file(
+ video_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean,
+ img_std,
+ async_loading_frames,
+ gpu_acceleration=False,
+ gpu_device=None,
+ video_loader_type="cv2",
+):
+ """Load the video frames from a video file."""
+ if video_loader_type == "cv2":
+ return load_video_frames_from_video_file_using_cv2(
+ video_path=video_path,
+ image_size=image_size,
+ img_mean=img_mean,
+ img_std=img_std,
+ offload_video_to_cpu=offload_video_to_cpu,
+ )
+ elif video_loader_type == "torchcodec":
+ logger.info("Using torchcodec to load video file")
+ lazy_images = AsyncVideoFileLoaderWithTorchCodec(
+ video_path=video_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ gpu_acceleration=gpu_acceleration,
+ gpu_device=gpu_device,
+ )
+ # The `AsyncVideoFileLoaderWithTorchCodec` class always loads the videos asynchronously,
+ # so we just wait for its loading thread to finish if async_loading_frames=False.
+ if not async_loading_frames:
+ async_thread = lazy_images.thread
+ if async_thread is not None:
+ async_thread.join()
+ return lazy_images, lazy_images.video_height, lazy_images.video_width
+ else:
+ raise RuntimeError("video_loader_type must be either 'cv2' or 'torchcodec'")
+
+
+def load_video_frames_from_video_file_using_cv2(
+ video_path: str,
+ image_size: int,
+ img_mean: tuple = (0.5, 0.5, 0.5),
+ img_std: tuple = (0.5, 0.5, 0.5),
+ offload_video_to_cpu: bool = False,
+) -> Tuple[torch.Tensor, int, int]:
+ """
+ Load video from path, convert to normalized tensor with specified preprocessing
+
+ Args:
+ video_path: Path to video file
+ image_size: Target size for square frames (height and width)
+ img_mean: Normalization mean (RGB)
+        img_std: Normalization standard deviation (RGB)
+        offload_video_to_cpu: If False, the returned tensor is moved to the GPU
+
+    Returns:
+        A tuple (video_tensor, original_height, original_width), where video_tensor
+        holds the preprocessed frames in shape (T, C, H, W)
+ """
+ import cv2 # delay OpenCV import to avoid unnecessary dependency
+
+ # Initialize video capture
+ cap = cv2.VideoCapture(video_path)
+ if not cap.isOpened():
+ raise ValueError(f"Could not open video: {video_path}")
+
+ original_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ original_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+ num_frames = num_frames if num_frames > 0 else None
+
+ frames = []
+ pbar = tqdm(desc=f"frame loading (OpenCV) [rank={RANK}]", total=num_frames)
+ while True:
+ ret, frame = cap.read()
+ if not ret:
+ break
+
+ # Convert BGR to RGB and resize
+ frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+ frame_resized = cv2.resize(
+ frame_rgb, (image_size, image_size), interpolation=cv2.INTER_CUBIC
+ )
+ frames.append(frame_resized)
+ pbar.update(1)
+ cap.release()
+ pbar.close()
+
+ # Convert to tensor
+ frames_np = np.stack(frames, axis=0).astype(np.float32) # (T, H, W, C)
+ video_tensor = torch.from_numpy(frames_np).permute(0, 3, 1, 2) # (T, C, H, W)
+
+ img_mean = torch.tensor(img_mean, dtype=torch.float16).view(1, 3, 1, 1)
+ img_std = torch.tensor(img_std, dtype=torch.float16).view(1, 3, 1, 1)
+ if not offload_video_to_cpu:
+ video_tensor = video_tensor.cuda()
+ img_mean = img_mean.cuda()
+ img_std = img_std.cuda()
+ # normalize by mean and std
+ video_tensor -= img_mean
+ video_tensor /= img_std
+ return video_tensor, original_height, original_width
+
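+
+# Hedged usage sketch (added for illustration, not part of the upstream loader API):
+# "example.mp4" and image_size=1024 are placeholder assumptions. The helper above reads
+# all frames with OpenCV, converts BGR->RGB, resizes to a square, and normalizes by the
+# given mean/std; offload_video_to_cpu=True keeps the tensor on CPU.
+def _example_cv2_video_loading():  # never called; documentation sketch only
+    frames, orig_h, orig_w = load_video_frames_from_video_file_using_cv2(
+        video_path="example.mp4",
+        image_size=1024,
+        img_mean=(0.5, 0.5, 0.5),
+        img_std=(0.5, 0.5, 0.5),
+        offload_video_to_cpu=True,
+    )
+    # frames: (T, 3, 1024, 1024) normalized tensor; orig_h/orig_w are the source resolution
+    return frames, orig_h, orig_w
+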
+
+def load_dummy_video(image_size, offload_video_to_cpu, num_frames=60):
+ """
+ Load a dummy video with random frames for testing and compilation warmup purposes.
+ """
+ video_height, video_width = 480, 640 # dummy original video sizes
+ images = torch.randn(num_frames, 3, image_size, image_size, dtype=torch.float16)
+ if not offload_video_to_cpu:
+ images = images.cuda()
+ return images, video_height, video_width
+
+
+def _load_img_as_tensor(img_path, image_size):
+ """Load and resize an image and convert it into a PyTorch tensor."""
+ img = Image.open(img_path).convert("RGB")
+ orig_width, orig_height = img.width, img.height
+ img = TF.resize(img, size=(image_size, image_size))
+ img = TF.to_tensor(img)
+ return img, orig_height, orig_width
+
+
+class AsyncImageFrameLoader:
+ """
+    A list of video frames to be loaded asynchronously without blocking the session start.
+ """
+
+ def __init__(self, img_paths, image_size, offload_video_to_cpu, img_mean, img_std):
+ self.img_paths = img_paths
+ self.image_size = image_size
+ self.offload_video_to_cpu = offload_video_to_cpu
+ self.img_mean = img_mean
+ self.img_std = img_std
+ # items in `self._images` will be loaded asynchronously
+ self.images = [None] * len(img_paths)
+ # catch and raise any exceptions in the async loading thread
+ self.exception = None
+        # video_height and video_width will be filled when loading the first image
+ self.video_height = None
+ self.video_width = None
+
+ # load the first frame to fill video_height and video_width and also
+ # to cache it (since it's most likely where the user will click)
+ self.__getitem__(0)
+
+ # load the rest of frames asynchronously without blocking the session start
+ def _load_frames():
+ try:
+ for n in tqdm(
+ range(len(self.images)),
+ desc=f"frame loading (image folder) [rank={RANK}]",
+ ):
+ self.__getitem__(n)
+ except Exception as e:
+ self.exception = e
+
+ self.thread = Thread(target=_load_frames, daemon=True)
+ self.thread.start()
+
+ def __getitem__(self, index):
+ if self.exception is not None:
+ raise RuntimeError("Failure in frame loading thread") from self.exception
+
+ img = self.images[index]
+ if img is not None:
+ return img
+
+ img, video_height, video_width = _load_img_as_tensor(
+ self.img_paths[index], self.image_size
+ )
+ self.video_height = video_height
+ self.video_width = video_width
+ # float16 precision should be sufficient for image tensor storage
+ img = img.to(dtype=torch.float16)
+ # normalize by mean and std
+ img -= self.img_mean
+ img /= self.img_std
+ if not self.offload_video_to_cpu:
+ img = img.cuda()
+ self.images[index] = img
+ return img
+
+ def __len__(self):
+ return len(self.images)
+
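+
+# Hedged usage sketch (added for illustration; the frame paths and image_size below are
+# placeholder assumptions): the loader eagerly caches frame 0, then streams the remaining
+# frames in a daemon thread, so indexing a not-yet-loaded frame simply loads it on demand.
+def _example_async_image_loader():  # never called; documentation sketch only
+    img_mean = torch.tensor((0.5, 0.5, 0.5), dtype=torch.float16)[:, None, None]
+    img_std = torch.tensor((0.5, 0.5, 0.5), dtype=torch.float16)[:, None, None]
+    loader = AsyncImageFrameLoader(
+        img_paths=["frames/00000.jpg", "frames/00001.jpg"],
+        image_size=1024,
+        offload_video_to_cpu=True,
+        img_mean=img_mean,
+        img_std=img_std,
+    )
+    first_frame = loader[0]  # (3, 1024, 1024) float16, mean/std normalized
+    return first_frame, loader.video_height, loader.video_width
+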
+
+class TorchCodecDecoder:
+ """
+ A wrapper to support GPU device and num_threads in TorchCodec decoder,
+ which are not supported by `torchcodec.decoders.SimpleVideoDecoder` yet.
+ """
+
+ def __init__(self, source, dimension_order="NCHW", device="cpu", num_threads=1):
+ from torchcodec import _core as core
+
+ self._source = source # hold a reference to the source to prevent it from GC
+ if isinstance(source, str):
+ self._decoder = core.create_from_file(source, "exact")
+ elif isinstance(source, bytes):
+ self._decoder = core.create_from_bytes(source, "exact")
+ else:
+ raise TypeError(f"Unknown source type: {type(source)}.")
+ assert dimension_order in ("NCHW", "NHWC")
+
+ device_string = str(device)
+ core.scan_all_streams_to_update_metadata(self._decoder)
+ core.add_video_stream(
+ self._decoder,
+ dimension_order=dimension_order,
+ device=device_string,
+ num_threads=(1 if "cuda" in device_string else num_threads),
+ )
+ video_metadata = core.get_container_metadata(self._decoder)
+ best_stream_index = video_metadata.best_video_stream_index
+ assert best_stream_index is not None
+ self.metadata = video_metadata.streams[best_stream_index]
+ assert self.metadata.num_frames_from_content is not None
+ self._num_frames = self.metadata.num_frames_from_content
+
+ def __len__(self) -> int:
+ return self._num_frames
+
+ def __getitem__(self, key: int):
+ from torchcodec import _core as core
+
+ if key < 0:
+ key += self._num_frames
+ if key >= self._num_frames or key < 0:
+ raise IndexError(
+ f"Index {key} is out of bounds; length is {self._num_frames}"
+ )
+ frame_data, *_ = core.get_frame_at_index(
+ self._decoder,
+ frame_index=key,
+ )
+ return frame_data
+
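+
+# Hedged usage sketch (added for illustration; assumes torchcodec is installed and
+# "example.mp4" is a placeholder path): the wrapper exposes len() and integer indexing,
+# returning raw decoded frames in the dimension order chosen at construction time.
+def _example_torchcodec_decoder():  # never called; documentation sketch only
+    decoder = TorchCodecDecoder("example.mp4", dimension_order="NCHW", device="cpu")
+    total_frames = len(decoder)
+    first_frame = decoder[0]  # channels-first frame tensor for "NCHW"
+    return total_frames, first_frame
+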
+
+class FIFOLock:
+ """A lock that ensures FIFO ordering of lock acquisitions."""
+
+ def __init__(self):
+ self._lock = Lock()
+ self._waiters = queue.Queue()
+ self._condition = Condition()
+
+ def acquire(self):
+ ident = get_ident()
+ with self._condition:
+ self._waiters.put(ident)
+ while self._waiters.queue[0] != ident or not self._lock.acquire(
+ blocking=False
+ ):
+ self._condition.wait()
+ # got the lock and it's our turn
+
+ def release(self):
+ with self._condition:
+ self._lock.release()
+ self._waiters.get()
+ self._condition.notify_all()
+
+ def __enter__(self):
+ self.acquire()
+
+ def __exit__(self, t, v, tb):
+ self.release()
+
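+
+# Hedged usage sketch (added for illustration): FIFOLock is a drop-in context manager;
+# threads that are already waiting are granted the lock in the order they called acquire().
+def _example_fifo_lock():  # never called; documentation sketch only
+    lock = FIFOLock()
+    completion_order = []
+
+    def _worker(tag):
+        with lock:  # waiting threads enter this critical section in FIFO order
+            completion_order.append(tag)
+
+    workers = [Thread(target=_worker, args=(i,)) for i in range(4)]
+    for t in workers:
+        t.start()
+    for t in workers:
+        t.join()
+    return completion_order
+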
+
+class AsyncVideoFileLoaderWithTorchCodec:
+ """
+ Loading frames from video files asynchronously without blocking session start.
+
+    Unlike `AsyncVideoFileLoader`, this class uses PyTorch's official TorchCodec library
+ for video decoding, which is more efficient and supports more video formats.
+ """
+
+ def __init__(
+ self,
+ video_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean,
+ img_std,
+ gpu_acceleration=True,
+ gpu_device=None,
+ use_rand_seek_in_loading=False,
+ ):
+ # Check and possibly infer the output device (and also get its GPU id when applicable)
+ assert gpu_device is None or gpu_device.type == "cuda"
+ gpu_id = (
+ gpu_device.index
+ if gpu_device is not None and gpu_device.index is not None
+ else torch.cuda.current_device()
+ )
+ if offload_video_to_cpu:
+ out_device = torch.device("cpu")
+ else:
+ out_device = torch.device("cuda") if gpu_device is None else gpu_device
+ self.out_device = out_device
+ self.gpu_acceleration = gpu_acceleration
+ self.gpu_id = gpu_id
+ self.image_size = image_size
+ self.offload_video_to_cpu = offload_video_to_cpu
+ if not isinstance(img_mean, torch.Tensor):
+ img_mean = torch.tensor(img_mean, dtype=torch.float16)[:, None, None]
+ self.img_mean = img_mean
+ if not isinstance(img_std, torch.Tensor):
+ img_std = torch.tensor(img_std, dtype=torch.float16)[:, None, None]
+ self.img_std = img_std
+
+ if gpu_acceleration:
+ self.img_mean = self.img_mean.to(f"cuda:{self.gpu_id}")
+ self.img_std = self.img_std.to(f"cuda:{self.gpu_id}")
+ decoder_option = {"device": f"cuda:{self.gpu_id}"}
+ else:
+ self.img_mean = self.img_mean.cpu()
+ self.img_std = self.img_std.cpu()
+ decoder_option = {"num_threads": 1} # use a single thread to save memory
+
+ self.rank = int(os.environ.get("RANK", "0"))
+ self.world_size = int(os.environ.get("WORLD_SIZE", "1"))
+ self.async_reader = TorchCodecDecoder(video_path, **decoder_option)
+
+ # `num_frames_from_content` is the true number of frames in the video content
+ # from the scan operation (rather than from the metadata, which could be wrong)
+ self.num_frames = self.async_reader.metadata.num_frames_from_content
+ self.video_height = self.async_reader.metadata.height
+ self.video_width = self.async_reader.metadata.width
+
+ # items in `self._images` will be loaded asynchronously
+ self.images_loaded = [False] * self.num_frames
+ self.images = torch.zeros(
+ self.num_frames,
+ 3,
+ self.image_size,
+ self.image_size,
+ dtype=torch.float16,
+ device=self.out_device,
+ )
+ # catch and raise any exceptions in the async loading thread
+ self.exception = None
+ self.use_rand_seek_in_loading = use_rand_seek_in_loading
+ self.rand_seek_idx_queue = queue.Queue()
+ # use a lock to avoid race condition between concurrent access to torchcodec
+ # libs (which are not thread-safe); the lock is replaced with a nullcontext
+ # when the video is fully loaded
+ self.torchcodec_access_lock = FIFOLock()
+ self._start_video_loading()
+
+ def _load_one_frame(self, idx):
+ frame_resized = self._transform_frame(self.async_reader[idx])
+ return frame_resized
+
+ @torch.inference_mode()
+ def _start_video_loading(self):
+ desc = f"frame loading (TorchCodec w/ {'GPU' if self.gpu_acceleration else 'CPU'}) [rank={RANK}]"
+ pbar = tqdm(desc=desc, total=self.num_frames)
+ self.num_loaded_frames = 0
+ # load the first frame synchronously to cache it before the session is opened
+ idx = self.num_loaded_frames
+ self.images[idx] = self._load_one_frame(idx)
+ self.images_loaded[idx] = True
+ self.num_loaded_frames += 1
+ pbar.update(n=1)
+ self.all_frames_loaded = self.num_loaded_frames == self.num_frames
+
+ # load the frames asynchronously without blocking the session start
+ def _load_frames():
+ finished = self.all_frames_loaded
+ chunk_size = 16
+ while not finished:
+ # asynchronously load `chunk_size` frames each time we acquire the lock
+ with self.torchcodec_access_lock, torch.inference_mode():
+ for _ in range(chunk_size):
+ try:
+ idx = self.num_loaded_frames
+ self.images[idx] = self._load_one_frame(idx)
+ self.images_loaded[idx] = True
+ self.num_loaded_frames += 1
+ pbar.update(n=1)
+ if self.num_loaded_frames >= self.num_frames:
+ finished = True
+ break
+ except Exception as e:
+ self.exception = e
+ raise
+
+ # also read the frame that is being randomly seeked to
+ while True:
+ try:
+ idx = self.rand_seek_idx_queue.get_nowait()
+ if not self.images_loaded[idx]:
+ self.images[idx] = self._load_one_frame(idx)
+ self.images_loaded[idx] = True
+ except queue.Empty:
+ break
+ except Exception as e:
+ self.exception = e
+ raise
+
+ # finished -- check whether we have loaded the total number of frames
+ if self.num_loaded_frames != self.num_frames:
+ raise RuntimeError(
+ f"There are {self.num_frames} frames in the video, but only "
+ f"{self.num_loaded_frames} frames can be loaded successfully."
+ )
+ else:
+ self.all_frames_loaded = True
+ pbar.close()
+ with self.torchcodec_access_lock:
+ import gc
+
+ # all frames have been loaded, so we can release the readers and free their memory
+ # also remove pbar and thread (which shouldn't be a part of session saving)
+ reader = self.async_reader
+ if reader is not None:
+ reader._source = None
+ self.async_reader = None
+ self.pbar = None
+ self.thread = None
+ self.rand_seek_idx_queue = None
+ gc.collect()
+ # remove the lock (replace it with nullcontext) when the video is fully loaded
+ self.torchcodec_access_lock = contextlib.nullcontext()
+
+ self.thread = Thread(target=_load_frames, daemon=True)
+ self.thread.start()
+
+ def _transform_frame(self, frame):
+ frame = frame.clone() # make a copy to avoid modifying the original frame bytes
+ frame = frame.float() # convert to float32 before interpolation
+ frame_resized = F.interpolate(
+ frame[None, :],
+ size=(self.image_size, self.image_size),
+ mode="bicubic",
+ align_corners=False,
+ )[0]
+ # float16 precision should be sufficient for image tensor storage
+ frame_resized = frame_resized.half() # uint8 -> float16
+ frame_resized /= 255
+ frame_resized -= self.img_mean
+ frame_resized /= self.img_std
+ if self.offload_video_to_cpu:
+ frame_resized = frame_resized.cpu()
+ elif frame_resized.device != self.out_device:
+ frame_resized = frame_resized.to(device=self.out_device, non_blocking=True)
+ return frame_resized
+
+ def __getitem__(self, index):
+ if self.exception is not None:
+ raise RuntimeError("Failure in frame loading thread") from self.exception
+
+ max_tries = 1200
+ for _ in range(max_tries):
+ # use a lock to avoid race condition between concurrent access to torchcodec
+ # libs (which are not thread-safe); the lock is replaced with a nullcontext
+ # when the video is fully loaded
+ with self.torchcodec_access_lock:
+ if self.images_loaded[index]:
+ return self.images[index]
+
+ if self.use_rand_seek_in_loading:
+ # async loading hasn't reached this frame yet, so we load this frame individually
+                # (it will be loaded by the _load_frames thread and added to self.images[index])
+ self.rand_seek_idx_queue.put(index)
+
+ time.sleep(0.1)
+
+ raise RuntimeError(f"Failed to load frame {index} after {max_tries} tries")
+
+ def __len__(self):
+ return len(self.images)
+
+ def __getstate__(self):
+ """
+ Remove a few attributes during pickling, so that this async video loader can be
+ saved and loaded as a part of the model session.
+ """
+ # wait for async video loading to finish before pickling
+ async_thread = self.thread
+ if async_thread is not None:
+ async_thread.join()
+ # release a few objects that cannot be pickled
+ reader = self.async_reader
+ if reader is not None:
+ reader._source = None
+ self.async_reader = None
+ self.pbar = None
+ self.thread = None
+ self.rand_seek_idx_queue = None
+ self.torchcodec_access_lock = contextlib.nullcontext()
+ return self.__dict__.copy()
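+
+
+# Hedged usage sketch (added for illustration; assumes torchcodec and a visible CUDA device,
+# and "example.mp4" is a placeholder path): decoding starts in a background thread as soon
+# as the loader is constructed, and indexing blocks until the requested frame is available.
+def _example_async_torchcodec_loader():  # never called; documentation sketch only
+    loader = AsyncVideoFileLoaderWithTorchCodec(
+        video_path="example.mp4",
+        image_size=1024,
+        offload_video_to_cpu=True,
+        img_mean=(0.5, 0.5, 0.5),
+        img_std=(0.5, 0.5, 0.5),
+        gpu_acceleration=False,
+    )
+    frame0 = loader[0]  # (3, 1024, 1024) float16, mean/std normalized
+    return frame0, loader.video_height, loader.video_width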
diff --git a/sam3/model/maskformer_segmentation.py b/sam3/model/maskformer_segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..e1f5ae8f2f1e3626f78a2752f540424f0c92aab2
--- /dev/null
+++ b/sam3/model/maskformer_segmentation.py
@@ -0,0 +1,323 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import math
+from typing import Dict, List, Optional
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+from .model_misc import MLP
+
+
+class LinearPresenceHead(nn.Sequential):
+ def __init__(self, d_model):
+ # a hack to make `LinearPresenceHead` compatible with old checkpoints
+ super().__init__(nn.Identity(), nn.Identity(), nn.Linear(d_model, 1))
+
+ def forward(self, hs, prompt, prompt_mask):
+ return super().forward(hs)
+
+
+class MaskPredictor(nn.Module):
+ def __init__(self, hidden_dim, mask_dim):
+ super().__init__()
+ self.mask_embed = MLP(hidden_dim, hidden_dim, mask_dim, 3)
+
+ def forward(self, obj_queries, pixel_embed):
+ if len(obj_queries.shape) == 3:
+ if pixel_embed.ndim == 3:
+ # batch size was omitted
+ mask_preds = torch.einsum(
+ "bqc,chw->bqhw", self.mask_embed(obj_queries), pixel_embed
+ )
+ else:
+ mask_preds = torch.einsum(
+ "bqc,bchw->bqhw", self.mask_embed(obj_queries), pixel_embed
+ )
+ else:
+ # Assumed to have aux masks
+ if pixel_embed.ndim == 3:
+ # batch size was omitted
+ mask_preds = torch.einsum(
+ "lbqc,chw->lbqhw", self.mask_embed(obj_queries), pixel_embed
+ )
+ else:
+ mask_preds = torch.einsum(
+ "lbqc,bchw->lbqhw", self.mask_embed(obj_queries), pixel_embed
+ )
+
+ return mask_preds
+
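+
+# Hedged shape sketch (added for illustration; all dimensions below are placeholder
+# assumptions): MaskPredictor projects object queries with an MLP and dot-products them
+# against the pixel embedding map, producing one mask logit map per query.
+def _example_mask_predictor():  # never called; documentation sketch only
+    predictor = MaskPredictor(hidden_dim=256, mask_dim=32)
+    obj_queries = torch.randn(2, 10, 256)     # (batch, num_queries, hidden_dim)
+    pixel_embed = torch.randn(2, 32, 72, 72)  # (batch, mask_dim, H, W)
+    mask_logits = predictor(obj_queries, pixel_embed)  # (2, 10, 72, 72)
+    return mask_logits
+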
+
+class SegmentationHead(nn.Module):
+ def __init__(
+ self,
+ hidden_dim,
+ upsampling_stages,
+ use_encoder_inputs=False,
+ aux_masks=False,
+ no_dec=False,
+ pixel_decoder=None,
+ act_ckpt=False,
+ shared_conv=False,
+ compile_mode_pixel_decoder=None,
+ ):
+ super().__init__()
+ self.use_encoder_inputs = use_encoder_inputs
+ self.aux_masks = aux_masks
+ if pixel_decoder is not None:
+ self.pixel_decoder = pixel_decoder
+ else:
+ self.pixel_decoder = PixelDecoder(
+ hidden_dim,
+ upsampling_stages,
+ shared_conv=shared_conv,
+ compile_mode=compile_mode_pixel_decoder,
+ )
+ self.no_dec = no_dec
+ if no_dec:
+ self.mask_predictor = nn.Conv2d(
+ hidden_dim, 1, kernel_size=3, stride=1, padding=1
+ )
+ else:
+ self.mask_predictor = MaskPredictor(hidden_dim, mask_dim=hidden_dim)
+
+ self.act_ckpt = act_ckpt
+
+ # used to update the output dictionary
+ self.instance_keys = ["pred_masks"]
+
+ @property
+ def device(self):
+ self._device = getattr(self, "_device", None) or next(self.parameters()).device
+ return self._device
+
+ def to(self, *args, **kwargs):
+ # clear cached _device in case the model is moved to a different device
+ self._device = None
+ return super().to(*args, **kwargs)
+
+ def _embed_pixels(
+ self,
+ backbone_feats: List[torch.Tensor],
+ image_ids,
+ encoder_hidden_states,
+ ) -> torch.Tensor:
+ feature_device = backbone_feats[0].device # features could be on CPU
+ model_device = self.device
+ image_ids_ = image_ids.to(feature_device)
+ if self.use_encoder_inputs:
+ if backbone_feats[0].shape[0] > 1:
+ # For bs > 1, we construct the per query backbone features
+ backbone_visual_feats = []
+ for feat in backbone_feats:
+ # Copy the img features per query (pixel decoder won't share img feats)
+ backbone_visual_feats.append(feat[image_ids_, ...].to(model_device))
+ else:
+ # Bs=1, we rely on broadcasting for query-based processing
+ backbone_visual_feats = [bb_feat.clone() for bb_feat in backbone_feats]
+ # Extract visual embeddings
+ encoder_hidden_states = encoder_hidden_states.permute(1, 2, 0)
+ spatial_dim = math.prod(backbone_feats[-1].shape[-2:])
+ encoder_visual_embed = encoder_hidden_states[..., :spatial_dim].reshape(
+ -1, *backbone_feats[-1].shape[1:]
+ )
+
+ backbone_visual_feats[-1] = encoder_visual_embed
+ if self.act_ckpt:
+ pixel_embed = checkpoint.checkpoint(
+ self.pixel_decoder, backbone_visual_feats, use_reentrant=False
+ )
+ else:
+ pixel_embed = self.pixel_decoder(backbone_visual_feats)
+ else:
+ backbone_feats = [x.to(model_device) for x in backbone_feats]
+ pixel_embed = self.pixel_decoder(backbone_feats)
+ if pixel_embed.shape[0] == 1:
+ # For batch_size=1 training, we can avoid the indexing to save memory
+ pixel_embed = pixel_embed.squeeze(0)
+ else:
+ pixel_embed = pixel_embed[image_ids, ...]
+ return pixel_embed
+
+ def forward(
+ self,
+ backbone_feats: List[torch.Tensor],
+ obj_queries: torch.Tensor,
+ image_ids,
+ encoder_hidden_states: Optional[torch.Tensor] = None,
+ **kwargs,
+ ) -> Dict[str, torch.Tensor]:
+ if self.use_encoder_inputs:
+ assert encoder_hidden_states is not None
+
+ pixel_embed = self._embed_pixels(
+ backbone_feats=backbone_feats,
+ image_ids=image_ids,
+ encoder_hidden_states=encoder_hidden_states,
+ )
+
+ if self.no_dec:
+ mask_pred = self.mask_predictor(pixel_embed)
+ elif self.aux_masks:
+ mask_pred = self.mask_predictor(obj_queries, pixel_embed)
+ else:
+ mask_pred = self.mask_predictor(obj_queries[-1], pixel_embed)
+
+ return {"pred_masks": mask_pred}
+
+
+class PixelDecoder(nn.Module):
+ def __init__(
+ self,
+ hidden_dim,
+ num_upsampling_stages,
+ interpolation_mode="nearest",
+ shared_conv=False,
+ compile_mode=None,
+ ):
+ super().__init__()
+ self.hidden_dim = hidden_dim
+ self.num_upsampling_stages = num_upsampling_stages
+ self.interpolation_mode = interpolation_mode
+ conv_layers = []
+ norms = []
+ num_convs = 1 if shared_conv else num_upsampling_stages
+ for _ in range(num_convs):
+ conv_layers.append(nn.Conv2d(self.hidden_dim, self.hidden_dim, 3, 1, 1))
+ norms.append(nn.GroupNorm(8, self.hidden_dim))
+
+ self.conv_layers = nn.ModuleList(conv_layers)
+ self.norms = nn.ModuleList(norms)
+ self.shared_conv = shared_conv
+ self.out_dim = self.conv_layers[-1].out_channels
+ if compile_mode is not None:
+ self.forward = torch.compile(
+ self.forward, mode=compile_mode, dynamic=True, fullgraph=True
+ )
+            # Disabling optimize_ddp is needed to make activation checkpointing happy; since we
+            # don't know here whether this module is checkpointed, we disable it whenever compiling.
+            torch._dynamo.config.optimize_ddp = False
+
+ def forward(self, backbone_feats: List[torch.Tensor]):
+ # Assumes backbone features are already projected (C == hidden dim)
+
+ prev_fpn = backbone_feats[-1]
+ fpn_feats = backbone_feats[:-1]
+ for layer_idx, bb_feat in enumerate(fpn_feats[::-1]):
+ curr_fpn = bb_feat
+ prev_fpn = curr_fpn + F.interpolate(
+ prev_fpn, size=curr_fpn.shape[-2:], mode=self.interpolation_mode
+ )
+ if self.shared_conv:
+ # only one conv layer
+ layer_idx = 0
+ prev_fpn = self.conv_layers[layer_idx](prev_fpn)
+ prev_fpn = F.relu(self.norms[layer_idx](prev_fpn))
+
+ return prev_fpn
+
+
+class UniversalSegmentationHead(SegmentationHead):
+ """This module handles semantic+instance segmentation"""
+
+ def __init__(
+ self,
+ hidden_dim,
+ upsampling_stages,
+ pixel_decoder,
+ aux_masks=False,
+ no_dec=False,
+ act_ckpt=False,
+ presence_head: bool = False,
+ dot_product_scorer=None,
+ cross_attend_prompt=None,
+ ):
+ super().__init__(
+ hidden_dim=hidden_dim,
+ upsampling_stages=upsampling_stages,
+ use_encoder_inputs=True,
+ aux_masks=aux_masks,
+ no_dec=no_dec,
+ pixel_decoder=pixel_decoder,
+ act_ckpt=act_ckpt,
+ )
+ self.d_model = hidden_dim
+
+ if dot_product_scorer is not None:
+ assert presence_head, "Specifying a dot product scorer without a presence head is likely a mistake"
+
+ self.presence_head = None
+ if presence_head:
+ self.presence_head = (
+ dot_product_scorer
+ if dot_product_scorer is not None
+ else LinearPresenceHead(self.d_model)
+ )
+
+ self.cross_attend_prompt = cross_attend_prompt
+ if self.cross_attend_prompt is not None:
+ self.cross_attn_norm = nn.LayerNorm(self.d_model)
+
+ self.semantic_seg_head = nn.Conv2d(self.pixel_decoder.out_dim, 1, kernel_size=1)
+ self.instance_seg_head = nn.Conv2d(
+ self.pixel_decoder.out_dim, self.d_model, kernel_size=1
+ )
+
+ def forward(
+ self,
+ backbone_feats: List[torch.Tensor],
+ obj_queries: torch.Tensor,
+ image_ids,
+ encoder_hidden_states: Optional[torch.Tensor] = None,
+ prompt: Optional[torch.Tensor] = None,
+ prompt_mask: Optional[torch.Tensor] = None,
+ **kwargs,
+ ) -> Dict[str, Optional[torch.Tensor]]:
+ assert encoder_hidden_states is not None
+ bs = encoder_hidden_states.shape[1]
+
+ if self.cross_attend_prompt is not None:
+ tgt2 = self.cross_attn_norm(encoder_hidden_states)
+ tgt2 = self.cross_attend_prompt(
+ query=tgt2,
+ key=prompt,
+ value=prompt,
+ key_padding_mask=prompt_mask,
+ )[0]
+ encoder_hidden_states = tgt2 + encoder_hidden_states
+
+ presence_logit = None
+ if self.presence_head is not None:
+ pooled_enc = encoder_hidden_states.mean(0)
+ presence_logit = (
+ self.presence_head(
+ pooled_enc.view(1, bs, 1, self.d_model),
+ prompt=prompt,
+ prompt_mask=prompt_mask,
+ )
+ .squeeze(0)
+ .squeeze(1)
+ )
+
+ pixel_embed = self._embed_pixels(
+ backbone_feats=backbone_feats,
+ image_ids=image_ids,
+ encoder_hidden_states=encoder_hidden_states,
+ )
+
+ instance_embeds = self.instance_seg_head(pixel_embed)
+
+ if self.no_dec:
+ mask_pred = self.mask_predictor(instance_embeds)
+ elif self.aux_masks:
+ mask_pred = self.mask_predictor(obj_queries, instance_embeds)
+ else:
+ mask_pred = self.mask_predictor(obj_queries[-1], instance_embeds)
+
+ return {
+ "pred_masks": mask_pred,
+ "semantic_seg": self.semantic_seg_head(pixel_embed),
+ "presence_logit": presence_logit,
+ }
diff --git a/sam3/model/memory.py b/sam3/model/memory.py
new file mode 100644
index 0000000000000000000000000000000000000000..bfde5487d85006ab1aa35044fd431260dff2870e
--- /dev/null
+++ b/sam3/model/memory.py
@@ -0,0 +1,201 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import math
+from typing import Dict, List, Tuple, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+try:
+ from timm.layers import DropPath
+except ModuleNotFoundError:
+ # compatibility for older timm versions
+ from timm.models.layers import DropPath
+
+from .model_misc import get_clones, LayerNorm2d
+
+
+class SimpleMaskDownSampler(nn.Module):
+ """
+ Progressively downsample a mask by total_stride, each time by stride.
+ Note that LayerNorm is applied per *token*, like in ViT.
+
+ With each downsample (by a factor stride**2), channel capacity increases by the same factor.
+ In the end, we linearly project to embed_dim channels.
+ """
+
+ def __init__(
+ self,
+ embed_dim=256,
+ kernel_size=4,
+ stride=4,
+ padding=0,
+ total_stride=16,
+ activation=nn.GELU,
+        # Option to interpolate the input mask before downsampling with convs; in that case,
+        # total_stride is assumed to be measured after interpolation. If set to the input
+        # resolution or None, we don't interpolate (default None to be safe for older configs).
+ interpol_size=None,
+ ):
+ super().__init__()
+ num_layers = int(math.log2(total_stride) // math.log2(stride))
+ assert stride**num_layers == total_stride
+ self.encoder = nn.Sequential()
+ mask_in_chans, mask_out_chans = 1, 1
+ for _ in range(num_layers):
+ mask_out_chans = mask_in_chans * (stride**2)
+ self.encoder.append(
+ nn.Conv2d(
+ mask_in_chans,
+ mask_out_chans,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ )
+ )
+ self.encoder.append(LayerNorm2d(mask_out_chans))
+ self.encoder.append(activation())
+ mask_in_chans = mask_out_chans
+
+ self.encoder.append(nn.Conv2d(mask_out_chans, embed_dim, kernel_size=1))
+ self.interpol_size = interpol_size
+ if self.interpol_size is not None:
+ assert isinstance(
+ self.interpol_size, (list, tuple)
+ ), f"Unsupported type {type(self.interpol_size)}. Should be a list or tuple."
+ self.interpol_size = list(interpol_size)
+ assert len(self.interpol_size) == 2
+
+ def forward(self, x: torch.Tensor):
+ if self.interpol_size is not None and self.interpol_size != list(x.shape[-2:]):
+ x = F.interpolate(
+ x.float(),
+ size=self.interpol_size,
+ align_corners=False,
+ mode="bilinear",
+ antialias=True,
+ )
+ return self.encoder(x)
+
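+
+# Hedged usage sketch (added for illustration; the 256x256 input is a placeholder): with
+# stride=4 and total_stride=16 there are two conv stages, so channels grow 1 -> 16 -> 256
+# before the final 1x1 projection to embed_dim, and the spatial size shrinks by 16x.
+def _example_mask_downsampler():  # never called; documentation sketch only
+    downsampler = SimpleMaskDownSampler(embed_dim=256, kernel_size=4, stride=4, total_stride=16)
+    masks = torch.randn(2, 1, 256, 256)  # (batch, 1, H, W) mask logits
+    down = downsampler(masks)  # (2, 256, 16, 16)
+    return down
+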
+
+# Lightly adapted from ConvNext (https://github.com/facebookresearch/ConvNeXt)
+class CXBlock(nn.Module):
+ r"""ConvNeXt Block. There are two equivalent implementations:
+ (1) DwConv -> LayerNorm (channels_first) -> 1x1 Conv -> GELU -> 1x1 Conv; all in (N, C, H, W)
+ (2) DwConv -> Permute to (N, H, W, C); LayerNorm (channels_last) -> Linear -> GELU -> Linear; Permute back
+ We use (2) as we find it slightly faster in PyTorch
+
+ Args:
+ dim (int): Number of input channels.
+ drop_path (float): Stochastic depth rate. Default: 0.0
+ layer_scale_init_value (float): Init value for Layer Scale. Default: 1e-6.
+ """
+
+ def __init__(
+ self,
+ dim,
+ kernel_size=7,
+ padding=3,
+ drop_path=0.0,
+ layer_scale_init_value=1e-6,
+ use_dwconv=True,
+ ):
+ super().__init__()
+ self.dwconv = nn.Conv2d(
+ dim,
+ dim,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=dim if use_dwconv else 1,
+ ) # depthwise conv
+ self.norm = LayerNorm2d(dim, eps=1e-6)
+ self.pwconv1 = nn.Linear(
+ dim, 4 * dim
+ ) # pointwise/1x1 convs, implemented with linear layers
+ self.act = nn.GELU()
+ self.pwconv2 = nn.Linear(4 * dim, dim)
+ self.gamma = (
+ nn.Parameter(layer_scale_init_value * torch.ones((dim)), requires_grad=True)
+ if layer_scale_init_value > 0
+ else None
+ )
+ self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+
+ def forward(self, x):
+ input = x
+ x = self.dwconv(x)
+ x = self.norm(x)
+ x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
+ x = self.pwconv1(x)
+ x = self.act(x)
+ x = self.pwconv2(x)
+ if self.gamma is not None:
+ x = self.gamma * x
+ x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
+
+ x = input + self.drop_path(x)
+ return x
+
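+
+# Hedged usage sketch (added for illustration; the feature map size is a placeholder):
+# CXBlock is shape-preserving, so it can be stacked freely inside a fuser.
+def _example_cxblock():  # never called; documentation sketch only
+    block = CXBlock(dim=256)
+    feats = torch.randn(1, 256, 64, 64)
+    out = block(feats)  # same shape as the input: (1, 256, 64, 64)
+    return out
+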
+
+class SimpleFuser(nn.Module):
+ def __init__(self, layer, num_layers, dim=None, input_projection=False):
+ super().__init__()
+ self.proj = nn.Identity()
+ self.layers = get_clones(layer, num_layers)
+
+ if input_projection:
+ assert dim is not None
+ self.proj = nn.Conv2d(dim, dim, kernel_size=1)
+
+ def forward(self, x):
+ # normally x: (N, C, H, W)
+ x = self.proj(x)
+ for layer in self.layers:
+ x = layer(x)
+ return x
+
+
+class SimpleMaskEncoder(nn.Module):
+ def __init__(
+ self,
+ out_dim,
+ mask_downsampler,
+ fuser,
+ position_encoding,
+ in_dim=256, # in_dim of pix_feats
+ ):
+ super().__init__()
+
+ self.mask_downsampler = mask_downsampler
+
+ self.pix_feat_proj = nn.Conv2d(in_dim, in_dim, kernel_size=1)
+ self.fuser = fuser
+ self.position_encoding = position_encoding
+ self.out_proj = nn.Identity()
+ if out_dim != in_dim:
+ self.out_proj = nn.Conv2d(in_dim, out_dim, kernel_size=1)
+
+ def forward(
+ self,
+ pix_feat: torch.Tensor,
+ masks: torch.Tensor,
+ skip_mask_sigmoid: bool = False,
+    ) -> Dict[str, Union[torch.Tensor, List[torch.Tensor]]]:
+ ## Process masks
+        # apply sigmoid so that there is less domain shift from GT masks, which are boolean
+ if not skip_mask_sigmoid:
+ masks = F.sigmoid(masks)
+ masks = self.mask_downsampler(masks)
+
+ ## Fuse pix_feats and downsampled masks
+        # in case the visual features are on CPU, move them to the masks' device
+ pix_feat = pix_feat.to(masks.device)
+
+ x = self.pix_feat_proj(pix_feat)
+ x = x + masks
+ x = self.fuser(x)
+ x = self.out_proj(x)
+
+ pos = self.position_encoding(x).to(x.dtype)
+
+ return {"vision_features": x, "vision_pos_enc": [pos]}
diff --git a/sam3/model/model_misc.py b/sam3/model/model_misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..2cb44b3fc4e422f087d923f5449990fc86c60b39
--- /dev/null
+++ b/sam3/model/model_misc.py
@@ -0,0 +1,428 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Various utility models"""
+
+import copy
+import math
+import weakref
+from collections.abc import Iterator
+from contextlib import AbstractContextManager
+from enum import auto, Enum
+from typing import Dict, List, Optional, Union
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from torch import nn, Tensor
+from typing_extensions import override
+
+
+def inverse_sigmoid(x, eps=1e-3):
+ """
+ The inverse function for sigmoid activation function.
+    Note: it might face numerical issues with fp16 and a small eps.
+ """
+ x = x.clamp(min=0, max=1)
+ x1 = x.clamp(min=eps)
+ x2 = (1 - x).clamp(min=eps)
+ return torch.log(x1 / x2)
+
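+
+# Hedged sanity-check sketch (added for illustration): inverse_sigmoid undoes torch.sigmoid
+# up to the eps clamping, which is how normalized coordinates are mapped back to logit space.
+def _example_inverse_sigmoid():  # never called; documentation sketch only
+    logits = torch.tensor([-2.0, 0.0, 3.0])
+    probs = torch.sigmoid(logits)
+    recovered = inverse_sigmoid(probs)  # approximately equal to the original logits
+    return recovered
+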
+
+class MultiheadAttentionWrapper(nn.MultiheadAttention):
+ def forward(self, *args, **kwargs):
+ kwargs["need_weights"] = False
+ return super().forward(*args, **kwargs)
+
+
+class DotProductScoring(torch.nn.Module):
+ def __init__(
+ self,
+ d_model,
+ d_proj,
+ prompt_mlp=None,
+ clamp_logits=True,
+ clamp_max_val=12.0,
+ ):
+ super().__init__()
+ self.d_proj = d_proj
+ assert isinstance(prompt_mlp, torch.nn.Module) or prompt_mlp is None
+ self.prompt_mlp = prompt_mlp # an optional MLP projection for prompt
+ self.prompt_proj = torch.nn.Linear(d_model, d_proj)
+ self.hs_proj = torch.nn.Linear(d_model, d_proj)
+ self.scale = float(1.0 / np.sqrt(d_proj))
+ self.clamp_logits = clamp_logits
+ if self.clamp_logits:
+ self.clamp_max_val = clamp_max_val
+
+ def mean_pool_text(self, prompt, prompt_mask):
+ # is_valid has shape (seq, bs, 1), where 1 is valid and 0 is padding
+ is_valid = (~prompt_mask).float().permute(1, 0)[..., None]
+ # num_valid has shape (bs, 1)
+ num_valid = torch.clamp(torch.sum(is_valid, dim=0), min=1.0)
+ # mean pool over all the valid tokens -- pooled_prompt has shape (bs, proj_dim)
+ pooled_prompt = (prompt * is_valid).sum(dim=0) / num_valid
+ return pooled_prompt
+
+ def forward(self, hs, prompt, prompt_mask):
+ # hs has shape (num_layer, bs, num_query, d_model)
+ # prompt has shape (seq, bs, d_model)
+        # prompt_mask has shape (bs, seq), where True marks padding tokens and False marks valid ones
+ assert hs.dim() == 4 and prompt.dim() == 3 and prompt_mask.dim() == 2
+
+ # apply MLP on prompt if specified
+ if self.prompt_mlp is not None:
+ prompt = self.prompt_mlp(prompt)
+
+ # first, get the mean-pooled version of the prompt
+ pooled_prompt = self.mean_pool_text(prompt, prompt_mask)
+
+ # then, project pooled_prompt and hs to d_proj dimensions
+ proj_pooled_prompt = self.prompt_proj(pooled_prompt) # (bs, d_proj)
+ proj_hs = self.hs_proj(hs) # (num_layer, bs, num_query, d_proj)
+
+ # finally, get dot-product scores of shape (num_layer, bs, num_query, 1)
+ scores = torch.matmul(proj_hs, proj_pooled_prompt.unsqueeze(-1))
+ scores *= self.scale
+
+ # clamp scores to a max value to avoid numerical issues in loss or matcher
+ if self.clamp_logits:
+ scores.clamp_(min=-self.clamp_max_val, max=self.clamp_max_val)
+
+ return scores
+
+
+class LayerScale(nn.Module):
+ def __init__(
+ self,
+ dim: int,
+ init_values: Union[float, Tensor] = 1e-5,
+ inplace: bool = False,
+ ) -> None:
+ super().__init__()
+ self.inplace = inplace
+ self.gamma = nn.Parameter(init_values * torch.ones(dim))
+
+ def forward(self, x: Tensor) -> Tensor:
+ return x.mul_(self.gamma) if self.inplace else x * self.gamma
+
+
+class LayerNorm2d(nn.Module):
+ def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(num_channels))
+ self.bias = nn.Parameter(torch.zeros(num_channels))
+ self.eps = eps
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ u = x.mean(1, keepdim=True)
+ s = (x - u).pow(2).mean(1, keepdim=True)
+ x = (x - u) / torch.sqrt(s + self.eps)
+ x = self.weight[:, None, None] * x + self.bias[:, None, None]
+ return x
+
+
+class TransformerWrapper(nn.Module):
+ def __init__(
+ self,
+ encoder,
+ decoder,
+ d_model: int,
+ two_stage_type="none", # ["none"] only for now
+ pos_enc_at_input_dec=True,
+ ):
+ super().__init__()
+
+ self.encoder = encoder
+ self.decoder = decoder
+ self.num_queries = decoder.num_queries if decoder is not None else None
+ self.pos_enc_at_input_dec = pos_enc_at_input_dec
+
+ # for two stage
+ assert two_stage_type in ["none"], "unknown param {} of two_stage_type".format(
+ two_stage_type
+ )
+ self.two_stage_type = two_stage_type
+
+ self._reset_parameters()
+ self.d_model = d_model
+
+ def _reset_parameters(self):
+ for n, p in self.named_parameters():
+ if p.dim() > 1:
+ if (
+ "box_embed" not in n
+ and "query_embed" not in n
+ and "reference_points" not in n
+ ):
+ nn.init.xavier_uniform_(p)
+
+
+class MLP(nn.Module):
+ """Very simple multi-layer perceptron (also called FFN)"""
+
+ def __init__(
+ self,
+ input_dim: int,
+ hidden_dim: int,
+ output_dim: int,
+ num_layers: int,
+ dropout: float = 0.0,
+ residual: bool = False,
+ out_norm: Optional[nn.Module] = None,
+ ):
+ super().__init__()
+ self.num_layers = num_layers
+ h = [hidden_dim] * (num_layers - 1)
+ self.layers = nn.ModuleList(
+ nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
+ )
+ self.drop = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
+ # whether to add the output as a residual connection to the input
+ if residual and input_dim != output_dim:
+ raise ValueError("residual is only supported if input_dim == output_dim")
+ self.residual = residual
+ # whether to apply a normalization layer to the output
+ assert isinstance(out_norm, nn.Module) or out_norm is None
+ self.out_norm = out_norm or nn.Identity()
+
+ def forward(self, x):
+ orig_x = x
+ for i, layer in enumerate(self.layers):
+ x = self.drop(F.relu(layer(x))) if i < self.num_layers - 1 else layer(x)
+ if self.residual:
+ x = x + orig_x
+ x = self.out_norm(x)
+ return x
+
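+
+# Hedged usage sketch (added for illustration; all dimensions are placeholder assumptions):
+# a 3-layer MLP with ReLU between hidden layers and no activation on the final layer.
+def _example_mlp():  # never called; documentation sketch only
+    mlp = MLP(input_dim=256, hidden_dim=256, output_dim=4, num_layers=3)
+    x = torch.randn(8, 100, 256)
+    y = mlp(x)  # (8, 100, 4)
+    return y
+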
+
+def get_clones(module, N):
+ return nn.ModuleList([copy.deepcopy(module) for i in range(N)])
+
+
+def get_clones_seq(module, N):
+ return nn.Sequential(*[copy.deepcopy(module) for i in range(N)])
+
+
+def get_activation_fn(activation):
+ """Return an activation function given a string"""
+ if activation == "relu":
+ return F.relu
+ if activation == "gelu":
+ return F.gelu
+ if activation == "glu":
+ return F.glu
+ raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
+
+
+def get_activation_module(activation):
+ """Return an activation function given a string"""
+ if activation == "relu":
+ return nn.ReLU
+ if activation == "gelu":
+ return nn.GELU
+ if activation == "glu":
+ return nn.GLU
+ raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
+
+
+def get_valid_ratio(mask):
+ _, H, W = mask.shape
+ valid_H = torch.sum(~mask[:, :, 0], 1)
+ valid_W = torch.sum(~mask[:, 0, :], 1)
+ valid_ratio_h = valid_H.float() / H
+ valid_ratio_w = valid_W.float() / W
+ valid_ratio = torch.stack([valid_ratio_w, valid_ratio_h], -1)
+ return valid_ratio
+
+
+def gen_sineembed_for_position(pos_tensor, num_feats=256):
+ assert num_feats % 2 == 0
+ num_feats = num_feats // 2
+ # n_query, bs, _ = pos_tensor.size()
+ # sineembed_tensor = torch.zeros(n_query, bs, 256)
+ scale = 2 * math.pi
+ dim_t = torch.arange(num_feats, dtype=torch.float32, device=pos_tensor.device)
+ dim_t = 10000 ** (2 * (torch.div(dim_t, 2, rounding_mode="floor")) / num_feats)
+ x_embed = pos_tensor[:, :, 0] * scale
+ y_embed = pos_tensor[:, :, 1] * scale
+ pos_x = x_embed[:, :, None] / dim_t
+ pos_y = y_embed[:, :, None] / dim_t
+ pos_x = torch.stack(
+ (pos_x[:, :, 0::2].sin(), pos_x[:, :, 1::2].cos()), dim=3
+ ).flatten(2)
+ pos_y = torch.stack(
+ (pos_y[:, :, 0::2].sin(), pos_y[:, :, 1::2].cos()), dim=3
+ ).flatten(2)
+ if pos_tensor.size(-1) == 2:
+ pos = torch.cat((pos_y, pos_x), dim=2)
+ elif pos_tensor.size(-1) == 4:
+ w_embed = pos_tensor[:, :, 2] * scale
+ pos_w = w_embed[:, :, None] / dim_t
+ pos_w = torch.stack(
+ (pos_w[:, :, 0::2].sin(), pos_w[:, :, 1::2].cos()), dim=3
+ ).flatten(2)
+
+ h_embed = pos_tensor[:, :, 3] * scale
+ pos_h = h_embed[:, :, None] / dim_t
+ pos_h = torch.stack(
+ (pos_h[:, :, 0::2].sin(), pos_h[:, :, 1::2].cos()), dim=3
+ ).flatten(2)
+
+ pos = torch.cat((pos_y, pos_x, pos_w, pos_h), dim=2)
+ else:
+ raise ValueError("Unknown pos_tensor shape(-1):{}".format(pos_tensor.size(-1)))
+ return pos
+
+
+class SAM3Output(list):
+ """
+ A class representing the output of a SAM3 model.
+ It provides an iterable interface that supports different iteration modes, including iterating over all steps per stage,
+ last step per stage, and flattened output.
+ Attributes:
+ output: The output of the SAM3 model, represented as a list of lists.
+ iter_mode: The current iteration mode.
+ Example:
+ >>> output = [[1, 2], [3, 4], [5, 6]]
+ >>> sam3_output = SAM3Output(output)
+ >>> for step in sam3_output:
+ ... print(step)
+ [1, 2]
+ [3, 4]
+ [5, 6]
+    >>> with SAM3Output.iteration_mode(sam3_output, SAM3Output.IterMode.LAST_STEP_PER_STAGE) as sam3_last_step_out:
+    ...     for step in sam3_last_step_out:
+    ...         print(step)
+    2
+    4
+    6
+    >>> with SAM3Output.iteration_mode(sam3_output, SAM3Output.IterMode.FLATTENED) as sam3_flattened_out:
+ ... for step in sam3_flattened_out:
+ ... print(step)
+ 1
+ 2
+ 3
+ 4
+ 5
+ 6
+ """
+
+ class IterMode(Enum):
+        # Defines the type of iterator over outputs.
+ ALL_STEPS_PER_STAGE = auto()
+ LAST_STEP_PER_STAGE = auto()
+ FLATTENED = auto() # Returns each interactivity step as if it is a separate stage (this is used in SAM3Image model)
+
+ def __init__(
+ self,
+ output: List[List[Dict]] = None,
+ iter_mode: IterMode = IterMode.ALL_STEPS_PER_STAGE,
+ loss_stages: Optional[List[int]] = None,
+ ):
+ if output is not None:
+ assert (
+ isinstance(output, list)
+ and len(output) > 0
+ and isinstance(output[0], list)
+ ), "Expected output to be a list of lists"
+ self.output = output
+ else:
+ self.output = []
+ assert isinstance(
+ iter_mode, SAM3Output.IterMode
+        ), f"iter_mode should be of enum type 'SAM3Output.IterMode'. Got {type(iter_mode)}"
+
+ self.iter_mode = iter_mode
+ # We create a weak reference to self to be used in the lambda functions.
+        # This is to avoid cyclic references and let SAM3Output be garbage collected.
+ self_ref = weakref.ref(self)
+ self._mode2iter = {
+ SAM3Output.IterMode.ALL_STEPS_PER_STAGE: lambda: iter(self_ref().output),
+ SAM3Output.IterMode.LAST_STEP_PER_STAGE: lambda: (
+ inner_list[-1] for inner_list in self_ref().output
+ ),
+ SAM3Output.IterMode.FLATTENED: lambda: (
+ element for inner_list in self_ref().output for element in inner_list
+ ),
+ }
+ self.loss_stages = loss_stages
+
+ @override
+ def __iter__(self) -> Iterator:
+ return self._mode2iter[self.iter_mode]()
+
+ def __getitem__(self, index):
+ """
+ Returns the item at the specified index.
+ Args:
+ index (int): The index of the item to return.
+ Returns:
+ list or element: The item at the specified index.
+ """
+ assert isinstance(index, int), f"index should be an integer. Got {type(index)}"
+ if self.iter_mode == SAM3Output.IterMode.ALL_STEPS_PER_STAGE:
+ return self.output[index]
+ elif self.iter_mode == SAM3Output.IterMode.LAST_STEP_PER_STAGE:
+ return self.output[index][-1]
+ elif self.iter_mode == SAM3Output.IterMode.FLATTENED:
+ if index == -1:
+                return self.output[-1][-1]
+ else:
+ flattened_output = sum(self.output, [])
+ return flattened_output[index]
+
+ class _IterationMode(AbstractContextManager):
+ """
+ A context manager that temporarily changes the iteration mode of a SAM3Output object.
+ This class is used internally by the SAM3Output.iteration_mode method.
+ """
+
+ def __init__(
+ self, model_output: "SAM3Output", iter_mode: "SAM3Output.IterMode"
+ ):
+ self._model_output = model_output
+ self._orig_iter_mode = model_output.iter_mode
+ self._new_iter_mode = iter_mode
+
+ @override
+ def __enter__(self) -> "SAM3Output":
+ self._model_output.iter_mode = self._new_iter_mode
+ return self._model_output
+
+ @override
+ def __exit__(self, exc_type, exc_value, traceback):
+ self._model_output.iter_mode = self._orig_iter_mode
+ return super().__exit__(exc_type, exc_value, traceback)
+
+ @staticmethod
+ def iteration_mode(
+ model_output: "SAM3Output", iter_mode: IterMode
+ ) -> _IterationMode:
+ """
+ Returns a context manager that allows you to temporarily change the iteration mode of the SAM3Output object.
+ Args:
+ model_output: The SAM3Output object.
+ iter_mode: The new iteration mode.
+ Returns:
+ SAM3Output._IterationMode: A context manager that changes the iteration mode of the SAM3Output object.
+ """
+ return SAM3Output._IterationMode(model_output=model_output, iter_mode=iter_mode)
+
+ def append(self, item: list):
+ assert isinstance(
+ item, list
+ ), f"Only list items are supported. Got {type(item)}"
+ self.output.append(item)
+
+ def __repr__(self):
+ return self.output.__repr__()
+
+ def __len__(self):
+ if self.iter_mode in [
+ SAM3Output.IterMode.ALL_STEPS_PER_STAGE,
+ SAM3Output.IterMode.LAST_STEP_PER_STAGE,
+ ]:
+ return len(self.output)
+ elif self.iter_mode == SAM3Output.IterMode.FLATTENED:
+ flattened_output = sum(self.output, [])
+ return len(flattened_output)
diff --git a/sam3/model/necks.py b/sam3/model/necks.py
new file mode 100644
index 0000000000000000000000000000000000000000..21bf9b850ca8629940d1f2ee262912dafc3a7d78
--- /dev/null
+++ b/sam3/model/necks.py
@@ -0,0 +1,125 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Necks are the interface between a vision backbone and the rest of the detection model"""
+
+from copy import deepcopy
+from typing import List, Optional, Tuple
+
+import torch
+
+import torch.nn as nn
+
+
+class Sam3DualViTDetNeck(nn.Module):
+ def __init__(
+ self,
+ trunk: nn.Module,
+ position_encoding: nn.Module,
+ d_model: int,
+ scale_factors=(4.0, 2.0, 1.0, 0.5),
+ add_sam2_neck: bool = False,
+ ):
+ """
+ SimpleFPN neck a la ViTDet
+ (From detectron2, very lightly adapted)
+ It supports a "dual neck" setting, where we have two identical necks (for SAM3 and SAM2), with different weights
+
+ :param trunk: the backbone
+ :param position_encoding: the positional encoding to use
+ :param d_model: the dimension of the model
+ """
+ super().__init__()
+ self.trunk = trunk
+ self.position_encoding = position_encoding
+ self.convs = nn.ModuleList()
+
+ self.scale_factors = scale_factors
+ use_bias = True
+ dim: int = self.trunk.channel_list[-1]
+
+ for _, scale in enumerate(scale_factors):
+ current = nn.Sequential()
+
+ if scale == 4.0:
+ current.add_module(
+ "dconv_2x2_0",
+ nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2),
+ )
+ current.add_module(
+ "gelu",
+ nn.GELU(),
+ )
+ current.add_module(
+ "dconv_2x2_1",
+ nn.ConvTranspose2d(dim // 2, dim // 4, kernel_size=2, stride=2),
+ )
+ out_dim = dim // 4
+ elif scale == 2.0:
+ current.add_module(
+ "dconv_2x2",
+ nn.ConvTranspose2d(dim, dim // 2, kernel_size=2, stride=2),
+ )
+ out_dim = dim // 2
+ elif scale == 1.0:
+ out_dim = dim
+ elif scale == 0.5:
+ current.add_module(
+ "maxpool_2x2",
+ nn.MaxPool2d(kernel_size=2, stride=2),
+ )
+ out_dim = dim
+ else:
+ raise NotImplementedError(f"scale_factor={scale} is not supported yet.")
+
+ current.add_module(
+ "conv_1x1",
+ nn.Conv2d(
+ in_channels=out_dim,
+ out_channels=d_model,
+ kernel_size=1,
+ bias=use_bias,
+ ),
+ )
+ current.add_module(
+ "conv_3x3",
+ nn.Conv2d(
+ in_channels=d_model,
+ out_channels=d_model,
+ kernel_size=3,
+ padding=1,
+ bias=use_bias,
+ ),
+ )
+ self.convs.append(current)
+
+ self.sam2_convs = None
+ if add_sam2_neck:
+ # Assumes sam2 neck is just a clone of the original neck
+ self.sam2_convs = deepcopy(self.convs)
+
+ def forward(
+ self, tensor_list: List[torch.Tensor]
+ ) -> Tuple[
+ List[torch.Tensor],
+ List[torch.Tensor],
+ Optional[List[torch.Tensor]],
+ Optional[List[torch.Tensor]],
+ ]:
+ xs = self.trunk(tensor_list)
+ sam3_out, sam3_pos = [], []
+ sam2_out, sam2_pos = None, None
+ if self.sam2_convs is not None:
+ sam2_out, sam2_pos = [], []
+ x = xs[-1] # simpleFPN
+ for i in range(len(self.convs)):
+ sam3_x_out = self.convs[i](x)
+ sam3_pos_out = self.position_encoding(sam3_x_out).to(sam3_x_out.dtype)
+ sam3_out.append(sam3_x_out)
+ sam3_pos.append(sam3_pos_out)
+
+ if self.sam2_convs is not None:
+ sam2_x_out = self.sam2_convs[i](x)
+ sam2_pos_out = self.position_encoding(sam2_x_out).to(sam2_x_out.dtype)
+ sam2_out.append(sam2_x_out)
+ sam2_pos.append(sam2_pos_out)
+ return sam3_out, sam3_pos, sam2_out, sam2_pos
diff --git a/sam3/model/position_encoding.py b/sam3/model/position_encoding.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb3f4055a7bae489de46368d9b94cf9d33595370
--- /dev/null
+++ b/sam3/model/position_encoding.py
@@ -0,0 +1,124 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import math
+from typing import Optional
+
+import torch
+from torch import nn
+
+
+class PositionEmbeddingSine(nn.Module):
+ """
+ This is a more standard version of the position embedding, very similar to the one
+ used by the Attention is all you need paper, generalized to work on images.
+ """
+
+ def __init__(
+ self,
+ num_pos_feats,
+ temperature: int = 10000,
+ normalize: bool = True,
+ scale: Optional[float] = None,
+ precompute_resolution: Optional[int] = None,
+ ):
+ super().__init__()
+ assert num_pos_feats % 2 == 0, "Expecting even model width"
+ self.num_pos_feats = num_pos_feats // 2
+ self.temperature = temperature
+ self.normalize = normalize
+ if scale is not None and normalize is False:
+ raise ValueError("normalize should be True if scale is passed")
+ if scale is None:
+ scale = 2 * math.pi
+ self.scale = scale
+
+ self.cache = {}
+ # Precompute positional encodings under `precompute_resolution` to fill the cache
+ # and avoid symbolic shape tracing errors in torch.compile in PyTorch 2.4 nightly.
+ if precompute_resolution is not None:
+ # We precompute pos enc for stride 4, 8, 16 and 32 to fill `self.cache`.
+ precompute_sizes = [
+ (precompute_resolution // 4, precompute_resolution // 4),
+ (precompute_resolution // 8, precompute_resolution // 8),
+ (precompute_resolution // 16, precompute_resolution // 16),
+ (precompute_resolution // 32, precompute_resolution // 32),
+ ]
+ for size in precompute_sizes:
+ tensors = torch.zeros((1, 1) + size, device="cuda")
+ self.forward(tensors)
+ # further clone and detach it in the cache (just to be safe)
+ self.cache[size] = self.cache[size].clone().detach()
+
+ def _encode_xy(self, x, y):
+ # The positions are expected to be normalized
+ assert len(x) == len(y) and x.ndim == y.ndim == 1
+ x_embed = x * self.scale
+ y_embed = y * self.scale
+
+ dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+ dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
+
+ pos_x = x_embed[:, None] / dim_t
+ pos_y = y_embed[:, None] / dim_t
+ pos_x = torch.stack(
+ (pos_x[:, 0::2].sin(), pos_x[:, 1::2].cos()), dim=2
+ ).flatten(1)
+ pos_y = torch.stack(
+ (pos_y[:, 0::2].sin(), pos_y[:, 1::2].cos()), dim=2
+ ).flatten(1)
+ return pos_x, pos_y
+
+ @torch.no_grad()
+ def encode_boxes(self, x, y, w, h):
+ pos_x, pos_y = self._encode_xy(x, y)
+ pos = torch.cat((pos_y, pos_x, h[:, None], w[:, None]), dim=1)
+ return pos
+
+ encode = encode_boxes # Backwards compatibility
+
+ @torch.no_grad()
+ def encode_points(self, x, y, labels):
+ (bx, nx), (by, ny), (bl, nl) = x.shape, y.shape, labels.shape
+ assert bx == by and nx == ny and bx == bl and nx == nl
+ pos_x, pos_y = self._encode_xy(x.flatten(), y.flatten())
+ pos_x, pos_y = pos_x.reshape(bx, nx, -1), pos_y.reshape(by, ny, -1)
+ pos = torch.cat((pos_y, pos_x, labels[:, :, None]), dim=2)
+ return pos
+
+ @torch.no_grad()
+ def forward(self, x):
+        cache_key = (x.shape[-2], x.shape[-1])
+ if cache_key in self.cache:
+ return self.cache[cache_key][None].repeat(x.shape[0], 1, 1, 1)
+ y_embed = (
+ torch.arange(1, x.shape[-2] + 1, dtype=torch.float32, device=x.device)
+ .view(1, -1, 1)
+ .repeat(x.shape[0], 1, x.shape[-1])
+ )
+ x_embed = (
+ torch.arange(1, x.shape[-1] + 1, dtype=torch.float32, device=x.device)
+ .view(1, 1, -1)
+ .repeat(x.shape[0], x.shape[-2], 1)
+ )
+
+ if self.normalize:
+ eps = 1e-6
+ y_embed = y_embed / (y_embed[:, -1:, :] + eps) * self.scale
+ x_embed = x_embed / (x_embed[:, :, -1:] + eps) * self.scale
+
+ dim_t = torch.arange(self.num_pos_feats, dtype=torch.float32, device=x.device)
+ dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)
+
+ pos_x = x_embed[:, :, :, None] / dim_t
+ pos_y = y_embed[:, :, :, None] / dim_t
+ pos_x = torch.stack(
+ (pos_x[:, :, :, 0::2].sin(), pos_x[:, :, :, 1::2].cos()), dim=4
+ ).flatten(3)
+ pos_y = torch.stack(
+ (pos_y[:, :, :, 0::2].sin(), pos_y[:, :, :, 1::2].cos()), dim=4
+ ).flatten(3)
+ pos = torch.cat((pos_y, pos_x), dim=3).permute(0, 3, 1, 2)
+ if cache_key is not None:
+ self.cache[cache_key] = pos[0]
+ return pos
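+
+
+# Hedged usage sketch (added for illustration; the 64x64 feature map is a placeholder):
+# only the shape and device of the input are used; the output stacks sine/cosine terms
+# for y and x into num_pos_feats channels.
+def _example_position_embedding_sine():  # never called; documentation sketch only
+    pos_enc = PositionEmbeddingSine(num_pos_feats=256)
+    feats = torch.zeros(1, 256, 64, 64)
+    pos = pos_enc(feats)  # (1, 256, 64, 64)
+    return pos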
diff --git a/sam3/model/sam1_task_predictor.py b/sam3/model/sam1_task_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..f5e49b1ebc8f342c57ba265c0023f906608a93a1
--- /dev/null
+++ b/sam3/model/sam1_task_predictor.py
@@ -0,0 +1,458 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import logging
+
+from typing import List, Optional, Tuple, Union
+
+import numpy as np
+import torch
+
+import torch.nn as nn
+from PIL.Image import Image
+
+from sam3.model.sam3_tracker_base import Sam3TrackerBase
+from sam3.model.utils.sam1_utils import SAM2Transforms
+
+
+# Adapted from https://github.com/facebookresearch/sam2/blob/main/sam2/sam2_image_predictor.py
+class SAM3InteractiveImagePredictor(nn.Module):
+ def __init__(
+ self,
+ sam_model: Sam3TrackerBase,
+ mask_threshold=0.0,
+ max_hole_area=256.0,
+ max_sprinkle_area=0.0,
+ **kwargs,
+ ) -> None:
+ """
+ Uses SAM-3 to calculate the image embedding for an image, and then
+        allows repeated, efficient mask prediction given prompts.
+
+ Arguments:
+          sam_model: The model to use for mask prediction.
+          mask_threshold (float): The threshold to use when converting mask logits
+            to binary masks. Masks are thresholded at 0 by default.
+          max_hole_area (int): If max_hole_area > 0, we fill small holes with area up to
+            max_hole_area in low_res_masks.
+          max_sprinkle_area (int): If max_sprinkle_area > 0, we remove small sprinkles with
+            area up to max_sprinkle_area in low_res_masks.
+ """
+ super().__init__()
+ self.model = sam_model
+ self._transforms = SAM2Transforms(
+ resolution=self.model.image_size,
+ mask_threshold=mask_threshold,
+ max_hole_area=max_hole_area,
+ max_sprinkle_area=max_sprinkle_area,
+ )
+
+ # Predictor state
+ self._is_image_set = False
+ self._features = None
+ self._orig_hw = None
+ # Whether the predictor is set for single image or a batch of images
+ self._is_batch = False
+
+ # Predictor config
+ self.mask_threshold = mask_threshold
+
+ # Spatial dim for backbone feature maps
+ self._bb_feat_sizes = [
+ (288, 288),
+ (144, 144),
+ (72, 72),
+ ]
+
+ @torch.no_grad()
+ def set_image(
+ self,
+ image: Union[np.ndarray, Image],
+ ) -> None:
+ """
+ Calculates the image embeddings for the provided image, allowing
+ masks to be predicted with the 'predict' method.
+
+ Arguments:
+          image (np.ndarray or PIL Image): The input image to embed, in RGB format with
+            pixel values in [0, 255]. It should be in HWC format if np.ndarray, or WHC
+            format if PIL Image.
+ """
+ self.reset_predictor()
+ # Transform the image to the form expected by the model
+ if isinstance(image, np.ndarray):
+ logging.info("For numpy array image, we assume (HxWxC) format")
+ self._orig_hw = [image.shape[:2]]
+ elif isinstance(image, Image):
+ w, h = image.size
+ self._orig_hw = [(h, w)]
+ else:
+ raise NotImplementedError("Image format not supported")
+
+ input_image = self._transforms(image)
+ input_image = input_image[None, ...].to(self.device)
+
+ assert (
+ len(input_image.shape) == 4 and input_image.shape[1] == 3
+ ), f"input_image must be of size 1x3xHxW, got {input_image.shape}"
+ logging.info("Computing image embeddings for the provided image...")
+ backbone_out = self.model.forward_image(input_image)
+ (
+ _,
+ vision_feats,
+ _,
+ _,
+ ) = self.model._prepare_backbone_features(backbone_out)
+        # Add no_mem_embed, which is added to the lowest-resolution feature map during training on videos
+ vision_feats[-1] = vision_feats[-1] + self.model.no_mem_embed
+
+ feats = [
+ feat.permute(1, 2, 0).view(1, -1, *feat_size)
+ for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1])
+ ][::-1]
+ self._features = {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
+ self._is_image_set = True
+ logging.info("Image embeddings computed.")
+
+ @torch.no_grad()
+ def set_image_batch(
+ self,
+        image_list: List[np.ndarray],
+ ) -> None:
+ """
+ Calculates the image embeddings for the provided image batch, allowing
+ masks to be predicted with the 'predict_batch' method.
+
+ Arguments:
+            image_list (List[np.ndarray]): The input images to embed, in RGB format. Each
+              image should be an np.ndarray in HWC format with pixel values in [0, 255].
+ """
+ self.reset_predictor()
+ assert isinstance(image_list, list)
+ self._orig_hw = []
+ for image in image_list:
+ assert isinstance(
+ image, np.ndarray
+ ), "Images are expected to be an np.ndarray in RGB format, and of shape HWC"
+ self._orig_hw.append(image.shape[:2])
+ # Transform the image to the form expected by the model
+ img_batch = self._transforms.forward_batch(image_list)
+ img_batch = img_batch.to(self.device)
+ batch_size = img_batch.shape[0]
+ assert (
+ len(img_batch.shape) == 4 and img_batch.shape[1] == 3
+ ), f"img_batch must be of size Bx3xHxW, got {img_batch.shape}"
+ logging.info("Computing image embeddings for the provided images...")
+ backbone_out = self.model.forward_image(img_batch)
+ (
+ _,
+ vision_feats,
+ _,
+ _,
+ ) = self.model._prepare_backbone_features(backbone_out)
+        # Add no_mem_embed, which is added to the lowest-resolution feature map during training on videos
+ vision_feats[-1] = vision_feats[-1] + self.model.no_mem_embed
+
+ feats = [
+ feat.permute(1, 2, 0).view(batch_size, -1, *feat_size)
+ for feat, feat_size in zip(vision_feats[::-1], self._bb_feat_sizes[::-1])
+ ][::-1]
+ self._features = {"image_embed": feats[-1], "high_res_feats": feats[:-1]}
+ self._is_image_set = True
+ self._is_batch = True
+ logging.info("Image embeddings computed.")
+
+ def predict_batch(
+ self,
+ point_coords_batch: List[np.ndarray] = None,
+ point_labels_batch: List[np.ndarray] = None,
+ box_batch: List[np.ndarray] = None,
+ mask_input_batch: List[np.ndarray] = None,
+ multimask_output: bool = True,
+ return_logits: bool = False,
+ normalize_coords=True,
+ ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
+        """This function is very similar to predict(...); however, it is used in batched mode, where the model is expected to generate predictions on multiple images.
+ It returns a tuple of lists of masks, ious, and low_res_masks_logits.
+ """
+ assert self._is_batch, "This function should only be used when in batched mode"
+ if not self._is_image_set:
+ raise RuntimeError(
+ "An image must be set with .set_image_batch(...) before mask prediction."
+ )
+ num_images = len(self._features["image_embed"])
+ all_masks = []
+ all_ious = []
+ all_low_res_masks = []
+ for img_idx in range(num_images):
+ # Transform input prompts
+ point_coords = (
+ point_coords_batch[img_idx] if point_coords_batch is not None else None
+ )
+ point_labels = (
+ point_labels_batch[img_idx] if point_labels_batch is not None else None
+ )
+ box = box_batch[img_idx] if box_batch is not None else None
+ mask_input = (
+ mask_input_batch[img_idx] if mask_input_batch is not None else None
+ )
+ mask_input, unnorm_coords, labels, unnorm_box = self._prep_prompts(
+ point_coords,
+ point_labels,
+ box,
+ mask_input,
+ normalize_coords,
+ img_idx=img_idx,
+ )
+ masks, iou_predictions, low_res_masks = self._predict(
+ unnorm_coords,
+ labels,
+ unnorm_box,
+ mask_input,
+ multimask_output,
+ return_logits=return_logits,
+ img_idx=img_idx,
+ )
+ masks_np = masks.squeeze(0).float().detach().cpu().numpy()
+ iou_predictions_np = (
+ iou_predictions.squeeze(0).float().detach().cpu().numpy()
+ )
+ low_res_masks_np = low_res_masks.squeeze(0).float().detach().cpu().numpy()
+ all_masks.append(masks_np)
+ all_ious.append(iou_predictions_np)
+ all_low_res_masks.append(low_res_masks_np)
+
+ return all_masks, all_ious, all_low_res_masks
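+
+    # Illustrative sketch of the batched workflow above (descriptive comment only).
+    # The `predictor`, `image_paths`, and box values are hypothetical placeholders:
+    #
+    #     images = [np.array(Image.open(p).convert("RGB")) for p in image_paths]
+    #     predictor.set_image_batch(images)
+    #     masks, ious, low_res = predictor.predict_batch(
+    #         box_batch=[np.array([[50, 30, 200, 180]]) for _ in images],
+    #         multimask_output=False,
+    #     )
+    #     # masks[i] is a CxHxW array for the i-th image in the batch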
+
+ def predict(
+ self,
+ point_coords: Optional[np.ndarray] = None,
+ point_labels: Optional[np.ndarray] = None,
+ box: Optional[np.ndarray] = None,
+ mask_input: Optional[np.ndarray] = None,
+ multimask_output: bool = True,
+ return_logits: bool = False,
+ normalize_coords=True,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+ """
+ Predict masks for the given input prompts, using the currently set image.
+
+ Arguments:
+ point_coords (np.ndarray or None): A Nx2 array of point prompts to the
+ model. Each point is in (X,Y) in pixels.
+ point_labels (np.ndarray or None): A length N array of labels for the
+ point prompts. 1 indicates a foreground point and 0 indicates a
+ background point.
+          box (np.ndarray or None): A length 4 array giving a box prompt to the
+ model, in XYXY format.
+ mask_input (np.ndarray): A low resolution mask input to the model, typically
+ coming from a previous prediction iteration. Has form 1xHxW, where
+ for SAM, H=W=256.
+ multimask_output (bool): If true, the model will return three masks.
+ For ambiguous input prompts (such as a single click), this will often
+ produce better masks than a single prediction. If only a single
+ mask is needed, the model's predicted quality score can be used
+ to select the best mask. For non-ambiguous prompts, such as multiple
+ input prompts, multimask_output=False can give better results.
+ return_logits (bool): If true, returns un-thresholded masks logits
+ instead of a binary mask.
+          normalize_coords (bool): If true, the point coordinates are normalized to the range [0, 1]; in that case point_coords is expected to be given with respect to the image dimensions.
+
+ Returns:
+ (np.ndarray): The output masks in CxHxW format, where C is the
+ number of masks, and (H, W) is the original image size.
+ (np.ndarray): An array of length C containing the model's
+ predictions for the quality of each mask.
+ (np.ndarray): An array of shape CxHxW, where C is the number
+ of masks and H=W=256. These low resolution logits can be passed to
+ a subsequent iteration as mask input.
+ """
+ if not self._is_image_set:
+ raise RuntimeError(
+ "An image must be set with .set_image(...) before mask prediction."
+ )
+
+ # Transform input prompts
+
+ mask_input, unnorm_coords, labels, unnorm_box = self._prep_prompts(
+ point_coords, point_labels, box, mask_input, normalize_coords
+ )
+
+ masks, iou_predictions, low_res_masks = self._predict(
+ unnorm_coords,
+ labels,
+ unnorm_box,
+ mask_input,
+ multimask_output,
+ return_logits=return_logits,
+ )
+
+ masks_np = masks.squeeze(0).float().detach().cpu().numpy()
+ iou_predictions_np = iou_predictions.squeeze(0).float().detach().cpu().numpy()
+ low_res_masks_np = low_res_masks.squeeze(0).float().detach().cpu().numpy()
+ return masks_np, iou_predictions_np, low_res_masks_np
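+
+    # Illustrative sketch of a single-image call to predict() above (descriptive
+    # comment only); `predictor` and the click coordinates are hypothetical placeholders:
+    #
+    #     predictor.set_image(np.array(Image.open("photo.jpg").convert("RGB")))
+    #     masks, scores, low_res_logits = predictor.predict(
+    #         point_coords=np.array([[320, 240]]),
+    #         point_labels=np.array([1]),
+    #         multimask_output=True,
+    #     )
+    #     best_mask = masks[np.argmax(scores)]  # keep the highest-scoring mask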
+
+ def _prep_prompts(
+ self, point_coords, point_labels, box, mask_logits, normalize_coords, img_idx=-1
+ ):
+ unnorm_coords, labels, unnorm_box, mask_input = None, None, None, None
+ if point_coords is not None:
+ assert (
+ point_labels is not None
+ ), "point_labels must be supplied if point_coords is supplied."
+ point_coords = torch.as_tensor(
+ point_coords, dtype=torch.float, device=self.device
+ )
+ unnorm_coords = self._transforms.transform_coords(
+ point_coords, normalize=normalize_coords, orig_hw=self._orig_hw[img_idx]
+ )
+ labels = torch.as_tensor(point_labels, dtype=torch.int, device=self.device)
+ if len(unnorm_coords.shape) == 2:
+ unnorm_coords, labels = unnorm_coords[None, ...], labels[None, ...]
+ if box is not None:
+ box = torch.as_tensor(box, dtype=torch.float, device=self.device)
+ unnorm_box = self._transforms.transform_boxes(
+ box, normalize=normalize_coords, orig_hw=self._orig_hw[img_idx]
+ ) # Bx2x2
+ if mask_logits is not None:
+ mask_input = torch.as_tensor(
+ mask_logits, dtype=torch.float, device=self.device
+ )
+ if len(mask_input.shape) == 3:
+ mask_input = mask_input[None, :, :, :]
+ return mask_input, unnorm_coords, labels, unnorm_box
+
+ @torch.no_grad()
+ def _predict(
+ self,
+ point_coords: Optional[torch.Tensor],
+ point_labels: Optional[torch.Tensor],
+ boxes: Optional[torch.Tensor] = None,
+ mask_input: Optional[torch.Tensor] = None,
+ multimask_output: bool = True,
+ return_logits: bool = False,
+ img_idx: int = -1,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Predict masks for the given input prompts, using the currently set image.
+ Input prompts are batched torch tensors and are expected to already be
+ transformed to the input frame using SAM2Transforms.
+
+ Arguments:
+ point_coords (torch.Tensor or None): A BxNx2 array of point prompts to the
+ model. Each point is in (X,Y) in pixels.
+ point_labels (torch.Tensor or None): A BxN array of labels for the
+ point prompts. 1 indicates a foreground point and 0 indicates a
+ background point.
+          boxes (torch.Tensor or None): A Bx4 array giving box prompts to the
+            model, in XYXY format.
+          mask_input (torch.Tensor): A low resolution mask input to the model, typically
+ coming from a previous prediction iteration. Has form Bx1xHxW, where
+ for SAM, H=W=256. Masks returned by a previous iteration of the
+ predict method do not need further transformation.
+ multimask_output (bool): If true, the model will return three masks.
+ For ambiguous input prompts (such as a single click), this will often
+ produce better masks than a single prediction. If only a single
+ mask is needed, the model's predicted quality score can be used
+ to select the best mask. For non-ambiguous prompts, such as multiple
+ input prompts, multimask_output=False can give better results.
+ return_logits (bool): If true, returns un-thresholded masks logits
+ instead of a binary mask.
+
+ Returns:
+ (torch.Tensor): The output masks in BxCxHxW format, where C is the
+ number of masks, and (H, W) is the original image size.
+ (torch.Tensor): An array of shape BxC containing the model's
+ predictions for the quality of each mask.
+ (torch.Tensor): An array of shape BxCxHxW, where C is the number
+ of masks and H=W=256. These low res logits can be passed to
+ a subsequent iteration as mask input.
+ """
+ if not self._is_image_set:
+ raise RuntimeError(
+ "An image must be set with .set_image(...) before mask prediction."
+ )
+
+ if point_coords is not None:
+ concat_points = (point_coords, point_labels)
+ else:
+ concat_points = None
+
+ # Embed prompts
+ if boxes is not None:
+ box_coords = boxes.reshape(-1, 2, 2)
+ box_labels = torch.tensor([[2, 3]], dtype=torch.int, device=boxes.device)
+ box_labels = box_labels.repeat(boxes.size(0), 1)
+ # we merge "boxes" and "points" into a single "concat_points" input (where
+ # boxes are added at the beginning) to sam_prompt_encoder
+ if concat_points is not None:
+ concat_coords = torch.cat([box_coords, concat_points[0]], dim=1)
+ concat_labels = torch.cat([box_labels, concat_points[1]], dim=1)
+ concat_points = (concat_coords, concat_labels)
+ else:
+ concat_points = (box_coords, box_labels)
+
+ sparse_embeddings, dense_embeddings = self.model.sam_prompt_encoder(
+ points=concat_points,
+ boxes=None,
+ masks=mask_input,
+ )
+
+ # Predict masks
+ batched_mode = (
+ concat_points is not None and concat_points[0].shape[0] > 1
+ ) # multi object prediction
+ high_res_features = [
+ feat_level[img_idx].unsqueeze(0)
+ for feat_level in self._features["high_res_feats"]
+ ]
+ low_res_masks, iou_predictions, _, _ = self.model.sam_mask_decoder(
+ image_embeddings=self._features["image_embed"][img_idx].unsqueeze(0),
+ image_pe=self.model.sam_prompt_encoder.get_dense_pe(),
+ sparse_prompt_embeddings=sparse_embeddings,
+ dense_prompt_embeddings=dense_embeddings,
+ multimask_output=multimask_output,
+ repeat_image=batched_mode,
+ high_res_features=high_res_features,
+ )
+
+ # Upscale the masks to the original image resolution
+ masks = self._transforms.postprocess_masks(
+ low_res_masks, self._orig_hw[img_idx]
+ )
+ low_res_masks = torch.clamp(low_res_masks, -32.0, 32.0)
+ if not return_logits:
+ masks = masks > self.mask_threshold
+
+ return masks, iou_predictions, low_res_masks
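+
+    # Note on the box handling in _predict above (descriptive comment only): a
+    # Bx4 XYXY box is reshaped to Bx2x2 corner coordinates and paired with the
+    # labels [2, 3] (box corner labels), so each box enters sam_prompt_encoder as
+    # two specially-labeled points alongside any regular click prompts.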
+
+ def get_image_embedding(self) -> torch.Tensor:
+ """
+ Returns the image embeddings for the currently set image, with
+ shape 1xCxHxW, where C is the embedding dimension and (H,W) are
+ the embedding spatial dimension of SAM (typically C=256, H=W=64).
+ """
+ if not self._is_image_set:
+ raise RuntimeError(
+ "An image must be set with .set_image(...) to generate an embedding."
+ )
+ assert (
+ self._features is not None
+ ), "Features must exist if an image has been set."
+ return self._features["image_embed"]
+
+ @property
+ def device(self) -> torch.device:
+ return self.model.device
+
+ def reset_predictor(self) -> None:
+ """
+ Resets the image embeddings and other state variables.
+ """
+ self._is_image_set = False
+ self._features = None
+ self._orig_hw = None
+ self._is_batch = False
diff --git a/sam3/model/sam3_image.py b/sam3/model/sam3_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..aafe520b9b923564cd8f048e4bfab6fef79417ae
--- /dev/null
+++ b/sam3/model/sam3_image.py
@@ -0,0 +1,883 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import os
+from copy import deepcopy
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+import torch
+
+from sam3.model.model_misc import SAM3Output
+
+from sam3.model.sam1_task_predictor import SAM3InteractiveImagePredictor
+from sam3.model.vl_combiner import SAM3VLBackbone
+from sam3.perflib.nms import nms_masks
+
+from sam3.train.data.collator import BatchedDatapoint
+
+from .act_ckpt_utils import activation_ckpt_wrapper
+
+from .box_ops import box_cxcywh_to_xyxy
+
+from .geometry_encoders import Prompt
+from .model_misc import inverse_sigmoid
+
+
+def _update_out(out, out_name, out_value, auxiliary=True, update_aux=True):
+ out[out_name] = out_value[-1] if auxiliary else out_value
+ if auxiliary and update_aux:
+ if "aux_outputs" not in out:
+ out["aux_outputs"] = [{} for _ in range(len(out_value) - 1)]
+ assert len(out["aux_outputs"]) == len(out_value) - 1
+ for aux_output, aux_value in zip(out["aux_outputs"], out_value[:-1]):
+ aux_output[out_name] = aux_value
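+
+# Example of the bookkeeping performed by _update_out above (descriptive comment
+# only): for a per-decoder-layer stack `v` of length L, `_update_out(out, "pred_logits", v)`
+# stores v[-1] under out["pred_logits"] and v[0..L-2] under
+# out["aux_outputs"][i]["pred_logits"], so auxiliary losses can supervise the
+# intermediate decoder layers as well.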
+
+
+class Sam3Image(torch.nn.Module):
+ TEXT_ID_FOR_TEXT = 0
+ TEXT_ID_FOR_VISUAL = 1
+ TEXT_ID_FOR_GEOMETRIC = 2
+
+ def __init__(
+ self,
+ backbone: SAM3VLBackbone,
+ transformer,
+ input_geometry_encoder,
+ segmentation_head=None,
+ num_feature_levels=1,
+ o2m_mask_predict=True,
+ dot_prod_scoring=None,
+ use_instance_query: bool = True,
+ multimask_output: bool = True,
+ use_act_checkpoint_seg_head: bool = True,
+ interactivity_in_encoder: bool = True,
+ matcher=None,
+ use_dot_prod_scoring=True,
+ supervise_joint_box_scores: bool = False, # only relevant if using presence token/score
+ detach_presence_in_joint_score: bool = False, # only relevant if using presence token/score
+ separate_scorer_for_instance: bool = False,
+ num_interactive_steps_val: int = 0,
+ inst_interactive_predictor: SAM3InteractiveImagePredictor = None,
+ **kwargs,
+ ):
+ super().__init__()
+ self.backbone = backbone
+ self.geometry_encoder = input_geometry_encoder
+ self.transformer = transformer
+ self.hidden_dim = transformer.d_model
+ self.num_feature_levels = num_feature_levels
+ self.segmentation_head = segmentation_head
+
+ self.o2m_mask_predict = o2m_mask_predict
+
+ self.dot_prod_scoring = dot_prod_scoring
+ self.use_act_checkpoint_seg_head = use_act_checkpoint_seg_head
+ self.interactivity_in_encoder = interactivity_in_encoder
+ self.matcher = matcher
+
+ self.num_interactive_steps_val = num_interactive_steps_val
+ self.use_dot_prod_scoring = use_dot_prod_scoring
+
+ if self.use_dot_prod_scoring:
+ assert dot_prod_scoring is not None
+ self.dot_prod_scoring = dot_prod_scoring
+ self.instance_dot_prod_scoring = None
+ if separate_scorer_for_instance:
+ self.instance_dot_prod_scoring = deepcopy(dot_prod_scoring)
+ else:
+ self.class_embed = torch.nn.Linear(self.hidden_dim, 1)
+ self.instance_class_embed = None
+ if separate_scorer_for_instance:
+ self.instance_class_embed = deepcopy(self.class_embed)
+
+ self.supervise_joint_box_scores = supervise_joint_box_scores
+ self.detach_presence_in_joint_score = detach_presence_in_joint_score
+
+ # verify the number of queries for O2O and O2M
+ num_o2o_static = self.transformer.decoder.num_queries
+ num_o2m_static = self.transformer.decoder.num_o2m_queries
+ assert num_o2m_static == (num_o2o_static if self.transformer.decoder.dac else 0)
+ self.dac = self.transformer.decoder.dac
+
+ self.use_instance_query = use_instance_query
+ self.multimask_output = multimask_output
+
+ self.inst_interactive_predictor = inst_interactive_predictor
+
+ @property
+ def device(self):
+ self._device = getattr(self, "_device", None) or next(self.parameters()).device
+ return self._device
+
+ def to(self, *args, **kwargs):
+ # clear cached _device in case the model is moved to a different device
+ self._device = None
+ return super().to(*args, **kwargs)
+
+ def _get_img_feats(self, backbone_out, img_ids):
+ """Retrieve correct image features from backbone output."""
+ if "backbone_fpn" in backbone_out:
+ if "id_mapping" in backbone_out and backbone_out["id_mapping"] is not None:
+ img_ids = backbone_out["id_mapping"][img_ids]
+ # If this assert fails, it likely means we're requesting different img_ids (perhaps a different frame?)
+ # We currently don't expect this to happen. We could technically trigger a recompute here,
+ # but likely at the cost of a cpu<->gpu sync point, which would deteriorate perf
+ torch._assert_async((img_ids >= 0).all())
+
+ vis_feats = backbone_out["backbone_fpn"][-self.num_feature_levels :]
+ vis_pos_enc = backbone_out["vision_pos_enc"][-self.num_feature_levels :]
+ vis_feat_sizes = [x.shape[-2:] for x in vis_pos_enc] # (H, W) shapes
+ # index and flatten visual features NxCxHxW => HWxNxC (batch-first => seq-first)
+ img_feats = [x[img_ids].flatten(2).permute(2, 0, 1) for x in vis_feats]
+ img_pos_embeds = [
+ x[img_ids].flatten(2).permute(2, 0, 1) for x in vis_pos_enc
+ ]
+ return backbone_out, img_feats, img_pos_embeds, vis_feat_sizes
+
+ # Image features not available in backbone output, so we compute them on the fly
+ # This case likely occurs for video. In that case, we want to forward only the current frame
+ img_batch = backbone_out["img_batch_all_stages"]
+ if img_ids.numel() > 1:
+ # Only forward backbone on unique image ids to avoid repetitive computation
+ unique_ids, _ = torch.unique(img_ids, return_inverse=True)
+ else:
+ unique_ids, _ = img_ids, slice(None)
+ # Compute the image features on those unique image ids
+ # note: we allow using a list (or other indexable types) of tensors as img_batch
+ # (e.g. for async frame loading in demo). In this case we index img_batch.tensors directly
+ if isinstance(img_batch, torch.Tensor):
+ image = img_batch[unique_ids]
+ elif unique_ids.numel() == 1:
+ image = img_batch[unique_ids.item()].unsqueeze(0)
+ else:
+ image = torch.stack([img_batch[i] for i in unique_ids.tolist()])
+ # `img_batch` might be fp16 and offloaded to CPU
+ image = image.to(dtype=torch.float32, device=self.device)
+ # Next time we call this function, we want to remember which indices we computed
+ id_mapping = torch.full(
+ (len(img_batch),), -1, dtype=torch.long, device=self.device
+ )
+ id_mapping[unique_ids] = torch.arange(len(unique_ids), device=self.device)
+ backbone_out = {
+ **backbone_out,
+ **self.backbone.forward_image(image),
+ "id_mapping": id_mapping,
+ }
+ assert "backbone_fpn" in backbone_out
+ return self._get_img_feats(backbone_out, img_ids=img_ids)
+
+ def _encode_prompt(
+ self,
+ backbone_out,
+ find_input,
+ geometric_prompt,
+ visual_prompt_embed=None,
+ visual_prompt_mask=None,
+ encode_text=True,
+ prev_mask_pred=None,
+ ):
+ # index text features (note that regardless of early or late fusion, the batch size of
+ # `txt_feats` is always the number of *prompts* in the encoder)
+ txt_ids = find_input.text_ids
+ txt_feats = backbone_out["language_features"][:, txt_ids]
+ txt_masks = backbone_out["language_mask"][txt_ids]
+
+ feat_tuple = self._get_img_feats(backbone_out, find_input.img_ids)
+ backbone_out, img_feats, img_pos_embeds, vis_feat_sizes = feat_tuple
+
+ if prev_mask_pred is not None:
+ img_feats = [img_feats[-1] + prev_mask_pred]
+ # Encode geometry
+ geo_feats, geo_masks = self.geometry_encoder(
+ geo_prompt=geometric_prompt,
+ img_feats=img_feats,
+ img_sizes=vis_feat_sizes,
+ img_pos_embeds=img_pos_embeds,
+ )
+ if visual_prompt_embed is None:
+ visual_prompt_embed = torch.zeros(
+ (0, *geo_feats.shape[1:]), device=geo_feats.device
+ )
+ visual_prompt_mask = torch.zeros(
+ (*geo_masks.shape[:-1], 0),
+ device=geo_masks.device,
+ dtype=geo_masks.dtype,
+ )
+ if encode_text:
+ prompt = torch.cat([txt_feats, geo_feats, visual_prompt_embed], dim=0)
+ prompt_mask = torch.cat([txt_masks, geo_masks, visual_prompt_mask], dim=1)
+ else:
+ prompt = torch.cat([geo_feats, visual_prompt_embed], dim=0)
+ prompt_mask = torch.cat([geo_masks, visual_prompt_mask], dim=1)
+ return prompt, prompt_mask, backbone_out
+
+ def _run_encoder(
+ self,
+ backbone_out,
+ find_input,
+ prompt,
+ prompt_mask,
+ encoder_extra_kwargs: Optional[Dict] = None,
+ ):
+ feat_tuple = self._get_img_feats(backbone_out, find_input.img_ids)
+ backbone_out, img_feats, img_pos_embeds, vis_feat_sizes = feat_tuple
+
+ # Run the encoder
+ prompt_pos_embed = torch.zeros_like(prompt)
+ # make a copy of the image feature lists since the encoder may modify these lists in-place
+ memory = self.transformer.encoder(
+ src=img_feats.copy(),
+ src_key_padding_mask=None,
+ src_pos=img_pos_embeds.copy(),
+ prompt=prompt,
+ prompt_pos=prompt_pos_embed,
+ prompt_key_padding_mask=prompt_mask,
+ feat_sizes=vis_feat_sizes,
+ encoder_extra_kwargs=encoder_extra_kwargs,
+ )
+ encoder_out = {
+ # encoded image features
+ "encoder_hidden_states": memory["memory"],
+ "pos_embed": memory["pos_embed"],
+ "padding_mask": memory["padding_mask"],
+ "level_start_index": memory["level_start_index"],
+ "spatial_shapes": memory["spatial_shapes"],
+ "valid_ratios": memory["valid_ratios"],
+ "vis_feat_sizes": vis_feat_sizes,
+ # encoded text features (or other prompts)
+ "prompt_before_enc": prompt,
+ "prompt_after_enc": memory.get("memory_text", prompt),
+ "prompt_mask": prompt_mask,
+ }
+ return backbone_out, encoder_out, feat_tuple
+
+ def _run_decoder(
+ self,
+ pos_embed,
+ memory,
+ src_mask,
+ out,
+ prompt,
+ prompt_mask,
+ encoder_out,
+ ):
+ bs = memory.shape[1]
+ query_embed = self.transformer.decoder.query_embed.weight
+ tgt = query_embed.unsqueeze(1).repeat(1, bs, 1)
+
+ apply_dac = self.transformer.decoder.dac and self.training
+ hs, reference_boxes, dec_presence_out, dec_presence_feats = (
+ self.transformer.decoder(
+ tgt=tgt,
+ memory=memory,
+ memory_key_padding_mask=src_mask,
+ pos=pos_embed,
+ reference_boxes=None,
+ level_start_index=encoder_out["level_start_index"],
+ spatial_shapes=encoder_out["spatial_shapes"],
+ valid_ratios=encoder_out["valid_ratios"],
+ tgt_mask=None,
+ memory_text=prompt,
+ text_attention_mask=prompt_mask,
+ apply_dac=apply_dac,
+ )
+ )
+ hs = hs.transpose(1, 2) # seq-first to batch-first
+ reference_boxes = reference_boxes.transpose(1, 2) # seq-first to batch-first
+ if dec_presence_out is not None:
+ # seq-first to batch-first
+ dec_presence_out = dec_presence_out.transpose(1, 2)
+
+ out["presence_feats"] = dec_presence_feats
+ self._update_scores_and_boxes(
+ out,
+ hs,
+ reference_boxes,
+ prompt,
+ prompt_mask,
+ dec_presence_out=dec_presence_out,
+ )
+ return out, hs
+
+ def _update_scores_and_boxes(
+ self,
+ out,
+ hs,
+ reference_boxes,
+ prompt,
+ prompt_mask,
+ dec_presence_out=None,
+ is_instance_prompt=False,
+ ):
+ apply_dac = self.transformer.decoder.dac and self.training
+ num_o2o = (hs.size(2) // 2) if apply_dac else hs.size(2)
+ num_o2m = hs.size(2) - num_o2o
+ assert num_o2m == (num_o2o if apply_dac else 0)
+ out["queries"] = hs[-1][:, :num_o2o] # remove o2m queries if there are any
+ # score prediction
+ if self.use_dot_prod_scoring:
+ dot_prod_scoring_head = self.dot_prod_scoring
+ if is_instance_prompt and self.instance_dot_prod_scoring is not None:
+ dot_prod_scoring_head = self.instance_dot_prod_scoring
+ outputs_class = dot_prod_scoring_head(hs, prompt, prompt_mask)
+ else:
+ class_embed_head = self.class_embed
+ if is_instance_prompt and self.instance_class_embed is not None:
+ class_embed_head = self.instance_class_embed
+ outputs_class = class_embed_head(hs)
+
+ # box prediction
+ box_head = self.transformer.decoder.bbox_embed
+ if (
+ is_instance_prompt
+ and self.transformer.decoder.instance_bbox_embed is not None
+ ):
+ box_head = self.transformer.decoder.instance_bbox_embed
+ anchor_box_offsets = box_head(hs)
+ reference_boxes_inv_sig = inverse_sigmoid(reference_boxes)
+ outputs_coord = (reference_boxes_inv_sig + anchor_box_offsets).sigmoid()
+ outputs_boxes_xyxy = box_cxcywh_to_xyxy(outputs_coord)
+
+ if dec_presence_out is not None:
+ _update_out(
+ out, "presence_logit_dec", dec_presence_out, update_aux=self.training
+ )
+
+ if self.supervise_joint_box_scores:
+ assert dec_presence_out is not None
+ prob_dec_presence_out = dec_presence_out.clone().sigmoid()
+ if self.detach_presence_in_joint_score:
+ prob_dec_presence_out = prob_dec_presence_out.detach()
+
+ outputs_class = inverse_sigmoid(
+ outputs_class.sigmoid() * prob_dec_presence_out.unsqueeze(2)
+ ).clamp(min=-10.0, max=10.0)
+
+ _update_out(
+ out, "pred_logits", outputs_class[:, :, :num_o2o], update_aux=self.training
+ )
+ _update_out(
+ out, "pred_boxes", outputs_coord[:, :, :num_o2o], update_aux=self.training
+ )
+ _update_out(
+ out,
+ "pred_boxes_xyxy",
+ outputs_boxes_xyxy[:, :, :num_o2o],
+ update_aux=self.training,
+ )
+ if num_o2m > 0 and self.training:
+ _update_out(
+ out,
+ "pred_logits_o2m",
+ outputs_class[:, :, num_o2o:],
+ update_aux=self.training,
+ )
+ _update_out(
+ out,
+ "pred_boxes_o2m",
+ outputs_coord[:, :, num_o2o:],
+ update_aux=self.training,
+ )
+ _update_out(
+ out,
+ "pred_boxes_xyxy_o2m",
+ outputs_boxes_xyxy[:, :, num_o2o:],
+ update_aux=self.training,
+ )
+
+ def _run_segmentation_heads(
+ self,
+ out,
+ backbone_out,
+ img_ids,
+ vis_feat_sizes,
+ encoder_hidden_states,
+ prompt,
+ prompt_mask,
+ hs,
+ ):
+ apply_dac = self.transformer.decoder.dac and self.training
+ if self.segmentation_head is not None:
+ num_o2o = (hs.size(2) // 2) if apply_dac else hs.size(2)
+ num_o2m = hs.size(2) - num_o2o
+ obj_queries = hs if self.o2m_mask_predict else hs[:, :, :num_o2o]
+ seg_head_outputs = activation_ckpt_wrapper(self.segmentation_head)(
+ backbone_feats=backbone_out["backbone_fpn"],
+ obj_queries=obj_queries,
+ image_ids=img_ids,
+ encoder_hidden_states=encoder_hidden_states,
+ act_ckpt_enable=self.training and self.use_act_checkpoint_seg_head,
+ prompt=prompt,
+ prompt_mask=prompt_mask,
+ )
+ aux_masks = False # self.aux_loss and self.segmentation_head.aux_masks
+ for k, v in seg_head_outputs.items():
+ if k in self.segmentation_head.instance_keys:
+ _update_out(out, k, v[:, :num_o2o], auxiliary=aux_masks)
+ if (
+ self.o2m_mask_predict and num_o2m > 0
+ ): # handle o2m mask prediction
+ _update_out(
+ out, f"{k}_o2m", v[:, num_o2o:], auxiliary=aux_masks
+ )
+ else:
+ out[k] = v
+ else:
+ backbone_out.pop("backbone_fpn", None)
+
+ def _get_best_mask(self, out):
+ prev_mask_idx = out["pred_logits"].argmax(dim=1).squeeze(1)
+ batch_idx = torch.arange(
+ out["pred_logits"].shape[0], device=prev_mask_idx.device
+ )
+ prev_mask_pred = out["pred_masks"][batch_idx, prev_mask_idx][:, None]
+        # Downsample the mask to the backbone feature-map resolution so it can be added to the image features.
+ prev_mask_pred = self.geometry_encoder.mask_encoder.mask_downsampler(
+ prev_mask_pred
+ )
+ prev_mask_pred = prev_mask_pred.flatten(-2).permute(2, 0, 1)
+
+ return prev_mask_pred
+
+ def forward_grounding(
+ self,
+ backbone_out,
+ find_input,
+ find_target,
+ geometric_prompt: Prompt,
+ ):
+ with torch.profiler.record_function("SAM3Image._encode_prompt"):
+ prompt, prompt_mask, backbone_out = self._encode_prompt(
+ backbone_out, find_input, geometric_prompt
+ )
+ # Run the encoder
+ with torch.profiler.record_function("SAM3Image._run_encoder"):
+ backbone_out, encoder_out, _ = self._run_encoder(
+ backbone_out, find_input, prompt, prompt_mask
+ )
+ out = {
+ "encoder_hidden_states": encoder_out["encoder_hidden_states"],
+ "prev_encoder_out": {
+ "encoder_out": encoder_out,
+ "backbone_out": backbone_out,
+ },
+ }
+
+ # Run the decoder
+ with torch.profiler.record_function("SAM3Image._run_decoder"):
+ out, hs = self._run_decoder(
+ memory=out["encoder_hidden_states"],
+ pos_embed=encoder_out["pos_embed"],
+ src_mask=encoder_out["padding_mask"],
+ out=out,
+ prompt=prompt,
+ prompt_mask=prompt_mask,
+ encoder_out=encoder_out,
+ )
+
+ # Run segmentation heads
+ with torch.profiler.record_function("SAM3Image._run_segmentation_heads"):
+ self._run_segmentation_heads(
+ out=out,
+ backbone_out=backbone_out,
+ img_ids=find_input.img_ids,
+ vis_feat_sizes=encoder_out["vis_feat_sizes"],
+ encoder_hidden_states=out["encoder_hidden_states"],
+ prompt=prompt,
+ prompt_mask=prompt_mask,
+ hs=hs,
+ )
+
+ if self.training or self.num_interactive_steps_val > 0:
+ self._compute_matching(out, self.back_convert(find_target))
+ return out
+
+ def _postprocess_out(self, out: Dict, multimask_output: bool = False):
+        # For multimask output during eval, we return the single best mask under the dict keys expected by the evaluators, and additionally return the full multimask outputs under new keys.
+ num_mask_boxes = out["pred_boxes"].size(1)
+ if not self.training and multimask_output and num_mask_boxes > 1:
+ out["multi_pred_logits"] = out["pred_logits"]
+ if "pred_masks" in out:
+ out["multi_pred_masks"] = out["pred_masks"]
+ out["multi_pred_boxes"] = out["pred_boxes"]
+ out["multi_pred_boxes_xyxy"] = out["pred_boxes_xyxy"]
+
+ best_mask_idx = out["pred_logits"].argmax(1).squeeze(1)
+ batch_idx = torch.arange(len(best_mask_idx), device=best_mask_idx.device)
+
+ out["pred_logits"] = out["pred_logits"][batch_idx, best_mask_idx].unsqueeze(
+ 1
+ )
+ if "pred_masks" in out:
+ out["pred_masks"] = out["pred_masks"][
+ batch_idx, best_mask_idx
+ ].unsqueeze(1)
+ out["pred_boxes"] = out["pred_boxes"][batch_idx, best_mask_idx].unsqueeze(1)
+ out["pred_boxes_xyxy"] = out["pred_boxes_xyxy"][
+ batch_idx, best_mask_idx
+ ].unsqueeze(1)
+
+ return out
+
+ def _get_dummy_prompt(self, num_prompts=1):
+ device = self.device
+ geometric_prompt = Prompt(
+ box_embeddings=torch.zeros(0, num_prompts, 4, device=device),
+ box_mask=torch.zeros(num_prompts, 0, device=device, dtype=torch.bool),
+ )
+ return geometric_prompt
+
+ def forward(self, input: BatchedDatapoint):
+ device = self.device
+ backbone_out = {"img_batch_all_stages": input.img_batch}
+ backbone_out.update(self.backbone.forward_image(input.img_batch))
+ num_frames = len(input.find_inputs)
+ assert num_frames == 1
+
+ text_outputs = self.backbone.forward_text(input.find_text_batch, device=device)
+ backbone_out.update(text_outputs)
+
+ previous_stages_out = SAM3Output(
+ iter_mode=SAM3Output.IterMode.LAST_STEP_PER_STAGE
+ )
+
+ find_input = input.find_inputs[0]
+ find_target = input.find_targets[0]
+
+ if find_input.input_points is not None and find_input.input_points.numel() > 0:
+ print("Warning: Point prompts are ignored in PCS.")
+
+ num_interactive_steps = 0 if self.training else self.num_interactive_steps_val
+ geometric_prompt = Prompt(
+ box_embeddings=find_input.input_boxes,
+ box_mask=find_input.input_boxes_mask,
+ box_labels=find_input.input_boxes_label,
+ )
+
+ # Init vars that are shared across the loop.
+ stage_outs = []
+ for cur_step in range(num_interactive_steps + 1):
+ if cur_step > 0:
+ # We sample interactive geometric prompts (boxes, points)
+ geometric_prompt, _ = self.interactive_prompt_sampler.sample(
+ geo_prompt=geometric_prompt,
+ find_target=find_target,
+ previous_out=stage_outs[-1],
+ )
+ out = self.forward_grounding(
+ backbone_out=backbone_out,
+ find_input=find_input,
+ find_target=find_target,
+ geometric_prompt=geometric_prompt.clone(),
+ )
+ stage_outs.append(out)
+
+ previous_stages_out.append(stage_outs)
+ return previous_stages_out
+
+ def _compute_matching(self, out, targets):
+ out["indices"] = self.matcher(out, targets)
+ for aux_out in out.get("aux_outputs", []):
+ aux_out["indices"] = self.matcher(aux_out, targets)
+
+ def back_convert(self, targets):
+ batched_targets = {
+ "boxes": targets.boxes.view(-1, 4),
+ "boxes_xyxy": box_cxcywh_to_xyxy(targets.boxes.view(-1, 4)),
+ "boxes_padded": targets.boxes_padded,
+ "positive_map": targets.boxes.new_ones(len(targets.boxes), 1),
+ "num_boxes": targets.num_boxes,
+ "masks": targets.segments,
+ "semantic_masks": targets.semantic_segments,
+ "is_valid_mask": targets.is_valid_segment,
+ "is_exhaustive": targets.is_exhaustive,
+ "object_ids_packed": targets.object_ids,
+ "object_ids_padded": targets.object_ids_padded,
+ }
+ return batched_targets
+
+ def predict_inst(
+ self,
+ inference_state,
+ **kwargs,
+ ) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+ orig_h, orig_w = (
+ inference_state["original_height"],
+ inference_state["original_width"],
+ )
+ backbone_out = inference_state["backbone_out"]["sam2_backbone_out"]
+ (
+ _,
+ vision_feats,
+ _,
+ _,
+ ) = self.inst_interactive_predictor.model._prepare_backbone_features(
+ backbone_out
+ )
+        # Add no_mem_embed, which is added to the lowest res feat. map during training on videos
+ vision_feats[-1] = (
+ vision_feats[-1] + self.inst_interactive_predictor.model.no_mem_embed
+ )
+ feats = [
+ feat.permute(1, 2, 0).view(1, -1, *feat_size)
+ for feat, feat_size in zip(
+ vision_feats[::-1], self.inst_interactive_predictor._bb_feat_sizes[::-1]
+ )
+ ][::-1]
+ self.inst_interactive_predictor._features = {
+ "image_embed": feats[-1],
+ "high_res_feats": feats[:-1],
+ }
+ self.inst_interactive_predictor._is_image_set = True
+ self.inst_interactive_predictor._orig_hw = [(orig_h, orig_w)]
+ res = self.inst_interactive_predictor.predict(**kwargs)
+ self.inst_interactive_predictor._features = None
+ self.inst_interactive_predictor._is_image_set = False
+ return res
+
+ def predict_inst_batch(
+ self,
+ inference_state,
+ *args,
+ **kwargs,
+ ) -> Tuple[List[np.ndarray], List[np.ndarray], List[np.ndarray]]:
+ backbone_out = inference_state["backbone_out"]["sam2_backbone_out"]
+ (
+ _,
+ vision_feats,
+ _,
+ _,
+ ) = self.inst_interactive_predictor.model._prepare_backbone_features(
+ backbone_out
+ )
+ # Add no_mem_embed, which is added to the lowest res feat. map during training on videos
+ vision_feats[-1] = (
+ vision_feats[-1] + self.inst_interactive_predictor.model.no_mem_embed
+ )
+ batch_size = vision_feats[-1].shape[1]
+ orig_heights, orig_widths = (
+ inference_state["original_heights"],
+ inference_state["original_widths"],
+ )
+ assert (
+ batch_size == len(orig_heights) == len(orig_widths)
+ ), f"Batch size mismatch in predict_inst_batch. Got {batch_size}, {len(orig_heights)}, {len(orig_widths)}"
+ feats = [
+ feat.permute(1, 2, 0).view(batch_size, -1, *feat_size)
+ for feat, feat_size in zip(
+ vision_feats[::-1], self.inst_interactive_predictor._bb_feat_sizes[::-1]
+ )
+ ][::-1]
+ self.inst_interactive_predictor._features = {
+ "image_embed": feats[-1],
+ "high_res_feats": feats[:-1],
+ }
+ self.inst_interactive_predictor._is_image_set = True
+ self.inst_interactive_predictor._is_batch = True
+ self.inst_interactive_predictor._orig_hw = [
+ (orig_h, orig_w) for orig_h, orig_w in zip(orig_heights, orig_widths)
+ ]
+ res = self.inst_interactive_predictor.predict_batch(*args, **kwargs)
+ self.inst_interactive_predictor._features = None
+ self.inst_interactive_predictor._is_image_set = False
+ self.inst_interactive_predictor._is_batch = False
+ return res
+
+
+class Sam3ImageOnVideoMultiGPU(Sam3Image):
+ def __init__(
+ self, *args, async_all_gather=True, gather_backbone_out=None, **kwargs
+ ):
+ super().__init__(*args, **kwargs)
+ self.rank = int(os.getenv("RANK", "0"))
+ self.world_size = int(os.getenv("WORLD_SIZE", "1"))
+ self.async_all_gather = async_all_gather
+
+        # if gather_backbone_out is not set, default to gathering only for `SAM3VLBackbone`
+ if gather_backbone_out is None:
+ gather_backbone_out = isinstance(self.backbone, SAM3VLBackbone)
+ self.gather_backbone_out = gather_backbone_out
+
+ def forward_video_grounding_multigpu(
+ self,
+ backbone_out,
+ find_inputs,
+ geometric_prompt: Prompt,
+ frame_idx,
+ num_frames,
+ # `multigpu_buffer` is a dict to cache detector's outputs in a chunk between different calls
+ multigpu_buffer,
+ track_in_reverse=False,
+ # whether to also return the SAM2 backbone features
+ return_sam2_backbone_feats=False,
+ # whether to perform NMS and suppress the scores of those detections removed by NMS
+ run_nms=False,
+ nms_prob_thresh=None,
+ nms_iou_thresh=None,
+ **kwargs,
+ ):
+ """
+        Compute the detector's outputs in a distributed manner, where the GPUs together process
+        a chunk of frames (one frame per GPU) at once and store the results in a cache.
+ """
+ # Step 1: fetch the detector outputs in the current chunk from buffer
+ frame_idx_curr_b = frame_idx - frame_idx % self.world_size
+ frame_idx_curr_e = min(frame_idx_curr_b + self.world_size, num_frames)
+ # in case the current frame's detection results are not in the buffer yet, build the current chunk
+ # (this should only happen on the first chunk, since we are also building the next chunk below)
+ if frame_idx not in multigpu_buffer:
+ with torch.profiler.record_function("build_multigpu_buffer_next_chunk1"):
+ self._build_multigpu_buffer_next_chunk(
+ backbone_out=backbone_out,
+ find_inputs=find_inputs,
+ geometric_prompt=geometric_prompt,
+ frame_idx_begin=frame_idx_curr_b,
+ frame_idx_end=frame_idx_curr_e,
+ num_frames=num_frames,
+ multigpu_buffer=multigpu_buffer,
+ run_nms=run_nms,
+ nms_prob_thresh=nms_prob_thresh,
+ nms_iou_thresh=nms_iou_thresh,
+ )
+
+ # read out the current frame's results from `multigpu_buffer`
+ out = {}
+ for k, (v, handle) in multigpu_buffer[frame_idx].items():
+ if k.startswith("sam2_backbone_") and not return_sam2_backbone_feats:
+ continue
+ if handle is not None:
+ handle.wait() # wait for async all-gather to finish
+ out[k] = v
+
+ # Step 2: remove detection outputs of the previous chunk from cache to save GPU memory
+ if not track_in_reverse and frame_idx_curr_b - self.world_size >= 0:
+ frame_idx_prev_e = frame_idx_curr_b
+ frame_idx_prev_b = frame_idx_curr_b - self.world_size
+ elif track_in_reverse and frame_idx_curr_e < num_frames:
+ frame_idx_prev_b = frame_idx_curr_e
+ frame_idx_prev_e = min(frame_idx_prev_b + self.world_size, num_frames)
+ else:
+ frame_idx_prev_b = frame_idx_prev_e = None
+ if frame_idx_prev_b is not None:
+ for frame_idx_rm in range(frame_idx_prev_b, frame_idx_prev_e):
+ multigpu_buffer.pop(frame_idx_rm, None)
+
+ # Step 3: compute and cache detection outputs of the next chunk ahead of time
+ # (so that we can overlap computation with all-gather transfer)
+ if not track_in_reverse and frame_idx_curr_e < num_frames:
+ frame_idx_next_b = frame_idx_curr_e
+ frame_idx_next_e = min(frame_idx_next_b + self.world_size, num_frames)
+ elif track_in_reverse and frame_idx_curr_b - self.world_size >= 0:
+ frame_idx_next_e = frame_idx_curr_b
+ frame_idx_next_b = frame_idx_curr_b - self.world_size
+ else:
+ frame_idx_next_b = frame_idx_next_e = None
+ if frame_idx_next_b is not None and frame_idx_next_b not in multigpu_buffer:
+ with torch.profiler.record_function("build_multigpu_buffer_next_chunk2"):
+ self._build_multigpu_buffer_next_chunk(
+ backbone_out=backbone_out,
+ find_inputs=find_inputs,
+ geometric_prompt=geometric_prompt,
+ frame_idx_begin=frame_idx_next_b,
+ frame_idx_end=frame_idx_next_e,
+ num_frames=num_frames,
+ multigpu_buffer=multigpu_buffer,
+ run_nms=run_nms,
+ nms_prob_thresh=nms_prob_thresh,
+ nms_iou_thresh=nms_iou_thresh,
+ )
+
+ return out, backbone_out
+
+ def _build_multigpu_buffer_next_chunk(
+ self,
+ backbone_out,
+ find_inputs,
+ geometric_prompt: Prompt,
+ frame_idx_begin,
+ frame_idx_end,
+ num_frames,
+ multigpu_buffer,
+ run_nms=False,
+ nms_prob_thresh=None,
+ nms_iou_thresh=None,
+ ):
+ """Compute detection outputs on a chunk of frames and store their results in multigpu_buffer."""
+ # each GPU computes detections on one frame in the chunk (in a round-robin manner)
+ frame_idx_local_gpu = min(frame_idx_begin + self.rank, frame_idx_end - 1)
+ # `forward_grounding` (from base class `Sam3ImageOnVideo`) runs the detector on a single frame
+ with torch.profiler.record_function("forward_grounding"):
+ out_local = self.forward_grounding(
+ backbone_out=backbone_out,
+ find_input=find_inputs[frame_idx_local_gpu],
+ find_target=None,
+ geometric_prompt=geometric_prompt,
+ )
+ if run_nms:
+ with torch.profiler.record_function("nms_masks"):
+ # run NMS as a post-processing step on top of the detection outputs
+ assert nms_prob_thresh is not None and nms_iou_thresh is not None
+ pred_probs = out_local["pred_logits"].squeeze(-1).sigmoid()
+ pred_masks = out_local["pred_masks"]
+ # loop over text prompts (not an overhead for demo where there's only 1 prompt)
+ for prompt_idx in range(pred_probs.size(0)):
+ keep = nms_masks(
+ pred_probs=pred_probs[prompt_idx],
+ pred_masks=pred_masks[prompt_idx],
+ prob_threshold=nms_prob_thresh,
+ iou_threshold=nms_iou_thresh,
+ )
+                    # push the logits of detections removed by NMS down to a very low score
+ out_local["pred_logits"][prompt_idx, :, 0] -= 1e4 * (~keep).float()
+
+ if self.gather_backbone_out:
+ # gather the SAM 2 backbone features across GPUs
+ feats = out_local["prev_encoder_out"]["backbone_out"]["sam2_backbone_out"]
+            assert len(feats["backbone_fpn"]) == 3  # the SAM2 backbone always has 3 levels
+ # cast the SAM2 backbone features to bfloat16 for all-gather (this is usually
+ # a no-op, SAM2 backbone features are likely already in bfloat16 due to AMP)
+ backbone_fpn_bf16 = [x.to(torch.bfloat16) for x in feats["backbone_fpn"]]
+ fpn0, fpn_handle0 = self._gather_tensor(backbone_fpn_bf16[0])
+ fpn1, fpn_handle1 = self._gather_tensor(backbone_fpn_bf16[1])
+ fpn2, fpn_handle2 = self._gather_tensor(backbone_fpn_bf16[2])
+ # vision_pos_enc is the same on all frames, so no need to all-gather them
+ vision_pos_enc = feats["vision_pos_enc"]
+
+ # trim the detector output to only include the necessary keys
+ out_local = {
+ "pred_logits": out_local["pred_logits"],
+ "pred_boxes": out_local["pred_boxes"],
+ "pred_boxes_xyxy": out_local["pred_boxes_xyxy"],
+ "pred_masks": out_local["pred_masks"],
+ }
+
+ # gather the results: after this step, each GPU will receive detector outputs on
+ # all frames in the chunk and store them in `multigpu_buffer`
+ out_gathered = {k: self._gather_tensor(v) for k, v in out_local.items()}
+ for rank in range(self.world_size):
+ frame_idx_to_save = frame_idx_begin + rank
+ if frame_idx_to_save >= num_frames:
+ continue
+ frame_buffer = {
+ k: (v[rank], handle) for k, (v, handle) in out_gathered.items()
+ }
+ if self.gather_backbone_out:
+ # also add gathered SAM 2 backbone features to frame_buffer
+ frame_buffer["tracker_backbone_fpn_0"] = (fpn0[rank], fpn_handle0)
+ frame_buffer["tracker_backbone_fpn_1"] = (fpn1[rank], fpn_handle1)
+ frame_buffer["tracker_backbone_fpn_2"] = (fpn2[rank], fpn_handle2)
+ frame_buffer["tracker_backbone_pos_enc"] = (vision_pos_enc, None)
+
+ multigpu_buffer[frame_idx_to_save] = frame_buffer
+
+ def _gather_tensor(self, x):
+ if self.world_size == 1:
+ return [x], None
+
+ async_op = self.async_all_gather
+ # here `.contiguous()` is required -- otherwise NCCL all_gather
+ # sometimes gives wrong results
+ x = x.contiguous() # ensure contiguous memory for NCCL
+ output_list = [torch.empty_like(x) for _ in range(self.world_size)]
+ handle = torch.distributed.all_gather(output_list, x, async_op=async_op)
+ return output_list, handle
diff --git a/sam3/model/sam3_image_processor.py b/sam3/model/sam3_image_processor.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d98fbfbbd3487f2d6f6047e9b9e1bc56190c9cf
--- /dev/null
+++ b/sam3/model/sam3_image_processor.py
@@ -0,0 +1,222 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+from typing import Dict, List
+
+import numpy as np
+import PIL
+import torch
+
+from sam3.model import box_ops
+
+from sam3.model.data_misc import FindStage, interpolate
+from torchvision.transforms import v2
+
+
+class Sam3Processor:
+ """ """
+
+ def __init__(self, model, resolution=1008, device="cuda", confidence_threshold=0.5):
+ self.model = model
+ self.resolution = resolution
+ self.device = device
+ self.transform = v2.Compose(
+ [
+ v2.ToDtype(torch.uint8, scale=True),
+ v2.Resize(size=(resolution, resolution)),
+ v2.ToDtype(torch.float32, scale=True),
+ v2.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+ ]
+ )
+ self.confidence_threshold = confidence_threshold
+
+ self.find_stage = FindStage(
+ img_ids=torch.tensor([0], device=device, dtype=torch.long),
+ text_ids=torch.tensor([0], device=device, dtype=torch.long),
+ input_boxes=None,
+ input_boxes_mask=None,
+ input_boxes_label=None,
+ input_points=None,
+ input_points_mask=None,
+ )
+
+ @torch.inference_mode()
+ def set_image(self, image, state=None):
+ """Sets the image on which we want to do predictions."""
+ if state is None:
+ state = {}
+
+ if isinstance(image, PIL.Image.Image):
+ width, height = image.size
+ elif isinstance(image, (torch.Tensor, np.ndarray)):
+ height, width = image.shape[-2:]
+ else:
+ raise ValueError("Image must be a PIL image or a tensor")
+
+ image = v2.functional.to_image(image).to(self.device)
+ image = self.transform(image).unsqueeze(0)
+
+ state["original_height"] = height
+ state["original_width"] = width
+ state["backbone_out"] = self.model.backbone.forward_image(image)
+ inst_interactivity_en = self.model.inst_interactive_predictor is not None
+ if inst_interactivity_en and "sam2_backbone_out" in state["backbone_out"]:
+ sam2_backbone_out = state["backbone_out"]["sam2_backbone_out"]
+ sam2_backbone_out["backbone_fpn"][0] = (
+ self.model.inst_interactive_predictor.model.sam_mask_decoder.conv_s0(
+ sam2_backbone_out["backbone_fpn"][0]
+ )
+ )
+ sam2_backbone_out["backbone_fpn"][1] = (
+ self.model.inst_interactive_predictor.model.sam_mask_decoder.conv_s1(
+ sam2_backbone_out["backbone_fpn"][1]
+ )
+ )
+ return state
+
+ @torch.inference_mode()
+ def set_image_batch(self, images: List[np.ndarray], state=None):
+ """Sets the image batch on which we want to do predictions."""
+ if state is None:
+ state = {}
+
+ if not isinstance(images, list):
+ raise ValueError("Images must be a list of PIL images or tensors")
+ assert len(images) > 0, "Images list must not be empty"
+ assert isinstance(
+ images[0], PIL.Image.Image
+ ), "Images must be a list of PIL images"
+
+ state["original_heights"] = [image.height for image in images]
+ state["original_widths"] = [image.width for image in images]
+
+ images = [
+ self.transform(v2.functional.to_image(image).to(self.device))
+ for image in images
+ ]
+ images = torch.stack(images, dim=0)
+ state["backbone_out"] = self.model.backbone.forward_image(images)
+ inst_interactivity_en = self.model.inst_interactive_predictor is not None
+ if inst_interactivity_en and "sam2_backbone_out" in state["backbone_out"]:
+ sam2_backbone_out = state["backbone_out"]["sam2_backbone_out"]
+ sam2_backbone_out["backbone_fpn"][0] = (
+ self.model.inst_interactive_predictor.model.sam_mask_decoder.conv_s0(
+ sam2_backbone_out["backbone_fpn"][0]
+ )
+ )
+ sam2_backbone_out["backbone_fpn"][1] = (
+ self.model.inst_interactive_predictor.model.sam_mask_decoder.conv_s1(
+ sam2_backbone_out["backbone_fpn"][1]
+ )
+ )
+ return state
+
+ @torch.inference_mode()
+ def set_text_prompt(self, prompt: str, state: Dict):
+ """Sets the text prompt and run the inference"""
+
+ if "backbone_out" not in state:
+ raise ValueError("You must call set_image before set_text_prompt")
+
+ text_outputs = self.model.backbone.forward_text([prompt], device=self.device)
+ # will erase the previous text prompt if any
+ state["backbone_out"].update(text_outputs)
+ if "geometric_prompt" not in state:
+ state["geometric_prompt"] = self.model._get_dummy_prompt()
+
+ return self._forward_grounding(state)
+
+ @torch.inference_mode()
+ def add_geometric_prompt(self, box: List, label: bool, state: Dict):
+ """Adds a box prompt and run the inference.
+ The image needs to be set, but not necessarily the text prompt.
+ The box is assumed to be in [center_x, center_y, width, height] format and normalized in [0, 1] range.
+ The label is True for a positive box, False for a negative box.
+ """
+ if "backbone_out" not in state:
+ raise ValueError("You must call set_image before set_text_prompt")
+
+ if "language_features" not in state["backbone_out"]:
+ # Looks like we don't have a text prompt yet. This is allowed, but we need to set the text prompt to "visual" for the model to rely only on the geometric prompt
+ dummy_text_outputs = self.model.backbone.forward_text(
+ ["visual"], device=self.device
+ )
+ state["backbone_out"].update(dummy_text_outputs)
+
+ if "geometric_prompt" not in state:
+ state["geometric_prompt"] = self.model._get_dummy_prompt()
+
+ # adding a batch and sequence dimension
+ boxes = torch.tensor(box, device=self.device, dtype=torch.float32).view(1, 1, 4)
+ labels = torch.tensor([label], device=self.device, dtype=torch.bool).view(1, 1)
+ state["geometric_prompt"].append_boxes(boxes, labels)
+
+ return self._forward_grounding(state)
+
+ def reset_all_prompts(self, state: Dict):
+ """Removes all the prompts and results"""
+ if "backbone_out" in state:
+ backbone_keys_to_del = [
+ "language_features",
+ "language_mask",
+ "language_embeds",
+ ]
+ for key in backbone_keys_to_del:
+ if key in state["backbone_out"]:
+ del state["backbone_out"][key]
+
+ keys_to_del = ["geometric_prompt", "boxes", "masks", "masks_logits", "scores"]
+ for key in keys_to_del:
+ if key in state:
+ del state[key]
+
+ @torch.inference_mode()
+ def set_confidence_threshold(self, threshold: float, state=None):
+ """Sets the confidence threshold for the masks"""
+ self.confidence_threshold = threshold
+ if state is not None and "boxes" in state:
+ # we need to filter the boxes again
+ # In principle we could do this more efficiently since we would only need
+ # to rerun the heads. But this is simpler and not too inefficient
+ return self._forward_grounding(state)
+ return state
+
+ @torch.inference_mode()
+ def _forward_grounding(self, state: Dict):
+ outputs = self.model.forward_grounding(
+ backbone_out=state["backbone_out"],
+ find_input=self.find_stage,
+ geometric_prompt=state["geometric_prompt"],
+ find_target=None,
+ )
+
+ out_bbox = outputs["pred_boxes"]
+ out_logits = outputs["pred_logits"]
+ out_masks = outputs["pred_masks"]
+ out_probs = out_logits.sigmoid()
+ presence_score = outputs["presence_logit_dec"].sigmoid().unsqueeze(1)
+ out_probs = (out_probs * presence_score).squeeze(-1)
+
+ keep = out_probs > self.confidence_threshold
+ out_probs = out_probs[keep]
+ out_masks = out_masks[keep]
+ out_bbox = out_bbox[keep]
+
+ # convert to [x0, y0, x1, y1] format
+ boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
+
+ img_h = state["original_height"]
+ img_w = state["original_width"]
+ scale_fct = torch.tensor([img_w, img_h, img_w, img_h]).to(self.device)
+ boxes = boxes * scale_fct[None, :]
+
+ out_masks = interpolate(
+ out_masks.unsqueeze(1),
+ (img_h, img_w),
+ mode="bilinear",
+ align_corners=False,
+ ).sigmoid()
+
+ state["masks_logits"] = out_masks
+ state["masks"] = out_masks > 0.5
+ state["boxes"] = boxes
+ state["scores"] = out_probs
+ return state
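+
+
+# Illustrative end-to-end usage of Sam3Processor (descriptive comment only); the
+# model construction and image path are hypothetical placeholders not defined here:
+#
+#     processor = Sam3Processor(model, resolution=1008, device="cuda")
+#     state = processor.set_image(PIL.Image.open("kitchen.jpg"))
+#     state = processor.set_text_prompt("coffee mug", state)
+#     masks = state["masks"]      # boolean masks at the original image resolution
+#     boxes = state["boxes"]      # XYXY boxes scaled to the original image size
+#     scores = state["scores"]    # per-detection confidence scores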
diff --git a/sam3/model/sam3_tracker_base.py b/sam3/model/sam3_tracker_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..1591f32591d96e3060668e2330bf79475e46b1e0
--- /dev/null
+++ b/sam3/model/sam3_tracker_base.py
@@ -0,0 +1,1188 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+
+import torch
+import torch.nn.functional as F
+
+from sam3.model.memory import SimpleMaskEncoder
+
+from sam3.model.sam3_tracker_utils import get_1d_sine_pe, select_closest_cond_frames
+
+from sam3.sam.mask_decoder import MaskDecoder, MLP
+from sam3.sam.prompt_encoder import PromptEncoder
+from sam3.sam.transformer import TwoWayTransformer
+from sam3.train.data.collator import BatchedDatapoint
+
+try:
+ from timm.layers import trunc_normal_
+except ModuleNotFoundError:
+ # compatibility for older timm versions
+ from timm.models.layers import trunc_normal_
+
+# a large negative value as a placeholder score for missing objects
+NO_OBJ_SCORE = -1024.0
+
+
+class Sam3TrackerBase(torch.nn.Module):
+ def __init__(
+ self,
+ backbone,
+ transformer,
+ maskmem_backbone,
+ num_maskmem=7, # default 1 input frame + 6 previous frames as in CAE
+ image_size=1008,
+ backbone_stride=14, # stride of the image backbone output
+ # The maximum number of conditioning frames to participate in the memory attention (-1 means no limit; if there are more conditioning frames than this limit,
+ # we only cross-attend to the temporally closest `max_cond_frames_in_attn` conditioning frames in the encoder when tracking each frame). This gives the model
+ # a temporal locality when handling a large number of annotated frames (since closer frames should be more important) and also avoids GPU OOM.
+ max_cond_frames_in_attn=-1,
+ # Whether to always keep the first conditioning frame in case we exceed the maximum number of conditioning frames allowed
+ keep_first_cond_frame=False,
+ # whether to output multiple (3) masks for the first click on initial conditioning frames
+ multimask_output_in_sam=False,
+ # the minimum and maximum number of clicks to use multimask_output_in_sam (only relevant when `multimask_output_in_sam=True`;
+ # default is 1 for both, meaning that only the first click gives multimask output; also note that a box counts as two points)
+ multimask_min_pt_num=1,
+ multimask_max_pt_num=1,
+ # whether to also use multimask output for tracking (not just for the first click on initial conditioning frames; only relevant when `multimask_output_in_sam=True`)
+ multimask_output_for_tracking=False,
+ # whether to forward image features per frame (as it's being tracked) during evaluation, instead of forwarding image features
+ # of all frames at once. This avoids backbone OOM errors on very long videos in evaluation, but could be slightly slower.
+ forward_backbone_per_frame_for_eval=False,
+ # The memory bank's temporal stride during evaluation (i.e. the `r` parameter in XMem and Cutie; XMem and Cutie use r=5).
+ # For r>1, the (self.num_maskmem - 1) non-conditioning memory frames consist of
+ # (self.num_maskmem - 2) nearest frames from every r-th frames, plus the last frame.
+ memory_temporal_stride_for_eval=1,
+ # whether to offload outputs to CPU memory during evaluation, to avoid GPU OOM on very long videos or very large resolutions or too many objects
+ # (it's recommended to use `forward_backbone_per_frame_for_eval=True` first before setting this option to True)
+ offload_output_to_cpu_for_eval=False,
+ # whether to trim the output of past non-conditioning frames (num_maskmem frames before the current frame) during evaluation
+ # (this helps save GPU or CPU memory on very long videos for semi-supervised VOS eval, where only the first frame receives prompts)
+ trim_past_non_cond_mem_for_eval=False,
+ # whether to apply non-overlapping constraints on the object masks in the memory encoder during evaluation (to avoid/alleviate superposing masks)
+ non_overlap_masks_for_mem_enc=False,
+ # the maximum number of object pointers from other frames in encoder cross attention
+ max_obj_ptrs_in_encoder=16,
+ # extra arguments used to construct the SAM mask decoder; if not None, it should be a dict of kwargs to be passed into `MaskDecoder` class.
+ sam_mask_decoder_extra_args=None,
+        # whether to compile all the model components
+ compile_all_components=False,
+ # select the frame with object existence
+ use_memory_selection=False,
+ # when using memory selection, the threshold to determine if the frame is good
+ mf_threshold=0.01,
+ ):
+ super().__init__()
+
+ # Part 1: the image backbone
+ self.backbone = backbone
+ self.num_feature_levels = 3
+ self.max_obj_ptrs_in_encoder = max_obj_ptrs_in_encoder
+ # A conv layer to downsample the GT mask prompt to stride 4 (the same stride as
+ # low-res SAM mask logits) and to change its scales from 0~1 to SAM logit scale,
+ # so that it can be fed into the SAM mask decoder to generate a pointer.
+ self.mask_downsample = torch.nn.Conv2d(1, 1, kernel_size=4, stride=4)
+
+ # Part 2: encoder-only transformer to fuse current frame's visual features
+ # with memories from past frames
+ assert transformer.decoder is None, "transformer should be encoder-only"
+ self.transformer = transformer
+ self.hidden_dim = transformer.d_model
+
+ # Part 3: memory encoder for the previous frame's outputs
+ self.maskmem_backbone = maskmem_backbone
+ self.mem_dim = self.hidden_dim
+ if hasattr(self.maskmem_backbone, "out_proj") and hasattr(
+ self.maskmem_backbone.out_proj, "weight"
+ ):
+ # if there is compression of memories along channel dim
+ self.mem_dim = self.maskmem_backbone.out_proj.weight.shape[0]
+ self.num_maskmem = num_maskmem # Number of memories accessible
+
+ # Temporal encoding of the memories
+ self.maskmem_tpos_enc = torch.nn.Parameter(
+ torch.zeros(num_maskmem, 1, 1, self.mem_dim)
+ )
+ trunc_normal_(self.maskmem_tpos_enc, std=0.02)
+
+ # a single token to indicate no memory embedding from previous frames
+ self.no_mem_embed = torch.nn.Parameter(torch.zeros(1, 1, self.hidden_dim))
+ self.no_mem_pos_enc = torch.nn.Parameter(torch.zeros(1, 1, self.hidden_dim))
+ trunc_normal_(self.no_mem_embed, std=0.02)
+ trunc_normal_(self.no_mem_pos_enc, std=0.02)
+ # Apply sigmoid to the output raw mask logits (to turn them from
+ # range (-inf, +inf) to range (0, 1)) before feeding them into the memory encoder
+ self.sigmoid_scale_for_mem_enc = 20.0
+ self.sigmoid_bias_for_mem_enc = -10.0
+ self.non_overlap_masks_for_mem_enc = non_overlap_masks_for_mem_enc
+ self.memory_temporal_stride_for_eval = memory_temporal_stride_for_eval
+        # Multimask output settings for the SAM heads (see the constructor arguments above)
+ self.multimask_output_in_sam = multimask_output_in_sam
+ self.multimask_min_pt_num = multimask_min_pt_num
+ self.multimask_max_pt_num = multimask_max_pt_num
+ self.multimask_output_for_tracking = multimask_output_for_tracking
+
+ # Part 4: SAM-style prompt encoder (for both mask and point inputs)
+ # and SAM-style mask decoder for the final mask output
+ self.image_size = image_size
+ self.backbone_stride = backbone_stride
+ self.low_res_mask_size = self.image_size // self.backbone_stride * 4
+ # we resize the mask if it doesn't match `self.input_mask_size` (which is always 4x
+ # the low-res mask size, regardless of the actual input image size); this is because
+ # `_use_mask_as_output` always downsamples the input masks by 4x
+ self.input_mask_size = self.low_res_mask_size * 4
+ self.forward_backbone_per_frame_for_eval = forward_backbone_per_frame_for_eval
+ self.offload_output_to_cpu_for_eval = offload_output_to_cpu_for_eval
+ self.trim_past_non_cond_mem_for_eval = trim_past_non_cond_mem_for_eval
+ self.sam_mask_decoder_extra_args = sam_mask_decoder_extra_args
+ self.no_obj_ptr = torch.nn.Parameter(torch.zeros(1, self.hidden_dim))
+ trunc_normal_(self.no_obj_ptr, std=0.02)
+ self.no_obj_embed_spatial = torch.nn.Parameter(torch.zeros(1, self.mem_dim))
+ trunc_normal_(self.no_obj_embed_spatial, std=0.02)
+
+ self._build_sam_heads()
+ self.max_cond_frames_in_attn = max_cond_frames_in_attn
+ self.keep_first_cond_frame = keep_first_cond_frame
+
+ # Use frame filtering according to SAM2Long
+ self.use_memory_selection = use_memory_selection
+ self.mf_threshold = mf_threshold
+
+ # Compile all components of the model
+ self.compile_all_components = compile_all_components
+ if self.compile_all_components:
+ self._compile_all_components()
+
+ @property
+ def device(self):
+ return next(self.parameters()).device
+
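+    # Builds the temporal positional encoding for object pointers: the relative frame
+    # distances in `rel_pos_list` are normalized by (max_abs_pos - 1), embedded with a
+    # 1D sine positional encoding of size hidden_dim, and projected down to mem_dim.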
+ def _get_tpos_enc(self, rel_pos_list, device, max_abs_pos=None, dummy=False):
+ if dummy:
+ return torch.zeros(len(rel_pos_list), self.mem_dim, device=device)
+
+ t_diff_max = max_abs_pos - 1 if max_abs_pos is not None else 1
+ pos_enc = (
+ torch.tensor(rel_pos_list).pin_memory().to(device=device, non_blocking=True)
+ / t_diff_max
+ )
+ tpos_dim = self.hidden_dim
+ pos_enc = get_1d_sine_pe(pos_enc, dim=tpos_dim)
+ pos_enc = self.obj_ptr_tpos_proj(pos_enc)
+
+ return pos_enc
+
+ def _build_sam_heads(self):
+ """Build SAM-style prompt encoder and mask decoder."""
+ self.sam_prompt_embed_dim = self.hidden_dim
+ self.sam_image_embedding_size = self.image_size // self.backbone_stride
+
+ # build PromptEncoder and MaskDecoder from SAM
+ # (their hyperparameters like `mask_in_chans=16` are from SAM code)
+ self.sam_prompt_encoder = PromptEncoder(
+ embed_dim=self.sam_prompt_embed_dim,
+ image_embedding_size=(
+ self.sam_image_embedding_size,
+ self.sam_image_embedding_size,
+ ),
+ input_image_size=(self.image_size, self.image_size),
+ mask_in_chans=16,
+ )
+ self.sam_mask_decoder = MaskDecoder(
+ num_multimask_outputs=3,
+ transformer=TwoWayTransformer(
+ depth=2,
+ embedding_dim=self.sam_prompt_embed_dim,
+ mlp_dim=2048,
+ num_heads=8,
+ ),
+ transformer_dim=self.sam_prompt_embed_dim,
+ iou_head_depth=3,
+ iou_head_hidden_dim=256,
+ use_high_res_features=True,
+ iou_prediction_use_sigmoid=True,
+ pred_obj_scores=True,
+ pred_obj_scores_mlp=True,
+ use_multimask_token_for_obj_ptr=True,
+ **(self.sam_mask_decoder_extra_args or {}),
+ )
+        # an MLP projection on SAM output tokens to turn them into object pointers
+        self.obj_ptr_proj = MLP(self.hidden_dim, self.hidden_dim, self.hidden_dim, 3)
+ # a linear projection on temporal positional encoding in object pointers to
+ # avoid potential interference with spatial positional encoding
+ self.obj_ptr_tpos_proj = torch.nn.Linear(self.hidden_dim, self.mem_dim)
+
+ def _forward_sam_heads(
+ self,
+ backbone_features,
+ point_inputs=None,
+ mask_inputs=None,
+ high_res_features=None,
+ multimask_output=False,
+ gt_masks=None,
+ ):
+ """
+ Forward SAM prompt encoders and mask heads.
+
+ Inputs:
+ - backbone_features: image features of [B, C, H, W] shape
+ - point_inputs: a dictionary with "point_coords" and "point_labels", where
+ 1) "point_coords" has [B, P, 2] shape and float32 dtype and contains the
+ absolute pixel-unit coordinate in (x, y) format of the P input points
+ 2) "point_labels" has shape [B, P] and int32 dtype, where 1 means
+ positive clicks, 0 means negative clicks, and -1 means padding
+ - mask_inputs: a mask of [B, 1, H*16, W*16] shape, float or bool, with the
+ same spatial size as the image.
+        - high_res_features: either 1) None or 2) a list of length 2 containing
+          two feature maps of [B, C, 4*H, 4*W] and [B, C, 2*H, 2*W] shapes respectively,
+          which will be used as high-resolution feature maps for the SAM decoder.
+ - multimask_output: if it's True, we output 3 candidate masks and their 3
+ corresponding IoU estimates, and if it's False, we output only 1 mask and
+ its corresponding IoU estimate.
+
+ Outputs:
+ - low_res_multimasks: [B, M, H*4, W*4] shape (where M = 3 if
+ `multimask_output=True` and M = 1 if `multimask_output=False`), the SAM
+ output mask logits (before sigmoid) for the low-resolution masks, with 4x
+ the resolution (1/4 stride) of the input backbone_features.
+        - high_res_multimasks: [B, M, H*16, W*16] shape (where M = 3
+          if `multimask_output=True` and M = 1 if `multimask_output=False`),
+          upsampled from the low-resolution masks, with the same spatial size as the
+          image (i.e. a stride of 1 pixel).
+        - ious: [B, M] shape (where M = 3 if `multimask_output=True` and M = 1
+          if `multimask_output=False`), the estimated IoU of each output mask.
+ - low_res_masks: [B, 1, H*4, W*4] shape, the best mask in `low_res_multimasks`.
+ If `multimask_output=True`, it's the mask with the highest IoU estimate.
+ If `multimask_output=False`, it's the same as `low_res_multimasks`.
+ - high_res_masks: [B, 1, H*16, W*16] shape, the best mask in `high_res_multimasks`.
+ If `multimask_output=True`, it's the mask with the highest IoU estimate.
+ If `multimask_output=False`, it's the same as `high_res_multimasks`.
+ - obj_ptr: [B, C] shape, the object pointer vector for the output mask, extracted
+ based on the output token from the SAM mask decoder.
+ """
+ B = backbone_features.size(0)
+ device = backbone_features.device
+ assert backbone_features.size(1) == self.sam_prompt_embed_dim
+ assert backbone_features.size(2) == self.sam_image_embedding_size
+ assert backbone_features.size(3) == self.sam_image_embedding_size
+
+ # a) Handle point prompts
+ if point_inputs is not None:
+ sam_point_coords = point_inputs["point_coords"]
+ sam_point_labels = point_inputs["point_labels"]
+ assert sam_point_coords.size(0) == B and sam_point_labels.size(0) == B
+ else:
+            # If no points are provided, pad with an empty point (with label -1)
+ sam_point_coords = torch.zeros(B, 1, 2, device=device)
+ sam_point_labels = -torch.ones(B, 1, dtype=torch.int32, device=device)
+
+ # b) Handle mask prompts
+ if mask_inputs is not None:
+ # If mask_inputs is provided, downsize it into low-res mask input if needed
+ # and feed it as a dense mask prompt into the SAM mask encoder
+ assert len(mask_inputs.shape) == 4 and mask_inputs.shape[:2] == (B, 1)
+ if mask_inputs.shape[-2:] != self.sam_prompt_encoder.mask_input_size:
+ sam_mask_prompt = F.interpolate(
+ mask_inputs.float(),
+ size=self.sam_prompt_encoder.mask_input_size,
+ align_corners=False,
+ mode="bilinear",
+ antialias=True, # use antialias for downsampling
+ )
+ else:
+ sam_mask_prompt = mask_inputs
+ else:
+ # Otherwise, simply feed None (and SAM's prompt encoder will add
+ # a learned `no_mask_embed` to indicate no mask input in this case).
+ sam_mask_prompt = None
+
+ sparse_embeddings, dense_embeddings = self.sam_prompt_encoder(
+ points=(sam_point_coords, sam_point_labels),
+ boxes=None,
+ masks=sam_mask_prompt,
+ )
+ # Clone image_pe and the outputs of sam_prompt_encoder
+ # to enable compilation
+ sparse_embeddings = self._maybe_clone(sparse_embeddings)
+ dense_embeddings = self._maybe_clone(dense_embeddings)
+ image_pe = self._maybe_clone(self.sam_prompt_encoder.get_dense_pe())
+ with torch.profiler.record_function("sam_mask_decoder"):
+ (
+ low_res_multimasks,
+ ious,
+ sam_output_tokens,
+ object_score_logits,
+ ) = self.sam_mask_decoder(
+ image_embeddings=backbone_features,
+ image_pe=image_pe,
+ sparse_prompt_embeddings=sparse_embeddings,
+ dense_prompt_embeddings=dense_embeddings,
+ multimask_output=multimask_output,
+ repeat_image=False, # the image is already batched
+ high_res_features=high_res_features,
+ )
+ # Clone the output of sam_mask_decoder
+ # to enable compilation
+ low_res_multimasks = self._maybe_clone(low_res_multimasks)
+ ious = self._maybe_clone(ious)
+ sam_output_tokens = self._maybe_clone(sam_output_tokens)
+ object_score_logits = self._maybe_clone(object_score_logits)
+
+ if self.training and self.teacher_force_obj_scores_for_mem:
+ # we use gt to detect if there is an object or not to
+ # select no obj ptr and use an empty mask for spatial memory
+ is_obj_appearing = torch.any(gt_masks.float().flatten(1) > 0, dim=1)
+ is_obj_appearing = is_obj_appearing[..., None]
+ else:
+ is_obj_appearing = object_score_logits > 0
+
+ # Mask used for spatial memories is always a *hard* choice between obj and no obj,
+ # consistent with the actual mask prediction
+ low_res_multimasks = torch.where(
+ is_obj_appearing[:, None, None],
+ low_res_multimasks,
+ NO_OBJ_SCORE,
+ )
+
+ # convert masks from possibly bfloat16 (or float16) to float32
+ # (older PyTorch versions before 2.1 don't support `interpolate` on bf16)
+ low_res_multimasks = low_res_multimasks.float()
+ high_res_multimasks = F.interpolate(
+ low_res_multimasks,
+ size=(self.image_size, self.image_size),
+ mode="bilinear",
+ align_corners=False,
+ )
+
+ sam_output_token = sam_output_tokens[:, 0]
+ if multimask_output:
+ # take the best mask prediction (with the highest IoU estimation)
+ best_iou_inds = torch.argmax(ious, dim=-1)
+ batch_inds = torch.arange(B, device=device)
+ low_res_masks = low_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1)
+ high_res_masks = high_res_multimasks[batch_inds, best_iou_inds].unsqueeze(1)
+ if sam_output_tokens.size(1) > 1:
+ sam_output_token = sam_output_tokens[batch_inds, best_iou_inds]
+ else:
+ low_res_masks, high_res_masks = low_res_multimasks, high_res_multimasks
+
+ # Extract object pointer from the SAM output token (with occlusion handling)
+ obj_ptr = self.obj_ptr_proj(sam_output_token)
+ lambda_is_obj_appearing = is_obj_appearing.float()
+
+ obj_ptr = lambda_is_obj_appearing * obj_ptr
+ obj_ptr = obj_ptr + (1 - lambda_is_obj_appearing) * self.no_obj_ptr
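+        # When the object is predicted absent (lambda = 0), the pointer is replaced by the
+        # learned `no_obj_ptr`; when present (lambda = 1), the SAM-derived pointer is kept.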
+
+ return (
+ low_res_multimasks,
+ high_res_multimasks,
+ ious,
+ low_res_masks,
+ high_res_masks,
+ obj_ptr,
+ object_score_logits,
+ )
+
+ def _use_mask_as_output(self, backbone_features, high_res_features, mask_inputs):
+ """
+        Directly turn binary `mask_inputs` into output mask logits without using SAM.
+ (same input and output shapes as in _forward_sam_heads above).
+ """
+ # Use -10/+10 as logits for neg/pos pixels (very close to 0/1 in prob after sigmoid).
+ out_scale, out_bias = 20.0, -10.0 # sigmoid(-10.0)=4.5398e-05
+ mask_inputs_float = mask_inputs.float()
+ high_res_masks = mask_inputs_float * out_scale + out_bias
+ low_res_masks = F.interpolate(
+ high_res_masks,
+ size=(
+ high_res_masks.size(-2) // self.backbone_stride * 4,
+ high_res_masks.size(-1) // self.backbone_stride * 4,
+ ),
+ align_corners=False,
+ mode="bilinear",
+ antialias=True, # use antialias for downsampling
+ )
+ # a dummy IoU prediction of all 1's under mask input
+ ious = mask_inputs.new_ones(mask_inputs.size(0), 1).float()
+ # produce an object pointer using the SAM decoder from the mask input
+ _, _, _, _, _, obj_ptr, _ = self._forward_sam_heads(
+ backbone_features=backbone_features,
+ mask_inputs=self.mask_downsample(mask_inputs_float),
+ high_res_features=high_res_features,
+ gt_masks=mask_inputs,
+ )
+        # In this method, we treat mask_input as the output, e.g. using it directly to create the spatial memory;
+        # below, we follow the same principle and use mask_input to decide whether the object appears,
+        # instead of relying on the object scores from the SAM decoder.
+ is_obj_appearing = torch.any(mask_inputs.flatten(1).float() > 0.0, dim=1)
+ is_obj_appearing = is_obj_appearing[..., None]
+ lambda_is_obj_appearing = is_obj_appearing.float()
+ object_score_logits = out_scale * lambda_is_obj_appearing + out_bias
+ obj_ptr = lambda_is_obj_appearing * obj_ptr
+ obj_ptr = obj_ptr + (1 - lambda_is_obj_appearing) * self.no_obj_ptr
+
+ return (
+ low_res_masks,
+ high_res_masks,
+ ious,
+ low_res_masks,
+ high_res_masks,
+ obj_ptr,
+ object_score_logits,
+ )
+
+ def forward(self, input: BatchedDatapoint, is_inference=False):
+ raise NotImplementedError(
+ "Please use the corresponding methods in SAM3VideoPredictor for inference."
+ "See examples/sam3_dense_video_tracking.ipynb for an inference example."
+ )
+
+ def forward_image(self, img_batch):
+ """Get the image feature on the input batch."""
+ # This line is the only change from the parent class
+ # to use the SAM3 backbone instead of the SAM2 backbone.
+ backbone_out = self.backbone.forward_image(img_batch)["sam2_backbone_out"]
+ # precompute projected level 0 and level 1 features in SAM decoder
+ # to avoid running it again on every SAM click
+ backbone_out["backbone_fpn"][0] = self.sam_mask_decoder.conv_s0(
+ backbone_out["backbone_fpn"][0]
+ )
+ backbone_out["backbone_fpn"][1] = self.sam_mask_decoder.conv_s1(
+ backbone_out["backbone_fpn"][1]
+ )
+ # Clone to help torch.compile
+ for i in range(len(backbone_out["backbone_fpn"])):
+ backbone_out["backbone_fpn"][i] = self._maybe_clone(
+ backbone_out["backbone_fpn"][i]
+ )
+ backbone_out["vision_pos_enc"][i] = self._maybe_clone(
+ backbone_out["vision_pos_enc"][i]
+ )
+ return backbone_out
+
+ def _prepare_backbone_features(self, backbone_out):
+ """Prepare and flatten visual features (same as in MDETR_API model)."""
+ backbone_out = backbone_out.copy()
+ assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
+ assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels
+
+ feature_maps = backbone_out["backbone_fpn"][-self.num_feature_levels :]
+ vision_pos_embeds = backbone_out["vision_pos_enc"][-self.num_feature_levels :]
+
+ feat_sizes = [(x.shape[-2], x.shape[-1]) for x in vision_pos_embeds]
+ # flatten NxCxHxW to HWxNxC
+ vision_feats = [x.flatten(2).permute(2, 0, 1) for x in feature_maps]
+ vision_pos_embeds = [x.flatten(2).permute(2, 0, 1) for x in vision_pos_embeds]
+
+ return backbone_out, vision_feats, vision_pos_embeds, feat_sizes
+
+ def _prepare_backbone_features_per_frame(self, img_batch, img_ids):
+ """Compute the image backbone features on the fly for the given img_ids."""
+        # Only forward the backbone on unique image ids to avoid repetitive computation
+ # (if `img_ids` has only one element, it's already unique so we skip this step).
+ if img_ids.numel() > 1:
+ unique_img_ids, inv_ids = torch.unique(img_ids, return_inverse=True)
+ else:
+ unique_img_ids, inv_ids = img_ids, None
+
+ # Compute the image features on those unique image ids
+ image = img_batch[unique_img_ids]
+ backbone_out = self.forward_image(image)
+ (
+ _,
+ vision_feats,
+ vision_pos_embeds,
+ feat_sizes,
+ ) = self._prepare_backbone_features(backbone_out)
+ # Inverse-map image features for `unique_img_ids` to the final image features
+ # for the original input `img_ids`.
+ if inv_ids is not None:
+ image = image[inv_ids]
+ vision_feats = [x[:, inv_ids] for x in vision_feats]
+ vision_pos_embeds = [x[:, inv_ids] for x in vision_pos_embeds]
+
+ return image, vision_feats, vision_pos_embeds, feat_sizes
+
+ def cal_mem_score(self, object_score_logits, iou_score):
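+        # Effective per-frame memory score: gate the predicted IoU with the (rescaled)
+        # object-presence probability, so that frames where the object is likely absent
+        # get a score near 0 and can be filtered out of the memory bank.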
+ object_score_norm = torch.where(
+ object_score_logits > 0,
+ object_score_logits.sigmoid() * 2 - 1, ## rescale to [0, 1]
+ torch.zeros_like(object_score_logits),
+ )
+ score_per_frame = (object_score_norm * iou_score).mean()
+ return score_per_frame
+
+ def frame_filter(self, output_dict, track_in_reverse, frame_idx, num_frames, r):
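+        # SAM2Long-style memory selection: walk backwards (or forwards when tracking in
+        # reverse) in steps of r and keep frames whose effective IoU score exceeds
+        # `mf_threshold`, always including the frame adjacent to the current one.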
+ if (frame_idx == 0 and not track_in_reverse) or (
+ frame_idx == num_frames - 1 and track_in_reverse
+ ):
+ return []
+
+ max_num = min(
+ num_frames, self.max_obj_ptrs_in_encoder
+ ) ## maximum number of pointer memory frames to consider
+
+ if not track_in_reverse:
+ start = frame_idx - 1
+ end = 0
+ step = -r
+ must_include = frame_idx - 1
+ else:
+ start = frame_idx + 1
+ end = num_frames
+ step = r
+ must_include = frame_idx + 1
+
+ valid_indices = []
+ for i in range(start, end, step):
+ if (
+ i not in output_dict["non_cond_frame_outputs"]
+ or "eff_iou_score" not in output_dict["non_cond_frame_outputs"][i]
+ ):
+ continue
+
+ score_per_frame = output_dict["non_cond_frame_outputs"][i]["eff_iou_score"]
+
+ if score_per_frame > self.mf_threshold: # threshold
+ valid_indices.insert(0, i)
+
+ if len(valid_indices) >= max_num - 1:
+ break
+
+ if must_include not in valid_indices:
+ valid_indices.append(must_include)
+
+ return valid_indices
+
+ def _prepare_memory_conditioned_features(
+ self,
+ frame_idx,
+ is_init_cond_frame,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ output_dict,
+ num_frames,
+ track_in_reverse=False, # tracking in reverse time order (for demo usage)
+ use_prev_mem_frame=True,
+ ):
+ """Fuse the current frame's visual feature map with previous memory."""
+ B = current_vision_feats[-1].size(1) # batch size on this frame
+ C = self.hidden_dim
+ H, W = feat_sizes[-1] # top-level (lowest-resolution) feature size
+ device = current_vision_feats[-1].device
+ # The case of `self.num_maskmem == 0` below is primarily used for reproducing SAM on images.
+ # In this case, we skip the fusion with any memory.
+ if self.num_maskmem == 0: # Disable memory and skip fusion
+ pix_feat = current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
+ return pix_feat
+
+ num_obj_ptr_tokens = 0
+ tpos_sign_mul = -1 if track_in_reverse else 1
+ # Step 1: condition the visual features of the current frame on previous memories
+ if not is_init_cond_frame and use_prev_mem_frame:
+ # Retrieve the memories encoded with the maskmem backbone
+ to_cat_prompt, to_cat_prompt_mask, to_cat_prompt_pos_embed = [], [], []
+            # Add the conditioning frames' outputs first (all cond frames have t_pos=0
+            # when getting the temporal positional embedding below)
+ assert len(output_dict["cond_frame_outputs"]) > 0
+ # Select a maximum number of temporally closest cond frames for cross attention
+ cond_outputs = output_dict["cond_frame_outputs"]
+ selected_cond_outputs, unselected_cond_outputs = select_closest_cond_frames(
+ frame_idx,
+ cond_outputs,
+ self.max_cond_frames_in_attn,
+ keep_first_cond_frame=self.keep_first_cond_frame,
+ )
+ t_pos_and_prevs = [
+ ((frame_idx - t) * tpos_sign_mul, out, True)
+ for t, out in selected_cond_outputs.items()
+ ]
+ # Add last (self.num_maskmem - 1) frames before current frame for non-conditioning memory
+ # the earliest one has t_pos=1 and the latest one has t_pos=self.num_maskmem-1
+ # We also allow taking the memory frame non-consecutively (with r>1), in which case
+ # we take (self.num_maskmem - 2) frames among every r-th frames plus the last frame.
+ r = 1 if self.training else self.memory_temporal_stride_for_eval
+
+ if self.use_memory_selection:
+ valid_indices = self.frame_filter(
+ output_dict, track_in_reverse, frame_idx, num_frames, r
+ )
+
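+            # Illustrative example (assuming num_maskmem=7): with r=1, forward tracking, and
+            # no memory selection, frame_idx=10 selects frames 4, 5, 6, 7, 8, 9 as
+            # non-conditioning memories in the loop below.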
+ for t_pos in range(1, self.num_maskmem):
+ t_rel = self.num_maskmem - t_pos # how many frames before current frame
+ if self.use_memory_selection:
+ if t_rel > len(valid_indices):
+ continue
+ prev_frame_idx = valid_indices[-t_rel]
+ else:
+ if t_rel == 1:
+ # for t_rel == 1, we take the last frame (regardless of r)
+ if not track_in_reverse:
+ # the frame immediately before this frame (i.e. frame_idx - 1)
+ prev_frame_idx = frame_idx - t_rel
+ else:
+ # the frame immediately after this frame (i.e. frame_idx + 1)
+ prev_frame_idx = frame_idx + t_rel
+ else:
+ # for t_rel >= 2, we take the memory frame from every r-th frames
+ if not track_in_reverse:
+ # first find the nearest frame among every r-th frames before this frame
+ # for r=1, this would be (frame_idx - 2)
+ prev_frame_idx = ((frame_idx - 2) // r) * r
+ # then seek further among every r-th frames
+ prev_frame_idx = prev_frame_idx - (t_rel - 2) * r
+ else:
+ # first find the nearest frame among every r-th frames after this frame
+ # for r=1, this would be (frame_idx + 2)
+ prev_frame_idx = -(-(frame_idx + 2) // r) * r
+ # then seek further among every r-th frames
+ prev_frame_idx = prev_frame_idx + (t_rel - 2) * r
+
+ out = output_dict["non_cond_frame_outputs"].get(prev_frame_idx, None)
+ if out is None:
+ # If an unselected conditioning frame is among the last (self.num_maskmem - 1)
+ # frames, we still attend to it as if it's a non-conditioning frame.
+ out = unselected_cond_outputs.get(prev_frame_idx, None)
+ t_pos_and_prevs.append((t_pos, out, False))
+
+ for t_pos, prev, is_selected_cond_frame in t_pos_and_prevs:
+ if prev is None:
+ continue # skip padding frames
+ # "maskmem_features" might have been offloaded to CPU in demo use cases,
+ # so we load it back to GPU (it's a no-op if it's already on GPU).
+ feats = prev["maskmem_features"].cuda(non_blocking=True)
+ seq_len = feats.shape[-2] * feats.shape[-1]
+ to_cat_prompt.append(feats.flatten(2).permute(2, 0, 1))
+ to_cat_prompt_mask.append(
+ torch.zeros(B, seq_len, device=device, dtype=bool)
+ )
+ # Spatial positional encoding (it might have been offloaded to CPU in eval)
+ maskmem_enc = prev["maskmem_pos_enc"][-1].cuda()
+ maskmem_enc = maskmem_enc.flatten(2).permute(2, 0, 1)
+
+ if (
+ is_selected_cond_frame
+ and getattr(self, "cond_frame_spatial_embedding", None) is not None
+ ):
+ # add a spatial embedding for the conditioning frame
+ maskmem_enc = maskmem_enc + self.cond_frame_spatial_embedding
+
+ # Temporal positional encoding
+ t = t_pos if not is_selected_cond_frame else 0
+ maskmem_enc = (
+ maskmem_enc + self.maskmem_tpos_enc[self.num_maskmem - t - 1]
+ )
+ to_cat_prompt_pos_embed.append(maskmem_enc)
+
+ # Construct the list of past object pointers
+            # Optionally, select only a subset of spatial memory frames during training
+ if (
+ self.training
+ and self.prob_to_dropout_spatial_mem > 0
+ and self.rng.random() < self.prob_to_dropout_spatial_mem
+ ):
+ num_spatial_mem_keep = self.rng.integers(len(to_cat_prompt) + 1)
+ keep = self.rng.choice(
+ range(len(to_cat_prompt)), num_spatial_mem_keep, replace=False
+ ).tolist()
+ to_cat_prompt = [to_cat_prompt[i] for i in keep]
+ to_cat_prompt_mask = [to_cat_prompt_mask[i] for i in keep]
+ to_cat_prompt_pos_embed = [to_cat_prompt_pos_embed[i] for i in keep]
+
+ max_obj_ptrs_in_encoder = min(num_frames, self.max_obj_ptrs_in_encoder)
+ # First add those object pointers from selected conditioning frames
+ # (optionally, only include object pointers in the past during evaluation)
+ if not self.training:
+ ptr_cond_outputs = {
+ t: out
+ for t, out in selected_cond_outputs.items()
+ if (t >= frame_idx if track_in_reverse else t <= frame_idx)
+ }
+ else:
+ ptr_cond_outputs = selected_cond_outputs
+ pos_and_ptrs = [
+ # Temporal pos encoding contains how far away each pointer is from current frame
+ (
+ (frame_idx - t) * tpos_sign_mul,
+ out["obj_ptr"],
+ True, # is_selected_cond_frame
+ )
+ for t, out in ptr_cond_outputs.items()
+ ]
+
+ # Add up to (max_obj_ptrs_in_encoder - 1) non-conditioning frames before current frame
+ for t_diff in range(1, max_obj_ptrs_in_encoder):
+ if not self.use_memory_selection:
+ t = frame_idx + t_diff if track_in_reverse else frame_idx - t_diff
+ if t < 0 or (num_frames is not None and t >= num_frames):
+ break
+ else:
+ if -t_diff <= -len(valid_indices):
+ break
+ t = valid_indices[-t_diff]
+
+ out = output_dict["non_cond_frame_outputs"].get(
+ t, unselected_cond_outputs.get(t, None)
+ )
+ if out is not None:
+ pos_and_ptrs.append((t_diff, out["obj_ptr"], False))
+
+            # If we have at least one object pointer, add them to the cross attention
+ if len(pos_and_ptrs) > 0:
+ pos_list, ptrs_list, is_selected_cond_frame_list = zip(*pos_and_ptrs)
+ # stack object pointers along dim=0 into [ptr_seq_len, B, C] shape
+ obj_ptrs = torch.stack(ptrs_list, dim=0)
+ if getattr(self, "cond_frame_obj_ptr_embedding", None) is not None:
+ obj_ptrs = (
+ obj_ptrs
+ + self.cond_frame_obj_ptr_embedding
+ * torch.tensor(is_selected_cond_frame_list, device=device)[
+ ..., None, None
+ ].float()
+ )
+ # a temporal positional embedding based on how far each object pointer is from
+ # the current frame (sine embedding normalized by the max pointer num).
+ obj_pos = self._get_tpos_enc(
+ pos_list,
+ max_abs_pos=max_obj_ptrs_in_encoder,
+ device=device,
+ )
+ # expand to batch size
+ obj_pos = obj_pos.unsqueeze(1).expand(-1, B, -1)
+
+ if self.mem_dim < C:
+ # split a pointer into (C // self.mem_dim) tokens for self.mem_dim < C
+ obj_ptrs = obj_ptrs.reshape(-1, B, C // self.mem_dim, self.mem_dim)
+ obj_ptrs = obj_ptrs.permute(0, 2, 1, 3).flatten(0, 1)
+ obj_pos = obj_pos.repeat_interleave(C // self.mem_dim, dim=0)
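+                    # e.g. (illustrative) with C=256 and mem_dim=64, each pointer is split
+                    # into 4 tokens of size 64 and its positional encoding is repeated 4x.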
+ to_cat_prompt.append(obj_ptrs)
+ to_cat_prompt_mask.append(None) # "to_cat_prompt_mask" is not used
+ to_cat_prompt_pos_embed.append(obj_pos)
+ num_obj_ptr_tokens = obj_ptrs.shape[0]
+ else:
+ num_obj_ptr_tokens = 0
+ else:
+ # directly add no-mem embedding (instead of using the transformer encoder)
+ pix_feat_with_mem = current_vision_feats[-1] + self.no_mem_embed
+ pix_feat_with_mem = pix_feat_with_mem.permute(1, 2, 0).view(B, C, H, W)
+ return pix_feat_with_mem
+
+            # Use a dummy token on the first frame (to avoid empty memory input to the transformer encoder)
+ to_cat_prompt = [self.no_mem_embed.expand(1, B, self.mem_dim)]
+ to_cat_prompt_mask = [torch.zeros(B, 1, device=device, dtype=bool)]
+ to_cat_prompt_pos_embed = [self.no_mem_pos_enc.expand(1, B, self.mem_dim)]
+
+ # Step 2: Concatenate the memories and forward through the transformer encoder
+ prompt = torch.cat(to_cat_prompt, dim=0)
+        prompt_mask = None  # For now, the memory masks are always all-zeros, so we simply pass None
+ prompt_pos_embed = torch.cat(to_cat_prompt_pos_embed, dim=0)
+ encoder_out = self.transformer.encoder(
+ src=current_vision_feats,
+ src_key_padding_mask=[None],
+ src_pos=current_vision_pos_embeds,
+ prompt=prompt,
+ prompt_pos=prompt_pos_embed,
+ prompt_key_padding_mask=prompt_mask,
+ feat_sizes=feat_sizes,
+ num_obj_ptr_tokens=num_obj_ptr_tokens,
+ )
+ # reshape the output (HW)BC => BCHW
+ pix_feat_with_mem = encoder_out["memory"].permute(1, 2, 0).view(B, C, H, W)
+ return pix_feat_with_mem
+
+ def _encode_new_memory(
+ self,
+ image,
+ current_vision_feats,
+ feat_sizes,
+ pred_masks_high_res,
+ object_score_logits,
+ is_mask_from_pts,
+ output_dict=None,
+ is_init_cond_frame=False,
+ ):
+ """Encode the current image and its prediction into a memory feature."""
+ B = current_vision_feats[-1].size(1) # batch size on this frame
+ C = self.hidden_dim
+ H, W = feat_sizes[-1] # top-level (lowest-resolution) feature size
+ # top-level feature, (HW)BC => BCHW
+ pix_feat = current_vision_feats[-1].permute(1, 2, 0).view(B, C, H, W)
+ if self.non_overlap_masks_for_mem_enc and not self.training:
+ # optionally, apply non-overlapping constraints to the masks (it's applied
+ # in the batch dimension and should only be used during eval, where all
+ # the objects come from the same video under batch size 1).
+ pred_masks_high_res = self._apply_non_overlapping_constraints(
+ pred_masks_high_res
+ )
+ # scale the raw mask logits with a temperature before applying sigmoid
+ if is_mask_from_pts and not self.training:
+ mask_for_mem = (pred_masks_high_res > 0).float()
+ else:
+ # apply sigmoid on the raw mask logits to turn them into range (0, 1)
+ mask_for_mem = torch.sigmoid(pred_masks_high_res)
+ # apply scale and bias terms to the sigmoid probabilities
+ if self.sigmoid_scale_for_mem_enc != 1.0:
+ mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc
+ if self.sigmoid_bias_for_mem_enc != 0.0:
+ mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc
+
+ if isinstance(self.maskmem_backbone, SimpleMaskEncoder):
+ pix_feat = pix_feat.view_as(pix_feat)
+ maskmem_out = self.maskmem_backbone(
+ pix_feat, mask_for_mem, skip_mask_sigmoid=True
+ )
+ else:
+ maskmem_out = self.maskmem_backbone(image, pix_feat, mask_for_mem)
+ # Clone the feats and pos_enc to enable compilation
+ maskmem_features = self._maybe_clone(maskmem_out["vision_features"])
+ maskmem_pos_enc = [self._maybe_clone(m) for m in maskmem_out["vision_pos_enc"]]
+ # add a no-object embedding to the spatial memory to indicate that the frame
+ # is predicted to be occluded (i.e. no object is appearing in the frame)
+ is_obj_appearing = (object_score_logits > 0).float()
+ maskmem_features += (
+ 1 - is_obj_appearing[..., None, None]
+ ) * self.no_obj_embed_spatial[..., None, None].expand(*maskmem_features.shape)
+
+ return maskmem_features, maskmem_pos_enc
+
+ def forward_tracking(self, backbone_out, input, return_dict=False):
+ """Forward video tracking on each frame (and sample correction clicks)."""
+ img_feats_already_computed = backbone_out["backbone_fpn"] is not None
+ if img_feats_already_computed:
+ # Prepare the backbone features
+ # - vision_feats and vision_pos_embeds are in (HW)BC format
+ (
+ _,
+ vision_feats,
+ vision_pos_embeds,
+ feat_sizes,
+ ) = self._prepare_backbone_features(backbone_out)
+
+ # Starting the stage loop
+ num_frames = backbone_out["num_frames"]
+ init_cond_frames = backbone_out["init_cond_frames"]
+ frames_to_add_correction_pt = backbone_out["frames_to_add_correction_pt"]
+ # first process all the initial conditioning frames to encode them as memory,
+ # and then conditioning on them to track the remaining frames
+ processing_order = init_cond_frames + backbone_out["frames_not_in_init_cond"]
+ output_dict = {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ }
+ for stage_id in processing_order:
+ # Get the image features for the current frames
+ img_ids = input.find_inputs[stage_id].img_ids
+ if img_feats_already_computed:
+ # Retrieve image features according to img_ids (if they are already computed).
+ current_image = input.img_batch[img_ids]
+ current_vision_feats = [x[:, img_ids] for x in vision_feats]
+ current_vision_pos_embeds = [x[:, img_ids] for x in vision_pos_embeds]
+ else:
+ # Otherwise, compute the image features on the fly for the given img_ids
+ # (this might be used for evaluation on long videos to avoid backbone OOM).
+ (
+ current_image,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ ) = self._prepare_backbone_features_per_frame(input.img_batch, img_ids)
+ # Get output masks based on this frame's prompts and previous memory
+ current_out = self.track_step(
+ frame_idx=stage_id,
+ is_init_cond_frame=stage_id in init_cond_frames,
+ current_vision_feats=current_vision_feats,
+ current_vision_pos_embeds=current_vision_pos_embeds,
+ feat_sizes=feat_sizes,
+ image=current_image,
+ point_inputs=backbone_out["point_inputs_per_frame"].get(stage_id, None),
+ mask_inputs=backbone_out["mask_inputs_per_frame"].get(stage_id, None),
+ gt_masks=backbone_out["gt_masks_per_frame"].get(stage_id, None),
+ frames_to_add_correction_pt=frames_to_add_correction_pt,
+ output_dict=output_dict,
+ num_frames=num_frames,
+ )
+ # Append the output, depending on whether it's a conditioning frame
+ add_output_as_cond_frame = stage_id in init_cond_frames or (
+ self.add_all_frames_to_correct_as_cond
+ and stage_id in frames_to_add_correction_pt
+ )
+ if add_output_as_cond_frame:
+ output_dict["cond_frame_outputs"][stage_id] = current_out
+ else:
+ output_dict["non_cond_frame_outputs"][stage_id] = current_out
+
+ if return_dict:
+ return output_dict
+ # turn `output_dict` into a list for loss function
+ all_frame_outputs = {}
+ all_frame_outputs.update(output_dict["cond_frame_outputs"])
+ all_frame_outputs.update(output_dict["non_cond_frame_outputs"])
+ all_frame_outputs = [all_frame_outputs[t] for t in range(num_frames)]
+ # Make DDP happy with activation checkpointing by removing unused keys
+ all_frame_outputs = [
+ {k: v for k, v in d.items() if k != "obj_ptr"} for d in all_frame_outputs
+ ]
+
+ return all_frame_outputs
+
+ def track_step(
+ self,
+ frame_idx,
+ is_init_cond_frame,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ image,
+ point_inputs,
+ mask_inputs,
+ output_dict,
+ num_frames,
+ track_in_reverse=False, # tracking in reverse time order (for demo usage)
+ # Whether to run the memory encoder on the predicted masks. Sometimes we might want
+ # to skip the memory encoder with `run_mem_encoder=False`. For example,
+ # in demo we might call `track_step` multiple times for each user click,
+ # and only encode the memory when the user finalizes their clicks. And in ablation
+ # settings like SAM training on static images, we don't need the memory encoder.
+ run_mem_encoder=True,
+ # The previously predicted SAM mask logits (which can be fed together with new clicks in demo).
+ prev_sam_mask_logits=None,
+ use_prev_mem_frame=True,
+ ):
+ current_out = {"point_inputs": point_inputs, "mask_inputs": mask_inputs}
+ # High-resolution feature maps for the SAM head, reshape (HW)BC => BCHW
+ if len(current_vision_feats) > 1:
+ high_res_features = [
+ x.permute(1, 2, 0).view(x.size(1), x.size(2), *s)
+ for x, s in zip(current_vision_feats[:-1], feat_sizes[:-1])
+ ]
+ else:
+ high_res_features = None
+ if mask_inputs is not None:
+            # When a mask input is provided, directly output it (treating it as a GT mask)
+            # without using a SAM prompt encoder + mask decoder.
+ pix_feat = current_vision_feats[-1].permute(1, 2, 0)
+ pix_feat = pix_feat.view(-1, self.hidden_dim, *feat_sizes[-1])
+ sam_outputs = self._use_mask_as_output(
+ pix_feat, high_res_features, mask_inputs
+ )
+ else:
+ # fused the visual feature with previous memory features in the memory bank
+ pix_feat_with_mem = self._prepare_memory_conditioned_features(
+ frame_idx=frame_idx,
+ is_init_cond_frame=is_init_cond_frame,
+ current_vision_feats=current_vision_feats[-1:],
+ current_vision_pos_embeds=current_vision_pos_embeds[-1:],
+ feat_sizes=feat_sizes[-1:],
+ output_dict=output_dict,
+ num_frames=num_frames,
+ track_in_reverse=track_in_reverse,
+ use_prev_mem_frame=use_prev_mem_frame,
+ )
+ # apply SAM-style segmentation head
+ # here we might feed previously predicted low-res SAM mask logits into the SAM mask decoder,
+ # e.g. in demo where such logits come from earlier interaction instead of correction sampling
+ # (in this case, the SAM mask decoder should have `self.iter_use_prev_mask_pred=True`, and
+ # any `mask_inputs` shouldn't reach here as they are sent to _use_mask_as_output instead)
+ if prev_sam_mask_logits is not None:
+ assert self.iter_use_prev_mask_pred
+ assert point_inputs is not None and mask_inputs is None
+ mask_inputs = prev_sam_mask_logits
+ multimask_output = self._use_multimask(is_init_cond_frame, point_inputs)
+ sam_outputs = self._forward_sam_heads(
+ backbone_features=pix_feat_with_mem,
+ point_inputs=point_inputs,
+ mask_inputs=mask_inputs,
+ high_res_features=high_res_features,
+ multimask_output=multimask_output,
+ )
+ (
+ _,
+ high_res_multimasks,
+ ious,
+ low_res_masks,
+ high_res_masks,
+ obj_ptr,
+ object_score_logits,
+ ) = sam_outputs
+ # Use the final prediction (after all correction steps for output and eval)
+ current_out["pred_masks"] = low_res_masks
+ current_out["pred_masks_high_res"] = high_res_masks
+ current_out["obj_ptr"] = obj_ptr
+ if self.use_memory_selection:
+ current_out["object_score_logits"] = object_score_logits
+ iou_score = ious.max(-1)[0]
+ current_out["iou_score"] = iou_score
+ current_out["eff_iou_score"] = self.cal_mem_score(
+ object_score_logits, iou_score
+ )
+ if not self.training:
+ # Only add this in inference (to avoid unused param in activation checkpointing;
+ # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
+ current_out["object_score_logits"] = object_score_logits
+
+ # Finally run the memory encoder on the predicted mask to encode
+ # it into a new memory feature (that can be used in future frames)
+ # (note that `self.num_maskmem == 0` is primarily used for reproducing SAM on
+ # images, in which case we'll just skip memory encoder to save compute).
+ if run_mem_encoder and self.num_maskmem > 0:
+ high_res_masks_for_mem_enc = high_res_masks
+ maskmem_features, maskmem_pos_enc = self._encode_new_memory(
+ image=image,
+ current_vision_feats=current_vision_feats,
+ feat_sizes=feat_sizes,
+ pred_masks_high_res=high_res_masks_for_mem_enc,
+ object_score_logits=object_score_logits,
+ is_mask_from_pts=(point_inputs is not None),
+ output_dict=output_dict,
+ is_init_cond_frame=is_init_cond_frame,
+ )
+ current_out["maskmem_features"] = maskmem_features
+ current_out["maskmem_pos_enc"] = maskmem_pos_enc
+ else:
+ current_out["maskmem_features"] = None
+ current_out["maskmem_pos_enc"] = None
+
+ # Optionally, offload the outputs to CPU memory during evaluation to avoid
+ # GPU OOM on very long videos or very large resolution or too many objects
+ if self.offload_output_to_cpu_for_eval and not self.training:
+ # Here we only keep those keys needed for evaluation to get a compact output
+ trimmed_out = {
+ "pred_masks": current_out["pred_masks"].cpu(),
+ "pred_masks_high_res": current_out["pred_masks_high_res"].cpu(),
+ # other items for evaluation (these are small tensors so we keep them on GPU)
+ "obj_ptr": current_out["obj_ptr"],
+ "object_score_logits": current_out["object_score_logits"],
+ }
+ if run_mem_encoder and self.num_maskmem > 0:
+ trimmed_out["maskmem_features"] = maskmem_features.cpu()
+ trimmed_out["maskmem_pos_enc"] = [x.cpu() for x in maskmem_pos_enc]
+ if self.use_memory_selection:
+ trimmed_out["iou_score"] = current_out["iou_score"].cpu()
+ trimmed_out["eff_iou_score"] = current_out["eff_iou_score"].cpu()
+ current_out = trimmed_out
+
+ # Optionally, trim the output of past non-conditioning frame (r * num_maskmem frames
+ # before the current frame) during evaluation. This is intended to save GPU or CPU
+ # memory for semi-supervised VOS eval, where only the first frame receives prompts.
+ def _trim_past_out(past_out, current_out):
+ if past_out is None:
+ return None
+ return {
+ "pred_masks": past_out["pred_masks"],
+ "obj_ptr": past_out["obj_ptr"],
+ "object_score_logits": past_out["object_score_logits"],
+ }
+
+ if self.trim_past_non_cond_mem_for_eval and not self.training:
+ r = self.memory_temporal_stride_for_eval
+ past_frame_idx = frame_idx - r * self.num_maskmem
+ past_out = output_dict["non_cond_frame_outputs"].get(past_frame_idx, None)
+
+ if past_out is not None:
+ if (
+ self.use_memory_selection
+ and past_out.get("eff_iou_score", 0) < self.mf_threshold
+ ) or not self.use_memory_selection:
+ output_dict["non_cond_frame_outputs"][past_frame_idx] = (
+ _trim_past_out(past_out, current_out)
+ )
+
+ if (
+ self.use_memory_selection and not self.offload_output_to_cpu_for_eval
+        ):  ## for memory selection: trim frames that are far in the past to save memory
+ far_old_frame_idx = frame_idx - 20 * self.max_obj_ptrs_in_encoder
+ past_out = output_dict["non_cond_frame_outputs"].get(
+ far_old_frame_idx, None
+ )
+ if past_out is not None:
+ output_dict["non_cond_frame_outputs"][far_old_frame_idx] = (
+ _trim_past_out(past_out, current_out)
+ )
+
+ return current_out
+
+ def _use_multimask(self, is_init_cond_frame, point_inputs):
+ """Whether to use multimask output in the SAM head."""
+ num_pts = 0 if point_inputs is None else point_inputs["point_labels"].size(1)
+ multimask_output = (
+ self.multimask_output_in_sam
+ and (is_init_cond_frame or self.multimask_output_for_tracking)
+ and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num)
+ )
+ return multimask_output
+
+ def _apply_non_overlapping_constraints(self, pred_masks):
+ """
+ Apply non-overlapping constraints to the object scores in pred_masks. Here we
+ keep only the highest scoring object at each spatial location in pred_masks.
+ """
+ batch_size = pred_masks.size(0)
+ if batch_size == 1:
+ return pred_masks
+
+ device = pred_masks.device
+ # "max_obj_inds": object index of the object with the highest score at each location
+ max_obj_inds = torch.argmax(pred_masks, dim=0, keepdim=True)
+ # "batch_obj_inds": object index of each object slice (along dim 0) in `pred_masks`
+ batch_obj_inds = torch.arange(batch_size, device=device)[:, None, None, None]
+ keep = max_obj_inds == batch_obj_inds
+ # suppress overlapping regions' scores below -10.0 so that the foreground regions
+ # don't overlap (here sigmoid(-10.0)=4.5398e-05)
+ pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))
+ return pred_masks
+
+ def _compile_all_components(self):
+ """Compile all model components for faster inference."""
+ # a larger cache size to hold varying number of shapes for torch.compile
+ # see https://github.com/pytorch/pytorch/blob/v2.5.1/torch/_dynamo/config.py#L42-L49
+ torch._dynamo.config.cache_size_limit = 64
+ torch._dynamo.config.accumulated_cache_size_limit = 2048
+ from sam3.perflib.compile import compile_wrapper
+
+ logging.info("Compiling all components. First time may be very slow.")
+
+ self.maskmem_backbone.forward = compile_wrapper(
+ self.maskmem_backbone.forward,
+ mode="max-autotune",
+ fullgraph=True,
+ dynamic=False,
+ )
+ self.transformer.encoder.forward = compile_wrapper(
+ self.transformer.encoder.forward,
+ mode="max-autotune",
+ fullgraph=True,
+ dynamic=True, # Num. of memories varies
+ )
+ # We disable compilation of sam_prompt_encoder as it sometimes gives a large accuracy regression,
+ # especially when sam_mask_prompt (previous mask logits) is not None
+ # self.sam_prompt_encoder.forward = torch.compile(
+ # self.sam_prompt_encoder.forward,
+ # mode="max-autotune",
+ # fullgraph=True,
+ # dynamic=False, # Accuracy regression on True
+ # )
+ self.sam_mask_decoder.forward = compile_wrapper(
+ self.sam_mask_decoder.forward,
+ mode="max-autotune",
+ fullgraph=True,
+ dynamic=False, # Accuracy regression on True
+ )
+
+ def _maybe_clone(self, x):
+ """Clone a tensor if and only if `self.compile_all_components` is True."""
+ return x.clone() if self.compile_all_components else x
+
+
+def concat_points(old_point_inputs, new_points, new_labels):
+ """Add new points and labels to previous point inputs (add at the end)."""
+ if old_point_inputs is None:
+ points, labels = new_points, new_labels
+ else:
+ points = torch.cat([old_point_inputs["point_coords"], new_points], dim=1)
+ labels = torch.cat([old_point_inputs["point_labels"], new_labels], dim=1)
+
+ return {"point_coords": points, "point_labels": labels}
diff --git a/sam3/model/sam3_tracker_utils.py b/sam3/model/sam3_tracker_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7afc70aab716b4ad22370350ac836fbc46c9012f
--- /dev/null
+++ b/sam3/model/sam3_tracker_utils.py
@@ -0,0 +1,427 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+from numpy.typing import NDArray
+
+from sam3.model.edt import edt_triton
+
+
+def sample_box_points(
+ masks: torch.Tensor,
+ noise: float = 0.1, # SAM default
+ noise_bound: int = 20, # SAM default
+ top_left_label: int = 2,
+ bottom_right_label: int = 3,
+) -> tuple[NDArray, NDArray]:
+ """
+ Sample a noised version of the top left and bottom right corners of a given `bbox`
+
+ Inputs:
+ - masks: [B, 1, H, W] tensor
+ - noise: noise as a fraction of box width and height, dtype=float
+ - noise_bound: maximum amount of noise (in pure pixels), dtype=int
+
+ Returns:
+ - box_coords: [B, num_pt, 2], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.float
+    - box_labels: [B, num_pt], label 2 is reserved for top left and 3 for bottom right corners, dtype=torch.int32
+ """
+ device = masks.device
+ box_coords = mask_to_box(masks)
+ B, _, H, W = masks.shape
+ box_labels = torch.tensor(
+ [top_left_label, bottom_right_label], dtype=torch.int, device=device
+ ).repeat(B)
+ if noise > 0.0:
+ if not isinstance(noise_bound, torch.Tensor):
+ noise_bound = torch.tensor(noise_bound, device=device)
+ bbox_w = box_coords[..., 2] - box_coords[..., 0]
+ bbox_h = box_coords[..., 3] - box_coords[..., 1]
+ max_dx = torch.min(bbox_w * noise, noise_bound)
+ max_dy = torch.min(bbox_h * noise, noise_bound)
+ box_noise = 2 * torch.rand(B, 1, 4, device=device) - 1
+ box_noise = box_noise * torch.stack((max_dx, max_dy, max_dx, max_dy), dim=-1)
+
+ box_coords = box_coords + box_noise
+ img_bounds = (
+ torch.tensor([W, H, W, H], device=device) - 1
+ ) # uncentered pixel coords
+ box_coords.clamp_(torch.zeros_like(img_bounds), img_bounds) # In place clamping
+
+ box_coords = box_coords.reshape(-1, 2, 2) # always 2 points
+ box_labels = box_labels.reshape(-1, 2)
+ return box_coords, box_labels
+
+
+def mask_to_box(masks: torch.Tensor):
+ """
+ compute bounding box given an input mask
+
+ Inputs:
+ - masks: [B, 1, H, W] tensor
+
+ Returns:
+ - box_coords: [B, 1, 4], contains (x, y) coordinates of top left and bottom right box corners, dtype=torch.Tensor
+ """
+ B, _, h, w = masks.shape
+ device = masks.device
+ mask_area = masks.sum(dim=(-1, -2))
+ xs = torch.arange(w, device=device, dtype=torch.int32)
+ ys = torch.arange(h, device=device, dtype=torch.int32)
+ grid_xs, grid_ys = torch.meshgrid(xs, ys, indexing="xy")
+ grid_xs = grid_xs[None, None, ...].expand(B, 1, h, w)
+ grid_ys = grid_ys[None, None, ...].expand(B, 1, h, w)
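+    # Replace background pixels with sentinel values (w or h for the min, -1 for the max)
+    # so that min/max over the flattened spatial dims yields the tight bounding box.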
+ min_xs, _ = torch.min(torch.where(masks, grid_xs, w).flatten(-2), dim=-1)
+ max_xs, _ = torch.max(torch.where(masks, grid_xs, -1).flatten(-2), dim=-1)
+ min_ys, _ = torch.min(torch.where(masks, grid_ys, h).flatten(-2), dim=-1)
+ max_ys, _ = torch.max(torch.where(masks, grid_ys, -1).flatten(-2), dim=-1)
+ bbox_coords = torch.stack((min_xs, min_ys, max_xs, max_ys), dim=-1)
+ bbox_coords = torch.where(
+ mask_area[..., None] > 0, bbox_coords, torch.zeros_like(bbox_coords)
+ )
+ return bbox_coords
+
+
+def sample_random_points_from_errors(gt_masks, pred_masks, num_pt=1):
+ """
+ Sample `num_pt` random points (along with their labels) independently from the error regions.
+
+ Inputs:
+ - gt_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool
+ - pred_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool or None
+ - num_pt: int, number of points to sample independently for each of the B error maps
+
+ Outputs:
+ - points: [B, num_pt, 2], dtype=torch.float, contains (x, y) coordinates of each sampled point
+ - labels: [B, num_pt], dtype=torch.int32, where 1 means positive clicks and 0 means
+ negative clicks
+ """
+ if pred_masks is None: # if pred_masks is not provided, treat it as empty
+ pred_masks = torch.zeros_like(gt_masks)
+ assert gt_masks.dtype == torch.bool and gt_masks.size(1) == 1
+ assert pred_masks.dtype == torch.bool and pred_masks.shape == gt_masks.shape
+ assert num_pt >= 0
+
+ B, _, H_im, W_im = gt_masks.shape
+ device = gt_masks.device
+
+ # false positive region, a new point sampled in this region should have
+ # negative label to correct the FP error
+ fp_masks = ~gt_masks & pred_masks
+ # false negative region, a new point sampled in this region should have
+ # positive label to correct the FN error
+ fn_masks = gt_masks & ~pred_masks
+ # whether the prediction completely match the ground-truth on each mask
+ all_correct = torch.all((gt_masks == pred_masks).flatten(2), dim=2)
+ all_correct = all_correct[..., None, None]
+
+ # channel 0 is FP map, while channel 1 is FN map
+ pts_noise = torch.rand(B, num_pt, H_im, W_im, 2, device=device)
+ # sample a negative new click from FP region or a positive new click
+    # from FN region, depending on where the maximum falls,
+ # and in case the predictions are all correct (no FP or FN), we just
+ # sample a negative click from the background region
+ pts_noise[..., 0] *= fp_masks | (all_correct & ~gt_masks)
+ pts_noise[..., 1] *= fn_masks
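+    # The argmax over the flattened (H*W*2) noise picks a uniformly random allowed pixel;
+    # the parity of the flat index encodes the channel, i.e. the click label
+    # (0 = negative, from the FP/background channel; 1 = positive, from the FN channel).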
+ pts_idx = pts_noise.flatten(2).argmax(dim=2)
+ labels = (pts_idx % 2).to(torch.int32)
+ pts_idx = pts_idx // 2
+ pts_x = pts_idx % W_im
+ pts_y = pts_idx // W_im
+ points = torch.stack([pts_x, pts_y], dim=2).to(torch.float)
+ return points, labels
+
+
+def sample_one_point_from_error_center(gt_masks, pred_masks, padding=True):
+ """
+ Sample 1 random point (along with its label) from the center of each error region,
+ that is, the point with the largest distance to the boundary of each error region.
+ This is the RITM sampling method from https://github.com/saic-vul/ritm_interactive_segmentation/blob/master/isegm/inference/clicker.py
+
+ Inputs:
+ - gt_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool
+ - pred_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool or None
+ - padding: if True, pad with boundary of 1 px for distance transform
+
+ Outputs:
+ - points: [B, 1, 2], dtype=torch.float, contains (x, y) coordinates of each sampled point
+ - labels: [B, 1], dtype=torch.int32, where 1 means positive clicks and 0 means negative clicks
+ """
+ if pred_masks is None:
+ pred_masks = torch.zeros_like(gt_masks)
+ assert gt_masks.dtype == torch.bool and gt_masks.size(1) == 1
+ assert pred_masks.dtype == torch.bool and pred_masks.shape == gt_masks.shape
+
+ B, _, H, W = gt_masks.shape
+
+ # false positive region, a new point sampled in this region should have
+ # negative label to correct the FP error
+ fp_masks = (~gt_masks & pred_masks).squeeze(1)
+ # false negative region, a new point sampled in this region should have
+ # positive label to correct the FN error
+ fn_masks = (gt_masks & ~pred_masks).squeeze(1)
+
+ if padding:
+ padded_fp_masks = torch.zeros(
+ B, H + 2, W + 2, dtype=fp_masks.dtype, device=fp_masks.device
+ )
+ padded_fp_masks[:, 1 : H + 1, 1 : W + 1] = fp_masks
+ padded_fn_masks = torch.zeros(
+ B, H + 2, W + 2, dtype=fp_masks.dtype, device=fp_masks.device
+ )
+ padded_fn_masks[:, 1 : H + 1, 1 : W + 1] = fn_masks
+ else:
+ padded_fp_masks = fp_masks
+ padded_fn_masks = fn_masks
+
+ fn_mask_dt = edt_triton(padded_fn_masks)
+ fp_mask_dt = edt_triton(padded_fp_masks)
+ if padding:
+ fn_mask_dt = fn_mask_dt[:, 1:-1, 1:-1]
+ fp_mask_dt = fp_mask_dt[:, 1:-1, 1:-1]
+
+ fn_max, fn_argmax = fn_mask_dt.reshape(B, -1).max(dim=-1)
+ fp_max, fp_argmax = fp_mask_dt.reshape(B, -1).max(dim=-1)
+ is_positive = fn_max > fp_max
+ chosen = torch.where(is_positive, fn_argmax, fp_argmax)
+ points_x = chosen % W
+ points_y = chosen // W
+
+ labels = is_positive.long()
+ points = torch.stack([points_x, points_y], -1)
+ return points.unsqueeze(1), labels.unsqueeze(1)
+
+
+def sample_one_point_from_error_center_slow(gt_masks, pred_masks, padding=True):
+ """
+ Sample 1 random point (along with its label) from the center of each error region,
+ that is, the point with the largest distance to the boundary of each error region.
+ This is the RITM sampling method from https://github.com/saic-vul/ritm_interactive_segmentation/blob/master/isegm/inference/clicker.py
+
+ Inputs:
+ - gt_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool
+ - pred_masks: [B, 1, H_im, W_im] masks, dtype=torch.bool or None
+ - padding: if True, pad with boundary of 1 px for distance transform
+
+ Outputs:
+ - points: [B, 1, 2], dtype=torch.float, contains (x, y) coordinates of each sampled point
+ - labels: [B, 1], dtype=torch.int32, where 1 means positive clicks and 0 means negative clicks
+ """
+ import cv2 # delay OpenCV import to avoid unnecessary dependency
+
+ if pred_masks is None:
+ pred_masks = torch.zeros_like(gt_masks)
+ assert gt_masks.dtype == torch.bool and gt_masks.size(1) == 1
+ assert pred_masks.dtype == torch.bool and pred_masks.shape == gt_masks.shape
+
+ B, _, _, W_im = gt_masks.shape
+ device = gt_masks.device
+
+ # false positive region, a new point sampled in this region should have
+ # negative label to correct the FP error
+ fp_masks = ~gt_masks & pred_masks
+ # false negative region, a new point sampled in this region should have
+ # positive label to correct the FN error
+ fn_masks = gt_masks & ~pred_masks
+
+ fp_masks = fp_masks.cpu().numpy()
+ fn_masks = fn_masks.cpu().numpy()
+ points = torch.zeros(B, 1, 2, dtype=torch.float)
+ labels = torch.ones(B, 1, dtype=torch.int32)
+ for b in range(B):
+ fn_mask = fn_masks[b, 0]
+ fp_mask = fp_masks[b, 0]
+ if padding:
+ fn_mask = np.pad(fn_mask, ((1, 1), (1, 1)), "constant")
+ fp_mask = np.pad(fp_mask, ((1, 1), (1, 1)), "constant")
+ # compute the distance of each point in FN/FP region to its boundary
+ fn_mask_dt = cv2.distanceTransform(fn_mask.astype(np.uint8), cv2.DIST_L2, 0)
+ fp_mask_dt = cv2.distanceTransform(fp_mask.astype(np.uint8), cv2.DIST_L2, 0)
+ if padding:
+ fn_mask_dt = fn_mask_dt[1:-1, 1:-1]
+ fp_mask_dt = fp_mask_dt[1:-1, 1:-1]
+
+ # take the point in FN/FP region with the largest distance to its boundary
+ fn_mask_dt_flat = fn_mask_dt.reshape(-1)
+ fp_mask_dt_flat = fp_mask_dt.reshape(-1)
+ fn_argmax = np.argmax(fn_mask_dt_flat)
+ fp_argmax = np.argmax(fp_mask_dt_flat)
+ is_positive = fn_mask_dt_flat[fn_argmax] > fp_mask_dt_flat[fp_argmax]
+ pt_idx = fn_argmax if is_positive else fp_argmax
+ points[b, 0, 0] = pt_idx % W_im # x
+ points[b, 0, 1] = pt_idx // W_im # y
+ labels[b, 0] = int(is_positive)
+
+ points = points.to(device)
+ labels = labels.to(device)
+ return points, labels
+
+
+def get_next_point(gt_masks, pred_masks, method):
+ if method == "uniform":
+ return sample_random_points_from_errors(gt_masks, pred_masks)
+ elif method == "center":
+ return sample_one_point_from_error_center(gt_masks, pred_masks)
+ else:
+ raise ValueError(f"unknown sampling method {method}")
+
+
+def select_closest_cond_frames(
+ frame_idx, cond_frame_outputs, max_cond_frame_num, keep_first_cond_frame=False
+):
+ """
+ Select up to `max_cond_frame_num` conditioning frames from `cond_frame_outputs`
+ that are temporally closest to the current frame at `frame_idx`. Here, we take
+ - a) the closest conditioning frame before `frame_idx` (if any);
+ - b) the closest conditioning frame after `frame_idx` (if any);
+ - c) any other temporally closest conditioning frames until reaching a total
+ of `max_cond_frame_num` conditioning frames.
+
+ Outputs:
+ - selected_outputs: selected items (keys & values) from `cond_frame_outputs`.
+ - unselected_outputs: items (keys & values) not selected in `cond_frame_outputs`.
+ """
+ if max_cond_frame_num == -1 or len(cond_frame_outputs) <= max_cond_frame_num:
+ selected_outputs = cond_frame_outputs
+ unselected_outputs = {}
+ else:
+ assert max_cond_frame_num >= 2, "we should allow using 2+ conditioning frames"
+ selected_outputs = {}
+ if keep_first_cond_frame:
+ idx_first = min(
+ (t for t in cond_frame_outputs if t < frame_idx), default=None
+ )
+ if idx_first is None:
+ # Maybe we are tracking in reverse
+ idx_first = max(
+ (t for t in cond_frame_outputs if t > frame_idx), default=None
+ )
+ if idx_first is not None:
+ selected_outputs[idx_first] = cond_frame_outputs[idx_first]
+ # the closest conditioning frame before `frame_idx` (if any)
+ idx_before = max((t for t in cond_frame_outputs if t < frame_idx), default=None)
+ if idx_before is not None:
+ selected_outputs[idx_before] = cond_frame_outputs[idx_before]
+
+ # the closest conditioning frame after `frame_idx` (if any)
+ idx_after = min((t for t in cond_frame_outputs if t >= frame_idx), default=None)
+ if idx_after is not None:
+ selected_outputs[idx_after] = cond_frame_outputs[idx_after]
+
+ # add other temporally closest conditioning frames until reaching a total
+ # of `max_cond_frame_num` conditioning frames.
+ num_remain = max_cond_frame_num - len(selected_outputs)
+ inds_remain = sorted(
+ (t for t in cond_frame_outputs if t not in selected_outputs),
+ key=lambda x: abs(x - frame_idx),
+ )[:num_remain]
+ selected_outputs.update((t, cond_frame_outputs[t]) for t in inds_remain)
+ unselected_outputs = {
+ t: v for t, v in cond_frame_outputs.items() if t not in selected_outputs
+ }
+
+ return selected_outputs, unselected_outputs
+
+
+def get_1d_sine_pe(pos_inds, dim, temperature=10000):
+ """
+ Get 1D sine positional embedding as in the original Transformer paper.
+ """
+ pe_dim = dim // 2
+ dim_t = torch.arange(pe_dim, dtype=torch.float32, device=pos_inds.device)
+ dim_t = temperature ** (2 * (dim_t // 2) / pe_dim)
+
+ pos_embed = pos_inds.unsqueeze(-1) / dim_t
+ pos_embed = torch.cat([pos_embed.sin(), pos_embed.cos()], dim=-1)
+ return pos_embed
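+# Illustrative usage: get_1d_sine_pe(torch.tensor([0.5]), dim=256) returns a [1, 256]
+# embedding whose first 128 entries are sine terms and the last 128 are cosine terms.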
+
+
+def get_best_gt_match_from_multimasks(pred_multimasks, gt_masks, pred_scores=None):
+ """
+ Get the mask with the best match to GT masks (based on IoU) from pred_multimasks.
+ Optionally, use `pred_scores` to break ties in case all IoUs are zeros.
+ """
+ assert pred_multimasks.ndim == 4 and gt_masks.ndim == 4
+ if pred_multimasks.size(1) == 1:
+ return pred_multimasks # only a single mask channel, nothing to select
+
+ pred_multimasks_binary = pred_multimasks > 0
+ area_i = torch.sum(pred_multimasks_binary & gt_masks, dim=(2, 3)).float()
+ area_u = torch.sum(pred_multimasks_binary | gt_masks, dim=(2, 3)).float()
+ ious = area_i / torch.clamp(area_u, min=1.0)
+
+ # In case all IoUs are zeros (e.g. because the GT mask is empty), use pred_scores
+ # to break ties and select the best mask
+ if pred_scores is not None:
+ has_nonzero_ious = torch.any(ious > 0).expand_as(ious)
+ scores = torch.where(has_nonzero_ious, ious, pred_scores)
+ else:
+ scores = ious
+
+ # Finally, take the best mask prediction (with the highest score)
+ best_scores_inds = torch.argmax(scores, dim=-1)
+ batch_inds = torch.arange(scores.size(0), device=scores.device)
+ best_pred_mask = pred_multimasks[batch_inds, best_scores_inds].unsqueeze(1)
+ return best_pred_mask
+
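+# Illustrative shapes for `get_best_gt_match_from_multimasks` above (assumptions):
+# `pred_multimasks` of shape (B, M, H, W) holds M candidate mask logits per object,
+# `gt_masks` of shape (B, 1, H, W) is a boolean tensor, and the returned best match
+# has shape (B, 1, H, W); `pred_scores`, if given, should broadcast to shape (B, M).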
+
+def fill_holes_in_mask_scores(mask, max_area, fill_holes=True, remove_sprinkles=True):
+ """
+ A post processor to fill small holes in mask scores with area under `max_area`.
+ Holes are those small connected components in either background or foreground.
+
+ Note that it relies on the "cc_torch" package to find connected components fast. You can
+ install it via the following command (`TORCH_CUDA_ARCH_LIST="8.0 9.0"` targets A100 and H100 GPUs):
+ ```
+ pip uninstall -y cc_torch; TORCH_CUDA_ARCH_LIST="8.0 9.0" pip install git+https://github.com/ronghanghu/cc_torch
+ ```
+ Otherwise, it falls back to a slightly slower Triton implementation, or to skimage if the tensor is on CPU.
+ """
+
+ if max_area <= 0:
+ return mask # nothing to fill in this case
+
+ if fill_holes:
+ # We remove small connected components in background by changing them to foreground
+ # with a small positive mask score (0.1).
+ mask_bg = mask <= 0
+ bg_area_thresh = max_area
+ _, areas_bg = _get_connected_components_with_padding(mask_bg)
+ small_components_bg = mask_bg & (areas_bg <= bg_area_thresh)
+ mask = torch.where(small_components_bg, 0.1, mask)
+
+ if remove_sprinkles:
+ # We remove small connected components in foreground by changing them to background
+ # with a small negative mask score (-0.1). Here we only remove connected components
+ # whose areas are under both `max_area` and half of the entire mask's area. This
+ # removes sprinkles while avoiding filtering out tiny objects that we want to track.
+ mask_fg = mask > 0
+ fg_area_thresh = torch.sum(mask_fg, dim=(2, 3), keepdim=True, dtype=torch.int32)
+ fg_area_thresh.floor_divide_(2).clamp_(max=max_area)
+ _, areas_fg = _get_connected_components_with_padding(mask_fg)
+ small_components_fg = mask_fg & (areas_fg <= fg_area_thresh)
+ mask = torch.where(small_components_fg, -0.1, mask)
+ return mask
+
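+# Usage sketch for `fill_holes_in_mask_scores` above (values are illustrative):
+# given `mask` of shape (B, 1, H, W) holding mask logits, e.g.
+#   mask = fill_holes_in_mask_scores(mask, max_area=100)
+# background holes up to 100 pixels are flipped to a weak foreground score (0.1) and
+# small foreground sprinkles are flipped to a weak background score (-0.1).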
+
+def _get_connected_components_with_padding(mask):
+ """Get connected components from masks (possibly padding them to an even size)."""
+ from sam3.perflib.connected_components import connected_components
+
+ mask = mask.to(torch.uint8)
+ _, _, H, W = mask.shape
+ # make sure both height and width are even (to be compatible with cc_torch)
+ pad_h = H % 2
+ pad_w = W % 2
+ if pad_h == 0 and pad_w == 0:
+ labels, counts = connected_components(mask)
+ else:
+ # pad the mask to make its height and width even
+ # padding format is (padding_left,padding_right,padding_top,padding_bottom)
+ mask_pad = F.pad(mask, (0, pad_w, 0, pad_h), mode="constant", value=0)
+ labels, counts = connected_components(mask_pad)
+ labels = labels[:, :, :H, :W]
+ counts = counts[:, :, :H, :W]
+
+ return labels, counts
diff --git a/sam3/model/sam3_tracking_predictor.py b/sam3/model/sam3_tracking_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..28ab2bd32c4c19f7f2fa93b5360f24485c65ca2d
--- /dev/null
+++ b/sam3/model/sam3_tracking_predictor.py
@@ -0,0 +1,1370 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+from collections import OrderedDict
+
+import torch
+
+from sam3.model.sam3_tracker_base import concat_points, NO_OBJ_SCORE, Sam3TrackerBase
+from sam3.model.sam3_tracker_utils import fill_holes_in_mask_scores
+from sam3.model.utils.sam2_utils import load_video_frames
+from tqdm.auto import tqdm
+
+
+class Sam3TrackerPredictor(Sam3TrackerBase):
+ """
+ The demo class that extends the `Sam3TrackerBase` to handle user interactions
+ and manage inference states, with support for multi-object tracking.
+ """
+
+ def __init__(
+ self,
+ # whether to clear non-conditioning memory of the surrounding frames (which may contain outdated information) after adding correction clicks;
+ # note that this only applies to *single-object tracking* unless `clear_non_cond_mem_for_multi_obj` is also set to True
+ clear_non_cond_mem_around_input=False,
+ # whether to also clear non-conditioning memory of the surrounding frames (only effective when `clear_non_cond_mem_around_input` is True).
+ clear_non_cond_mem_for_multi_obj=False,
+ # if fill_hole_area > 0, we fill small holes in the final masks up to this area (after resizing them to the original video resolution)
+ fill_hole_area=0,
+ # if always_start_from_first_ann_frame is True, we always start tracking from the frame where we receive the first annotation (clicks or mask)
+ # and ignore the `start_frame_idx` passed to `propagate_in_video`
+ always_start_from_first_ann_frame=False,
+ # the maximum number of points to be used in the prompt encoder, which reduces the domain gap with training (which only uses 8 points)
+ # - if it's set to a positive integer, we only take the first `max_point_num_in_prompt_enc//2` points and
+ # the last `(max_point_num_in_prompt_enc - max_point_num_in_prompt_enc//2)` points in the prompt encoder
+ # - if it's set to 0 or negative, this option is turned off and we use all points in the prompt encoder
+ max_point_num_in_prompt_enc=16,
+ non_overlap_masks_for_output=True,
+ # checkpoint_file=None,
+ **kwargs,
+ ):
+ super().__init__(**kwargs)
+ self.clear_non_cond_mem_around_input = clear_non_cond_mem_around_input
+ self.clear_non_cond_mem_for_multi_obj = clear_non_cond_mem_for_multi_obj
+ self.fill_hole_area = fill_hole_area
+ self.always_start_from_first_ann_frame = always_start_from_first_ann_frame
+ self.max_point_num_in_prompt_enc = max_point_num_in_prompt_enc
+ self.non_overlap_masks_for_output = non_overlap_masks_for_output
+
+ self.bf16_context = torch.autocast(device_type="cuda", dtype=torch.bfloat16)
+ self.bf16_context.__enter__() # keep the bf16 autocast context active for the predictor's entire lifetime
+
+ self.iter_use_prev_mask_pred = True
+ self.add_all_frames_to_correct_as_cond = True
+
+ @torch.inference_mode()
+ def init_state(
+ self,
+ video_height=None,
+ video_width=None,
+ num_frames=None,
+ video_path=None,
+ cached_features=None,
+ offload_video_to_cpu=False,
+ offload_state_to_cpu=False,
+ async_loading_frames=False,
+ ):
+ """Initialize a inference state."""
+ inference_state = {}
+ # whether to offload the video frames to CPU memory
+ # turning on this option saves the GPU memory with only a very small overhead
+ inference_state["offload_video_to_cpu"] = offload_video_to_cpu
+ # whether to offload the inference state to CPU memory
+ # turning on this option saves the GPU memory at the cost of a lower tracking fps
+ # (e.g. in a test case of 768x768 model, fps dropped from 27 to 24 when tracking one object
+ # and from 24 to 21 when tracking two objects)
+ inference_state["offload_state_to_cpu"] = offload_state_to_cpu
+ inference_state["device"] = self.device
+ if offload_state_to_cpu:
+ inference_state["storage_device"] = torch.device("cpu")
+ else:
+ inference_state["storage_device"] = torch.device("cuda")
+
+ if video_path is not None:
+ images, video_height, video_width = load_video_frames(
+ video_path=video_path,
+ image_size=self.image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ async_loading_frames=async_loading_frames,
+ compute_device=inference_state["storage_device"],
+ )
+ inference_state["images"] = images
+ inference_state["num_frames"] = len(images)
+ inference_state["video_height"] = video_height
+ inference_state["video_width"] = video_width
+ else:
+ # the original video height and width, used for resizing final output scores
+ inference_state["video_height"] = video_height
+ inference_state["video_width"] = video_width
+ inference_state["num_frames"] = num_frames
+ # inputs on each frame
+ inference_state["point_inputs_per_obj"] = {}
+ inference_state["mask_inputs_per_obj"] = {}
+ # visual features on a small number of recently visited frames for quick interactions
+ inference_state["cached_features"] = (
+ {} if cached_features is None else cached_features
+ )
+ # values that don't change across frames (so we only need to hold one copy of them)
+ inference_state["constants"] = {}
+ # mapping between client-side object id and model-side object index
+ inference_state["obj_id_to_idx"] = OrderedDict()
+ inference_state["obj_idx_to_id"] = OrderedDict()
+ inference_state["obj_ids"] = []
+ # A storage to hold the model's tracking results and states on each frame
+ inference_state["output_dict"] = {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ }
+ # The index of the frame that received the first annotation
+ inference_state["first_ann_frame_idx"] = None
+ # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
+ inference_state["output_dict_per_obj"] = {}
+ # A temporary storage to hold new outputs when the user interacts with a frame
+ # to add clicks or mask (it's merged into "output_dict" before propagation starts)
+ inference_state["temp_output_dict_per_obj"] = {}
+ # Frames that already hold consolidated outputs from click or mask inputs
+ # (we directly use their consolidated outputs during tracking)
+ inference_state["consolidated_frame_inds"] = {
+ "cond_frame_outputs": set(), # set containing frame indices
+ "non_cond_frame_outputs": set(), # set containing frame indices
+ }
+ # metadata for each tracking frame (e.g. which direction it's tracked)
+ inference_state["tracking_has_started"] = False
+ inference_state["frames_already_tracked"] = {}
+ self.clear_all_points_in_video(inference_state)
+ return inference_state
+
+ def _obj_id_to_idx(self, inference_state, obj_id):
+ """Map client-side object id to model-side object index."""
+ obj_idx = inference_state["obj_id_to_idx"].get(obj_id, None)
+ if obj_idx is not None:
+ return obj_idx
+
+ # This is a new object id not sent to the server before. We only allow adding
+ # new objects *before* the tracking starts.
+ allow_new_object = not inference_state["tracking_has_started"]
+ if allow_new_object:
+ # get the next object slot
+ obj_idx = len(inference_state["obj_id_to_idx"])
+ inference_state["obj_id_to_idx"][obj_id] = obj_idx
+ inference_state["obj_idx_to_id"][obj_idx] = obj_id
+ inference_state["obj_ids"] = list(inference_state["obj_id_to_idx"])
+ # set up input and output structures for this object
+ inference_state["point_inputs_per_obj"][obj_idx] = {}
+ inference_state["mask_inputs_per_obj"][obj_idx] = {}
+ inference_state["output_dict_per_obj"][obj_idx] = {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ }
+ inference_state["temp_output_dict_per_obj"][obj_idx] = {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: }
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: }
+ }
+ return obj_idx
+ else:
+ raise RuntimeError(
+ f"Cannot add new object id {obj_id} after tracking starts. "
+ f"All existing object ids: {inference_state['obj_ids']}."
+ )
+
+ def _obj_idx_to_id(self, inference_state, obj_idx):
+ """Map model-side object index to client-side object id."""
+ return inference_state["obj_idx_to_id"][obj_idx]
+
+ def _get_obj_num(self, inference_state):
+ """Get the total number of unique object ids received so far in this session."""
+ return len(inference_state["obj_idx_to_id"])
+
+ @torch.inference_mode()
+ def add_new_points_or_box(
+ self,
+ inference_state,
+ frame_idx,
+ obj_id,
+ points=None,
+ labels=None,
+ clear_old_points=True,
+ rel_coordinates=True,
+ use_prev_mem_frame=False,
+ normalize_coords=True,
+ box=None,
+ ):
+ """Add new points to a frame."""
+ obj_idx = self._obj_id_to_idx(inference_state, obj_id)
+ point_inputs_per_frame = inference_state["point_inputs_per_obj"][obj_idx]
+ mask_inputs_per_frame = inference_state["mask_inputs_per_obj"][obj_idx]
+
+ if (points is not None) != (labels is not None):
+ raise ValueError("points and labels must be provided together")
+ if points is None and box is None:
+ raise ValueError("at least one of points or box must be provided as input")
+
+ if points is None:
+ points = torch.zeros(0, 2, dtype=torch.float32)
+ elif not isinstance(points, torch.Tensor):
+ points = torch.tensor(points, dtype=torch.float32)
+ if labels is None:
+ labels = torch.zeros(0, dtype=torch.int32)
+ elif not isinstance(labels, torch.Tensor):
+ labels = torch.tensor(labels, dtype=torch.int32)
+ if points.dim() == 2:
+ points = points.unsqueeze(0) # add batch dimension
+ if labels.dim() == 1:
+ labels = labels.unsqueeze(0) # add batch dimension
+
+ if rel_coordinates:
+ # convert the points from relative coordinates to absolute coordinates
+ if points is not None:
+ points = points * self.image_size
+ if box is not None:
+ box = box * self.image_size
+
+ # If `box` is provided, we add it as the first two points with labels 2 and 3
+ # along with the user-provided points (consistent with how SAM 2 is trained).
+ if box is not None:
+ if not clear_old_points:
+ raise ValueError(
+ "cannot add box without clearing old points, since "
+ "box prompt must be provided before any point prompt "
+ "(please use clear_old_points=True instead)"
+ )
+ if not isinstance(box, torch.Tensor):
+ box = torch.tensor(box, dtype=torch.float32, device=points.device)
+ box_coords = box.reshape(1, 2, 2)
+ box_labels = torch.tensor([2, 3], dtype=torch.int32, device=labels.device)
+ box_labels = box_labels.reshape(1, 2)
+ points = torch.cat([box_coords, points], dim=1)
+ labels = torch.cat([box_labels, labels], dim=1)
+
+ points = points.to(inference_state["device"])
+ labels = labels.to(inference_state["device"])
+
+ if not clear_old_points:
+ point_inputs = point_inputs_per_frame.get(frame_idx, None)
+ else:
+ point_inputs = None
+ point_inputs = concat_points(point_inputs, points, labels)
+
+ point_inputs_per_frame[frame_idx] = point_inputs
+ mask_inputs_per_frame.pop(frame_idx, None)
+ # If this frame hasn't been tracked before, we treat it as an initial conditioning
+ # frame, meaning that the input points are used to generate segments on this frame without
+ # using any memory from other frames, like in SAM. Otherwise (if it has been tracked),
+ # the input points will be used to correct the already tracked masks.
+ is_init_cond_frame = frame_idx not in inference_state["frames_already_tracked"]
+ # whether to track in reverse time order
+ if is_init_cond_frame:
+ reverse = False
+ else:
+ reverse = inference_state["frames_already_tracked"][frame_idx]["reverse"]
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
+ obj_temp_output_dict = inference_state["temp_output_dict_per_obj"][obj_idx]
+ # Add a frame to conditioning output if it's an initial conditioning frame or
+ # if the model sees all frames receiving clicks/mask as conditioning frames.
+ is_cond = is_init_cond_frame or self.add_all_frames_to_correct_as_cond
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+
+ # Limit to a maximum number of input points to the prompt encoder (to reduce domain gap)
+ num_points = point_inputs["point_coords"].size(1)
+ if num_points > self.max_point_num_in_prompt_enc > 0:
+ num_first = self.max_point_num_in_prompt_enc // 2
+ num_last = self.max_point_num_in_prompt_enc - num_first
+ point_inputs["point_coords"] = torch.cat(
+ [
+ point_inputs["point_coords"][:, :num_first],
+ point_inputs["point_coords"][:, -num_last:],
+ ],
+ dim=1,
+ )
+ point_inputs["point_labels"] = torch.cat(
+ [
+ point_inputs["point_labels"][:, :num_first],
+ point_inputs["point_labels"][:, -num_last:],
+ ],
+ dim=1,
+ )
+ logging.warning(
+ f"Too many points ({num_points}) are provided on frame {frame_idx}. Only "
+ f"the first {num_first} points and the last {num_last} points will be used."
+ )
+ # Get any previously predicted mask logits on this object and feed it along with
+ # the new clicks into the SAM mask decoder when `self.iter_use_prev_mask_pred=True`.
+ prev_sam_mask_logits = None
+ if self.iter_use_prev_mask_pred:
+ # lookup temporary output dict first, which contains the most recent output
+ # (if not found, then lookup conditioning and non-conditioning frame output)
+ prev_out = obj_temp_output_dict[storage_key].get(frame_idx)
+ if prev_out is None:
+ prev_out = obj_output_dict["cond_frame_outputs"].get(frame_idx)
+ if prev_out is None:
+ prev_out = obj_output_dict["non_cond_frame_outputs"].get(frame_idx)
+
+ if prev_out is not None and prev_out["pred_masks"] is not None:
+ prev_sam_mask_logits = prev_out["pred_masks"].cuda(non_blocking=True)
+ # Clamp the scale of prev_sam_mask_logits to avoid rare numerical issues.
+ prev_sam_mask_logits = torch.clamp(prev_sam_mask_logits, -32.0, 32.0)
+ current_out, _ = self._run_single_frame_inference(
+ inference_state=inference_state,
+ output_dict=obj_output_dict, # run on the slice of a single object
+ frame_idx=frame_idx,
+ batch_size=1, # run on the slice of a single object
+ is_init_cond_frame=is_init_cond_frame,
+ point_inputs=point_inputs,
+ mask_inputs=None,
+ reverse=reverse,
+ # Skip the memory encoder when adding clicks or mask. We execute the memory encoder
+ # at the beginning of `propagate_in_video` (after the user finalizes their clicks). This
+ # allows us to enforce non-overlapping constraints on all objects before encoding
+ # them into memory.
+ run_mem_encoder=False,
+ prev_sam_mask_logits=prev_sam_mask_logits,
+ use_prev_mem_frame=use_prev_mem_frame,
+ )
+ # Add the output to the output dict (to be used as future memory)
+ obj_temp_output_dict[storage_key][frame_idx] = current_out
+
+ # Resize the output mask to the original video resolution
+ obj_ids = inference_state["obj_ids"]
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ inference_state,
+ frame_idx,
+ is_cond=is_cond,
+ run_mem_encoder=False,
+ consolidate_at_video_res=True,
+ )
+ _, video_res_masks = self._get_orig_video_res_output(
+ inference_state, consolidated_out["pred_masks_video_res"]
+ )
+ low_res_masks = None # not needed by the demo
+ return frame_idx, obj_ids, low_res_masks, video_res_masks
+
+ @torch.inference_mode()
+ def add_new_mask(
+ self,
+ inference_state,
+ frame_idx,
+ obj_id,
+ mask,
+ add_mask_to_memory=False,
+ ):
+ """Add new mask to a frame."""
+ obj_idx = self._obj_id_to_idx(inference_state, obj_id)
+ point_inputs_per_frame = inference_state["point_inputs_per_obj"][obj_idx]
+ mask_inputs_per_frame = inference_state["mask_inputs_per_obj"][obj_idx]
+
+ assert mask.dim() == 2
+ mask_H, mask_W = mask.shape
+ mask_inputs_orig = mask[None, None] # add batch and channel dimension
+ mask_inputs_orig = mask_inputs_orig.float().to(inference_state["device"])
+
+ # resize the mask if it doesn't match the model's input mask size
+ if mask_H != self.input_mask_size or mask_W != self.input_mask_size:
+ mask_inputs = torch.nn.functional.interpolate(
+ mask_inputs_orig,
+ size=(self.input_mask_size, self.input_mask_size),
+ align_corners=False,
+ mode="bilinear",
+ antialias=True, # use antialias for downsampling
+ )
+ else:
+ mask_inputs = mask_inputs_orig
+
+ # also get the mask at the original video resolution (for outputting)
+ video_H = inference_state["video_height"]
+ video_W = inference_state["video_width"]
+ if mask_H != video_H or mask_W != video_W:
+ mask_inputs_video_res = torch.nn.functional.interpolate(
+ mask_inputs_orig,
+ size=(video_H, video_W),
+ align_corners=False,
+ mode="bilinear",
+ antialias=True, # use antialias for potential downsampling
+ )
+ else:
+ mask_inputs_video_res = mask_inputs_orig
+ # convert mask_inputs_video_res to binary (threshold at 0.5 as it is in range 0~1)
+ mask_inputs_video_res = mask_inputs_video_res > 0.5
+
+ mask_inputs_per_frame[frame_idx] = mask_inputs_video_res
+ point_inputs_per_frame.pop(frame_idx, None)
+ # If this frame hasn't been tracked before, we treat it as an initial conditioning
+ # frame, meaning that the input points are used to generate segments on this frame without
+ # using any memory from other frames, like in SAM. Otherwise (if it has been tracked),
+ # the input points will be used to correct the already tracked masks.
+ is_init_cond_frame = frame_idx not in inference_state["frames_already_tracked"]
+ # whether to track in reverse time order
+ if is_init_cond_frame:
+ reverse = False
+ else:
+ reverse = inference_state["frames_already_tracked"][frame_idx]["reverse"]
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
+ obj_temp_output_dict = inference_state["temp_output_dict_per_obj"][obj_idx]
+ # Add a frame to conditioning output if it's an initial conditioning frame or
+ # if the model sees all frames receiving clicks/mask as conditioning frames.
+ is_cond = is_init_cond_frame or self.add_all_frames_to_correct_as_cond
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+
+ current_out, _ = self._run_single_frame_inference(
+ inference_state=inference_state,
+ output_dict=obj_output_dict, # run on the slice of a single object
+ frame_idx=frame_idx,
+ batch_size=1, # run on the slice of a single object
+ is_init_cond_frame=is_init_cond_frame,
+ point_inputs=None,
+ mask_inputs=mask_inputs,
+ reverse=reverse,
+ # Skip the memory encoder when adding clicks or mask. We execute the memory encoder
+ # at the beginning of `propagate_in_video` (after the user finalizes their clicks). This
+ # allows us to enforce non-overlapping constraints on all objects before encoding
+ # them into memory.
+ run_mem_encoder=False,
+ )
+ # We directly use the input mask at video resolution as the output mask for a better
+ # video editing experience (so that the masks don't change after each brushing).
+ # Here NO_OBJ_SCORE is a large negative value to represent the background and
+ # similarly -NO_OBJ_SCORE is a large positive value to represent the foreground.
+ current_out["pred_masks"] = None
+ current_out["pred_masks_video_res"] = torch.where(
+ mask_inputs_video_res, -NO_OBJ_SCORE, NO_OBJ_SCORE
+ )
+ # Add the output to the output dict (to be used as future memory)
+ obj_temp_output_dict[storage_key][frame_idx] = current_out
+ # Remove the overlapping proportion of other objects' input masks on this frame
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
+ for obj_idx2, obj_temp_output_dict2 in temp_output_dict_per_obj.items():
+ if obj_idx2 == obj_idx:
+ continue
+ current_out2 = obj_temp_output_dict2[storage_key].get(frame_idx, None)
+ if current_out2 is not None and "pred_masks_video_res" in current_out2:
+ current_out2["pred_masks_video_res"] = torch.where(
+ mask_inputs_video_res,
+ NO_OBJ_SCORE,
+ current_out2["pred_masks_video_res"],
+ )
+
+ # Resize the output mask to the original video resolution
+ obj_ids = inference_state["obj_ids"]
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ inference_state,
+ frame_idx,
+ is_cond=is_cond,
+ run_mem_encoder=False,
+ consolidate_at_video_res=True,
+ )
+ _, video_res_masks = self._get_orig_video_res_output(
+ inference_state, consolidated_out["pred_masks_video_res"]
+ )
+ low_res_masks = None # not needed by the demo
+ return frame_idx, obj_ids, low_res_masks, video_res_masks
+
+ def add_new_points(self, *args, **kwargs):
+ """Deprecated method. Please use `add_new_points_or_box` instead."""
+ return self.add_new_points_or_box(*args, **kwargs)
+
+ def _get_orig_video_res_output(self, inference_state, any_res_masks):
+ """
+ Resize the object scores to the original video resolution (video_res_masks)
+ and apply non-overlapping constraints for final output.
+ """
+ device = inference_state["device"]
+ video_H = inference_state["video_height"]
+ video_W = inference_state["video_width"]
+ any_res_masks = any_res_masks.to(device, non_blocking=True)
+ if any_res_masks.shape[-2:] == (video_H, video_W):
+ video_res_masks = any_res_masks
+ else:
+ video_res_masks = torch.nn.functional.interpolate(
+ any_res_masks,
+ size=(video_H, video_W),
+ mode="bilinear",
+ align_corners=False,
+ )
+ if self.non_overlap_masks_for_output:
+ video_res_masks = self._apply_non_overlapping_constraints(video_res_masks)
+ # potentially fill holes in the predicted masks
+ if self.fill_hole_area > 0:
+ video_res_masks = fill_holes_in_mask_scores(
+ video_res_masks, self.fill_hole_area
+ )
+ return any_res_masks, video_res_masks
+
+ def _consolidate_temp_output_across_obj(
+ self,
+ inference_state,
+ frame_idx,
+ is_cond,
+ run_mem_encoder,
+ consolidate_at_video_res=False,
+ ):
+ """
+ Consolidate the per-object temporary outputs in `temp_output_dict_per_obj` on
+ a frame into a single output for all objects, including
+ 1) fill any missing objects either from `output_dict_per_obj` (if they exist in
+ `output_dict_per_obj` for this frame) or leave them as placeholder values
+ (if they don't exist in `output_dict_per_obj` for this frame);
+ 2) if specified, rerun the memory encoder after applying non-overlapping constraints
+ on the object scores.
+ """
+ batch_size = self._get_obj_num(inference_state)
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+ # Optionally, we allow consolidating the temporary outputs at the original
+ # video resolution (to provide a better editing experience for mask prompts).
+ if consolidate_at_video_res:
+ assert not run_mem_encoder, "memory encoder cannot run at video resolution"
+ consolidated_H = inference_state["video_height"]
+ consolidated_W = inference_state["video_width"]
+ consolidated_mask_key = "pred_masks_video_res"
+ else:
+ consolidated_H = consolidated_W = self.low_res_mask_size
+ consolidated_mask_key = "pred_masks"
+
+ # Initialize `consolidated_out`. Its "maskmem_features" and "maskmem_pos_enc"
+ # will be added when rerunning the memory encoder after applying non-overlapping
+ # constraints to object scores. Its "pred_masks" are prefilled with a large
+ # negative value (NO_OBJ_SCORE) to represent missing objects.
+ consolidated_out = {
+ "maskmem_features": None,
+ "maskmem_pos_enc": None,
+ consolidated_mask_key: torch.full(
+ size=(batch_size, 1, consolidated_H, consolidated_W),
+ fill_value=NO_OBJ_SCORE,
+ dtype=torch.float32,
+ device=inference_state["storage_device"],
+ ),
+ "obj_ptr": torch.full(
+ size=(batch_size, self.hidden_dim),
+ fill_value=NO_OBJ_SCORE,
+ dtype=torch.float32,
+ device=inference_state["device"],
+ ),
+ "object_score_logits": torch.full(
+ size=(batch_size, 1),
+ # default to 10.0 for object_score_logits, i.e. assuming the object is
+ # present as sigmoid(10)=1, same as in `predict_masks` of `MaskDecoder`
+ fill_value=10.0,
+ dtype=torch.float32,
+ device=inference_state["device"],
+ ),
+ }
+ if self.use_memory_selection:
+ consolidated_out["iou_score"] = torch.full(
+ size=(batch_size, 1),
+ fill_value=0.0,
+ dtype=torch.float32,
+ device=inference_state["device"],
+ )
+ empty_mask_ptr = None
+ for obj_idx in range(batch_size):
+ obj_temp_output_dict = inference_state["temp_output_dict_per_obj"][obj_idx]
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
+ out = obj_temp_output_dict[storage_key].get(frame_idx, None)
+ # If the object doesn't appear in "temp_output_dict_per_obj" on this frame,
+ # we fall back and look up its previous output in "output_dict_per_obj".
+ # We look up both "cond_frame_outputs" and "non_cond_frame_outputs" in
+ # "output_dict_per_obj" to find a previous output for this object.
+ if out is None:
+ out = obj_output_dict["cond_frame_outputs"].get(frame_idx, None)
+ if out is None:
+ out = obj_output_dict["non_cond_frame_outputs"].get(frame_idx, None)
+ # If the object doesn't appear in "output_dict_per_obj" either, we skip it
+ # and leave its mask scores to the default scores (i.e. the NO_OBJ_SCORE
+ # placeholder above) and set its object pointer to be a dummy pointer.
+ if out is None:
+ # Fill in dummy object pointers for those objects without any inputs or
+ # tracking outcomes on this frame (only do it under `run_mem_encoder=True`,
+ # i.e. when we need to build the memory for tracking).
+ if run_mem_encoder:
+ if empty_mask_ptr is None:
+ empty_mask_ptr = self._get_empty_mask_ptr(
+ inference_state, frame_idx
+ )
+ # fill object pointer with a dummy pointer (based on an empty mask)
+ consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = empty_mask_ptr
+ continue
+ # Add the temporary object output mask to consolidated output mask
+ # (use "pred_masks_video_res" if it's available)
+ obj_mask = out.get("pred_masks_video_res", out["pred_masks"])
+ consolidated_pred_masks = consolidated_out[consolidated_mask_key]
+ if obj_mask.shape[-2:] == consolidated_pred_masks.shape[-2:]:
+ consolidated_pred_masks[obj_idx : obj_idx + 1] = obj_mask
+ else:
+ # Resize first if temporary object mask has a different resolution
+ is_downsampling = "pred_masks_video_res" in out
+ resized_obj_mask = torch.nn.functional.interpolate(
+ obj_mask,
+ size=consolidated_pred_masks.shape[-2:],
+ mode="bilinear",
+ align_corners=False,
+ antialias=is_downsampling, # use antialias for downsampling
+ )
+ consolidated_pred_masks[obj_idx : obj_idx + 1] = resized_obj_mask
+ consolidated_out["obj_ptr"][obj_idx : obj_idx + 1] = out["obj_ptr"]
+ consolidated_out["object_score_logits"][obj_idx : obj_idx + 1] = out[
+ "object_score_logits"
+ ]
+ if self.use_memory_selection:
+ consolidated_out["iou_score"][obj_idx : obj_idx + 1] = out["iou_score"]
+ # Optionally, apply non-overlapping constraints on the consolidated scores
+ # and rerun the memory encoder
+ if run_mem_encoder:
+ device = inference_state["device"]
+ high_res_masks = torch.nn.functional.interpolate(
+ consolidated_out["pred_masks"].to(device, non_blocking=True),
+ size=(self.image_size, self.image_size),
+ mode="bilinear",
+ align_corners=False,
+ )
+ high_res_masks = self._apply_non_overlapping_constraints(high_res_masks)
+ maskmem_features, maskmem_pos_enc = self._run_memory_encoder(
+ inference_state=inference_state,
+ frame_idx=frame_idx,
+ batch_size=batch_size,
+ high_res_masks=high_res_masks,
+ object_score_logits=consolidated_out["object_score_logits"],
+ is_mask_from_pts=True, # these frames are what the user interacted with
+ )
+ consolidated_out["maskmem_features"] = maskmem_features
+ consolidated_out["maskmem_pos_enc"] = maskmem_pos_enc
+
+ return consolidated_out
+
+ def _get_empty_mask_ptr(self, inference_state, frame_idx):
+ """Get a dummy object pointer based on an empty mask on the current frame."""
+ # A dummy (empty) mask with a single object
+ batch_size = 1
+ mask_inputs = torch.zeros(
+ (batch_size, 1, self.image_size, self.image_size),
+ dtype=torch.float32,
+ device=inference_state["device"],
+ )
+
+ # Retrieve correct image features
+ (
+ image,
+ _,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ ) = self._get_image_feature(inference_state, frame_idx, batch_size)
+
+ # Feed the empty mask and image feature above to get a dummy object pointer
+ current_out = self.track_step(
+ frame_idx=frame_idx,
+ is_init_cond_frame=True,
+ current_vision_feats=current_vision_feats,
+ current_vision_pos_embeds=current_vision_pos_embeds,
+ feat_sizes=feat_sizes,
+ image=image,
+ point_inputs=None,
+ mask_inputs=mask_inputs,
+ gt_masks=None,
+ frames_to_add_correction_pt=[],
+ output_dict={
+ "cond_frame_outputs": {},
+ "non_cond_frame_outputs": {},
+ },
+ num_frames=inference_state["num_frames"],
+ track_in_reverse=False,
+ run_mem_encoder=False,
+ prev_sam_mask_logits=None,
+ )
+ return current_out["obj_ptr"]
+
+ @torch.inference_mode()
+ def propagate_in_video_preflight(self, inference_state, run_mem_encoder=True):
+ """Prepare inference_state and consolidate temporary outputs before tracking."""
+ # Tracking has started and we don't allow adding new objects until the session is reset.
+ inference_state["tracking_has_started"] = True
+ batch_size = self._get_obj_num(inference_state)
+
+ # Consolidate per-object temporary outputs in "temp_output_dict_per_obj" and
+ # add them into "output_dict".
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
+ output_dict = inference_state["output_dict"]
+ # "consolidated_frame_inds" contains indices of those frames where consolidated
+ # temporary outputs have been added (either in this call or any previous calls
+ # to `propagate_in_video_preflight`).
+ consolidated_frame_inds = inference_state["consolidated_frame_inds"]
+ for is_cond in [False, True]:
+ # Separately consolidate conditioning and non-conditioning temp outputs
+ storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
+ # Find all the frames that contain temporary outputs for any objects
+ # (these should be the frames that have just received clicks or mask inputs
+ # via `add_new_points` or `add_new_mask`)
+ temp_frame_inds = set()
+ for obj_temp_output_dict in temp_output_dict_per_obj.values():
+ temp_frame_inds.update(obj_temp_output_dict[storage_key].keys())
+ consolidated_frame_inds[storage_key].update(temp_frame_inds)
+ # consolidate the temporary output across all objects on this frame
+ for frame_idx in temp_frame_inds:
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ inference_state,
+ frame_idx,
+ is_cond=is_cond,
+ run_mem_encoder=run_mem_encoder,
+ )
+ # merge them into "output_dict" and also create per-object slices
+ output_dict[storage_key][frame_idx] = consolidated_out
+ self._add_output_per_object(
+ inference_state, frame_idx, consolidated_out, storage_key
+ )
+ clear_non_cond_mem = self.clear_non_cond_mem_around_input and (
+ self.clear_non_cond_mem_for_multi_obj or batch_size <= 1
+ )
+ if clear_non_cond_mem:
+ # clear non-conditioning memory of the surrounding frames
+ self._clear_non_cond_mem_around_input(inference_state, frame_idx)
+
+ # clear temporary outputs in `temp_output_dict_per_obj`
+ for obj_temp_output_dict in temp_output_dict_per_obj.values():
+ obj_temp_output_dict[storage_key].clear()
+
+ # edge case: if an output is added to "cond_frame_outputs", we remove any prior
+ # output on the same frame in "non_cond_frame_outputs"
+ for frame_idx in output_dict["cond_frame_outputs"]:
+ output_dict["non_cond_frame_outputs"].pop(frame_idx, None)
+ for obj_output_dict in inference_state["output_dict_per_obj"].values():
+ for frame_idx in obj_output_dict["cond_frame_outputs"]:
+ obj_output_dict["non_cond_frame_outputs"].pop(frame_idx, None)
+ for frame_idx in consolidated_frame_inds["cond_frame_outputs"]:
+ assert frame_idx in output_dict["cond_frame_outputs"]
+ consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx)
+
+ # Make sure that the frame indices in "consolidated_frame_inds" are exactly those frames
+ # with either points or mask inputs (which should be true under a correct demo workflow).
+ all_consolidated_frame_inds = (
+ consolidated_frame_inds["cond_frame_outputs"]
+ | consolidated_frame_inds["non_cond_frame_outputs"]
+ )
+ input_frames_inds = set()
+ for point_inputs_per_frame in inference_state["point_inputs_per_obj"].values():
+ input_frames_inds.update(point_inputs_per_frame.keys())
+ for mask_inputs_per_frame in inference_state["mask_inputs_per_obj"].values():
+ input_frames_inds.update(mask_inputs_per_frame.keys())
+ assert all_consolidated_frame_inds == input_frames_inds
+ # Record the first interacted frame index (for tracking start)
+ if inference_state["first_ann_frame_idx"] is None:
+ inference_state["first_ann_frame_idx"] = min(
+ input_frames_inds, default=None
+ )
+ # In case `first_ann_frame_idx` is not in the conditioning frames (e.g. because
+ # we cleared the input points on that frame), pick the first conditioning frame
+ if (
+ inference_state["first_ann_frame_idx"]
+ not in output_dict["cond_frame_outputs"]
+ ):
+ inference_state["first_ann_frame_idx"] = min(
+ output_dict["cond_frame_outputs"], default=None
+ )
+
+ def _get_processing_order(
+ self, inference_state, start_frame_idx, max_frame_num_to_track, reverse
+ ):
+ num_frames = inference_state["num_frames"]
+ # set start index, end index, and processing order
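+ # (e.g. with start_frame_idx=10, max_frame_num_to_track=5 and reverse=False,
+ # the forward branch below yields frames 10 through 15 inclusive; the values
+ # here are purely illustrative)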
+ if self.always_start_from_first_ann_frame:
+ # in this case, we always start tracking from the frame where we receive
+ # the initial annotation and ignore the provided start_frame_idx
+ start_frame_idx = inference_state["first_ann_frame_idx"]
+ if start_frame_idx is None:
+ # default: start from the earliest frame with input points
+ start_frame_idx = min(inference_state["output_dict"]["cond_frame_outputs"])
+ if max_frame_num_to_track is None:
+ # default: track all the frames in the video
+ max_frame_num_to_track = num_frames
+ if reverse:
+ end_frame_idx = max(start_frame_idx - max_frame_num_to_track, 0)
+ if start_frame_idx > 0:
+ processing_order = range(start_frame_idx, end_frame_idx - 1, -1)
+ else:
+ # this is the edge case where we start from frame 0 and track in reverse order;
+ # in this case, we track a single frame (frame 0)
+ processing_order = [0]
+ else:
+ end_frame_idx = min(
+ start_frame_idx + max_frame_num_to_track, num_frames - 1
+ )
+ processing_order = range(start_frame_idx, end_frame_idx + 1)
+ return processing_order
+
+ @torch.inference_mode()
+ def propagate_in_video(
+ self,
+ inference_state,
+ start_frame_idx,
+ max_frame_num_to_track,
+ reverse,
+ tqdm_disable=False,
+ obj_ids=None,
+ run_mem_encoder=True,
+ propagate_preflight=False,
+ ):
+ """Propagate the input points across frames to track in the entire video."""
+ if propagate_preflight:
+ self.propagate_in_video_preflight(inference_state)
+ # NOTE: This is a copy from the parent class, except that we return object scores as well.
+ output_dict = inference_state["output_dict"]
+ consolidated_frame_inds = inference_state["consolidated_frame_inds"]
+ if obj_ids is not None:
+ raise NotImplementedError(
+ "Per-object tracking yet for batched inference if not implemented."
+ )
+ obj_ids = inference_state["obj_ids"]
+ batch_size = self._get_obj_num(inference_state)
+ if len(output_dict["cond_frame_outputs"]) == 0:
+ raise RuntimeError("No points are provided; please add points first")
+ clear_non_cond_mem = self.clear_non_cond_mem_around_input and (
+ self.clear_non_cond_mem_for_multi_obj or batch_size <= 1
+ )
+
+ processing_order = self._get_processing_order(
+ inference_state,
+ start_frame_idx,
+ max_frame_num_to_track,
+ reverse,
+ )
+
+ for frame_idx in tqdm(
+ processing_order, desc="propagate in video", disable=tqdm_disable
+ ):
+ # We skip those frames already in consolidated outputs (these are frames
+ # that received input clicks or mask). Note that we cannot directly run
+ # batched forward on them via `_run_single_frame_inference` because the
+ # number of clicks on each object might be different.
+ if frame_idx in consolidated_frame_inds["cond_frame_outputs"]:
+ storage_key = "cond_frame_outputs"
+ current_out = output_dict[storage_key][frame_idx]
+ pred_masks = current_out["pred_masks"]
+ obj_scores = current_out["object_score_logits"]
+ if clear_non_cond_mem:
+ # clear non-conditioning memory of the surrounding frames
+ self._clear_non_cond_mem_around_input(inference_state, frame_idx)
+ elif frame_idx in consolidated_frame_inds["non_cond_frame_outputs"]:
+ storage_key = "non_cond_frame_outputs"
+ current_out = output_dict[storage_key][frame_idx]
+ pred_masks = current_out["pred_masks"]
+ obj_scores = current_out["object_score_logits"]
+ else:
+ storage_key = "non_cond_frame_outputs"
+ current_out, pred_masks = self._run_single_frame_inference(
+ inference_state=inference_state,
+ output_dict=output_dict,
+ frame_idx=frame_idx,
+ batch_size=batch_size,
+ is_init_cond_frame=False,
+ point_inputs=None,
+ mask_inputs=None,
+ reverse=reverse,
+ run_mem_encoder=run_mem_encoder,
+ )
+ obj_scores = current_out["object_score_logits"]
+ output_dict[storage_key][frame_idx] = current_out
+ # Create slices of per-object outputs for subsequent interaction with each
+ # individual object after tracking.
+ self._add_output_per_object(
+ inference_state, frame_idx, current_out, storage_key
+ )
+ inference_state["frames_already_tracked"][frame_idx] = {"reverse": reverse}
+
+ # Resize the output mask to the original video resolution (we directly use
+ # the mask scores on GPU for output to avoid any CPU conversion in between)
+ low_res_masks, video_res_masks = self._get_orig_video_res_output(
+ inference_state, pred_masks
+ )
+ yield frame_idx, obj_ids, low_res_masks, video_res_masks, obj_scores
+
+ def _add_output_per_object(
+ self, inference_state, frame_idx, current_out, storage_key
+ ):
+ """
+ Split a multi-object output into per-object output slices and add them into
+ `output_dict_per_obj`. The resulting slices share the same tensor storage.
+ """
+ maskmem_features = current_out["maskmem_features"]
+ assert maskmem_features is None or isinstance(maskmem_features, torch.Tensor)
+
+ maskmem_pos_enc = current_out["maskmem_pos_enc"]
+ assert maskmem_pos_enc is None or isinstance(maskmem_pos_enc, list)
+
+ output_dict_per_obj = inference_state["output_dict_per_obj"]
+ for obj_idx, obj_output_dict in output_dict_per_obj.items():
+ obj_slice = slice(obj_idx, obj_idx + 1)
+ obj_out = {
+ "maskmem_features": None,
+ "maskmem_pos_enc": None,
+ "pred_masks": current_out["pred_masks"][obj_slice],
+ "obj_ptr": current_out["obj_ptr"][obj_slice],
+ "object_score_logits": current_out["object_score_logits"][obj_slice],
+ }
+ if self.use_memory_selection:
+ obj_out["iou_score"] = current_out["iou_score"][obj_slice]
+ if maskmem_features is not None:
+ obj_out["maskmem_features"] = maskmem_features[obj_slice]
+ if maskmem_pos_enc is not None:
+ obj_out["maskmem_pos_enc"] = [x[obj_slice] for x in maskmem_pos_enc]
+ obj_output_dict[storage_key][frame_idx] = obj_out
+
+ @torch.inference_mode()
+ def clear_all_points_in_frame(
+ self, inference_state, frame_idx, obj_id, need_output=True
+ ):
+ """Remove all input points or mask in a specific frame for a given object."""
+ obj_idx = self._obj_id_to_idx(inference_state, obj_id)
+
+ # Clear the conditioning information on the given frame
+ inference_state["point_inputs_per_obj"][obj_idx].pop(frame_idx, None)
+ inference_state["mask_inputs_per_obj"][obj_idx].pop(frame_idx, None)
+
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
+ temp_output_dict_per_obj[obj_idx]["cond_frame_outputs"].pop(frame_idx, None)
+ temp_output_dict_per_obj[obj_idx]["non_cond_frame_outputs"].pop(frame_idx, None)
+
+ # Check and see if there are still any inputs left on this frame
+ batch_size = self._get_obj_num(inference_state)
+ frame_has_input = False
+ for obj_idx2 in range(batch_size):
+ if frame_idx in inference_state["point_inputs_per_obj"][obj_idx2]:
+ frame_has_input = True
+ break
+ if frame_idx in inference_state["mask_inputs_per_obj"][obj_idx2]:
+ frame_has_input = True
+ break
+
+ # If this frame has no remaining inputs for any objects, we further clear its
+ # conditioning frame status
+ if not frame_has_input:
+ output_dict = inference_state["output_dict"]
+ consolidated_frame_inds = inference_state["consolidated_frame_inds"]
+ consolidated_frame_inds["cond_frame_outputs"].discard(frame_idx)
+ consolidated_frame_inds["non_cond_frame_outputs"].discard(frame_idx)
+ # Remove the frame's conditioning output (possibly downgrading it to non-conditioning)
+ out = output_dict["cond_frame_outputs"].pop(frame_idx, None)
+ if out is not None:
+ # The frame is not a conditioning frame anymore since it's not receiving inputs,
+ # so we "downgrade" its output (if exists) to a non-conditioning frame output.
+ output_dict["non_cond_frame_outputs"][frame_idx] = out
+ inference_state["frames_already_tracked"].pop(frame_idx, None)
+ # Similarly, do it for the sliced output on each object.
+ for obj_idx2 in range(batch_size):
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx2]
+ obj_out = obj_output_dict["cond_frame_outputs"].pop(frame_idx, None)
+ if obj_out is not None:
+ obj_output_dict["non_cond_frame_outputs"][frame_idx] = obj_out
+
+ # If all the conditioning frames have been removed, we also clear the tracking outputs
+ if len(output_dict["cond_frame_outputs"]) == 0:
+ self._reset_tracking_results(inference_state)
+
+ if not need_output:
+ return
+ # Finally, output updated masks per object (after removing the inputs above)
+ obj_ids = inference_state["obj_ids"]
+ is_cond = any(
+ frame_idx in obj_temp_output_dict["cond_frame_outputs"]
+ for obj_temp_output_dict in temp_output_dict_per_obj.values()
+ )
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ inference_state,
+ frame_idx,
+ is_cond=is_cond,
+ run_mem_encoder=False,
+ consolidate_at_video_res=True,
+ )
+ _, video_res_masks = self._get_orig_video_res_output(
+ inference_state, consolidated_out["pred_masks_video_res"]
+ )
+ low_res_masks = None # not needed by the demo
+ return frame_idx, obj_ids, low_res_masks, video_res_masks
+
+ @torch.inference_mode()
+ def clear_all_points_in_video(self, inference_state):
+ """Remove all input points or mask in all frames throughout the video."""
+ self._reset_tracking_results(inference_state)
+ # Remove all object ids
+ inference_state["obj_id_to_idx"].clear()
+ inference_state["obj_idx_to_id"].clear()
+ inference_state["obj_ids"].clear()
+ inference_state["point_inputs_per_obj"].clear()
+ inference_state["mask_inputs_per_obj"].clear()
+ inference_state["output_dict_per_obj"].clear()
+ inference_state["temp_output_dict_per_obj"].clear()
+
+ def _reset_tracking_results(self, inference_state):
+ """Reset all tracking inputs and results across the videos."""
+ for v in inference_state["point_inputs_per_obj"].values():
+ v.clear()
+ for v in inference_state["mask_inputs_per_obj"].values():
+ v.clear()
+ for v in inference_state["output_dict_per_obj"].values():
+ v["cond_frame_outputs"].clear()
+ v["non_cond_frame_outputs"].clear()
+ for v in inference_state["temp_output_dict_per_obj"].values():
+ v["cond_frame_outputs"].clear()
+ v["non_cond_frame_outputs"].clear()
+ inference_state["output_dict"]["cond_frame_outputs"].clear()
+ inference_state["output_dict"]["non_cond_frame_outputs"].clear()
+ inference_state["consolidated_frame_inds"]["cond_frame_outputs"].clear()
+ inference_state["consolidated_frame_inds"]["non_cond_frame_outputs"].clear()
+ inference_state["tracking_has_started"] = False
+ inference_state["frames_already_tracked"].clear()
+ inference_state["first_ann_frame_idx"] = None
+
+ def _get_image_feature(self, inference_state, frame_idx, batch_size):
+ """Compute the image features on a given frame."""
+ # Look up in the cache
+ image, backbone_out = inference_state["cached_features"].get(
+ frame_idx, (None, None)
+ )
+ if backbone_out is None:
+ if self.backbone is None:
+ raise RuntimeError(
+ f"Image features for frame {frame_idx} are not cached. "
+ "Please run inference on this frame first."
+ )
+ else:
+ # Cache miss -- we will run inference on a single image
+ image = inference_state["images"][frame_idx].cuda().float().unsqueeze(0)
+ backbone_out = self.forward_image(image)
+ # Cache the most recent frame's feature (for repeated interactions with
+ # a frame; we can use an LRU cache for more frames in the future).
+ inference_state["cached_features"] = {frame_idx: (image, backbone_out)}
+ if "tracker_backbone_out" in backbone_out:
+ backbone_out = backbone_out["tracker_backbone_out"] # get backbone output
+
+ # expand the features to have the same dimension as the number of objects
+ expanded_image = image.expand(batch_size, -1, -1, -1)
+ expanded_backbone_out = {
+ "backbone_fpn": backbone_out["backbone_fpn"].copy(),
+ "vision_pos_enc": backbone_out["vision_pos_enc"].copy(),
+ }
+ for i, feat in enumerate(expanded_backbone_out["backbone_fpn"]):
+ feat = feat.expand(batch_size, -1, -1, -1)
+ expanded_backbone_out["backbone_fpn"][i] = feat
+ for i, pos in enumerate(expanded_backbone_out["vision_pos_enc"]):
+ pos = pos.expand(batch_size, -1, -1, -1)
+ expanded_backbone_out["vision_pos_enc"][i] = pos
+
+ features = self._prepare_backbone_features(expanded_backbone_out)
+ features = (expanded_image,) + features
+ return features
+
+ def _run_single_frame_inference(
+ self,
+ inference_state,
+ output_dict,
+ frame_idx,
+ batch_size,
+ is_init_cond_frame,
+ point_inputs,
+ mask_inputs,
+ reverse,
+ run_mem_encoder,
+ prev_sam_mask_logits=None,
+ use_prev_mem_frame=True,
+ ):
+ """Run tracking on a single frame based on current inputs and previous memory."""
+ # Retrieve correct image features
+ (
+ image,
+ _,
+ current_vision_feats,
+ current_vision_pos_embeds,
+ feat_sizes,
+ ) = self._get_image_feature(inference_state, frame_idx, batch_size)
+
+ # point and mask should not appear as input simultaneously on the same frame
+ assert point_inputs is None or mask_inputs is None
+ current_out = self.track_step(
+ frame_idx=frame_idx,
+ is_init_cond_frame=is_init_cond_frame,
+ current_vision_feats=current_vision_feats,
+ current_vision_pos_embeds=current_vision_pos_embeds,
+ feat_sizes=feat_sizes,
+ image=image,
+ point_inputs=point_inputs,
+ mask_inputs=mask_inputs,
+ output_dict=output_dict,
+ num_frames=inference_state["num_frames"],
+ track_in_reverse=reverse,
+ run_mem_encoder=run_mem_encoder,
+ prev_sam_mask_logits=prev_sam_mask_logits,
+ use_prev_mem_frame=use_prev_mem_frame,
+ )
+
+ # optionally offload the output to CPU memory to save GPU space
+ storage_device = inference_state["storage_device"]
+ maskmem_features = current_out["maskmem_features"]
+ if maskmem_features is not None:
+ maskmem_features = maskmem_features.to(torch.bfloat16)
+ maskmem_features = maskmem_features.to(storage_device, non_blocking=True)
+ pred_masks_gpu = current_out["pred_masks"]
+ pred_masks = pred_masks_gpu.to(storage_device, non_blocking=True)
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
+ maskmem_pos_enc = self._get_maskmem_pos_enc(inference_state, current_out)
+ # object pointer is a small tensor, so we always keep it on GPU memory for fast access
+ obj_ptr = current_out["obj_ptr"]
+ object_score_logits = current_out["object_score_logits"]
+ # make a compact version of this frame's output to reduce the state size
+ compact_current_out = {
+ "maskmem_features": maskmem_features,
+ "maskmem_pos_enc": maskmem_pos_enc,
+ "pred_masks": pred_masks,
+ "obj_ptr": obj_ptr,
+ "object_score_logits": object_score_logits,
+ }
+ if self.use_memory_selection:
+ compact_current_out["iou_score"] = current_out["iou_score"]
+ compact_current_out["eff_iou_score"] = current_out["eff_iou_score"]
+ return compact_current_out, pred_masks_gpu
+
+ def _run_memory_encoder(
+ self,
+ inference_state,
+ frame_idx,
+ batch_size,
+ high_res_masks,
+ object_score_logits,
+ is_mask_from_pts,
+ ):
+ """
+ Run the memory encoder on `high_res_masks`. This is usually after applying
+ non-overlapping constraints to object scores. Since their scores changed, their
+ memory also needs to be recomputed with the memory encoder.
+ """
+ # Retrieve correct image features
+ image, _, current_vision_feats, _, feat_sizes = self._get_image_feature(
+ inference_state, frame_idx, batch_size
+ )
+ maskmem_features, maskmem_pos_enc = self._encode_new_memory(
+ image=image,
+ current_vision_feats=current_vision_feats,
+ feat_sizes=feat_sizes,
+ pred_masks_high_res=high_res_masks,
+ object_score_logits=object_score_logits,
+ is_mask_from_pts=is_mask_from_pts,
+ )
+
+ # optionally offload the output to CPU memory to save GPU space
+ storage_device = inference_state["storage_device"]
+ maskmem_features = maskmem_features.to(torch.bfloat16)
+ maskmem_features = maskmem_features.to(storage_device, non_blocking=True)
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
+ maskmem_pos_enc = self._get_maskmem_pos_enc(
+ inference_state, {"maskmem_pos_enc": maskmem_pos_enc}
+ )
+ return maskmem_features, maskmem_pos_enc
+
+ def _get_maskmem_pos_enc(self, inference_state, current_out):
+ """
+ `maskmem_pos_enc` is the same across frames and objects, so we cache it as
+ a constant in the inference session to reduce session storage size.
+ """
+ model_constants = inference_state["constants"]
+ # "out_maskmem_pos_enc" should be either a list of tensors or None
+ out_maskmem_pos_enc = current_out["maskmem_pos_enc"]
+ if out_maskmem_pos_enc is not None:
+ if "maskmem_pos_enc" not in model_constants:
+ assert isinstance(out_maskmem_pos_enc, list)
+ # only take the slice for one object, since it's same across objects
+ maskmem_pos_enc = [x[0:1].clone() for x in out_maskmem_pos_enc]
+ model_constants["maskmem_pos_enc"] = maskmem_pos_enc
+ else:
+ maskmem_pos_enc = model_constants["maskmem_pos_enc"]
+ # expand the cached maskmem_pos_enc to the actual batch size
+ batch_size = out_maskmem_pos_enc[0].size(0)
+ expanded_maskmem_pos_enc = [
+ x.expand(batch_size, -1, -1, -1) for x in maskmem_pos_enc
+ ]
+ else:
+ expanded_maskmem_pos_enc = None
+ return expanded_maskmem_pos_enc
+
+ @torch.inference_mode()
+ def remove_object(self, inference_state, obj_id, strict=False, need_output=True):
+ """
+ Remove an object id from the tracking state. If strict is True, we check whether
+ the object id actually exists and raise an error if it doesn't exist.
+ """
+ old_obj_idx_to_rm = inference_state["obj_id_to_idx"].get(obj_id, None)
+ updated_frames = []
+ # Check whether this object_id to remove actually exists and possibly raise an error.
+ if old_obj_idx_to_rm is None:
+ if not strict:
+ return inference_state["obj_ids"], updated_frames
+ raise RuntimeError(
+ f"Cannot remove object id {obj_id} as it doesn't exist. "
+ f"All existing object ids: {inference_state['obj_ids']}."
+ )
+
+ # If this is the only remaining object id, we simply reset the state.
+ if len(inference_state["obj_id_to_idx"]) == 1:
+ self.clear_all_points_in_video(inference_state)
+ return inference_state["obj_ids"], updated_frames
+
+ # There are still remaining objects after removing this object id. In this case,
+ # we need to delete the object storage from inference state tensors.
+ # Step 0: clear the input on those frames where this object id has point or mask input
+ # (note that this step is required as it might downgrade conditioning frames to
+ # non-conditioning ones)
+ obj_input_frames_inds = set()
+ obj_input_frames_inds.update(
+ inference_state["point_inputs_per_obj"][old_obj_idx_to_rm]
+ )
+ obj_input_frames_inds.update(
+ inference_state["mask_inputs_per_obj"][old_obj_idx_to_rm]
+ )
+ for frame_idx in obj_input_frames_inds:
+ self.clear_all_points_in_frame(
+ inference_state, frame_idx, obj_id, need_output=False
+ )
+
+ # Step 1: Update the object id mapping (note that it must be done after Step 0,
+ # since Step 0 still requires the old object id mappings in inference_state)
+ old_obj_ids = inference_state["obj_ids"]
+ old_obj_inds = list(range(len(old_obj_ids)))
+ remain_old_obj_inds = old_obj_inds.copy()
+ remain_old_obj_inds.remove(old_obj_idx_to_rm)
+ new_obj_ids = [old_obj_ids[old_idx] for old_idx in remain_old_obj_inds]
+ new_obj_inds = list(range(len(new_obj_ids)))
+ # build new mappings
+ old_idx_to_new_idx = dict(zip(remain_old_obj_inds, new_obj_inds))
+ inference_state["obj_id_to_idx"] = dict(zip(new_obj_ids, new_obj_inds))
+ inference_state["obj_idx_to_id"] = dict(zip(new_obj_inds, new_obj_ids))
+ inference_state["obj_ids"] = new_obj_ids
+
+ # Step 2: For per-object tensor storage, we shift their obj_idx in the dict keys.
+ # (note that "consolidated_frame_inds" doesn't need to be updated in this step as
+ # it's already handled in Step 0)
+ def _map_keys(container):
+ new_kvs = []
+ for k in old_obj_inds:
+ v = container.pop(k)
+ if k in old_idx_to_new_idx:
+ new_kvs.append((old_idx_to_new_idx[k], v))
+ container.update(new_kvs)
+
+ _map_keys(inference_state["point_inputs_per_obj"])
+ _map_keys(inference_state["mask_inputs_per_obj"])
+ _map_keys(inference_state["output_dict_per_obj"])
+ _map_keys(inference_state["temp_output_dict_per_obj"])
+
+ # Step 3: For packed tensor storage, we index the remaining ids and rebuild the per-object slices.
+ def _slice_state(output_dict, storage_key):
+ for frame_idx, out in output_dict[storage_key].items():
+ out["maskmem_features"] = out["maskmem_features"][remain_old_obj_inds]
+ out["maskmem_pos_enc"] = [
+ x[remain_old_obj_inds] for x in out["maskmem_pos_enc"]
+ ]
+ # "maskmem_pos_enc" is the same across frames, so we only need to store one copy of it
+ out["maskmem_pos_enc"] = self._get_maskmem_pos_enc(inference_state, out)
+ out["pred_masks"] = out["pred_masks"][remain_old_obj_inds]
+ out["obj_ptr"] = out["obj_ptr"][remain_old_obj_inds]
+ out["object_score_logits"] = out["object_score_logits"][
+ remain_old_obj_inds
+ ]
+ if self.use_memory_selection:
+ out["iou_score"] = out["iou_score"][remain_old_obj_inds]
+ out["eff_iou_score"] = self.cal_mem_score(
+ out["object_score_logits"], out["iou_score"]
+ ) # recalculate the memory frame score
+ # also update the per-object slices
+ self._add_output_per_object(
+ inference_state, frame_idx, out, storage_key
+ )
+
+ _slice_state(inference_state["output_dict"], "cond_frame_outputs")
+ _slice_state(inference_state["output_dict"], "non_cond_frame_outputs")
+
+ # Step 4: Further collect the outputs on those frames in `obj_input_frames_inds`, which
+ # could show an updated mask for objects previously occluded by the object being removed
+ if need_output:
+ temp_output_dict_per_obj = inference_state["temp_output_dict_per_obj"]
+ for frame_idx in obj_input_frames_inds:
+ is_cond = any(
+ frame_idx in obj_temp_output_dict["cond_frame_outputs"]
+ for obj_temp_output_dict in temp_output_dict_per_obj.values()
+ )
+ consolidated_out = self._consolidate_temp_output_across_obj(
+ inference_state,
+ frame_idx,
+ is_cond=is_cond,
+ run_mem_encoder=False,
+ consolidate_at_video_res=True,
+ )
+ _, video_res_masks = self._get_orig_video_res_output(
+ inference_state, consolidated_out["pred_masks_video_res"]
+ )
+ updated_frames.append((frame_idx, video_res_masks))
+
+ return inference_state["obj_ids"], updated_frames
+
+ def _clear_non_cond_mem_around_input(self, inference_state, frame_idx):
+ """
+ Remove the non-conditioning memory around the input frame. When users provide
+ correction clicks, the surrounding frames' non-conditioning memories can still
+ contain outdated object appearance information and could confuse the model.
+
+ This method clears those non-conditioning memories surrounding the interacted
+ frame to avoid giving the model both old and new information about the object.
+ """
+ r = self.memory_temporal_stride_for_eval
+ frame_idx_begin = frame_idx - r * self.num_maskmem
+ frame_idx_end = frame_idx + r * self.num_maskmem
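+        # e.g. with stride r == 1 and num_maskmem == 7, the non-conditioning memories of
+        # frames [frame_idx - 7, frame_idx + 7] are cleared below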
+ batch_size = self._get_obj_num(inference_state)
+ for obj_idx in range(batch_size):
+ obj_output_dict = inference_state["output_dict_per_obj"][obj_idx]
+ non_cond_frame_outputs = obj_output_dict["non_cond_frame_outputs"]
+ for t in range(frame_idx_begin, frame_idx_end + 1):
+ non_cond_frame_outputs.pop(t, None)
+
+ def _suppress_shrinked_masks(
+ self, pred_masks, new_pred_masks, shrink_threshold=0.3
+ ):
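+        """
+        Suppress masks whose area shrinks sharply after the pixel-wise non-overlapping
+        constraint is applied. For example, a mask whose positive area drops from 500 to
+        100 pixels has a ratio of 0.2 < shrink_threshold (default 0.3) and is clamped to
+        background logits.
+        """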
+ area_before = (pred_masks > 0).sum(dim=(-1, -2))
+ area_after = (new_pred_masks > 0).sum(dim=(-1, -2))
+ area_before = torch.clamp(area_before, min=1.0)
+ area_ratio = area_after / area_before
+ keep = area_ratio >= shrink_threshold
+ keep_mask = keep[..., None, None].expand_as(pred_masks)
+ pred_masks_after = torch.where(
+ keep_mask, pred_masks, torch.clamp(pred_masks, max=-10.0)
+ )
+ return pred_masks_after
+
+ def _suppress_object_pw_area_shrinkage(self, pred_masks):
+ """
+        This function suppresses masks that shrink in area after applying the pixel-wise non-overlapping constraints.
+ Note that the final output can still be overlapping.
+ """
+ # Apply pixel-wise non-overlapping constraint based on mask scores
+ pixel_level_non_overlapping_masks = super()._apply_non_overlapping_constraints(
+ pred_masks
+ )
+        # Fully suppress masks with high shrinkage (probably noisy) based on the pixel-wise non-overlapping constraints
+        # NOTE: The output of this function can be a no-op if none of the masks shrank by a large factor.
+ pred_masks = self._suppress_shrinked_masks(
+ pred_masks, pixel_level_non_overlapping_masks
+ )
+ return pred_masks
+
+ def _apply_object_wise_non_overlapping_constraints(
+ self, pred_masks, obj_scores, background_value=-10.0
+ ):
+ """
+        Applies non-overlapping constraints object-wise (i.e. only one object can claim an overlapping region)
+ """
+ # Replace pixel scores with object scores
+ pred_masks_single_score = torch.where(
+ pred_masks > 0, obj_scores[..., None, None], background_value
+ )
+ # Apply pixel-wise non-overlapping constraint based on mask scores
+ pixel_level_non_overlapping_masks = super()._apply_non_overlapping_constraints(
+ pred_masks_single_score
+ )
+        # Replace object scores back with pixel scores. Note that now only one object can claim the overlapping region
+ pred_masks = torch.where(
+ pixel_level_non_overlapping_masks > 0,
+ pred_masks,
+ torch.clamp(pred_masks, max=background_value),
+ )
+ return pred_masks
diff --git a/sam3/model/sam3_video_base.py b/sam3/model/sam3_video_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..e61969f6d581101bb49e8eeb604a0d38d88da5bb
--- /dev/null
+++ b/sam3/model/sam3_video_base.py
@@ -0,0 +1,1767 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import datetime
+import logging
+import math
+import os
+from collections import defaultdict
+from copy import deepcopy
+from enum import Enum
+from typing import Any, Dict, List, Set
+
+import numpy as np
+import numpy.typing as npt
+import torch
+import torch.distributed as dist
+import torch.nn.functional as F
+
+from sam3 import perflib
+from sam3.logger import get_logger
+from sam3.model.box_ops import fast_diag_box_iou
+from sam3.model.data_misc import BatchedDatapoint
+from sam3.model.sam3_tracker_utils import fill_holes_in_mask_scores, mask_to_box
+from sam3.perflib.masks_ops import mask_iou
+from sam3.train.masks_ops import rle_encode
+from torch import nn, Tensor
+
+logger = get_logger(__name__)
+
+
+class MaskletConfirmationStatus(Enum):
+ UNCONFIRMED = 1 # newly added masklet, not confirmed by any detection yet
+ CONFIRMED = 2 # confirmed by at least one detection
+
+
+class Sam3VideoBase(nn.Module):
+ def __init__(
+ self,
+ detector: nn.Module,
+ tracker: nn.Module,
+        # prob threshold for detection outputs -- only detections above this threshold
+        # enter NMS and det-to-track matching
+ score_threshold_detection=0.5,
+ # IoU threshold for detection NMS
+ det_nms_thresh=0.0,
+        # IoU threshold for det-to-track matching -- a detection is considered "matched" to a tracklet
+        # if it overlaps with that tracklet above this threshold -- it is often a loose threshold like 0.1
+ assoc_iou_thresh=0.5,
+ # IoU threshold for det-to-track matching, which is used to determine whether a masklet is "unmatched"
+ # by any detections -- it is often a stricter threshold like 0.5
+ trk_assoc_iou_thresh=0.5,
+ # prob threshold for a detection to be added as a new object
+ new_det_thresh=0.0,
+ # hotstart parameters: we hold off the outputs for `hotstart_delay` frames and
+ # 1) remove those tracklets unmatched by any detections based on `hotstart_unmatch_thresh`
+ # 2) remove those tracklets overlapping with one another based on `hotstart_dup_thresh`
+ hotstart_delay=0,
+ hotstart_unmatch_thresh=3,
+ hotstart_dup_thresh=3,
+        # Whether to suppress masks only within hotstart. If False, we can suppress masks even if they start before the hotstart period.
+ suppress_unmatched_only_within_hotstart=True,
+ init_trk_keep_alive=0,
+ max_trk_keep_alive=8,
+ min_trk_keep_alive=-4,
+ # Threshold for suppressing overlapping objects based on recent occlusion
+ suppress_overlapping_based_on_recent_occlusion_threshold=0.0,
+ decrease_trk_keep_alive_for_empty_masklets=False,
+ o2o_matching_masklets_enable=False, # Enable hungarian matching to match existing masklets
+ suppress_det_close_to_boundary=False,
+ fill_hole_area=16,
+ # The maximum number of objects (masklets) to track across all GPUs (for no limit, set it to -1)
+ max_num_objects=-1,
+ recondition_every_nth_frame=-1,
+        # masklet confirmation status (to suppress unconfirmed masklets)
+ masklet_confirmation_enable=False,
+ # a masklet is confirmed after being consecutively detected and matched for
+ # `masklet_confirmation_consecutive_det_thresh`
+ masklet_confirmation_consecutive_det_thresh=3,
+ # bbox heuristic parameters
+ reconstruction_bbox_iou_thresh=0.0,
+ reconstruction_bbox_det_score=0.0,
+ ):
+ super().__init__()
+ self.detector = detector
+ self.tracker = tracker
+ self.score_threshold_detection = score_threshold_detection
+ self.det_nms_thresh = det_nms_thresh
+ self.assoc_iou_thresh = assoc_iou_thresh
+ self.trk_assoc_iou_thresh = trk_assoc_iou_thresh
+ self.new_det_thresh = new_det_thresh
+
+ # hotstart parameters
+ if hotstart_delay > 0:
+ assert hotstart_unmatch_thresh <= hotstart_delay
+ assert hotstart_dup_thresh <= hotstart_delay
+ self.hotstart_delay = hotstart_delay
+ self.hotstart_unmatch_thresh = hotstart_unmatch_thresh
+ self.hotstart_dup_thresh = hotstart_dup_thresh
+ self.suppress_unmatched_only_within_hotstart = (
+ suppress_unmatched_only_within_hotstart
+ )
+ self.init_trk_keep_alive = init_trk_keep_alive
+ self.max_trk_keep_alive = max_trk_keep_alive
+ self.min_trk_keep_alive = min_trk_keep_alive
+ self.suppress_overlapping_based_on_recent_occlusion_threshold = (
+ suppress_overlapping_based_on_recent_occlusion_threshold
+ )
+ self.suppress_det_close_to_boundary = suppress_det_close_to_boundary
+ self.decrease_trk_keep_alive_for_empty_masklets = (
+ decrease_trk_keep_alive_for_empty_masklets
+ )
+ self.o2o_matching_masklets_enable = o2o_matching_masklets_enable
+ self.fill_hole_area = fill_hole_area
+ self.eval()
+ self.rank = int(os.getenv("RANK", "0"))
+ self.world_size = int(os.getenv("WORLD_SIZE", "1"))
+ self._dist_pg_cpu = None # CPU process group (lazy-initialized on first use)
+
+ # the maximum object number
+ if max_num_objects > 0:
+ num_obj_for_compile = math.ceil(max_num_objects / self.world_size)
+ else:
+ max_num_objects = 10000 # no limit
+ num_obj_for_compile = 16
+ logger.info(f"setting {max_num_objects=} and {num_obj_for_compile=}")
+ self.max_num_objects = max_num_objects
+ self.num_obj_for_compile = num_obj_for_compile
+ self.recondition_every_nth_frame = recondition_every_nth_frame
+ self.masklet_confirmation_enable = masklet_confirmation_enable
+ self.masklet_confirmation_consecutive_det_thresh = (
+ masklet_confirmation_consecutive_det_thresh
+ )
+ self.reconstruction_bbox_iou_thresh = reconstruction_bbox_iou_thresh
+ self.reconstruction_bbox_det_score = reconstruction_bbox_det_score
+
+ @property
+ def device(self):
+ self._device = getattr(self, "_device", None) or next(self.parameters()).device
+ return self._device
+
+ def _init_dist_pg_cpu(self):
+ # a short 3-min timeout to quickly detect any synchronization failures
+ timeout_sec = int(os.getenv("SAM3_COLLECTIVE_OP_TIMEOUT_SEC", "180"))
+ timeout = datetime.timedelta(seconds=timeout_sec)
+ self._dist_pg_cpu = dist.new_group(backend="gloo", timeout=timeout)
+
+ def broadcast_python_obj_cpu(self, python_obj_list, src):
+ if self._dist_pg_cpu is None:
+ self._init_dist_pg_cpu()
+ dist.broadcast_object_list(python_obj_list, src=src, group=self._dist_pg_cpu)
+
+ def _det_track_one_frame(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ reverse: bool,
+ input_batch: BatchedDatapoint,
+ geometric_prompt: Any,
+ tracker_states_local: List[Any],
+ tracker_metadata_prev: Dict[str, Any],
+ feature_cache: Dict,
+ orig_vid_height: int,
+ orig_vid_width: int,
+ is_image_only: bool = False,
+ allow_new_detections: bool = True,
+ ):
+ """
+ This function handles one-step inference for the DenseTracking model in an SPMD manner.
+        At a high level, all GPUs execute the same function calls as if everything ran on a single GPU,
+ while under the hood, some function calls involve distributed computation based on sharded
+ SAM2 states.
+
+ - `input_batch` contains image and other inputs on the entire video; it should be identical across GPUs
+ - `tracker_states_local` holds the local masklet information in this GPU shard
+        - `tracker_metadata_prev` manages the metadata for SAM2 objects, such as which masklet is held on which GPU;
+          it contains both global and local masklet information
+ """
+
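+        # Per-frame flow: (1) detection, (2) SAM2 propagation, (3) update planning on rank 0
+        # broadcast to all ranks, (4) local execution of the plan, (5) output building on rank 0.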
+ # Step 1: run backbone and detector in a distributed manner -- this is done via Sam3ImageOnVideoMultiGPU,
+ # a MultiGPU model (assigned to `self.detector`) that shards frames in a round-robin manner.
+ # It returns a "det_out" dict for `frame_idx` and fills SAM2 backbone features for `frame_idx`
+        # into `feature_cache`. Despite the distributed inference under the hood, the results are
+        # the same as if the backbone and detector were run for every frame on a single GPU.
+ det_out = self.run_backbone_and_detection(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ reverse=reverse,
+ input_batch=input_batch,
+ geometric_prompt=geometric_prompt,
+ feature_cache=feature_cache,
+ allow_new_detections=allow_new_detections,
+ )
+
+ # Step 2: each GPU propagates its local SAM2 states to get the SAM2 prediction masks.
+ # the returned `tracker_low_res_masks_global` contains the concatenated masklet predictions
+ # gathered from all GPUs (as if they are propagated on a single GPU). Note that this step only
+ # runs the SAM2 propagation step, but doesn't encode new memory for the predicted masks;
+ # we defer memory encoding to `run_tracker_update_execution_phase` after resolving all heuristics.
+ if tracker_metadata_prev == {}:
+ # initialize masklet metadata if it's uninitialized (empty dict)
+ tracker_metadata_prev.update(self._initialize_metadata())
+ tracker_low_res_masks_global, tracker_obj_scores_global = (
+ self.run_tracker_propagation(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ reverse=reverse,
+ tracker_states_local=tracker_states_local,
+ tracker_metadata_prev=tracker_metadata_prev,
+ )
+ )
+
+ # Step 3: based on detection outputs and the propagated SAM2 prediction masks, we make plans
+ # for SAM2 masklet updates (i.e. which objects to add and remove, how to load-balance them, etc).
+ # We also run SAM2 memory encoder globally in this step to resolve non-overlapping constraints.
+ # **This step should involve all the heuristics needed for any updates.** Most of the update
+ # planning will be done on the master rank (GPU 0) and the resulting plan `tracker_update_plan` is
+ # broadcasted to other GPUs (to be executed in a distributed manner). This step also generates the
+ # new masklet metadata `tracker_metadata_new` (based on its previous version `tracker_metadata_prev`).
+ tracker_update_plan, tracker_metadata_new = (
+ self.run_tracker_update_planning_phase(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ reverse=reverse,
+ det_out=det_out,
+ tracker_low_res_masks_global=tracker_low_res_masks_global,
+ tracker_obj_scores_global=tracker_obj_scores_global,
+ tracker_metadata_prev=tracker_metadata_prev,
+ tracker_states_local=tracker_states_local,
+ is_image_only=is_image_only,
+ )
+ )
+
+ # Get reconditioning info from the update plan
+ reconditioned_obj_ids = tracker_update_plan.get("reconditioned_obj_ids", set())
+ det_to_matched_trk_obj_ids = tracker_update_plan.get(
+ "det_to_matched_trk_obj_ids", {}
+ )
+
+ # Step 4: based on `tracker_update_plan`, each GPU executes the update w.r.t. its local SAM2 inference states
+ tracker_states_local_new = self.run_tracker_update_execution_phase(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ reverse=reverse,
+ det_out=det_out,
+ tracker_states_local=tracker_states_local,
+ tracker_update_plan=tracker_update_plan,
+ orig_vid_height=orig_vid_height,
+ orig_vid_width=orig_vid_width,
+ feature_cache=feature_cache,
+ )
+
+ # Step 5: finally, build the outputs for this frame (it only needs to be done on GPU 0 since
+ # only GPU 0 will send outputs to the server).
+ if self.rank == 0:
+ obj_id_to_mask = self.build_outputs(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ reverse=reverse,
+ det_out=det_out,
+ tracker_low_res_masks_global=tracker_low_res_masks_global,
+ tracker_obj_scores_global=tracker_obj_scores_global,
+ tracker_metadata_prev=tracker_metadata_prev,
+ tracker_update_plan=tracker_update_plan,
+ orig_vid_height=orig_vid_height,
+ orig_vid_width=orig_vid_width,
+ reconditioned_obj_ids=reconditioned_obj_ids,
+ det_to_matched_trk_obj_ids=det_to_matched_trk_obj_ids,
+ )
+ obj_id_to_score = tracker_metadata_new["obj_id_to_score"]
+ else:
+ obj_id_to_mask, obj_id_to_score = {}, {} # dummy outputs on other GPUs
+ # a few statistics for the current frame as a part of the output
+ frame_stats = {
+ "num_obj_tracked": np.sum(tracker_metadata_new["num_obj_per_gpu"]),
+ "num_obj_dropped": tracker_update_plan["num_obj_dropped_due_to_limit"],
+ }
+        # add tracker scores to metadata; this applies to every frame except the first one (which has no tracked objects yet)
+ if tracker_obj_scores_global.shape[0] > 0:
+ # Convert tracker_obj_scores_global to sigmoid scores before updating
+ tracker_obj_scores_global = tracker_obj_scores_global.sigmoid().tolist()
+ tracker_obj_ids = tracker_metadata_prev["obj_ids_all_gpu"]
+ tracker_metadata_new["obj_id_to_tracker_score_frame_wise"][
+ frame_idx
+ ].update(dict(zip(tracker_obj_ids, tracker_obj_scores_global)))
+ return (
+ obj_id_to_mask, # a dict: obj_id --> output mask
+ obj_id_to_score, # a dict: obj_id --> output score (prob)
+ tracker_states_local_new,
+ tracker_metadata_new,
+ frame_stats,
+            tracker_obj_scores_global,  # per-object tracker scores for this frame (aligned with obj_ids_all_gpu)
+ )
+
+ def _suppress_detections_close_to_boundary(self, boxes, margin=0.025):
+ """
+ Suppress detections too close to image edges (for normalized boxes).
+
+ boxes: (N, 4) in xyxy format, normalized [0,1]
+ margin: fraction of image
+ """
+ x_min, y_min, x_max, y_max = boxes.unbind(-1)
+ x_c = (x_min + x_max) / 2
+ y_c = (y_min + y_max) / 2
+ keep = (
+ (x_c > margin)
+ & (x_c < 1.0 - margin)
+ & (y_c > margin)
+ & (y_c < 1.0 - margin)
+ )
+
+ return keep
+
+ def run_backbone_and_detection(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ input_batch: BatchedDatapoint,
+ geometric_prompt: Any,
+ feature_cache: Dict,
+ reverse: bool,
+ allow_new_detections: bool,
+ ):
+ # Step 1: if text feature is not cached in `feature_cache`, compute and cache it
+ text_batch_key = tuple(input_batch.find_text_batch)
+ if "text" not in feature_cache or text_batch_key not in feature_cache["text"]:
+ text_outputs = self.detector.backbone.forward_text(
+ input_batch.find_text_batch, device=self.device
+ )
+ # note: we only cache the text feature of the most recent prompt
+ feature_cache["text"] = {text_batch_key: text_outputs}
+ else:
+ text_outputs = feature_cache["text"][text_batch_key]
+
+ # Step 2: run backbone, detector, and post-processing with NMS
+ if "multigpu_buffer" not in feature_cache:
+ # "multigpu_buffer" is a buffer cache used by `self.detector` and it needs
+ # to be passed to `forward_video_grounding_multigpu` for every call
+ feature_cache["multigpu_buffer"] = {}
+
+ # Extract max_frame_num_to_track from feature_cache if available
+ tracking_bounds = feature_cache.get("tracking_bounds", {})
+ max_frame_num_to_track = tracking_bounds.get("max_frame_num_to_track")
+ start_frame_idx = tracking_bounds.get("propagate_in_video_start_frame_idx")
+
+ sam3_image_out, _ = self.detector.forward_video_grounding_multigpu(
+ backbone_out={
+ "img_batch_all_stages": input_batch.img_batch,
+ **text_outputs,
+ },
+ find_inputs=input_batch.find_inputs,
+ geometric_prompt=geometric_prompt,
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ multigpu_buffer=feature_cache["multigpu_buffer"],
+ track_in_reverse=reverse,
+ # also get the SAM2 backbone features
+ return_tracker_backbone_feats=True,
+ # run NMS as a part of distributed computation
+ run_nms=self.det_nms_thresh > 0.0,
+ nms_prob_thresh=self.score_threshold_detection,
+ nms_iou_thresh=self.det_nms_thresh,
+ # pass max_frame_num_to_track to respect tracking limits
+ max_frame_num_to_track=max_frame_num_to_track,
+ propagate_in_video_start_frame_idx=start_frame_idx,
+ )
+        # note: detections in `sam3_image_out` have already gone through NMS
+ pred_probs = sam3_image_out["pred_logits"].squeeze(-1).sigmoid()
+ if not allow_new_detections:
+ pred_probs = pred_probs - 1e8 # make sure no detections are kept
+ pred_boxes_xyxy = sam3_image_out["pred_boxes_xyxy"]
+ pred_masks = sam3_image_out["pred_masks"]
+ # get the positive detection outputs above threshold
+ pos_pred_idx = torch.where(pred_probs > self.score_threshold_detection)
+ det_out = {
+ "bbox": pred_boxes_xyxy[pos_pred_idx[0], pos_pred_idx[1]],
+ "mask": pred_masks[pos_pred_idx[0], pos_pred_idx[1]],
+ "scores": pred_probs[pos_pred_idx[0], pos_pred_idx[1]],
+ }
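+        # det_out keeps only detections above `score_threshold_detection`: "bbox" holds
+        # (K, 4) normalized xyxy boxes, "mask" holds (K, H, W) low-res mask logits, and
+        # "scores" holds (K,) probabilities, where K is the number of kept detections.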
+
+ # Step 3: build SAM2 backbone features and store them in `feature_cache`
+ backbone_cache = {}
+ sam_mask_decoder = self.tracker.sam_mask_decoder
+ tracker_backbone_fpn = [
+ sam_mask_decoder.conv_s0(sam3_image_out["tracker_backbone_fpn_0"]),
+ sam_mask_decoder.conv_s1(sam3_image_out["tracker_backbone_fpn_1"]),
+ sam3_image_out["tracker_backbone_fpn_2"], # fpn_2 doesn't need conv
+ ]
+ tracker_backbone_out = {
+ "vision_features": tracker_backbone_fpn[-1], # top-level feature
+ "vision_pos_enc": sam3_image_out["tracker_backbone_pos_enc"],
+ "backbone_fpn": tracker_backbone_fpn,
+ }
+ backbone_cache["tracker_backbone_out"] = tracker_backbone_out
+ feature_cache[frame_idx] = (
+ input_batch.img_batch[frame_idx],
+ backbone_cache,
+ )
+ # remove from `feature_cache` old features to save GPU memory
+ feature_cache.pop(frame_idx - 1 if not reverse else frame_idx + 1, None)
+ return det_out
+
+ def run_tracker_propagation(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ reverse: bool,
+ tracker_states_local: List[Any],
+ tracker_metadata_prev: Dict[str, npt.NDArray],
+ ):
+ # Step 1: propagate the local SAM2 states to get the current frame's prediction
+ # `low_res_masks_local` of the existing masklets on this GPU
+ # - obj_ids_local: List[int] -- list of object IDs
+ # - low_res_masks_local: Tensor -- (num_local_obj, H_mask, W_mask)
+ obj_ids_local, low_res_masks_local, obj_scores_local = (
+ self._propogate_tracker_one_frame_local_gpu(
+ tracker_states_local, frame_idx=frame_idx, reverse=reverse
+ )
+ )
+
+ assert np.all(
+ obj_ids_local == tracker_metadata_prev["obj_ids_per_gpu"][self.rank]
+ ), "{} != {}".format(
+ obj_ids_local, tracker_metadata_prev["obj_ids_per_gpu"][self.rank]
+ )
+
+ # Step 2: all-gather `low_res_masks_local` into `low_res_masks_global`
+ # - low_res_masks_global: Tensor -- (num_global_obj, H_mask, W_mask)
+ _, H_mask, W_mask = low_res_masks_local.shape
+ if self.world_size > 1:
+ # `low_res_masks_local` and `obj_scores_local` need to be contiguous and float32
+ # (they could be non-contiguous due to slicing and/or bfloat16 due to autocast)
+ low_res_masks_local = low_res_masks_local.float().contiguous()
+ obj_scores_local = obj_scores_local.float().contiguous()
+ num_obj_this_gpu = tracker_metadata_prev["num_obj_per_gpu"][self.rank]
+ assert low_res_masks_local.size(0) == num_obj_this_gpu
+ assert obj_scores_local.size(0) == num_obj_this_gpu
+ low_res_masks_peers = [
+ low_res_masks_local.new_empty(num_obj, H_mask, W_mask)
+ for num_obj in tracker_metadata_prev["num_obj_per_gpu"]
+ ]
+ obj_scores_peers = [
+ obj_scores_local.new_empty(num_obj)
+ for num_obj in tracker_metadata_prev["num_obj_per_gpu"]
+ ]
+ dist.all_gather(low_res_masks_peers, low_res_masks_local)
+ dist.all_gather(obj_scores_peers, obj_scores_local)
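+            # all_gather fills the peer lists in rank order, so after concatenation row i
+            # corresponds to tracker_metadata_prev["obj_ids_all_gpu"][i]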
+ low_res_masks_global = torch.cat(low_res_masks_peers, dim=0)
+ obj_scores_global = torch.cat(obj_scores_peers, dim=0)
+ else:
+ low_res_masks_global = low_res_masks_local
+ obj_scores_global = obj_scores_local
+ return low_res_masks_global, obj_scores_global
+
+ def _recondition_masklets(
+ self,
+ frame_idx,
+ det_out: Dict[str, Tensor],
+ trk_id_to_max_iou_high_conf_det: List[int],
+ tracker_states_local: List[Any],
+ tracker_metadata: Dict[str, npt.NDArray],
+ tracker_obj_scores_global: Tensor,
+ ):
+ # Recondition the masklets based on the new detections
+ for trk_obj_id, det_idx in trk_id_to_max_iou_high_conf_det.items():
+ new_mask = det_out["mask"][det_idx : det_idx + 1]
+ input_mask_res = self.tracker.input_mask_size
+ new_mask_binary = (
+ F.interpolate(
+ new_mask.unsqueeze(1),
+ size=(input_mask_res, input_mask_res),
+ mode="bilinear",
+ align_corners=False,
+ ).squeeze(1)[0]
+ > 0
+ )
+ HIGH_CONF_THRESH = 0.8
+ reconditioned_states_idx = set()
+ obj_idx = np.where(tracker_metadata["obj_ids_all_gpu"] == trk_obj_id)[
+ 0
+ ].item()
+ obj_score = tracker_obj_scores_global[obj_idx]
+ for state_idx, inference_state in enumerate(tracker_states_local):
+ if (
+ trk_obj_id in inference_state["obj_ids"]
+                    # NOTE: Goal of this condition is to avoid reconditioning masks that are occluded/low quality.
+ # Unfortunately, these can get reconditioned anyway due to batching. We should consider removing these heuristics.
+ and obj_score > HIGH_CONF_THRESH
+ ):
+ logger.debug(
+ f"Adding new mask for track {trk_obj_id} at frame {frame_idx}. Objects {inference_state['obj_ids']} are all reconditioned."
+ )
+ self.tracker.add_new_mask(
+ inference_state=inference_state,
+ frame_idx=frame_idx,
+ obj_id=trk_obj_id,
+ mask=new_mask_binary,
+ )
+ reconditioned_states_idx.add(state_idx)
+
+ for idx in reconditioned_states_idx:
+ self.tracker.propagate_in_video_preflight(
+ tracker_states_local[idx], run_mem_encoder=True
+ )
+ return tracker_states_local
+
+ def run_tracker_update_planning_phase(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ reverse: bool,
+ det_out: Dict[str, Tensor],
+ tracker_low_res_masks_global: Tensor,
+ tracker_obj_scores_global: Tensor,
+ tracker_metadata_prev: Dict[str, npt.NDArray],
+ tracker_states_local: List[Any],
+ is_image_only: bool = False,
+ ):
+ # initialize new metadata from previous metadata (its values will be updated later)
+ tracker_metadata_new = {
+ "obj_ids_per_gpu": deepcopy(tracker_metadata_prev["obj_ids_per_gpu"]),
+ "obj_ids_all_gpu": None, # will be filled later
+ "num_obj_per_gpu": deepcopy(tracker_metadata_prev["num_obj_per_gpu"]),
+ "obj_id_to_score": deepcopy(tracker_metadata_prev["obj_id_to_score"]),
+ "obj_id_to_tracker_score_frame_wise": deepcopy(
+ tracker_metadata_prev["obj_id_to_tracker_score_frame_wise"]
+ ),
+ "obj_id_to_last_occluded": {}, # will be filled later
+ "max_obj_id": deepcopy(tracker_metadata_prev["max_obj_id"]),
+ }
+
+ # Initialize reconditioned_obj_ids early to avoid UnboundLocalError
+ reconditioned_obj_ids = set()
+
+ # Step 1: make the update plan and resolve heuristics on GPU 0
+ det_mask_preds: Tensor = det_out["mask"] # low-res mask logits
+ det_scores_np: npt.NDArray = det_out["scores"].float().cpu().numpy()
+ det_bbox_xyxy: Tensor = det_out["bbox"]
+ if self.rank == 0:
+ # a) match detector and tracker masks and find new objects
+ (
+ new_det_fa_inds,
+ unmatched_trk_obj_ids,
+ det_to_matched_trk_obj_ids,
+ trk_id_to_max_iou_high_conf_det,
+ empty_trk_obj_ids,
+ ) = self._associate_det_trk(
+ det_masks=det_mask_preds,
+ det_scores_np=det_scores_np,
+ trk_masks=tracker_low_res_masks_global,
+ trk_obj_ids=tracker_metadata_prev["obj_ids_all_gpu"],
+ )
+ if self.suppress_det_close_to_boundary:
+ keep = self._suppress_detections_close_to_boundary(
+ det_bbox_xyxy[new_det_fa_inds]
+ )
+ new_det_fa_inds = new_det_fa_inds[keep.cpu().numpy()]
+
+ # check whether we've hit the maximum number of objects we can track (and if so, drop some detections)
+ prev_obj_num = np.sum(tracker_metadata_prev["num_obj_per_gpu"])
+ new_det_num = len(new_det_fa_inds)
+ num_obj_dropped_due_to_limit = 0
+ if not is_image_only and prev_obj_num + new_det_num > self.max_num_objects:
+ logger.warning(
+ f"hitting {self.max_num_objects=} with {new_det_num=} and {prev_obj_num=}"
+ )
+ new_det_num_to_keep = self.max_num_objects - prev_obj_num
+ num_obj_dropped_due_to_limit = new_det_num - new_det_num_to_keep
+ new_det_fa_inds = self._drop_new_det_with_obj_limit(
+ new_det_fa_inds, det_scores_np, new_det_num_to_keep
+ )
+ assert len(new_det_fa_inds) == new_det_num_to_keep
+ new_det_num = len(new_det_fa_inds)
+
+ # assign object IDs to new detections and decide which GPU to place them
+ new_det_start_obj_id = tracker_metadata_prev["max_obj_id"] + 1
+ new_det_obj_ids = new_det_start_obj_id + np.arange(new_det_num)
+ prev_workload_per_gpu = tracker_metadata_prev["num_obj_per_gpu"]
+ new_det_gpu_ids = self._assign_new_det_to_gpus(
+ new_det_num=new_det_num,
+ prev_workload_per_gpu=prev_workload_per_gpu,
+ )
+
+ # b) handle hotstart heuristics to remove objects
+ # here `rank0_metadata` contains metadata stored on (and only accessible to) GPU 0;
+ # we avoid broadcasting them to other GPUs to save communication cost, assuming
+ # that `rank0_metadata` is not needed by other GPUs
+ rank0_metadata_new = deepcopy(tracker_metadata_prev["rank0_metadata"])
+ if not hasattr(self, "_warm_up_complete") or self._warm_up_complete:
+ obj_ids_newly_removed, rank0_metadata_new = self._process_hotstart(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ reverse=reverse,
+ det_to_matched_trk_obj_ids=det_to_matched_trk_obj_ids,
+ new_det_obj_ids=new_det_obj_ids,
+ empty_trk_obj_ids=empty_trk_obj_ids,
+ unmatched_trk_obj_ids=unmatched_trk_obj_ids,
+ rank0_metadata=rank0_metadata_new,
+ tracker_metadata=tracker_metadata_prev,
+ )
+ else:
+ # if warm-up is not complete, we don't remove any objects
+ obj_ids_newly_removed = set()
+ tracker_metadata_new["rank0_metadata"] = rank0_metadata_new
+
+ # Step 2: broadcast the update plan to other GPUs
+ NUM_BROADCAST_ITEMS = 9
+ if self.rank == 0 and self.world_size > 1:
+ # `num_obj_per_gpu_on_rank0` is used for metadata consistency check on other GPUs
+ # (it's a small array with length==self.world_size, so broadcasting it is cheap)
+ num_obj_per_gpu_on_rank0 = tracker_metadata_prev["num_obj_per_gpu"]
+ update_plan = [
+ new_det_fa_inds,
+ new_det_obj_ids,
+ new_det_gpu_ids,
+ num_obj_per_gpu_on_rank0,
+ unmatched_trk_obj_ids,
+ det_to_matched_trk_obj_ids,
+ obj_ids_newly_removed,
+ num_obj_dropped_due_to_limit,
+ trk_id_to_max_iou_high_conf_det,
+ ]
+ assert (
+ len(update_plan) == NUM_BROADCAST_ITEMS
+ ), f"Manually update NUM_BROADCAST_ITEMS to be: {len(update_plan)}"
+ self.broadcast_python_obj_cpu(update_plan, src=0)
+ elif self.rank > 0 and self.world_size > 1:
+ update_plan = [
+ None
+ ] * NUM_BROADCAST_ITEMS # other ranks receive the plan from rank 0
+ self.broadcast_python_obj_cpu(update_plan, src=0)
+ (
+ new_det_fa_inds,
+ new_det_obj_ids,
+ new_det_gpu_ids,
+ num_obj_per_gpu_on_rank0,
+ unmatched_trk_obj_ids,
+ det_to_matched_trk_obj_ids,
+ obj_ids_newly_removed,
+ num_obj_dropped_due_to_limit,
+ trk_id_to_max_iou_high_conf_det,
+ ) = update_plan
+ # metadata consistency check: verify that the received `num_obj_per_gpu_on_rank0` is consistent with the local metadata
+ # it's critical that all GPUs agree on the previous number of objects (otherwise the inference might hang or fail silently)
+ if not np.all(
+ num_obj_per_gpu_on_rank0 == tracker_metadata_prev["num_obj_per_gpu"]
+ ):
+ raise RuntimeError(
+ f"{self.rank=} received {num_obj_per_gpu_on_rank0=}, which is inconsistent with local record "
+ f"{tracker_metadata_prev['num_obj_per_gpu']=}. There's likely a bug in update planning or execution."
+ )
+
+ # `tracker_update_plan` should be identical on all GPUs after broadcasting
+ tracker_update_plan = {
+ "new_det_fa_inds": new_det_fa_inds, # npt.NDArray
+ "new_det_obj_ids": new_det_obj_ids, # npt.NDArray
+ "new_det_gpu_ids": new_det_gpu_ids, # npt.NDArray
+ "unmatched_trk_obj_ids": unmatched_trk_obj_ids, # npt.NDArray
+ "det_to_matched_trk_obj_ids": det_to_matched_trk_obj_ids, # dict
+ "obj_ids_newly_removed": obj_ids_newly_removed, # set
+ "num_obj_dropped_due_to_limit": num_obj_dropped_due_to_limit, # int
+ "trk_id_to_max_iou_high_conf_det": trk_id_to_max_iou_high_conf_det, # dict
+ "reconditioned_obj_ids": reconditioned_obj_ids, # set
+ }
+
+ # Step 3 (optional): recondition masklets based on high-confidence detections before memory encoding
+ # NOTE: Running this in execution phase (after memory encoding) can lead to suboptimal results
+ should_recondition_iou = False
+
+ # Evaluate tracklets for reconditioning based on bbox IoU mismatch with detections
+ if (
+ self.reconstruction_bbox_iou_thresh > 0
+ and len(trk_id_to_max_iou_high_conf_det) > 0
+ ):
+ for trk_obj_id, det_idx in trk_id_to_max_iou_high_conf_det.items():
+ det_box = det_out["bbox"][det_idx]
+ det_score = det_out["scores"][det_idx]
+
+ try:
+ trk_idx = list(tracker_metadata_prev["obj_ids_all_gpu"]).index(
+ trk_obj_id
+ )
+ except ValueError:
+ continue # Skip if tracklet not found
+
+ tracker_mask = tracker_low_res_masks_global[trk_idx]
+ mask_binary = tracker_mask > 0
+ mask_area = mask_binary.sum().item()
+
+ if mask_area == 0:
+ continue # Skip tracklets with zero mask area
+
+ # Get bounding box from SAM2 mask and convert to normalized coordinates
+ tracker_box_pixels = (
+ mask_to_box(mask_binary.unsqueeze(0).unsqueeze(0))
+ .squeeze(0)
+ .squeeze(0)
+ )
+ mask_height, mask_width = tracker_mask.shape[-2:]
+ tracker_box_normalized = torch.tensor(
+ [
+ tracker_box_pixels[0] / mask_width,
+ tracker_box_pixels[1] / mask_height,
+ tracker_box_pixels[2] / mask_width,
+ tracker_box_pixels[3] / mask_height,
+ ],
+ device=tracker_box_pixels.device,
+ )
+
+ # Compute IoU between detection and SAM2 tracklet bounding boxes
+ det_box_batch = det_box.unsqueeze(0)
+ tracker_box_batch = tracker_box_normalized.unsqueeze(0)
+ iou = fast_diag_box_iou(det_box_batch, tracker_box_batch)[0]
+
+ if (
+ iou < self.reconstruction_bbox_iou_thresh
+ and det_score >= self.reconstruction_bbox_det_score
+ ):
+ should_recondition_iou = True
+ reconditioned_obj_ids.add(trk_obj_id)
+
+ should_recondition_periodic = (
+ self.recondition_every_nth_frame > 0
+ and frame_idx % self.recondition_every_nth_frame == 0
+ and len(trk_id_to_max_iou_high_conf_det) > 0
+ )
+
+ # Recondition if periodic or IoU condition met
+ if should_recondition_periodic or should_recondition_iou:
+ self._recondition_masklets(
+ frame_idx,
+ det_out,
+ trk_id_to_max_iou_high_conf_det,
+ tracker_states_local,
+ tracker_metadata_prev,
+ tracker_obj_scores_global,
+ )
+
+ # Step 4: Run SAM2 memory encoder on the current frame's prediction masks
+ # This is done on all GPUs
+ batch_size = tracker_low_res_masks_global.size(0)
+ if batch_size > 0:
+ if not hasattr(self, "_warm_up_complete") or self._warm_up_complete:
+ if self.suppress_overlapping_based_on_recent_occlusion_threshold > 0.0:
+ # NOTE: tracker_low_res_masks_global is updated in-place then returned
+ tracker_low_res_masks_global = (
+ self._suppress_overlapping_based_on_recent_occlusion(
+ frame_idx,
+ tracker_low_res_masks_global,
+ tracker_metadata_prev,
+ tracker_metadata_new,
+ obj_ids_newly_removed,
+ reverse,
+ )
+ )
+
+ self._tracker_update_memories(
+ tracker_states_local,
+ frame_idx,
+ tracker_metadata=tracker_metadata_prev,
+ low_res_masks=tracker_low_res_masks_global,
+ )
+
+        # Step 5: update the SAM2 metadata based on the update plan
+ # note: except for "rank0_metadata" (that is only available on GPU 0),
+ # the updated `tracker_metadata_new` should be identical on all GPUs
+ for rank in range(self.world_size):
+ new_det_obj_ids_this_gpu = new_det_obj_ids[new_det_gpu_ids == rank]
+ updated_obj_ids_this_gpu = tracker_metadata_new["obj_ids_per_gpu"][rank]
+ if len(new_det_obj_ids_this_gpu) > 0:
+ updated_obj_ids_this_gpu = np.concatenate(
+ [updated_obj_ids_this_gpu, new_det_obj_ids_this_gpu]
+ )
+ if len(obj_ids_newly_removed) > 0:
+ is_removed = np.isin(
+ updated_obj_ids_this_gpu, list(obj_ids_newly_removed)
+ )
+ updated_obj_ids_this_gpu = updated_obj_ids_this_gpu[~is_removed]
+ tracker_metadata_new["obj_ids_per_gpu"][rank] = updated_obj_ids_this_gpu
+ tracker_metadata_new["num_obj_per_gpu"][rank] = len(
+ updated_obj_ids_this_gpu
+ )
+ tracker_metadata_new["obj_ids_all_gpu"] = np.concatenate(
+ tracker_metadata_new["obj_ids_per_gpu"]
+ )
+ # update object scores and the maximum object ID assigned so far
+ if len(new_det_obj_ids) > 0:
+ tracker_metadata_new["obj_id_to_score"].update(
+ zip(new_det_obj_ids, det_scores_np[new_det_fa_inds])
+ )
+ # tracker scores are not available for new objects, use det score instead.
+ tracker_metadata_new["obj_id_to_tracker_score_frame_wise"][
+ frame_idx
+ ].update(zip(new_det_obj_ids, det_scores_np[new_det_fa_inds]))
+ tracker_metadata_new["max_obj_id"] = max(
+ tracker_metadata_new["max_obj_id"],
+ np.max(new_det_obj_ids),
+ )
+ # for removed objects, we set their scores to a very low value (-1e4) but still
+ # keep them in "obj_id_to_score" (it's easier to handle outputs this way)
+ for obj_id in obj_ids_newly_removed:
+ tracker_metadata_new["obj_id_to_score"][obj_id] = -1e4
+ tracker_metadata_new["obj_id_to_tracker_score_frame_wise"][frame_idx][
+ obj_id
+ ] = -1e4
+ tracker_metadata_new["obj_id_to_last_occluded"].pop(obj_id, None)
+ # check that "rank0_metadata" is in tracker_metadata_new if and only if it's GPU 0
+ assert ("rank0_metadata" in tracker_metadata_new) == (self.rank == 0)
+ if self.rank == 0 and self.masklet_confirmation_enable:
+ rank0_metadata = self.update_masklet_confirmation_status(
+ rank0_metadata=tracker_metadata_new["rank0_metadata"],
+ obj_ids_all_gpu_prev=tracker_metadata_prev["obj_ids_all_gpu"],
+ obj_ids_all_gpu_updated=tracker_metadata_new["obj_ids_all_gpu"],
+ det_to_matched_trk_obj_ids=det_to_matched_trk_obj_ids,
+ new_det_obj_ids=new_det_obj_ids,
+ )
+ tracker_metadata_new["rank0_metadata"] = rank0_metadata
+
+ return tracker_update_plan, tracker_metadata_new
+
+ def _suppress_overlapping_based_on_recent_occlusion(
+ self,
+ frame_idx: int,
+ tracker_low_res_masks_global: Tensor,
+ tracker_metadata_prev: Dict[str, Any],
+ tracker_metadata_new: Dict[str, Any],
+ obj_ids_newly_removed: Set[int],
+ reverse: bool = False,
+ ):
+ """
+        Suppress overlapping masks based on the most recent occlusion information. If an
+        object was removed by hotstart, it is always suppressed whenever it overlaps with
+        any other object.
+ Args:
+ frame_idx (int): The current frame index.
+ tracker_low_res_masks_global (Tensor): The low-resolution masks for the current frame.
+ tracker_metadata_prev (Dict[str, Any]): The metadata from the previous frame.
+ tracker_metadata_new (Dict[str, Any]): The metadata for the current frame.
+            obj_ids_newly_removed (Set[int]): The object IDs that have been removed.
+            reverse (bool): Whether tracking runs in reverse (from later to earlier frames).
+        Returns:
+            Tensor: The updated low-resolution masks with some objects suppressed.
+ """
+ obj_ids_global = tracker_metadata_prev["obj_ids_all_gpu"]
+ binary_tracker_low_res_masks_global = tracker_low_res_masks_global > 0
+ batch_size = tracker_low_res_masks_global.size(0)
+ if batch_size > 0:
+ assert (
+ len(obj_ids_global) == batch_size
+ ), f"Mismatch in number of objects: {len(obj_ids_global)} vs {batch_size}"
+ NEVER_OCCLUDED = -1
+            ALWAYS_OCCLUDED = 100000  # larger than any possible frame index; indicates that the object was removed by the hotstart logic
+ last_occluded_prev = torch.cat(
+ [
+ tracker_metadata_prev["obj_id_to_last_occluded"].get(
+ obj_id,
+ torch.full(
+ (1,),
+ fill_value=(
+ NEVER_OCCLUDED
+ if obj_id not in obj_ids_newly_removed
+ else ALWAYS_OCCLUDED
+ ),
+ device=binary_tracker_low_res_masks_global.device,
+ dtype=torch.long,
+ ),
+ )
+ for obj_id in obj_ids_global
+ ],
+ dim=0,
+ )
+ to_suppress = self._get_objects_to_suppress_based_on_most_recently_occluded(
+ binary_tracker_low_res_masks_global,
+ last_occluded_prev,
+ obj_ids_global,
+ frame_idx,
+ reverse,
+ )
+
+ # Update metadata with occlusion information
+ is_obj_occluded = ~(binary_tracker_low_res_masks_global.any(dim=(-1, -2)))
+ is_obj_occluded_or_suppressed = is_obj_occluded | to_suppress
+ last_occluded_new = last_occluded_prev.clone()
+ last_occluded_new[is_obj_occluded_or_suppressed] = frame_idx
+ # Slice out the last occluded frame for each object
+ tracker_metadata_new["obj_id_to_last_occluded"] = {
+ obj_id: last_occluded_new[obj_idx : obj_idx + 1]
+ for obj_idx, obj_id in enumerate(obj_ids_global)
+ }
+
+ # Zero out suppressed masks before memory encoding
+ NO_OBJ_LOGIT = -10
+ tracker_low_res_masks_global[to_suppress] = NO_OBJ_LOGIT
+
+ return tracker_low_res_masks_global
+
+ def run_tracker_update_execution_phase(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ reverse: bool,
+ det_out: Dict[str, Tensor],
+ tracker_states_local: List[Any],
+ tracker_update_plan: Dict[str, npt.NDArray],
+ orig_vid_height: int,
+ orig_vid_width: int,
+ feature_cache: Dict,
+ ):
+        # unpack the update plan and pick out the new detections assigned to this GPU
+ new_det_fa_inds: npt.NDArray = tracker_update_plan["new_det_fa_inds"]
+ new_det_obj_ids: npt.NDArray = tracker_update_plan["new_det_obj_ids"]
+ new_det_gpu_ids: npt.NDArray = tracker_update_plan["new_det_gpu_ids"]
+ is_on_this_gpu: npt.NDArray = new_det_gpu_ids == self.rank
+ new_det_obj_ids_local: npt.NDArray = new_det_obj_ids[is_on_this_gpu]
+ new_det_fa_inds_local: npt.NDArray = new_det_fa_inds[is_on_this_gpu]
+ obj_ids_newly_removed: Set[int] = tracker_update_plan["obj_ids_newly_removed"]
+
+ # Step 1: add new objects from the detector to SAM2 inference states
+ if len(new_det_fa_inds_local) > 0:
+ new_det_fa_inds_local_t = torch.from_numpy(new_det_fa_inds_local)
+ new_det_masks: Tensor = det_out["mask"][new_det_fa_inds_local_t]
+ # initialize SAM2 with new object masks
+ tracker_states_local = self._tracker_add_new_objects(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ new_obj_ids=new_det_obj_ids_local,
+ new_obj_masks=new_det_masks,
+ tracker_states_local=tracker_states_local,
+ orig_vid_height=orig_vid_height,
+ orig_vid_width=orig_vid_width,
+ feature_cache=feature_cache,
+ )
+
+ # Step 2: remove from SAM2 inference states those objects removed by heuristics
+ if len(obj_ids_newly_removed) > 0:
+ self._tracker_remove_objects(tracker_states_local, obj_ids_newly_removed)
+
+ return tracker_states_local
+
+ def build_outputs(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ reverse: bool,
+ det_out: Dict[str, Tensor],
+ tracker_low_res_masks_global: Tensor,
+ tracker_obj_scores_global: Tensor,
+ tracker_metadata_prev: Dict[str, npt.NDArray],
+ tracker_update_plan: Dict[str, npt.NDArray],
+ orig_vid_height: int,
+ orig_vid_width: int,
+ reconditioned_obj_ids: set = None,
+ det_to_matched_trk_obj_ids: dict = None,
+ ):
+ new_det_fa_inds: npt.NDArray = tracker_update_plan["new_det_fa_inds"]
+ new_det_obj_ids: npt.NDArray = tracker_update_plan["new_det_obj_ids"]
+ obj_id_to_mask = {} # obj_id --> output mask tensor
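+        # each value is a boolean mask of shape (1, H_video, W_video) at the original video resolution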
+
+ # Part 1: masks from previous SAM2 propagation
+ existing_masklet_obj_ids = tracker_metadata_prev["obj_ids_all_gpu"]
+ existing_masklet_video_res_masks = F.interpolate(
+ tracker_low_res_masks_global.unsqueeze(1),
+ size=(orig_vid_height, orig_vid_width),
+ mode="bilinear",
+ align_corners=False,
+ ) # (num_obj, 1, H_video, W_video)
+ existing_masklet_binary = existing_masklet_video_res_masks > 0
+ assert len(existing_masklet_obj_ids) == len(existing_masklet_binary)
+ for obj_id, mask in zip(existing_masklet_obj_ids, existing_masklet_binary):
+ obj_id_to_mask[obj_id] = mask # (1, H_video, W_video)
+
+ # Part 2: masks from new detections
+ new_det_fa_inds_t = torch.from_numpy(new_det_fa_inds)
+ new_det_low_res_masks = det_out["mask"][new_det_fa_inds_t].unsqueeze(1)
+ new_det_low_res_masks = fill_holes_in_mask_scores(
+ new_det_low_res_masks,
+ max_area=self.fill_hole_area,
+ fill_holes=True,
+ remove_sprinkles=True,
+ )
+ new_masklet_video_res_masks = F.interpolate(
+ new_det_low_res_masks,
+ size=(orig_vid_height, orig_vid_width),
+ mode="bilinear",
+ align_corners=False,
+ ) # (num_obj, 1, H_video, W_video)
+
+ new_masklet_binary = new_masklet_video_res_masks > 0
+ assert len(new_det_obj_ids) == len(new_masklet_video_res_masks)
+ for obj_id, mask in zip(new_det_obj_ids, new_masklet_binary):
+ obj_id_to_mask[obj_id] = mask # (1, H_video, W_video)
+
+ # Part 3: Override masks for reconditioned objects using detection masks
+ if reconditioned_obj_ids is not None and len(reconditioned_obj_ids) > 0:
+ trk_id_to_max_iou_high_conf_det = tracker_update_plan.get(
+ "trk_id_to_max_iou_high_conf_det", {}
+ )
+
+ for obj_id in reconditioned_obj_ids:
+ det_idx = trk_id_to_max_iou_high_conf_det.get(obj_id)
+
+ if det_idx is not None:
+ det_mask = det_out["mask"][det_idx]
+ det_mask = det_mask.unsqueeze(0).unsqueeze(0)
+ det_mask_resized = (
+ F.interpolate(
+ det_mask.float(),
+ size=(orig_vid_height, orig_vid_width),
+ mode="bilinear",
+ align_corners=False,
+ )
+ > 0
+ )
+
+ det_mask_final = det_mask_resized.squeeze(0)
+ obj_id_to_mask[obj_id] = det_mask_final
+
+ return obj_id_to_mask
+
+ def _get_objects_to_suppress_based_on_most_recently_occluded(
+ self,
+ binary_low_res_masks: Tensor,
+        last_occluded: Tensor,
+ obj_ids: List[int],
+ frame_idx: int = None,
+ reverse: bool = False,
+ ):
+ # Suppress overlapping masks for objects that were most recently occluded
+ assert (
+ binary_low_res_masks.dtype == torch.bool
+ ), f"Expected boolean tensor, got {binary_low_res_masks.dtype}"
+ to_suppress = torch.zeros(
+ binary_low_res_masks.size(0),
+ device=binary_low_res_masks.device,
+ dtype=torch.bool,
+ )
+ if len(obj_ids) <= 1:
+ return to_suppress
+
+ iou = mask_iou(binary_low_res_masks, binary_low_res_masks) # [N,N]
+
+ # Create masks for upper triangular matrix (i < j) and IoU threshold
+ mask_iou_thresh = (
+ iou >= self.suppress_overlapping_based_on_recent_occlusion_threshold
+ )
+ overlapping_pairs = torch.triu(mask_iou_thresh, diagonal=1) # [N,N]
+
+ last_occ_expanded_i = last_occluded.unsqueeze(1) # (N, 1)
+ last_occ_expanded_j = last_occluded.unsqueeze(0) # (1, N)
+ # Suppress most recently occluded
+ cmp_op = torch.gt if not reverse else torch.lt
+ suppress_i_mask = (
+ overlapping_pairs
+ & cmp_op(
+ last_occ_expanded_i, last_occ_expanded_j
+ ) # (last_occ_expanded_i > last_occ_expanded_j)
+ & (
+ last_occ_expanded_j > -1
+ ) # j can suppress i only if i was previously occluded
+ )
+ suppress_j_mask = (
+ overlapping_pairs
+ & cmp_op(last_occ_expanded_j, last_occ_expanded_i)
+ & (
+ last_occ_expanded_i > -1
+ ) # i can suppress j only if j was previously occluded
+ )
+ # Apply suppression
+ to_suppress = suppress_i_mask.any(dim=1) | suppress_j_mask.any(dim=0)
+
+ # Log for debugging
+ if (
+ self.rank == 0
+ and logger.isEnabledFor(logging.DEBUG)
+ and frame_idx is not None
+ ):
+ suppress_i_mask = suppress_i_mask.cpu().numpy()
+ suppress_j_mask = suppress_j_mask.cpu().numpy()
+ last_occluded = last_occluded.cpu().numpy()
+
+ # Find all suppression pairs without using torch.where
+ batch_size = suppress_i_mask.shape[0]
+
+ # Log i-suppression cases (where i gets suppressed in favor of j)
+ for i in range(batch_size):
+ for j in range(batch_size):
+ if suppress_i_mask[i, j]:
+ logger.debug(
+ f"{frame_idx=}: Suppressing obj {obj_ids[i]} last occluded {last_occluded[i]} in favor of {obj_ids[j]} last occluded {last_occluded[j]}"
+ )
+
+ # Log j-suppression cases (where j gets suppressed in favor of i)
+ for i in range(batch_size):
+ for j in range(batch_size):
+ if suppress_j_mask[i, j]:
+ logger.debug(
+ f"{frame_idx=}: Suppressing obj {obj_ids[j]} last occluded {last_occluded[j]} in favor of {obj_ids[i]} last occluded {last_occluded[i]}"
+ )
+
+ return to_suppress
+
+ def _propogate_tracker_one_frame_local_gpu(
+ self,
+ inference_states: List[Any],
+ frame_idx: int,
+ reverse: bool,
+ # by default, we disable memory encoding until we gather all outputs
+ run_mem_encoder: bool = False,
+ ):
+ """
+ inference_states: List of inference states, each state corresponds to a different set of objects.
+ """
+ obj_ids_local = []
+ low_res_masks_list = []
+ obj_scores_list = []
+ for inference_state in inference_states:
+ if len(inference_state["obj_ids"]) == 0:
+ continue # skip propagation on empty inference states
+
+ # propagate one frame
+ num_frames_propagated = 0
+ for out in self.tracker.propagate_in_video(
+ inference_state,
+ start_frame_idx=frame_idx,
+ # end_frame_idx = start_frame_idx + max_frame_num_to_track
+ # (i.e. propagating 1 frame since end_frame_idx is inclusive)
+ max_frame_num_to_track=0,
+ reverse=reverse,
+ tqdm_disable=True,
+ run_mem_encoder=run_mem_encoder,
+ ):
+ out_frame_idx, out_obj_ids, out_low_res_masks, _, out_obj_scores = out
+ num_frames_propagated += 1
+
+            # only 1 frame should be propagated
+ assert (
+ num_frames_propagated == 1 and out_frame_idx == frame_idx
+ ), f"num_frames_propagated: {num_frames_propagated}, out_frame_idx: {out_frame_idx}, frame_idx: {frame_idx}"
+ assert isinstance(out_obj_ids, list)
+ obj_ids_local.extend(out_obj_ids)
+ low_res_masks_list.append(out_low_res_masks.squeeze(1))
+ obj_scores_list.append(out_obj_scores.squeeze(1))
+
+ # concatenate the output masklets from all local inference states
+ H_mask = W_mask = self.tracker.low_res_mask_size
+ if len(low_res_masks_list) > 0:
+ low_res_masks_local = torch.cat(low_res_masks_list, dim=0)
+ obj_scores_local = torch.cat(obj_scores_list, dim=0)
+ assert low_res_masks_local.shape[1:] == (H_mask, W_mask)
+
+ # Apply hole filling to the masks
+ low_res_masks_local = fill_holes_in_mask_scores(
+ low_res_masks_local.unsqueeze(1),
+ max_area=self.fill_hole_area,
+ fill_holes=True,
+ remove_sprinkles=True,
+ )
+ low_res_masks_local = low_res_masks_local.squeeze(1)
+ else:
+ low_res_masks_local = torch.zeros(0, H_mask, W_mask, device=self.device)
+ obj_scores_local = torch.zeros(0, device=self.device)
+
+ return obj_ids_local, low_res_masks_local, obj_scores_local
+
+ def _associate_det_trk(
+ self,
+ det_masks: Tensor,
+ det_scores_np: npt.NDArray,
+ trk_masks: Tensor,
+ trk_obj_ids: npt.NDArray,
+ ):
+ """
+ Match detections on the current frame with the existing masklets.
+
+ Args:
+ - det_masks: (N, H, W) tensor of predicted masks
+ - det_scores_np: (N,) array of detection scores
+ - trk_masks: (M, H, W) tensor of track masks
+ - trk_obj_ids: (M,) array of object IDs corresponding to trk_masks
+
+ Returns:
+        - new_det_fa_inds: array of detection indices that should be added as new objects
+        - unmatched_trk_obj_ids: array of existing masklet object IDs that are not matched
+          to any detections on this frame (for unmatched, we only count masklets with >0 area)
+        - det_to_matched_trk_obj_ids: dict[int, npt.NDArray] mapping from detection indices
+          to the array of matched tracklet object IDs
+        - trk_id_to_max_iou_high_conf_det: dict mapping a tracklet object ID to the index of
+          its highest-IoU, high-confidence detection (used for reconditioning)
+        - empty_trk_obj_ids: array of existing masklet object IDs with zero area in SAM2 prediction
+ """
+ iou_threshold = self.assoc_iou_thresh
+ iou_threshold_trk = self.trk_assoc_iou_thresh
+ new_det_thresh = self.new_det_thresh
+
+ assert det_masks.is_floating_point(), "float tensor expected (do not binarize)"
+ assert trk_masks.is_floating_point(), "float tensor expected (do not binarize)"
+ assert (
+ trk_masks.size(0) == len(trk_obj_ids)
+ ), f"trk_masks and trk_obj_ids should have the same length, {trk_masks.size(0)} vs {len(trk_obj_ids)}"
+ if trk_masks.size(0) == 0:
+ # all detections are new
+ new_det_fa_inds = np.arange(det_masks.size(0))
+ unmatched_trk_obj_ids = np.array([], np.int64)
+ empty_trk_obj_ids = np.array([], np.int64)
+ det_to_matched_trk_obj_ids = {}
+ trk_id_to_max_iou_high_conf_det = {}
+ return (
+ new_det_fa_inds,
+ unmatched_trk_obj_ids,
+ det_to_matched_trk_obj_ids,
+ trk_id_to_max_iou_high_conf_det,
+ empty_trk_obj_ids,
+ )
+ elif det_masks.size(0) == 0:
+ # all previous tracklets are unmatched if they have a non-zero area
+ new_det_fa_inds = np.array([], np.int64)
+ trk_is_nonempty = (trk_masks > 0).any(dim=(1, 2)).cpu().numpy()
+ unmatched_trk_obj_ids = trk_obj_ids[trk_is_nonempty]
+ empty_trk_obj_ids = trk_obj_ids[~trk_is_nonempty]
+ det_to_matched_trk_obj_ids = {}
+ trk_id_to_max_iou_high_conf_det = {}
+ return (
+ new_det_fa_inds,
+ unmatched_trk_obj_ids,
+ det_to_matched_trk_obj_ids,
+ trk_id_to_max_iou_high_conf_det,
+ empty_trk_obj_ids,
+ )
+
+ if det_masks.shape[-2:] != trk_masks.shape[-2:]:
+ # resize to the smaller size to save GPU memory
+ if np.prod(det_masks.shape[-2:]) < np.prod(trk_masks.shape[-2:]):
+ trk_masks = F.interpolate(
+ trk_masks.unsqueeze(1),
+ size=det_masks.shape[-2:],
+ mode="bilinear",
+ align_corners=False,
+ ).squeeze(1)
+ else:
+ # resize detections to track size
+ det_masks = F.interpolate(
+ det_masks.unsqueeze(1),
+ size=trk_masks.shape[-2:],
+ mode="bilinear",
+ align_corners=False,
+ ).squeeze(1)
+
+ det_masks_binary = det_masks > 0
+ trk_masks_binary = trk_masks > 0
+ ious = mask_iou(det_masks_binary, trk_masks_binary) # (N, M)
+
+ ious_np = ious.cpu().numpy()
+ if self.o2o_matching_masklets_enable:
+ from scipy.optimize import linear_sum_assignment
+
+ # Hungarian matching for tracks (one-to-one: each track matches at most one detection)
+ cost_matrix = 1 - ious_np # Hungarian solves for minimum cost
+ row_ind, col_ind = linear_sum_assignment(cost_matrix)
+ trk_is_matched = np.zeros(trk_masks.size(0), dtype=bool)
+ for d, t in zip(row_ind, col_ind):
+ if ious_np[d, t] >= iou_threshold_trk:
+ trk_is_matched[t] = True
+ else:
+ trk_is_matched = (ious_np >= iou_threshold_trk).any(axis=0)
+        # Non-empty tracks that are not matched above the threshold are considered unmatched
+ trk_is_nonempty = trk_masks_binary.any(dim=(1, 2)).cpu().numpy()
+ trk_is_unmatched = np.logical_and(trk_is_nonempty, ~trk_is_matched)
+ unmatched_trk_obj_ids = trk_obj_ids[trk_is_unmatched]
+ # also record masklets that have zero area in SAM 2 prediction
+ empty_trk_obj_ids = trk_obj_ids[~trk_is_nonempty]
+
+ # For detections: allow many tracks to match to the same detection (many-to-one)
+ # So, a detection is 'new' if it does not match any track above threshold
+ is_new_det = np.logical_and(
+ det_scores_np >= new_det_thresh,
+ np.logical_not(np.any(ious_np >= iou_threshold, axis=1)),
+ )
+ new_det_fa_inds = np.nonzero(is_new_det)[0]
+
+ # for each detection, which tracks it matched to (above threshold)
+ det_to_matched_trk_obj_ids = {}
+ trk_id_to_max_iou_high_conf_det = {} # trk id --> exactly one detection idx
+ HIGH_CONF_THRESH = 0.8
+ HIGH_IOU_THRESH = 0.8
+ det_to_max_iou_trk_idx = np.argmax(ious_np, axis=1)
+ det_is_high_conf = (det_scores_np >= HIGH_CONF_THRESH) & ~is_new_det
+ det_is_high_iou = np.max(ious_np, axis=1) >= HIGH_IOU_THRESH
+ det_is_high_conf_and_iou = set(
+ np.nonzero(det_is_high_conf & det_is_high_iou)[0]
+ )
+ for d in range(det_masks.size(0)):
+ det_to_matched_trk_obj_ids[d] = trk_obj_ids[ious_np[d, :] >= iou_threshold]
+ if d in det_is_high_conf_and_iou:
+ trk_obj_id = trk_obj_ids[det_to_max_iou_trk_idx[d]].item()
+ trk_id_to_max_iou_high_conf_det[trk_obj_id] = d
+
+ return (
+ new_det_fa_inds,
+ unmatched_trk_obj_ids,
+ det_to_matched_trk_obj_ids,
+ trk_id_to_max_iou_high_conf_det,
+ empty_trk_obj_ids,
+ )
+
+ def _assign_new_det_to_gpus(self, new_det_num, prev_workload_per_gpu):
+ """Distribute the new objects to the GPUs with the least workload."""
+ workload_per_gpu: npt.NDArray = prev_workload_per_gpu.copy()
+ new_det_gpu_ids = np.zeros(new_det_num, np.int64)
+
+ # assign the objects one by one
+ for i in range(len(new_det_gpu_ids)):
+ # find the GPU with the least workload
+ min_gpu = np.argmin(workload_per_gpu)
+ new_det_gpu_ids[i] = min_gpu
+ workload_per_gpu[min_gpu] += 1
+ return new_det_gpu_ids
+
+ def _process_hotstart(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ reverse: bool,
+ det_to_matched_trk_obj_ids: Dict[int, npt.NDArray],
+ new_det_obj_ids: npt.NDArray,
+ empty_trk_obj_ids: npt.NDArray,
+ unmatched_trk_obj_ids: npt.NDArray,
+ rank0_metadata: Dict[str, Any],
+ tracker_metadata: Dict[str, Any],
+ ):
+ """Handle hotstart heuristics to remove unmatched or duplicated objects."""
+ # obj_id --> first frame index where the object was detected
+ obj_first_frame_idx = rank0_metadata["obj_first_frame_idx"]
+ # obj_id --> [mismatched frame indices]
+ unmatched_frame_inds = rank0_metadata["unmatched_frame_inds"]
+ trk_keep_alive = rank0_metadata["trk_keep_alive"]
+ # (first_appear_obj_id, obj_id) --> [overlap frame indices]
+ overlap_pair_to_frame_inds = rank0_metadata["overlap_pair_to_frame_inds"]
+ # removed_obj_ids: object IDs that are suppressed via hot-start
+ removed_obj_ids = rank0_metadata["removed_obj_ids"]
+ suppressed_obj_ids = rank0_metadata["suppressed_obj_ids"][frame_idx]
+
+ obj_ids_newly_removed = set() # object IDs to be newly removed on this frame
+ hotstart_diff = (
+ frame_idx - self.hotstart_delay
+ if not reverse
+ else frame_idx + self.hotstart_delay
+ )
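+        # e.g. with hotstart_delay == 10 at frame_idx == 25 (forward tracking), hotstart_diff == 15,
+        # so only masklets first seen after frame 15 are eligible for removal below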
+
+ # Step 1: log the frame index where each object ID first appears
+ for obj_id in new_det_obj_ids:
+ if obj_id not in obj_first_frame_idx:
+ obj_first_frame_idx[obj_id] = frame_idx
+ assert obj_id not in trk_keep_alive
+ trk_keep_alive[obj_id] = self.init_trk_keep_alive
+
+ matched_trks = set()
+ # We use the det-->tracks list to check for matched objects. Otherwise, we need to compute areas to decide whether they're occluded
+ for matched_trks_per_det in det_to_matched_trk_obj_ids.values():
+ matched_trks.update(matched_trks_per_det)
+ for obj_id in matched_trks:
+ # NOTE: To minimize number of configurable params, we use the hotstart_unmatch_thresh to set the max value of trk_keep_alive
+ trk_keep_alive[obj_id] = min(
+ self.max_trk_keep_alive, trk_keep_alive[obj_id] + 1
+ )
+ for obj_id in unmatched_trk_obj_ids:
+ unmatched_frame_inds[obj_id].append(frame_idx)
+ # NOTE: To minimize number of configurable params, we use the hotstart_unmatch_thresh to set the min value of trk_keep_alive
+            # The max keep-alive is 2x the min, which means the model prefers to keep the prediction rather than suppress it if it has been matched for long enough.
+ trk_keep_alive[obj_id] = max(
+ self.min_trk_keep_alive, trk_keep_alive[obj_id] - 1
+ )
+ if self.decrease_trk_keep_alive_for_empty_masklets:
+ for obj_id in empty_trk_obj_ids:
+ # NOTE: To minimize number of configurable params, we use the hotstart_unmatch_thresh to set the min value of trk_keep_alive
+ trk_keep_alive[obj_id] = max(
+ self.min_trk_keep_alive, trk_keep_alive[obj_id] - 1
+ )
+
+        # Step 2: remove tracks that have not matched any detections for `hotstart_unmatch_thresh` frames within the hotstart period
+ # a) add unmatched frame indices for each existing object ID
+ # note that `unmatched_trk_obj_ids` contains the tracks whose SAM2 output mask
+ # doesn't match any detection on this frame; it excludes tracks where SAM2 gives an empty mask
+ # b) remove a masklet if it first appears after `hotstart_diff` and is unmatched for more
+ # than `self.hotstart_unmatch_thresh` frames
+ for obj_id, frame_indices in unmatched_frame_inds.items():
+ if obj_id in removed_obj_ids or obj_id in obj_ids_newly_removed:
+ continue # skip if the object is already removed
+ if len(frame_indices) >= self.hotstart_unmatch_thresh:
+ is_within_hotstart = (
+ obj_first_frame_idx[obj_id] > hotstart_diff and not reverse
+ ) or (obj_first_frame_idx[obj_id] < hotstart_diff and reverse)
+ if is_within_hotstart:
+ obj_ids_newly_removed.add(obj_id)
+ logger.debug(
+ f"Removing object {obj_id} at frame {frame_idx} "
+ f"since it is unmatched for frames: {frame_indices}"
+ )
+ if (
+ trk_keep_alive[obj_id] <= 0 # Object has not been matched for too long
+ and not self.suppress_unmatched_only_within_hotstart
+ and obj_id not in removed_obj_ids
+ and obj_id not in obj_ids_newly_removed
+ ):
+ logger.debug(
+ f"Suppressing object {obj_id} at frame {frame_idx}, due to being unmatched"
+ )
+ suppressed_obj_ids.add(obj_id)
+
+ # Step 3: remove tracks that overlap with another track for `hotstart_dup_thresh` frames
+ # a) find overlapping tracks -- we consider two tracks overlapping if they match the same detection
+ for _, matched_trk_obj_ids in det_to_matched_trk_obj_ids.items():
+ if len(matched_trk_obj_ids) < 2:
+ continue # only count detections that are matched to multiple (>=2) masklets
+ # if there are multiple matched track ids, we need to find the one that appeared first;
+ # the later-appearing ids may be removed since they are considered duplicates
+ first_appear_obj_id = (
+ min(matched_trk_obj_ids, key=lambda x: obj_first_frame_idx[x])
+ if not reverse
+ else max(matched_trk_obj_ids, key=lambda x: obj_first_frame_idx[x])
+ )
+ for obj_id in matched_trk_obj_ids:
+ if obj_id != first_appear_obj_id:
+ key = (first_appear_obj_id, obj_id)
+ overlap_pair_to_frame_inds[key].append(frame_idx)
+
+ # b) remove a masklet if it first appears after `hotstart_diff` and it overlaps with another
+ # masklet (that appears earlier) for more than `self.hotstart_dup_thresh` frames
+ for (first_obj_id, obj_id), frame_indices in overlap_pair_to_frame_inds.items():
+ if obj_id in removed_obj_ids or obj_id in obj_ids_newly_removed:
+ continue # skip if the object is already removed
+ if (obj_first_frame_idx[obj_id] > hotstart_diff and not reverse) or (
+ obj_first_frame_idx[obj_id] < hotstart_diff and reverse
+ ):
+ if len(frame_indices) >= self.hotstart_dup_thresh:
+ obj_ids_newly_removed.add(obj_id)
+ logger.debug(
+ f"Removing object {obj_id} at frame {frame_idx} "
+ f"since it overlaps with another track {first_obj_id} at frames: {frame_indices}"
+ )
+
+ removed_obj_ids.update(obj_ids_newly_removed)
+ return obj_ids_newly_removed, rank0_metadata
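+ # Hypothetical walk-through of the hot-start removal above (forward direction),
+ # assuming hotstart_delay=15 and hotstart_unmatch_thresh=8: at frame_idx=20,
+ # hotstart_diff = 20 - 15 = 5. A track that first appeared on frame 10 (> 5, i.e.
+ # still within its hot-start period) and has accumulated >= 8 unmatched frames is
+ # added to `obj_ids_newly_removed`. A track that first appeared on frame 2 (<= 5)
+ # is never hot-start removed; at most it gets suppressed once its trk_keep_alive
+ # counter drops to 0 (and only if suppress_unmatched_only_within_hotstart is False).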
+
+ def _tracker_update_memories(
+ self,
+ tracker_inference_states: List[Any],
+ frame_idx: int,
+ tracker_metadata: Dict[str, Any],
+ low_res_masks: Tensor,
+ ):
+ """
+ Run the SAM2 memory encoder, enforcing non-overlapping constraints globally.
+ """
+ if len(tracker_inference_states) == 0:
+ return
+ # Avoid an extra interpolation step by directly interpolating to `interpol_size`
+ high_res_H, high_res_W = (
+ self.tracker.maskmem_backbone.mask_downsampler.interpol_size
+ )
+ # NOTE: inspect this part if we observe OOMs in the demo
+ high_res_masks = F.interpolate(
+ low_res_masks.unsqueeze(1),
+ size=(high_res_H, high_res_W),
+ mode="bilinear",
+ align_corners=False,
+ )
+ # We first apply non-overlapping constraints before memory encoding. This may include some suppression heuristics.
+ if not hasattr(self, "_warm_up_complete") or self._warm_up_complete:
+ high_res_masks = self.tracker._suppress_object_pw_area_shrinkage(
+ high_res_masks
+ )
+ # Instead of gathering the predicted object scores, we use mask areas as a proxy.
+ object_score_logits = torch.where(
+ (high_res_masks > 0).any(dim=(-1, -2)), 10.0, -10.0
+ )
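+ # Note on the score proxy above: `object_score_logits` is +10.0 for every object
+ # whose mask has at least one positive pixel and -10.0 otherwise, so the memory
+ # encoder treats empty masks as "object not present" without gathering the real
+ # per-object scores across GPUs.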
+
+ # Run the memory encoder on local slices for each GPU
+ start_idx_gpu = sum(tracker_metadata["num_obj_per_gpu"][: self.rank])
+ start_idx_state = start_idx_gpu
+ for tracker_state in tracker_inference_states:
+ num_obj_per_state = len(tracker_state["obj_ids"])
+ if num_obj_per_state == 0:
+ continue
+ # Get the local high-res masks and object score logits for this inference state
+ end_idx_state = start_idx_state + num_obj_per_state
+ local_high_res_masks = high_res_masks[start_idx_state:end_idx_state]
+ local_object_score_logits = object_score_logits[
+ start_idx_state:end_idx_state
+ ]
+ local_batch_size = local_high_res_masks.size(0)
+ # Run the SAM2 memory encoder. Note that we do not re-enforce the non-overlapping constraint here, as it is turned off by default
+
+ encoded_mem = self.tracker._run_memory_encoder(
+ tracker_state,
+ frame_idx,
+ local_batch_size,
+ local_high_res_masks,
+ local_object_score_logits,
+ is_mask_from_pts=False,
+ )
+ local_maskmem_features, local_maskmem_pos_enc = encoded_mem
+ # Store encoded memories in the local inference state
+ output_dict = tracker_state["output_dict"]
+ for storage_key in ["cond_frame_outputs", "non_cond_frame_outputs"]:
+ if frame_idx not in output_dict[storage_key]:
+ continue
+ output_dict[storage_key][frame_idx]["maskmem_features"] = (
+ local_maskmem_features
+ )
+ output_dict[storage_key][frame_idx]["maskmem_pos_enc"] = [
+ pos for pos in local_maskmem_pos_enc
+ ]
+ # for batched inference state, we also need to add per-object
+ # memory slices to support instance interactivity
+ self.tracker._add_output_per_object(
+ inference_state=tracker_state,
+ frame_idx=frame_idx,
+ current_out=output_dict[storage_key][frame_idx],
+ storage_key=storage_key,
+ )
+ start_idx_state += num_obj_per_state
+
+ def _tracker_add_new_objects(
+ self,
+ frame_idx: int,
+ num_frames: int,
+ new_obj_ids: List[int],
+ new_obj_masks: Tensor,
+ tracker_states_local: List[Any],
+ orig_vid_height: int,
+ orig_vid_width: int,
+ feature_cache: Dict,
+ ):
+ """Add a new object to SAM2 inference states."""
+ prev_tracker_state = (
+ tracker_states_local[0] if len(tracker_states_local) > 0 else None
+ )
+
+ # prepare inference_state
+ # batch objects that first appear on the same frame together
+ # Clear inference state. Keep the cached image features if available.
+ new_tracker_state = self.tracker.init_state(
+ cached_features=feature_cache,
+ video_height=orig_vid_height,
+ video_width=orig_vid_width,
+ num_frames=num_frames,
+ )
+ new_tracker_state["backbone_out"] = (
+ prev_tracker_state.get("backbone_out", None)
+ if prev_tracker_state is not None
+ else None
+ )
+
+ assert len(new_obj_ids) == new_obj_masks.size(0)
+ assert new_obj_masks.is_floating_point()
+ input_mask_res = self.tracker.input_mask_size
+ new_obj_masks = F.interpolate(
+ new_obj_masks.unsqueeze(1),
+ size=(input_mask_res, input_mask_res),
+ mode="bilinear",
+ align_corners=False,
+ ).squeeze(1)
+ new_obj_masks = new_obj_masks > 0
+
+ # add object one by one
+ for new_obj_id, new_mask in zip(new_obj_ids, new_obj_masks):
+ self.tracker.add_new_mask(
+ inference_state=new_tracker_state,
+ frame_idx=frame_idx,
+ obj_id=new_obj_id,
+ mask=new_mask,
+ add_mask_to_memory=True,
+ )
+ # NOTE: we skip enforcing the non-overlapping constraint **globally** when adding new objects.
+ self.tracker.propagate_in_video_preflight(
+ new_tracker_state, run_mem_encoder=True
+ )
+ tracker_states_local.append(new_tracker_state)
+ return tracker_states_local
+
+ def _tracker_remove_object(self, tracker_states_local: List[Any], obj_id: int):
+ """
+ Remove an object from SAM2 inference states. This would remove the object from
+ all frames in the video.
+ """
+ tracker_states_local_before_removal = tracker_states_local.copy()
+ tracker_states_local.clear()
+ for tracker_inference_state in tracker_states_local_before_removal:
+ # we try to remove `obj_id` on every inference state with `strict=False`
+ # it will not do anything if an inference state doesn't contain `obj_id`
+ new_obj_ids, _ = self.tracker.remove_object(
+ tracker_inference_state, obj_id, strict=False, need_output=False
+ )
+ # only keep an inference state if it's non-empty after object removal
+ if len(new_obj_ids) > 0:
+ tracker_states_local.append(tracker_inference_state)
+
+ def _tracker_remove_objects(
+ self, tracker_states_local: List[Any], obj_ids: list[int]
+ ):
+ """
+ Remove multiple objects from SAM2 inference states. This removes each object from
+ all frames in the video.
+ """
+ for obj_id in obj_ids:
+ self._tracker_remove_object(tracker_states_local, obj_id)
+
+ def _initialize_metadata(self):
+ """Initialize metadata for the masklets."""
+ tracker_metadata = {
+ "obj_ids_per_gpu": [np.array([], np.int64) for _ in range(self.world_size)],
+ "obj_ids_all_gpu": np.array([], np.int64),
+ "num_obj_per_gpu": np.zeros(self.world_size, np.int64),
+ "max_obj_id": -1,
+ "obj_id_to_score": {},
+ "obj_id_to_tracker_score_frame_wise": defaultdict(dict),
+ "obj_id_to_last_occluded": {},
+ }
+ if self.rank == 0:
+ # "rank0_metadata" contains metadata that is only stored on (and accessible to) GPU 0
+ # - obj_first_frame_idx: obj_id --> first frame index where the object was detected
+ # - unmatched_frame_inds: obj_id --> [unmatched frame indices]
+ # - overlap_pair_to_frame_inds: (first_appear_obj_id, obj_id) --> [overlap frame indices]
+ # - removed_obj_ids: object IDs that are removed via hot-start heuristics
+ rank0_metadata = {
+ "obj_first_frame_idx": {},
+ "unmatched_frame_inds": defaultdict(list),
+ "trk_keep_alive": defaultdict(
+ int
+ ), # This is used only for object suppression not for removal
+ "overlap_pair_to_frame_inds": defaultdict(list),
+ "removed_obj_ids": set(),
+ "suppressed_obj_ids": defaultdict(
+ set
+ ), # frame_idx --> set of objects with suppressed outputs, but still continue to be tracked
+ }
+ if self.masklet_confirmation_enable:
+ # all the following are npt.NDArray with the same shape as `obj_ids_all_gpu`
+ rank0_metadata["masklet_confirmation"] = {
+ # "status" is the confirmation status of each masklet (in `MaskletConfirmationStatus`)
+ "status": np.array([], np.int64),
+ # "consecutive_det_num" is the number of consecutive frames where the masklet is
+ # detected by the detector (with a matched detection)
+ "consecutive_det_num": np.array([], np.int64),
+ }
+ tracker_metadata["rank0_metadata"] = rank0_metadata
+
+ return tracker_metadata
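+ # For reference, with world_size=2 the freshly initialized metadata looks roughly
+ # like this (hypothetical illustration; "rank0_metadata" exists on rank 0 only):
+ #
+ #   {
+ #       "obj_ids_per_gpu": [array([]), array([])],  # no masklets on either GPU yet
+ #       "obj_ids_all_gpu": array([]),
+ #       "num_obj_per_gpu": array([0, 0]),
+ #       "max_obj_id": -1,                           # no object IDs assigned yet
+ #       "obj_id_to_score": {},
+ #       ...
+ #   }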
+
+ def update_masklet_confirmation_status(
+ self,
+ rank0_metadata: Dict[str, Any],
+ obj_ids_all_gpu_prev: npt.NDArray,
+ obj_ids_all_gpu_updated: npt.NDArray,
+ det_to_matched_trk_obj_ids: Dict[int, npt.NDArray],
+ new_det_obj_ids: npt.NDArray,
+ ):
+ confirmation_data = rank0_metadata["masklet_confirmation"]
+
+ # a) first, expand "confirmation_data" to include new masklets added in this frame
+ status_prev = confirmation_data["status"]
+ consecutive_det_num_prev = confirmation_data["consecutive_det_num"]
+ assert (
+ status_prev.shape == obj_ids_all_gpu_prev.shape
+ ), f"Got {status_prev.shape} vs {obj_ids_all_gpu_prev.shape}"
+
+ obj_id_to_updated_idx = {
+ obj_id: idx for idx, obj_id in enumerate(obj_ids_all_gpu_updated)
+ }
+ prev_elem_is_in_updated = np.isin(obj_ids_all_gpu_prev, obj_ids_all_gpu_updated)
+ prev_elem_obj_ids_in_updated = obj_ids_all_gpu_prev[prev_elem_is_in_updated]
+ prev_elem_inds_in_updated = np.array(
+ [obj_id_to_updated_idx[obj_id] for obj_id in prev_elem_obj_ids_in_updated],
+ dtype=np.int64,
+ )
+ # newly added masklets are initialized to "UNCONFIRMED" status
+ unconfirmed_val = MaskletConfirmationStatus.UNCONFIRMED.value
+ status = np.full_like(obj_ids_all_gpu_updated, fill_value=unconfirmed_val)
+ status[prev_elem_inds_in_updated] = status_prev[prev_elem_is_in_updated]
+ consecutive_det_num = np.zeros_like(obj_ids_all_gpu_updated)
+ consecutive_det_num[prev_elem_inds_in_updated] = consecutive_det_num_prev[
+ prev_elem_is_in_updated
+ ]
+
+ # b) update the confirmation status of all masklets based on the current frame
+ # b.1) update "consecutive_det_num"
+ # "is_matched": whether a masklet is matched to a detection on this frame
+ is_matched = np.isin(obj_ids_all_gpu_updated, new_det_obj_ids)
+ for matched_trk_obj_ids in det_to_matched_trk_obj_ids.values():
+ is_matched |= np.isin(obj_ids_all_gpu_updated, matched_trk_obj_ids)
+ consecutive_det_num = np.where(is_matched, consecutive_det_num + 1, 0)
+
+ # b.2) update "status"
+ change_to_confirmed = (
+ consecutive_det_num >= self.masklet_confirmation_consecutive_det_thresh
+ )
+ status[change_to_confirmed] = MaskletConfirmationStatus.CONFIRMED.value
+
+ confirmation_data["status"] = status
+ confirmation_data["consecutive_det_num"] = consecutive_det_num
+ return rank0_metadata
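+ # Illustrative example (hypothetical values), assuming
+ # masklet_confirmation_consecutive_det_thresh=3: a masklet whose
+ # "consecutive_det_num" was 2 and that is matched again on this frame reaches 3
+ # and flips from UNCONFIRMED to CONFIRMED; a single unmatched frame resets the
+ # counter to 0, but a masklet that is already CONFIRMED is never reverted here.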
+
+ def forward(self, input: BatchedDatapoint, is_inference: bool = False):
+ raise NotImplementedError("Evaluation outside demo is not implemented yet")
+
+ def _load_checkpoint(self, ckpt_path: str, strict: bool = True):
+ sd = torch.load(ckpt_path, map_location="cpu", weights_only=True)["model"]
+ missing_keys, unexpected_keys = self.load_state_dict(sd, strict=strict)
+ if len(missing_keys) > 0 or len(unexpected_keys) > 0:
+ logger.warning(f"Loaded ckpt with {missing_keys=}, {unexpected_keys=}")
+ else:
+ logger.info("Loaded ckpt successfully without missing or unexpected keys")
+
+ def prep_for_evaluator(self, video_frames, tracking_res, scores_labels):
+ """This method is only used for benchmark eval (not used in the demo)."""
+ num_frames = len(video_frames)
+ w, h = video_frames[0].size
+ zero_mask = torch.zeros((1, h, w), dtype=torch.bool)
+ object_ids = list(scores_labels.keys())
+ preds = {"scores": [], "labels": [], "boxes": [], "masks_rle": []}
+ for oid in object_ids:
+ o_masks = []
+ o_score = scores_labels[oid][0].item()
+ o_label = scores_labels[oid][1]
+ for frame_idx in range(num_frames):
+ if frame_idx not in tracking_res:
+ o_masks.append(zero_mask)
+ else:
+ o_masks.append(tracking_res[frame_idx].get(oid, zero_mask))
+
+ o_masks = torch.cat(o_masks, dim=0) # (n_frames, H, W)
+ preds["scores"].append(o_score)
+ preds["labels"].append(o_label)
+ preds["boxes"].append(mask_to_box(o_masks.unsqueeze(1)).squeeze())
+ preds["masks_rle"].append(rle_encode(o_masks, return_areas=True))
+
+ preds["boxes"] = (
+ torch.stack(preds["boxes"], dim=0)
+ if len(preds["boxes"]) > 0
+ else torch.empty(
+ (0, num_frames, 4), dtype=torch.float32, device=self.device
+ )
+ )
+ preds["scores"] = (
+ torch.tensor(preds["scores"], device=self.device)
+ if len(preds["scores"]) > 0
+ else torch.empty((0,), device=self.device)
+ )
+ preds["per_frame_scores"] = preds["scores"]
+ preds["labels"] = (
+ torch.tensor(preds["labels"], device=self.device)
+ if len(preds["labels"]) > 0
+ else torch.empty((0,), device=self.device)
+ )
+ return preds
+
+ def _encode_prompt(self, **kwargs):
+ return self.detector._encode_prompt(**kwargs)
+
+ def _drop_new_det_with_obj_limit(self, new_det_fa_inds, det_scores_np, num_to_keep):
+ """
+ Drop new detections to respect the maximum number of objects. We drop new objects based
+ on their detection scores, keeping the high-scoring ones and dropping the low-scoring ones.
+ """
+ assert 0 <= num_to_keep <= len(new_det_fa_inds)
+ if num_to_keep == 0:
+ return np.array([], np.int64) # keep none
+ if num_to_keep == len(new_det_fa_inds):
+ return new_det_fa_inds # keep all
+
+ # keep the top-scoring detections
+ score_order = np.argsort(det_scores_np[new_det_fa_inds])[::-1]
+ new_det_fa_inds = new_det_fa_inds[score_order[:num_to_keep]]
+ return new_det_fa_inds
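+ # Illustrative example (hypothetical values): with
+ # det_scores_np = np.array([0.9, 0.2, 0.8, 0.5]) and
+ # new_det_fa_inds = np.array([0, 1, 2, 3]), calling this method with
+ # num_to_keep=2 returns array([0, 2]), i.e. the indices of the two
+ # highest-scoring new detections.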
diff --git a/sam3/model/sam3_video_inference.py b/sam3/model/sam3_video_inference.py
new file mode 100644
index 0000000000000000000000000000000000000000..7fb87d016a28efe63de2837692a4ba8fa01c004d
--- /dev/null
+++ b/sam3/model/sam3_video_inference.py
@@ -0,0 +1,1709 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+from collections import defaultdict
+
+import numpy as np
+import torch
+import torch.distributed as dist
+import torch.nn.functional as F
+
+from sam3 import perflib
+from sam3.logger import get_logger
+from sam3.model.act_ckpt_utils import clone_output_wrapper
+from sam3.model.box_ops import box_xywh_to_cxcywh, box_xyxy_to_xywh
+from sam3.model.data_misc import BatchedDatapoint, convert_my_tensors, FindStage
+from sam3.model.geometry_encoders import Prompt
+from sam3.model.io_utils import IMAGE_EXTS, load_resource_as_video_frames
+from sam3.model.sam3_tracker_utils import fill_holes_in_mask_scores
+from sam3.model.sam3_video_base import MaskletConfirmationStatus, Sam3VideoBase
+from sam3.model.utils.misc import copy_data_to_device
+from sam3.perflib.compile import compile_wrapper, shape_logging_wrapper
+from sam3.perflib.masks_ops import masks_to_boxes as perf_masks_to_boxes
+from torchvision.ops import masks_to_boxes
+from tqdm.auto import tqdm
+
+logger = get_logger(__name__)
+
+
+class Sam3VideoInference(Sam3VideoBase):
+ TEXT_ID_FOR_TEXT = 0
+ TEXT_ID_FOR_VISUAL = 1
+
+ def __init__(
+ self,
+ image_size=1008,
+ image_mean=(0.5, 0.5, 0.5),
+ image_std=(0.5, 0.5, 0.5),
+ compile_model=False,
+ **kwargs,
+ ):
+ """
+ hotstart_delay: int, the delay (in #frames) before the model starts to yield output, 0 to disable hotstart delay.
+ hotstart_unmatch_thresh: int, remove the object if it has this many unmatched frames within its hotstart_delay period.
+ If `hotstart_delay` is set to 0, this parameter is ignored.
+ hotstart_dup_thresh: int, remove the object if it has overlapped with another object for this many frames within its hotstart_delay period.
+ """
+ super().__init__(**kwargs)
+ self.image_size = image_size
+ self.image_mean = image_mean
+ self.image_std = image_std
+ self.compile_model = compile_model
+
+ @torch.inference_mode()
+ def init_state(
+ self,
+ resource_path,
+ offload_video_to_cpu=False,
+ async_loading_frames=False,
+ video_loader_type="cv2",
+ ):
+ """Initialize an inference state from `resource_path` (an image or a video)."""
+ images, orig_height, orig_width = load_resource_as_video_frames(
+ resource_path=resource_path,
+ image_size=self.image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=self.image_mean,
+ img_std=self.image_std,
+ async_loading_frames=async_loading_frames,
+ video_loader_type=video_loader_type,
+ )
+ inference_state = {}
+ inference_state["image_size"] = self.image_size
+ inference_state["num_frames"] = len(images)
+ # the original video height and width, used for resizing final output scores
+ inference_state["orig_height"] = orig_height
+ inference_state["orig_width"] = orig_width
+ # values that don't change across frames (so we only need to hold one copy of them)
+ inference_state["constants"] = {}
+ # inputs on each frame
+ self._construct_initial_input_batch(inference_state, images)
+ # initialize extra states
+ inference_state["tracker_inference_states"] = []
+ inference_state["tracker_metadata"] = {}
+ inference_state["feature_cache"] = {}
+ inference_state["cached_frame_outputs"] = {}
+ inference_state["action_history"] = [] # for logging user actions
+ inference_state["is_image_only"] = is_image_type(resource_path)
+ return inference_state
+
+ @torch.inference_mode()
+ def reset_state(self, inference_state):
+ """Revert `inference_state` to what it was right after initialization."""
+ inference_state["input_batch"].find_text_batch[0] = ""
+ inference_state["text_prompt"] = None
+ for t in range(inference_state["num_frames"]):
+ inference_state["input_batch"].find_inputs[t].text_ids[...] = 0
+ # constructing an output list in inference state (we start with an empty list)
+ inference_state["previous_stages_out"][t] = None
+ inference_state["per_frame_raw_point_input"][t] = None
+ inference_state["per_frame_raw_box_input"][t] = None
+ inference_state["per_frame_visual_prompt"][t] = None
+ inference_state["per_frame_geometric_prompt"][t] = None
+ inference_state["per_frame_cur_step"][t] = 0
+
+ inference_state["visual_prompt_embed"] = None
+ inference_state["visual_prompt_mask"] = None
+ inference_state["tracker_inference_states"].clear()
+ inference_state["tracker_metadata"].clear()
+ inference_state["feature_cache"].clear()
+ inference_state["cached_frame_outputs"].clear()
+ inference_state["action_history"].clear() # for logging user actions
+
+ def _construct_initial_input_batch(self, inference_state, images):
+ """Construct an initial `BatchedDatapoint` instance as input."""
+ # 1) img_batch
+ num_frames = len(images)
+ device = self.device
+
+ # 2) find_text_batch
+ # "" will be replaced by the actual text prompt when adding prompts
+ find_text_batch = ["", "visual"]
+
+ # 3) find_inputs
+ input_box_embedding_dim = 258 # historical default
+ input_points_embedding_dim = 257 # historical default
+ stages = [
+ FindStage(
+ img_ids=[stage_id],
+ text_ids=[0],
+ input_boxes=[torch.zeros(input_box_embedding_dim)],
+ input_boxes_mask=[torch.empty(0, dtype=torch.bool)],
+ input_boxes_label=[torch.empty(0, dtype=torch.long)],
+ input_points=[torch.empty(0, input_points_embedding_dim)],
+ input_points_mask=[torch.empty(0)],
+ object_ids=[],
+ )
+ for stage_id in range(num_frames)
+ ]
+ for i in range(len(stages)):
+ stages[i] = convert_my_tensors(stages[i])
+
+ # construct the final `BatchedDatapoint` and cast to GPU
+ input_batch = BatchedDatapoint(
+ img_batch=images,
+ find_text_batch=find_text_batch,
+ find_inputs=stages,
+ find_targets=[None] * num_frames,
+ find_metadatas=[None] * num_frames,
+ )
+ input_batch = copy_data_to_device(input_batch, device, non_blocking=True)
+ inference_state["input_batch"] = input_batch
+
+ # construct the placeholder interactive prompts and tracking queries
+ bs = 1
+ inference_state["constants"]["empty_geometric_prompt"] = Prompt(
+ box_embeddings=torch.zeros(0, bs, 4, device=device),
+ box_mask=torch.zeros(bs, 0, device=device, dtype=torch.bool),
+ box_labels=torch.zeros(0, bs, device=device, dtype=torch.long),
+ point_embeddings=torch.zeros(0, bs, 2, device=device),
+ point_mask=torch.zeros(bs, 0, device=device, dtype=torch.bool),
+ point_labels=torch.zeros(0, bs, device=device, dtype=torch.long),
+ )
+
+ # constructing an output list in inference state (we start with an empty list)
+ inference_state["previous_stages_out"] = [None] * num_frames
+ inference_state["text_prompt"] = None
+ inference_state["per_frame_raw_point_input"] = [None] * num_frames
+ inference_state["per_frame_raw_box_input"] = [None] * num_frames
+ inference_state["per_frame_visual_prompt"] = [None] * num_frames
+ inference_state["per_frame_geometric_prompt"] = [None] * num_frames
+ inference_state["per_frame_cur_step"] = [0] * num_frames
+
+ # placeholders for cached outputs
+ # (note: currently, a single visual prompt embedding is shared for all frames)
+ inference_state["visual_prompt_embed"] = None
+ inference_state["visual_prompt_mask"] = None
+
+ def _get_visual_prompt(self, inference_state, frame_idx, boxes_cxcywh, box_labels):
+ """
+ Handle the case of visual prompt. Currently, in the inference API we do not
+ explicitly distinguish between initial box as visual prompt vs subsequent boxes
+ or boxes after inference for refinement.
+ """
+ # If the frame hasn't had any inference results before (prompting or propagation),
+ # we treat the first added box prompt as a visual prompt; otherwise, we treat
+ # the first box just as a refinement prompt.
+ is_new_visual_prompt = (
+ inference_state["per_frame_visual_prompt"][frame_idx] is None
+ and inference_state["previous_stages_out"][frame_idx] is None
+ )
+ if is_new_visual_prompt:
+ if boxes_cxcywh.size(0) != 1:
+ raise RuntimeError(
+ "visual prompts (box as an initial prompt) should only have one box, "
+ f"but got {boxes_cxcywh.shape=}"
+ )
+ if not box_labels.item():
+ logging.warning("A negative box is added as a visual prompt.")
+ # take the first box prompt as a visual prompt
+ device = self.device
+ new_visual_prompt = Prompt(
+ box_embeddings=boxes_cxcywh[None, 0:1, :].to(device), # (seq, bs, 4)
+ box_mask=None,
+ box_labels=box_labels[None, 0:1].to(device), # (seq, bs)
+ point_embeddings=None,
+ point_mask=None,
+ point_labels=None,
+ )
+ inference_state["per_frame_visual_prompt"][frame_idx] = new_visual_prompt
+ else:
+ new_visual_prompt = None
+
+ # `boxes_cxcywh` and `box_labels` contain all the raw box inputs added so far
+ # strip any visual prompt from the input boxes (for geometric prompt encoding)
+ if inference_state["per_frame_visual_prompt"][frame_idx] is not None:
+ boxes_cxcywh = boxes_cxcywh[1:]
+ box_labels = box_labels[1:]
+
+ return boxes_cxcywh, box_labels, new_visual_prompt
+
+ def _get_processing_order(
+ self, inference_state, start_frame_idx, max_frame_num_to_track, reverse
+ ):
+ num_frames = inference_state["num_frames"]
+ previous_stages_out = inference_state["previous_stages_out"]
+ if all(out is None for out in previous_stages_out) and start_frame_idx is None:
+ raise RuntimeError(
+ "No prompts are received on any frames. Please add prompt on at least one frame before propagation."
+ )
+ # set start index, end index, and processing order
+ if start_frame_idx is None:
+ # default: start from the earliest frame with input points
+ start_frame_idx = min(
+ t for t, out in enumerate(previous_stages_out) if out is not None
+ )
+ if max_frame_num_to_track is None:
+ # default: track all the frames in the video
+ max_frame_num_to_track = num_frames
+ if reverse:
+ end_frame_idx = start_frame_idx - max_frame_num_to_track
+ end_frame_idx = max(end_frame_idx, 0)
+ processing_order = range(start_frame_idx - 1, end_frame_idx - 1, -1)
+ else:
+ end_frame_idx = start_frame_idx + max_frame_num_to_track
+ end_frame_idx = min(end_frame_idx, num_frames - 1)
+ processing_order = range(start_frame_idx, end_frame_idx + 1)
+ return processing_order, end_frame_idx
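+ # Hypothetical example: with num_frames=100, start_frame_idx=10 and
+ # max_frame_num_to_track=5, the forward pass visits frames 10..15
+ # (range(10, 16)), while the reverse pass visits frames 9..5
+ # (range(9, 4, -1)) with end_frame_idx=5.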
+
+ @torch.inference_mode()
+ def propagate_in_video(
+ self,
+ inference_state,
+ start_frame_idx=None,
+ max_frame_num_to_track=None,
+ reverse=False,
+ ):
+ """
+ Propagate the prompts to get grounding results for the entire video. This method
+ is a generator and yields inference outputs for all frames in the range specified
+ by `start_frame_idx`, `max_frame_num_to_track`, and `reverse`.
+ """
+ # compile the model (it's a no-op if the model is already compiled)
+ # note that this is intentionally done in `self.propagate_in_video`, so that the first
+ # `self.add_prompt` call runs in eager mode and fills in the decoder buffers
+ # (such as the positional encoding cache)
+ self._compile_model()
+
+ processing_order, end_frame_idx = self._get_processing_order(
+ inference_state,
+ start_frame_idx,
+ max_frame_num_to_track,
+ reverse=reverse,
+ )
+
+ # Store max_frame_num_to_track in feature_cache for downstream methods
+ inference_state["feature_cache"]["tracking_bounds"] = {
+ "max_frame_num_to_track": max_frame_num_to_track,
+ "propagate_in_video_start_frame_idx": start_frame_idx,
+ }
+
+ hotstart_buffer = []
+ hotstart_removed_obj_ids = set()
+ # when deciding whether to output a masklet on `yield_frame_idx`, we check whether the object is confirmed
+ # in a future frame (`unconfirmed_status_delay` frames after the current frame). For example, if we require
+ # an object to be detected in 3 consecutive frames to be confirmed, then we look 2 frames into the future --
+ # e.g., we output an object on frame 4 only if it becomes confirmed on frame 6.
+ unconfirmed_status_delay = self.masklet_confirmation_consecutive_det_thresh - 1
+ unconfirmed_obj_ids_per_frame = {} # frame_idx -> hidden_obj_ids
+ for frame_idx in tqdm(
+ processing_order, desc="propagate_in_video", disable=self.rank > 0
+ ):
+ out = self._run_single_frame_inference(inference_state, frame_idx, reverse)
+
+ if self.hotstart_delay > 0:
+ # accumulate the outputs for the first `hotstart_delay` frames
+ hotstart_buffer.append([frame_idx, out])
+ # update the object IDs removed by hotstart so that we don't output them
+ if self.rank == 0:
+ hotstart_removed_obj_ids.update(out["removed_obj_ids"])
+ unconfirmed_obj_ids = out.get("unconfirmed_obj_ids", None)
+ if unconfirmed_obj_ids is not None:
+ unconfirmed_obj_ids_per_frame[frame_idx] = unconfirmed_obj_ids
+
+ if frame_idx == end_frame_idx:
+ # we reached the end of propagation -- yield all frames in the buffer
+ yield_list = hotstart_buffer
+ hotstart_buffer = []
+ elif len(hotstart_buffer) >= self.hotstart_delay:
+ # we have enough frames -- yield and remove the first (oldest) frame from the buffer
+ yield_list = hotstart_buffer[:1]
+ hotstart_buffer = hotstart_buffer[1:]
+ else:
+ # not enough frames yet -- skip yielding
+ yield_list = []
+ else:
+ yield_list = [(frame_idx, out)] # output the current frame
+
+ for yield_frame_idx, yield_out in yield_list:
+ # post-process the output and yield it
+ if self.rank == 0:
+ suppressed_obj_ids = yield_out["suppressed_obj_ids"]
+ unconfirmed_status_frame_idx = (
+ yield_frame_idx + unconfirmed_status_delay
+ if not reverse
+ else yield_frame_idx - unconfirmed_status_delay
+ )
+
+ # Clamp the frame index to stay within video bounds
+ num_frames = inference_state["num_frames"]
+ unconfirmed_status_frame_idx = max(
+ 0, min(unconfirmed_status_frame_idx, num_frames - 1)
+ )
+
+ unconfirmed_obj_ids = unconfirmed_obj_ids_per_frame.get(
+ unconfirmed_status_frame_idx, None
+ )
+ postprocessed_out = self._postprocess_output(
+ inference_state,
+ yield_out,
+ hotstart_removed_obj_ids,
+ suppressed_obj_ids,
+ unconfirmed_obj_ids,
+ )
+
+ self._cache_frame_outputs(
+ inference_state,
+ yield_frame_idx,
+ yield_out["obj_id_to_mask"],
+ suppressed_obj_ids=suppressed_obj_ids,
+ removed_obj_ids=hotstart_removed_obj_ids,
+ unconfirmed_obj_ids=unconfirmed_obj_ids,
+ )
+ else:
+ postprocessed_out = None # no output on other GPUs
+ yield yield_frame_idx, postprocessed_out
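+ # A minimal driver loop for this generator (hypothetical variable names), assuming
+ # `model` is a Sam3VideoInference instance and `state` came from `model.init_state()`:
+ #
+ #   model.add_prompt(state, frame_idx=0, text_str="person")
+ #   for frame_idx, out in model.propagate_in_video(state, start_frame_idx=0):
+ #       if out is None:
+ #           continue  # non-rank-0 GPUs yield None
+ #       masks = out["out_binary_masks"]  # (num_obj, H_video, W_video), bool
+ #       boxes = out["out_boxes_xywh"]    # normalized xywh boxes, one per object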
+
+ def _run_single_frame_inference(self, inference_state, frame_idx, reverse):
+ """
+ Perform inference on a single frame and get its inference results. This would
+ also update `inference_state`.
+ """
+ # prepare inputs
+ input_batch = inference_state["input_batch"]
+ tracker_states_local = inference_state["tracker_inference_states"]
+ has_text_prompt = inference_state["text_prompt"] is not None
+ has_geometric_prompt = (
+ inference_state["per_frame_geometric_prompt"][frame_idx] is not None
+ )
+ # run inference for the current frame
+ (
+ obj_id_to_mask,
+ obj_id_to_score,
+ tracker_states_local_new,
+ tracker_metadata_new,
+ frame_stats,
+ _,
+ ) = self._det_track_one_frame(
+ frame_idx=frame_idx,
+ num_frames=inference_state["num_frames"],
+ reverse=reverse,
+ input_batch=input_batch,
+ geometric_prompt=(
+ inference_state["constants"]["empty_geometric_prompt"]
+ if not has_geometric_prompt
+ else inference_state["per_frame_geometric_prompt"][frame_idx]
+ ),
+ tracker_states_local=tracker_states_local,
+ tracker_metadata_prev=inference_state["tracker_metadata"],
+ feature_cache=inference_state["feature_cache"],
+ orig_vid_height=inference_state["orig_height"],
+ orig_vid_width=inference_state["orig_width"],
+ is_image_only=inference_state["is_image_only"],
+ allow_new_detections=has_text_prompt or has_geometric_prompt,
+ )
+ # update inference state
+ inference_state["tracker_inference_states"] = tracker_states_local_new
+ inference_state["tracker_metadata"] = tracker_metadata_new
+ # use a dummy string in "previous_stages_out" to indicate this frame has outputs
+ inference_state["previous_stages_out"][frame_idx] = "_THIS_FRAME_HAS_OUTPUTS_"
+
+ if self.rank == 0:
+ self._cache_frame_outputs(inference_state, frame_idx, obj_id_to_mask)
+
+ out = {
+ "obj_id_to_mask": obj_id_to_mask,
+ "obj_id_to_score": obj_id_to_score, # first frame detection score
+ "obj_id_to_tracker_score": tracker_metadata_new[
+ "obj_id_to_tracker_score_frame_wise"
+ ][frame_idx],
+ }
+ # removed_obj_ids is only needed on rank 0 to handle hotstart delay buffer
+ if self.rank == 0:
+ rank0_metadata = tracker_metadata_new["rank0_metadata"]
+ removed_obj_ids = rank0_metadata["removed_obj_ids"]
+ out["removed_obj_ids"] = removed_obj_ids
+ out["suppressed_obj_ids"] = rank0_metadata["suppressed_obj_ids"][frame_idx]
+ out["frame_stats"] = frame_stats
+ if self.masklet_confirmation_enable:
+ status = rank0_metadata["masklet_confirmation"]["status"]
+ is_unconfirmed = status == MaskletConfirmationStatus.UNCONFIRMED.value
+ out["unconfirmed_obj_ids"] = tracker_metadata_new["obj_ids_all_gpu"][
+ is_unconfirmed
+ ].tolist()
+ else:
+ out["unconfirmed_obj_ids"] = []
+
+ return out
+
+ def _postprocess_output(
+ self,
+ inference_state,
+ out,
+ removed_obj_ids=None,
+ suppressed_obj_ids=None,
+ unconfirmed_obj_ids=None,
+ ):
+ obj_id_to_mask = out["obj_id_to_mask"] # low res masks
+ curr_obj_ids = sorted(obj_id_to_mask.keys())
+ H_video, W_video = inference_state["orig_height"], inference_state["orig_width"]
+ if len(curr_obj_ids) == 0:
+ out_obj_ids = torch.zeros(0, dtype=torch.int64)
+ out_probs = torch.zeros(0, dtype=torch.float32)
+ out_binary_masks = torch.zeros(0, H_video, W_video, dtype=torch.bool)
+ out_boxes_xywh = torch.zeros(0, 4, dtype=torch.float32)
+ else:
+ out_obj_ids = torch.tensor(curr_obj_ids, dtype=torch.int64)
+ out_probs = torch.tensor(
+ [out["obj_id_to_score"][obj_id] for obj_id in curr_obj_ids]
+ )
+ out_tracker_probs = torch.tensor(
+ [
+ (
+ out["obj_id_to_tracker_score"][obj_id]
+ if obj_id in out["obj_id_to_tracker_score"]
+ else 0.0
+ )
+ for obj_id in curr_obj_ids
+ ]
+ )
+ out_binary_masks = torch.cat(
+ [obj_id_to_mask[obj_id] for obj_id in curr_obj_ids], dim=0
+ )
+
+ assert out_binary_masks.dtype == torch.bool
+ keep = out_binary_masks.any(dim=(1, 2)).cpu() # remove masks with 0 areas
+ # hide outputs for those object IDs in `obj_ids_to_hide`
+ obj_ids_to_hide = []
+ if suppressed_obj_ids is not None:
+ obj_ids_to_hide.extend(suppressed_obj_ids)
+ if removed_obj_ids is not None:
+ obj_ids_to_hide.extend(removed_obj_ids)
+ if unconfirmed_obj_ids is not None:
+ obj_ids_to_hide.extend(unconfirmed_obj_ids)
+ if len(obj_ids_to_hide) > 0:
+ obj_ids_to_hide_t = torch.tensor(obj_ids_to_hide, dtype=torch.int64)
+ keep &= ~torch.isin(out_obj_ids, obj_ids_to_hide_t)
+
+ # slice those valid entries from the original outputs
+ keep_idx = torch.nonzero(keep, as_tuple=True)[0]
+ keep_idx_gpu = keep_idx.pin_memory().to(
+ device=out_binary_masks.device, non_blocking=True
+ )
+
+ out_obj_ids = torch.index_select(out_obj_ids, 0, keep_idx)
+ out_probs = torch.index_select(out_probs, 0, keep_idx)
+ out_tracker_probs = torch.index_select(out_tracker_probs, 0, keep_idx)
+ out_binary_masks = torch.index_select(out_binary_masks, 0, keep_idx_gpu)
+
+ if perflib.is_enabled:
+ out_boxes_xyxy = perf_masks_to_boxes(
+ out_binary_masks, out_obj_ids.tolist()
+ )
+ else:
+ out_boxes_xyxy = masks_to_boxes(out_binary_masks)
+
+ out_boxes_xywh = box_xyxy_to_xywh(out_boxes_xyxy) # convert to xywh format
+ # normalize boxes
+ out_boxes_xywh[..., 0] /= W_video
+ out_boxes_xywh[..., 1] /= H_video
+ out_boxes_xywh[..., 2] /= W_video
+ out_boxes_xywh[..., 3] /= H_video
+
+ # apply non-overlapping constraints on the existing masklets
+ if out_binary_masks.shape[0] > 1:
+ assert len(out_binary_masks) == len(out_tracker_probs)
+ out_binary_masks = (
+ self.tracker._apply_object_wise_non_overlapping_constraints(
+ out_binary_masks.unsqueeze(1),
+ out_tracker_probs.unsqueeze(1).to(out_binary_masks.device),
+ background_value=0,
+ ).squeeze(1)
+ ) > 0
+
+ outputs = {
+ "out_obj_ids": out_obj_ids.cpu().numpy(),
+ "out_probs": out_probs.cpu().numpy(),
+ "out_boxes_xywh": out_boxes_xywh.cpu().numpy(),
+ "out_binary_masks": out_binary_masks.cpu().numpy(),
+ "frame_stats": out.get("frame_stats", None),
+ }
+ return outputs
+
+ def _cache_frame_outputs(
+ self,
+ inference_state,
+ frame_idx,
+ obj_id_to_mask,
+ suppressed_obj_ids=None,
+ removed_obj_ids=None,
+ unconfirmed_obj_ids=None,
+ ):
+ # Filter out suppressed, removed, and unconfirmed objects from the cache
+ filtered_obj_id_to_mask = obj_id_to_mask.copy()
+
+ objects_to_exclude = set()
+ if suppressed_obj_ids is not None:
+ objects_to_exclude.update(suppressed_obj_ids)
+ if removed_obj_ids is not None:
+ objects_to_exclude.update(removed_obj_ids)
+ if unconfirmed_obj_ids is not None:
+ objects_to_exclude.update(unconfirmed_obj_ids)
+
+ if objects_to_exclude:
+ for obj_id in objects_to_exclude:
+ if obj_id in filtered_obj_id_to_mask:
+ del filtered_obj_id_to_mask[obj_id]
+
+ inference_state["cached_frame_outputs"][frame_idx] = filtered_obj_id_to_mask
+
+ def _build_tracker_output(
+ self, inference_state, frame_idx, refined_obj_id_to_mask=None
+ ):
+ assert (
+ "cached_frame_outputs" in inference_state
+ and frame_idx in inference_state["cached_frame_outputs"]
+ ), "No cached outputs found. Ensure normal propagation has run first to populate the cache."
+ cached_outputs = inference_state["cached_frame_outputs"][frame_idx]
+
+ obj_id_to_mask = cached_outputs.copy()
+
+ # Update with refined masks if provided
+ if refined_obj_id_to_mask is not None:
+ for obj_id, refined_mask in refined_obj_id_to_mask.items():
+ assert (
+ refined_mask is not None
+ ), f"Refined mask data must be provided for obj_id {obj_id}"
+ obj_id_to_mask[obj_id] = refined_mask
+
+ return obj_id_to_mask
+
+ def _compile_model(self):
+ """Compile the SAM model with torch.compile for speedup."""
+ is_compiled = getattr(self, "_model_is_compiled", False)
+ if is_compiled or not self.compile_model:
+ return
+
+ import torch._dynamo
+
+ # a larger cache size to hold varying number of shapes for torch.compile
+ # see https://github.com/pytorch/pytorch/blob/v2.5.1/torch/_dynamo/config.py#L42-L49
+ torch._dynamo.config.cache_size_limit = 128
+ torch._dynamo.config.accumulated_cache_size_limit = 2048
+ torch._dynamo.config.capture_scalar_outputs = True
+ torch._dynamo.config.suppress_errors = True
+
+ # Compile module components
+ # skip compilation of `_encode_prompt` since it sometimes triggers SymInt errors
+ # self._encode_prompt = clone_output_wrapper(
+ # torch.compile(self._encode_prompt, fullgraph=True, mode="max-autotune")
+ # )
+
+ ## Compile SAM3 model components
+ self.detector.backbone.vision_backbone.forward = clone_output_wrapper(
+ torch.compile(
+ self.detector.backbone.vision_backbone.forward,
+ fullgraph=True,
+ mode="max-autotune",
+ )
+ )
+ self.detector.transformer.encoder.forward = clone_output_wrapper(
+ torch.compile(
+ self.detector.transformer.encoder.forward,
+ fullgraph=True,
+ mode="max-autotune",
+ )
+ )
+ self.detector.transformer.decoder.forward = clone_output_wrapper(
+ torch.compile(
+ self.detector.transformer.decoder.forward,
+ fullgraph=True,
+ mode="max-autotune",
+ dynamic=False,
+ )
+ )
+
+ self.detector.segmentation_head.forward = clone_output_wrapper(
+ torch.compile(
+ self.detector.segmentation_head.forward,
+ fullgraph=True,
+ mode="max-autotune",
+ )
+ )
+
+ ## Compile Tracker model components
+ self.tracker.maskmem_backbone.forward = compile_wrapper(
+ self.tracker.maskmem_backbone.forward,
+ mode="max-autotune",
+ fullgraph=True,
+ dynamic=False,
+ )
+
+ self.tracker.transformer.encoder.forward = shape_logging_wrapper(
+ compile_wrapper(
+ self.tracker.transformer.encoder.forward,
+ mode="max-autotune-no-cudagraphs",
+ fullgraph=True,
+ dynamic=True,
+ ),
+ keep_kwargs=["src", "src_pos", "prompt", "prompt_pos"],
+ )
+
+ self.tracker.sam_mask_decoder.forward = compile_wrapper(
+ self.tracker.sam_mask_decoder.forward,
+ mode="max-autotune",
+ fullgraph=True,
+ dynamic=False, # Accuracy regression on True
+ )
+
+ self._model_is_compiled = True
+
+ def _warm_up_vg_propagation(self, inference_state, start_frame_idx=0):
+ # use a different detection score threshold for each round to simulate different numbers of output objects
+ num_objects_list = range(self.num_obj_for_compile + 1)
+ new_det_score_thresh_list = [0.3, 0.5, 0.7]
+ num_rounds = len(new_det_score_thresh_list)
+ orig_new_det_thresh = self.new_det_thresh
+
+ for i, thresh in enumerate(new_det_score_thresh_list):
+ self.new_det_thresh = thresh
+ for num_objects in num_objects_list:
+ logger.info(f"{i+1}/{num_rounds} warming up model compilation")
+ self.add_prompt(
+ inference_state, frame_idx=start_frame_idx, text_str="cat"
+ )
+ logger.info(
+ f"{i+1}/{num_rounds} warming up model compilation -- simulating {num_objects}/{self.num_obj_for_compile} objects"
+ )
+ inference_state = self.add_fake_objects_to_inference_state(
+ inference_state, num_objects, frame_idx=start_frame_idx
+ )
+ inference_state["tracker_metadata"]["rank0_metadata"].update(
+ {
+ "masklet_confirmation": {
+ "status": np.zeros(num_objects, dtype=np.int64),
+ "consecutive_det_num": np.zeros(
+ num_objects, dtype=np.int64
+ ),
+ }
+ }
+ )
+ for _ in self.propagate_in_video(
+ inference_state, start_frame_idx, reverse=False
+ ):
+ pass
+ for _ in self.propagate_in_video(
+ inference_state, start_frame_idx, reverse=True
+ ):
+ pass
+ self.reset_state(inference_state)
+ logger.info(
+ f"{i+1}/{num_rounds} warming up model compilation -- completed round {i+1} out of {num_rounds}"
+ )
+
+ # Warm up Tracker memory encoder with varying input shapes
+ num_iters = 3
+ feat_size = self.tracker.sam_image_embedding_size**2 # 72 * 72 = 5184
+ hidden_dim = self.tracker.hidden_dim # 256
+ mem_dim = self.tracker.mem_dim # 64
+ for _ in tqdm(range(num_iters)):
+ for b in range(1, self.num_obj_for_compile + 1):
+ for i in range(
+ 1,
+ self.tracker.max_cond_frames_in_attn + self.tracker.num_maskmem,
+ ):
+ for j in range(
+ self.tracker.max_cond_frames_in_attn
+ + self.tracker.max_obj_ptrs_in_encoder
+ ):
+ num_obj_ptr_tokens = (hidden_dim // mem_dim) * j
+ src = torch.randn(feat_size, b, hidden_dim, device=self.device)
+ src_pos = torch.randn(
+ feat_size, b, hidden_dim, device=self.device
+ )
+ prompt = torch.randn(
+ feat_size * i + num_obj_ptr_tokens,
+ b,
+ mem_dim,
+ device=self.device,
+ )
+ prompt_pos = torch.randn(
+ feat_size * i + num_obj_ptr_tokens,
+ b,
+ mem_dim,
+ device=self.device,
+ )
+
+ self.tracker.transformer.encoder.forward(
+ src=src,
+ src_pos=src_pos,
+ prompt=prompt,
+ prompt_pos=prompt_pos,
+ num_obj_ptr_tokens=num_obj_ptr_tokens,
+ )
+
+ self.new_det_thresh = orig_new_det_thresh
+ return inference_state
+
+ def add_fake_objects_to_inference_state(
+ self, inference_state, num_objects, frame_idx
+ ):
+ new_det_obj_ids_local = np.arange(num_objects)
+ high_res_H, high_res_W = (
+ self.tracker.maskmem_backbone.mask_downsampler.interpol_size
+ )
+ new_det_masks = torch.ones(
+ len(new_det_obj_ids_local), high_res_H, high_res_W
+ ).to(self.device)
+
+ inference_state["tracker_inference_states"] = self._tracker_add_new_objects(
+ frame_idx=frame_idx,
+ num_frames=inference_state["num_frames"],
+ new_obj_ids=new_det_obj_ids_local,
+ new_obj_masks=new_det_masks,
+ tracker_states_local=inference_state["tracker_inference_states"],
+ orig_vid_height=inference_state["orig_height"],
+ orig_vid_width=inference_state["orig_width"],
+ feature_cache=inference_state["feature_cache"],
+ )
+
+ # Synthesize obj_id_to_mask data for cached_frame_outputs to support _build_tracker_output during warmup
+ obj_id_to_mask = {}
+ if num_objects > 0:
+ H_video = inference_state["orig_height"]
+ W_video = inference_state["orig_width"]
+
+ video_res_masks = F.interpolate(
+ new_det_masks.unsqueeze(1), # Add channel dimension for interpolation
+ size=(H_video, W_video),
+ mode="bilinear",
+ align_corners=False,
+ ) # (num_objects, 1, H_video, W_video)
+ for i, obj_id in enumerate(new_det_obj_ids_local):
+ obj_id_to_mask[obj_id] = (video_res_masks[i] > 0.0).to(torch.bool)
+ if self.rank == 0:
+ for fidx in range(inference_state["num_frames"]):
+ self._cache_frame_outputs(inference_state, fidx, obj_id_to_mask)
+
+ inference_state["tracker_metadata"].update(
+ {
+ "obj_ids_per_gpu": [np.arange(num_objects)],
+ "obj_ids_all_gpu": np.arange(num_objects), # Same as 1 GPU
+ "num_obj_per_gpu": [num_objects],
+ "obj_id_to_score": {i: 1.0 for i in range(num_objects)},
+ "max_obj_id": num_objects,
+ "rank0_metadata": {
+ "masklet_confirmation": {
+ "status": np.zeros(num_objects, dtype=np.int64),
+ "consecutive_det_num": np.zeros(num_objects, dtype=np.int64),
+ },
+ "removed_obj_ids": set(),
+ "suppressed_obj_ids": defaultdict(set),
+ },
+ }
+ )
+ return inference_state
+
+ @torch.inference_mode()
+ @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
+ def warm_up_compilation(self):
+ """
+ Warm up the model by running a dummy inference to compile the model. This is
+ useful to avoid the compilation overhead in the first inference call.
+ """
+ if not self.compile_model:
+ return
+ self._warm_up_complete = False
+ if self.device.type != "cuda":
+ raise RuntimeError(
+ f"The model must be on CUDA for warm-up compilation, got {self.device=}."
+ )
+
+ # temporarily switch to a single-GPU setup for warm-up compilation
+ orig_rank = self.rank
+ orig_world_size = self.world_size
+ self.rank = self.detector.rank = 0
+ self.world_size = self.detector.world_size = 1
+ orig_recondition_every_nth_frame = self.recondition_every_nth_frame
+ # self.recondition_every_nth_frame = 2
+
+ # Get a random video
+ inference_state = self.init_state(resource_path="")
+ start_frame_idx = 0
+
+ # Run basic propagation warm-up
+ inference_state = self._warm_up_vg_propagation(inference_state, start_frame_idx)
+
+ logger.info("Warm-up compilation completed.")
+
+ # revert to the original rank and world size
+ self.rank = self.detector.rank = orig_rank
+ self.world_size = self.detector.world_size = orig_world_size
+ self.recondition_every_nth_frame = orig_recondition_every_nth_frame
+ self._warm_up_complete = True
+ self.tracker.transformer.encoder.forward.set_logging(True)
+
+ @torch.inference_mode()
+ def add_prompt(
+ self,
+ inference_state,
+ frame_idx,
+ text_str=None,
+ boxes_xywh=None,
+ box_labels=None,
+ ):
+ """
+ Add text or box prompts on a single frame. This method returns the inference
+ outputs only for the prompted frame.
+
+ Note that text prompts are NOT associated with a particular frame (i.e. they apply
+ to all frames). However, we only run inference on the frame specified in `frame_idx`.
+ """
+ logger.debug("Running add_prompt on frame %d", frame_idx)
+
+ num_frames = inference_state["num_frames"]
+ assert (
+ text_str is not None or boxes_xywh is not None
+ ), "at least one type of prompt (text, boxes) must be provided"
+ assert (
+ 0 <= frame_idx < num_frames
+ ), f"{frame_idx=} is out of range for a total of {num_frames} frames"
+
+ # since it's a semantic prompt, we start over
+ self.reset_state(inference_state)
+
+ # 1) add text prompt
+ if text_str is not None and text_str != "visual":
+ inference_state["text_prompt"] = text_str
+ inference_state["input_batch"].find_text_batch[0] = text_str
+ text_id = self.TEXT_ID_FOR_TEXT
+ else:
+ inference_state["text_prompt"] = None
+ inference_state["input_batch"].find_text_batch[0] = ""
+ text_id = self.TEXT_ID_FOR_VISUAL
+ for t in range(inference_state["num_frames"]):
+ inference_state["input_batch"].find_inputs[t].text_ids[...] = text_id
+
+ # 2) handle box prompt
+ assert (boxes_xywh is not None) == (box_labels is not None)
+ if boxes_xywh is not None:
+ boxes_xywh = torch.as_tensor(boxes_xywh, dtype=torch.float32)
+ box_labels = torch.as_tensor(box_labels, dtype=torch.long)
+ # input boxes are expected to be [xmin, ymin, width, height] format
+ # in normalized coordinates of range 0~1, similar to FA
+ assert boxes_xywh.dim() == 2
+ assert boxes_xywh.size(0) > 0 and boxes_xywh.size(-1) == 4
+ assert box_labels.dim() == 1 and box_labels.size(0) == boxes_xywh.size(0)
+ boxes_cxcywh = box_xywh_to_cxcywh(boxes_xywh)
+ assert (boxes_xywh >= 0).all().item() and (boxes_xywh <= 1).all().item()
+ assert (boxes_cxcywh >= 0).all().item() and (boxes_cxcywh <= 1).all().item()
+
+ new_box_input = boxes_cxcywh, box_labels
+ inference_state["per_frame_raw_box_input"][frame_idx] = new_box_input
+
+ # handle the case of visual prompt (also added as an input box from the UI)
+ boxes_cxcywh, box_labels, geometric_prompt = self._get_visual_prompt(
+ inference_state, frame_idx, boxes_cxcywh, box_labels
+ )
+
+ inference_state["per_frame_geometric_prompt"][frame_idx] = geometric_prompt
+
+ out = self._run_single_frame_inference(
+ inference_state, frame_idx, reverse=False
+ )
+ return frame_idx, self._postprocess_output(inference_state, out)
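+ # Illustrative box-prompt call (hypothetical values): a single positive box in
+ # normalized [xmin, ymin, width, height] coordinates covering the image centre,
+ # which is treated as a visual prompt on a freshly reset frame:
+ #
+ #   frame_idx, out = model.add_prompt(
+ #       state,
+ #       frame_idx=0,
+ #       boxes_xywh=[[0.25, 0.25, 0.5, 0.5]],
+ #       box_labels=[1],
+ #   )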
+
+ @torch.autocast(device_type="cuda", dtype=torch.bfloat16)
+ def forward(self, input: BatchedDatapoint, is_inference: bool = False):
+ """This method is only used for benchmark eval (not used in the demo)."""
+ # set the model to single GPU for benchmark evaluation (to be compatible with trainer)
+ orig_rank = self.rank
+ orig_world_size = self.world_size
+ self.rank = self.detector.rank = 0
+ self.world_size = self.detector.world_size = 1
+
+ # get data
+ text_prompt_ids = input.find_metadatas[0].original_category_id
+ text_prompt_list = input.find_text_batch
+
+ # loop over text prompts
+ tracking_res = defaultdict(dict) # frame_idx --> {obj_id: mask}
+ scores_labels = defaultdict(tuple) # obj_id --> (score, text_prompt_id)
+ inference_state = self.init_state(resource_path=input.raw_images)
+ for prompt_id, prompt in zip(text_prompt_ids, text_prompt_list):
+ self.add_prompt(inference_state, frame_idx=0, text_str=prompt)
+ start_obj_id = max(scores_labels.keys(), default=-1) + 1 # prev max + 1
+
+ # propagate the prompts
+ obj_ids_this_prompt = set()
+ for frame_idx, out in self.propagate_in_video(
+ inference_state,
+ start_frame_idx=0,
+ max_frame_num_to_track=inference_state["num_frames"],
+ reverse=False,
+ ):
+ current_frame_res = tracking_res[frame_idx]
+ for obj_id, mask in zip(out["out_obj_ids"], out["out_binary_masks"]):
+ mask_tensor = torch.tensor(mask[None], dtype=torch.bool)
+ current_frame_res[obj_id + start_obj_id] = mask_tensor
+ obj_ids_this_prompt.update(current_frame_res.keys())
+
+ obj_id_to_score = inference_state["tracker_metadata"]["obj_id_to_score"]
+ for obj_id, score in obj_id_to_score.items():
+ if obj_id + start_obj_id in obj_ids_this_prompt:
+ score_tensor = torch.tensor(score, dtype=torch.float32)
+ scores_labels[obj_id + start_obj_id] = (score_tensor, prompt_id)
+
+ self.reset_state(inference_state)
+
+ video_id = input.find_metadatas[0].original_image_id[0].cpu().item()
+ preds = self.prep_for_evaluator(input.raw_images, tracking_res, scores_labels)
+
+ # revert the model to the original rank and world size
+ self.rank = self.detector.rank = orig_rank
+ self.world_size = self.detector.world_size = orig_world_size
+ return {video_id: preds}
+
+ def back_convert(self, targets):
+ # Needed for retraining compatibility with trainer
+ return targets
+
+
+class Sam3VideoInferenceWithInstanceInteractivity(Sam3VideoInference):
+ def __init__(
+ self,
+ use_prev_mem_frame=False,
+ use_stateless_refinement=False,
+ refinement_detector_cond_frame_removal_window=16,
+ **kwargs,
+ ):
+ """
+ use_prev_mem_frame: bool, whether to condition on previous memory frames for adding points
+ use_stateless_refinement: bool, whether to enable stateless refinement behavior
+ refinement_detector_cond_frame_removal_window: int, we remove a detector conditioning frame if it
+ is within this many frames of a user refined frame. Set to a large value (e.g. 10000) to
+ always remove detector conditioning frames if there is any user refinement in the video.
+ """
+ super().__init__(**kwargs)
+ self.use_prev_mem_frame = use_prev_mem_frame
+ self.use_stateless_refinement = use_stateless_refinement
+ self.refinement_detector_cond_frame_removal_window = (
+ refinement_detector_cond_frame_removal_window
+ )
+
+ def _init_new_tracker_state(self, inference_state):
+ return self.tracker.init_state(
+ cached_features=inference_state["feature_cache"],
+ video_height=inference_state["orig_height"],
+ video_width=inference_state["orig_width"],
+ num_frames=inference_state["num_frames"],
+ )
+
+ @torch.inference_mode()
+ def propagate_in_video(
+ self,
+ inference_state,
+ start_frame_idx=None,
+ max_frame_num_to_track=None,
+ reverse=False,
+ ):
+ # step 1: check which type of propagation to run; this should be the same across all GPUs.
+ propagation_type, obj_ids = self.parse_action_history_for_propagation(
+ inference_state
+ )
+ self.add_action_history(
+ inference_state,
+ action_type=propagation_type,
+ obj_ids=obj_ids,
+ frame_idx=start_frame_idx,
+ )
+
+ # step 2: run full VG propagation
+ if propagation_type == "propagation_full":
+ logger.debug(f"Running full VG propagation (reverse={reverse}).")
+ yield from super().propagate_in_video(
+ inference_state,
+ start_frame_idx=start_frame_idx,
+ max_frame_num_to_track=max_frame_num_to_track,
+ reverse=reverse,
+ )
+ return
+
+ # step 3: run Tracker partial propagation or direct fetch existing predictions
+ assert propagation_type in ["propagation_partial", "propagation_fetch"]
+ logger.debug(
+ f"Running Tracker propagation for objects {obj_ids} and merging it with existing VG predictions (reverse={reverse})."
+ if propagation_type == "propagation_partial"
+ else f"Fetching existing VG predictions without running any propagation (reverse={reverse})."
+ )
+ processing_order, _ = self._get_processing_order(
+ inference_state,
+ start_frame_idx=start_frame_idx,
+ max_frame_num_to_track=max_frame_num_to_track,
+ reverse=reverse,
+ )
+
+ tracker_metadata = inference_state["tracker_metadata"]
+
+ # if fetching, just return the cached outputs
+ if propagation_type == "propagation_fetch":
+ for frame_idx in tqdm(processing_order):
+ if self.rank == 0:
+ obj_id_to_mask = inference_state["cached_frame_outputs"].get(
+ frame_idx, {}
+ )
+ # post processing - remove suppressed obj_ids
+ obj_id_to_score = tracker_metadata["obj_id_to_score"]
+ suppressed_obj_ids = tracker_metadata["rank0_metadata"][
+ "suppressed_obj_ids"
+ ][frame_idx]
+ obj_id_to_tracker_score = tracker_metadata[
+ "obj_id_to_tracker_score_frame_wise"
+ ][frame_idx]
+
+ out = {
+ "obj_id_to_mask": obj_id_to_mask,
+ "obj_id_to_score": obj_id_to_score,
+ "obj_id_to_tracker_score": obj_id_to_tracker_score,
+ }
+ yield (
+ frame_idx,
+ self._postprocess_output(
+ inference_state, out, suppressed_obj_ids=suppressed_obj_ids
+ ),
+ )
+ else:
+ yield frame_idx, None
+
+ return
+
+ # get Tracker inference states containing selected obj_ids
+ if propagation_type == "propagation_partial":
+ # can be empty for GPUs where objects are not in their inference states
+ tracker_states_local = self._get_tracker_inference_states_by_obj_ids(
+ inference_state, obj_ids
+ )
+ for tracker_state in tracker_states_local:
+ self.tracker.propagate_in_video_preflight(
+ tracker_state, run_mem_encoder=True
+ )
+
+ for frame_idx in tqdm(processing_order):
+ # run Tracker propagation
+ if propagation_type == "propagation_partial":
+ self._prepare_backbone_feats(inference_state, frame_idx, reverse)
+ obj_ids_local, low_res_masks_local, tracker_scores_local = (
+ self._propogate_tracker_one_frame_local_gpu(
+ tracker_states_local,
+ frame_idx=frame_idx,
+ reverse=reverse,
+ run_mem_encoder=True,
+ )
+ )
+
+ # broadcast refined object tracker scores and masks to all GPUs
+ # handle multiple objects that can be located on different GPUs
+ refined_obj_data = {} # obj_id -> (score, mask_video_res)
+
+ # Collect data for objects on this GPU
+ local_obj_data = {}
+ for obj_id in obj_ids:
+ obj_rank = self._get_gpu_id_by_obj_id(inference_state, obj_id)
+ if self.rank == obj_rank and obj_id in obj_ids_local:
+ refined_obj_idx = obj_ids_local.index(obj_id)
+ refined_mask_low_res = low_res_masks_local[
+ refined_obj_idx
+ ] # (H_low_res, W_low_res)
+ refined_score = tracker_scores_local[refined_obj_idx]
+
+ # Keep low resolution for broadcasting to reduce communication cost
+ local_obj_data[obj_id] = (refined_score, refined_mask_low_res)
+
+ # Broadcast data from each GPU that has refined objects
+ if self.world_size > 1:
+ for obj_id in obj_ids:
+ obj_rank = self._get_gpu_id_by_obj_id(inference_state, obj_id)
+ if self.rank == obj_rank:
+ # This GPU has the object, broadcast its data
+ data_to_broadcast = local_obj_data.get(obj_id, None)
+ data_list = [
+ (data_to_broadcast[0].cpu(), data_to_broadcast[1].cpu())
+ ]
+ self.broadcast_python_obj_cpu(data_list, src=obj_rank)
+ if data_to_broadcast is not None:
+ refined_obj_data[obj_id] = data_to_broadcast
+ elif self.rank != obj_rank:
+ # This GPU doesn't have the object, receive data
+ data_list = [None]
+ self.broadcast_python_obj_cpu(data_list, src=obj_rank)
+ refined_obj_data[obj_id] = (
+ data_list[0][0].to(self.device),
+ data_list[0][1].to(self.device),
+ )
+ else:
+ # Single GPU case
+ refined_obj_data = local_obj_data
+
+ # Update Tracker scores for all refined objects
+ for obj_id, (refined_score, _) in refined_obj_data.items():
+ tracker_metadata["obj_id_to_tracker_score_frame_wise"][
+ frame_idx
+ ].update({obj_id: refined_score.item()})
+
+ if self.rank == 0:
+                # Get predictions from the Tracker inference states; these include the
+                # original VG predictions and the refined predictions from user interaction.
+
+ # Prepare refined masks dictionary - upscale to video resolution after broadcast
+ refined_obj_id_to_mask = {}
+ for obj_id, (_, refined_mask_low_res) in refined_obj_data.items():
+ refined_mask_video_res = (
+ self._convert_low_res_mask_to_video_res(
+ refined_mask_low_res, inference_state
+ )
+ ) # (1, H_video, W_video) bool
+ refined_obj_id_to_mask[obj_id] = refined_mask_video_res
+
+ obj_id_to_mask = self._build_tracker_output(
+ inference_state, frame_idx, refined_obj_id_to_mask
+ )
+ out = {
+ "obj_id_to_mask": obj_id_to_mask,
+ "obj_id_to_score": tracker_metadata["obj_id_to_score"],
+ "obj_id_to_tracker_score": tracker_metadata[
+ "obj_id_to_tracker_score_frame_wise"
+ ][frame_idx],
+ }
+ suppressed_obj_ids = tracker_metadata["rank0_metadata"][
+ "suppressed_obj_ids"
+ ][frame_idx]
+ self._cache_frame_outputs(
+ inference_state,
+ frame_idx,
+ obj_id_to_mask,
+ suppressed_obj_ids=suppressed_obj_ids,
+ )
+ yield (
+ frame_idx,
+ self._postprocess_output(
+ inference_state, out, suppressed_obj_ids=suppressed_obj_ids
+ ),
+ )
+ else:
+ yield frame_idx, None
+
+ def add_action_history(
+ self, inference_state, action_type, frame_idx=None, obj_ids=None
+ ):
+ """
+ action_history is used to automatically decide what to do during propagation.
+ action_type: one of ["add", "remove", "refine"] + ["propagation_full", "propagation_partial", "propagation_fetch"]
+ """
+ instance_actions = ["add", "remove", "refine"]
+ propagation_actions = [
+ "propagation_full",
+ "propagation_partial",
+ "propagation_fetch",
+ ]
+ assert (
+ action_type in instance_actions + propagation_actions
+ ), f"Invalid action type: {action_type}, must be one of {instance_actions + propagation_actions}"
+ action = {
+ "type": action_type,
+ "frame_idx": frame_idx,
+ "obj_ids": obj_ids,
+ }
+ inference_state["action_history"].append(action)
+
+ def _has_object_been_refined(self, inference_state, obj_id):
+ action_history = inference_state["action_history"]
+ for action in action_history:
+ if action["type"] in ["add", "refine"] and action.get("obj_ids"):
+ if obj_id in action["obj_ids"]:
+ return True
+ return False
+
+ def parse_action_history_for_propagation(self, inference_state):
+ """
+        Parse the actions recorded since the last propagation and prepare for the next propagation.
+        We support multiple actions (add/remove/refine) between two propagations. For example, with
+        an action history like ["propagate", "add", "refine", "remove", "add"], the next propagation
+        would drop the removed object and only propagate the added/refined objects.
+
+ Returns:
+ propagation_type: one of ["propagation_full", "propagation_partial", "propagation_fetch"]
+ - "propagation_full": run VG propagation for all objects
+ - "propagation_partial": run Tracker propagation for selected objects, useful for add/refine actions
+ - "propagation_fetch": fetch existing VG predictions without running any propagation
+ obj_ids: list of object ids to run Tracker propagation on if propagation_type is "propagation_partial".
+ """
+ action_history = inference_state["action_history"]
+ if len(action_history) == 0:
+ # we run propagation for the first time
+ return "propagation_full", None
+
+ if "propagation" in action_history[-1]["type"]:
+ if action_history[-1]["type"] in ["propagation_fetch"]:
+ # last propagation is direct fetch, we fetch existing predictions
+ return "propagation_fetch", None
+ elif action_history[-1]["type"] in [
+ "propagation_partial",
+ "propagation_full",
+ ]:
+                # we fetch predictions if propagation has already run twice, or if it has
+                # run once starting from the first or last frame.
+ if (
+ len(action_history) > 1
+ and action_history[-2]["type"]
+ in ["propagation_partial", "propagation_full"]
+ ) or action_history[-1]["frame_idx"] in [
+ 0,
+ inference_state["num_frames"] - 1,
+ ]:
+ # we have run both forward and backward partial/full propagation
+ return "propagation_fetch", None
+ else:
+                    # we have run partial/full forward or backward propagation once; run it for the remaining frames
+ return action_history[-1]["type"], action_history[-1]["obj_ids"]
+
+ # parse actions since last propagation
+ obj_ids = []
+ for action in action_history[::-1]:
+ if "propagation" in action["type"]:
+ # we reached the last propagation action, stop parsing
+ break
+ if action["type"] in ["add", "refine"]:
+ obj_ids.extend(action["obj_ids"])
+ # else action["type"] == "remove": noop
+ obj_ids = list(set(obj_ids)) if len(obj_ids) > 0 else None
+ propagation_type = (
+ "propagation_partial" if obj_ids is not None else "propagation_fetch"
+ )
+ return propagation_type, obj_ids
+
+ def remove_object(self, inference_state, obj_id, is_user_action=False):
+ """
+        Try to remove the object from the tracker states on every GPU; this is a no-op
+        for states that do not contain the object.
+ """
+ obj_rank = self._get_gpu_id_by_obj_id(inference_state, obj_id)
+ assert obj_rank is not None, f"Object {obj_id} not found in any GPU."
+
+ tracker_states_local = inference_state["tracker_inference_states"]
+ if self.rank == obj_rank:
+ self._tracker_remove_object(tracker_states_local, obj_id)
+
+ if is_user_action:
+ self.add_action_history(
+ inference_state, action_type="remove", obj_ids=[obj_id]
+ )
+
+ # update metadata
+ tracker_metadata = inference_state["tracker_metadata"]
+ _obj_ids = tracker_metadata["obj_ids_per_gpu"][obj_rank]
+ tracker_metadata["obj_ids_per_gpu"][obj_rank] = _obj_ids[_obj_ids != obj_id]
+ tracker_metadata["num_obj_per_gpu"][obj_rank] = len(
+ tracker_metadata["obj_ids_per_gpu"][obj_rank]
+ )
+ tracker_metadata["obj_ids_all_gpu"] = np.concatenate(
+ tracker_metadata["obj_ids_per_gpu"]
+ )
+ tracker_metadata["obj_id_to_score"].pop(obj_id, None)
+ # tracker_metadata["max_obj_id"] # we do not reuse the object id, so we do not update it here
+
+ # Clean up cached frame outputs to remove references to the deleted object
+ if "cached_frame_outputs" in inference_state:
+ for frame_idx in inference_state["cached_frame_outputs"]:
+ frame_cache = inference_state["cached_frame_outputs"][frame_idx]
+ if obj_id in frame_cache:
+ del frame_cache[obj_id]
+
+ def _get_gpu_id_by_obj_id(self, inference_state, obj_id):
+ """
+ Locate GPU ID for a given object.
+ """
+ obj_ids_per_gpu = inference_state["tracker_metadata"]["obj_ids_per_gpu"]
+ for rank, obj_ids in enumerate(obj_ids_per_gpu):
+ if obj_id in obj_ids:
+ return rank
+ return None # object not found in any GPU
+
+ def _get_tracker_inference_states_by_obj_ids(self, inference_state, obj_ids):
+ """
+ Get the Tracker inference states that contain the given object ids.
+ This is used to run partial Tracker propagation on a single object/bucket.
+        Zero, one, or multiple states may be returned.
+ """
+ states = [
+ state
+ for state in inference_state["tracker_inference_states"]
+ if set(obj_ids) & set(state["obj_ids"])
+ ]
+ return states
+
+ def _prepare_backbone_feats(self, inference_state, frame_idx, reverse):
+ input_batch = inference_state["input_batch"]
+ feature_cache = inference_state["feature_cache"]
+ num_frames = inference_state["num_frames"]
+ geometric_prompt = (
+ inference_state["constants"]["empty_geometric_prompt"]
+ if inference_state["per_frame_geometric_prompt"][frame_idx] is None
+ else inference_state["per_frame_geometric_prompt"][frame_idx]
+ )
+ _ = self.run_backbone_and_detection(
+ frame_idx=frame_idx,
+ num_frames=num_frames,
+ input_batch=input_batch,
+ geometric_prompt=geometric_prompt,
+ feature_cache=feature_cache,
+ reverse=reverse,
+ allow_new_detections=True,
+ )
+
+ @torch.inference_mode()
+ def add_prompt(
+ self,
+ inference_state,
+ frame_idx,
+ text_str=None,
+ boxes_xywh=None,
+ box_labels=None,
+ points=None,
+ point_labels=None,
+ obj_id=None,
+ rel_coordinates=True,
+ ):
+ if points is not None:
+ # Tracker instance prompts
+ assert (
+ text_str is None and boxes_xywh is None
+ ), "When points are provided, text_str and boxes_xywh must be None."
+ assert (
+ obj_id is not None
+ ), "When points are provided, obj_id must be provided."
+ return self.add_tracker_new_points(
+ inference_state,
+ frame_idx,
+ obj_id=obj_id,
+ points=points,
+ labels=point_labels,
+ rel_coordinates=rel_coordinates,
+ use_prev_mem_frame=self.use_prev_mem_frame,
+ )
+ else:
+ # SAM3 prompts
+ return super().add_prompt(
+ inference_state,
+ frame_idx,
+ text_str=text_str,
+ boxes_xywh=boxes_xywh,
+ box_labels=box_labels,
+ )
+
+ @torch.inference_mode()
+ def add_tracker_new_points(
+ self,
+ inference_state,
+ frame_idx,
+ obj_id,
+ points,
+ labels,
+ rel_coordinates=True,
+ use_prev_mem_frame=False,
+ ):
+ """Add a new point prompt to Tracker. Suppporting instance refinement to existing
+ objects by passing existing obj_id or adding a new object by passing a new obj_id.
+ use_prev_mem_frame=False to disable cross attention to previous memory frames.
+ Every GPU returns the same results, and results should contain all masks including
+ these masks not refined or not added by the current user points.
+ """
+ assert obj_id is not None, "obj_id must be provided to add new points"
+ tracker_metadata = inference_state["tracker_metadata"]
+ if tracker_metadata == {}:
+ # initialize masklet metadata if it's uninitialized (empty dict)
+ tracker_metadata.update(self._initialize_metadata())
+
+ obj_rank = self._get_gpu_id_by_obj_id(inference_state, obj_id)
+
+ # prepare feature
+ self._prepare_backbone_feats(inference_state, frame_idx, reverse=False)
+
+ object_has_been_refined = self._has_object_been_refined(inference_state, obj_id)
+ if (
+ obj_rank is not None
+ and self.use_stateless_refinement
+ and not object_has_been_refined
+ ):
+ # The first time we start refinement on the object, we remove it.
+ logger.debug(
+ f"[rank={self.rank}] Removing object {obj_id} before refinement."
+ )
+ self.remove_object(inference_state, obj_id, is_user_action=False)
+ obj_rank = None
+
+ if obj_rank is None:
+            # new object: assign it a GPU and create a new inference state if the limit allows
+ num_prev_obj = np.sum(tracker_metadata["num_obj_per_gpu"])
+ if num_prev_obj >= self.max_num_objects:
+ logger.warning(
+ f"add_tracker_new_points: cannot add a new object as we are already tracking {num_prev_obj=} "
+ f"masklets (under {self.max_num_objects=})"
+ )
+ obj_ids = []
+ H_low_res = W_low_res = self.tracker.low_res_mask_size
+ H_video_res = inference_state["orig_height"]
+ W_video_res = inference_state["orig_width"]
+ low_res_masks = torch.zeros(0, 1, H_low_res, W_low_res)
+ video_res_masks = torch.zeros(0, 1, H_video_res, W_video_res)
+ return frame_idx, obj_ids, low_res_masks, video_res_masks
+
+ new_det_gpu_ids = self._assign_new_det_to_gpus(
+ new_det_num=1,
+ prev_workload_per_gpu=tracker_metadata["num_obj_per_gpu"],
+ )
+ obj_rank = new_det_gpu_ids[0]
+
+ # get tracker inference state for the new object
+ if self.rank == obj_rank:
+ # for batched inference, we create a new inference state
+ tracker_state = self._init_new_tracker_state(inference_state)
+ inference_state["tracker_inference_states"].append(tracker_state)
+
+ # update metadata
+ tracker_metadata["obj_ids_per_gpu"][obj_rank] = np.concatenate(
+ [
+ tracker_metadata["obj_ids_per_gpu"][obj_rank],
+ np.array([obj_id], dtype=np.int64),
+ ]
+ )
+ tracker_metadata["num_obj_per_gpu"][obj_rank] = len(
+ tracker_metadata["obj_ids_per_gpu"][obj_rank]
+ )
+ tracker_metadata["obj_ids_all_gpu"] = np.concatenate(
+ tracker_metadata["obj_ids_per_gpu"]
+ )
+ tracker_metadata["max_obj_id"] = max(tracker_metadata["max_obj_id"], obj_id)
+
+ logger.debug(
+ f"[rank={self.rank}] Adding new object with id {obj_id} at frame {frame_idx}."
+ )
+ self.add_action_history(
+ inference_state, "add", frame_idx=frame_idx, obj_ids=[obj_id]
+ )
+ else:
+ # existing object, for refinement
+ if self.rank == obj_rank:
+ tracker_states = self._get_tracker_inference_states_by_obj_ids(
+ inference_state, [obj_id]
+ )
+ assert (
+ len(tracker_states) == 1
+ ), f"[rank={self.rank}] Multiple Tracker inference states found for the same object id."
+ tracker_state = tracker_states[0]
+
+ # log
+ logger.debug(
+ f"[rank={self.rank}] Refining existing object with id {obj_id} at frame {frame_idx}."
+ )
+ self.add_action_history(
+ inference_state, "refine", frame_idx=frame_idx, obj_ids=[obj_id]
+ )
+
+ # assign higher score to added/refined object
+ tracker_metadata["obj_id_to_score"][obj_id] = 1.0
+ tracker_metadata["obj_id_to_tracker_score_frame_wise"][frame_idx][obj_id] = 1.0
+
+ if self.rank == 0:
+ rank0_metadata = tracker_metadata.get("rank0_metadata", {})
+
+ if "removed_obj_ids" in rank0_metadata:
+ rank0_metadata["removed_obj_ids"].discard(obj_id)
+
+ if "suppressed_obj_ids" in rank0_metadata:
+ for frame_id in rank0_metadata["suppressed_obj_ids"]:
+ rank0_metadata["suppressed_obj_ids"][frame_id].discard(obj_id)
+
+ if "masklet_confirmation" in rank0_metadata:
+ obj_ids_all_gpu = tracker_metadata["obj_ids_all_gpu"]
+ obj_indices = np.where(obj_ids_all_gpu == obj_id)[0]
+ if len(obj_indices) > 0:
+ obj_idx = obj_indices[0]
+ if obj_idx < len(rank0_metadata["masklet_confirmation"]["status"]):
+ rank0_metadata["masklet_confirmation"]["status"][obj_idx] = 1
+ rank0_metadata["masklet_confirmation"]["consecutive_det_num"][
+ obj_idx
+ ] = self.masklet_confirmation_consecutive_det_thresh
+
+ if self.rank == obj_rank:
+ frame_idx, obj_ids, low_res_masks, video_res_masks = (
+ self.tracker.add_new_points(
+ inference_state=tracker_state,
+ frame_idx=frame_idx,
+ obj_id=obj_id,
+ points=points,
+ labels=labels,
+ clear_old_points=True,
+ rel_coordinates=rel_coordinates,
+ use_prev_mem_frame=use_prev_mem_frame,
+ )
+ )
+
+ if video_res_masks is not None and len(video_res_masks) > 0:
+ video_res_masks = fill_holes_in_mask_scores(
+ video_res_masks, # shape (N, 1, H_video, W_video)
+ max_area=self.fill_hole_area,
+ fill_holes=True,
+ remove_sprinkles=True,
+ )
+
+            # Run propagation preflight (the memory encoder has already run for the current input points).
+ self.tracker.propagate_in_video_preflight(
+ tracker_state, run_mem_encoder=True
+ )
+            # Clear detector conditioning frames when user clicks are received, allowing the
+            # model to update masks on these frames. This is a no-op if the user is refining
+            # on the detector conditioning frames or adding new objects.
+ self.clear_detector_added_cond_frame_in_tracker(
+ tracker_state, obj_id, frame_idx
+ )
+
+ # fetch results from states and gather across GPUs
+ # Use optimized caching approach to avoid reprocessing unmodified objects
+ if self.rank == obj_rank and len(obj_ids) > 0:
+ new_mask_data = (video_res_masks[obj_ids.index(obj_id)] > 0.0).to(
+ torch.bool
+ )
+ else:
+ new_mask_data = None
+ # Broadcast the new mask data across all ranks for consistency
+ if self.world_size > 1:
+ data_list = [new_mask_data.cpu() if new_mask_data is not None else None]
+ self.broadcast_python_obj_cpu(data_list, src=obj_rank)
+            new_mask_data = data_list[0].to(self.device) if data_list[0] is not None else None
+
+ if self.rank == 0:
+ obj_id_to_mask = self._build_tracker_output(
+ inference_state,
+ frame_idx,
+ {obj_id: new_mask_data} if new_mask_data is not None else None,
+ )
+ # post processing - remove suppressed obj_ids
+ obj_id_to_score = tracker_metadata["obj_id_to_score"]
+ suppressed_obj_ids = tracker_metadata["rank0_metadata"][
+ "suppressed_obj_ids"
+ ][frame_idx]
+ obj_id_to_tracker_score = tracker_metadata[
+ "obj_id_to_tracker_score_frame_wise"
+ ][frame_idx]
+
+ out = {
+ "obj_id_to_mask": obj_id_to_mask,
+ "obj_id_to_score": obj_id_to_score,
+ "obj_id_to_tracker_score": obj_id_to_tracker_score,
+ }
+ self._cache_frame_outputs(
+ inference_state,
+ frame_idx,
+ obj_id_to_mask,
+ suppressed_obj_ids=suppressed_obj_ids,
+ )
+ return frame_idx, self._postprocess_output(
+ inference_state, out, suppressed_obj_ids=suppressed_obj_ids
+ )
+ else:
+ return frame_idx, None # no output on other GPUs
+
+ def _gather_obj_id_to_mask_across_gpus(self, inference_state, obj_id_to_mask_local):
+ """Gather obj_id_to_mask from all GPUs. Optionally resize the masks to the video resolution."""
+ tracker_metadata = inference_state["tracker_metadata"]
+
+ # concatenate the output masklets from all local inference states
+ H_mask = W_mask = self.tracker.low_res_mask_size
+ obj_ids_local = tracker_metadata["obj_ids_per_gpu"][self.rank]
+ low_res_masks_local = []
+ for obj_id in obj_ids_local:
+ if obj_id in obj_id_to_mask_local:
+ low_res_masks_local.append(obj_id_to_mask_local[obj_id])
+ else:
+ low_res_masks_local.append(
+ torch.full((H_mask, W_mask), -1024.0, device=self.device)
+ )
+ if len(low_res_masks_local) > 0:
+ low_res_masks_local = torch.stack(low_res_masks_local, dim=0) # (N, H, W)
+ assert low_res_masks_local.shape[1:] == (H_mask, W_mask)
+ else:
+ low_res_masks_local = torch.zeros(0, H_mask, W_mask, device=self.device)
+
+ # all-gather `low_res_masks_local` into `low_res_masks_global`
+ # - low_res_masks_global: Tensor -- (num_global_obj, H_mask, W_mask)
+ if self.world_size > 1:
+ low_res_masks_local = low_res_masks_local.float().contiguous()
+ low_res_masks_peers = [
+ low_res_masks_local.new_empty(num_obj, H_mask, W_mask)
+ for num_obj in tracker_metadata["num_obj_per_gpu"]
+ ]
+ dist.all_gather(low_res_masks_peers, low_res_masks_local)
+ low_res_masks_global = torch.cat(low_res_masks_peers, dim=0)
+ else:
+ low_res_masks_global = low_res_masks_local
+ return low_res_masks_global
+
+ def _convert_low_res_mask_to_video_res(self, low_res_mask, inference_state):
+ """
+ Convert a low-res mask to video resolution, matching the format expected by _build_tracker_output.
+
+ Args:
+ low_res_mask: Tensor of shape (H_low_res, W_low_res)
+ inference_state: Contains video dimensions
+
+ Returns:
+ video_res_mask: Tensor of shape (1, H_video, W_video) bool
+ """
+ if low_res_mask is None:
+ return None
+
+        # Add batch and channel dims for interpolation: (H_low_res, W_low_res) -> (1, 1, H_low_res, W_low_res)
+ low_res_mask_3d = low_res_mask.unsqueeze(0).unsqueeze(0)
+
+ # Get video dimensions
+ H_video = inference_state["orig_height"]
+ W_video = inference_state["orig_width"]
+
+ video_res_mask = F.interpolate(
+ low_res_mask_3d.float(),
+ size=(H_video, W_video),
+ mode="bilinear",
+ align_corners=False,
+        )  # (1, 1, H_video, W_video)
+
+        # Drop the batch dim and binarize -> (1, H_video, W_video) bool
+        return (video_res_mask.squeeze(0) > 0.0).to(torch.bool)
+
+ def clear_detector_added_cond_frame_in_tracker(
+ self, tracker_state, obj_id, refined_frame_idx
+ ):
+ """Clear detector added conditioning frame if it is within a predefined window
+ of the refined frame. This allow model to update masks on these frames."""
+ obj_idx = self.tracker._obj_id_to_idx(tracker_state, obj_id)
+
+ mask_only_cond_frame_indices = []
+ window = self.refinement_detector_cond_frame_removal_window
+ for frame_idx in tracker_state["mask_inputs_per_obj"][obj_idx]:
+ if frame_idx not in tracker_state["point_inputs_per_obj"][obj_idx]:
+ # clear conditioning frames within a window of the refined frame
+ if abs(frame_idx - refined_frame_idx) <= window:
+ mask_only_cond_frame_indices.append(frame_idx)
+
+ # clear
+ if len(mask_only_cond_frame_indices) > 0:
+ for frame_idx in mask_only_cond_frame_indices:
+ # obj_ids_on_this_frame is essentially all obj_ids in the state
+ # since they are bucket batched
+ obj_ids_on_this_frame = tracker_state["obj_id_to_idx"].keys()
+ for obj_id2 in obj_ids_on_this_frame:
+ self.tracker.clear_all_points_in_frame(
+ tracker_state, frame_idx, obj_id2, need_output=False
+ )
+ logger.debug(
+ f"Cleared detector mask only conditioning frames ({mask_only_cond_frame_indices}) in Tracker."
+ )
+ return
+
+
+def is_image_type(resource_path: str) -> bool:
+ if isinstance(resource_path, list):
+ return len(resource_path) == 1
+ return resource_path.lower().endswith(tuple(IMAGE_EXTS))
diff --git a/sam3/model/sam3_video_predictor.py b/sam3/model/sam3_video_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..c639e1d058eb7389aef914e0b7de27c86b693230
--- /dev/null
+++ b/sam3/model/sam3_video_predictor.py
@@ -0,0 +1,521 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import datetime
+import gc
+import multiprocessing as mp
+import os
+import queue
+import socket
+import sys
+import time
+import uuid
+from contextlib import closing
+from typing import List, Optional
+
+import psutil
+import torch
+
+from sam3.logger import get_logger
+
+logger = get_logger(__name__)
+
+
+class Sam3VideoPredictor:
+ # a global dictionary that holds all inference states for this model (key is session_id)
+ _ALL_INFERENCE_STATES = {}
+
+ def __init__(
+ self,
+ checkpoint_path=None,
+ bpe_path=None,
+ has_presence_token=True,
+ geo_encoder_use_img_cross_attn=True,
+ strict_state_dict_loading=True,
+ async_loading_frames=False,
+ video_loader_type="cv2",
+ apply_temporal_disambiguation: bool = True,
+ ):
+ self.async_loading_frames = async_loading_frames
+ self.video_loader_type = video_loader_type
+ from sam3.model_builder import build_sam3_video_model
+
+ self.model = (
+ build_sam3_video_model(
+ checkpoint_path=checkpoint_path,
+ bpe_path=bpe_path,
+ has_presence_token=has_presence_token,
+ geo_encoder_use_img_cross_attn=geo_encoder_use_img_cross_attn,
+ strict_state_dict_loading=strict_state_dict_loading,
+ apply_temporal_disambiguation=apply_temporal_disambiguation,
+ )
+ .cuda()
+ .eval()
+ )
+
+ @torch.inference_mode()
+ def handle_request(self, request):
+ """Dispatch a request based on its type."""
+ request_type = request["type"]
+ if request_type == "start_session":
+ return self.start_session(
+ resource_path=request["resource_path"],
+ session_id=request.get("session_id", None),
+ )
+ elif request_type == "add_prompt":
+ return self.add_prompt(
+ session_id=request["session_id"],
+ frame_idx=request["frame_index"],
+ text=request.get("text", None),
+ points=request.get("points", None),
+ point_labels=request.get("point_labels", None),
+ bounding_boxes=request.get("bounding_boxes", None),
+ bounding_box_labels=request.get("bounding_box_labels", None),
+ obj_id=request.get("obj_id", None),
+ )
+ elif request_type == "remove_object":
+ return self.remove_object(
+ session_id=request["session_id"],
+ obj_id=request["obj_id"],
+ is_user_action=request.get("is_user_action", True),
+ )
+ elif request_type == "reset_session":
+ return self.reset_session(session_id=request["session_id"])
+ elif request_type == "close_session":
+ return self.close_session(session_id=request["session_id"])
+ else:
+ raise RuntimeError(f"invalid request type: {request_type}")
+
+ @torch.inference_mode()
+ def handle_stream_request(self, request):
+ """Dispatch a stream request based on its type."""
+ request_type = request["type"]
+ if request_type == "propagate_in_video":
+ yield from self.propagate_in_video(
+ session_id=request["session_id"],
+ propagation_direction=request.get("propagation_direction", "both"),
+ start_frame_idx=request.get("start_frame_index", None),
+ max_frame_num_to_track=request.get("max_frame_num_to_track", None),
+ )
+ else:
+ raise RuntimeError(f"invalid request type: {request_type}")
+
+ def start_session(self, resource_path, session_id=None):
+ """
+ Start a new inference session on an image or a video. Here `resource_path`
+ can be either a path to an image file (for image inference) or an MP4 file
+ or directory with JPEG video frames (for video inference).
+
+ If `session_id` is defined, it will be used as identifier for the
+ session. If it is not defined, the start_session function will create
+ a session id and return it.
+ """
+ # get an initial inference_state from the model
+ inference_state = self.model.init_state(
+ resource_path=resource_path,
+ async_loading_frames=self.async_loading_frames,
+ video_loader_type=self.video_loader_type,
+ )
+ if not session_id:
+ session_id = str(uuid.uuid4())
+ self._ALL_INFERENCE_STATES[session_id] = {
+ "state": inference_state,
+ "session_id": session_id,
+ "start_time": time.time(),
+ }
+ logger.debug(
+ f"started new session {session_id}; {self._get_session_stats()}; "
+ f"{self._get_torch_and_gpu_properties()}"
+ )
+ return {"session_id": session_id}
+
+ def add_prompt(
+ self,
+ session_id: str,
+ frame_idx: int,
+ text: Optional[str] = None,
+ points: Optional[List[List[float]]] = None,
+ point_labels: Optional[List[int]] = None,
+ bounding_boxes: Optional[List[List[float]]] = None,
+ bounding_box_labels: Optional[List[int]] = None,
+ obj_id: Optional[int] = None,
+ ):
+ """Add text, box and/or point prompt on a specific video frame."""
+ logger.debug(
+ f"add prompt on frame {frame_idx} in session {session_id}: "
+ f"{text=}, {points=}, {point_labels=}, "
+ f"{bounding_boxes=}, {bounding_box_labels=}"
+ )
+ session = self._get_session(session_id)
+ inference_state = session["state"]
+
+ frame_idx, outputs = self.model.add_prompt(
+ inference_state=inference_state,
+ frame_idx=frame_idx,
+ text_str=text,
+ points=points,
+ point_labels=point_labels,
+ boxes_xywh=bounding_boxes,
+ box_labels=bounding_box_labels,
+ obj_id=obj_id,
+ )
+ return {"frame_index": frame_idx, "outputs": outputs}
+
+ def remove_object(
+ self,
+ session_id: str,
+ obj_id: int,
+ is_user_action: bool = True,
+ ):
+ """Remove an object from tracking."""
+ logger.debug(
+ f"remove object {obj_id} in session {session_id}: " f"{is_user_action=}"
+ )
+ session = self._get_session(session_id)
+ inference_state = session["state"]
+
+ self.model.remove_object(
+ inference_state=inference_state,
+ obj_id=obj_id,
+ is_user_action=is_user_action,
+ )
+ return {"is_success": True}
+
+ def propagate_in_video(
+ self,
+ session_id,
+ propagation_direction,
+ start_frame_idx,
+ max_frame_num_to_track,
+ ):
+ """Propagate the added prompts to get grounding results on all video frames."""
+ logger.debug(
+ f"propagate in video in session {session_id}: "
+ f"{propagation_direction=}, {start_frame_idx=}, {max_frame_num_to_track=}"
+ )
+ try:
+ session = self._get_session(session_id)
+ inference_state = session["state"]
+ if propagation_direction not in ["both", "forward", "backward"]:
+ raise ValueError(
+ f"invalid propagation direction: {propagation_direction}"
+ )
+
+ # First doing the forward propagation
+ if propagation_direction in ["both", "forward"]:
+ for frame_idx, outputs in self.model.propagate_in_video(
+ inference_state=inference_state,
+ start_frame_idx=start_frame_idx,
+ max_frame_num_to_track=max_frame_num_to_track,
+ reverse=False,
+ ):
+ yield {"frame_index": frame_idx, "outputs": outputs}
+ # Then doing the backward propagation (reverse in time)
+ if propagation_direction in ["both", "backward"]:
+ for frame_idx, outputs in self.model.propagate_in_video(
+ inference_state=inference_state,
+ start_frame_idx=start_frame_idx,
+ max_frame_num_to_track=max_frame_num_to_track,
+ reverse=True,
+ ):
+ yield {"frame_index": frame_idx, "outputs": outputs}
+ finally:
+ # Log upon completion (so that e.g. we can see if two propagations happen in parallel).
+ # Using `finally` here to log even when the tracking is aborted with GeneratorExit.
+ logger.debug(
+ f"propagation ended in session {session_id}; {self._get_session_stats()}"
+ )
+
+ def reset_session(self, session_id):
+ """Reset the session to its initial state (as when it's initial opened)."""
+ logger.debug(f"reset session {session_id}")
+ session = self._get_session(session_id)
+ inference_state = session["state"]
+ self.model.reset_state(inference_state)
+ return {"is_success": True}
+
+ def close_session(self, session_id):
+ """
+ Close a session. This method is idempotent and can be called multiple
+ times on the same "session_id".
+ """
+ session = self._ALL_INFERENCE_STATES.pop(session_id, None)
+ if session is None:
+ logger.warning(
+ f"cannot close session {session_id} as it does not exist (it might have expired); "
+ f"{self._get_session_stats()}"
+ )
+ else:
+ del session
+ gc.collect()
+ logger.info(f"removed session {session_id}; {self._get_session_stats()}")
+ return {"is_success": True}
+
+ def _get_session(self, session_id):
+ session = self._ALL_INFERENCE_STATES.get(session_id, None)
+ if session is None:
+ raise RuntimeError(
+ f"Cannot find session {session_id}; it might have expired"
+ )
+ return session
+
+ def _get_session_stats(self):
+ """Get a statistics string for live sessions and their GPU usage."""
+ # print both the session ids and their video frame numbers
+ live_session_strs = [
+ f"'{session_id}' ({session['state']['num_frames']} frames)"
+ for session_id, session in self._ALL_INFERENCE_STATES.items()
+ ]
+ session_stats_str = (
+ f"live sessions: [{', '.join(live_session_strs)}], GPU memory: "
+ f"{torch.cuda.memory_allocated() // 1024**2} MiB used and "
+ f"{torch.cuda.memory_reserved() // 1024**2} MiB reserved"
+ f" (max over time: {torch.cuda.max_memory_allocated() // 1024**2} MiB used "
+ f"and {torch.cuda.max_memory_reserved() // 1024**2} MiB reserved)"
+ )
+ return session_stats_str
+
+ def _get_torch_and_gpu_properties(self):
+ """Get a string for PyTorch and GPU properties (for logging and debugging)."""
+ torch_and_gpu_str = (
+ f"torch: {torch.__version__} with CUDA arch {torch.cuda.get_arch_list()}, "
+ f"GPU device: {torch.cuda.get_device_properties(torch.cuda.current_device())}"
+ )
+ return torch_and_gpu_str
+
+ def shutdown(self):
+ """Shutdown the predictor and clear all sessions."""
+ self._ALL_INFERENCE_STATES.clear()
+
+
+class Sam3VideoPredictorMultiGPU(Sam3VideoPredictor):
+ def __init__(self, *model_args, gpus_to_use=None, **model_kwargs):
+ if gpus_to_use is None:
+ # if not specified, use only the current GPU by default
+ gpus_to_use = [torch.cuda.current_device()]
+
+ IS_MAIN_PROCESS = os.getenv("IS_MAIN_PROCESS", "1") == "1"
+ if IS_MAIN_PROCESS:
+ gpus_to_use = sorted(set(gpus_to_use))
+ logger.info(f"using the following GPU IDs: {gpus_to_use}")
+ assert len(gpus_to_use) > 0 and all(isinstance(i, int) for i in gpus_to_use)
+ assert all(0 <= i < torch.cuda.device_count() for i in gpus_to_use)
+ os.environ["MASTER_ADDR"] = "localhost"
+ os.environ["MASTER_PORT"] = f"{self._find_free_port()}"
+ os.environ["RANK"] = "0"
+ os.environ["WORLD_SIZE"] = f"{len(gpus_to_use)}"
+
+ self.gpus_to_use = gpus_to_use
+ self.rank = int(os.environ["RANK"])
+ self.world_size = int(os.environ["WORLD_SIZE"])
+ self.rank_str = f"rank={self.rank} with world_size={self.world_size}"
+ self.device = torch.device(f"cuda:{self.gpus_to_use[self.rank]}")
+ torch.cuda.set_device(self.device)
+ self.has_shutdown = False
+ if self.rank == 0:
+ logger.info("\n\n\n\t*** START loading model on all ranks ***\n\n")
+
+ logger.info(f"loading model on {self.rank_str} -- this could take a while ...")
+ super().__init__(*model_args, **model_kwargs)
+ logger.info(f"loading model on {self.rank_str} -- DONE locally")
+
+ if self.world_size > 1 and self.rank == 0:
+ # start the worker processes *after* the model is loaded in the main process
+ # so that the main process can run torch.compile and fill the cache first
+ self._start_worker_processes(*model_args, **model_kwargs)
+ for rank in range(1, self.world_size):
+ self.command_queues[rank].put(("start_nccl_process_group", None))
+ self._start_nccl_process_group()
+
+ if self.rank == 0:
+ logger.info("\n\n\n\t*** DONE loading model on all ranks ***\n\n")
+
+ @torch.inference_mode()
+ def handle_request(self, request):
+ """Dispatch a request based on its type."""
+ if self.has_shutdown:
+ raise RuntimeError(
+ "cannot handle request after the predictor has shutdown; please create a new predictor"
+ )
+
+ # when starting a session, we need to create a session id before dispatching
+ # the request to the workers
+ if request["type"] == "start_session" and request.get("session_id") is None:
+ request["session_id"] = str(uuid.uuid4())
+ # dispatch the request to all worker processes
+ if self.world_size > 1 and self.rank == 0:
+ for rank in range(1, self.world_size):
+ self.command_queues[rank].put((request, False))
+
+ response = super().handle_request(request)
+
+ if self.world_size > 1:
+ torch.distributed.barrier() # wait for all ranks to finish
+ return response
+
+ @torch.inference_mode()
+ def handle_stream_request(self, request):
+ """Dispatch a stream request based on its type."""
+ if self.has_shutdown:
+ raise RuntimeError(
+ "cannot handle request after the predictor has shutdown; please create a new predictor"
+ )
+
+ # dispatch the request to all worker processes
+ if self.world_size > 1 and self.rank == 0:
+ for rank in range(1, self.world_size):
+ self.command_queues[rank].put((request, True))
+
+ yield from super().handle_stream_request(request)
+
+ if self.world_size > 1:
+ torch.distributed.barrier() # wait for all ranks to finish
+
+ def _start_worker_processes(self, *model_args, **model_kwargs):
+ """Start worker processes for handling model inference."""
+ world_size = self.world_size
+ logger.info(f"spawning {world_size - 1} worker processes")
+ # Use "spawn" (instead of "fork") for different PyTorch or CUDA context
+ mp_ctx = mp.get_context("spawn")
+ self.command_queues = {rank: mp_ctx.Queue() for rank in range(1, world_size)}
+ self.result_queues = {rank: mp_ctx.Queue() for rank in range(1, world_size)}
+ parent_pid = os.getpid()
+ for rank in range(1, world_size):
+ # set the environment variables for each worker process
+ os.environ["IS_MAIN_PROCESS"] = "0" # mark this as a worker process
+ os.environ["RANK"] = f"{rank}"
+ worker_process = mp_ctx.Process(
+ target=Sam3VideoPredictorMultiGPU._worker_process_command_loop,
+ args=(
+ rank,
+ world_size,
+ self.command_queues[rank],
+ self.result_queues[rank],
+ model_args,
+ model_kwargs,
+ self.gpus_to_use,
+ parent_pid,
+ ),
+ daemon=True,
+ )
+ worker_process.start()
+ # revert the environment variables for the main process
+ os.environ["IS_MAIN_PROCESS"] = "1"
+ os.environ["RANK"] = "0"
+ # wait for all the worker processes to load the model and collect their PIDs
+ self.worker_pids = {}
+ for rank in range(1, self.world_size):
+ # a large timeout to cover potentially long model loading time due to compilation
+ _, worker_pid = self.result_queues[rank].get(timeout=7200)
+ self.worker_pids[rank] = worker_pid
+ logger.info(f"spawned {world_size - 1} worker processes")
+
+ def _start_nccl_process_group(self):
+ rank = int(os.environ["RANK"])
+ world_size = int(os.environ["WORLD_SIZE"])
+ if world_size == 1:
+ return
+
+ logger.debug(f"starting NCCL process group on {rank=} with {world_size=}")
+ assert not torch.distributed.is_initialized()
+ # use the "env://" init method with environment variables set in start_worker_processes
+ # a short 3-min timeout to quickly detect any synchronization failures
+ timeout_sec = int(os.getenv("SAM3_COLLECTIVE_OP_TIMEOUT_SEC", "180"))
+ timeout = datetime.timedelta(seconds=timeout_sec)
+ torch.distributed.init_process_group(
+ backend="nccl",
+ init_method="env://",
+ timeout=timeout,
+ device_id=self.device,
+ )
+ # warm-up the NCCL process group by running a dummy all-reduce
+ tensor = torch.ones(1024, 1024).cuda()
+ torch.distributed.all_reduce(tensor)
+ logger.debug(f"started NCCL process group on {rank=} with {world_size=}")
+
+ def _find_free_port(self) -> int:
+ """
+ Find a free port (a random free port from 1024 to 65535 will be selected)
+        https://stackoverflow.com/questions/1365265/on-localhost-how-do-i-pick-a-free-port-number
+ """
+ with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+ s.bind(("", 0))
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ return s.getsockname()[1]
+
+ @staticmethod
+ def _worker_process_command_loop(
+ rank,
+ world_size,
+ command_queue,
+ result_queue,
+ model_args,
+ model_kwargs,
+ gpus_to_use,
+ parent_pid,
+ ):
+ """
+ The command loop for each worker process. It listens to commands from the main process
+ and executes them using the model.
+ """
+ logger.info(f"starting worker process {rank=} with {world_size=}")
+ # verify that the environment variables are set correctly
+ assert int(os.environ["IS_MAIN_PROCESS"]) == 0
+ assert int(os.environ["RANK"]) == rank
+ assert int(os.environ["WORLD_SIZE"]) == world_size
+ # load the model in this worker process
+ predictor = Sam3VideoPredictorMultiGPU(
+ *model_args, gpus_to_use=gpus_to_use, **model_kwargs
+ )
+ logger.info(f"started worker {rank=} with {world_size=}")
+ # return the worker process id to the main process for bookkeeping
+ worker_pid = os.getpid()
+ result_queue.put(("load_model", worker_pid))
+
+ # wait for the command to start the NCCL process group
+ request_type, _ = command_queue.get(timeout=7200)
+ assert request_type == "start_nccl_process_group"
+ predictor._start_nccl_process_group()
+
+ # keep listening to commands from the main process
+ while True:
+ try:
+ request, is_stream_request = command_queue.get(timeout=5.0)
+ if request == "shutdown":
+ logger.info(f"worker {rank=} shutting down")
+ torch.distributed.destroy_process_group()
+ result_queue.put(("shutdown", True)) # acknowledge the shutdown
+ sys.exit(0)
+
+ logger.debug(f"worker {rank=} received request {request['type']=}")
+ if is_stream_request:
+ for _ in predictor.handle_stream_request(request):
+ pass # handle stream requests in a generator fashion
+ else:
+ predictor.handle_request(request)
+ except queue.Empty:
+ # Usually Python's multiprocessing module will shutdown all the daemon worker
+ # processes when the main process exits gracefully. However, the user may kill
+ # the main process using SIGKILL and thereby leaving no chance for the main process
+ # to clean up its daemon child processes. So here we manually check whether the
+ # parent process still exists (every 5 sec as in `command_queue.get` timeout).
+ if not psutil.pid_exists(parent_pid):
+ logger.info(
+ f"stopping worker {rank=} as its parent process has exited"
+ )
+ sys.exit(1)
+ except Exception as e:
+ logger.error(f"worker {rank=} exception: {e}", exc_info=True)
+
+ def shutdown(self):
+ """Shutdown all worker processes."""
+ if self.rank == 0 and self.world_size > 1:
+ logger.info(f"shutting down {self.world_size - 1} worker processes")
+ for rank in range(1, self.world_size):
+ self.command_queues[rank].put(("shutdown", False))
+ torch.distributed.destroy_process_group()
+ for rank in range(1, self.world_size):
+ self.result_queues[rank].get() # wait for the worker to acknowledge
+ logger.info(f"shut down {self.world_size - 1} worker processes")
+ self.has_shutdown = True
+
+ super().shutdown()
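+
+
+# Illustrative usage sketch (not exercised by the library itself). The checkpoint,
+# BPE, and video paths below are placeholder assumptions; running this requires the
+# actual model weights and a CUDA GPU.
+if __name__ == "__main__":
+    predictor = Sam3VideoPredictor(
+        checkpoint_path="checkpoints/sam3.pth",  # placeholder path (assumption)
+        bpe_path="checkpoints/bpe_vocab.gz",  # placeholder path (assumption)
+    )
+    # open a session on an MP4 file (or a directory of JPEG frames)
+    session = predictor.handle_request(
+        {"type": "start_session", "resource_path": "examples/bedroom.mp4"}
+    )
+    session_id = session["session_id"]
+    # add a text prompt on frame 0
+    predictor.handle_request(
+        {
+            "type": "add_prompt",
+            "session_id": session_id,
+            "frame_index": 0,
+            "text": "bed",
+        }
+    )
+    # propagate through the video and consume the per-frame outputs
+    for response in predictor.handle_stream_request(
+        {"type": "propagate_in_video", "session_id": session_id}
+    ):
+        print(response["frame_index"], type(response["outputs"]))
+    predictor.handle_request({"type": "close_session", "session_id": session_id})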
diff --git a/sam3/model/text_encoder_ve.py b/sam3/model/text_encoder_ve.py
new file mode 100644
index 0000000000000000000000000000000000000000..b1cf145ca5eae383e0ecce8d7358570125eaadb3
--- /dev/null
+++ b/sam3/model/text_encoder_ve.py
@@ -0,0 +1,328 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from collections import OrderedDict
+from typing import Callable, List, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+from torch.utils.checkpoint import checkpoint
+
+from .model_misc import LayerScale
+
+
+class ResidualAttentionBlock(nn.Module):
+ def __init__(
+ self,
+ d_model: int,
+ n_head: int,
+ mlp_ratio: float = 4.0,
+ ls_init_value: Optional[float] = None,
+ act_layer: Callable[[], nn.Module] = nn.GELU,
+ norm_layer: Callable[[int], nn.Module] = nn.LayerNorm,
+ ):
+ super().__init__()
+ # Attention
+ self.attn = nn.MultiheadAttention(d_model, n_head, batch_first=True)
+
+ # LayerNorm, LayerScale
+ self.ln_1 = norm_layer(d_model)
+ self.ln_2 = norm_layer(d_model)
+
+ self.ls_1 = (
+ LayerScale(d_model, ls_init_value)
+ if ls_init_value is not None
+ else nn.Identity()
+ )
+ self.ls_2 = (
+ LayerScale(d_model, ls_init_value)
+ if ls_init_value is not None
+ else nn.Identity()
+ )
+
+ # MLP
+ mlp_width = int(d_model * mlp_ratio)
+ self.mlp = nn.Sequential(
+ OrderedDict(
+ [
+ ("c_fc", nn.Linear(d_model, mlp_width)),
+ ("gelu", act_layer()),
+ ("c_proj", nn.Linear(mlp_width, d_model)),
+ ]
+ )
+ )
+
+ def attention(
+ self,
+ q_x: torch.Tensor,
+ k_x: Optional[torch.Tensor] = None,
+ v_x: Optional[torch.Tensor] = None,
+ attn_mask: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ k_x = k_x if k_x is not None else q_x
+ v_x = v_x if v_x is not None else q_x
+ if attn_mask is not None:
+ # Leave boolean masks as is
+ if not attn_mask.dtype == torch.bool:
+ attn_mask = attn_mask.to(q_x.dtype)
+
+ return self.attn(q_x, k_x, v_x, need_weights=False, attn_mask=attn_mask)[0]
+
+ def forward(
+ self,
+ q_x: torch.Tensor,
+ k_x: Optional[torch.Tensor] = None,
+ v_x: Optional[torch.Tensor] = None,
+ attn_mask: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ k_x = (
+ self.ln_1_kv(k_x) if hasattr(self, "ln_1_kv") and k_x is not None else None
+ )
+ v_x = (
+ self.ln_1_kv(v_x) if hasattr(self, "ln_1_kv") and v_x is not None else None
+ )
+ x = q_x + self.ls_1(
+ self.attention(q_x=self.ln_1(q_x), k_x=k_x, v_x=v_x, attn_mask=attn_mask)
+ )
+ x = x + self.ls_2(self.mlp(self.ln_2(x)))
+ return x
+
+
+class Transformer(nn.Module):
+ def __init__(
+ self,
+ width: int,
+ layers: int,
+ heads: int,
+ mlp_ratio: float = 4.0,
+ ls_init_value: Optional[float] = None,
+ act_layer: Callable[[], nn.Module] = nn.GELU,
+ norm_layer: Callable[[int], nn.Module] = nn.LayerNorm,
+ compile_mode: Optional[str] = None,
+ use_act_checkpoint: bool = False,
+ ):
+ super().__init__()
+ self.width = width
+ self.layers = layers
+ self.grad_checkpointing = use_act_checkpoint
+ self.resblocks = nn.ModuleList(
+ [
+ ResidualAttentionBlock(
+ width,
+ heads,
+ mlp_ratio,
+ ls_init_value=ls_init_value,
+ act_layer=act_layer,
+ norm_layer=norm_layer,
+ )
+ for _ in range(layers)
+ ]
+ )
+
+ if compile_mode is not None:
+ self.forward = torch.compile(
+ self.forward, mode=compile_mode, fullgraph=True
+ )
+ if self.grad_checkpointing:
+ torch._dynamo.config.optimize_ddp = False
+
+ def forward(
+ self,
+ x: torch.Tensor,
+ attn_mask: Optional[torch.Tensor] = None,
+ ) -> torch.Tensor:
+ for _, r in enumerate(self.resblocks):
+ if (
+ self.grad_checkpointing
+ and not torch.jit.is_scripting()
+ and self.training
+ ):
+ x = checkpoint(r, x, None, None, attn_mask, use_reentrant=False)
+ else:
+ x = r(
+ x,
+ attn_mask=attn_mask,
+ )
+ return x
+
+
+def text_global_pool(
+ x: torch.Tensor, text: Optional[torch.Tensor] = None, pool_type: str = "argmax"
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ if pool_type == "first":
+ pooled, tokens = x[:, 0], x[:, 1:]
+ elif pool_type == "last":
+ pooled, tokens = x[:, -1], x[:, :-1]
+ elif pool_type == "argmax":
+ # take features from the eot embedding (eot_token is the highest number in each sequence)
+ assert text is not None
+ pooled, tokens = x[torch.arange(x.shape[0]), text.argmax(dim=-1)], x
+ else:
+ pooled = tokens = x
+ return pooled, tokens
+
+
+class TextTransformer(nn.Module):
+ def __init__(
+ self,
+ context_length: int = 77,
+ vocab_size: int = 49408,
+ width: int = 512,
+ heads: int = 8,
+ layers: int = 12,
+ mlp_ratio: float = 4.0,
+ ls_init_value: Optional[float] = None,
+ output_dim: int = 512,
+ no_causal_mask: bool = False,
+ pool_type: str = "none", # no pooling
+ proj_bias: bool = False,
+ act_layer: Callable = nn.GELU,
+ norm_layer: Callable = nn.LayerNorm,
+ output_tokens: bool = False,
+ use_ln_post: bool = True,
+ compile_mode: Optional[str] = None,
+ use_act_checkpoint: bool = False,
+ ):
+ super().__init__()
+ assert pool_type in ("first", "last", "argmax", "none")
+ self.output_tokens = output_tokens
+ self.num_pos = self.context_length = context_length
+ self.vocab_size = vocab_size
+ self.width = width
+ self.output_dim = output_dim
+ self.heads = heads
+ self.pool_type = pool_type
+
+ self.token_embedding = nn.Embedding(self.vocab_size, width)
+ self.positional_embedding = nn.Parameter(torch.empty(self.num_pos, width))
+ self.transformer = Transformer(
+ width=width,
+ layers=layers,
+ heads=heads,
+ mlp_ratio=mlp_ratio,
+ ls_init_value=ls_init_value,
+ act_layer=act_layer,
+ norm_layer=norm_layer,
+ compile_mode=compile_mode,
+ use_act_checkpoint=use_act_checkpoint,
+ )
+ self.ln_final = norm_layer(width) if use_ln_post else nn.Identity()
+ if no_causal_mask:
+ self.attn_mask = None
+ else:
+ self.register_buffer(
+ "attn_mask", self.build_causal_mask(), persistent=False
+ )
+ if proj_bias:
+ self.text_projection = nn.Linear(width, output_dim)
+ else:
+ self.text_projection = nn.Parameter(torch.empty(width, output_dim))
+
+ def build_causal_mask(self) -> torch.Tensor:
+        # create a causal attention mask so that each token only attends to earlier tokens;
+        # pytorch uses an additive attention mask, so fill the masked positions with -inf
+ mask = torch.empty(self.num_pos, self.num_pos)
+ mask.fill_(float("-inf"))
+ mask.triu_(1) # zero out the lower diagonal
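+        # e.g. for num_pos = 4 the resulting additive mask is
+        #   [[0., -inf, -inf, -inf],
+        #    [0.,   0., -inf, -inf],
+        #    [0.,   0.,   0., -inf],
+        #    [0.,   0.,   0.,   0.]]
+        # i.e. token i can only attend to tokens j <= i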
+ return mask
+
+ def forward(
+ self, text: torch.Tensor
+ ) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
+ seq_len = text.shape[1]
+ x = self.token_embedding(text) # [batch_size, n_ctx, d_model]
+
+ attn_mask = self.attn_mask
+ if attn_mask is not None:
+ attn_mask = attn_mask[:seq_len, :seq_len]
+
+ x = x + self.positional_embedding[:seq_len]
+ x = self.transformer(x, attn_mask=attn_mask)
+
+ x = self.ln_final(x)
+ pooled, tokens = text_global_pool(x, text, pool_type=self.pool_type)
+ if self.text_projection is not None:
+ if isinstance(self.text_projection, nn.Linear):
+ pooled = self.text_projection(pooled)
+ else:
+ pooled = pooled @ self.text_projection
+ if self.output_tokens:
+ return pooled, tokens
+ return pooled
+
+
+class VETextEncoder(nn.Module):
+ def __init__(
+ self,
+ d_model: int,
+ tokenizer: Callable,
+ width: int = 1024,
+ heads: int = 16,
+ layers: int = 24,
+ context_length: int = 32,
+ vocab_size: int = 49408,
+ use_ln_post: bool = True,
+ compile_mode: Optional[str] = None,
+ use_act_checkpoint: bool = True,
+ ):
+ super().__init__()
+ self.context_length = context_length
+ self.use_ln_post = use_ln_post
+ self.tokenizer = tokenizer
+
+ self.encoder = TextTransformer(
+ context_length=self.context_length,
+ vocab_size=vocab_size,
+ width=width,
+ heads=heads,
+ layers=layers,
+ # we want the tokens, not just the pooled output
+ output_tokens=True,
+ use_ln_post=use_ln_post,
+ compile_mode=compile_mode,
+ use_act_checkpoint=use_act_checkpoint,
+ )
+ self.resizer = nn.Linear(self.encoder.width, d_model)
+
+ def forward(
+ self,
+ text: Union[List[str], Tuple[torch.Tensor, torch.Tensor, dict]],
+ input_boxes: Optional[List] = None,
+ device: torch.device = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
+ if isinstance(text[0], str):
+            # input_boxes are not supported when the text is passed as raw strings
+ assert input_boxes is None or len(input_boxes) == 0, "not supported"
+
+ # Encode the text
+ tokenized = self.tokenizer(text, context_length=self.context_length).to(
+ device
+ ) # [b, seq_len]
+ text_attention_mask = (tokenized != 0).bool()
+
+ # manually embed the tokens
+ inputs_embeds = self.encoder.token_embedding(
+ tokenized
+ ) # [b, seq_len, d=1024]
+ _, text_memory = self.encoder(tokenized) # [b, seq_len, d=1024]
+
+ assert text_memory.shape[1] == inputs_embeds.shape[1]
+            # Invert the attention mask since pytorch's transformer uses the opposite convention
+ text_attention_mask = text_attention_mask.ne(1)
+ # Transpose memory because pytorch's attention expects sequence first
+ text_memory = text_memory.transpose(0, 1)
+ # Resize the encoder hidden states to be of the same d_model as the decoder
+ text_memory_resized = self.resizer(text_memory)
+ else:
+ # The text is already encoded, use as is.
+ text_attention_mask, text_memory_resized, tokenized = text
+ inputs_embeds = tokenized["inputs_embeds"]
+ assert (
+ input_boxes is None or len(input_boxes) == 0
+ ), "Can't replace boxes in text if it's already encoded"
+
+ # Note that the input_embeds are returned in pytorch's convention (sequence first)
+ return (
+ text_attention_mask,
+ text_memory_resized,
+ inputs_embeds.transpose(0, 1),
+ )
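+
+
+# Shape-check sketch (illustrative only): the tiny hyper-parameters below are
+# arbitrary assumptions, and the parameters created with torch.empty are
+# uninitialized, so only the tensor shapes are meaningful here.
+if __name__ == "__main__":
+    x = torch.randn(2, 3, 4)  # (batch, seq_len, dim)
+    pooled, tokens = text_global_pool(x, pool_type="first")
+    print(pooled.shape, tokens.shape)  # torch.Size([2, 4]) torch.Size([2, 2, 4])
+
+    model = TextTransformer(
+        context_length=8, vocab_size=100, width=16, heads=2, layers=1, output_dim=16
+    )
+    dummy_tokens = torch.randint(0, 100, (2, 8))
+    out = model(dummy_tokens)  # pool_type="none" keeps per-token features
+    print(out.shape)  # torch.Size([2, 8, 16])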
diff --git a/sam3/model/tokenizer_ve.py b/sam3/model/tokenizer_ve.py
new file mode 100644
index 0000000000000000000000000000000000000000..ef42773db37e84ad8fe165e7f334dab617d47f35
--- /dev/null
+++ b/sam3/model/tokenizer_ve.py
@@ -0,0 +1,253 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+Text Tokenizer.
+
+Copied and lightly adapted from VE repo, which in turn copied
+from open_clip and openAI CLIP.
+"""
+
+import gzip
+import html
+import io
+import os
+import string
+from functools import lru_cache
+from typing import List, Optional, Union
+
+import ftfy
+import regex as re
+import torch
+from iopath.common.file_io import g_pathmgr
+
+
+# https://stackoverflow.com/q/62691279
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+DEFAULT_CONTEXT_LENGTH = 77
+
+
+@lru_cache()
+def bytes_to_unicode():
+ """
+    Returns a list of utf-8 bytes and a corresponding list of unicode strings.
+ The reversible bpe codes work on unicode strings.
+ This means you need a large # of unicode characters in your vocab if you want to avoid UNKs.
+ When you're at something like a 10B token dataset you end up needing around 5K for decent coverage.
+ This is a significant percentage of your normal, say, 32K bpe vocab.
+ To avoid that, we want lookup tables between utf-8 bytes and unicode strings.
+ And avoids mapping to whitespace/control characters the bpe code barfs on.
+ """
+ bs = (
+ list(range(ord("!"), ord("~") + 1))
+ + list(range(ord("¡"), ord("¬") + 1))
+ + list(range(ord("®"), ord("ÿ") + 1))
+ )
+ cs = bs[:]
+ n = 0
+ for b in range(2**8):
+ if b not in bs:
+ bs.append(b)
+ cs.append(2**8 + n)
+ n += 1
+ cs = [chr(n) for n in cs]
+ return dict(zip(bs, cs))
+
+
+def get_pairs(word):
+ """Return set of symbol pairs in a word.
+ Word is represented as tuple of symbols (symbols being variable-length strings).
+ """
+ pairs = set()
+ prev_char = word[0]
+ for char in word[1:]:
+ pairs.add((prev_char, char))
+ prev_char = char
+ return pairs
+
+
+def basic_clean(text):
+ text = ftfy.fix_text(text)
+ text = html.unescape(html.unescape(text))
+ return text.strip()
+
+
+def whitespace_clean(text):
+ text = re.sub(r"\s+", " ", text)
+ text = text.strip()
+ return text
+
+
+def _clean_canonicalize(x):
+ # basic, remove whitespace, remove punctuation, lower case
+ return canonicalize_text(basic_clean(x))
+
+
+def _clean_lower(x):
+ # basic, remove whitespace, lower case
+ return whitespace_clean(basic_clean(x)).lower()
+
+
+def _clean_whitespace(x):
+ # basic, remove whitespace
+ return whitespace_clean(basic_clean(x))
+
+
+def get_clean_fn(type: str):
+ if type == "canonicalize":
+ return _clean_canonicalize
+ elif type == "lower":
+ return _clean_lower
+ elif type == "whitespace":
+ return _clean_whitespace
+ else:
+ assert False, f"Invalid clean function ({type})."
+
+
+def canonicalize_text(text, *, keep_punctuation_exact_string=None):
+ """Returns canonicalized `text` (lowercase and punctuation removed).
+ From: https://github.com/google-research/big_vision/blob/53f18caf27a9419231bbf08d3388b07671616d3d/big_vision/evaluators/proj/image_text/prompt_engineering.py#L94
+ Args:
+ text: string to be canonicalized.
+      keep_punctuation_exact_string: If provided, then this exact string is kept.
+ For example providing '{}' will keep any occurrences of '{}' (but will
+ still remove '{' and '}' that appear separately).
+ """
+ text = text.replace("_", " ")
+ if keep_punctuation_exact_string:
+ text = keep_punctuation_exact_string.join(
+ part.translate(str.maketrans("", "", string.punctuation))
+ for part in text.split(keep_punctuation_exact_string)
+ )
+ else:
+ text = text.translate(str.maketrans("", "", string.punctuation))
+ text = text.lower()
+ text = re.sub(r"\s+", " ", text)
+ return text.strip()
+
+
+class SimpleTokenizer(object):
+ def __init__(
+ self,
+ bpe_path: Union[str, os.PathLike],
+ additional_special_tokens: Optional[List[str]] = None,
+ context_length: Optional[int] = DEFAULT_CONTEXT_LENGTH,
+ clean: str = "lower",
+ ):
+ self.byte_encoder = bytes_to_unicode()
+ self.byte_decoder = {v: k for k, v in self.byte_encoder.items()}
+ with g_pathmgr.open(bpe_path, "rb") as fh:
+ bpe_bytes = io.BytesIO(fh.read())
+ merges = gzip.open(bpe_bytes).read().decode("utf-8").split("\n")
+ # merges = gzip.open(bpe_path).read().decode("utf-8").split("\n")
+ merges = merges[1 : 49152 - 256 - 2 + 1]
+ merges = [tuple(merge.split()) for merge in merges]
+ vocab = list(bytes_to_unicode().values())
+        vocab = vocab + [v + "</w>" for v in vocab]
+ for merge in merges:
+ vocab.append("".join(merge))
+        special_tokens = ["<start_of_text>", "<end_of_text>"]
+ if additional_special_tokens:
+ special_tokens += additional_special_tokens
+ vocab.extend(special_tokens)
+ self.encoder = dict(zip(vocab, range(len(vocab))))
+ self.decoder = {v: k for k, v in self.encoder.items()}
+ self.bpe_ranks = dict(zip(merges, range(len(merges))))
+ self.cache = {t: t for t in special_tokens}
+ special = "|".join(special_tokens)
+ self.pat = re.compile(
+ special + r"""|'s|'t|'re|'ve|'m|'ll|'d|[\p{L}]+|[\p{N}]|[^\s\p{L}\p{N}]+""",
+ re.IGNORECASE,
+ )
+ self.vocab_size = len(self.encoder)
+ self.all_special_ids = [self.encoder[t] for t in special_tokens]
+ self.sot_token_id = self.all_special_ids[0]
+ self.eot_token_id = self.all_special_ids[1]
+ self.context_length = context_length
+ self.clean_fn = get_clean_fn(clean)
+
+ def bpe(self, token):
+ if token in self.cache:
+ return self.cache[token]
+        word = tuple(token[:-1]) + (token[-1] + "</w>",)
+ pairs = get_pairs(word)
+ if not pairs:
+            return token + "</w>"
+ while True:
+ bigram = min(pairs, key=lambda pair: self.bpe_ranks.get(pair, float("inf")))
+ if bigram not in self.bpe_ranks:
+ break
+ first, second = bigram
+ new_word = []
+ i = 0
+ while i < len(word):
+ try:
+ j = word.index(first, i)
+ new_word.extend(word[i:j])
+ i = j
+            except ValueError:
+ new_word.extend(word[i:])
+ break
+ if word[i] == first and i < len(word) - 1 and word[i + 1] == second:
+ new_word.append(first + second)
+ i += 2
+ else:
+ new_word.append(word[i])
+ i += 1
+ new_word = tuple(new_word)
+ word = new_word
+ if len(word) == 1:
+ break
+ else:
+ pairs = get_pairs(word)
+ word = " ".join(word)
+ self.cache[token] = word
+ return word
+
+ def encode(self, text):
+ bpe_tokens = []
+ text = self.clean_fn(text)
+ for token in re.findall(self.pat, text):
+ token = "".join(self.byte_encoder[b] for b in token.encode("utf-8"))
+ bpe_tokens.extend(
+ self.encoder[bpe_token] for bpe_token in self.bpe(token).split(" ")
+ )
+ return bpe_tokens
+
+ def decode(self, tokens):
+ text = "".join([self.decoder[token] for token in tokens])
+ text = (
+ bytearray([self.byte_decoder[c] for c in text])
+ .decode("utf-8", errors="replace")
+            .replace("</w>", " ")
+ )
+ return text
+
+ def __call__(
+ self, texts: Union[str, List[str]], context_length: Optional[int] = None
+ ) -> torch.LongTensor:
+ """Returns the tokenized representation of given input string(s)
+ Parameters
+ ----------
+ texts : Union[str, List[str]]
+ An input string or a list of input strings to tokenize
+ context_length : int
+ The context length to use; all CLIP models use 77 as the context length
+ Returns
+ -------
+ A two-dimensional tensor containing the resulting tokens, shape = [number of input strings, context_length]
+ """
+ if isinstance(texts, str):
+ texts = [texts]
+ context_length = context_length or self.context_length
+ assert context_length, "Please set a valid context length"
+ all_tokens = [
+ [self.sot_token_id] + self.encode(text) + [self.eot_token_id]
+ for text in texts
+ ]
+ result = torch.zeros(len(all_tokens), context_length, dtype=torch.long)
+ for i, tokens in enumerate(all_tokens):
+ if len(tokens) > context_length:
+ tokens = tokens[:context_length] # Truncate
+ tokens[-1] = self.eot_token_id
+ result[i, : len(tokens)] = torch.tensor(tokens)
+ return result
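+
+
+# Illustrative usage sketch: the BPE vocab path below is a placeholder assumption
+# (any CLIP-style gzipped BPE merges file in the expected format will do).
+if __name__ == "__main__":
+    tokenizer = SimpleTokenizer(bpe_path="assets/bpe_simple_vocab_16e6.txt.gz")
+    tokens = tokenizer(["bed", "two mice in an open field"], context_length=32)
+    # each row is [sot, BPE token ids..., eot, 0-padding]
+    print(tokens.shape)  # torch.Size([2, 32])
+    print(tokens[0, :5])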
diff --git a/sam3/model/utils/__init__.py b/sam3/model/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..47d98588ca232dc5bbaa771a23e5afe3e5deaa13
--- /dev/null
+++ b/sam3/model/utils/__init__.py
@@ -0,0 +1,5 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
diff --git a/sam3/model/utils/misc.py b/sam3/model/utils/misc.py
new file mode 100644
index 0000000000000000000000000000000000000000..4f072067d5c91e34bd8a98beea9ff2f02e986189
--- /dev/null
+++ b/sam3/model/utils/misc.py
@@ -0,0 +1,77 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from collections import defaultdict
+from dataclasses import fields, is_dataclass
+from typing import Any, Mapping, Protocol, runtime_checkable
+
+import torch
+
+
+def _is_named_tuple(x) -> bool:
+ return isinstance(x, tuple) and hasattr(x, "_asdict") and hasattr(x, "_fields")
+
+
+@runtime_checkable
+class _CopyableData(Protocol):
+ def to(self, device: torch.device, *args: Any, **kwargs: Any):
+ """Copy data to the specified device"""
+ ...
+
+
+def copy_data_to_device(data, device: torch.device, *args: Any, **kwargs: Any):
+ """Function that recursively copies data to a torch.device.
+
+ Args:
+ data: The data to copy to device
+ device: The device to which the data should be copied
+ args: positional arguments that will be passed to the `to` call
+ kwargs: keyword arguments that will be passed to the `to` call
+
+ Returns:
+ The data on the correct device
+ """
+
+ if _is_named_tuple(data):
+ return type(data)(
+ **copy_data_to_device(data._asdict(), device, *args, **kwargs)
+ )
+ elif isinstance(data, (list, tuple)):
+ return type(data)(copy_data_to_device(e, device, *args, **kwargs) for e in data)
+ elif isinstance(data, defaultdict):
+ return type(data)(
+ data.default_factory,
+ {
+ k: copy_data_to_device(v, device, *args, **kwargs)
+ for k, v in data.items()
+ },
+ )
+ elif isinstance(data, Mapping):
+ return type(data)(
+ {
+ k: copy_data_to_device(v, device, *args, **kwargs)
+ for k, v in data.items()
+ }
+ )
+ elif is_dataclass(data) and not isinstance(data, type):
+ new_data_class = type(data)(
+ **{
+ field.name: copy_data_to_device(
+ getattr(data, field.name), device, *args, **kwargs
+ )
+ for field in fields(data)
+ if field.init
+ }
+ )
+ for field in fields(data):
+ if not field.init:
+ setattr(
+ new_data_class,
+ field.name,
+ copy_data_to_device(
+ getattr(data, field.name), device, *args, **kwargs
+ ),
+ )
+ return new_data_class
+ elif isinstance(data, _CopyableData):
+ return data.to(device, *args, **kwargs)
+ return data
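+
+
+# Example (illustrative sketch): `copy_data_to_device` recursively walks nested
+# containers (lists, tuples, dicts, dataclasses, named tuples) and moves every
+# leaf that exposes a `.to(device)` method.
+if __name__ == "__main__":
+    batch = {"image": torch.zeros(2, 3, 4, 4), "meta": {"ids": torch.arange(2)}}
+    moved = copy_data_to_device(batch, torch.device("cpu"))
+    print(moved["image"].device, moved["meta"]["ids"].device)  # cpu cpu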
diff --git a/sam3/model/utils/sam1_utils.py b/sam3/model/utils/sam1_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..18f0d04c91bb0e12904a58d3553b20a97b0f6f35
--- /dev/null
+++ b/sam3/model/utils/sam1_utils.py
@@ -0,0 +1,119 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import warnings
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.transforms import Normalize, Resize, ToTensor
+
+
+# Adapted from https://github.com/facebookresearch/sam2/blob/main/sam2/utils/transforms.py
+class SAM2Transforms(nn.Module):
+ def __init__(
+ self, resolution, mask_threshold, max_hole_area=0.0, max_sprinkle_area=0.0
+ ):
+ """
+ Transforms for SAM2.
+ """
+ super().__init__()
+ self.resolution = resolution
+ self.mask_threshold = mask_threshold
+ self.max_hole_area = max_hole_area
+ self.max_sprinkle_area = max_sprinkle_area
+ self.mean = [0.5, 0.5, 0.5]
+ self.std = [0.5, 0.5, 0.5]
+ self.to_tensor = ToTensor()
+ self.transforms = torch.jit.script(
+ nn.Sequential(
+ Resize((self.resolution, self.resolution)),
+ Normalize(self.mean, self.std),
+ )
+ )
+
+ def __call__(self, x):
+ x = self.to_tensor(x)
+ return self.transforms(x)
+
+ def forward_batch(self, img_list):
+ img_batch = [self.transforms(self.to_tensor(img)) for img in img_list]
+ img_batch = torch.stack(img_batch, dim=0)
+ return img_batch
+
+ def transform_coords(
+ self, coords: torch.Tensor, normalize=False, orig_hw=None
+ ) -> torch.Tensor:
+ """
+        Expects a torch tensor with length 2 in the last dimension. The coordinates can be in absolute image or normalized coordinates.
+        If the coords are in absolute image coordinates, normalize should be set to True and the original image size is required.
+
+        Returns
+            Coordinates rescaled to the model's input resolution (i.e. in the range [0, self.resolution]), as expected by the SAM2 model.
+ """
+ if normalize:
+ assert orig_hw is not None
+ h, w = orig_hw
+ coords = coords.clone()
+ coords[..., 0] = coords[..., 0] / w
+ coords[..., 1] = coords[..., 1] / h
+
+ coords = coords * self.resolution # unnormalize coords
+ return coords
+
+ def transform_boxes(
+ self, boxes: torch.Tensor, normalize=False, orig_hw=None
+ ) -> torch.Tensor:
+ """
+        Expects a tensor of shape Bx4. The coordinates can be in absolute image or normalized coordinates.
+        If the coords are in absolute image coordinates, normalize should be set to True and the original image size is required.
+ """
+ boxes = self.transform_coords(boxes.reshape(-1, 2, 2), normalize, orig_hw)
+ return boxes
+
+ def postprocess_masks(self, masks: torch.Tensor, orig_hw) -> torch.Tensor:
+ """
+ Perform PostProcessing on output masks.
+ """
+ masks = masks.float()
+ input_masks = masks
+ mask_flat = masks.flatten(0, 1).unsqueeze(1) # flatten as 1-channel image
+ try:
+ from sam3.perflib.connected_components import connected_components
+
+ if self.max_hole_area > 0:
+                # Holes are those connected components in background with area <= self.max_hole_area
+ # (background regions are those with mask scores <= self.mask_threshold)
+ labels, areas = connected_components(
+ (mask_flat <= self.mask_threshold).to(torch.uint8)
+ )
+ is_hole = (labels > 0) & (areas <= self.max_hole_area)
+ is_hole = is_hole.reshape_as(masks)
+ # We fill holes with a small positive mask score (10.0) to change them to foreground.
+ masks = torch.where(is_hole, self.mask_threshold + 10.0, masks)
+
+ if self.max_sprinkle_area > 0:
+ labels, areas = connected_components(
+ (mask_flat > self.mask_threshold).to(torch.uint8)
+ )
+                is_sprinkle = (labels > 0) & (areas <= self.max_sprinkle_area)
+                is_sprinkle = is_sprinkle.reshape_as(masks)
+                # We assign sprinkles (small isolated foreground specks) a negative mask score (-10.0) to change them to background.
+                masks = torch.where(is_sprinkle, self.mask_threshold - 10.0, masks)
+ except Exception as e:
+ # Skip the post-processing step if the CUDA kernel fails
+ warnings.warn(
+ f"{e}\n\nSkipping the post-processing step due to the error above. You can "
+ "still use SAM 3 and it's OK to ignore the error above, although some post-processing "
+ "functionality may be limited (which doesn't affect the results in most cases; see "
+ "https://github.com/facebookresearch/sam3/blob/main/INSTALL.md).",
+ category=UserWarning,
+ stacklevel=2,
+ )
+ masks = input_masks
+
+ masks = F.interpolate(masks, orig_hw, mode="bilinear", align_corners=False)
+ return masks
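+
+
+# Example (illustrative sketch): preparing one RGB image and a box prompt for a
+# 1008x1008 model input. The resolution/threshold values are illustrative, not
+# defaults prescribed by this module.
+if __name__ == "__main__":
+    import numpy as np
+
+    transforms = SAM2Transforms(resolution=1008, mask_threshold=0.0)
+    image = np.zeros((480, 640, 3), dtype=np.uint8)  # H x W x 3 uint8 frame
+    x = transforms(image)  # -> (3, 1008, 1008) float tensor
+    boxes = torch.tensor([[10.0, 20.0, 200.0, 300.0]])  # XYXY in pixels
+    boxes = transforms.transform_boxes(boxes, normalize=True, orig_hw=(480, 640))
+    print(x.shape, boxes.shape)  # torch.Size([3, 1008, 1008]) torch.Size([1, 2, 2])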
diff --git a/sam3/model/utils/sam2_utils.py b/sam3/model/utils/sam2_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..d91ba0f1e8d56e32b9e48bf610d8f35b3eaa78a5
--- /dev/null
+++ b/sam3/model/utils/sam2_utils.py
@@ -0,0 +1,233 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import os
+from threading import Thread
+
+import numpy as np
+import torch
+from PIL import Image
+from tqdm import tqdm
+
+
+def _load_img_as_tensor(img_path, image_size):
+ img_pil = Image.open(img_path)
+ img_np = np.array(img_pil.convert("RGB").resize((image_size, image_size)))
+ if img_np.dtype == np.uint8: # np.uint8 is expected for JPEG images
+ img_np = img_np / 255.0
+ else:
+ raise RuntimeError(f"Unknown image dtype: {img_np.dtype} on {img_path}")
+ img = torch.from_numpy(img_np).permute(2, 0, 1)
+ video_width, video_height = img_pil.size # the original video size
+ return img, video_height, video_width
+
+
+class AsyncVideoFrameLoader:
+ """
+    A list of video frames to be loaded asynchronously without blocking session start.
+ """
+
+ def __init__(
+ self,
+ img_paths,
+ image_size,
+ offload_video_to_cpu,
+ img_mean,
+ img_std,
+ compute_device,
+ ):
+ self.img_paths = img_paths
+ self.image_size = image_size
+ self.offload_video_to_cpu = offload_video_to_cpu
+ self.img_mean = img_mean
+ self.img_std = img_std
+ # items in `self.images` will be loaded asynchronously
+ self.images = [None] * len(img_paths)
+ # catch and raise any exceptions in the async loading thread
+ self.exception = None
+        # video_height and video_width will be filled when loading the first image
+ self.video_height = None
+ self.video_width = None
+ self.compute_device = compute_device
+
+ # load the first frame to fill video_height and video_width and also
+ # to cache it (since it's most likely where the user will click)
+ self.__getitem__(0)
+
+ # load the rest of frames asynchronously without blocking the session start
+ def _load_frames():
+ try:
+ for n in tqdm(range(len(self.images)), desc="frame loading (JPEG)"):
+ self.__getitem__(n)
+ except Exception as e:
+ self.exception = e
+
+ self.thread = Thread(target=_load_frames, daemon=True)
+ self.thread.start()
+
+ def __getitem__(self, index):
+ if self.exception is not None:
+ raise RuntimeError("Failure in frame loading thread") from self.exception
+
+ img = self.images[index]
+ if img is not None:
+ return img
+
+ img, video_height, video_width = _load_img_as_tensor(
+ self.img_paths[index], self.image_size
+ )
+ self.video_height = video_height
+ self.video_width = video_width
+ # normalize by mean and std
+ img -= self.img_mean
+ img /= self.img_std
+ if not self.offload_video_to_cpu:
+ img = img.to(self.compute_device, non_blocking=True)
+ self.images[index] = img
+ return img
+
+ def __len__(self):
+ return len(self.images)
+
+
+def load_video_frames(
+ video_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean=(0.5, 0.5, 0.5),
+ img_std=(0.5, 0.5, 0.5),
+ async_loading_frames=False,
+ compute_device=torch.device("cuda"),
+):
+ """
+    Load the video frames from video_path. The frames are resized to image_size x image_size
+    as expected by the model and are loaded to the GPU if offload_video_to_cpu=False. This is used by the demo.
+ """
+ is_bytes = isinstance(video_path, bytes)
+ is_str = isinstance(video_path, str)
+ is_mp4_path = is_str and os.path.splitext(video_path)[-1] in [".mp4", ".MP4"]
+ if is_bytes or is_mp4_path:
+ return load_video_frames_from_video_file(
+ video_path=video_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ compute_device=compute_device,
+ )
+ elif is_str and os.path.isdir(video_path):
+ return load_video_frames_from_jpg_images(
+ video_path=video_path,
+ image_size=image_size,
+ offload_video_to_cpu=offload_video_to_cpu,
+ img_mean=img_mean,
+ img_std=img_std,
+ async_loading_frames=async_loading_frames,
+ compute_device=compute_device,
+ )
+ else:
+ raise NotImplementedError(
+ "Only MP4 video and JPEG folder are supported at this moment"
+ )
+
+
+def load_video_frames_from_jpg_images(
+ video_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean=(0.5, 0.5, 0.5),
+ img_std=(0.5, 0.5, 0.5),
+ async_loading_frames=False,
+ compute_device=torch.device("cuda"),
+):
+ """
+ Load the video frames from a directory of JPEG files (".jpg" format).
+
+ The frames are resized to image_size x image_size and are loaded to GPU if
+ `offload_video_to_cpu` is `False` and to CPU if `offload_video_to_cpu` is `True`.
+
+    You can load frames asynchronously by setting `async_loading_frames` to `True`.
+ """
+ if isinstance(video_path, str) and os.path.isdir(video_path):
+ jpg_folder = video_path
+ else:
+ raise NotImplementedError(
+ "Only JPEG frames are supported at this moment. For video files, you may use "
+ "ffmpeg (https://ffmpeg.org/) to extract frames into a folder of JPEG files, such as \n"
+ "```\n"
+            "ffmpeg -i <your_video>.mp4 -q:v 2 -start_number 0 <output_dir>/'%05d.jpg'\n"
+ "```\n"
+ "where `-q:v` generates high-quality JPEG frames and `-start_number 0` asks "
+ "ffmpeg to start the JPEG file from 00000.jpg."
+ )
+
+ frame_names = [
+ p
+ for p in os.listdir(jpg_folder)
+ if os.path.splitext(p)[-1] in [".jpg", ".jpeg", ".JPG", ".JPEG"]
+ ]
+ frame_names.sort(key=lambda p: int(os.path.splitext(p)[0]))
+ num_frames = len(frame_names)
+ if num_frames == 0:
+ raise RuntimeError(f"no images found in {jpg_folder}")
+ img_paths = [os.path.join(jpg_folder, frame_name) for frame_name in frame_names]
+ img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
+ img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
+
+ if async_loading_frames:
+ lazy_images = AsyncVideoFrameLoader(
+ img_paths,
+ image_size,
+ offload_video_to_cpu,
+ img_mean,
+ img_std,
+ compute_device,
+ )
+ return lazy_images, lazy_images.video_height, lazy_images.video_width
+
+ images = torch.zeros(num_frames, 3, image_size, image_size, dtype=torch.float32)
+ for n, img_path in enumerate(tqdm(img_paths, desc="frame loading (JPEG)")):
+ images[n], video_height, video_width = _load_img_as_tensor(img_path, image_size)
+ if not offload_video_to_cpu:
+ images = images.to(compute_device)
+ img_mean = img_mean.to(compute_device)
+ img_std = img_std.to(compute_device)
+ # normalize by mean and std
+ images -= img_mean
+ images /= img_std
+ return images, video_height, video_width
+
+
+def load_video_frames_from_video_file(
+ video_path,
+ image_size,
+ offload_video_to_cpu,
+ img_mean=(0.5, 0.5, 0.5),
+ img_std=(0.5, 0.5, 0.5),
+ compute_device=torch.device("cuda"),
+):
+ """Load the video frames from a video file."""
+ import decord
+
+ img_mean = torch.tensor(img_mean, dtype=torch.float32)[:, None, None]
+ img_std = torch.tensor(img_std, dtype=torch.float32)[:, None, None]
+ # Get the original video height and width
+ decord.bridge.set_bridge("torch")
+ video_height, video_width, _ = decord.VideoReader(video_path).next().shape
+ # Iterate over all frames in the video
+ images = []
+ for frame in decord.VideoReader(video_path, width=image_size, height=image_size):
+ images.append(frame.permute(2, 0, 1))
+
+ images = torch.stack(images, dim=0).float() / 255.0
+ if not offload_video_to_cpu:
+ images = images.to(compute_device)
+ img_mean = img_mean.to(compute_device)
+ img_std = img_std.to(compute_device)
+ # normalize by mean and std
+ images -= img_mean
+ images /= img_std
+ return images, video_height, video_width
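+
+
+# Example (illustrative sketch): loading frames from a folder of JPEG files
+# named 00000.jpg, 00001.jpg, ... The directory path is a placeholder.
+if __name__ == "__main__":
+    frames, height, width = load_video_frames(
+        video_path="path/to/jpeg_frames",  # placeholder folder of JPEG frames
+        image_size=1008,
+        offload_video_to_cpu=True,  # keep the normalized frames on CPU
+        compute_device=torch.device("cpu"),
+    )
+    print(frames.shape, height, width)  # (num_frames, 3, 1008, 1008), H, W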
diff --git a/sam3/model/vitdet.py b/sam3/model/vitdet.py
new file mode 100644
index 0000000000000000000000000000000000000000..aa56664d8f011e2d9bc9f3004e1240dda835c2ac
--- /dev/null
+++ b/sam3/model/vitdet.py
@@ -0,0 +1,879 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+ViTDet backbone adapted from Detectron2.
+This module implements a Vision Transformer (ViT) backbone for object detection.
+
+Rope embedding code adapted from:
+1. https://github.com/meta-llama/codellama/blob/main/llama/model.py
+2. https://github.com/naver-ai/rope-vit
+3. https://github.com/lucidrains/rotary-embedding-torch
+"""
+
+import math
+from functools import partial
+from typing import Callable, List, Optional, Tuple, Union
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import torch.utils.checkpoint as checkpoint
+
+try:
+ from timm.layers import DropPath, Mlp, trunc_normal_
+except ModuleNotFoundError:
+ # compatibility for older timm versions
+ from timm.models.layers import DropPath, Mlp, trunc_normal_
+from torch import Tensor
+
+from .model_misc import LayerScale
+
+
+def init_t_xy(
+ end_x: int, end_y: int, scale: float = 1.0, offset: int = 0
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ t = torch.arange(end_x * end_y, dtype=torch.float32)
+ t_x = (t % end_x).float()
+ t_y = torch.div(t, end_x, rounding_mode="floor").float()
+ return t_x * scale + offset, t_y * scale + offset
+
+
+def compute_axial_cis(
+ dim: int,
+ end_x: int,
+ end_y: int,
+ theta: float = 10000.0,
+ scale_pos: float = 1.0,
+ offset: int = 0,
+) -> torch.Tensor:
+ freqs_x = 1.0 / (theta ** (torch.arange(0, dim, 4)[: (dim // 4)].float() / dim))
+ freqs_y = 1.0 / (theta ** (torch.arange(0, dim, 4)[: (dim // 4)].float() / dim))
+
+ t_x, t_y = init_t_xy(end_x, end_y, scale_pos, offset)
+ freqs_x = torch.outer(t_x, freqs_x)
+ freqs_y = torch.outer(t_y, freqs_y)
+ freqs_cis_x = torch.polar(torch.ones_like(freqs_x), freqs_x)
+ freqs_cis_y = torch.polar(torch.ones_like(freqs_y), freqs_y)
+ return torch.cat([freqs_cis_x, freqs_cis_y], dim=-1)
+
+
+def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
+ ndim = x.ndim
+ assert 0 <= 1 < ndim
+ assert freqs_cis.shape == (x.shape[-2], x.shape[-1])
+ shape = [d if i >= ndim - 2 else 1 for i, d in enumerate(x.shape)]
+ return freqs_cis.view(*shape)
+
+
+def apply_rotary_enc(
+ xq: torch.Tensor,
+ xk: torch.Tensor,
+ freqs_cis: torch.Tensor,
+ repeat_freqs_k: bool = False,
+) -> Tuple[torch.Tensor, torch.Tensor]:
+ xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
+ xk_ = (
+ torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
+ if xk.shape[-2] != 0
+ else None
+ )
+ freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
+ xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
+ if xk_ is None:
+ # no keys to rotate, due to dropout
+ return xq_out.type_as(xq).to(xq.device), xk
+ # repeat freqs along seq_len dim to match k seq_len
+ if repeat_freqs_k:
+ r = xk_.shape[-2] // xq_.shape[-2]
+ freqs_cis = freqs_cis.repeat(*([1] * (freqs_cis.ndim - 2)), r, 1)
+ xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
+ return xq_out.type_as(xq).to(xq.device), xk_out.type_as(xk).to(xk.device)
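+
+
+# Illustrative usage sketch (commented out, not part of the original module):
+# apply 2D rotary position encoding to query/key tensors for an 8x8 token grid
+# with head dimension 64.
+#
+#     head_dim, grid = 64, 8
+#     freqs_cis = compute_axial_cis(dim=head_dim, end_x=grid, end_y=grid)
+#     q = torch.randn(1, 2, grid * grid, head_dim)  # (B, n_heads, H*W, head_dim)
+#     k = torch.randn(1, 2, grid * grid, head_dim)
+#     q_rot, k_rot = apply_rotary_enc(q, k, freqs_cis=freqs_cis)
+#     # q_rot and k_rot keep the input shape (1, 2, 64, 64)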
+
+
+def window_partition(x: Tensor, window_size: int) -> Tuple[Tensor, Tuple[int, int]]:
+ """
+ Partition into non-overlapping windows with padding if needed.
+ Args:
+ x (tensor): input tokens with [B, H, W, C].
+ window_size (int): window size.
+ Returns:
+ windows: windows after partition with [B * num_windows, window_size, window_size, C].
+ (Hp, Wp): padded height and width before partition
+ """
+ B, H, W, C = x.shape
+
+ pad_h = (window_size - H % window_size) % window_size
+ pad_w = (window_size - W % window_size) % window_size
+ if pad_h > 0 or pad_w > 0:
+ x = F.pad(x, (0, 0, 0, pad_w, 0, pad_h))
+ Hp, Wp = H + pad_h, W + pad_w
+
+ x = x.view(B, Hp // window_size, window_size, Wp // window_size, window_size, C)
+ windows = x.permute(0, 1, 3, 2, 4, 5).reshape(-1, window_size, window_size, C)
+ return windows, (Hp, Wp)
+
+
+def window_unpartition(
+ windows: Tensor, window_size: int, pad_hw: Tuple[int, int], hw: Tuple[int, int]
+) -> Tensor:
+ """
+    Window unpartition into original sequences, removing padding.
+    Args:
+        windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
+ window_size (int): window size.
+ pad_hw (Tuple): padded height and width (Hp, Wp).
+ hw (Tuple): original height and width (H, W) before padding.
+ Returns:
+ x: unpartitioned sequences with [B, H, W, C].
+ """
+ Hp, Wp = pad_hw
+ H, W = hw
+ B = windows.shape[0] // (Hp * Wp // window_size // window_size)
+ x = windows.reshape(
+ B, Hp // window_size, Wp // window_size, window_size, window_size, -1
+ )
+ x = x.permute(0, 1, 3, 2, 4, 5).reshape(B, Hp, Wp, -1)
+
+ if Hp > H or Wp > W:
+ x = x[:, :H, :W, :]
+ return x
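+
+
+# Illustrative round-trip sketch (commented out): a 10x10 token grid is padded
+# to 14x14 for window_size=7, split into windows, then restored exactly.
+#
+#     x = torch.randn(2, 10, 10, 256)                       # (B, H, W, C)
+#     windows, pad_hw = window_partition(x, window_size=7)  # (8, 7, 7, 256), (14, 14)
+#     y = window_unpartition(windows, 7, pad_hw, (10, 10))  # (2, 10, 10, 256)
+#     assert torch.equal(x, y)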
+
+
+def get_rel_pos(q_size: int, k_size: int, rel_pos: Tensor) -> Tensor:
+ """
+ Get relative positional embeddings according to the relative positions of
+ query and key sizes.
+ Args:
+ q_size (int): size of query q.
+ k_size (int): size of key k.
+ rel_pos (Tensor): relative position embeddings (L, C).
+ Returns:
+ Extracted positional embeddings according to relative positions.
+ """
+ max_rel_dist = int(2 * max(q_size, k_size) - 1)
+ # Interpolate rel pos if needed.
+ if rel_pos.shape[0] != max_rel_dist:
+ # Interpolate rel pos.
+ rel_pos_resized = F.interpolate(
+ rel_pos.reshape(1, rel_pos.shape[0], -1).permute(0, 2, 1),
+ size=max_rel_dist,
+ mode="linear",
+ align_corners=False,
+ )
+ rel_pos_resized = rel_pos_resized.reshape(-1, max_rel_dist).permute(1, 0)
+ else:
+ rel_pos_resized = rel_pos
+
+ # Scale the coords with short length if shapes for q and k are different.
+ q_coords = torch.arange(q_size)[:, None] * max(k_size / q_size, 1.0)
+ k_coords = torch.arange(k_size)[None, :] * max(q_size / k_size, 1.0)
+ relative_coords = (q_coords - k_coords) + (k_size - 1) * max(q_size / k_size, 1.0)
+
+ return rel_pos_resized[relative_coords.long()]
+
+
+def get_abs_pos(
+ abs_pos: Tensor,
+ has_cls_token: bool,
+ hw: Tuple[int, int],
+ retain_cls_token: bool = False,
+ tiling: bool = False,
+) -> Tensor:
+ """
+ Calculate absolute positional embeddings. If needed, resize embeddings and remove cls_token
+ dimension for the original embeddings.
+ Args:
+ abs_pos (Tensor): absolute positional embeddings with (1, num_position, C).
+ has_cls_token (bool): If true, has 1 embedding in abs_pos for cls token.
+ hw (Tuple): size of input image tokens.
+ retain_cls_token: whether to retain the cls_token
+ tiling: whether to tile the embeddings, *instead* of interpolation (a la abs_win)
+ Returns:
+ Absolute positional embeddings after processing with shape (1, H, W, C),
+ if retain_cls_token is False, otherwise (1, 1+H*W, C)
+ """
+ if retain_cls_token:
+ assert has_cls_token
+
+ h, w = hw
+ if has_cls_token:
+ cls_pos = abs_pos[:, :1]
+ abs_pos = abs_pos[:, 1:]
+
+ xy_num = abs_pos.shape[1]
+ size = int(math.sqrt(xy_num))
+ assert size * size == xy_num
+
+ if size != h or size != w:
+ new_abs_pos = abs_pos.reshape(1, size, size, -1).permute(0, 3, 1, 2)
+ if tiling:
+ new_abs_pos = new_abs_pos.tile(
+ [1, 1] + [x // y + 1 for x, y in zip((h, w), new_abs_pos.shape[2:])]
+ )[:, :, :h, :w]
+ else:
+ new_abs_pos = F.interpolate(
+ new_abs_pos,
+ size=(h, w),
+ mode="bicubic",
+ align_corners=False,
+ )
+
+ if not retain_cls_token:
+ return new_abs_pos.permute(0, 2, 3, 1)
+ else:
+ # add cls_token back, flatten spatial dims
+ assert has_cls_token
+ return torch.cat(
+ [cls_pos, new_abs_pos.permute(0, 2, 3, 1).reshape(1, h * w, -1)],
+ dim=1,
+ )
+
+ else:
+ if not retain_cls_token:
+ return abs_pos.reshape(1, h, w, -1)
+ else:
+ assert has_cls_token
+ return torch.cat([cls_pos, abs_pos], dim=1)
+
+
+def concat_rel_pos(
+ q: Tensor,
+ k: Tensor,
+ q_hw: Tuple[int, int],
+ k_hw: Tuple[int, int],
+ rel_pos_h: Tensor,
+ rel_pos_w: Tensor,
+ rescale: bool = False,
+ relative_coords: Optional[Tensor] = None,
+) -> Tuple[Tensor, Tensor]:
+ """
+ Concatenate rel pos coeffs to the q & k tensors, so that qk^T is now
+ effectively including rel pos biases.
+ Args:
+ q (Tensor): q tensor with shape (B, L_q, C).
+ k (Tensor): k tensor with shape (B, L_k, C).
+ q_hw, k_hw: These are spatial size of q & k tensors.
+ rel_pos_h, rel_pos_w: These are relative pos embeddings/params of height, width.
+ rescale (bool): whether to rescale. e.g. for use when using sdpa, pytorch will
+ scale by the wrong factor due to the concat.
+ Returns:
+ q, k: But, padded so that qk^T accounts for rel pos biases
+ """
+ q_h, q_w = q_hw
+ k_h, k_w = k_hw
+
+ assert (q_h == q_w) and (k_h == k_w), "only square inputs supported"
+
+ if relative_coords is not None:
+ Rh = rel_pos_h[relative_coords]
+ Rw = rel_pos_w[relative_coords]
+ else:
+ Rh = get_rel_pos(q_h, k_h, rel_pos_h)
+ Rw = get_rel_pos(q_w, k_w, rel_pos_w)
+
+ B, _, dim = q.shape
+ r_q = q.reshape(B, q_h, q_w, dim)
+
+ old_scale = dim**0.5
+ new_scale = (dim + k_h + k_w) ** 0.5 if rescale else old_scale # for sdpa
+ # attn will be divided by new_scale, but we want to divide q by old_scale
+ scale_ratio = new_scale / old_scale
+
+ rel_h = torch.einsum("bhwc,hkc->bhwk", r_q, Rh) * new_scale # (B, q_h, q_w, k_h)
+ rel_w = torch.einsum("bhwc,wkc->bhwk", r_q, Rw) * new_scale # (B, q_h, q_w, k_w)
+
+ eye_h = torch.eye(k_h, dtype=q.dtype, device=q.device)
+ eye_w = torch.eye(k_w, dtype=q.dtype, device=q.device)
+
+ eye_h = eye_h.view(1, k_h, 1, k_h).expand([B, k_h, k_w, k_h])
+ eye_w = eye_w.view(1, 1, k_w, k_w).expand([B, k_h, k_w, k_w])
+
+ q = torch.cat([r_q * scale_ratio, rel_h, rel_w], dim=-1).view(B, q_h * q_w, -1)
+ k = torch.cat([k.view(B, k_h, k_w, -1), eye_h, eye_w], dim=-1).view(
+ B, k_h * k_w, -1
+ )
+
+ return q, k
+
+
+class PatchEmbed(nn.Module):
+ """
+ Image to Patch Embedding.
+ """
+
+ def __init__(
+ self,
+ kernel_size: Tuple[int, int] = (16, 16),
+ stride: Tuple[int, int] = (16, 16),
+ padding: Tuple[int, int] = (0, 0),
+ in_chans: int = 3,
+ embed_dim: int = 768,
+ bias: bool = True,
+ ):
+ """
+ Args:
+ kernel_size (Tuple): kernel size of the projection layer.
+ stride (Tuple): stride of the projection layer.
+ padding (Tuple): padding size of the projection layer.
+ in_chans (int): Number of input image channels.
+            embed_dim (int): Patch embedding dimension.
+            bias (bool): If True, add a learnable bias to the projection conv.
+ """
+ super().__init__()
+
+ self.proj = nn.Conv2d(
+ in_chans,
+ embed_dim,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ bias=bias,
+ )
+
+ def forward(self, x: Tensor) -> Tensor:
+ x = self.proj(x)
+ # B C H W -> B H W C
+ x = x.permute(0, 2, 3, 1)
+ return x
+
+
+class Attention(nn.Module):
+ """Multi-head Attention block with relative position embeddings and 2d-rope."""
+
+ def __init__(
+ self,
+ dim: int,
+ num_heads: int = 8,
+ qkv_bias: bool = True,
+ use_rel_pos: bool = False,
+ rel_pos_zero_init: bool = True,
+ input_size: Optional[Tuple[int, int]] = None,
+ cls_token: bool = False,
+ use_rope: bool = False,
+ rope_theta: float = 10000.0,
+ rope_pt_size: Optional[Tuple[int, int]] = None,
+ rope_interp: bool = False,
+ ):
+ """
+ Args:
+ dim (int): Number of input channels.
+ num_heads (int): Number of attention heads.
+            qkv_bias (bool): If True, add a learnable bias to query, key, value.
+            use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
+ rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
+ input_size (int or None): Input resolution for calculating the relative positional
+ parameter size or rope size.
+ cls_token: whether a cls_token is present.
+ use_rope: whether to use rope 2d (indep of use_rel_pos, as it can be used together)
+ rope_theta: control frequencies of rope
+ rope_pt_size: size of rope in previous stage of training, needed for interpolation or tiling
+ rope_interp: whether to interpolate (or extrapolate) rope to match input size
+ """
+ super().__init__()
+ self.num_heads = num_heads
+ self.head_dim = dim // num_heads
+ self.scale = self.head_dim**-0.5
+ self.cls_token = cls_token
+
+ self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+ self.proj = nn.Linear(dim, dim)
+
+ # rel_pos embeddings and rope
+ self.use_rel_pos = use_rel_pos
+ self.input_size = input_size
+
+ self.use_rope = use_rope
+ self.rope_theta = rope_theta
+ self.rope_pt_size = rope_pt_size
+ self.rope_interp = rope_interp
+
+ # init rel_pos embeddings and rope
+ self._setup_rel_pos(rel_pos_zero_init)
+ self._setup_rope_freqs()
+
+ def _setup_rel_pos(self, rel_pos_zero_init: bool = True) -> None:
+ if not self.use_rel_pos:
+ self.rel_pos_h = None
+ self.rel_pos_w = None
+ return
+
+ assert self.input_size is not None
+ assert self.cls_token is False, "not supported"
+ # initialize relative positional embeddings
+ self.rel_pos_h = nn.Parameter(
+ torch.zeros(2 * self.input_size[0] - 1, self.head_dim)
+ )
+ self.rel_pos_w = nn.Parameter(
+ torch.zeros(2 * self.input_size[1] - 1, self.head_dim)
+ )
+
+ if not rel_pos_zero_init:
+ trunc_normal_(self.rel_pos_h, std=0.02)
+ trunc_normal_(self.rel_pos_w, std=0.02)
+
+ # Precompute the relative coords
+ H, W = self.input_size
+ q_coords = torch.arange(H)[:, None]
+ k_coords = torch.arange(W)[None, :]
+ relative_coords = (q_coords - k_coords) + (H - 1)
+ self.register_buffer("relative_coords", relative_coords.long())
+
+ def _setup_rope_freqs(self) -> None:
+ if not self.use_rope:
+ self.freqs_cis = None
+ return
+
+ assert self.input_size is not None
+ # determine rope input size
+ if self.rope_pt_size is None:
+ self.rope_pt_size = self.input_size
+
+ # initialize 2d rope freqs
+ self.compute_cis = partial(
+ compute_axial_cis,
+ dim=self.head_dim,
+ theta=self.rope_theta,
+ )
+
+ # interpolate rope
+ scale_pos = 1.0
+ if self.rope_interp:
+ scale_pos = self.rope_pt_size[0] / self.input_size[0]
+ # get scaled freqs_cis
+ freqs_cis = self.compute_cis(
+ end_x=self.input_size[0],
+ end_y=self.input_size[1],
+ scale_pos=scale_pos,
+ )
+ if self.cls_token:
+ t = torch.zeros(
+ self.head_dim // 2,
+ dtype=torch.float32,
+ device=freqs_cis.device,
+ )
+ cls_freqs_cis = torch.polar(torch.ones_like(t), t)[None, :]
+ freqs_cis = torch.cat([cls_freqs_cis, freqs_cis], dim=0)
+
+ self.register_buffer("freqs_cis", freqs_cis)
+
+ def _apply_rope(self, q, k) -> Tuple[Tensor, Tensor]:
+ if not self.use_rope:
+ return q, k
+
+ assert self.freqs_cis is not None
+ return apply_rotary_enc(q, k, freqs_cis=self.freqs_cis)
+
+ def forward(self, x: Tensor) -> Tensor:
+ s = 1 if self.cls_token else 0 # used to exclude cls_token
+ if x.ndim == 4:
+ B, H, W, _ = x.shape
+ assert s == 0 # no cls_token
+ L = H * W
+ ndim = 4
+ else:
+ assert x.ndim == 3
+ B, L, _ = x.shape
+ ndim = 3
+            H = W = int(math.sqrt(L - s))
+
+ # qkv with shape (3, B, nHead, L, C)
+ qkv = self.qkv(x).reshape(B, L, 3, self.num_heads, -1)
+ # q, k, v with shape (B, nHead, L, C)
+ q, k, v = qkv.permute(2, 0, 3, 1, 4).unbind(0)
+
+ # handle rope and rel pos embeddings
+ q, k = self._apply_rope(q, k)
+ if self.use_rel_pos:
+ q, k = concat_rel_pos(
+ q.flatten(0, 1),
+ k.flatten(0, 1),
+ (H, W),
+ x.shape[1:3],
+ self.rel_pos_h,
+ self.rel_pos_w,
+ rescale=True,
+ relative_coords=self.relative_coords,
+ )
+
+        # sdpa expects [B, nheads, seq_len, C], so reshape back (seq_len includes the cls token if present)
+        q = q.reshape(B, self.num_heads, L, -1)
+        k = k.reshape(B, self.num_heads, L, -1)
+
+ x = F.scaled_dot_product_attention(q, k, v)
+
+ if ndim == 4:
+ x = (
+ x.view(B, self.num_heads, H, W, -1)
+ .permute(0, 2, 3, 1, 4)
+ .reshape(B, H, W, -1)
+ )
+ else:
+ x = x.view(B, self.num_heads, L, -1).permute(0, 2, 1, 3).reshape(B, L, -1)
+
+ x = self.proj(x)
+
+ return x
+
+
+class Block(nn.Module):
+ """Transformer blocks with support of window attention"""
+
+ def __init__(
+ self,
+ dim: int,
+ num_heads: int,
+ mlp_ratio: float = 4.0,
+ qkv_bias: bool = True,
+ drop_path: float = 0.0,
+ norm_layer: Callable[..., nn.Module] = nn.LayerNorm,
+ act_layer: Callable[..., nn.Module] = nn.GELU,
+ use_rel_pos: bool = False,
+ rel_pos_zero_init: bool = True,
+ window_size: int = 0,
+ input_size: Optional[Tuple[int, int]] = None,
+ use_rope: bool = False,
+ rope_pt_size: Optional[Tuple[int, int]] = None,
+ rope_tiled: bool = False,
+ rope_interp: bool = False,
+ use_ve_rope: bool = False,
+ cls_token: bool = False,
+ dropout: float = 0.0,
+ init_values: Optional[float] = None,
+ ):
+ """
+ Args:
+ dim (int): Number of input channels.
+ num_heads (int): Number of attention heads in each ViT block.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+ qkv_bias (bool): If True, add a learnable bias to query, key, value.
+ drop_path (float): Stochastic depth rate.
+ norm_layer (nn.Module): Normalization layer.
+ act_layer (nn.Module): Activation layer.
+ use_rel_pos (bool): If True, add relative positional embeddings to the attention map.
+ rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
+            window_size (int): Window size for window attention blocks. If it equals 0, window
+                attention is not used.
+ input_size (int or None): Input resolution for calculating the relative positional
+ parameter size.
+ dropout (float): Dropout rate.
+ cls_token: whether a cls_token is present.
+ use_rope: whether to use rope 2d (indep of use_rel_pos, as it can be used together)
+ rope_pt_size: size of rope in previous stage of training, needed for interpolation or tiling
+ rope_interp: whether to interpolate (or extrapolate) rope to match target input size,
+ expected to specify source size as rope_pt_size.
+ """
+ super().__init__()
+ self.norm1 = norm_layer(dim)
+ self.attn = Attention(
+ dim,
+ num_heads=num_heads,
+ qkv_bias=qkv_bias,
+ use_rel_pos=use_rel_pos,
+ rel_pos_zero_init=rel_pos_zero_init,
+ input_size=input_size if window_size == 0 else (window_size, window_size),
+ use_rope=use_rope,
+ rope_pt_size=rope_pt_size,
+ rope_interp=rope_interp,
+ cls_token=cls_token,
+ )
+ self.ls1 = (
+ LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
+ )
+ self.drop_path = DropPath(drop_path) if drop_path > 0.0 else nn.Identity()
+
+ self.norm2 = norm_layer(dim)
+ self.mlp = Mlp(
+ in_features=dim,
+ hidden_features=int(dim * mlp_ratio),
+ act_layer=act_layer,
+ drop=(dropout, 0.0),
+ )
+ self.ls2 = (
+ LayerScale(dim, init_values=init_values) if init_values else nn.Identity()
+ )
+ self.dropout = nn.Dropout(dropout)
+ self.window_size = window_size
+
+ def forward(self, x: Tensor) -> Tensor:
+ shortcut = x
+ x = self.norm1(x)
+ # Window partition
+ if self.window_size > 0:
+ H, W = x.shape[1], x.shape[2]
+ x, pad_hw = window_partition(x, self.window_size)
+
+ x = self.ls1(self.attn(x))
+ # Reverse window partition
+ if self.window_size > 0:
+ x = window_unpartition(x, self.window_size, pad_hw, (H, W))
+
+ x = shortcut + self.dropout(self.drop_path(x))
+ x = x + self.dropout(self.drop_path(self.ls2(self.mlp(self.norm2(x)))))
+
+ return x
+
+
+class ViT(nn.Module):
+ """
+ This module implements Vision Transformer (ViT) backbone in :paper:`vitdet`.
+ "Exploring Plain Vision Transformer Backbones for Object Detection",
+ https://arxiv.org/abs/2203.16527
+ """
+
+ def __init__(
+ self,
+ img_size: int = 1024,
+ patch_size: int = 16,
+ in_chans: int = 3,
+ embed_dim: int = 768,
+ depth: int = 12,
+ num_heads: int = 12,
+ mlp_ratio: float = 4.0,
+ qkv_bias: bool = True,
+ drop_path_rate: float = 0.0,
+ norm_layer: Union[Callable[..., nn.Module], str] = "LayerNorm",
+ act_layer: Callable[..., nn.Module] = nn.GELU,
+ use_abs_pos: bool = True,
+ tile_abs_pos: bool = True,
+ rel_pos_blocks: Union[Tuple[int, ...], bool] = (2, 5, 8, 11),
+ rel_pos_zero_init: bool = True,
+ window_size: int = 14,
+ global_att_blocks: Tuple[int, ...] = (2, 5, 8, 11),
+ use_rope: bool = False,
+ rope_pt_size: Optional[int] = None,
+ use_interp_rope: bool = False,
+ pretrain_img_size: int = 224,
+ pretrain_use_cls_token: bool = True,
+ retain_cls_token: bool = True,
+ dropout: float = 0.0,
+ return_interm_layers: bool = False,
+ init_values: Optional[float] = None, # for layerscale
+ ln_pre: bool = False,
+ ln_post: bool = False,
+ bias_patch_embed: bool = True,
+ compile_mode: Optional[str] = None,
+ use_act_checkpoint: bool = True,
+ ):
+ """
+ Args:
+ img_size (int): Input image size. Only relevant for rel pos or rope.
+ patch_size (int): Patch size.
+ in_chans (int): Number of input image channels.
+ embed_dim (int): Patch embedding dimension.
+ depth (int): Depth of ViT.
+ num_heads (int): Number of attention heads in each ViT block.
+ mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+ qkv_bias (bool): If True, add a learnable bias to query, key, value.
+ drop_path_rate (float): Stochastic depth rate.
+ norm_layer (nn.Module): Normalization layer.
+ act_layer (nn.Module): Activation layer.
+ use_abs_pos (bool): If True, use absolute positional embeddings.
+ tile_abs_pos (bool): If True, tile absolute positional embeddings instead of interpolation.
+ rel_pos_blocks (list): Blocks which have rel pos embeddings.
+ rel_pos_zero_init (bool): If True, zero initialize relative positional parameters.
+ window_size (int): Window size for window attention blocks.
+ global_att_blocks (list): Indexes for blocks using global attention (other blocks use window attention).
+ use_rope (bool): whether to use rope 2d (indep of rel_pos_blocks, as it can be used together).
+ rope_pt_size (int): size of rope in previous stage of training, needed for interpolation or tiling.
+ use_interp_rope: whether to interpolate (or extrapolate) rope to match target input size,
+ expected to specify source size as rope_pt_size.
+ use_act_checkpoint (bool): If True, use activation checkpointing.
+ pretrain_img_size (int): input image size for pretraining models.
+ pretrain_use_cls_token (bool): If True, pretraining models use class token.
+ retain_cls_token: whether cls_token should be retained.
+ dropout (float): Dropout rate. Applied in residual blocks of attn, mlp and inside the mlp.
+
+ return_interm_layers (bool): Whether to return intermediate layers (all global attention blocks).
+ init_values: layer scale init, None for no layer scale.
+
+ ln_pre (bool): If True, apply layer norm before transformer blocks.
+ ln_post (bool): If True, apply layer norm after transformer blocks.
+            bias_patch_embed (bool): If True, use a learnable bias in the patch embedding conv.
+            compile_mode (str): torch.compile mode used for the forward pass (None disables compilation).
+ """
+ super().__init__()
+ self.pretrain_use_cls_token = pretrain_use_cls_token
+
+ window_block_indexes = [i for i in range(depth) if i not in global_att_blocks]
+ self.full_attn_ids = list(global_att_blocks)
+ self.rel_pos_blocks = [False] * depth
+ if isinstance(rel_pos_blocks, bool) and rel_pos_blocks:
+ self.rel_pos_blocks = [True] * depth
+ else:
+ for i in rel_pos_blocks:
+ self.rel_pos_blocks[i] = True
+
+ self.retain_cls_token = retain_cls_token
+ if self.retain_cls_token:
+ assert pretrain_use_cls_token
+ assert (
+ len(window_block_indexes) == 0
+ ), "windowing not supported with cls token"
+
+ assert sum(self.rel_pos_blocks) == 0, "rel pos not supported with cls token"
+
+ scale = embed_dim**-0.5
+ self.class_embedding = nn.Parameter(scale * torch.randn(1, 1, embed_dim))
+
+ if isinstance(norm_layer, str):
+ norm_layer = partial(getattr(nn, norm_layer), eps=1e-5)
+
+ self.patch_embed = PatchEmbed(
+ kernel_size=(patch_size, patch_size),
+ stride=(patch_size, patch_size),
+ in_chans=in_chans,
+ embed_dim=embed_dim,
+ bias=bias_patch_embed,
+ )
+
+ # Handle absolute positional embedding
+ self.tile_abs_pos = tile_abs_pos
+ self.use_abs_pos = use_abs_pos
+ if self.tile_abs_pos:
+ assert self.use_abs_pos
+
+ if self.use_abs_pos:
+ # Initialize absolute positional embedding with pretrain image size.
+ num_patches = (pretrain_img_size // patch_size) * (
+ pretrain_img_size // patch_size
+ )
+ num_positions = (num_patches + 1) if pretrain_use_cls_token else num_patches
+ self.pos_embed = nn.Parameter(torch.zeros(1, num_positions, embed_dim))
+ else:
+ self.pos_embed = None
+
+ # stochastic depth decay rule
+ dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)]
+
+ self.blocks = nn.ModuleList()
+ cur_stage = 1
+ for i in range(depth):
+ block = Block(
+ dim=embed_dim,
+ num_heads=num_heads,
+ mlp_ratio=mlp_ratio,
+ qkv_bias=qkv_bias,
+ drop_path=dpr[i],
+ norm_layer=norm_layer,
+ act_layer=act_layer,
+ use_rel_pos=self.rel_pos_blocks[i],
+ rel_pos_zero_init=rel_pos_zero_init,
+ window_size=window_size if i in window_block_indexes else 0,
+ input_size=(img_size // patch_size, img_size // patch_size),
+ use_rope=use_rope,
+ rope_pt_size=(
+ (window_size, window_size)
+ if rope_pt_size is None
+ else (rope_pt_size, rope_pt_size)
+ ),
+ rope_interp=use_interp_rope,
+ cls_token=self.retain_cls_token,
+ dropout=dropout,
+ init_values=init_values,
+ )
+
+ if i not in window_block_indexes:
+ cur_stage += 1
+
+ self.use_act_checkpoint = use_act_checkpoint
+
+ self.blocks.append(block)
+
+ self.return_interm_layers = return_interm_layers
+ self.channel_list = (
+ [embed_dim] * len(self.full_attn_ids)
+ if return_interm_layers
+ else [embed_dim]
+ )
+
+ if self.pos_embed is not None:
+ trunc_normal_(self.pos_embed, std=0.02)
+
+ self.ln_pre = norm_layer(embed_dim) if ln_pre else nn.Identity()
+ self.ln_post = norm_layer(embed_dim) if ln_post else nn.Identity()
+
+ self.apply(self._init_weights)
+
+ if compile_mode is not None:
+ self.forward = torch.compile(
+ self.forward, mode=compile_mode, fullgraph=True
+ )
+ if self.use_act_checkpoint and self.training:
+ torch._dynamo.config.optimize_ddp = False
+
+ def _init_weights(self, m: nn.Module) -> None:
+ if isinstance(m, nn.Linear):
+ trunc_normal_(m.weight, std=0.02)
+ if isinstance(m, nn.Linear) and m.bias is not None:
+ nn.init.constant_(m.bias, 0)
+ elif isinstance(m, nn.LayerNorm):
+ nn.init.constant_(m.bias, 0)
+ nn.init.constant_(m.weight, 1.0)
+
+ def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+ x = self.patch_embed(x)
+ h, w = x.shape[1], x.shape[2]
+
+ s = 0
+ if self.retain_cls_token:
+ # If cls_token is retained, we don't
+ # maintain spatial shape
+ x = torch.cat([self.class_embedding, x.flatten(1, 2)], dim=1)
+ s = 1
+
+ if self.pos_embed is not None:
+ x = x + get_abs_pos(
+ self.pos_embed,
+ self.pretrain_use_cls_token,
+ (h, w),
+ self.retain_cls_token,
+ tiling=self.tile_abs_pos,
+ )
+
+ x = self.ln_pre(x)
+
+ outputs = []
+ for i, blk in enumerate(self.blocks):
+ if self.use_act_checkpoint and self.training:
+ x = checkpoint.checkpoint(blk, x, use_reentrant=False)
+ else:
+ x = blk(x)
+ if (i == self.full_attn_ids[-1]) or (
+ self.return_interm_layers and i in self.full_attn_ids
+ ):
+ if i == self.full_attn_ids[-1]:
+ x = self.ln_post(x)
+
+ feats = x[:, s:]
+ if feats.ndim == 4:
+ feats = feats.permute(0, 3, 1, 2)
+ else:
+ assert feats.ndim == 3
+                    h = w = int(math.sqrt(feats.shape[1]))
+ feats = feats.reshape(
+ feats.shape[0], h, w, feats.shape[-1]
+ ).permute(0, 3, 1, 2)
+
+ outputs.append(feats)
+
+ return outputs
+
+ def get_layer_id(self, layer_name: str) -> int:
+ # https://github.com/microsoft/unilm/blob/master/beit/optim_factory.py#L33
+ num_layers = self.get_num_layers()
+
+ if layer_name.find("rel_pos") != -1:
+ return num_layers + 1
+ elif layer_name.find("ln_pre") != -1:
+ return 0
+ elif layer_name.find("pos_embed") != -1 or layer_name.find("cls_token") != -1:
+ return 0
+ elif layer_name.find("patch_embed") != -1:
+ return 0
+ elif layer_name.find("blocks") != -1:
+ return int(layer_name.split("blocks")[1].split(".")[1]) + 1
+ else:
+ return num_layers + 1
+
+ def get_num_layers(self) -> int:
+ return len(self.blocks)
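+
+
+# Example (illustrative sketch): a deliberately tiny ViT configuration, not the
+# SAM3 production settings, used only to show the expected input/output shapes.
+if __name__ == "__main__":
+    vit = ViT(
+        img_size=224,
+        patch_size=16,
+        embed_dim=192,
+        depth=4,
+        num_heads=3,
+        global_att_blocks=(1, 3),
+        rel_pos_blocks=(),
+        window_size=7,
+        retain_cls_token=False,
+        use_act_checkpoint=False,
+    )
+    feats = vit(torch.randn(1, 3, 224, 224))
+    print([f.shape for f in feats])  # [torch.Size([1, 192, 14, 14])]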
diff --git a/sam3/model/vl_combiner.py b/sam3/model/vl_combiner.py
new file mode 100644
index 0000000000000000000000000000000000000000..43bc7bd5e8fda4f4ed1e96cd7931b254f013616c
--- /dev/null
+++ b/sam3/model/vl_combiner.py
@@ -0,0 +1,176 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Provides utility to combine a vision backbone with a language backbone."""
+
+from copy import copy
+from typing import List, Optional
+
+import torch
+import torch.nn as nn
+
+from torch.nn.attention import sdpa_kernel, SDPBackend
+
+from .act_ckpt_utils import activation_ckpt_wrapper
+from .necks import Sam3DualViTDetNeck
+
+
+class SAM3VLBackbone(nn.Module):
+ """This backbone combines a vision backbone and a language backbone without fusion.
+ As such it is more of a convenience wrapper to handle the two backbones together.
+
+ It adds support for activation checkpointing and compilation.
+ """
+
+ def __init__(
+ self,
+ visual: Sam3DualViTDetNeck,
+ text,
+ compile_visual: bool = False,
+ act_ckpt_whole_vision_backbone: bool = False,
+ act_ckpt_whole_language_backbone: bool = False,
+ scalp=0,
+ ):
+ """Initialize the backbone combiner.
+
+        :param visual: The vision backbone to use
+        :param text: The text encoder to use
+        :param compile_visual: If True, wrap the vision backbone with torch.compile
+        :param act_ckpt_whole_vision_backbone: If True, run activation checkpointing over the whole vision backbone
+        :param act_ckpt_whole_language_backbone: If True, run activation checkpointing over the whole language backbone
+        :param scalp: Number of lowest-resolution feature levels to discard
+        """
+ super().__init__()
+ self.vision_backbone: Sam3DualViTDetNeck = (
+ torch.compile(visual) if compile_visual else visual
+ )
+ self.language_backbone = text
+ self.scalp = scalp
+ # allow running activation checkpointing on the entire vision and language backbones
+ self.act_ckpt_whole_vision_backbone = act_ckpt_whole_vision_backbone
+ self.act_ckpt_whole_language_backbone = act_ckpt_whole_language_backbone
+
+ def forward(
+ self,
+ samples: torch.Tensor,
+ captions: List[str],
+ input_boxes: Optional[torch.Tensor] = None,
+ additional_text: Optional[List[str]] = None,
+ ):
+ """Forward pass of the backbone combiner.
+
+ :param samples: The input images
+ :param captions: The input captions
+ :param input_boxes: If the text contains place-holders for boxes, this
+ parameter contains the tensor containing their spatial features
+ :param additional_text: This can be used to encode some additional text
+ (different from the captions) in the same forward of the backbone
+ :return: Output dictionary with the following keys:
+ - vision_features: The output of the vision backbone
+ - language_features: The output of the language backbone
+ - language_mask: The attention mask of the language backbone
+ - vision_pos_enc: The positional encoding of the vision backbone
+ - (optional) additional_text_features: The output of the language
+ backbone for the additional text
+ - (optional) additional_text_mask: The attention mask of the
+ language backbone for the additional text
+ """
+ output = self.forward_image(samples)
+ device = output["vision_features"].device
+ output.update(self.forward_text(captions, input_boxes, additional_text, device))
+ return output
+
+ def forward_image(self, samples: torch.Tensor):
+ return activation_ckpt_wrapper(self._forward_image_no_act_ckpt)(
+ samples=samples,
+ act_ckpt_enable=self.act_ckpt_whole_vision_backbone and self.training,
+ )
+
+ def _forward_image_no_act_ckpt(self, samples):
+ # Forward through backbone
+ sam3_features, sam3_pos, sam2_features, sam2_pos = self.vision_backbone.forward(
+ samples
+ )
+ if self.scalp > 0:
+ # Discard the lowest resolution features
+ sam3_features, sam3_pos = (
+ sam3_features[: -self.scalp],
+ sam3_pos[: -self.scalp],
+ )
+ if sam2_features is not None and sam2_pos is not None:
+ sam2_features, sam2_pos = (
+ sam2_features[: -self.scalp],
+ sam2_pos[: -self.scalp],
+ )
+
+ sam2_output = None
+
+ if sam2_features is not None and sam2_pos is not None:
+ sam2_src = sam2_features[-1]
+ sam2_output = {
+ "vision_features": sam2_src,
+ "vision_pos_enc": sam2_pos,
+ "backbone_fpn": sam2_features,
+ }
+
+ sam3_src = sam3_features[-1]
+ output = {
+ "vision_features": sam3_src,
+ "vision_pos_enc": sam3_pos,
+ "backbone_fpn": sam3_features,
+ "sam2_backbone_out": sam2_output,
+ }
+
+ return output
+
+ def forward_text(
+ self, captions, input_boxes=None, additional_text=None, device="cuda"
+ ):
+ return activation_ckpt_wrapper(self._forward_text_no_ack_ckpt)(
+ captions=captions,
+ input_boxes=input_boxes,
+ additional_text=additional_text,
+ device=device,
+ act_ckpt_enable=self.act_ckpt_whole_language_backbone and self.training,
+ )
+
+ def _forward_text_no_ack_ckpt(
+ self,
+ captions,
+ input_boxes=None,
+ additional_text=None,
+ device="cuda",
+ ):
+ output = {}
+
+ # Forward through text_encoder
+ text_to_encode = copy(captions)
+ if additional_text is not None:
+ # if there are additional_text, we piggy-back them into this forward.
+ # They'll be used later for output alignment
+ text_to_encode += additional_text
+
+ sdpa_context = sdpa_kernel(
+ [
+ SDPBackend.MATH,
+ SDPBackend.EFFICIENT_ATTENTION,
+ SDPBackend.FLASH_ATTENTION,
+ ]
+ )
+
+ with sdpa_context:
+ text_attention_mask, text_memory, text_embeds = self.language_backbone(
+ text_to_encode, input_boxes, device=device
+ )
+
+ if additional_text is not None:
+ output["additional_text_features"] = text_memory[:, -len(additional_text) :]
+ output["additional_text_mask"] = text_attention_mask[
+ -len(additional_text) :
+ ]
+
+ text_memory = text_memory[:, : len(captions)]
+ text_attention_mask = text_attention_mask[: len(captions)]
+ text_embeds = text_embeds[:, : len(captions)]
+ output["language_features"] = text_memory
+ output["language_mask"] = text_attention_mask
+        output["language_embeds"] = (
+            text_embeds  # text embeddings before being passed through the encoder
+        )
+
+ return output
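+
+
+# Illustrative usage sketch (commented out): this combiner is normally
+# constructed by the helpers in sam3/model_builder.py; the lines below assume
+# those helpers, a local BPE vocabulary file (placeholder path), and enough
+# memory for the full-size backbones.
+#
+#     from sam3.model_builder import _create_text_encoder, _create_vision_backbone
+#
+#     backbone = SAM3VLBackbone(
+#         visual=_create_vision_backbone(),
+#         text=_create_text_encoder(bpe_path="path/to/bpe_vocab.txt.gz"),
+#         scalp=1,
+#     )
+#     out = backbone(torch.randn(1, 3, 1008, 1008), captions=["a dog"])
+#     print(out["vision_features"].shape, out["language_features"].shape)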
diff --git a/sam3/model_builder.py b/sam3/model_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..058bbec3c2f0bbba6df0aefb5d513ff0d8aa9937
--- /dev/null
+++ b/sam3/model_builder.py
@@ -0,0 +1,793 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import os
+from typing import Optional
+
+import torch
+import torch.nn as nn
+from huggingface_hub import hf_hub_download
+from iopath.common.file_io import g_pathmgr
+from sam3.model.decoder import (
+ TransformerDecoder,
+ TransformerDecoderLayer,
+ TransformerDecoderLayerv2,
+ TransformerEncoderCrossAttention,
+)
+from sam3.model.encoder import TransformerEncoderFusion, TransformerEncoderLayer
+from sam3.model.geometry_encoders import SequenceGeometryEncoder
+from sam3.model.maskformer_segmentation import PixelDecoder, UniversalSegmentationHead
+from sam3.model.memory import (
+ CXBlock,
+ SimpleFuser,
+ SimpleMaskDownSampler,
+ SimpleMaskEncoder,
+)
+from sam3.model.model_misc import (
+ DotProductScoring,
+ MLP,
+ MultiheadAttentionWrapper as MultiheadAttention,
+ TransformerWrapper,
+)
+from sam3.model.necks import Sam3DualViTDetNeck
+from sam3.model.position_encoding import PositionEmbeddingSine
+from sam3.model.sam1_task_predictor import SAM3InteractiveImagePredictor
+from sam3.model.sam3_image import Sam3Image, Sam3ImageOnVideoMultiGPU
+from sam3.model.sam3_tracking_predictor import Sam3TrackerPredictor
+from sam3.model.sam3_video_inference import Sam3VideoInferenceWithInstanceInteractivity
+from sam3.model.sam3_video_predictor import Sam3VideoPredictorMultiGPU
+from sam3.model.text_encoder_ve import VETextEncoder
+from sam3.model.tokenizer_ve import SimpleTokenizer
+from sam3.model.vitdet import ViT
+from sam3.model.vl_combiner import SAM3VLBackbone
+from sam3.sam.transformer import RoPEAttention
+
+
+# Set up TensorFloat-32 for Ampere (or newer) GPUs if available
+def _setup_tf32() -> None:
+ """Enable TensorFloat-32 for Ampere GPUs if available."""
+ if torch.cuda.is_available():
+ device_props = torch.cuda.get_device_properties(0)
+ if device_props.major >= 8:
+ torch.backends.cuda.matmul.allow_tf32 = True
+ torch.backends.cudnn.allow_tf32 = True
+
+
+_setup_tf32()
+
+
+def _create_position_encoding(precompute_resolution=None):
+ """Create position encoding for visual backbone."""
+ return PositionEmbeddingSine(
+ num_pos_feats=256,
+ normalize=True,
+ scale=None,
+ temperature=10000,
+ precompute_resolution=precompute_resolution,
+ )
+
+
+def _create_vit_backbone(compile_mode=None):
+ """Create ViT backbone for visual feature extraction."""
+ return ViT(
+ img_size=1008,
+ pretrain_img_size=336,
+ patch_size=14,
+ embed_dim=1024,
+ depth=32,
+ num_heads=16,
+ mlp_ratio=4.625,
+ norm_layer="LayerNorm",
+ drop_path_rate=0.1,
+ qkv_bias=True,
+ use_abs_pos=True,
+ tile_abs_pos=True,
+ global_att_blocks=(7, 15, 23, 31),
+ rel_pos_blocks=(),
+ use_rope=True,
+ use_interp_rope=True,
+ window_size=24,
+ pretrain_use_cls_token=True,
+ retain_cls_token=False,
+ ln_pre=True,
+ ln_post=False,
+ return_interm_layers=False,
+ bias_patch_embed=False,
+ compile_mode=compile_mode,
+ )
+
+
+def _create_vit_neck(position_encoding, vit_backbone, enable_inst_interactivity=False):
+ """Create ViT neck for feature pyramid."""
+ return Sam3DualViTDetNeck(
+ position_encoding=position_encoding,
+ d_model=256,
+ scale_factors=[4.0, 2.0, 1.0, 0.5],
+ trunk=vit_backbone,
+ add_sam2_neck=enable_inst_interactivity,
+ )
+
+
+def _create_vl_backbone(vit_neck, text_encoder):
+ """Create visual-language backbone."""
+ return SAM3VLBackbone(visual=vit_neck, text=text_encoder, scalp=1)
+
+
+def _create_transformer_encoder() -> TransformerEncoderFusion:
+ """Create transformer encoder with its layer."""
+ encoder_layer = TransformerEncoderLayer(
+ activation="relu",
+ d_model=256,
+ dim_feedforward=2048,
+ dropout=0.1,
+ pos_enc_at_attn=True,
+ pos_enc_at_cross_attn_keys=False,
+ pos_enc_at_cross_attn_queries=False,
+ pre_norm=True,
+ self_attention=MultiheadAttention(
+ num_heads=8,
+ dropout=0.1,
+ embed_dim=256,
+ batch_first=True,
+ ),
+ cross_attention=MultiheadAttention(
+ num_heads=8,
+ dropout=0.1,
+ embed_dim=256,
+ batch_first=True,
+ ),
+ )
+
+ encoder = TransformerEncoderFusion(
+ layer=encoder_layer,
+ num_layers=6,
+ d_model=256,
+ num_feature_levels=1,
+ frozen=False,
+ use_act_checkpoint=True,
+ add_pooled_text_to_img_feat=False,
+ pool_text_with_mask=True,
+ )
+ return encoder
+
+
+def _create_transformer_decoder() -> TransformerDecoder:
+ """Create transformer decoder with its layer."""
+ decoder_layer = TransformerDecoderLayer(
+ activation="relu",
+ d_model=256,
+ dim_feedforward=2048,
+ dropout=0.1,
+ cross_attention=MultiheadAttention(
+ num_heads=8,
+ dropout=0.1,
+ embed_dim=256,
+ ),
+ n_heads=8,
+ use_text_cross_attention=True,
+ )
+
+ decoder = TransformerDecoder(
+ layer=decoder_layer,
+ num_layers=6,
+ num_queries=200,
+ return_intermediate=True,
+ box_refine=True,
+ num_o2m_queries=0,
+ dac=True,
+ boxRPB="log",
+ d_model=256,
+ frozen=False,
+ interaction_layer=None,
+ dac_use_selfatt_ln=True,
+ resolution=1008,
+ stride=14,
+ use_act_checkpoint=True,
+ presence_token=True,
+ )
+ return decoder
+
+
+def _create_dot_product_scoring():
+ """Create dot product scoring module."""
+ prompt_mlp = MLP(
+ input_dim=256,
+ hidden_dim=2048,
+ output_dim=256,
+ num_layers=2,
+ dropout=0.1,
+ residual=True,
+ out_norm=nn.LayerNorm(256),
+ )
+ return DotProductScoring(d_model=256, d_proj=256, prompt_mlp=prompt_mlp)
+
+
+def _create_segmentation_head(compile_mode=None):
+ """Create segmentation head with pixel decoder."""
+ pixel_decoder = PixelDecoder(
+ num_upsampling_stages=3,
+ interpolation_mode="nearest",
+ hidden_dim=256,
+ compile_mode=compile_mode,
+ )
+
+ cross_attend_prompt = MultiheadAttention(
+ num_heads=8,
+ dropout=0,
+ embed_dim=256,
+ )
+
+ segmentation_head = UniversalSegmentationHead(
+ hidden_dim=256,
+ upsampling_stages=3,
+ aux_masks=False,
+ presence_head=False,
+ dot_product_scorer=None,
+ act_ckpt=True,
+ cross_attend_prompt=cross_attend_prompt,
+ pixel_decoder=pixel_decoder,
+ )
+ return segmentation_head
+
+
+def _create_geometry_encoder():
+ """Create geometry encoder with all its components."""
+ # Create position encoding for geometry encoder
+ geo_pos_enc = _create_position_encoding()
+ # Create CX block for fuser
+ cx_block = CXBlock(
+ dim=256,
+ kernel_size=7,
+ padding=3,
+ layer_scale_init_value=1.0e-06,
+ use_dwconv=True,
+ )
+ # Create geometry encoder layer
+ geo_layer = TransformerEncoderLayer(
+ activation="relu",
+ d_model=256,
+ dim_feedforward=2048,
+ dropout=0.1,
+ pos_enc_at_attn=False,
+ pre_norm=True,
+ self_attention=MultiheadAttention(
+ num_heads=8,
+ dropout=0.1,
+ embed_dim=256,
+ batch_first=False,
+ ),
+ pos_enc_at_cross_attn_queries=False,
+ pos_enc_at_cross_attn_keys=True,
+ cross_attention=MultiheadAttention(
+ num_heads=8,
+ dropout=0.1,
+ embed_dim=256,
+ batch_first=False,
+ ),
+ )
+
+ # Create geometry encoder
+ input_geometry_encoder = SequenceGeometryEncoder(
+ pos_enc=geo_pos_enc,
+ encode_boxes_as_points=False,
+ points_direct_project=True,
+ points_pool=True,
+ points_pos_enc=True,
+ boxes_direct_project=True,
+ boxes_pool=True,
+ boxes_pos_enc=True,
+ d_model=256,
+ num_layers=3,
+ layer=geo_layer,
+ use_act_ckpt=True,
+ add_cls=True,
+ add_post_encode_proj=True,
+ )
+ return input_geometry_encoder
+
+
+def _create_sam3_model(
+ backbone,
+ transformer,
+ input_geometry_encoder,
+ segmentation_head,
+ dot_prod_scoring,
+ inst_interactive_predictor,
+ eval_mode,
+):
+ """Create the SAM3 image model."""
+ common_params = {
+ "backbone": backbone,
+ "transformer": transformer,
+ "input_geometry_encoder": input_geometry_encoder,
+ "segmentation_head": segmentation_head,
+ "num_feature_levels": 1,
+ "o2m_mask_predict": True,
+ "dot_prod_scoring": dot_prod_scoring,
+ "use_instance_query": False,
+ "multimask_output": True,
+ "inst_interactive_predictor": inst_interactive_predictor,
+ }
+
+ matcher = None
+ if not eval_mode:
+ from sam3.train.matcher import BinaryHungarianMatcherV2
+
+ matcher = BinaryHungarianMatcherV2(
+ focal=True,
+ cost_class=2.0,
+ cost_bbox=5.0,
+ cost_giou=2.0,
+ alpha=0.25,
+ gamma=2,
+ stable=False,
+ )
+ common_params["matcher"] = matcher
+ model = Sam3Image(**common_params)
+
+ return model
+
+
+def _create_tracker_maskmem_backbone():
+ """Create the SAM3 Tracker memory encoder."""
+ # Position encoding for mask memory backbone
+ position_encoding = PositionEmbeddingSine(
+ num_pos_feats=64,
+ normalize=True,
+ scale=None,
+ temperature=10000,
+ precompute_resolution=1008,
+ )
+
+ # Mask processing components
+ mask_downsampler = SimpleMaskDownSampler(
+ kernel_size=3, stride=2, padding=1, interpol_size=[1152, 1152]
+ )
+
+ cx_block_layer = CXBlock(
+ dim=256,
+ kernel_size=7,
+ padding=3,
+ layer_scale_init_value=1.0e-06,
+ use_dwconv=True,
+ )
+
+ fuser = SimpleFuser(layer=cx_block_layer, num_layers=2)
+
+ maskmem_backbone = SimpleMaskEncoder(
+ out_dim=64,
+ position_encoding=position_encoding,
+ mask_downsampler=mask_downsampler,
+ fuser=fuser,
+ )
+
+ return maskmem_backbone
+
+
+def _create_tracker_transformer():
+ """Create the SAM3 Tracker transformer components."""
+ # Self attention
+ self_attention = RoPEAttention(
+ embedding_dim=256,
+ num_heads=1,
+ downsample_rate=1,
+ dropout=0.1,
+ rope_theta=10000.0,
+ feat_sizes=[72, 72],
+ use_fa3=False,
+ use_rope_real=False,
+ )
+
+ # Cross attention
+ cross_attention = RoPEAttention(
+ embedding_dim=256,
+ num_heads=1,
+ downsample_rate=1,
+ dropout=0.1,
+ kv_in_dim=64,
+ rope_theta=10000.0,
+ feat_sizes=[72, 72],
+ rope_k_repeat=True,
+ use_fa3=False,
+ use_rope_real=False,
+ )
+
+ # Encoder layer
+ encoder_layer = TransformerDecoderLayerv2(
+ cross_attention_first=False,
+ activation="relu",
+ dim_feedforward=2048,
+ dropout=0.1,
+ pos_enc_at_attn=False,
+ pre_norm=True,
+ self_attention=self_attention,
+ d_model=256,
+ pos_enc_at_cross_attn_keys=True,
+ pos_enc_at_cross_attn_queries=False,
+ cross_attention=cross_attention,
+ )
+
+ # Encoder
+ encoder = TransformerEncoderCrossAttention(
+ remove_cross_attention_layers=[],
+ batch_first=True,
+ d_model=256,
+ frozen=False,
+ pos_enc_at_input=True,
+ layer=encoder_layer,
+ num_layers=4,
+ use_act_checkpoint=False,
+ )
+
+ # Transformer wrapper
+ transformer = TransformerWrapper(
+ encoder=encoder,
+ decoder=None,
+ d_model=256,
+ )
+
+ return transformer
+
+
+def build_tracker(
+ apply_temporal_disambiguation: bool, with_backbone: bool = False, compile_mode=None
+) -> Sam3TrackerPredictor:
+ """
+ Build the SAM3 Tracker module for video tracking.
+
+ Returns:
+ Sam3TrackerPredictor: Wrapped SAM3 Tracker module
+ """
+
+ # Create model components
+ maskmem_backbone = _create_tracker_maskmem_backbone()
+ transformer = _create_tracker_transformer()
+ backbone = None
+ if with_backbone:
+ vision_backbone = _create_vision_backbone(compile_mode=compile_mode)
+ backbone = SAM3VLBackbone(scalp=1, visual=vision_backbone, text=None)
+ # Create the Tracker module
+ model = Sam3TrackerPredictor(
+ image_size=1008,
+ num_maskmem=7,
+ backbone=backbone,
+ backbone_stride=14,
+ transformer=transformer,
+ maskmem_backbone=maskmem_backbone,
+ # SAM parameters
+ multimask_output_in_sam=True,
+ # Evaluation
+ forward_backbone_per_frame_for_eval=True,
+ trim_past_non_cond_mem_for_eval=False,
+ # Multimask
+ multimask_output_for_tracking=True,
+ multimask_min_pt_num=0,
+ multimask_max_pt_num=1,
+ # Additional settings
+ always_start_from_first_ann_frame=False,
+ # Mask overlap
+ non_overlap_masks_for_mem_enc=False,
+ non_overlap_masks_for_output=False,
+ max_cond_frames_in_attn=4,
+ offload_output_to_cpu_for_eval=False,
+ # SAM decoder settings
+ sam_mask_decoder_extra_args={
+ "dynamic_multimask_via_stability": True,
+ "dynamic_multimask_stability_delta": 0.05,
+ "dynamic_multimask_stability_thresh": 0.98,
+ },
+ clear_non_cond_mem_around_input=True,
+ fill_hole_area=0,
+ use_memory_selection=apply_temporal_disambiguation,
+ )
+
+ return model
+
+
+def _create_text_encoder(bpe_path: str) -> VETextEncoder:
+ """Create SAM3 text encoder."""
+ tokenizer = SimpleTokenizer(bpe_path=bpe_path)
+ return VETextEncoder(
+ tokenizer=tokenizer,
+ d_model=256,
+ width=1024,
+ heads=16,
+ layers=24,
+ )
+
+
+def _create_vision_backbone(
+ compile_mode=None, enable_inst_interactivity=True
+) -> Sam3DualViTDetNeck:
+ """Create SAM3 visual backbone with ViT and neck."""
+ # Position encoding
+ position_encoding = _create_position_encoding(precompute_resolution=1008)
+ # ViT backbone
+ vit_backbone: ViT = _create_vit_backbone(compile_mode=compile_mode)
+ vit_neck: Sam3DualViTDetNeck = _create_vit_neck(
+ position_encoding,
+ vit_backbone,
+ enable_inst_interactivity=enable_inst_interactivity,
+ )
+ # Visual neck
+ return vit_neck
+
+
+def _create_sam3_transformer(has_presence_token: bool = True) -> TransformerWrapper:
+ """Create SAM3 transformer encoder and decoder."""
+ encoder: TransformerEncoderFusion = _create_transformer_encoder()
+ decoder: TransformerDecoder = _create_transformer_decoder()
+
+ return TransformerWrapper(encoder=encoder, decoder=decoder, d_model=256)
+
+
+def _load_checkpoint(model, checkpoint_path):
+ """Load model checkpoint from file."""
+ with g_pathmgr.open(checkpoint_path, "rb") as f:
+ ckpt = torch.load(f, map_location="cpu", weights_only=True)
+ if "model" in ckpt and isinstance(ckpt["model"], dict):
+ ckpt = ckpt["model"]
+ sam3_image_ckpt = {
+ k.replace("detector.", ""): v for k, v in ckpt.items() if "detector" in k
+ }
+ if model.inst_interactive_predictor is not None:
+ sam3_image_ckpt.update(
+ {
+ k.replace("tracker.", "inst_interactive_predictor.model."): v
+ for k, v in ckpt.items()
+ if "tracker" in k
+ }
+ )
+    missing_keys, unexpected_keys = model.load_state_dict(sam3_image_ckpt, strict=False)
+    if len(missing_keys) > 0 or len(unexpected_keys) > 0:
+        print(
+            f"loaded {checkpoint_path} and found "
+            f"missing and/or unexpected keys:\n{missing_keys=}\n{unexpected_keys=}"
+        )
+
+
+def _setup_device_and_mode(model, device, eval_mode):
+ """Setup model device and evaluation mode."""
+ if device == "cuda":
+ model = model.cuda()
+ if eval_mode:
+ model.eval()
+ return model
+
+
+def build_sam3_image_model(
+ bpe_path=None,
+ device="cuda" if torch.cuda.is_available() else "cpu",
+ eval_mode=True,
+ checkpoint_path=None,
+ load_from_HF=True,
+ enable_segmentation=True,
+ enable_inst_interactivity=False,
+ compile=False,
+):
+ """
+ Build SAM3 image model
+
+ Args:
+ bpe_path: Path to the BPE tokenizer vocabulary
+ device: Device to load the model on ('cuda' or 'cpu')
+ eval_mode: Whether to set the model to evaluation mode
+        checkpoint_path: Optional path to model checkpoint
+        load_from_HF: Whether to download the checkpoint from Hugging Face when
+            checkpoint_path is not provided
+        enable_segmentation: Whether to enable segmentation head
+        enable_inst_interactivity: Whether to enable instance interactivity (SAM 1 task)
+        compile: Whether to compile parts of the model (compilation uses the "default" mode)
+
+ Returns:
+ A SAM3 image model
+ """
+ if bpe_path is None:
+ bpe_path = os.path.join(
+ os.path.dirname(__file__), "..", "assets", "bpe_simple_vocab_16e6.txt.gz"
+ )
+ # Create visual components
+ compile_mode = "default" if compile else None
+ vision_encoder = _create_vision_backbone(
+ compile_mode=compile_mode, enable_inst_interactivity=enable_inst_interactivity
+ )
+
+ # Create text components
+ text_encoder = _create_text_encoder(bpe_path)
+
+ # Create visual-language backbone
+ backbone = _create_vl_backbone(vision_encoder, text_encoder)
+
+ # Create transformer components
+ transformer = _create_sam3_transformer()
+
+ # Create dot product scoring
+ dot_prod_scoring = _create_dot_product_scoring()
+
+ # Create segmentation head if enabled
+ segmentation_head = (
+ _create_segmentation_head(compile_mode=compile_mode)
+ if enable_segmentation
+ else None
+ )
+
+ # Create geometry encoder
+ input_geometry_encoder = _create_geometry_encoder()
+ if enable_inst_interactivity:
+ sam3_pvs_base = build_tracker(apply_temporal_disambiguation=False)
+ inst_predictor = SAM3InteractiveImagePredictor(sam3_pvs_base)
+ else:
+ inst_predictor = None
+ # Create the SAM3 model
+ model = _create_sam3_model(
+ backbone,
+ transformer,
+ input_geometry_encoder,
+ segmentation_head,
+ dot_prod_scoring,
+ inst_predictor,
+ eval_mode,
+ )
+ if load_from_HF and checkpoint_path is None:
+ checkpoint_path = download_ckpt_from_hf()
+ # Load checkpoint if provided
+ if checkpoint_path is not None:
+ _load_checkpoint(model, checkpoint_path)
+
+ # Setup device and mode
+ model = _setup_device_and_mode(model, device, eval_mode)
+
+ return model
+
+
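+# Minimal usage sketch for the builder above (illustrative only; not referenced elsewhere
+# in the package). It assumes access to the Hugging Face checkpoint; pass
+# `checkpoint_path` to load local weights instead.
+def _example_build_sam3_image_model():
+    model = build_sam3_image_model(eval_mode=True, load_from_HF=True)
+    return model
+
+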
+def download_ckpt_from_hf():
+ SAM3_MODEL_ID = "facebook/sam3"
+ SAM3_CKPT_NAME = "sam3.pt"
+ SAM3_CFG_NAME = "config.json"
+ _ = hf_hub_download(repo_id=SAM3_MODEL_ID, filename=SAM3_CFG_NAME)
+ checkpoint_path = hf_hub_download(repo_id=SAM3_MODEL_ID, filename=SAM3_CKPT_NAME)
+ return checkpoint_path
+
+
+def build_sam3_video_model(
+ checkpoint_path: Optional[str] = None,
+ load_from_HF=True,
+ bpe_path: Optional[str] = None,
+ has_presence_token: bool = True,
+ geo_encoder_use_img_cross_attn: bool = True,
+ strict_state_dict_loading: bool = True,
+ apply_temporal_disambiguation: bool = True,
+ device="cuda" if torch.cuda.is_available() else "cpu",
+ compile=False,
+) -> Sam3VideoInferenceWithInstanceInteractivity:
+ """
+ Build SAM3 dense tracking model.
+
+    Args:
+        checkpoint_path: Optional path to checkpoint file
+        load_from_HF: Whether to download the checkpoint from Hugging Face when
+            checkpoint_path is not provided
+        bpe_path: Path to the BPE tokenizer file
+        has_presence_token: Whether the detector supervises joint box scores with a
+            presence token
+        apply_temporal_disambiguation: Whether to enable the temporal disambiguation
+            heuristics (disable for ablation studies)
+        strict_state_dict_loading: Whether to load the checkpoint with strict=True
+        device: Device to load the model on ('cuda' or 'cpu')
+        compile: Whether to enable model compilation in the video inference module
+
+ Returns:
+ Sam3VideoInferenceWithInstanceInteractivity: The instantiated dense tracking model
+ """
+ if bpe_path is None:
+ bpe_path = os.path.join(
+ os.path.dirname(__file__), "..", "assets", "bpe_simple_vocab_16e6.txt.gz"
+ )
+
+ # Build Tracker module
+ tracker = build_tracker(apply_temporal_disambiguation=apply_temporal_disambiguation)
+
+ # Build Detector components
+ visual_neck = _create_vision_backbone()
+ text_encoder = _create_text_encoder(bpe_path)
+ backbone = SAM3VLBackbone(scalp=1, visual=visual_neck, text=text_encoder)
+ transformer = _create_sam3_transformer(has_presence_token=has_presence_token)
+ segmentation_head: UniversalSegmentationHead = _create_segmentation_head()
+ input_geometry_encoder = _create_geometry_encoder()
+
+ # Create main dot product scoring
+ main_dot_prod_mlp = MLP(
+ input_dim=256,
+ hidden_dim=2048,
+ output_dim=256,
+ num_layers=2,
+ dropout=0.1,
+ residual=True,
+ out_norm=nn.LayerNorm(256),
+ )
+ main_dot_prod_scoring = DotProductScoring(
+ d_model=256, d_proj=256, prompt_mlp=main_dot_prod_mlp
+ )
+
+ # Build Detector module
+ detector = Sam3ImageOnVideoMultiGPU(
+ num_feature_levels=1,
+ backbone=backbone,
+ transformer=transformer,
+ segmentation_head=segmentation_head,
+ semantic_segmentation_head=None,
+ input_geometry_encoder=input_geometry_encoder,
+ use_early_fusion=True,
+ use_dot_prod_scoring=True,
+ dot_prod_scoring=main_dot_prod_scoring,
+ supervise_joint_box_scores=has_presence_token,
+ )
+
+ # Build the main SAM3 video model
+ if apply_temporal_disambiguation:
+ model = Sam3VideoInferenceWithInstanceInteractivity(
+ detector=detector,
+ tracker=tracker,
+ score_threshold_detection=0.5,
+ assoc_iou_thresh=0.1,
+ det_nms_thresh=0.1,
+ new_det_thresh=0.7,
+ hotstart_delay=15,
+ hotstart_unmatch_thresh=8,
+ hotstart_dup_thresh=8,
+ suppress_unmatched_only_within_hotstart=True,
+ min_trk_keep_alive=-1,
+ max_trk_keep_alive=30,
+ init_trk_keep_alive=30,
+ suppress_overlapping_based_on_recent_occlusion_threshold=0.7,
+ suppress_det_close_to_boundary=False,
+ fill_hole_area=16,
+ recondition_every_nth_frame=16,
+ masklet_confirmation_enable=False,
+ decrease_trk_keep_alive_for_empty_masklets=False,
+ image_size=1008,
+ image_mean=(0.5, 0.5, 0.5),
+ image_std=(0.5, 0.5, 0.5),
+ compile_model=compile,
+ )
+ else:
+ # a version without any heuristics for ablation studies
+ model = Sam3VideoInferenceWithInstanceInteractivity(
+ detector=detector,
+ tracker=tracker,
+ score_threshold_detection=0.5,
+ assoc_iou_thresh=0.1,
+ det_nms_thresh=0.1,
+ new_det_thresh=0.7,
+ hotstart_delay=0,
+ hotstart_unmatch_thresh=0,
+ hotstart_dup_thresh=0,
+ suppress_unmatched_only_within_hotstart=True,
+ min_trk_keep_alive=-1,
+ max_trk_keep_alive=30,
+ init_trk_keep_alive=30,
+ suppress_overlapping_based_on_recent_occlusion_threshold=0.7,
+ suppress_det_close_to_boundary=False,
+ fill_hole_area=16,
+ recondition_every_nth_frame=0,
+ masklet_confirmation_enable=False,
+ decrease_trk_keep_alive_for_empty_masklets=False,
+ image_size=1008,
+ image_mean=(0.5, 0.5, 0.5),
+ image_std=(0.5, 0.5, 0.5),
+ compile_model=compile,
+ )
+
+ # Load checkpoint if provided
+ if load_from_HF and checkpoint_path is None:
+ checkpoint_path = download_ckpt_from_hf()
+ if checkpoint_path is not None:
+ with g_pathmgr.open(checkpoint_path, "rb") as f:
+ ckpt = torch.load(f, map_location="cpu", weights_only=True)
+ if "model" in ckpt and isinstance(ckpt["model"], dict):
+ ckpt = ckpt["model"]
+
+ missing_keys, unexpected_keys = model.load_state_dict(
+ ckpt, strict=strict_state_dict_loading
+ )
+ if missing_keys:
+ print(f"Missing keys: {missing_keys}")
+ if unexpected_keys:
+ print(f"Unexpected keys: {unexpected_keys}")
+
+ model.to(device=device)
+ return model
+
+
+def build_sam3_video_predictor(*model_args, gpus_to_use=None, **model_kwargs):
+ return Sam3VideoPredictorMultiGPU(
+ *model_args, gpus_to_use=gpus_to_use, **model_kwargs
+ )
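+
+
+# Minimal usage sketch for the video builders above (illustrative only; not referenced
+# elsewhere in the package). It assumes access to the Hugging Face checkpoint; pass
+# `checkpoint_path` to load local weights instead.
+def _example_build_sam3_video_model():
+    model = build_sam3_video_model(load_from_HF=True, apply_temporal_disambiguation=True)
+    return model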
diff --git a/sam3/perflib/__init__.py b/sam3/perflib/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..5c3823b937eb21830e435700c972ea8f6a0efa9b
--- /dev/null
+++ b/sam3/perflib/__init__.py
@@ -0,0 +1,8 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import os
+
+is_enabled = False
+if os.getenv("USE_PERFLIB", "1") == "1":
+ # print("Enabled the use of perflib.\n", end="")
+ is_enabled = True
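+
+# Usage note: the optimized code paths in this package are on by default; setting
+# USE_PERFLIB=0 in the environment before importing sam3 leaves `is_enabled` False.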
diff --git a/sam3/perflib/associate_det_trk.py b/sam3/perflib/associate_det_trk.py
new file mode 100644
index 0000000000000000000000000000000000000000..508ae817dcd60d835e3581cf0e7883163af0688e
--- /dev/null
+++ b/sam3/perflib/associate_det_trk.py
@@ -0,0 +1,137 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from collections import defaultdict
+
+import torch
+import torch.nn.functional as F
+from sam3.perflib.masks_ops import mask_iou
+from scipy.optimize import linear_sum_assignment
+
+
+def associate_det_trk(
+ det_masks,
+ track_masks,
+ iou_threshold=0.5,
+ iou_threshold_trk=0.5,
+ det_scores=None,
+ new_det_thresh=0.0,
+):
+ """
+ Optimized implementation of detection <-> track association that minimizes DtoH syncs.
+
+    Args:
+        det_masks: (N, H, W) tensor of predicted masks
+        track_masks: (M, H, W) tensor of track masks
+        iou_threshold: IoU above which a detection is considered to match a track
+        iou_threshold_trk: IoU above which a Hungarian-assigned track counts as matched
+        det_scores: optional (N,) tensor of detection scores
+        new_det_thresh: minimum score for an unmatched detection to be reported as 'new'
+
+    Returns:
+        new_det_indices: list of indices in det_masks considered 'new'
+        unmatched_trk_indices: list of indices in track_masks considered 'unmatched'
+        det_to_matched_trk: dict mapping each detection index to the track indices it
+            matched above `iou_threshold`
+        matched_det_scores: dict mapping each track index to [det_score, det_score * iou]
+            for the detection assigned to it by the Hungarian matching
+    """
+ with torch.autograd.profiler.record_function("perflib: associate_det_trk"):
+ assert isinstance(det_masks, torch.Tensor), "det_masks should be a tensor"
+ assert isinstance(track_masks, torch.Tensor), "track_masks should be a tensor"
+ if det_masks.size(0) == 0 or track_masks.size(0) == 0:
+ return list(range(det_masks.size(0))), [], {}, {} # all detections are new
+
+ if list(det_masks.shape[-2:]) != list(track_masks.shape[-2:]):
+            # resize to the smaller spatial size to save GPU memory
+            if det_masks.shape[-2:].numel() < track_masks.shape[-2:].numel():
+ track_masks = (
+ F.interpolate(
+ track_masks.unsqueeze(1).float(),
+ size=det_masks.shape[-2:],
+ mode="bilinear",
+ align_corners=False,
+ ).squeeze(1)
+ > 0
+ )
+ else:
+ # resize detections to track size
+ det_masks = (
+ F.interpolate(
+ det_masks.unsqueeze(1).float(),
+ size=track_masks.shape[-2:],
+ mode="bilinear",
+ align_corners=False,
+ ).squeeze(1)
+ > 0
+ )
+
+ det_masks = det_masks > 0
+ track_masks = track_masks > 0
+
+ iou = mask_iou(det_masks, track_masks) # (N, M)
+ igeit = iou >= iou_threshold
+ igeit_any_dim_1 = igeit.any(dim=1)
+ igeit_trk = iou >= iou_threshold_trk
+
+ iou_list = iou.cpu().numpy().tolist()
+ igeit_list = igeit.cpu().numpy().tolist()
+ igeit_any_dim_1_list = igeit_any_dim_1.cpu().numpy().tolist()
+ igeit_trk_list = igeit_trk.cpu().numpy().tolist()
+
+        det_scores_list = (
+            None
+            if det_scores is None
+            else det_scores.cpu().float().numpy().tolist()
+        )
+
+ # Hungarian matching for tracks (one-to-one: each track matches at most one detection)
+ # For detections: allow many tracks to match to the same detection (many-to-one)
+
+ # If either is empty, return all detections as new
+ if det_masks.size(0) == 0 or track_masks.size(0) == 0:
+            return list(range(det_masks.size(0))), [], {}, {}
+
+ # Hungarian matching: maximize IoU for tracks
+ cost_matrix = 1 - iou.cpu().numpy() # Hungarian solves for minimum cost
+ row_ind, col_ind = linear_sum_assignment(cost_matrix)
+
+ def branchy_hungarian_better_uses_the_cpu(
+ cost_matrix, row_ind, col_ind, iou_list, det_masks, track_masks
+ ):
+ matched_trk = set()
+ matched_det = set()
+ matched_det_scores = {} # track index -> [det_score, det_score * iou] det score of matched detection mask
+ for d, t in zip(row_ind, col_ind):
+ matched_det_scores[t] = [
+ det_scores_list[d],
+ det_scores_list[d] * iou_list[d][t],
+ ]
+ if igeit_trk_list[d][t]:
+ matched_trk.add(t)
+ matched_det.add(d)
+
+ # Tracks not matched by Hungarian assignment above threshold are unmatched
+ unmatched_trk_indices = [
+ t for t in range(track_masks.size(0)) if t not in matched_trk
+ ]
+
+ # For detections: allow many tracks to match to the same detection (many-to-one)
+ # So, a detection is 'new' if it does not match any track above threshold
+            # needed for the loop optimization below
+            assert track_masks.size(0) == igeit.size(1)
+ new_det_indices = []
+ for d in range(det_masks.size(0)):
+ if not igeit_any_dim_1_list[d]:
+ if det_scores is not None and det_scores[d] >= new_det_thresh:
+ new_det_indices.append(d)
+
+ # for each detection, which tracks it matched to (above threshold)
+ det_to_matched_trk = defaultdict(list)
+ for d in range(det_masks.size(0)):
+ for t in range(track_masks.size(0)):
+ if igeit_list[d][t]:
+ det_to_matched_trk[d].append(t)
+
+ return (
+ new_det_indices,
+ unmatched_trk_indices,
+ det_to_matched_trk,
+ matched_det_scores,
+ )
+
+ return (branchy_hungarian_better_uses_the_cpu)(
+ cost_matrix, row_ind, col_ind, iou_list, det_masks, track_masks
+ )
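+
+
+# Minimal worked example for the association routine above (illustrative only; not
+# referenced elsewhere in the package).
+def _example_associate_det_trk():
+    det_masks = torch.zeros(2, 16, 16, dtype=torch.bool)
+    trk_masks = torch.zeros(2, 16, 16, dtype=torch.bool)
+    det_masks[0, 0:8, 0:8] = True      # overlaps track 0 exactly -> matched
+    det_masks[1, 10:14, 10:14] = True  # overlaps no track -> reported as 'new'
+    trk_masks[0, 0:8, 0:8] = True
+    trk_masks[1, 0:4, 12:16] = True    # overlaps no detection -> reported as unmatched
+    det_scores = torch.tensor([0.9, 0.8])
+    new_dets, unmatched_trks, det_to_trk, matched_scores = associate_det_trk(
+        det_masks, trk_masks, det_scores=det_scores, new_det_thresh=0.5
+    )
+    # new_dets == [1], unmatched_trks == [1], det_to_trk == {0: [0]}
+    return new_dets, unmatched_trks, det_to_trk, matched_scores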
diff --git a/sam3/perflib/compile.py b/sam3/perflib/compile.py
new file mode 100644
index 0000000000000000000000000000000000000000..f427aa7b066ca366b3650e331b22072d4e07c4c5
--- /dev/null
+++ b/sam3/perflib/compile.py
@@ -0,0 +1,99 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import torch
+
+
+def recursive_fn_factory(fn):
+ def recursive_fn(b):
+ if isinstance(b, dict):
+ return {k: recursive_fn(b[k]) for k in b}
+ if isinstance(b, list):
+ return [recursive_fn(t) for t in b]
+ if isinstance(b, tuple):
+ return tuple(recursive_fn(t) for t in b)
+ if isinstance(b, torch.Tensor):
+ return fn(b)
+ # Yes, writing out an explicit white list of
+ # trivial types is tedious, but so are bugs that
+ # come from not applying fn, when expected to have
+ # applied it.
+ if b is None:
+ return b
+ trivial_types = [bool, int]
+ for t in trivial_types:
+ if isinstance(b, t):
+ return b
+ raise TypeError(f"Unexpected type {type(b)}")
+
+ return recursive_fn
+
+
+recursive_contiguous = recursive_fn_factory(lambda x: x.contiguous())
+recursive_clone = recursive_fn_factory(torch.clone)
+
+
+def compile_wrapper(
+ fn, *, mode="max-autotune", fullgraph=True, dynamic=False, name=None
+):
+ compiled_fn = torch.compile(fn, mode=mode, fullgraph=fullgraph, dynamic=dynamic)
+
+ def compiled_fn_wrapper(*args, **kwargs):
+ with torch.autograd.profiler.record_function(
+ f"compiled {fn}" if name is None else name
+ ):
+ cont_args = recursive_contiguous(args)
+ cont_kwargs = recursive_contiguous(kwargs)
+ result = compiled_fn(*cont_args, **cont_kwargs)
+ cloned_result = recursive_clone(result)
+ return cloned_result
+
+ return compiled_fn_wrapper
+
+
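+# Minimal usage sketch for `compile_wrapper` above (illustrative only; not referenced
+# elsewhere in the package): the wrapped function receives contiguous inputs and its
+# outputs are cloned, as implemented above.
+def _example_compile_wrapper():
+    def scaled_add(a, b):
+        return a + 2.0 * b
+
+    fast_scaled_add = compile_wrapper(scaled_add, mode="default", name="scaled_add")
+    x = torch.randn(8, 8)
+    y = torch.randn(8, 8)
+    # Same values as scaled_add(x, y), returned as a fresh tensor.
+    return fast_scaled_add(x, y)
+
+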
+def shape_logging_wrapper(fn, keep_kwargs, enable_logging=False):
+ """
+ Wraps a function and prints the shapes of all tensor inputs.
+ Only prints when a new combination of shapes is seen.
+ Thread-safe.
+
+ Args:
+ fn: Function to wrap
+ enable_logging: Boolean flag to enable/disable logging
+ """
+ seen_shapes = set()
+
+ def get_shape(obj):
+ if isinstance(obj, torch.Tensor):
+ return obj.shape
+ elif isinstance(obj, (list, tuple)):
+            if len(obj) == 1:
+                return get_shape(obj[0])
+            return tuple(get_shape(x) for x in obj)
+ elif isinstance(obj, dict):
+ return tuple(sorted((k, get_shape(v)) for k, v in obj.items()))
+ else:
+ return type(obj).__name__
+
+ def wrapper(*args, **kwargs):
+ shapes = tuple(get_shape(arg) for arg in args) + tuple(
+ (k, get_shape(v))
+ for k, v in kwargs.items()
+ if isinstance(v, (torch.Tensor, list))
+ and (len(keep_kwargs) > 0 and k in keep_kwargs)
+ )
+ if shapes not in seen_shapes:
+ seen_shapes.add(shapes)
+ if enable_logging:
+ print(f"[ShapeLogger] New input shapes for {fn.__qualname__}: {shapes}")
+ return fn(*args, **kwargs)
+
+ # Allow toggling the flag at runtime
+ wrapper.enable_logging = enable_logging
+
+ def set_logging(enabled=False):
+ nonlocal enable_logging
+ enable_logging = enabled
+ wrapper.enable_logging = enable_logging
+
+ wrapper.set_logging = set_logging
+ return wrapper
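+
+
+# Minimal usage sketch for `shape_logging_wrapper` above (illustrative only; not
+# referenced elsewhere in the package).
+def _example_shape_logging_wrapper():
+    def mask_area(mask: torch.Tensor) -> torch.Tensor:
+        return mask.float().sum()
+
+    logged_area = shape_logging_wrapper(mask_area, keep_kwargs=(), enable_logging=True)
+    logged_area(torch.ones(4, 4))  # first (4, 4) input -> shape is printed
+    logged_area(torch.ones(4, 4))  # shape already seen -> no print
+    logged_area(torch.ones(8, 8))  # new shape -> printed again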
diff --git a/sam3/perflib/connected_components.py b/sam3/perflib/connected_components.py
new file mode 100644
index 0000000000000000000000000000000000000000..c96932a4ae31092872a0c50b7fec3c53662df354
--- /dev/null
+++ b/sam3/perflib/connected_components.py
@@ -0,0 +1,84 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import logging
+
+import torch
+
+try:
+ from cc_torch import get_connected_components
+
+ HAS_CC_TORCH = True
+except ImportError:
+ logging.debug(
+ "cc_torch not found. Consider installing for better performance. Command line:"
+ " pip install git+https://github.com/ronghanghu/cc_torch.git"
+ )
+ HAS_CC_TORCH = False
+
+
+def connected_components_cpu_single(values: torch.Tensor):
+ assert values.dim() == 2
+ from skimage.measure import label
+
+ labels, num = label(values.cpu().numpy(), return_num=True)
+ labels = torch.from_numpy(labels)
+ counts = torch.zeros_like(labels)
+ for i in range(1, num + 1):
+ cur_mask = labels == i
+ cur_count = cur_mask.sum()
+ counts[cur_mask] = cur_count
+ return labels, counts
+
+
+def connected_components_cpu(input_tensor: torch.Tensor):
+ out_shape = input_tensor.shape
+ if input_tensor.dim() == 4 and input_tensor.shape[1] == 1:
+ input_tensor = input_tensor.squeeze(1)
+ else:
+ assert (
+ input_tensor.dim() == 3
+ ), "Input tensor must be (B, H, W) or (B, 1, H, W)."
+
+ batch_size = input_tensor.shape[0]
+ labels_list = []
+ counts_list = []
+ for b in range(batch_size):
+ labels, counts = connected_components_cpu_single(input_tensor[b])
+ labels_list.append(labels)
+ counts_list.append(counts)
+ labels_tensor = torch.stack(labels_list, dim=0).to(input_tensor.device)
+ counts_tensor = torch.stack(counts_list, dim=0).to(input_tensor.device)
+ return labels_tensor.view(out_shape), counts_tensor.view(out_shape)
+
+
+def connected_components(input_tensor: torch.Tensor):
+ """
+ Computes connected components labeling on a batch of 2D tensors, using the best available backend.
+
+ Args:
+ input_tensor (torch.Tensor): A BxHxW integer tensor or Bx1xHxW. Non-zero values are considered foreground. Bool tensor also accepted
+
+ Returns:
+ Tuple[torch.Tensor, torch.Tensor]: Both tensors have the same shape as input_tensor.
+ - A tensor with dense labels. Background is 0.
+ - A tensor with the size of the connected component for each pixel.
+ """
+ if input_tensor.dim() == 3:
+ input_tensor = input_tensor.unsqueeze(1)
+
+ assert (
+ input_tensor.dim() == 4 and input_tensor.shape[1] == 1
+ ), "Input tensor must be (B, H, W) or (B, 1, H, W)."
+
+ if input_tensor.is_cuda:
+ if HAS_CC_TORCH:
+ return get_connected_components(input_tensor.to(torch.uint8))
+ else:
+ # triton fallback
+ from sam3.perflib.triton.connected_components import (
+ connected_components_triton,
+ )
+
+ return connected_components_triton(input_tensor)
+
+ # CPU fallback
+ return connected_components_cpu(input_tensor)
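+
+
+# Minimal usage sketch for the dispatcher above (illustrative only; not referenced
+# elsewhere in the package). On CPU tensors this exercises the scikit-image fallback.
+def _example_connected_components():
+    masks = torch.zeros(1, 8, 8, dtype=torch.int32)
+    masks[0, 1:3, 1:3] = 1  # first blob, 4 pixels
+    masks[0, 5:8, 5:8] = 1  # second blob, 9 pixels
+    labels, counts = connected_components(masks)
+    # labels: 0 on background, a distinct positive id per blob;
+    # counts: 4 on the first blob, 9 on the second, 0 on background.
+    return labels, counts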
diff --git a/sam3/perflib/fa3.py b/sam3/perflib/fa3.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f8c9bd544679d01963c8df2594a0184a93f72c8
--- /dev/null
+++ b/sam3/perflib/fa3.py
@@ -0,0 +1,27 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import torch
+
+
+@torch.library.custom_op("flash::flash_attn_func", mutates_args=())
+def flash_attn_func_op(
+ q: torch.Tensor, k: torch.Tensor, v: torch.Tensor
+) -> torch.Tensor:
+ from flash_attn_interface import flash_attn_func as fa3
+
+ return fa3(q, k, v)
+
+
+def flash_attn_func(q, k, v):
+ dtype = torch.float8_e4m3fn
+ return flash_attn_func_op(q.to(dtype), k.to(dtype), v.to(dtype)).to(q.dtype)
+
+
+@flash_attn_func_op.register_fake
+def _(q, k, v, **kwargs):
+    # The real FA3 kernel produces two outputs:
+    #   1. output: (batch, seq_len, num_heads, head_dim)
+    #   2. softmax_lse: (batch, num_heads, seq_len) with dtype=torch.float32
+    # Only the primary output is modeled here, and it needs to be bfloat16, not float8.
+ meta_q = torch.empty_like(q, dtype=torch.bfloat16).contiguous()
+ return meta_q
diff --git a/sam3/perflib/masks_ops.py b/sam3/perflib/masks_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..48299d585df170ac67cdf5e66d6108668cf6cd97
--- /dev/null
+++ b/sam3/perflib/masks_ops.py
@@ -0,0 +1,69 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import torch
+
+
+def masks_to_boxes(masks: torch.Tensor, obj_ids: list[int]):
+ with torch.autograd.profiler.record_function("perflib: masks_to_boxes"):
+ # Sanity check based on callsite for replacement
+ assert masks.shape[0] == len(obj_ids)
+ assert masks.dim() == 3
+
+ # Based on torchvision masks_to_boxes
+ if masks.numel() == 0:
+ return torch.zeros((0, 4), device=masks.device, dtype=torch.float)
+
+ N, H, W = masks.shape
+ device = masks.device
+ y = torch.arange(H, device=device).view(1, H)
+ x = torch.arange(W, device=device).view(1, W)
+
+ masks_with_obj = masks != 0 # N, H, W
+        masks_with_obj_x = masks_with_obj.amax(dim=1)  # N, W (which columns have objects)
+        masks_with_obj_y = masks_with_obj.amax(dim=2)  # N, H (which rows have objects)
+ masks_without_obj_x = ~masks_with_obj_x
+ masks_without_obj_y = ~masks_with_obj_y
+
+ bounding_boxes_0 = torch.amin(
+ (masks_without_obj_x * W) + (masks_with_obj_x * x), dim=1
+ )
+ bounding_boxes_1 = torch.amin(
+ (masks_without_obj_y * H) + (masks_with_obj_y * y), dim=1
+ )
+ bounding_boxes_2 = torch.amax(masks_with_obj_x * x, dim=1)
+ bounding_boxes_3 = torch.amax(masks_with_obj_y * y, dim=1)
+
+ bounding_boxes = torch.stack(
+ [bounding_boxes_0, bounding_boxes_1, bounding_boxes_2, bounding_boxes_3],
+ dim=1,
+ ).to(dtype=torch.float)
+ assert bounding_boxes.shape == (N, 4)
+ assert bounding_boxes.device == masks.device
+ assert bounding_boxes.dtype == torch.float
+ return bounding_boxes
+
+
+def mask_iou(pred_masks: torch.Tensor, gt_masks: torch.Tensor) -> torch.Tensor:
+ """
+ Compute the IoU (Intersection over Union) between predicted masks and ground truth masks.
+ Args:
+ - pred_masks: (N, H, W) bool Tensor, containing binary predicted segmentation masks
+ - gt_masks: (M, H, W) bool Tensor, containing binary ground truth segmentation masks
+ Returns:
+ - ious: (N, M) float Tensor, containing IoUs for each pair of predicted and ground truth masks
+ """
+ assert pred_masks.dtype == gt_masks.dtype == torch.bool
+ N, H, W = pred_masks.shape
+ M, _, _ = gt_masks.shape
+
+ # Flatten masks: (N, 1, H*W) and (1, M, H*W)
+ pred_flat = pred_masks.view(N, 1, H * W)
+ gt_flat = gt_masks.view(1, M, H * W)
+
+ # Compute intersection and union: (N, M)
+ intersection = (pred_flat & gt_flat).sum(dim=2).float()
+ union = (pred_flat | gt_flat).sum(dim=2).float()
+ ious = intersection / union.clamp(min=1)
+ return ious # shape: (N, M)
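+
+
+# Minimal worked example for the two mask utilities above (illustrative only; not
+# referenced elsewhere in the package).
+def _example_masks_ops():
+    masks = torch.zeros(2, 10, 10, dtype=torch.bool)
+    masks[0, 2:5, 2:5] = True  # 3x3 square -> box [2, 2, 4, 4]
+    masks[1, 2:7, 2:7] = True  # 5x5 square -> box [2, 2, 6, 6]
+    boxes = masks_to_boxes(masks, obj_ids=[1, 2])
+    ious = mask_iou(masks, masks)  # (2, 2); ious[0, 1] == 9 / 25
+    return boxes, ious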
diff --git a/sam3/perflib/nms.py b/sam3/perflib/nms.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3efc5995c33005d4ec9f683096eb1a8f63660c5
--- /dev/null
+++ b/sam3/perflib/nms.py
@@ -0,0 +1,91 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+
+import numpy as np
+import torch
+
+from sam3.perflib.masks_ops import mask_iou
+
+
+try:
+ from torch_generic_nms import generic_nms as generic_nms_cuda
+
+ GENERIC_NMS_AVAILABLE = True
+except ImportError:
+ logging.debug(
+ "Falling back to triton or CPU mask NMS implementation -- please install `torch_generic_nms` via\n\t"
+ 'pip uninstall -y torch_generic_nms; TORCH_CUDA_ARCH_LIST="8.0 9.0" pip install git+https://github.com/ronghanghu/torch_generic_nms'
+ )
+ GENERIC_NMS_AVAILABLE = False
+
+
+def nms_masks(
+ pred_probs: torch.Tensor,
+ pred_masks: torch.Tensor,
+ prob_threshold: float,
+ iou_threshold: float,
+) -> torch.Tensor:
+ """
+ Args:
+ - pred_probs: (num_det,) float Tensor, containing the score (probability) of each detection
+ - pred_masks: (num_det, H_mask, W_mask) float Tensor, containing the binary segmentation mask of each detection
+ - prob_threshold: float, score threshold to prefilter detections (NMS is performed on detections above threshold)
+ - iou_threshold: float, mask IoU threshold for NMS
+
+ Returns:
+ - keep: (num_det,) bool Tensor, indicating whether each detection is kept after score thresholding + NMS
+ """
+ # prefilter the detections with prob_threshold ("valid" are those above prob_threshold)
+ is_valid = pred_probs > prob_threshold # (num_det,)
+ probs = pred_probs[is_valid] # (num_valid,)
+ masks_binary = pred_masks[is_valid] > 0 # (num_valid, H_mask, W_mask)
+ if probs.numel() == 0:
+ return is_valid # no valid detection, return empty keep mask
+
+ ious = mask_iou(masks_binary, masks_binary) # (num_valid, num_valid)
+ kept_inds = generic_nms(ious, probs, iou_threshold)
+
+ # valid_inds are the indices among `probs` of valid detections before NMS (or -1 for invalid)
+ valid_inds = torch.where(is_valid, is_valid.cumsum(dim=0) - 1, -1) # (num_det,)
+ keep = torch.isin(valid_inds, kept_inds) # (num_det,)
+ return keep
+
+
+def generic_nms(
+ ious: torch.Tensor, scores: torch.Tensor, iou_threshold=0.5
+) -> torch.Tensor:
+ """A generic version of `torchvision.ops.nms` that takes a pairwise IoU matrix."""
+
+ assert ious.dim() == 2 and ious.size(0) == ious.size(1)
+ assert scores.dim() == 1 and scores.size(0) == ious.size(0)
+
+ if ious.is_cuda:
+ if GENERIC_NMS_AVAILABLE:
+ return generic_nms_cuda(ious, scores, iou_threshold, use_iou_matrix=True)
+ else:
+ from sam3.perflib.triton.nms import nms_triton
+
+ return nms_triton(ious, scores, iou_threshold)
+
+ return generic_nms_cpu(ious, scores, iou_threshold)
+
+
+def generic_nms_cpu(
+ ious: torch.Tensor, scores: torch.Tensor, iou_threshold=0.5
+) -> torch.Tensor:
+ """
+ A generic version of `torchvision.ops.nms` that takes a pairwise IoU matrix. (CPU implementation
+ based on https://github.com/jwyang/faster-rcnn.pytorch/blob/master/lib/model/nms/nms_cpu.py)
+ """
+ ious_np = ious.float().detach().cpu().numpy()
+ scores_np = scores.float().detach().cpu().numpy()
+ order = scores_np.argsort()[::-1]
+ kept_inds = []
+ while order.size > 0:
+ i = order.item(0)
+ kept_inds.append(i)
+ inds = np.where(ious_np[i, order[1:]] <= iou_threshold)[0]
+ order = order[inds + 1]
+
+ return torch.tensor(kept_inds, dtype=torch.int64, device=scores.device)
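+
+
+# Minimal worked example for `nms_masks` above (illustrative only; not referenced
+# elsewhere in the package).
+def _example_nms_masks():
+    masks = torch.zeros(3, 10, 10)
+    masks[0, 0:5, 0:5] = 1.0  # highest score -> kept
+    masks[1, 0:5, 0:4] = 1.0  # IoU 0.8 with mask 0 -> suppressed
+    masks[2, 6:9, 6:9] = 1.0  # disjoint -> kept
+    scores = torch.tensor([0.9, 0.8, 0.7])
+    keep = nms_masks(scores, masks, prob_threshold=0.5, iou_threshold=0.5)
+    # keep == tensor([True, False, True])
+    return keep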
diff --git a/sam3/perflib/tests/assets/masks.tiff b/sam3/perflib/tests/assets/masks.tiff
new file mode 100644
index 0000000000000000000000000000000000000000..5d05021c65ff41d8ff6cbd8ffec043c261bb5341
--- /dev/null
+++ b/sam3/perflib/tests/assets/masks.tiff
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e470fe2921b69eef47bcbf8394f60f86efa1304b63eb5b9efb297963d8485b60
+size 352484
diff --git a/sam3/perflib/tests/tests.py b/sam3/perflib/tests/tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..0fb88ad007951b69405be012f1fb0b242155c6cc
--- /dev/null
+++ b/sam3/perflib/tests/tests.py
@@ -0,0 +1,59 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import os
+
+import numpy as np
+import pytest
+import torch
+from PIL import Image
+from sam3.perflib.masks_ops import masks_to_boxes
+
+
+class TestMasksToBoxes:
+ def test_masks_box(self):
+ def masks_box_check(masks, expected, atol=1e-4):
+ out = masks_to_boxes(masks, [1 for _ in range(masks.shape[0])])
+ assert out.dtype == torch.float
+ print("out: ", out)
+ print("expected: ", expected)
+ torch.testing.assert_close(
+ out, expected, rtol=0.0, check_dtype=True, atol=atol
+ )
+
+        # Helpers to load the reference masks from the bundled TIFF asset.
+ def _get_image():
+ assets_directory = os.path.join(
+ os.path.dirname(os.path.abspath(__file__)), "assets"
+ )
+ mask_path = os.path.join(assets_directory, "masks.tiff")
+ image = Image.open(mask_path)
+ return image
+
+ def _create_masks(image, masks):
+ for index in range(image.n_frames):
+ image.seek(index)
+ frame = np.array(image)
+ masks[index] = torch.tensor(frame)
+
+ return masks
+
+ expected = torch.tensor(
+ [
+ [127, 2, 165, 40],
+ [2, 50, 44, 92],
+ [56, 63, 98, 100],
+ [139, 68, 175, 104],
+ [160, 112, 198, 145],
+ [49, 138, 99, 182],
+ [108, 148, 152, 213],
+ ],
+ dtype=torch.float,
+ )
+
+ image = _get_image()
+ for dtype in [torch.float16, torch.float32, torch.float64]:
+ masks = torch.zeros(
+ (image.n_frames, image.height, image.width), dtype=dtype
+ )
+ masks = _create_masks(image, masks)
+ masks_box_check(masks, expected)
diff --git a/sam3/perflib/triton/connected_components.py b/sam3/perflib/triton/connected_components.py
new file mode 100644
index 0000000000000000000000000000000000000000..253ca9d54bdf3b47dbff54721a4cfe78aa443f09
--- /dev/null
+++ b/sam3/perflib/triton/connected_components.py
@@ -0,0 +1,468 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import math
+
+import torch
+import triton
+import triton.language as tl
+
+
+@triton.jit
+def _any_combine(a, b):
+ return a | b
+
+
+@triton.jit
+def tl_any(a, dim=0):
+ return tl.reduce(a, dim, _any_combine)
+
+
+# ==============================================================================
+# ## Phase 1: Initialization Kernel
+# ==============================================================================
+# Each foreground pixel (value > 0) gets a unique label equal to its
+# linear index. Background pixels (value == 0) get a sentinel label of -1.
+# Note that the indexing is done across batch boundaries for simplicity
+# (i.e., the first pixel of image 1 gets label H*W, etc.)
+
+
+@triton.jit
+def _init_labels_kernel(
+ input_ptr, labels_ptr, numel: tl.constexpr, BLOCK_SIZE: tl.constexpr
+):
+ pid = tl.program_id(0)
+ offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+ mask = offsets < numel
+ input_values = tl.load(input_ptr + offsets, mask=mask, other=0)
+
+ indices = tl.where((input_values != 0), offsets, -1)
+ tl.store(labels_ptr + offsets, indices, mask=mask)
+
+
+# ==============================================================================
+# ## Phase 2: Local merging
+# ==============================================================================
+# Each foreground pixel tries to merge with its 8-connected neighbors if they have the
+# same value. The kernel explicitly checks left, up, and the two left diagonals; the
+# remaining directions are covered when the neighboring pixel performs the same merges.
+# Merging uses a disjoint-set union operation.
+
+
+@triton.jit
+def find(labels_ptr, indices, mask):
+ current_pids = indices
+
+ # 'is_done' tracks lanes that have finished their work.
+ # A lane is initially "done" if it's not active (mask is False).
+ is_done = ~mask
+
+ # Loop as long as there is at least one lane that is NOT done.
+ while tl_any(~is_done):
+ # The work_mask is for lanes that are still active and seeking their root.
+ work_mask = ~is_done
+ parents = tl.load(labels_ptr + current_pids, mask=work_mask, other=-1)
+ # A lane is now done if its parent is itself (it's a root)
+ # or if it hits a -1 sentinel (a safe exit condition).
+ is_root = parents == current_pids
+ is_sentinel = parents == -1
+ is_done |= is_root | is_sentinel
+
+ # For lanes that are not yet done, update their pid to their parent to continue traversal.
+ current_pids = tl.where(is_done, current_pids, parents)
+ # We could add the following line to do path compression, but experimentally it's slower
+ # tl.atomic_min(labels_ptr + indices, current_pids, mask=mask)
+ return current_pids
+
+
+@triton.jit
+def union(labels_ptr, a, b, process_mask):
+ # This function implements a disjoint-set union
+ # As an invariant, we use the fact that the roots have the lower id. That helps parallelization
+ # However, that is not sufficient by itself. Suppose two threads want to do union(0,2) and union(1,2) at the same time
+ # Then if we do a naive atomic_min, 0 and 1 will compete to be the new parent of 2 and min(0, 1) will win.
+ # However, 1 still needs to be merged with the new {0, 2} component.
+ # To ensure that merge is also done, we need to detect whether the merge was successful, and if not retry until it is
+
+ current_a = a
+ current_b = b
+
+ final_root = a
+ # A mask to track which lanes have successfully completed their union.
+    done_mask = ~process_mask  # lanes outside process_mask start out already done
+
+ while tl_any(~done_mask):
+ # Define the mask for lanes that still need work in this iteration
+ work_mask = process_mask & ~done_mask
+
+ # Find the roots for the current a and b values in the active lanes
+ root_a = find(labels_ptr, current_a, work_mask)
+ tl.debug_barrier()
+ root_b = find(labels_ptr, current_b, work_mask)
+
+        # Merge logic
+ # If roots are already the same, the sets are already merged. Mark as done.
+ are_equal = root_a == root_b
+ final_root = tl.where(are_equal & work_mask & ~done_mask, root_a, final_root)
+ done_mask |= are_equal & work_mask
+
+ # Define masks for the two merge cases (a < b or b < a)
+ a_is_smaller = root_a < root_b
+
+ # Case 1: root_a < root_b. Attempt to set parent[root_b] = root_a
+ merge_mask_a_smaller = work_mask & a_is_smaller & ~are_equal
+ ptr_b = labels_ptr + root_b
+ old_val_b = tl.atomic_min(ptr_b, root_a, mask=merge_mask_a_smaller)
+
+ # A lane is done if its atomic op was successful (old value was what we expected)
+ success_b = old_val_b == root_b
+ final_root = tl.where(success_b & work_mask & ~done_mask, root_a, final_root)
+ done_mask |= success_b & merge_mask_a_smaller
+
+ # *** Crucial Retry Logic ***
+ # If the update failed (old_val_b != root_b), another thread interfered.
+ # We update `current_b` to this new root (`old_val_b`) and will retry in the next loop iteration.
+ current_b = tl.where(success_b | ~merge_mask_a_smaller, current_b, old_val_b)
+
+ # Case 2: root_b < root_a. Attempt to set parent[root_a] = root_b
+ merge_mask_b_smaller = work_mask & ~a_is_smaller & ~are_equal
+ ptr_a = labels_ptr + root_a
+ old_val_a = tl.atomic_min(ptr_a, root_b, mask=merge_mask_b_smaller)
+
+ success_a = old_val_a == root_a
+ final_root = tl.where(success_a & work_mask & ~done_mask, root_b, final_root)
+ done_mask |= success_a & merge_mask_b_smaller
+
+ # *** Crucial Retry Logic ***
+ # Similarly, update `current_a` if the atomic operation failed.
+ current_a = tl.where(success_a | ~merge_mask_b_smaller, current_a, old_val_a)
+
+ return final_root
+
+
+@triton.jit
+def _merge_helper(
+ input_ptr,
+ labels_ptr,
+ base_offset,
+ offsets_h,
+ offsets_w,
+ mask_2d,
+ valid_current,
+ current_values,
+ current_labels,
+ H,
+ W,
+ dx: tl.constexpr,
+ dy: tl.constexpr,
+):
+ # Helper functions to compute merge with a specific neighbor offset (dx, dy)
+
+ neighbor_h = offsets_h + dy
+ neighbor_w = offsets_w + dx
+ # Proper bounds checking: all four bounds must be satisfied
+ mask_n = (
+ mask_2d
+ & (neighbor_h[:, None] >= 0)
+ & (neighbor_h[:, None] < H)
+ & (neighbor_w[None, :] >= 0)
+ & (neighbor_w[None, :] < W)
+ )
+
+ offsets_neighbor = neighbor_h[:, None] * W + neighbor_w[None, :]
+ neighbor_values = tl.load(
+ input_ptr + base_offset + offsets_neighbor, mask=mask_n, other=-1
+ )
+
+ mask_n = tl.ravel(mask_n)
+ neighbor_labels = tl.load(
+ labels_ptr + tl.ravel(base_offset + offsets_neighbor), mask=mask_n, other=-1
+ )
+
+ to_merge = (
+ mask_n & (neighbor_labels != -1) & tl.ravel(current_values == neighbor_values)
+ )
+ valid_write = valid_current & to_merge
+
+ # returns new parents for the pixels that were merged (otherwise keeps current labels)
+ return tl.where(
+ valid_write,
+ union(labels_ptr, current_labels, neighbor_labels, valid_write),
+ current_labels,
+ )
+
+
+@triton.autotune(
+ configs=[
+ triton.Config(
+ {"BLOCK_SIZE_H": 4, "BLOCK_SIZE_W": 16}, num_stages=1, num_warps=2
+ ),
+ triton.Config(
+ {"BLOCK_SIZE_H": 4, "BLOCK_SIZE_W": 32}, num_stages=2, num_warps=4
+ ),
+ ],
+ key=["H", "W"],
+ restore_value=["labels_ptr"],
+)
+@triton.jit
+def _local_prop_kernel(
+ labels_ptr,
+ input_ptr,
+ H: tl.constexpr,
+ W: tl.constexpr,
+ BLOCK_SIZE_H: tl.constexpr,
+ BLOCK_SIZE_W: tl.constexpr,
+):
+ # This is the meat of the Phase 2 to do local merging
+ # It will be launched with a 2D grid:
+ # - dim 0: batch index
+ # - dim 1: block index over HxW image (2D tiling)
+ pid_b = tl.program_id(0)
+ pid_hw = tl.program_id(1)
+
+ # Calculate offsets for the core block
+ offsets_h = (pid_hw // tl.cdiv(W, BLOCK_SIZE_W)) * BLOCK_SIZE_H + tl.arange(
+ 0, BLOCK_SIZE_H
+ )
+ offsets_w = (pid_hw % tl.cdiv(W, BLOCK_SIZE_W)) * BLOCK_SIZE_W + tl.arange(
+ 0, BLOCK_SIZE_W
+ )
+
+ base_offset = pid_b * H * W
+ offsets_2d = offsets_h[:, None] * W + offsets_w[None, :]
+ mask_2d = (offsets_h[:, None] < H) & (offsets_w[None, :] < W)
+ mask_1d = tl.ravel(mask_2d)
+
+ # Load the current labels for the block - these are parent pointers
+ current_labels = tl.load(
+ labels_ptr + tl.ravel(base_offset + offsets_2d), mask=mask_1d, other=-1
+ )
+ current_values = tl.load(
+ input_ptr + base_offset + offsets_2d, mask=mask_2d, other=-1
+ )
+ valid_current = mask_1d & (current_labels != -1)
+
+ # Horizontal merge
+ current_labels = _merge_helper(
+ input_ptr,
+ labels_ptr,
+ base_offset,
+ offsets_h,
+ offsets_w,
+ mask_2d,
+ valid_current,
+ current_values,
+ current_labels,
+ H,
+ W,
+ -1,
+ 0,
+ )
+ # Vertical merge
+ current_labels = _merge_helper(
+ input_ptr,
+ labels_ptr,
+ base_offset,
+ offsets_h,
+ offsets_w,
+ mask_2d,
+ valid_current,
+ current_values,
+ current_labels,
+ H,
+ W,
+ 0,
+ -1,
+ )
+ # Diagonal merges
+ current_labels = _merge_helper(
+ input_ptr,
+ labels_ptr,
+ base_offset,
+ offsets_h,
+ offsets_w,
+ mask_2d,
+ valid_current,
+ current_values,
+ current_labels,
+ H,
+ W,
+ -1,
+ -1,
+ )
+ current_labels = _merge_helper(
+ input_ptr,
+ labels_ptr,
+ base_offset,
+ offsets_h,
+ offsets_w,
+ mask_2d,
+ valid_current,
+ current_values,
+ current_labels,
+ H,
+ W,
+ -1,
+ 1,
+ )
+
+ # This actually does some path compression, in a lightweight but beneficial way
+ tl.atomic_min(
+ labels_ptr + tl.ravel(base_offset + offsets_2d), current_labels, mask=mask_1d
+ )
+
+
+# ==============================================================================
+# ## Phase 3: Pointer Jumping Kernel
+# ==============================================================================
+# This kernel performs pointer jumping to ensure that all pixels point directly to their root labels.
+# This is done in a loop until convergence.
+
+
+@triton.jit
+def _pointer_jump_kernel(
+ labels_in_ptr, labels_out_ptr, numel: tl.constexpr, BLOCK_SIZE: tl.constexpr
+):
+ """
+ Pointer jumping kernel with double buffering to avoid race conditions.
+ Reads from labels_in_ptr and writes to labels_out_ptr.
+ """
+ # This kernel is launched with a 1D grid, and does not care about batching explicitly.
+ # By construction, the labels are global indices across the batch, and we never perform
+ # cross-batch merges, so this is safe.
+
+ pid = tl.program_id(0)
+ offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+ mask = offsets < numel
+
+ # Load current labels from input buffer
+ current_labels = tl.load(labels_in_ptr + offsets, mask=mask, other=-1)
+ valid_mask = mask & (current_labels != -1)
+
+ # A mask to track which lanes have successfully completed their union.
+ done_mask = ~valid_mask
+ while tl_any(~(done_mask | ~valid_mask)):
+ parent_labels = tl.load(
+ labels_in_ptr + current_labels, mask=valid_mask, other=-1
+ )
+
+ are_equal = current_labels == parent_labels
+ done_mask |= are_equal & valid_mask
+
+ current_labels = tl.where(
+ ~done_mask, tl.minimum(current_labels, parent_labels), current_labels
+ )
+
+ # Write to output buffer (safe because we're not reading from it)
+ tl.store(labels_out_ptr + offsets, current_labels, mask=mask)
+
+
+# ==============================================================================
+# ## Phase 4: Kernels for Computing Component Sizes
+# ==============================================================================
+
+
+# Step 4.1: Count occurrences of each root label using atomic adds.
+@triton.jit
+def _count_labels_kernel(labels_ptr, sizes_ptr, numel, BLOCK_SIZE: tl.constexpr):
+ pid = tl.program_id(0)
+ offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+ mask = offsets < numel
+
+ # Load the final, converged labels
+ labels = tl.load(labels_ptr + offsets, mask=mask, other=-1)
+ valid_mask = mask & (labels != -1)
+
+ # Atomically increment the counter for each label. This builds a histogram.
+ tl.atomic_add(sizes_ptr + labels, 1, mask=valid_mask)
+
+
+# Step 4.2: Broadcast the computed sizes back to the output tensor.
+@triton.jit
+def _broadcast_sizes_kernel(
+ labels_ptr, sizes_ptr, out_ptr, numel, BLOCK_SIZE: tl.constexpr
+):
+ pid = tl.program_id(0)
+ offsets = pid * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
+ mask = offsets < numel
+
+ # Load the final labels
+ labels = tl.load(labels_ptr + offsets, mask=mask, other=-1)
+ valid_mask = mask & (labels != -1)
+
+ # Look up the size for each label from the histogram
+ component_sizes = tl.load(sizes_ptr + labels, mask=valid_mask, other=0)
+
+ # Write the size to the final output tensor. Background pixels get size 0.
+ tl.store(out_ptr + offsets, component_sizes, mask=mask)
+
+
+def connected_components_triton(input_tensor: torch.Tensor):
+ """
+ Computes connected components labeling on a batch of 2D integer tensors using Triton.
+
+ Args:
+ input_tensor (torch.Tensor): A BxHxW integer tensor or Bx1xHxW. Non-zero values are considered foreground. Bool tensor also accepted
+
+ Returns:
+        Tuple[torch.Tensor, torch.Tensor]: A tuple containing:
+ - A BxHxW output tensor with dense labels. Background is 0.
+ - A BxHxW tensor with the size of the connected component for each pixel.
+ """
+ assert (
+ input_tensor.is_cuda and input_tensor.is_contiguous()
+ ), "Input tensor must be a contiguous CUDA tensor."
+ out_shape = input_tensor.shape
+ if input_tensor.dim() == 4 and input_tensor.shape[1] == 1:
+ input_tensor = input_tensor.squeeze(1)
+ else:
+ assert (
+ input_tensor.dim() == 3
+ ), "Input tensor must be (B, H, W) or (B, 1, H, W)."
+
+ B, H, W = input_tensor.shape
+ numel = B * H * W
+ device = input_tensor.device
+
+ # --- Allocate Tensors ---
+ labels = torch.empty_like(input_tensor, dtype=torch.int32)
+ output = torch.empty_like(input_tensor, dtype=torch.int32)
+
+ # --- Phase 1 ---
+ BLOCK_SIZE = 256
+ grid_init = (triton.cdiv(numel, BLOCK_SIZE),)
+ _init_labels_kernel[grid_init](
+ input_tensor,
+ labels,
+ numel,
+ BLOCK_SIZE=BLOCK_SIZE,
+ )
+
+ # --- Phase 2 ---
+ grid_local_prop = lambda meta: (
+ B,
+ triton.cdiv(H, meta["BLOCK_SIZE_H"]) * triton.cdiv(W, meta["BLOCK_SIZE_W"]),
+ )
+ _local_prop_kernel[grid_local_prop](labels, input_tensor, H, W)
+
+ # --- Phase 3 ---
+ BLOCK_SIZE = 256
+ grid_jump = lambda meta: (triton.cdiv(numel, meta["BLOCK_SIZE"]),)
+ _pointer_jump_kernel[grid_jump](labels, output, numel, BLOCK_SIZE=BLOCK_SIZE)
+
+ # --- Phase 4 ---
+ # Allocate tensor to store the final output sizes
+ component_sizes_out = torch.empty_like(input_tensor, dtype=torch.int32)
+
+ # Allocate a temporary 1D tensor to act as the histogram
+ # Size is numel because labels can be up to numel-1
+ sizes_histogram = torch.zeros(numel, dtype=torch.int32, device=device)
+
+ # 4.1: Count the occurrences of each label
+ grid_count = (triton.cdiv(numel, BLOCK_SIZE),)
+ _count_labels_kernel[grid_count](
+ output, sizes_histogram, numel, BLOCK_SIZE=BLOCK_SIZE
+ )
+
+    # 4.2: Broadcast the counts to the final output tensor
+ grid_broadcast = (triton.cdiv(numel, BLOCK_SIZE),)
+ _broadcast_sizes_kernel[grid_broadcast](
+ output, sizes_histogram, component_sizes_out, numel, BLOCK_SIZE=BLOCK_SIZE
+ )
+ return output.view(out_shape) + 1, component_sizes_out.view(out_shape)
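+
+
+# Minimal usage sketch for the Triton labeling entry point above (illustrative only; not
+# referenced elsewhere in the package). Requires a CUDA device.
+def _example_connected_components_triton():
+    masks = (torch.rand(2, 1, 64, 64, device="cuda") > 0.5).to(torch.int32).contiguous()
+    labels, sizes = connected_components_triton(masks)
+    # Both outputs share the input shape; background pixels get label 0 and size 0.
+    return labels, sizes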
diff --git a/sam3/perflib/triton/nms.py b/sam3/perflib/triton/nms.py
new file mode 100644
index 0000000000000000000000000000000000000000..ed800a1e9b5c131386d530df9bbe2be8ad670161
--- /dev/null
+++ b/sam3/perflib/triton/nms.py
@@ -0,0 +1,124 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+# Adapted from https://github.com/stackav-oss/conch/blob/main/conch/kernels/vision/nms.py
+
+import torch
+import triton
+import triton.language as tl
+
+
+@triton.autotune(
+ configs=[
+ triton.Config({"cxpr_block_size": 128}),
+ triton.Config({"cxpr_block_size": 256}),
+ triton.Config({"cxpr_block_size": 512}),
+ triton.Config({"cxpr_block_size": 1024}),
+ triton.Config({"cxpr_block_size": 2048}),
+ triton.Config({"cxpr_block_size": 4096}),
+ triton.Config({"cxpr_block_size": 8192}),
+ ],
+ key=["num_boxes"],
+)
+@triton.jit
+def _nms_suppression_kernel(
+ # Tensors
+ iou_mask_ptr: tl.tensor, # [N, N]
+ keep_mask_ptr: tl.tensor, # [N]
+ # Scalars
+ num_boxes: tl.int32,
+ # Strides
+ iou_mask_stride: tl.int32,
+ # Constexprs
+ cxpr_block_size: tl.constexpr,
+) -> None:
+ """NMS suppression kernel.
+
+ Args:
+ iou_mask_ptr: Pointer to precomputed IoU mask, shape: (N, N).
+ keep_mask_ptr: Pointer to keep mask tensor, shape: (N,).
+ num_boxes: Number of boxes.
+ iou_mask_stride: Stride for IoU mask tensor.
+ cxpr_block_size: Block size for processing.
+ """
+ # Sequential NMS: for each box in sorted order, suppress later boxes
+ for current_box_idx in range(num_boxes - 1):
+ # Check if current box is still kept
+ is_kept = tl.load(keep_mask_ptr + current_box_idx)
+ if is_kept:
+ # IoU mask row offset for the current box
+ # Because the IoU mask is sorted by score, we will only consider boxes that come after the current box.
+ # This means we only need to read the upper triangular part of the IoU mask.
+ iou_row_offset = current_box_idx * iou_mask_stride
+
+ # Only process boxes that come after the current box
+ next_box_idx = current_box_idx + 1
+ remaining_boxes = num_boxes - next_box_idx
+
+ # Iterate blockwise through the columns
+ for block_idx in range(tl.cdiv(remaining_boxes, cxpr_block_size)):
+ # Masked load of indices for the target boxes in the current block
+ block_start = next_box_idx + block_idx * cxpr_block_size
+ target_box_offsets = block_start + tl.arange(0, cxpr_block_size)
+ target_box_mask = target_box_offsets < num_boxes
+
+ # Suppress boxes with lower scores that have high IoU
+ suppression_mask = tl.load(
+ iou_mask_ptr + iou_row_offset + target_box_offsets,
+ mask=target_box_mask,
+ other=False,
+ )
+ suppression_mask = tl.cast(suppression_mask, tl.int1)
+
+ # Conditionally store suppression result for high-IoU boxes
+ tl.store(
+ keep_mask_ptr + target_box_offsets, False, mask=suppression_mask
+ )
+
+ # Potential race condition: we need to ensure all threads complete the store before the next
+ # iteration otherwise we may load stale data for whether or not a box has been suppressed.
+ tl.debug_barrier()
+
+
+def nms_triton(
+ ious: torch.Tensor,
+ scores: torch.Tensor,
+ iou_threshold: float,
+) -> torch.Tensor:
+ """Perform NMS given the iou matrix, the scores and the iou threshold
+
+ Args:
+ ious: Pairwise IoU tensor of shape (N, N).
+ scores: Scores tensor of shape (N,).
+ iou_threshold: IoU threshold for suppression.
+
+ Returns:
+ Tensor: Indices of kept boxes, sorted by decreasing score.
+ """
+ assert scores.dim() == 1, "Scores must be 1D"
+ iou_mask = ious > iou_threshold
+ assert iou_mask.dim() == 2
+ assert iou_mask.shape[0] == iou_mask.shape[1] == scores.shape[0]
+ assert iou_mask.device == scores.device
+ assert iou_mask.dtype == torch.bool
+
+ num_boxes = scores.size(0)
+ keep_mask = torch.ones(len(scores), device=scores.device, dtype=torch.bool)
+
+ # Sort boxes by scores in descending order
+ _, sorted_indices = torch.sort(scores, dim=0, stable=True, descending=True)
+ iou_mask = iou_mask[sorted_indices][:, sorted_indices].contiguous()
+
+ # For the suppression stage, we need to process sequentially, but we'll still take
+ # advantage of parallelism by processing in blocks in one program.
+ stage2_grid = (1,)
+ _nms_suppression_kernel[stage2_grid](
+ # Tensors
+ iou_mask_ptr=iou_mask,
+ keep_mask_ptr=keep_mask,
+ # Scalars
+ num_boxes=num_boxes,
+ # Strides
+ iou_mask_stride=iou_mask.stride(0),
+ )
+ # Extract indices of kept boxes
+ return sorted_indices[keep_mask]
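+
+
+# Minimal usage sketch for `nms_triton` above (illustrative only; not referenced
+# elsewhere in the package). Requires a CUDA device; the IoU matrix would normally come
+# from `sam3.perflib.masks_ops.mask_iou`.
+def _example_nms_triton():
+    ious = torch.tensor(
+        [[1.0, 0.8, 0.0], [0.8, 1.0, 0.0], [0.0, 0.0, 1.0]], device="cuda"
+    )
+    scores = torch.tensor([0.9, 0.8, 0.7], device="cuda")
+    keep = nms_triton(ious, scores, iou_threshold=0.5)
+    # keep == tensor([0, 2]) -- box 1 is suppressed by the higher-scoring box 0.
+    return keep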
diff --git a/sam3/sam/__init__.py b/sam3/sam/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b35da6d52b0366b4ff26fa508c34f5d281e0dfa
--- /dev/null
+++ b/sam3/sam/__init__.py
@@ -0,0 +1,4 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+from .mask_decoder import MaskDecoder
+from .prompt_encoder import PromptEncoder
+from .transformer import TwoWayTransformer
diff --git a/sam3/sam/common.py b/sam3/sam/common.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6d1587d3394377a8e4b3fd80475d49775da7ccb
--- /dev/null
+++ b/sam3/sam/common.py
@@ -0,0 +1,39 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Type
+
+import torch
+import torch.nn as nn
+
+
+class MLPBlock(nn.Module):
+ def __init__(
+ self,
+ embedding_dim: int,
+ mlp_dim: int,
+ act: Type[nn.Module] = nn.GELU,
+ ) -> None:
+ super().__init__()
+ self.lin1 = nn.Linear(embedding_dim, mlp_dim)
+ self.lin2 = nn.Linear(mlp_dim, embedding_dim)
+ self.act = act()
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ return self.lin2(self.act(self.lin1(x)))
+
+
+# From https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py # noqa
+# Itself from https://github.com/facebookresearch/ConvNeXt/blob/d1fa8f6fef0a165b27399986cc2bdacc92777e40/models/convnext.py#L119 # noqa
+class LayerNorm2d(nn.Module):
+ def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+ super().__init__()
+ self.weight = nn.Parameter(torch.ones(num_channels))
+ self.bias = nn.Parameter(torch.zeros(num_channels))
+ self.eps = eps
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ u = x.mean(1, keepdim=True)
+ s = (x - u).pow(2).mean(1, keepdim=True)
+ x = (x - u) / torch.sqrt(s + self.eps)
+ x = self.weight[:, None, None] * x + self.bias[:, None, None]
+ return x
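+
+
+# Minimal usage sketch for the two building blocks above (illustrative only; not
+# referenced elsewhere in the package).
+def _example_common_blocks():
+    x_tokens = torch.randn(2, 16, 256)    # (batch, tokens, embedding_dim)
+    x_feat = torch.randn(2, 256, 64, 64)  # (batch, channels, height, width)
+    mlp = MLPBlock(embedding_dim=256, mlp_dim=2048)
+    norm = LayerNorm2d(num_channels=256)
+    # Both modules preserve the shape of their inputs.
+    return mlp(x_tokens), norm(x_feat)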
diff --git a/sam3/sam/mask_decoder.py b/sam3/sam/mask_decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4ac397bf5f433d58bcafd79f00a4e285f89f140
--- /dev/null
+++ b/sam3/sam/mask_decoder.py
@@ -0,0 +1,319 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import List, Optional, Tuple, Type
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from .common import LayerNorm2d
+
+
+class MaskDecoder(nn.Module):
+ def __init__(
+ self,
+ *,
+ transformer_dim: int,
+ transformer: nn.Module,
+ num_multimask_outputs: int = 3,
+ activation: Type[nn.Module] = nn.GELU,
+ iou_head_depth: int = 3,
+ iou_head_hidden_dim: int = 256,
+ use_high_res_features: bool = False,
+ iou_prediction_use_sigmoid=False,
+ dynamic_multimask_via_stability=False,
+ dynamic_multimask_stability_delta=0.05,
+ dynamic_multimask_stability_thresh=0.98,
+ pred_obj_scores: bool = False,
+ pred_obj_scores_mlp: bool = False,
+ use_multimask_token_for_obj_ptr: bool = False,
+ ) -> None:
+ """
+ Predicts masks given an image and prompt embeddings, using a
+ transformer architecture.
+
+ Arguments:
+ transformer_dim (int): the channel dimension of the transformer
+ transformer (nn.Module): the transformer used to predict masks
+ num_multimask_outputs (int): the number of masks to predict
+ when disambiguating masks
+ activation (nn.Module): the type of activation to use when
+ upscaling masks
+ iou_head_depth (int): the depth of the MLP used to predict
+ mask quality
+ iou_head_hidden_dim (int): the hidden dimension of the MLP
+ used to predict mask quality
+ """
+ super().__init__()
+ self.transformer_dim = transformer_dim
+ self.transformer = transformer
+
+ self.num_multimask_outputs = num_multimask_outputs
+
+ self.iou_token = nn.Embedding(1, transformer_dim)
+ self.num_mask_tokens = num_multimask_outputs + 1
+ self.mask_tokens = nn.Embedding(self.num_mask_tokens, transformer_dim)
+
+ self.pred_obj_scores = pred_obj_scores
+ if self.pred_obj_scores:
+ self.obj_score_token = nn.Embedding(1, transformer_dim)
+ self.use_multimask_token_for_obj_ptr = use_multimask_token_for_obj_ptr
+
+ self.output_upscaling = nn.Sequential(
+ nn.ConvTranspose2d(
+ transformer_dim, transformer_dim // 4, kernel_size=2, stride=2
+ ),
+ LayerNorm2d(transformer_dim // 4),
+ activation(),
+ nn.ConvTranspose2d(
+ transformer_dim // 4, transformer_dim // 8, kernel_size=2, stride=2
+ ),
+ activation(),
+ )
+ self.use_high_res_features = use_high_res_features
+ if use_high_res_features:
+ self.conv_s0 = nn.Conv2d(
+ transformer_dim, transformer_dim // 8, kernel_size=1, stride=1
+ )
+ self.conv_s1 = nn.Conv2d(
+ transformer_dim, transformer_dim // 4, kernel_size=1, stride=1
+ )
+
+ self.output_hypernetworks_mlps = nn.ModuleList(
+ [
+ MLP(transformer_dim, transformer_dim, transformer_dim // 8, 3)
+ for i in range(self.num_mask_tokens)
+ ]
+ )
+
+ self.iou_prediction_head = MLP(
+ transformer_dim,
+ iou_head_hidden_dim,
+ self.num_mask_tokens,
+ iou_head_depth,
+ sigmoid_output=iou_prediction_use_sigmoid,
+ )
+ if self.pred_obj_scores:
+ self.pred_obj_score_head = nn.Linear(transformer_dim, 1)
+ if pred_obj_scores_mlp:
+ self.pred_obj_score_head = MLP(transformer_dim, transformer_dim, 1, 3)
+
+ # When outputting a single mask, optionally we can dynamically fall back to the best
+ # multimask output token if the single mask output token gives low stability scores.
+ self.dynamic_multimask_via_stability = dynamic_multimask_via_stability
+ self.dynamic_multimask_stability_delta = dynamic_multimask_stability_delta
+ self.dynamic_multimask_stability_thresh = dynamic_multimask_stability_thresh
+
+ def forward(
+ self,
+ image_embeddings: torch.Tensor,
+ image_pe: torch.Tensor,
+ sparse_prompt_embeddings: torch.Tensor,
+ dense_prompt_embeddings: torch.Tensor,
+ multimask_output: bool,
+ repeat_image: bool,
+ high_res_features: Optional[List[torch.Tensor]] = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+ """
+ Predict masks given image and prompt embeddings.
+
+ Arguments:
+ image_embeddings (torch.Tensor): the embeddings from the image encoder
+ image_pe (torch.Tensor): positional encoding with the shape of image_embeddings
+ sparse_prompt_embeddings (torch.Tensor): the embeddings of the points and boxes
+ dense_prompt_embeddings (torch.Tensor): the embeddings of the mask inputs
+ multimask_output (bool): Whether to return multiple masks or a single
+ mask.
+
+ Returns:
+ torch.Tensor: batched predicted masks
+ torch.Tensor: batched predictions of mask quality
+ torch.Tensor: batched SAM token for mask output
+ torch.Tensor: batched object score logits
+ """
+ masks, iou_pred, mask_tokens_out, object_score_logits = self.predict_masks(
+ image_embeddings=image_embeddings,
+ image_pe=image_pe,
+ sparse_prompt_embeddings=sparse_prompt_embeddings,
+ dense_prompt_embeddings=dense_prompt_embeddings,
+ repeat_image=repeat_image,
+ high_res_features=high_res_features,
+ )
+
+ # Select the correct mask or masks for output
+ if multimask_output:
+ masks = masks[:, 1:, :, :]
+ iou_pred = iou_pred[:, 1:]
+ elif self.dynamic_multimask_via_stability and not self.training:
+ masks, iou_pred = self._dynamic_multimask_via_stability(masks, iou_pred)
+ else:
+ masks = masks[:, 0:1, :, :]
+ iou_pred = iou_pred[:, 0:1]
+
+ if multimask_output and self.use_multimask_token_for_obj_ptr:
+ sam_tokens_out = mask_tokens_out[:, 1:] # [b, 3, c] shape
+ else:
+ # Take the mask output token. Here we *always* use the token for single mask output.
+ # At test time, even if we track after 1-click (and using multimask_output=True),
+ # we still take the single mask token here. The rationale is that we always track
+ # after multiple clicks during training, so the past tokens seen during training
+ # are always the single mask token (and we'll let it be the object-memory token).
+ sam_tokens_out = mask_tokens_out[:, 0:1] # [b, 1, c] shape
+
+ # Prepare output
+ return masks, iou_pred, sam_tokens_out, object_score_logits
+
+ def predict_masks(
+ self,
+ image_embeddings: torch.Tensor,
+ image_pe: torch.Tensor,
+ sparse_prompt_embeddings: torch.Tensor,
+ dense_prompt_embeddings: torch.Tensor,
+ repeat_image: bool,
+ high_res_features: Optional[List[torch.Tensor]] = None,
+ ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+ """Predicts masks. See 'forward' for more details."""
+ # Concatenate output tokens
+ s = 0
+ if self.pred_obj_scores:
+ output_tokens = torch.cat(
+ [
+ self.obj_score_token.weight,
+ self.iou_token.weight,
+ self.mask_tokens.weight,
+ ],
+ dim=0,
+ )
+ s = 1
+ else:
+ output_tokens = torch.cat(
+ [self.iou_token.weight, self.mask_tokens.weight], dim=0
+ )
+ output_tokens = output_tokens.unsqueeze(0).expand(
+ sparse_prompt_embeddings.size(0), -1, -1
+ )
+ tokens = torch.cat((output_tokens, sparse_prompt_embeddings), dim=1)
+
+ # Expand per-image data in batch direction to be per-mask
+ if repeat_image:
+ src = torch.repeat_interleave(image_embeddings, tokens.shape[0], dim=0)
+ else:
+ assert image_embeddings.shape[0] == tokens.shape[0]
+ src = image_embeddings
+ src = src + dense_prompt_embeddings
+ assert (
+ image_pe.size(0) == 1
+ ), "image_pe should have size 1 in batch dim (from `get_dense_pe()`)"
+ pos_src = torch.repeat_interleave(image_pe, tokens.shape[0], dim=0)
+ b, c, h, w = src.shape
+
+ # Run the transformer
+ hs, src = self.transformer(src, pos_src, tokens)
+ iou_token_out = hs[:, s, :]
+ mask_tokens_out = hs[:, s + 1 : (s + 1 + self.num_mask_tokens), :]
+
+ # Upscale mask embeddings and predict masks using the mask tokens
+ src = src.transpose(1, 2).view(b, c, h, w)
+ if not self.use_high_res_features:
+ upscaled_embedding = self.output_upscaling(src)
+ else:
+ dc1, ln1, act1, dc2, act2 = self.output_upscaling
+ feat_s0, feat_s1 = high_res_features
+ upscaled_embedding = act1(ln1(dc1(src) + feat_s1))
+ upscaled_embedding = act2(dc2(upscaled_embedding) + feat_s0)
+
+ hyper_in_list: List[torch.Tensor] = []
+ for i in range(self.num_mask_tokens):
+ hyper_in_list.append(
+ self.output_hypernetworks_mlps[i](mask_tokens_out[:, i, :])
+ )
+ hyper_in = torch.stack(hyper_in_list, dim=1)
+ b, c, h, w = upscaled_embedding.shape
+ masks = (hyper_in @ upscaled_embedding.view(b, c, h * w)).view(b, -1, h, w)
+
+ # Generate mask quality predictions
+ iou_pred = self.iou_prediction_head(iou_token_out)
+ if self.pred_obj_scores:
+ assert s == 1
+ object_score_logits = self.pred_obj_score_head(hs[:, 0, :])
+ else:
+ # Object score logits - default to 10.0, i.e. assume the object is present, since sigmoid(10) ≈ 1
+ object_score_logits = 10.0 * iou_pred.new_ones(iou_pred.shape[0], 1)
+
+ return masks, iou_pred, mask_tokens_out, object_score_logits
+
+ def _get_stability_scores(self, mask_logits):
+ """
+ Compute stability scores of the mask logits based on the IoU between upper and
+ lower thresholds.
+ """
+ mask_logits = mask_logits.flatten(-2)
+ stability_delta = self.dynamic_multimask_stability_delta
+ area_i = torch.sum(mask_logits > stability_delta, dim=-1).float()
+ area_u = torch.sum(mask_logits > -stability_delta, dim=-1).float()
+ stability_scores = torch.where(area_u > 0, area_i / area_u, 1.0)
+ return stability_scores
+
+ def _dynamic_multimask_via_stability(self, all_mask_logits, all_iou_scores):
+ """
+ When outputting a single mask, if the stability score from the current single-mask
+ output (based on output token 0) falls below a threshold, we instead select from
+ multi-mask outputs (based on output token 1~3) the mask with the highest predicted
+ IoU score. This is intended to ensure a valid mask for both clicking and tracking.
+ """
+ # The best mask from multimask output tokens (1~3)
+ multimask_logits = all_mask_logits[:, 1:, :, :]
+ multimask_iou_scores = all_iou_scores[:, 1:]
+ best_scores_inds = torch.argmax(multimask_iou_scores, dim=-1)
+ batch_inds = torch.arange(
+ multimask_iou_scores.size(0), device=all_iou_scores.device
+ )
+ best_multimask_logits = multimask_logits[batch_inds, best_scores_inds]
+ best_multimask_logits = best_multimask_logits.unsqueeze(1)
+ best_multimask_iou_scores = multimask_iou_scores[batch_inds, best_scores_inds]
+ best_multimask_iou_scores = best_multimask_iou_scores.unsqueeze(1)
+
+ # The mask from singlemask output token 0 and its stability score
+ singlemask_logits = all_mask_logits[:, 0:1, :, :]
+ singlemask_iou_scores = all_iou_scores[:, 0:1]
+ stability_scores = self._get_stability_scores(singlemask_logits)
+ is_stable = stability_scores >= self.dynamic_multimask_stability_thresh
+
+ # Dynamically fall back to best multimask output upon low stability scores.
+ mask_logits_out = torch.where(
+ is_stable[..., None, None].expand_as(singlemask_logits),
+ singlemask_logits,
+ best_multimask_logits,
+ )
+ iou_scores_out = torch.where(
+ is_stable.expand_as(singlemask_iou_scores),
+ singlemask_iou_scores,
+ best_multimask_iou_scores,
+ )
+ return mask_logits_out, iou_scores_out
+
+
+# Lightly adapted from
+# https://github.com/facebookresearch/MaskFormer/blob/main/mask_former/modeling/transformer/transformer_predictor.py # noqa
+class MLP(nn.Module):
+ def __init__(
+ self,
+ input_dim: int,
+ hidden_dim: int,
+ output_dim: int,
+ num_layers: int,
+ sigmoid_output: bool = False,
+ ) -> None:
+ super().__init__()
+ self.num_layers = num_layers
+ h = [hidden_dim] * (num_layers - 1)
+ self.layers = nn.ModuleList(
+ nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
+ )
+ self.sigmoid_output = sigmoid_output
+
+ def forward(self, x):
+ for i, layer in enumerate(self.layers):
+ x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
+ if self.sigmoid_output:
+ x = torch.sigmoid(x)
+ return x
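
The dynamic fallback in `MaskDecoder._dynamic_multimask_via_stability` hinges on the stability score from `_get_stability_scores`: the IoU between the same logit map thresholded at +delta and at -delta. A toy sketch of that score (the function name, delta, and example tensors are local to this illustration):

```python
import torch


def stability_scores(mask_logits: torch.Tensor, delta: float = 0.05) -> torch.Tensor:
    """IoU between the mask thresholded at +delta and at -delta, mirroring the decoder above."""
    flat = mask_logits.flatten(-2)                   # [..., H*W]
    area_i = (flat > delta).sum(dim=-1).float()      # "tight" mask area
    area_u = (flat > -delta).sum(dim=-1).float()     # "loose" mask area
    return torch.where(area_u > 0, area_i / area_u, torch.ones_like(area_u))


sharp = torch.full((1, 1, 8, 8), 5.0)                   # confident logits -> stable
fuzzy = torch.empty(1, 1, 8, 8).uniform_(-0.04, 0.04)   # logits near zero -> unstable
print(stability_scores(sharp))   # ~1.0: keep the single-mask output
print(stability_scores(fuzzy))   # well below the 0.98 threshold: fall back to the best multimask output
```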
diff --git a/sam3/sam/prompt_encoder.py b/sam3/sam/prompt_encoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..145ea9fff05d2d0ec55007bd8c08662c725d362a
--- /dev/null
+++ b/sam3/sam/prompt_encoder.py
@@ -0,0 +1,243 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Any, Optional, Tuple, Type
+
+import numpy as np
+import torch
+from torch import nn
+
+from .common import LayerNorm2d
+
+
+class PromptEncoder(nn.Module):
+ def __init__(
+ self,
+ embed_dim: int,
+ image_embedding_size: Tuple[int, int],
+ input_image_size: Tuple[int, int],
+ mask_in_chans: int,
+ activation: Type[nn.Module] = nn.GELU,
+ ) -> None:
+ """
+ Encodes prompts for input to SAM's mask decoder.
+
+ Arguments:
+ embed_dim (int): The prompts' embedding dimension
+ image_embedding_size (tuple(int, int)): The spatial size of the
+ image embedding, as (H, W).
+ input_image_size (tuple(int, int)): The padded size of the image as input
+ to the image encoder, as (H, W).
+ mask_in_chans (int): The number of hidden channels used for
+ encoding input masks.
+ activation (nn.Module): The activation to use when encoding
+ input masks.
+ """
+ super().__init__()
+ self.embed_dim = embed_dim
+ self.input_image_size = input_image_size
+ self.image_embedding_size = image_embedding_size
+ self.pe_layer = PositionEmbeddingRandom(embed_dim // 2)
+
+ self.num_point_embeddings: int = 4 # pos/neg point + 2 box corners
+ point_embeddings = [
+ nn.Embedding(1, embed_dim) for i in range(self.num_point_embeddings)
+ ]
+ self.point_embeddings = nn.ModuleList(point_embeddings)
+ self.not_a_point_embed = nn.Embedding(1, embed_dim)
+
+ self.mask_input_size = (
+ 4 * image_embedding_size[0],
+ 4 * image_embedding_size[1],
+ )
+ self.mask_downscaling = nn.Sequential(
+ nn.Conv2d(1, mask_in_chans // 4, kernel_size=2, stride=2),
+ LayerNorm2d(mask_in_chans // 4),
+ activation(),
+ nn.Conv2d(mask_in_chans // 4, mask_in_chans, kernel_size=2, stride=2),
+ LayerNorm2d(mask_in_chans),
+ activation(),
+ nn.Conv2d(mask_in_chans, embed_dim, kernel_size=1),
+ )
+ self.no_mask_embed = nn.Embedding(1, embed_dim)
+
+ def get_dense_pe(self) -> torch.Tensor:
+ """
+ Returns the positional encoding used to encode point prompts,
+ applied to a dense set of points the shape of the image encoding.
+
+ Returns:
+ torch.Tensor: Positional encoding with shape
+ 1x(embed_dim)x(embedding_h)x(embedding_w)
+ """
+ return self.pe_layer(self.image_embedding_size).unsqueeze(0)
+
+ def _embed_points(
+ self,
+ points: torch.Tensor,
+ labels: torch.Tensor,
+ pad: bool,
+ ) -> torch.Tensor:
+ """Embeds point prompts."""
+ points = points + 0.5 # Shift to center of pixel
+ if pad:
+ padding_point = torch.zeros((points.shape[0], 1, 2), device=points.device)
+ padding_label = -torch.ones((labels.shape[0], 1), device=labels.device)
+ points = torch.cat([points, padding_point], dim=1)
+ labels = torch.cat([labels, padding_label], dim=1)
+ point_embedding = self.pe_layer.forward_with_coords(
+ points, self.input_image_size
+ )
+
+ point_embedding = torch.where(
+ (labels == -1).unsqueeze(-1),
+ torch.zeros_like(point_embedding) + self.not_a_point_embed.weight,
+ point_embedding,
+ )
+ point_embedding = torch.where(
+ (labels == 0).unsqueeze(-1),
+ point_embedding + self.point_embeddings[0].weight,
+ point_embedding,
+ )
+ point_embedding = torch.where(
+ (labels == 1).unsqueeze(-1),
+ point_embedding + self.point_embeddings[1].weight,
+ point_embedding,
+ )
+ point_embedding = torch.where(
+ (labels == 2).unsqueeze(-1),
+ point_embedding + self.point_embeddings[2].weight,
+ point_embedding,
+ )
+ point_embedding = torch.where(
+ (labels == 3).unsqueeze(-1),
+ point_embedding + self.point_embeddings[3].weight,
+ point_embedding,
+ )
+ return point_embedding
+
+ def _embed_boxes(self, boxes: torch.Tensor) -> torch.Tensor:
+ """Embeds box prompts."""
+ boxes = boxes + 0.5 # Shift to center of pixel
+ coords = boxes.reshape(-1, 2, 2)
+ corner_embedding = self.pe_layer.forward_with_coords(
+ coords, self.input_image_size
+ )
+ corner_embedding[:, 0, :] += self.point_embeddings[2].weight
+ corner_embedding[:, 1, :] += self.point_embeddings[3].weight
+ return corner_embedding
+
+ def _embed_masks(self, masks: torch.Tensor) -> torch.Tensor:
+ """Embeds mask inputs."""
+ mask_embedding = self.mask_downscaling(masks)
+ return mask_embedding
+
+ def _get_batch_size(
+ self,
+ points: Optional[Tuple[torch.Tensor, torch.Tensor]],
+ boxes: Optional[torch.Tensor],
+ masks: Optional[torch.Tensor],
+ ) -> int:
+ """
+ Gets the batch size of the output given the batch size of the input prompts.
+ """
+ if points is not None:
+ return points[0].shape[0]
+ elif boxes is not None:
+ return boxes.shape[0]
+ elif masks is not None:
+ return masks.shape[0]
+ else:
+ return 1
+
+ def _get_device(self) -> torch.device:
+ return self.point_embeddings[0].weight.device
+
+ def forward(
+ self,
+ points: Optional[Tuple[torch.Tensor, torch.Tensor]],
+ boxes: Optional[torch.Tensor],
+ masks: Optional[torch.Tensor],
+ ) -> Tuple[torch.Tensor, torch.Tensor]:
+ """
+ Embeds different types of prompts, returning both sparse and dense
+ embeddings.
+
+ Arguments:
+ points (tuple(torch.Tensor, torch.Tensor) or none): point coordinates
+ and labels to embed.
+ boxes (torch.Tensor or none): boxes to embed
+ masks (torch.Tensor or none): masks to embed
+
+ Returns:
+ torch.Tensor: sparse embeddings for the points and boxes, with shape
+ BxNx(embed_dim), where N is determined by the number of input points
+ and boxes.
+ torch.Tensor: dense embeddings for the masks, in the shape
+ Bx(embed_dim)x(embed_H)x(embed_W)
+ """
+ bs = self._get_batch_size(points, boxes, masks)
+ sparse_embeddings = torch.empty(
+ (bs, 0, self.embed_dim), device=self._get_device()
+ )
+ if points is not None:
+ coords, labels = points
+ point_embeddings = self._embed_points(coords, labels, pad=(boxes is None))
+ sparse_embeddings = torch.cat([sparse_embeddings, point_embeddings], dim=1)
+ if boxes is not None:
+ box_embeddings = self._embed_boxes(boxes)
+ sparse_embeddings = torch.cat([sparse_embeddings, box_embeddings], dim=1)
+
+ if masks is not None:
+ dense_embeddings = self._embed_masks(masks)
+ else:
+ dense_embeddings = self.no_mask_embed.weight.reshape(1, -1, 1, 1).expand(
+ bs, -1, self.image_embedding_size[0], self.image_embedding_size[1]
+ )
+
+ return sparse_embeddings, dense_embeddings
+
+
+class PositionEmbeddingRandom(nn.Module):
+ """
+ Positional encoding using random spatial frequencies.
+ """
+
+ def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+ super().__init__()
+ if scale is None or scale <= 0.0:
+ scale = 1.0
+ self.register_buffer(
+ "positional_encoding_gaussian_matrix",
+ scale * torch.randn((2, num_pos_feats)),
+ )
+
+ def _pe_encoding(self, coords: torch.Tensor) -> torch.Tensor:
+ """Positionally encode points that are normalized to [0,1]."""
+ # assuming coords are in [0, 1]^2 square and have d_1 x ... x d_n x 2 shape
+ coords = 2 * coords - 1
+ coords = coords @ self.positional_encoding_gaussian_matrix
+ coords = 2 * np.pi * coords
+ # outputs d_1 x ... x d_n x C shape
+ return torch.cat([torch.sin(coords), torch.cos(coords)], dim=-1)
+
+ def forward(self, size: Tuple[int, int]) -> torch.Tensor:
+ """Generate positional encoding for a grid of the specified size."""
+ h, w = size
+ device: Any = self.positional_encoding_gaussian_matrix.device
+ grid = torch.ones((h, w), device=device, dtype=torch.float32)
+ y_embed = grid.cumsum(dim=0) - 0.5
+ x_embed = grid.cumsum(dim=1) - 0.5
+ y_embed = y_embed / h
+ x_embed = x_embed / w
+
+ pe = self._pe_encoding(torch.stack([x_embed, y_embed], dim=-1))
+ return pe.permute(2, 0, 1) # C x H x W
+
+ def forward_with_coords(
+ self, coords_input: torch.Tensor, image_size: Tuple[int, int]
+ ) -> torch.Tensor:
+ """Positionally encode points that are not normalized to [0,1]."""
+ coords = coords_input.clone()
+ coords[:, :, 0] = coords[:, :, 0] / image_size[1]
+ coords[:, :, 1] = coords[:, :, 1] / image_size[0]
+ return self._pe_encoding(coords.to(torch.float)) # B x N x C
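
`PositionEmbeddingRandom` is a random-Fourier-feature encoding: pixel-center coordinates normalized to [0, 1] are projected through a fixed Gaussian matrix and expanded into sin/cos pairs. A standalone sketch of the same construction (the grid size and feature count below are illustrative):

```python
import torch

num_pos_feats = 64                       # yields a 2 * 64 = 128-dim encoding
gauss = torch.randn(2, num_pos_feats)    # plays the role of the registered buffer


def encode(coords_01: torch.Tensor) -> torch.Tensor:
    c = 2 * coords_01 - 1                # map [0, 1] -> [-1, 1]
    c = 2 * torch.pi * (c @ gauss)
    return torch.cat([c.sin(), c.cos()], dim=-1)


h, w = 64, 64                            # spatial size of the image embedding
ys = (torch.arange(h, dtype=torch.float32) + 0.5) / h   # pixel centers, normalized
xs = (torch.arange(w, dtype=torch.float32) + 0.5) / w
grid = torch.stack(torch.meshgrid(xs, ys, indexing="xy"), dim=-1)  # H x W x 2, ordered (x, y)
pe = encode(grid).permute(2, 0, 1)       # C x H x W (get_dense_pe() adds a batch dim)
print(pe.shape)                          # torch.Size([128, 64, 64])
```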
diff --git a/sam3/sam/rope.py b/sam3/sam/rope.py
new file mode 100644
index 0000000000000000000000000000000000000000..2db01b66765fca3cc074e883076bedf2a345d1f5
--- /dev/null
+++ b/sam3/sam/rope.py
@@ -0,0 +1,161 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+Adapted from:
+1. https://github.com/meta-llama/codellama/blob/main/llama/model.py
+2. https://github.com/naver-ai/rope-vit
+3. https://github.com/lucidrains/rotary-embedding-torch
+"""
+
+from typing import Optional
+
+import torch
+from einops import rearrange, repeat
+from torch import broadcast_tensors, nn
+
+
+def init_t_xy(end_x: int, end_y: int, scale: float = 1.0, offset: int = 0, device=None):
+ t = torch.arange(end_x * end_y, dtype=torch.float32, device=device)
+ t_x = (t % end_x).float()
+ t_y = torch.div(t, end_x, rounding_mode="floor").float()
+ return t_x * scale + offset, t_y * scale + offset
+
+
+def compute_axial_cis(
+ dim: int,
+ end_x: int,
+ end_y: int,
+ theta: float = 10000.0,
+ scale_pos: float = 1.0,
+ offset: int = 0,
+ device=None,
+):
+ freqs_x = 1.0 / (
+ theta ** (torch.arange(0, dim, 4, device=device)[: (dim // 4)].float() / dim)
+ )
+ freqs_y = 1.0 / (
+ theta ** (torch.arange(0, dim, 4, device=device)[: (dim // 4)].float() / dim)
+ )
+
+ t_x, t_y = init_t_xy(end_x, end_y, scale_pos, offset, device=device)
+ freqs_x = torch.outer(t_x, freqs_x)
+ freqs_y = torch.outer(t_y, freqs_y)
+ freqs_cis_x = torch.polar(torch.ones_like(freqs_x), freqs_x)
+ freqs_cis_y = torch.polar(torch.ones_like(freqs_y), freqs_y)
+ return torch.cat([freqs_cis_x, freqs_cis_y], dim=-1)
+
+
+def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor):
+ ndim = x.ndim
+ assert 0 <= 1 < ndim
+ assert freqs_cis.shape == (x.shape[-2], x.shape[-1])
+ shape = [d if i >= ndim - 2 else 1 for i, d in enumerate(x.shape)]
+ return freqs_cis.view(*shape)
+
+
+def apply_rotary_enc(
+ xq: torch.Tensor,
+ xk: torch.Tensor,
+ freqs_cis: torch.Tensor,
+ repeat_freqs_k: bool = False,
+):
+ xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2))
+ xk_ = (
+ torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2))
+ if xk.shape[-2] != 0
+ else None
+ )
+ freqs_cis = reshape_for_broadcast(freqs_cis, xq_)
+ xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3)
+ if xk_ is None:
+ # no keys to rotate, due to dropout
+ return xq_out.type_as(xq).to(xq.device), xk
+ # repeat freqs along seq_len dim to match k seq_len
+ if repeat_freqs_k:
+ r = xk_.shape[-2] // xq_.shape[-2]
+ freqs_cis = freqs_cis.repeat(*([1] * (freqs_cis.ndim - 2)), r, 1)
+ xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3)
+ return xq_out.type_as(xq).to(xq.device), xk_out.type_as(xk).to(xk.device)
+
+
+def complex_mult(xq_real, xq_imag, freqs_cis_real, freqs_cis_imag):
+ # Compute the real part of the product
+ real_part = xq_real * freqs_cis_real - xq_imag * freqs_cis_imag
+ # Compute the imaginary part of the product
+ imag_part = xq_real * freqs_cis_imag + xq_imag * freqs_cis_real
+ # Stack the real and imaginary parts along the last dimension
+ return torch.stack([real_part, imag_part], dim=-1)
+
+
+def apply_rotary_enc_real(
+ xq: torch.Tensor,
+ xk: torch.Tensor,
+ freqs_cis_real: torch.Tensor,
+ freqs_cis_imag: torch.Tensor,
+ repeat_freqs_k: bool = False,
+):
+ assert xk is not None
+ assert xk.shape[-2] != 0
+
+ xq_real = xq.float().reshape(*xq.shape[:-1], -1, 2)[..., 0]
+ xq_imag = xq.float().reshape(*xq.shape[:-1], -1, 2)[..., 1]
+ xk_real = xk.float().reshape(*xk.shape[:-1], -1, 2)[..., 0]
+ xk_imag = xk.float().reshape(*xk.shape[:-1], -1, 2)[..., 1]
+ freqs_cis_real = reshape_for_broadcast(freqs_cis_real, xq_real)
+ freqs_cis_imag = reshape_for_broadcast(freqs_cis_imag, xq_imag)
+ xq_out = complex_mult(xq_real, xq_imag, freqs_cis_real, freqs_cis_imag).flatten(3)
+ if repeat_freqs_k:
+ r = xk_real.shape[-2] // xq_real.shape[-2]
+ freqs_cis_real = freqs_cis_real.repeat(*([1] * (freqs_cis_real.ndim - 2)), r, 1)
+ freqs_cis_imag = freqs_cis_imag.repeat(*([1] * (freqs_cis_imag.ndim - 2)), r, 1)
+ xk_out = complex_mult(xk_real, xk_imag, freqs_cis_real, freqs_cis_imag).flatten(3)
+ # xq_out = torch.view_as_real(torch.complex(xq_real, xq_imag) * torch.complex(freqs_cis_real, freqs_cis_imag)).flatten(3)
+ # xk_out = torch.view_as_real(torch.complex(xk_real, xk_imag) * torch.complex(freqs_cis_real, freqs_cis_imag)).flatten(3)
+ return xq_out.type_as(xq).to(xq.device), xk_out.type_as(xk).to(xk.device)
+
+
+# rotary embedding helper functions
+def broadcat(tensors, dim=-1):
+ broadcasted_tensors = broadcast_tensors(*tensors)
+ return torch.cat(broadcasted_tensors, dim=dim)
+
+
+def rotate_half(x: torch.Tensor):
+ x = rearrange(x, "... (d r) -> ... d r", r=2)
+ x1, x2 = x.unbind(dim=-1)
+ x = torch.stack((-x2, x1), dim=-1)
+ return rearrange(x, "... d r -> ... (d r)")
+
+
+class VisionRotaryEmbeddingVE(nn.Module):
+ def __init__(
+ self,
+ dim: int,
+ seq_len: int,
+ pt_seq_len: Optional[int] = None,
+ theta: float = 10000.0,
+ offset: int = 1, # specific to VE
+ ):
+ super().__init__()
+
+ freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim))
+ scale = 1.0
+ if pt_seq_len is not None:
+ scale = pt_seq_len / seq_len
+
+ # offset of +1 following VE - even though only positional
+ # differences matter for the attention op
+ t = torch.arange(seq_len) * scale + offset
+
+ freqs = torch.einsum("..., f -> ... f", t, freqs)
+ freqs = repeat(freqs, "... n -> ... (n r)", r=2)
+
+ freqs = broadcat((freqs[None, :, :], freqs[:, None, :]), dim=-1)
+ freqs_cos = freqs.cos().view(-1, freqs.shape[-1])
+ freqs_sin = freqs.sin().view(-1, freqs.shape[-1])
+
+ self.register_buffer("freqs_cos", freqs_cos)
+ self.register_buffer("freqs_sin", freqs_sin)
+
+ def forward(self, t: torch.Tensor):
+ return t * self.freqs_cos + rotate_half(t) * self.freqs_sin
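
A shape-level usage sketch for the axial RoPE helpers defined above, assuming the repo is installed so that `sam3.sam.rope` is importable (the grid size, head count, and head dim below are illustrative):

```python
import torch

from sam3.sam.rope import apply_rotary_enc, compute_axial_cis

head_dim, grid = 64, 8                          # per-head channel dim, 8x8 token grid
freqs_cis = compute_axial_cis(dim=head_dim, end_x=grid, end_y=grid)
print(freqs_cis.shape)                          # (64, 32): one complex row per token

q = torch.randn(1, 2, grid * grid, head_dim)    # B x heads x tokens x head_dim
k = torch.randn(1, 2, grid * grid, head_dim)
q_rot, k_rot = apply_rotary_enc(q, k, freqs_cis)
print(q_rot.shape, k_rot.shape)                 # shapes unchanged; features are rotated
```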
diff --git a/sam3/sam/transformer.py b/sam3/sam/transformer.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e96c28331bfa6726f3bec6dc216137c9a92a30c
--- /dev/null
+++ b/sam3/sam/transformer.py
@@ -0,0 +1,358 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import math
+from functools import partial
+from typing import Optional, Tuple, Type
+
+import torch
+import torch.nn.functional as F
+
+from sam3.sam.rope import apply_rotary_enc, apply_rotary_enc_real, compute_axial_cis
+from torch import nn, Tensor
+
+from .common import MLPBlock
+
+
+class TwoWayTransformer(nn.Module):
+ def __init__(
+ self,
+ depth: int,
+ embedding_dim: int,
+ num_heads: int,
+ mlp_dim: int,
+ activation: Type[nn.Module] = nn.ReLU,
+ attention_downsample_rate: int = 2,
+ ) -> None:
+ """
+ A transformer decoder that attends to an input image using
+ queries whose positional embedding is supplied.
+
+ Args:
+ depth (int): number of layers in the transformer
+ embedding_dim (int): the channel dimension for the input embeddings
+ num_heads (int): the number of heads for multihead attention. Must
+ divide embedding_dim
+ mlp_dim (int): the channel dimension internal to the MLP block
+ activation (nn.Module): the activation to use in the MLP block
+ """
+ super().__init__()
+ self.depth = depth
+ self.embedding_dim = embedding_dim
+ self.num_heads = num_heads
+ self.mlp_dim = mlp_dim
+ self.layers = nn.ModuleList()
+
+ for i in range(depth):
+ self.layers.append(
+ TwoWayAttentionBlock(
+ embedding_dim=embedding_dim,
+ num_heads=num_heads,
+ mlp_dim=mlp_dim,
+ activation=activation,
+ attention_downsample_rate=attention_downsample_rate,
+ skip_first_layer_pe=(i == 0),
+ )
+ )
+
+ self.final_attn_token_to_image = Attention(
+ embedding_dim, num_heads, downsample_rate=attention_downsample_rate
+ )
+ self.norm_final_attn = nn.LayerNorm(embedding_dim)
+
+ def forward(
+ self,
+ image_embedding: Tensor,
+ image_pe: Tensor,
+ point_embedding: Tensor,
+ ) -> Tuple[Tensor, Tensor]:
+ """
+ Args:
+ image_embedding (torch.Tensor): image to attend to. Should be shape
+ B x embedding_dim x h x w for any h and w.
+ image_pe (torch.Tensor): the positional encoding to add to the image. Must
+ have the same shape as image_embedding.
+ point_embedding (torch.Tensor): the embedding to add to the query points.
+ Must have shape B x N_points x embedding_dim for any N_points.
+
+ Returns:
+ torch.Tensor: the processed point_embedding
+ torch.Tensor: the processed image_embedding
+ """
+ # BxCxHxW -> BxHWxC == B x N_image_tokens x C
+ bs, c, h, w = image_embedding.shape
+ image_embedding = image_embedding.flatten(2).permute(0, 2, 1)
+ image_pe = image_pe.flatten(2).permute(0, 2, 1)
+
+ # Prepare queries
+ queries = point_embedding
+ keys = image_embedding
+
+ # Apply transformer blocks and final layernorm
+ for layer in self.layers:
+ queries, keys = layer(
+ queries=queries,
+ keys=keys,
+ query_pe=point_embedding,
+ key_pe=image_pe,
+ )
+
+ # Apply the final attention layer from the points to the image
+ q = queries + point_embedding
+ k = keys + image_pe
+ attn_out = self.final_attn_token_to_image(q=q, k=k, v=keys)
+ queries = queries + attn_out
+ queries = self.norm_final_attn(queries)
+
+ return queries, keys
+
+
+class TwoWayAttentionBlock(nn.Module):
+ def __init__(
+ self,
+ embedding_dim: int,
+ num_heads: int,
+ mlp_dim: int = 2048,
+ activation: Type[nn.Module] = nn.ReLU,
+ attention_downsample_rate: int = 2,
+ skip_first_layer_pe: bool = False,
+ ) -> None:
+ """
+ A transformer block with four layers: (1) self-attention of sparse
+ inputs, (2) cross attention of sparse inputs to dense inputs, (3) mlp
+ block on sparse inputs, and (4) cross attention of dense inputs to sparse
+ inputs.
+
+ Arguments:
+ embedding_dim (int): the channel dimension of the embeddings
+ num_heads (int): the number of heads in the attention layers
+ mlp_dim (int): the hidden dimension of the mlp block
+ activation (nn.Module): the activation of the mlp block
+ skip_first_layer_pe (bool): skip the PE on the first layer
+ """
+ super().__init__()
+ self.self_attn = Attention(embedding_dim, num_heads)
+ self.norm1 = nn.LayerNorm(embedding_dim)
+
+ self.cross_attn_token_to_image = Attention(
+ embedding_dim, num_heads, downsample_rate=attention_downsample_rate
+ )
+ self.norm2 = nn.LayerNorm(embedding_dim)
+
+ self.mlp = MLPBlock(embedding_dim, mlp_dim, activation)
+ self.norm3 = nn.LayerNorm(embedding_dim)
+
+ self.norm4 = nn.LayerNorm(embedding_dim)
+ self.cross_attn_image_to_token = Attention(
+ embedding_dim, num_heads, downsample_rate=attention_downsample_rate
+ )
+
+ self.skip_first_layer_pe = skip_first_layer_pe
+
+ def forward(
+ self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor
+ ) -> Tuple[Tensor, Tensor]:
+ # Self attention block
+ if self.skip_first_layer_pe:
+ queries = self.self_attn(q=queries, k=queries, v=queries)
+ else:
+ q = queries + query_pe
+ attn_out = self.self_attn(q=q, k=q, v=queries)
+ queries = queries + attn_out
+ queries = self.norm1(queries)
+
+ # Cross attention block, tokens attending to image embedding
+ q = queries + query_pe
+ k = keys + key_pe
+ attn_out = self.cross_attn_token_to_image(q=q, k=k, v=keys)
+ queries = queries + attn_out
+ queries = self.norm2(queries)
+
+ # MLP block
+ mlp_out = self.mlp(queries)
+ queries = queries + mlp_out
+ queries = self.norm3(queries)
+
+ # Cross attention block, image embedding attending to tokens
+ q = queries + query_pe
+ k = keys + key_pe
+ attn_out = self.cross_attn_image_to_token(q=k, k=q, v=queries)
+ keys = keys + attn_out
+ keys = self.norm4(keys)
+
+ return queries, keys
+
+
+class Attention(nn.Module):
+ """
+ An attention layer that allows for downscaling the size of the embedding
+ after projection to queries, keys, and values.
+ """
+
+ def __init__(
+ self,
+ embedding_dim: int,
+ num_heads: int,
+ downsample_rate: int = 1,
+ dropout: float = 0.0,
+ kv_in_dim: Optional[int] = None,
+ use_fa3: bool = False,
+ ) -> None:
+ super().__init__()
+ self.embedding_dim = embedding_dim
+ self.kv_in_dim = kv_in_dim if kv_in_dim is not None else embedding_dim
+ self.internal_dim = embedding_dim // downsample_rate
+ self.num_heads = num_heads
+ self.use_fa3 = use_fa3
+ assert (
+ self.internal_dim % num_heads == 0
+ ), "num_heads must divide embedding_dim."
+
+ self.q_proj = nn.Linear(embedding_dim, self.internal_dim)
+ self.k_proj = nn.Linear(self.kv_in_dim, self.internal_dim)
+ self.v_proj = nn.Linear(self.kv_in_dim, self.internal_dim)
+ self.out_proj = nn.Linear(self.internal_dim, embedding_dim)
+
+ self.dropout_p = dropout
+
+ def _separate_heads(self, x: Tensor, num_heads: int) -> Tensor:
+ b, n, c = x.shape
+ x = x.reshape(b, n, num_heads, c // num_heads)
+ return x.transpose(1, 2) # B x N_heads x N_tokens x C_per_head
+
+ def _recombine_heads(self, x: Tensor) -> Tensor:
+ b, n_heads, n_tokens, c_per_head = x.shape
+ x = x.transpose(1, 2)
+ return x.reshape(b, n_tokens, n_heads * c_per_head) # B x N_tokens x C
+
+ def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
+ # Input projections
+ q = self.q_proj(q)
+ k = self.k_proj(k)
+ v = self.v_proj(v)
+
+ # Separate into heads
+ q = self._separate_heads(q, self.num_heads)
+ k = self._separate_heads(k, self.num_heads)
+ v = self._separate_heads(v, self.num_heads)
+
+ dropout_p = self.dropout_p if self.training else 0.0
+ # Attention
+ # with torch.backends.cuda.sdp_kernel(
+ # enable_flash=USE_FLASH_ATTN,
+ # # if Flash attention kernel is off, then math kernel needs to be enabled
+ # enable_math=(OLD_GPU and dropout_p > 0.0) or MATH_KERNEL_ON,
+ # enable_mem_efficient=OLD_GPU,
+ # ):
+ # Let's trust the dispatcher....
+ if self.use_fa3:
+ from sam3.perflib.fa3 import flash_attn_func
+
+ assert dropout_p == 0.0
+ out = flash_attn_func(
+ q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
+ ).transpose(1, 2)
+ else:
+ torch.backends.cuda.enable_flash_sdp(True)
+ torch.backends.cuda.enable_math_sdp(True)
+ torch.backends.cuda.enable_mem_efficient_sdp(True)
+ out = F.scaled_dot_product_attention(q, k, v, dropout_p=dropout_p)
+
+ out = self._recombine_heads(out)
+ out = self.out_proj(out)
+
+ return out
+
+
+class RoPEAttention(Attention):
+ """Attention with rotary position encoding."""
+
+ def __init__(
+ self,
+ *args,
+ rope_theta=10000.0,
+ # whether to repeat q rope to match k length
+ # this is needed for cross-attention to memories
+ rope_k_repeat=False,
+ feat_sizes=(64, 64), # [w, h] for stride 16 feats at 1024 resolution
+ use_rope_real=False,
+ **kwargs,
+ ):
+ super().__init__(*args, **kwargs)
+ self.use_rope_real = use_rope_real
+ self.compute_cis = partial(
+ compute_axial_cis, dim=self.internal_dim // self.num_heads, theta=rope_theta
+ )
+ device = torch.device("cuda") if torch.cuda.is_available() else None
+ self.freqs_cis = self.compute_cis(
+ end_x=feat_sizes[0], end_y=feat_sizes[1], device=device
+ )
+ if self.use_rope_real:
+ self.freqs_cis_real = self.freqs_cis.real
+ self.freqs_cis_imag = self.freqs_cis.imag
+ self.rope_k_repeat = rope_k_repeat
+
+ def forward(
+ self, q: Tensor, k: Tensor, v: Tensor, num_k_exclude_rope: int = 0
+ ) -> Tensor:
+ # Input projections
+ q = self.q_proj(q)
+ k = self.k_proj(k)
+ v = self.v_proj(v)
+
+ # Separate into heads
+ q = self._separate_heads(q, self.num_heads)
+ k = self._separate_heads(k, self.num_heads)
+ v = self._separate_heads(v, self.num_heads)
+
+ # Apply rotary position encoding
+ w = h = math.sqrt(q.shape[-2])
+ if self.freqs_cis.shape[0] != q.shape[-2]:
+ self.freqs_cis = self.compute_cis(end_x=w, end_y=h, device=q.device)
+ self.freqs_cis_real = self.freqs_cis.real
+ self.freqs_cis_imag = self.freqs_cis.imag
+ if q.shape[-2] != k.shape[-2]:
+ assert self.rope_k_repeat
+
+ num_k_rope = k.size(-2) - num_k_exclude_rope
+ if self.use_rope_real:
+ q, k[:, :, :num_k_rope] = apply_rotary_enc_real(
+ q,
+ k[:, :, :num_k_rope],
+ freqs_cis_real=self.freqs_cis_real,
+ freqs_cis_imag=self.freqs_cis_imag,
+ repeat_freqs_k=self.rope_k_repeat,
+ )
+ else:
+ q, k[:, :, :num_k_rope] = apply_rotary_enc(
+ q,
+ k[:, :, :num_k_rope],
+ self.freqs_cis,
+ repeat_freqs_k=self.rope_k_repeat,
+ )
+
+ dropout_p = self.dropout_p if self.training else 0.0
+ # Attention
+ # with torch.backends.cuda.sdp_kernel(
+ # enable_flash=USE_FLASH_ATTN,
+ # # if Flash attention kernel is off, then math kernel needs to be enabled
+ # enable_math=(OLD_GPU and dropout_p > 0.0) or MATH_KERNEL_ON,
+ # enable_mem_efficient=OLD_GPU,
+ # ):
+ # Let's trust the dispatcher....
+ if self.use_fa3:
+ from sam3.perflib.fa3 import flash_attn_func
+
+ assert dropout_p == 0.0
+ out = flash_attn_func(
+ q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2)
+ ).transpose(1, 2)
+ else:
+ torch.backends.cuda.enable_flash_sdp(True)
+ torch.backends.cuda.enable_math_sdp(True)
+ torch.backends.cuda.enable_mem_efficient_sdp(True)
+ out = F.scaled_dot_product_attention(q, k, v, dropout_p=dropout_p)
+
+ out = self._recombine_heads(out)
+ out = self.out_proj(out)
+
+ return out
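
A quick shape check for `TwoWayTransformer`, again assuming the repo and its dependencies are installed (the 64x64 feature map and 5 point tokens are illustrative):

```python
import torch

from sam3.sam.transformer import TwoWayTransformer

two_way = TwoWayTransformer(depth=2, embedding_dim=256, num_heads=8, mlp_dim=2048)
image_embedding = torch.randn(1, 256, 64, 64)   # B x C x H x W
image_pe = torch.randn(1, 256, 64, 64)          # must match image_embedding's shape
point_embedding = torch.randn(1, 5, 256)        # B x N_points x C

queries, keys = two_way(image_embedding, image_pe, point_embedding)
print(queries.shape)   # torch.Size([1, 5, 256]): processed point tokens
print(keys.shape)      # torch.Size([1, 4096, 256]): processed image tokens
```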
diff --git a/sam3/train/__init__.py b/sam3/train/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/train/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/train/configs/eval_base.yaml b/sam3/train/configs/eval_base.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..20890d86459218b95340ab6fd7f931f689e1a100
--- /dev/null
+++ b/sam3/train/configs/eval_base.yaml
@@ -0,0 +1,279 @@
+# @package _global_
+defaults:
+ - _self_
+
+# This config is the base configuration for all evaluations. Amongst other things, it defines:
+# - the model
+# - the image transforms
+# - the post processors
+# - cluster configuration (only relevant for slurm-based evals, ignored otherwise)
+#
+# Most of the parameters should be kept as-is. The main modifications you may want to make are:
+# - the cluster configuration, to adjust partitions/qos to your system
+# - the flag gather_pred_via_filesys if your RAM is tight
+# - num_val_workers if you have few CPU cores (it should be roughly the number of cores divided by the number of GPUs)
+# - the paths below
+
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+ # If you leave the checkpoint path as null, the model will be downloaded from Hugging Face. Otherwise, provide a path.
+ checkpoint_path: null
+ # experiment outputs will be written to subfolders of this directory
+ base_experiment_log_dir:
+
+ # base path to the annotation folder for gold (refer to the readmes on how to download)
+ base_annotation_path:
+
+ # base path to the annotation folder for silver (refer to the readmes on how to download)
+ base_annotation_path_silver:
+
+ # path to the metaclip images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend to evaluate on this dataset.
+ metaclip_img_path:
+
+ # path to the sa1b images, used for SA-Co gold (refer to the readme for instructions). Can be null if you don't intend to evaluate on this dataset.
+ sa1b_img_path:
+
+ # path to the SA-Co/silver images
+ silver_img_path:
+
+ bpe_path: # This should point to assets/bpe_simple_vocab_16e6.txt.gz
+
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+
+ use_presence_eval: True
+
+ base_val_transform:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ ######## transforms for validation (begin) ########
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: False
+ ######## transforms for validation (end) ########
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ loss: null
+
+ # Model parameters
+ d_model: 256
+ input_box_embedding_dim: ${add:${scratch.d_model},2}
+
+ # Box processing
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 #infinite detections
+ use_original_ids: false
+ use_original_sizes_box: false
+ use_presence: ${scratch.use_presence_eval}
+
+ box_postprocessor_thresholded:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 #infinite detections
+ use_original_ids: false
+ use_original_sizes_box: false
+ detection_threshold: 0.3
+ use_presence: ${scratch.use_presence_eval}
+
+ mask_postprocessor_thresholded:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 #infinite detections
+ iou_type: "segm"
+ use_original_ids: false
+ use_original_sizes_box: false
+ use_original_sizes_mask: true
+ convert_mask_to_rle: True
+ detection_threshold: 0.3
+ use_presence: ${scratch.use_presence_eval}
+
+ # Image processing parameters
+ resolution: 1008
+ max_ann_per_img: 200
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ train_batch_size: 1
+ val_batch_size: 1
+ num_train_workers: 0
+ num_val_workers: 10 # change this depending on the number of cpu cores available
+ max_data_epochs: 20
+ target_epoch_size: 1500
+ hybrid_repeats: 1
+ context_length: 2
+
+ # All-reduce - this controls how the predictions are sent back to node 0.
+ # If you have a lot of RAM, CPU gather is faster. Otherwise, we provide a fallback through the filesystem (e.g. NFS).
+ # Switch to true if you get CPU OOMs during gather.
+ gather_pred_via_filesys: false
+
+ # Learning rate and scheduler parameters (unused for eval)
+ lr_scale: 0.1
+ lr_transformer: ${times:8e-4,${scratch.lr_scale}}
+ lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
+ lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
+ lrd_vision_backbone: 0.9 # (lower for in-domain and higher for OOD)
+ wd: 0.1
+ scheduler_timescale: 20
+ scheduler_warmup: 20
+ scheduler_cooldown: 20
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val: null
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpus
+ eval_mode: true
+ enable_segmentation: true # Warning: Enable this if using segmentation.
+ checkpoint_path: ${paths.checkpoint_path}
+
+ meters:
+ val: null
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+ optimizer:
+ _target_: torch.optim.AdamW
+
+ gradient_clip:
+ _target_: sam3.train.optim.optimizer.GradientClipper
+ max_norm: 0.1
+ norm_type: 2
+
+ param_group_modifiers:
+ - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
+ _partial_: True
+ layer_decay_value: ${scratch.lrd_vision_backbone}
+ apply_to: 'backbone.vision_backbone.trunk'
+ overrides:
+ - pattern: '*pos_embed*'
+ value: 1.0
+
+ options:
+ lr:
+ - scheduler: # transformer and class_embed
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_transformer}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_vision_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.vision_backbone.*'
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_language_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.language_backbone.*'
+
+ weight_decay:
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: ${scratch.wd}
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: 0.0
+ param_names:
+ - '*bias*'
+ module_cls_names: ['torch.nn.LayerNorm']
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 4
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+
+submitit:
+ account: null # Add your SLURM account if use_cluster is true
+ partition: null
+ qos: null # Add your QoS if use_cluster is true
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
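
The config above leans on custom OmegaConf resolvers such as `${add:...}` and `${times:...}` (e.g. for `input_box_embedding_dim` and the learning rates). The repo registers its own resolvers in its entry points; the sketch below only illustrates the OmegaConf mechanism with hypothetical local registrations, assuming the names mean plain addition and multiplication:

```python
from omegaconf import OmegaConf

# Hypothetical resolvers; the project's real ones live in its training entry points.
OmegaConf.register_new_resolver("add", lambda a, b: a + b, replace=True)
OmegaConf.register_new_resolver("times", lambda a, b: a * b, replace=True)

cfg = OmegaConf.create(
    {
        "scratch": {
            "d_model": 256,
            "lr_scale": 0.1,
            "input_box_embedding_dim": "${add:${scratch.d_model},2}",
            "lr_transformer": "${times:8e-4,${scratch.lr_scale}}",
        }
    }
)
print(cfg.scratch.input_box_embedding_dim)   # 258
print(cfg.scratch.lr_transformer)            # ~8e-05
```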
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_attributes.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_attributes.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..8646b691734e1dd191d53e700d9b7dcb2c23de72
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_attributes.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_attributes/
+ coco_gt: ${paths.base_annotation_path}/gold_attributes_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_attributes_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_attributes_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_attributes_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.metaclip_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_attributes
+
+ meters:
+ val:
+ gold_attributes: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_attributes
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_crowded.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_crowded.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fef74a6ee56c901c258a8ced2beff773a38ec545
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_crowded.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_crowded/
+ coco_gt: ${paths.base_annotation_path}/gold_crowded_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_crowded_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_crowded_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_crowded_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.metaclip_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_crowded
+
+ meters:
+ val:
+ gold_crowded: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_crowded
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_fg_food.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_fg_food.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b08c4a46921db2123f2540a63536140ea641320e
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_fg_food.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_fg_food/
+ coco_gt: ${paths.base_annotation_path}/gold_fg_food_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_fg_food_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_fg_food_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_fg_food_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.metaclip_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_fg_food
+
+ meters:
+ val:
+ gold_fg_food: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_fg_food
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_fg_sports.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_fg_sports.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..89a93be2acf36ab0f84481dfded86340da97b9a6
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_fg_sports.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_fg_sports_equipment/
+ coco_gt: ${paths.base_annotation_path}/gold_fg_sports_equipment_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_fg_sports_equipment_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_fg_sports_equipment_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_fg_sports_equipment_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.metaclip_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_fg_sports_equipment
+
+ meters:
+ val:
+ gold_fg_sports_equipment: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_fg_sports_equipment
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_metaclip_nps.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_metaclip_nps.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e9c276f4299d4a53d5b44cea5194918541a0d25d
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_metaclip_nps.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_metaclip_nps/
+ coco_gt: ${paths.base_annotation_path}/gold_metaclip_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_metaclip_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_metaclip_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_metaclip_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.metaclip_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_metaclip_nps
+
+ meters:
+ val:
+ gold_metaclip_nps: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_metaclip_nps
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_sa1b_nps.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_sa1b_nps.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..52c87ee30545d24502160e7e8e3a565ce8d83bf2
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_sa1b_nps.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_sa1b_nps/
+ coco_gt: ${paths.base_annotation_path}/gold_sa1b_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_sa1b_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_sa1b_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_sa1b_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.sa1b_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_sa1b_nps
+
+ meters:
+ val:
+ gold_sa1b_nps: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_sa1b_nps
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/gold_image_evals/sam3_gold_image_wiki_common.yaml b/sam3/train/configs/gold_image_evals/sam3_gold_image_wiki_common.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..630495423c3840f0e795ee3c501ee5f5b44a3505
--- /dev/null
+++ b/sam3/train/configs/gold_image_evals/sam3_gold_image_wiki_common.yaml
@@ -0,0 +1,66 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/gold_wiki_common/
+ coco_gt: ${paths.base_annotation_path}/gold_wiki_common_merged_a_release_test.json
+ coco_gts:
+ - ${paths.base_annotation_path}/gold_wiki_common_merged_a_release_test.json
+ - ${paths.base_annotation_path}/gold_wiki_common_merged_b_release_test.json
+ - ${paths.base_annotation_path}/gold_wiki_common_merged_c_release_test.json
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.metaclip_img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: gold_wiki_common
+
+ meters:
+ val:
+ gold_wiki_common: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/gold_wiki_common
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gts}
+ iou_type: "segm"
diff --git a/sam3/train/configs/odinw13/odinw_text_and_visual.yaml b/sam3/train/configs/odinw13/odinw_text_and_visual.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..51e93b457c471c861fc6a3a4fd65ced25119aca1
--- /dev/null
+++ b/sam3/train/configs/odinw13/odinw_text_and_visual.yaml
@@ -0,0 +1,255 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+# python sam3/train/train.py -c configs/odinw_text_and_visual.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
+
+paths:
+ odinw_data_root:
+ experiment_log_dir:
+ bpe_path: # This should be under assets/bpe_simple_vocab_16e6.txt.gz
+
+supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
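+# Each submitit job-array task evaluates one ODinW dataset: the task_index is converted to
+# a string key (via the `string:` resolver, presumably registered by the training entry
+# point) and used to index into the all_odinw_supercategories list defined at the bottom of
+# this file; submitit.job_array.num_tasks below matches the 13 entries of that list.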
+# Validation transforms pipeline
+val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+ - _target_: sam3.train.transforms.filter_query_transforms.TextQueryToVisual
+ keep_text_queries: true # Note: set this to false if you only want visual
+ probability: 1.0 # always
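+    # TextQueryToVisual presumably converts each text query into a visual (box) prompt;
+    # with keep_text_queries: true the original text queries are kept as well, so both
+    # prompt types are evaluated together. The companion odinw_visual_only.yaml sets
+    # keep_text_queries: false to evaluate visual prompts alone.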
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: True
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ # Image processing parameters
+ resolution: 1008
+ # Normalization parameters
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ val_batch_size: 2
+ num_val_workers: 0
+ gather_pred_via_filesys: false
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ max_epochs: 1
+ accelerator: cuda
+ seed_value: 123
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ prompts: ${odinw35_prompts.${supercategory_tuple.name}}
+ include_negatives: true
+          category_chunk_size: 20 # Note: since we evaluate positive-only AP, we need to include all categories.
+ _partial_: true
+ img_folder: ${paths.odinw_data_root}/${supercategory_tuple.val.img_folder}
+ ann_file:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ transforms: ${val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: 1
+ dict_key: odinw35
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpus
+ eval_mode: true # Set to false if training
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ odinw35:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/roboflow/${supercategory_tuple.name}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ tide: False
+ iou_type: "bbox"
+ positive_split: true
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0 # 0: only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${supercategory_tuple.name}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+
+ job_array:
+ num_tasks: 13
+ task_index: 0
+
+# ============================================================================
+# ODinW13 Supercategories
+# ============================================================================
+
+all_odinw_supercategories:
+ - name: AerialMaritimeDrone_large
+ val:
+ img_folder: AerialMaritimeDrone/large/test/
+ json: AerialMaritimeDrone/large/test/annotations_without_background.json
+ - name: Aquarium
+ val:
+ img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
+ json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
+ - name: CottontailRabbits
+ val:
+ img_folder: CottontailRabbits/test/
+ json: CottontailRabbits/test/annotations_without_background.json
+ - name: EgoHands_generic
+ val:
+ img_folder: EgoHands/generic/test/
+ json: EgoHands/generic/test/annotations_without_background.json
+ - name: NorthAmericaMushrooms
+ val:
+ img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
+ json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
+ - name: Packages
+ val:
+ img_folder: Packages/Raw/test/
+ json: Packages/Raw/test/annotations_without_background.json
+ - name: PascalVOC
+ val:
+ img_folder: PascalVOC/valid/
+ json: PascalVOC/valid/annotations_without_background.json
+ - name: Raccoon
+ val:
+ img_folder: Raccoon/Raccoon.v2-raw.coco/test/
+ json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
+ - name: ShellfishOpenImages
+ val:
+ img_folder: ShellfishOpenImages/raw/test/
+ json: ShellfishOpenImages/raw/test/annotations_without_background.json
+ - name: VehiclesOpenImages
+ val:
+ img_folder: VehiclesOpenImages/416x416/test/
+ json: VehiclesOpenImages/416x416/test/annotations_without_background.json
+ - name: pistols
+ val:
+ img_folder: pistols/export/
+ json: pistols/export/test_annotations_without_background.json
+ - name: pothole
+ val:
+ img_folder: pothole/test/
+ json: pothole/test/annotations_without_background.json
+ - name: thermalDogsAndPeople
+ val:
+ img_folder: thermalDogsAndPeople/test/
+ json: thermalDogsAndPeople/test/annotations_without_background.json
+
+
+odinw35_prompts:
+ AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
+ {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
+ "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
+ {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
+ Aquarium: null
+ CottontailRabbits: null
+ EgoHands_generic: null
+ NorthAmericaMushrooms: '[{''id'': 1, ''name'':
+ ''chicken of the woods'', ''supercategory'': ''mushroom''}, {''id'': 2, ''name'': ''chanterelle'', ''supercategory'': ''mushroom''}]'
+ Packages: null
+ PascalVOC: null
+ Raccoon: null
+ ShellfishOpenImages: null
+ VehiclesOpenImages: null
+ pistols: null
+ pothole: null
+ thermalDogsAndPeople: null
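+
+# For the datasets with an explicit entry above, the JSON-style string supplies the category
+# prompts passed to the loader; for the datasets set to null, the category names from the
+# dataset's own COCO annotation file are presumably used as-is.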
diff --git a/sam3/train/configs/odinw13/odinw_text_only.yaml b/sam3/train/configs/odinw13/odinw_text_only.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e28fa5df5308a550fccb14f66de4f67a928aff8f
--- /dev/null
+++ b/sam3/train/configs/odinw13/odinw_text_only.yaml
@@ -0,0 +1,253 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+# python sam3/train/train.py -c configs/odinw_text_only.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
+
+paths:
+ odinw_data_root:
+ experiment_log_dir:
+ bpe_path: # This should be under assets/bpe_simple_vocab_16e6.txt.gz
+
+
+supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
+# Validation transforms pipeline
+val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: True
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ # Image processing parameters
+ resolution: 1008
+ # Normalization parameters
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ val_batch_size: 2
+ num_val_workers: 0
+ gather_pred_via_filesys: false
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ max_epochs: 1
+ accelerator: cuda
+ seed_value: 123
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ prompts: ${odinw35_prompts.${supercategory_tuple.name}}
+ include_negatives: true
+          category_chunk_size: 20 # Note: since we evaluate positive-only AP, we need to include all categories.
+ _partial_: true
+ img_folder: ${paths.odinw_data_root}/${supercategory_tuple.val.img_folder}
+ ann_file:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ transforms: ${val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: 1
+ dict_key: odinw35
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpus
+ eval_mode: true # Set to false if training
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ odinw35:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/odinw/${supercategory_tuple.name}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ tide: False
+ iou_type: "bbox"
+ positive_split: False
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0 # 0: only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${supercategory_tuple.name}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+
+ job_array:
+ num_tasks: 13
+ task_index: 0
+
+# ============================================================================
+# ODinW13 Supercategories
+# ============================================================================
+
+all_odinw_supercategories:
+ - name: AerialMaritimeDrone_large
+ val:
+ img_folder: AerialMaritimeDrone/large/test/
+ json: AerialMaritimeDrone/large/test/annotations_without_background.json
+ - name: Aquarium
+ val:
+ img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
+ json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
+ - name: CottontailRabbits
+ val:
+ img_folder: CottontailRabbits/test/
+ json: CottontailRabbits/test/annotations_without_background.json
+ - name: EgoHands_generic
+ val:
+ img_folder: EgoHands/generic/test/
+ json: EgoHands/generic/test/annotations_without_background.json
+ - name: NorthAmericaMushrooms
+ val:
+ img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
+ json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
+ - name: Packages
+ val:
+ img_folder: Packages/Raw/test/
+ json: Packages/Raw/test/annotations_without_background.json
+ - name: PascalVOC
+ val:
+ img_folder: PascalVOC/valid/
+ json: PascalVOC/valid/annotations_without_background.json
+ - name: Raccoon
+ val:
+ img_folder: Raccoon/Raccoon.v2-raw.coco/test/
+ json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
+ - name: ShellfishOpenImages
+ val:
+ img_folder: ShellfishOpenImages/raw/test/
+ json: ShellfishOpenImages/raw/test/annotations_without_background.json
+ - name: VehiclesOpenImages
+ val:
+ img_folder: VehiclesOpenImages/416x416/test/
+ json: VehiclesOpenImages/416x416/test/annotations_without_background.json
+ - name: pistols
+ val:
+ img_folder: pistols/export/
+ json: pistols/export/test_annotations_without_background.json
+ - name: pothole
+ val:
+ img_folder: pothole/test/
+ json: pothole/test/annotations_without_background.json
+ - name: thermalDogsAndPeople
+ val:
+ img_folder: thermalDogsAndPeople/test/
+ json: thermalDogsAndPeople/test/annotations_without_background.json
+
+
+odinw35_prompts:
+ AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
+ {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
+ "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
+ {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
+ Aquarium: null
+ CottontailRabbits: null
+ EgoHands_generic: null
+ NorthAmericaMushrooms: '[{''id'': 1, ''name'':
+ ''chicken of the woods'', ''supercategory'': ''mushroom''}, {''id'': 2, ''name'': ''chanterelle'', ''supercategory'': ''mushroom''}]'
+ Packages: null
+ PascalVOC: null
+ Raccoon: null
+ ShellfishOpenImages: null
+ VehiclesOpenImages: null
+ pistols: null
+ pothole: null
+ thermalDogsAndPeople: null
diff --git a/sam3/train/configs/odinw13/odinw_text_only_positive.yaml b/sam3/train/configs/odinw13/odinw_text_only_positive.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9a86a5230935a1a5dc5b15c14066bb811f3a21de
--- /dev/null
+++ b/sam3/train/configs/odinw13/odinw_text_only_positive.yaml
@@ -0,0 +1,253 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+# python sam3/train/train.py -c configs/odinw_text_only_positive.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
+
+paths:
+ odinw_data_root:
+ experiment_log_dir:
+ bpe_path: # This should be under assets/bpe_simple_vocab_16e6.txt.gz
+
+
+supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
+# Validation transforms pipeline
+val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: True
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ # Image processing parameters
+ resolution: 1008
+ # Normalization parameters
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ val_batch_size: 2
+ num_val_workers: 0
+ gather_pred_via_filesys: false
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ max_epochs: 1
+ accelerator: cuda
+ seed_value: 123
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ prompts: ${odinw35_prompts.${supercategory_tuple.name}}
+ include_negatives: true
+          category_chunk_size: 20 # Note: since we evaluate positive-only AP, we need to include all categories.
+ _partial_: true
+ img_folder: ${paths.odinw_data_root}/${supercategory_tuple.val.img_folder}
+ ann_file:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ transforms: ${val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: 1
+ dict_key: odinw35
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpus
+ eval_mode: true # Set to false if training
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ odinw35:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/roboflow/${supercategory_tuple.name}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ tide: False
+ iou_type: "bbox"
+ positive_split: true
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0 # 0: only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${supercategory_tuple.name}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+
+ job_array:
+ num_tasks: 13
+ task_index: 0
+
+# ============================================================================
+# ODinW13 Supercategories
+# ============================================================================
+
+all_odinw_supercategories:
+ - name: AerialMaritimeDrone_large
+ val:
+ img_folder: AerialMaritimeDrone/large/test/
+ json: AerialMaritimeDrone/large/test/annotations_without_background.json
+ - name: Aquarium
+ val:
+ img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
+ json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
+ - name: CottontailRabbits
+ val:
+ img_folder: CottontailRabbits/test/
+ json: CottontailRabbits/test/annotations_without_background.json
+ - name: EgoHands_generic
+ val:
+ img_folder: EgoHands/generic/test/
+ json: EgoHands/generic/test/annotations_without_background.json
+ - name: NorthAmericaMushrooms
+ val:
+ img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
+ json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
+ - name: Packages
+ val:
+ img_folder: Packages/Raw/test/
+ json: Packages/Raw/test/annotations_without_background.json
+ - name: PascalVOC
+ val:
+ img_folder: PascalVOC/valid/
+ json: PascalVOC/valid/annotations_without_background.json
+ - name: Raccoon
+ val:
+ img_folder: Raccoon/Raccoon.v2-raw.coco/test/
+ json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
+ - name: ShellfishOpenImages
+ val:
+ img_folder: ShellfishOpenImages/raw/test/
+ json: ShellfishOpenImages/raw/test/annotations_without_background.json
+ - name: VehiclesOpenImages
+ val:
+ img_folder: VehiclesOpenImages/416x416/test/
+ json: VehiclesOpenImages/416x416/test/annotations_without_background.json
+ - name: pistols
+ val:
+ img_folder: pistols/export/
+ json: pistols/export/test_annotations_without_background.json
+ - name: pothole
+ val:
+ img_folder: pothole/test/
+ json: pothole/test/annotations_without_background.json
+ - name: thermalDogsAndPeople
+ val:
+ img_folder: thermalDogsAndPeople/test/
+ json: thermalDogsAndPeople/test/annotations_without_background.json
+
+
+odinw35_prompts:
+ AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
+ {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
+ "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
+ {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
+ Aquarium: null
+ CottontailRabbits: null
+ EgoHands_generic: null
+ NorthAmericaMushrooms: '[{''id'': 1, ''name'':
+ ''chicken of the woods'', ''supercategory'': ''mushroom''}, {''id'': 2, ''name'': ''chanterelle'', ''supercategory'': ''mushroom''}]'
+ Packages: null
+ PascalVOC: null
+ Raccoon: null
+ ShellfishOpenImages: null
+ VehiclesOpenImages: null
+ pistols: null
+ pothole: null
+ thermalDogsAndPeople: null
diff --git a/sam3/train/configs/odinw13/odinw_text_only_train.yaml b/sam3/train/configs/odinw13/odinw_text_only_train.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..eb03cdf908df88e95c74742e68cb7f243db1ebe1
--- /dev/null
+++ b/sam3/train/configs/odinw13/odinw_text_only_train.yaml
@@ -0,0 +1,591 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+# python sam3/train/train.py -c configs/odinw_text_only_train.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
+
+paths:
+ odinw_data_root:
+ experiment_log_dir:
+ bpe_path: # This should be under assets/bpe_simple_vocab_16e6.txt.gz
+
+
+odinw_train:
+ train_file: fewshot_train_shot10_seed300
+ num_images: null
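+  # train_file names the few-shot split to fine-tune on; the value above presumably refers
+  # to the 10-shot split generated with seed 300, and other <train_file>.json splits under
+  # each dataset's train folder can be substituted. With num_images: null, all images from
+  # the selected split are used.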
+ supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
+ # Training transforms pipeline
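+  # (roughly: drop crowd annotations, jitter the input boxes, decode RLE masks, randomly
+  # resize between 480px and the target resolution, pad to the target resolution, convert
+  # to tensors and normalize, then filter out queries that end up empty or exceed
+  # scratch.max_ann_per_img objects)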
+ train_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterCrowds
+ - _target_: sam3.train.transforms.point_sampling.RandomizeInputBbox
+ box_noise_std: 0.1
+ box_noise_max: 20
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes:
+ _target_: sam3.train.transforms.basic.get_random_resize_scales
+ size: ${scratch.resolution}
+ min_size: 480
+ rounded: false
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: ${scratch.consistent_transform}
+ - _target_: sam3.train.transforms.basic_for_api.PadToSizeAPI
+ size: ${scratch.resolution}
+ consistent_transform: ${scratch.consistent_transform}
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.train_norm_mean}
+ std: ${scratch.train_norm_std}
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterFindQueriesWithTooManyOut
+ max_num_objects: ${scratch.max_ann_per_img}
+
+ # Validation transforms pipeline
+ val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # loss config (no mask loss)
+ loss:
+ _target_: sam3.train.loss.sam3_loss.Sam3LossWrapper
+ matcher: ${scratch.matcher}
+ o2m_weight: 2.0
+ o2m_matcher:
+ _target_: sam3.train.matcher.BinaryOneToManyMatcher
+ alpha: 0.3
+ threshold: 0.4
+ topk: 4
+ use_o2m_matcher_on_o2m_aux: ${scratch.use_o2m_matcher_on_o2m_aux}
+ loss_fns_find:
+ - _target_: sam3.train.loss.loss_fns.Boxes
+ weight_dict:
+ loss_bbox: 5.0
+ loss_giou: 2.0
+ - _target_: sam3.train.loss.loss_fns.IABCEMdetr
+ weak_loss: False
+ weight_dict:
+ loss_ce: ${scratch.loss_ce_weight} # Change
+ presence_loss: ${scratch.presence_weight} # Change
+ pos_weight: ${scratch.iabce_pos_weight}
+ alpha: ${scratch.iabce_alpha}
+ gamma: 2
+ use_presence: True # Change
+ pos_focal: ${scratch.iabce_pos_focal}
+ pad_n_queries: ${scratch.num_queries}
+ pad_scale_pos: ${scratch.instance_query_loss_pad_scale_pos}
+
+ loss_fn_semantic_seg: null
+ scale_by_find_batch_size: ${scratch.scale_by_find_batch_size}
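+    # As flagged above ("no mask loss"), this fine-tuning recipe supervises boxes and
+    # classification/presence only: loss_fn_semantic_seg is null and
+    # scratch.enable_segmentation is False, so segmentation targets are not loaded.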
+
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: False
+ use_act_checkpoint_geo_encoder: True
+ input_geometry_encoder:
+ _target_: sam3.model.geometry_encoders.SequenceGeometryEncoder
+ pos_enc: ${scratch.pos_embed}
+ encode_boxes_as_points: False
+ points_direct_project: True
+ points_pool: True
+ points_pos_enc: True
+ boxes_direct_project: True
+ boxes_pool: True
+ boxes_pos_enc: True
+ d_model: ${scratch.d_model}
+ num_layers: 3
+ use_act_ckpt: ${scratch.use_act_checkpoint_geo_encoder}
+ layer:
+ _target_: sam3.model.encoder.TransformerEncoderLayer
+ activation: "relu"
+ d_model: ${scratch.d_model}
+ dim_feedforward: 2048
+ dropout: ${scratch.encoder_dropout}
+ pos_enc_at_attn: false
+ pre_norm: True
+ pos_enc_at_cross_attn_queries: false
+ pos_enc_at_cross_attn_keys: true
+ self_attention:
+ _target_: sam3.model.attention.MultiheadAttention
+ attn_type: Vanilla
+ num_heads: 8
+ dropout: ${scratch.encoder_dropout}
+ embed_dim: ${scratch.d_model}
+ batch_first: False
+ cross_attention:
+ _target_: sam3.model.attention.MultiheadAttention
+ attn_type: Vanilla
+ num_heads: 8
+ dropout: ${scratch.encoder_dropout}
+ embed_dim: ${scratch.d_model}
+ batch_first: False
+ add_cls: true
+ add_post_encode_proj: True
+
+ boxRPB: "log"
+ dac: True
+ use_early_fusion: true
+ o2m_mask: false
+ num_feature_levels: 1 # > 1 not implemented
+ encoder_dropout: 0.1
+ decoder_dropout: 0.1
+
+ tokenizer_ve:
+ _target_: sam3.model.tokenizer_ve.SimpleTokenizer
+ bpe_path: ${paths.bpe_path}
+
+
+ freeze_text_tower: False
+ freeze_image_tower: NoFreeze
+ vis_backbone_dp: 0.0
+ # Activation checkpointing (Save memory)
+ use_act_checkpoint_vision_backbone: True
+ use_act_checkpoint_text_backbone: True
+ use_act_checkpoint_encoder: True
+ use_act_checkpoint_decoder: True
+
+ loss: null
+ # Loss parameters
+ num_queries: 200
+ presence_weight: 20.0
+ loss_ce_weight: 20.0
+ iabce_pos_weight: 5.0
+ iabce_pos_focal: false
+ iabce_alpha: 0.25
+ instance_query_loss_pad_scale_pos: 1.0
+ use_o2m_matcher_on_o2m_aux: false
+
+ # Model parameters
+ use_instance_query: true
+ d_model: 256
+ pos_embed:
+ _target_: sam3.model.position_encoding.PositionEmbeddingSine
+ num_pos_feats: ${scratch.d_model}
+ normalize: true
+ scale: null
+ temperature: 10000
+
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+
+ # Matcher configuration
+ matcher:
+ _target_: sam3.train.matcher.BinaryHungarianMatcherV2
+ focal: true
+ cost_class: 2.0
+ cost_bbox: 5.0
+ cost_giou: 2.0
+ alpha: 0.25
+ gamma: 2
+ stable: False
+ scale_by_find_batch_size: True
+
+ # Image processing parameters
+ resolution: 1008
+ consistent_transform: False
+ max_ann_per_img: 200
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ train_batch_size: 1
+ val_batch_size: 1
+ num_train_workers: 0
+ num_val_workers: 0
+ max_data_epochs: 40
+ target_epoch_size: 1500
+ hybrid_repeats: 1
+ context_length: 2
+ gather_pred_via_filesys: false
+
+ # Learning rate and scheduler parameters
+ lr_scale: 0.1
+ lr_transformer: ${times:8e-4,${scratch.lr_scale}}
+ lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
+ lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
+ lrd_vision_backbone: 0.9
+ wd: 0.1
+ scheduler_timescale: 20
+ scheduler_warmup: 20
+ scheduler_cooldown: 20
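+  # With lr_scale: 0.1 and assuming the times: resolver multiplies its two arguments, the
+  # effective base learning rates resolve to 8e-5 (transformer), 2.5e-5 (vision backbone)
+  # and 5e-6 (language backbone).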
+
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: train
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all: ${odinw_train.loss}
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ limit_ids: ${odinw_train.num_images}
+ transforms: ${odinw_train.train_transforms}
+ load_segmentation: ${scratch.enable_segmentation}
+ max_ann_per_img: 500000
+ multiplier: 1
+ max_train_queries: 50000
+ max_val_queries: 50000
+ training: true
+ use_caching: False
+ img_folder: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.train.img_folder}
+ ann_file:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.train.json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+          prompts: ${odinw35_prompts.${odinw_train.supercategory_tuple.name}}
+ _partial_: true
+ shuffle: True
+ batch_size: ${scratch.train_batch_size}
+ num_workers: ${scratch.num_train_workers}
+ pin_memory: False
+ drop_last: True
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: all
+ with_seg_masks: ${scratch.enable_segmentation}
+
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ load_segmentation: ${scratch.enable_segmentation}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ prompts: ${odinw35_prompts.${odinw_train.supercategory_tuple.name}}
+ include_negatives: true
+          category_chunk_size: 20 # Note: since we evaluate positive-only AP, we need to include all categories.
+ _partial_: true
+ img_folder: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.val.img_folder}
+ ann_file:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.val.json}
+ transforms: ${odinw_train.val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: 1
+ dict_key: odinw35
+ with_seg_masks: ${scratch.enable_segmentation}
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpus
+ eval_mode: false # Set to false if training
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ odinw35:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/odinw/${odinw_train.supercategory_tuple.name}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${odinw_train.supercategory_tuple.val.json}
+ tide: False
+ iou_type: "bbox"
+ positive_split: False
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+ optimizer:
+ _target_: torch.optim.AdamW
+
+ gradient_clip:
+ _target_: sam3.train.optim.optimizer.GradientClipper
+ max_norm: 0.1
+ norm_type: 2
+
+ param_group_modifiers:
+ - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
+ _partial_: True
+ layer_decay_value: ${scratch.lrd_vision_backbone}
+ apply_to: 'backbone.vision_backbone.trunk'
+ overrides:
+ - pattern: '*pos_embed*'
+ value: 1.0
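+    # The modifier above applies layer-wise learning-rate decay (factor 0.9 per layer) to
+    # the vision trunk, while parameters matching *pos_embed* keep their full rate (factor 1.0).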
+
+ options:
+ lr:
+ - scheduler: # transformer and class_embed
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_transformer}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_vision_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.vision_backbone.*'
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_language_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.language_backbone.*'
+
+ weight_decay:
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: ${scratch.wd}
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: 0.0
+ param_names:
+ - '*bias*'
+ module_cls_names: ['torch.nn.LayerNorm']
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0 # 0: only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${odinw_train.supercategory_tuple.name}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: null #${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+
+  # Job array configuration: task_index selects which entry of all_odinw_supercategories
+  # this task trains and evaluates on.
+ job_array:
+ num_tasks: 13
+ task_index: 0
+
+
+# ============================================================================
+# ODinW13 Supercategories
+# ============================================================================
+
+all_odinw_supercategories:
+ - name: AerialMaritimeDrone_large
+ val:
+ img_folder: AerialMaritimeDrone/large/test/
+ json: AerialMaritimeDrone/large/test/annotations_without_background.json
+ train:
+ img_folder: AerialMaritimeDrone/large/train/
+ json: AerialMaritimeDrone/large/train/${odinw_train.train_file}.json
+ - name: Aquarium
+ val:
+ img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
+ json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
+ train:
+ img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/train/
+ json: Aquarium/Aquarium Combined.v2-raw-1024.coco/train/${odinw_train.train_file}.json
+ - name: CottontailRabbits
+ val:
+ img_folder: CottontailRabbits/test/
+ json: CottontailRabbits/test/annotations_without_background.json
+ train:
+ img_folder: CottontailRabbits/train/
+ json: CottontailRabbits/train/${odinw_train.train_file}.json
+ - name: EgoHands_generic
+ val:
+ img_folder: EgoHands/generic/test/
+ json: EgoHands/generic/test/annotations_without_background.json
+ train:
+ img_folder: EgoHands/generic/train/
+ json: EgoHands/generic/train/${odinw_train.train_file}.json
+ - name: NorthAmericaMushrooms
+ val:
+ img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
+ json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
+ train:
+ img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/train/
+ json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/train/${odinw_train.train_file}.json
+ - name: Packages
+ val:
+ img_folder: Packages/Raw/test/
+ json: Packages/Raw/test/annotations_without_background.json
+ train:
+ img_folder: Packages/Raw/train/
+ json: Packages/Raw/train/${odinw_train.train_file}.json
+ - name: PascalVOC
+ val:
+ img_folder: PascalVOC/valid/
+ json: PascalVOC/valid/annotations_without_background.json
+ train:
+ img_folder: PascalVOC/train/
+ json: PascalVOC/train/${odinw_train.train_file}.json
+ - name: Raccoon
+ val:
+ img_folder: Raccoon/Raccoon.v2-raw.coco/test/
+ json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
+ train:
+ img_folder: Raccoon/Raccoon.v2-raw.coco/train/
+ json: Raccoon/Raccoon.v2-raw.coco/train/${odinw_train.train_file}.json
+ - name: ShellfishOpenImages
+ val:
+ img_folder: ShellfishOpenImages/raw/test/
+ json: ShellfishOpenImages/raw/test/annotations_without_background.json
+ train:
+ img_folder: ShellfishOpenImages/raw/train/
+ json: ShellfishOpenImages/raw/train/${odinw_train.train_file}.json
+ - name: VehiclesOpenImages
+ val:
+ img_folder: VehiclesOpenImages/416x416/test/
+ json: VehiclesOpenImages/416x416/test/annotations_without_background.json
+ train:
+ img_folder: VehiclesOpenImages/416x416/train/
+ json: VehiclesOpenImages/416x416/train/${odinw_train.train_file}.json
+ - name: pistols
+ val:
+ img_folder: pistols/export/
+ json: pistols/export/test_annotations_without_background.json
+ train:
+ img_folder: pistols/export/
+ json: pistols/export/${odinw_train.train_file}.json
+ - name: pothole
+ val:
+ img_folder: pothole/test/
+ json: pothole/test/annotations_without_background.json
+ train:
+ img_folder: pothole/train/
+ json: pothole/train/${odinw_train.train_file}.json
+ - name: thermalDogsAndPeople
+ val:
+ img_folder: thermalDogsAndPeople/test/
+ json: thermalDogsAndPeople/test/annotations_without_background.json
+ train:
+ img_folder: thermalDogsAndPeople/train/
+ json: thermalDogsAndPeople/train/${odinw_train.train_file}.json
+
+
+odinw35_prompts:
+ AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
+ {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
+ "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
+ {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
+ Aquarium: null
+ CottontailRabbits: null
+ EgoHands_generic: null
+ NorthAmericaMushrooms: '[{''id'': 1, ''name'':
+ ''chicken of the woods'', ''supercategory'': ''mushroom''}, {''id'': 2, ''name'': ''chanterelle'', ''supercategory'': ''mushroom''}]'
+ Packages: null
+ PascalVOC: null
+ Raccoon: null
+ ShellfishOpenImages: null
+ VehiclesOpenImages: null
+ pistols: null
+ pothole: null
+ thermalDogsAndPeople: null
diff --git a/sam3/train/configs/odinw13/odinw_visual_only.yaml b/sam3/train/configs/odinw13/odinw_visual_only.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e724f2d871e3af412078d744220f781a11b6f56f
--- /dev/null
+++ b/sam3/train/configs/odinw13/odinw_visual_only.yaml
@@ -0,0 +1,256 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+# python sam3/train/train.py -c configs/odinw_visual_only.yaml --use-cluster 1 --partition ${PARTITION} --account ${ACCOUNT} --qos ${QoS}
+
+paths:
+ odinw_data_root:
+ experiment_log_dir:
+ bpe_path: # This should be under assets/bpe_simple_vocab_16e6.txt.gz
+
+
+supercategory_tuple: ${all_odinw_supercategories.${string:${submitit.job_array.task_index}}}
+# Validation transforms pipeline
+val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+ - _target_: sam3.train.transforms.filter_query_transforms.TextQueryToVisual
+ keep_text_queries: false # Note: set this to false if you only want visual
+ probability: 1.0 # always
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: True
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ # Image processing parameters
+ resolution: 1008
+ # Normalization parameters
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ val_batch_size: 2
+ num_val_workers: 0
+ gather_pred_via_filesys: false
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ max_epochs: 1
+ accelerator: cuda
+ seed_value: 123
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ prompts: ${odinw35_prompts.${supercategory_tuple.name}}
+ include_negatives: true
+          category_chunk_size: 20 # Note: since we evaluate positive-only AP, we need to include all categories.
+ _partial_: true
+ img_folder: ${paths.odinw_data_root}/${supercategory_tuple.val.img_folder}
+ ann_file:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ transforms: ${val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: 1
+ dict_key: odinw35
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpus
+ eval_mode: true # Set to false if training
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ odinw35:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/roboflow/${supercategory_tuple.name}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path:
+ _target_: sam3.eval.coco_reindex.reindex_coco_to_temp
+ input_json_path: ${paths.odinw_data_root}/${supercategory_tuple.val.json}
+ tide: False
+ iou_type: "bbox"
+ positive_split: true
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+    save_freq: 0 # 0: only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${supercategory_tuple.name}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+
+ job_array:
+ num_tasks: 13
+ task_index: 0
+
+# ============================================================================
+# ODinW13 Supercategories
+# ============================================================================
+
+all_odinw_supercategories:
+ - name: AerialMaritimeDrone_large
+ val:
+ img_folder: AerialMaritimeDrone/large/test/
+ json: AerialMaritimeDrone/large/test/annotations_without_background.json
+ - name: Aquarium
+ val:
+ img_folder: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/
+ json: Aquarium/Aquarium Combined.v2-raw-1024.coco/test/annotations_without_background.json
+ - name: CottontailRabbits
+ val:
+ img_folder: CottontailRabbits/test/
+ json: CottontailRabbits/test/annotations_without_background.json
+ - name: EgoHands_generic
+ val:
+ img_folder: EgoHands/generic/test/
+ json: EgoHands/generic/test/annotations_without_background.json
+ - name: NorthAmericaMushrooms
+ val:
+ img_folder: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/
+ json: NorthAmericaMushrooms/North American Mushrooms.v1-416x416.coco/test/annotations_without_background.json
+ - name: Packages
+ val:
+ img_folder: Packages/Raw/test/
+ json: Packages/Raw/test/annotations_without_background.json
+ - name: PascalVOC
+ val:
+ img_folder: PascalVOC/valid/
+ json: PascalVOC/valid/annotations_without_background.json
+ - name: Raccoon
+ val:
+ img_folder: Raccoon/Raccoon.v2-raw.coco/test/
+ json: Raccoon/Raccoon.v2-raw.coco/test/annotations_without_background.json
+ - name: ShellfishOpenImages
+ val:
+ img_folder: ShellfishOpenImages/raw/test/
+ json: ShellfishOpenImages/raw/test/annotations_without_background.json
+ - name: VehiclesOpenImages
+ val:
+ img_folder: VehiclesOpenImages/416x416/test/
+ json: VehiclesOpenImages/416x416/test/annotations_without_background.json
+ - name: pistols
+ val:
+ img_folder: pistols/export/
+ json: pistols/export/test_annotations_without_background.json
+ - name: pothole
+ val:
+ img_folder: pothole/test/
+ json: pothole/test/annotations_without_background.json
+ - name: thermalDogsAndPeople
+ val:
+ img_folder: thermalDogsAndPeople/test/
+ json: thermalDogsAndPeople/test/annotations_without_background.json
+
+
+odinw35_prompts:
+ AerialMaritimeDrone_large: '[{"id": 1, "name": "boat", "supercategory": "movable-objects"},
+ {"id": 2, "name": "car", "supercategory": "movable-objects"}, {"id": 3, "name": "dock",
+ "supercategory": "movable-objects"}, {"id": 4, "name": "jet ski", "supercategory": "movable-objects"},
+ {"id": 5, "name": "boat lift", "supercategory": "movable-objects"}]'
+ Aquarium: null
+ CottontailRabbits: null
+ EgoHands_generic: null
+ NorthAmericaMushrooms: '[{''id'': 1, ''name'':
+ ''chicken of the woods'', ''supercategory'': ''mushroom''}, {''id'': 2, ''name'': ''chanterelle'', ''supercategory'': ''mushroom''}]'
+ Packages: null
+ PascalVOC: null
+ Raccoon: null
+ ShellfishOpenImages: null
+ VehiclesOpenImages: null
+ pistols: null
+ pothole: null
+ thermalDogsAndPeople: null
diff --git a/sam3/train/configs/roboflow_v100/roboflow_v100_eval.yaml b/sam3/train/configs/roboflow_v100/roboflow_v100_eval.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..361e622bc0dd57ce361b449972d61d7dad6042e4
--- /dev/null
+++ b/sam3/train/configs/roboflow_v100/roboflow_v100_eval.yaml
@@ -0,0 +1,539 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+ roboflow_vl_100_root:
+ experiment_log_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+
+# Roboflow dataset configuration
+roboflow_train:
+ num_images: 100 # Note: This is the number of images used for training. If null, all images are used.
+ supercategory: ${all_roboflow_supercategories.${string:${submitit.job_array.task_index}}}
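+ # For illustration: the nested interpolation above indexes the
+ # all_roboflow_supercategories list at the bottom of this file with the submitit
+ # job-array task index (assuming the custom `string` resolver simply stringifies
+ # the integer). With the default task_index of 0 it resolves to "-grccs"; a
+ # task_index of 7 would select "aquarium-combined".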
+
+ # Training transforms pipeline
+ train_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterCrowds
+ - _target_: sam3.train.transforms.point_sampling.RandomizeInputBbox
+ box_noise_std: 0.1
+ box_noise_max: 20
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes:
+ _target_: sam3.train.transforms.basic.get_random_resize_scales
+ size: ${scratch.resolution}
+ min_size: 480
+ rounded: false
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: ${scratch.consistent_transform}
+ - _target_: sam3.train.transforms.basic_for_api.PadToSizeAPI
+ size: ${scratch.resolution}
+ consistent_transform: ${scratch.consistent_transform}
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.train_norm_mean}
+ std: ${scratch.train_norm_std}
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterFindQueriesWithTooManyOut
+ max_num_objects: ${scratch.max_ann_per_img}
+
+ # Validation transforms pipeline
+ val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.train_norm_mean}
+ std: ${scratch.train_norm_std}
+
+ # loss config (no mask loss)
+ loss:
+ _target_: sam3.train.loss.sam3_loss.Sam3LossWrapper
+ matcher: ${scratch.matcher}
+ o2m_weight: 2.0
+ o2m_matcher:
+ _target_: sam3.train.matcher.BinaryOneToManyMatcher
+ alpha: 0.3
+ threshold: 0.4
+ topk: 4
+ use_o2m_matcher_on_o2m_aux: false # Another option is true
+ loss_fns_find:
+ - _target_: sam3.train.loss.loss_fns.Boxes
+ weight_dict:
+ loss_bbox: 5.0
+ loss_giou: 2.0
+ - _target_: sam3.train.loss.loss_fns.IABCEMdetr
+ weak_loss: False
+ weight_dict:
+ loss_ce: 20.0 # Another option is 100.0
+ presence_loss: 20.0
+ pos_weight: 10.0 # Another option is 5.0
+ alpha: 0.25
+ gamma: 2
+ use_presence: True # Change
+ pos_focal: false
+ pad_n_queries: 200
+ pad_scale_pos: 1.0
+
+ loss_fn_semantic_seg: null
+ scale_by_find_batch_size: ${scratch.scale_by_find_batch_size}
+
+
+ # NOTE: Loss to use for training when segmentation is enabled (scratch.enable_segmentation: True)
+ # loss:
+ # _target_: sam3.train.loss.sam3_loss.Sam3LossWrapper
+ # matcher: ${scratch.matcher}
+ # o2m_weight: 2.0
+ # o2m_matcher:
+ # _target_: sam3.train.matcher.BinaryOneToManyMatcher
+ # alpha: 0.3
+ # threshold: 0.4
+ # topk: 4
+ # use_o2m_matcher_on_o2m_aux: false
+ # loss_fns_find:
+ # - _target_: sam3.train.loss.loss_fns.Boxes
+ # weight_dict:
+ # loss_bbox: 5.0
+ # loss_giou: 2.0
+ # - _target_: sam3.train.loss.loss_fns.IABCEMdetr
+ # weak_loss: False
+ # weight_dict:
+ # loss_ce: 20.0 # Another option is 100.0
+ # presence_loss: 20.0
+ # pos_weight: 10.0 # Another option is 5.0
+ # alpha: 0.25
+ # gamma: 2
+ # use_presence: True # Change
+ # pos_focal: false
+ # pad_n_queries: 200
+ # pad_scale_pos: 1.0
+ # - _target_: sam3.train.loss.loss_fns.Masks
+ # focal_alpha: 0.25
+ # focal_gamma: 2.0
+ # weight_dict:
+ # loss_mask: 200.0
+ # loss_dice: 10.0
+ # compute_aux: false
+ # loss_fn_semantic_seg:
+ # _target_: sam3.losses.loss_fns.SemanticSegCriterion
+ # presence_head: True
+ # presence_loss: False # Change
+ # focal: True
+ # focal_alpha: 0.6
+ # focal_gamma: 2.0
+ # downsample: False
+ # weight_dict:
+ # loss_semantic_seg: 20.0
+ # loss_semantic_presence: 1.0
+ # loss_semantic_dice: 30.0
+ # scale_by_find_batch_size: ${scratch.scale_by_find_batch_size}
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: False # NOTE: Set to True to load masks and predict segmentation; if you do, switch to the mask-aware loss commented out above.
+ # Model parameters
+ d_model: 256
+ pos_embed:
+ _target_: sam3.model.position_encoding.PositionEmbeddingSine
+ num_pos_feats: ${scratch.d_model}
+ normalize: true
+ scale: null
+ temperature: 10000
+
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ # Matcher configuration
+ matcher:
+ _target_: sam3.train.matcher.BinaryHungarianMatcherV2
+ focal: true # with `focal: true` it is equivalent to BinaryFocalHungarianMatcher
+ cost_class: 2.0
+ cost_bbox: 5.0
+ cost_giou: 2.0
+ alpha: 0.25
+ gamma: 2
+ stable: False
+ scale_by_find_batch_size: True
+
+ # Image processing parameters
+ resolution: 1008
+ consistent_transform: False
+ max_ann_per_img: 200
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
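+ # With per-channel mean and std of 0.5, normalization maps inputs in [0, 1] to
+ # roughly [-1, 1] via (x - 0.5) / 0.5 (assuming ToTensorAPI scales pixels to
+ # [0, 1], as torchvision's ToTensor does).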
+
+ # Training parameters
+ num_train_workers: 10
+ num_val_workers: 0
+ max_data_epochs: 20
+ target_epoch_size: 1500
+ hybrid_repeats: 1
+ context_length: 2
+ gather_pred_via_filesys: false
+
+ # Learning rate and scheduler parameters
+ lr_scale: 0.1
+ lr_transformer: ${times:8e-4,${scratch.lr_scale}}
+ lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
+ lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
+ lrd_vision_backbone: 0.9
+ wd: 0.1
+ scheduler_timescale: 20
+ scheduler_warmup: 20
+ scheduler_cooldown: 20
+
+ val_batch_size: 1
+ collate_fn_val:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: roboflow100
+ with_seg_masks: ${scratch.enable_segmentation} # Note: Set this to true if using segmentation masks!
+
+ gradient_accumulation_steps: 1
+ train_batch_size: 1
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: all
+ with_seg_masks: ${scratch.enable_segmentation} # Note: Set this to true if using segmentation masks!
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: 20
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+ gradient_accumulation_steps: ${scratch.gradient_accumulation_steps}
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all: ${roboflow_train.loss}
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ limit_ids: ${roboflow_train.num_images}
+ transforms: ${roboflow_train.train_transforms}
+ load_segmentation: ${scratch.enable_segmentation}
+ max_ann_per_img: 500000
+ multiplier: 1
+ max_train_queries: 50000
+ max_val_queries: 50000
+ training: true
+ use_caching: False
+ img_folder: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/train/
+ ann_file: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/train/_annotations.coco.json
+
+ shuffle: True
+ batch_size: ${scratch.train_batch_size}
+ num_workers: ${scratch.num_train_workers}
+ pin_memory: True
+ drop_last: True
+ collate_fn: ${scratch.collate_fn}
+
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ load_segmentation: ${scratch.enable_segmentation}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ include_negatives: true
+ category_chunk_size: 2 # Note: You can increase this based on the memory of your GPU.
+ _partial_: true
+ img_folder: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/test/
+ ann_file: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/test/_annotations.coco.json
+ transforms: ${roboflow_train.val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn: ${scratch.collate_fn_val}
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpu
+ eval_mode: true
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ roboflow100:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/roboflow/${roboflow_train.supercategory}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/test/_annotations.coco.json
+ tide: False
+ iou_type: "bbox"
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+ optimizer:
+ _target_: torch.optim.AdamW
+
+ gradient_clip:
+ _target_: sam3.train.optim.optimizer.GradientClipper
+ max_norm: 0.1
+ norm_type: 2
+
+ param_group_modifiers:
+ - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
+ _partial_: True
+ layer_decay_value: ${scratch.lrd_vision_backbone}
+ apply_to: 'backbone.vision_backbone.trunk'
+ overrides:
+ - pattern: '*pos_embed*'
+ value: 1.0
+
+ options:
+ lr:
+ - scheduler: # transformer and class_embed
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_transformer}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_vision_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.vision_backbone.*'
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_language_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.language_backbone.*'
+
+ weight_decay:
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: ${scratch.wd}
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: 0.0
+ param_names:
+ - '*bias*'
+ module_cls_names: ['torch.nn.LayerNorm']
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${roboflow_train.supercategory}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+ # Job array configuration
+ job_array:
+ num_tasks: 100
+ task_index: 0
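+ # Rough sketch of the intended fan-out: the array submits num_tasks jobs, each
+ # with its own task_index, and that index picks one entry of
+ # all_roboflow_supercategories below, so the 100 datasets are evaluated as 100
+ # independent jobs. For a single local run, set task_index by hand to choose the
+ # dataset.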
+
+# ============================================================================
+# Available Roboflow Supercategories (for reference)
+# ============================================================================
+
+all_roboflow_supercategories:
+ - -grccs
+ - zebrasatasturias
+ - cod-mw-warzone
+ - canalstenosis
+ - label-printing-defect-version-2
+ - new-defects-in-wood
+ - orionproducts
+ - aquarium-combined
+ - varroa-mites-detection--test-set
+ - clashroyalechardetector
+ - stomata-cells
+ - halo-infinite-angel-videogame
+ - pig-detection
+ - urine-analysis1
+ - aerial-sheep
+ - orgharvest
+ - actions
+ - mahjong
+ - liver-disease
+ - needle-base-tip-min-max
+ - wheel-defect-detection
+ - aircraft-turnaround-dataset
+ - xray
+ - wildfire-smoke
+ - spinefrxnormalvindr
+ - ufba-425
+ - speech-bubbles-detection
+ - train
+ - pill
+ - truck-movement
+ - car-logo-detection
+ - inbreast
+ - sea-cucumbers-new-tiles
+ - uavdet-small
+ - penguin-finder-seg
+ - aerial-airport
+ - bibdetection
+ - taco-trash-annotations-in-context
+ - bees
+ - recode-waste
+ - screwdetectclassification
+ - wine-labels
+ - aerial-cows
+ - into-the-vale
+ - gwhd2021
+ - lacrosse-object-detection
+ - defect-detection
+ - dataconvert
+ - x-ray-id
+ - ball
+ - tube
+ - 2024-frc
+ - crystal-clean-brain-tumors-mri-dataset
+ - grapes-5
+ - human-detection-in-floods
+ - buoy-onboarding
+ - apoce-aerial-photographs-for-object-detection-of-construction-equipment
+ - l10ul502
+ - floating-waste
+ - deeppcb
+ - ism-band-packet-detection
+ - weeds4
+ - invoice-processing
+ - thermal-cheetah
+ - tomatoes-2
+ - marine-sharks
+ - peixos-fish
+ - sssod
+ - aerial-pool
+ - countingpills
+ - asphaltdistressdetection
+ - roboflow-trained-dataset
+ - everdaynew
+ - underwater-objects
+ - soda-bottles
+ - dentalai
+ - jellyfish
+ - deepfruits
+ - activity-diagrams
+ - circuit-voltages
+ - all-elements
+ - macro-segmentation
+ - exploratorium-daphnia
+ - signatures
+ - conveyor-t-shirts
+ - fruitjes
+ - grass-weeds
+ - infraredimageofpowerequipment
+ - 13-lkc01
+ - wb-prova
+ - flir-camera-objects
+ - paper-parts
+ - football-player-detection
+ - trail-camera
+ - smd-components
+ - water-meter
+ - nih-xray
+ - the-dreidel-project
+ - electric-pylon-detection-in-rsi
+ - cable-damage
diff --git a/sam3/train/configs/roboflow_v100/roboflow_v100_full_ft_100_images.yaml b/sam3/train/configs/roboflow_v100/roboflow_v100_full_ft_100_images.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..6b95f628431298ec637a9b29c892bb0e2599bd80
--- /dev/null
+++ b/sam3/train/configs/roboflow_v100/roboflow_v100_full_ft_100_images.yaml
@@ -0,0 +1,539 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+ roboflow_vl_100_root:
+ experiment_log_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+
+# Roboflow dataset configuration
+roboflow_train:
+ num_images: 100 # Note: This is the number of images used for training. If null, all images are used.
+ supercategory: ${all_roboflow_supercategories.${string:${submitit.job_array.task_index}}}
+
+ # Training transforms pipeline
+ train_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterCrowds
+ - _target_: sam3.train.transforms.point_sampling.RandomizeInputBbox
+ box_noise_std: 0.1
+ box_noise_max: 20
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes:
+ _target_: sam3.train.transforms.basic.get_random_resize_scales
+ size: ${scratch.resolution}
+ min_size: 480
+ rounded: false
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: ${scratch.consistent_transform}
+ - _target_: sam3.train.transforms.basic_for_api.PadToSizeAPI
+ size: ${scratch.resolution}
+ consistent_transform: ${scratch.consistent_transform}
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.train_norm_mean}
+ std: ${scratch.train_norm_std}
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterEmptyTargets
+ - _target_: sam3.train.transforms.filter_query_transforms.FlexibleFilterFindGetQueries
+ query_filter:
+ _target_: sam3.train.transforms.filter_query_transforms.FilterFindQueriesWithTooManyOut
+ max_num_objects: ${scratch.max_ann_per_img}
+
+ # Validation transforms pipeline
+ val_transforms:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution}
+ max_size:
+ _target_: sam3.train.transforms.basic.get_random_resize_max_size
+ size: ${scratch.resolution}
+ square: true
+ consistent_transform: False
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.train_norm_mean}
+ std: ${scratch.train_norm_std}
+
+ # loss config (no mask loss)
+ loss:
+ _target_: sam3.train.loss.sam3_loss.Sam3LossWrapper
+ matcher: ${scratch.matcher}
+ o2m_weight: 2.0
+ o2m_matcher:
+ _target_: sam3.train.matcher.BinaryOneToManyMatcher
+ alpha: 0.3
+ threshold: 0.4
+ topk: 4
+ use_o2m_matcher_on_o2m_aux: false # Another option is true
+ loss_fns_find:
+ - _target_: sam3.train.loss.loss_fns.Boxes
+ weight_dict:
+ loss_bbox: 5.0
+ loss_giou: 2.0
+ - _target_: sam3.train.loss.loss_fns.IABCEMdetr
+ weak_loss: False
+ weight_dict:
+ loss_ce: 20.0 # Another option is 100.0
+ presence_loss: 20.0
+ pos_weight: 10.0 # Another option is 5.0
+ alpha: 0.25
+ gamma: 2
+ use_presence: True # Change
+ pos_focal: false
+ pad_n_queries: 200
+ pad_scale_pos: 1.0
+
+ loss_fn_semantic_seg: null
+ scale_by_find_batch_size: ${scratch.scale_by_find_batch_size}
+
+
+ # NOTE: Loss to use for training when segmentation is enabled (scratch.enable_segmentation: True)
+ # loss:
+ # _target_: sam3.train.loss.sam3_loss.Sam3LossWrapper
+ # matcher: ${scratch.matcher}
+ # o2m_weight: 2.0
+ # o2m_matcher:
+ # _target_: sam3.train.matcher.BinaryOneToManyMatcher
+ # alpha: 0.3
+ # threshold: 0.4
+ # topk: 4
+ # use_o2m_matcher_on_o2m_aux: false
+ # loss_fns_find:
+ # - _target_: sam3.train.loss.loss_fns.Boxes
+ # weight_dict:
+ # loss_bbox: 5.0
+ # loss_giou: 2.0
+ # - _target_: sam3.train.loss.loss_fns.IABCEMdetr
+ # weak_loss: False
+ # weight_dict:
+ # loss_ce: 20.0 # Another option is 100.0
+ # presence_loss: 20.0
+ # pos_weight: 10.0 # Another option is 5.0
+ # alpha: 0.25
+ # gamma: 2
+ # use_presence: True # Change
+ # pos_focal: false
+ # pad_n_queries: 200
+ # pad_scale_pos: 1.0
+ # - _target_: sam3.train.loss.loss_fns.Masks
+ # focal_alpha: 0.25
+ # focal_gamma: 2.0
+ # weight_dict:
+ # loss_mask: 200.0
+ # loss_dice: 10.0
+ # compute_aux: false
+ # loss_fn_semantic_seg:
+ # _target_: sam3.losses.loss_fns.SemanticSegCriterion
+ # presence_head: True
+ # presence_loss: False # Change
+ # focal: True
+ # focal_alpha: 0.6
+ # focal_gamma: 2.0
+ # downsample: False
+ # weight_dict:
+ # loss_semantic_seg: 20.0
+ # loss_semantic_presence: 1.0
+ # loss_semantic_dice: 30.0
+ # scale_by_find_batch_size: ${scratch.scale_by_find_batch_size}
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ enable_segmentation: False # NOTE: Set to True to load masks and predict segmentation; if you do, switch to the mask-aware loss commented out above.
+ # Model parameters
+ d_model: 256
+ pos_embed:
+ _target_: sam3.model.position_encoding.PositionEmbeddingSine
+ num_pos_feats: ${scratch.d_model}
+ normalize: true
+ scale: null
+ temperature: 10000
+
+ # Box processing
+ use_presence_eval: True
+ original_box_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessImage
+ max_dets_per_img: -1 # infinite detections
+ use_original_ids: true
+ use_original_sizes_box: true
+ use_presence: ${scratch.use_presence_eval}
+
+ # Matcher configuration
+ matcher:
+ _target_: sam3.train.matcher.BinaryHungarianMatcherV2
+ focal: true # with `focal: true` it is equivalent to BinaryFocalHungarianMatcher
+ cost_class: 2.0
+ cost_bbox: 5.0
+ cost_giou: 2.0
+ alpha: 0.25
+ gamma: 2
+ stable: False
+ scale_by_find_batch_size: True
+
+ # Image processing parameters
+ resolution: 1008
+ consistent_transform: False
+ max_ann_per_img: 200
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ # Training parameters
+ num_train_workers: 10
+ num_val_workers: 0
+ max_data_epochs: 20
+ target_epoch_size: 1500
+ hybrid_repeats: 1
+ context_length: 2
+ gather_pred_via_filesys: false
+
+ # Learning rate and scheduler parameters
+ lr_scale: 0.1
+ lr_transformer: ${times:8e-4,${scratch.lr_scale}}
+ lr_vision_backbone: ${times:2.5e-4,${scratch.lr_scale}}
+ lr_language_backbone: ${times:5e-5,${scratch.lr_scale}}
+ lrd_vision_backbone: 0.9
+ wd: 0.1
+ scheduler_timescale: 20
+ scheduler_warmup: 20
+ scheduler_cooldown: 20
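+ # Assuming the custom `times` resolver multiplies its two arguments, the
+ # effective learning rates with lr_scale: 0.1 are:
+ #   lr_transformer       = 8e-4   * 0.1 = 8e-5
+ #   lr_vision_backbone   = 2.5e-4 * 0.1 = 2.5e-5
+ #   lr_language_backbone = 5e-5   * 0.1 = 5e-6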
+
+ val_batch_size: 1
+ collate_fn_val:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: roboflow100
+ with_seg_masks: ${scratch.enable_segmentation} # Note: Set this to true if using segmentation masks!
+
+ gradient_accumulation_steps: 1
+ train_batch_size: 1
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: all
+ with_seg_masks: ${scratch.enable_segmentation} # Note: Set this to true if using segmentation masks!
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: 20
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: train
+ gradient_accumulation_steps: ${scratch.gradient_accumulation_steps}
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all: ${roboflow_train.loss}
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ limit_ids: ${roboflow_train.num_images}
+ transforms: ${roboflow_train.train_transforms}
+ load_segmentation: ${scratch.enable_segmentation}
+ max_ann_per_img: 500000
+ multiplier: 1
+ max_train_queries: 50000
+ max_val_queries: 50000
+ training: true
+ use_caching: False
+ img_folder: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/train/
+ ann_file: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/train/_annotations.coco.json
+
+ shuffle: True
+ batch_size: ${scratch.train_batch_size}
+ num_workers: ${scratch.num_train_workers}
+ pin_memory: True
+ drop_last: True
+ collate_fn: ${scratch.collate_fn}
+
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ load_segmentation: ${scratch.enable_segmentation}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.COCO_FROM_JSON
+ include_negatives: true
+ category_chunk_size: 2 # Note: You can increase this based on the memory of your GPU.
+ _partial_: true
+ img_folder: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/test/
+ ann_file: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/test/_annotations.coco.json
+ transforms: ${roboflow_train.val_transforms}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn: ${scratch.collate_fn_val}
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_image_model
+ bpe_path: ${paths.bpe_path}
+ device: cpu
+ eval_mode: false
+ enable_segmentation: ${scratch.enable_segmentation} # Warning: Enable this if using segmentation.
+
+ meters:
+ val:
+ roboflow100:
+ detection:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "bbox"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/roboflow/${roboflow_train.supercategory}
+ merge_predictions: True
+ postprocessor: ${scratch.original_box_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 100
+ pred_file_evaluators:
+ - _target_: sam3.eval.coco_eval_offline.CocoEvaluatorOfflineWithPredFileEvaluators
+ gt_path: ${paths.roboflow_vl_100_root}/${roboflow_train.supercategory}/test/_annotations.coco.json
+ tide: False
+ iou_type: "bbox"
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+ optimizer:
+ _target_: torch.optim.AdamW
+
+ gradient_clip:
+ _target_: sam3.train.optim.optimizer.GradientClipper
+ max_norm: 0.1
+ norm_type: 2
+
+ param_group_modifiers:
+ - _target_: sam3.train.optim.optimizer.layer_decay_param_modifier
+ _partial_: True
+ layer_decay_value: ${scratch.lrd_vision_backbone}
+ apply_to: 'backbone.vision_backbone.trunk'
+ overrides:
+ - pattern: '*pos_embed*'
+ value: 1.0
+
+ options:
+ lr:
+ - scheduler: # transformer and class_embed
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_transformer}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_vision_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.vision_backbone.*'
+ - scheduler:
+ _target_: sam3.train.optim.schedulers.InverseSquareRootParamScheduler
+ base_lr: ${scratch.lr_language_backbone}
+ timescale: ${scratch.scheduler_timescale}
+ warmup_steps: ${scratch.scheduler_warmup}
+ cooldown_steps: ${scratch.scheduler_cooldown}
+ param_names:
+ - 'backbone.language_backbone.*'
+
+ weight_decay:
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: ${scratch.wd}
+ - scheduler:
+ _target_: fvcore.common.param_scheduler.ConstantParamScheduler
+ value: 0.0
+ param_names:
+ - '*bias*'
+ module_cls_names: ['torch.nn.LayerNorm']
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/${roboflow_train.supercategory}
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 1
+ gpus_per_node: 2
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
+ # Job array configuration
+ job_array:
+ num_tasks: 100
+ task_index: 0
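+ # As in the eval config, each array task handles one dataset: it fine-tunes on at
+ # most roboflow_train.num_images (100) training images of that dataset and runs
+ # validation on its test split every val_epoch_freq (10) epochs.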
+
+# ============================================================================
+# Available Roboflow Supercategories (for reference)
+# ============================================================================
+
+all_roboflow_supercategories:
+ - -grccs
+ - zebrasatasturias
+ - cod-mw-warzone
+ - canalstenosis
+ - label-printing-defect-version-2
+ - new-defects-in-wood
+ - orionproducts
+ - aquarium-combined
+ - varroa-mites-detection--test-set
+ - clashroyalechardetector
+ - stomata-cells
+ - halo-infinite-angel-videogame
+ - pig-detection
+ - urine-analysis1
+ - aerial-sheep
+ - orgharvest
+ - actions
+ - mahjong
+ - liver-disease
+ - needle-base-tip-min-max
+ - wheel-defect-detection
+ - aircraft-turnaround-dataset
+ - xray
+ - wildfire-smoke
+ - spinefrxnormalvindr
+ - ufba-425
+ - speech-bubbles-detection
+ - train
+ - pill
+ - truck-movement
+ - car-logo-detection
+ - inbreast
+ - sea-cucumbers-new-tiles
+ - uavdet-small
+ - penguin-finder-seg
+ - aerial-airport
+ - bibdetection
+ - taco-trash-annotations-in-context
+ - bees
+ - recode-waste
+ - screwdetectclassification
+ - wine-labels
+ - aerial-cows
+ - into-the-vale
+ - gwhd2021
+ - lacrosse-object-detection
+ - defect-detection
+ - dataconvert
+ - x-ray-id
+ - ball
+ - tube
+ - 2024-frc
+ - crystal-clean-brain-tumors-mri-dataset
+ - grapes-5
+ - human-detection-in-floods
+ - buoy-onboarding
+ - apoce-aerial-photographs-for-object-detection-of-construction-equipment
+ - l10ul502
+ - floating-waste
+ - deeppcb
+ - ism-band-packet-detection
+ - weeds4
+ - invoice-processing
+ - thermal-cheetah
+ - tomatoes-2
+ - marine-sharks
+ - peixos-fish
+ - sssod
+ - aerial-pool
+ - countingpills
+ - asphaltdistressdetection
+ - roboflow-trained-dataset
+ - everdaynew
+ - underwater-objects
+ - soda-bottles
+ - dentalai
+ - jellyfish
+ - deepfruits
+ - activity-diagrams
+ - circuit-voltages
+ - all-elements
+ - macro-segmentation
+ - exploratorium-daphnia
+ - signatures
+ - conveyor-t-shirts
+ - fruitjes
+ - grass-weeds
+ - infraredimageofpowerequipment
+ - 13-lkc01
+ - wb-prova
+ - flir-camera-objects
+ - paper-parts
+ - football-player-detection
+ - trail-camera
+ - smd-components
+ - water-meter
+ - nih-xray
+ - the-dreidel-project
+ - electric-pylon-detection-in-rsi
+ - cable-damage
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_sav_test.yaml b/sam3/train/configs/saco_video_evals/saco_veval_sav_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b5bed477a1d6fa5e54797db0177eb08eb279d2e5
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_sav_test.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_sav_test
+ experiment_log_dir:
+ ytvis_json: /saco_veval_sav_test.json
+ ytvis_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+ num_videos: null
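+ # Purely illustrative values (hypothetical paths, not shipped with the repo):
+ #   experiment_log_dir: /logs/saco_veval_sav_test
+ #   ytvis_json: /datasets/saco/saco_veval_sav_test.json
+ #   ytvis_dir: /datasets/saco/sav_test_frames
+ #   bpe_path: assets/bpe_simple_vocab_16e6.txt.gz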
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: True
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # arbitrary key naming this meter
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_sav_test_noheur.yaml b/sam3/train/configs/saco_video_evals/saco_veval_sav_test_noheur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..abc3289c6cb6606e7ec7607d167bccf04150751a
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_sav_test_noheur.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_sav_test
+ experiment_log_dir:
+ ytvis_json: /saco_veval_sav_test.json
+ ytvis_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: False
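+ # This "noheur" variant matches saco_veval_sav_test.yaml except that
+ # apply_temporal_disambiguation is turned off, i.e. the temporal disambiguation
+ # heuristic is disabled at inference. dump_file_name is unchanged, so use a
+ # separate experiment_log_dir to avoid overwriting the heuristic run's predictions.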
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # arbitrary key naming this meter
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_sav_val.yaml b/sam3/train/configs/saco_video_evals/saco_veval_sav_val.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..25c6e609217a637e3633e8d21d503cdb56e0f720
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_sav_val.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_sav_val
+ experiment_log_dir:
+ ytvis_json: /saco_veval_sav_val.json
+ ytvis_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: True
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # arbitrary key naming this meter
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_sav_val_noheur.yaml b/sam3/train/configs/saco_video_evals/saco_veval_sav_val_noheur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..9a89ebaa3eb295534a8d09e17b4f6003cadb8a12
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_sav_val_noheur.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_sav_val
+ experiment_log_dir:
+ ytvis_json: /saco_veval_sav_val.json
+ ytvis_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: False
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # arbitrary key naming this meter
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_test.yaml b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..fdc818538143bbd69b11df026cb072cc4a385483
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_test.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change these to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_smartglasses_test
+ experiment_log_dir:
+ ytvis_json: /saco_veval_smartglasses_test.json
+ ytvis_dir:
+ bpe_path: # Path to the BPE vocab file (assets/bpe_simple_vocab_16e6.txt.gz)
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
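+ # Entries with _target_ are instantiated via Hydra (hydra.utils.instantiate); nodes marked
+ # _partial_: true (coco_json_loader, collate_fn) become partially-applied callables whose
+ # remaining arguments are supplied at runtime by the dataset / dataloader.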
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: True
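+ # The matching *_noheur config is identical except apply_temporal_disambiguation: False,
+ # i.e. the temporal disambiguation heuristic is disabled there.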
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
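+
+# Hypothetical launch example (assumes a Hydra-based entrypoint; the actual script name is an
+# assumption and not part of this config):
+#   python -m sam3.train.train --config-name saco_video_evals/saco_veval_smartglasses_test \
+#     paths.experiment_log_dir=/tmp/exp paths.ytvis_dir=/data/frames \
+#     launcher.num_nodes=1 launcher.gpus_per_node=1 submitit.use_cluster=False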
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_test_noheur.yaml b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_test_noheur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..2d6150e5cf9adde269a98061bff2332fb4f73866
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_test_noheur.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_smartglasses_test
+ experiment_log_dir:
+ ytvis_json: /saco_veval_smartglasses_test.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: False
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_val.yaml b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_val.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..a9bffda589381a9785eecf5cef4b3958a7840816
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_val.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_smartglasses_val
+ experiment_log_dir:
+ ytvis_json: /saco_veval_smartglasses_val.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: True
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_val_noheur.yaml b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_val_noheur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e1f64436d606cbd6fc82f103397e147f3e65345f
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_smartglasses_val_noheur.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_smartglasses_val
+ experiment_log_dir:
+ ytvis_json: /saco_veval_smartglasses_val.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: False
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_yt1b_test.yaml b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_test.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..71e50347e0ffa390bac299385d8be93e6bdffedc
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_test.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_yt1b_test
+ experiment_log_dir:
+ ytvis_json: /saco_veval_yt1b_test.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: True
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_yt1b_test_noheur.yaml b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_test_noheur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..f8df6aeec0b574a767255b027aa68a8336c60e8e
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_test_noheur.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_yt1b_test
+ experiment_log_dir:
+ ytvis_json: /saco_veval_yt1b_test.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: False
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_yt1b_val.yaml b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_val.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5816952ebb253b098b5ebe0a4e41edf4ffed57ee
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_val.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_yt1b_val
+ experiment_log_dir:
+ ytvis_json: /saco_veval_yt1b_val.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: True
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/saco_video_evals/saco_veval_yt1b_val_noheur.yaml b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_val_noheur.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..374e5e22b91003b2c8bcbcf6f7dc9462ec3038c3
--- /dev/null
+++ b/sam3/train/configs/saco_video_evals/saco_veval_yt1b_val_noheur.yaml
@@ -0,0 +1,174 @@
+# @package _global_
+defaults:
+ - _self_
+
+# ============================================================================
+# Paths Configuration (Change this to your own paths)
+# ============================================================================
+paths:
+
+ dump_file_name: saco_veval_yt1b_val
+ experiment_log_dir:
+ ytvis_json: /saco_veval_yt1b_val.json
+ ytvis_dir:
+ bpe_path: # should point to assets/bpe_simple_vocab_16e6.txt.gz
+ num_videos: null
+
+# ============================================================================
+# Different helper parameters and functions
+# ============================================================================
+scratch:
+ vid_mask_postprocessor:
+ _target_: sam3.eval.postprocessors.PostProcessNullOp
+
+ use_presence_eval: True
+
+ video_transforms_val:
+ - _target_: sam3.train.transforms.basic_for_api.ComposeAPI
+ transforms:
+ - _target_: sam3.train.transforms.segmentation.DecodeRle
+ # resize the image to a ${scratch.resolution}x${scratch.resolution} square
+ - _target_: sam3.train.transforms.basic_for_api.RandomResizeAPI
+ sizes: ${scratch.resolution} # originally `resolution: 1024`
+ square: true
+ consistent_transform: true
+ - _target_: sam3.train.transforms.basic_for_api.ToTensorAPI
+ - _target_: sam3.train.transforms.basic_for_api.NormalizeAPI
+ mean: ${scratch.val_norm_mean}
+ std: ${scratch.val_norm_std}
+
+ # Model parameters
+ d_model: 256
+
+ # Image processing parameters
+ resolution: 1008
+
+ # Normalization parameters
+ train_norm_mean: [0.5, 0.5, 0.5]
+ train_norm_std: [0.5, 0.5, 0.5]
+ val_norm_mean: [0.5, 0.5, 0.5]
+ val_norm_std: [0.5, 0.5, 0.5]
+
+ val_batch_size: 1
+ num_val_workers: 0
+ max_data_epochs: 20
+ hybrid_repeats: 1
+ gather_pred_via_filesys: false
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ _target_: sam3.train.trainer.Trainer
+ skip_saving_ckpts: true
+ empty_gpu_mem_cache_after_eval: True
+ skip_first_val: True
+ max_epochs: ${scratch.max_data_epochs}
+ accelerator: cuda
+ seed_value: 123
+ val_epoch_freq: 10
+ mode: val
+
+ distributed:
+ backend: nccl
+ find_unused_parameters: True
+ gradient_as_bucket_view: True
+
+ loss:
+ all:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+ default:
+ _target_: sam3.train.loss.sam3_loss.DummyLoss
+
+ data:
+ train: null
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_video_dataset.VideoGroundingDataset
+ limit_ids: ${paths.num_videos}
+ img_folder: ${paths.ytvis_dir}
+ ann_file: ${paths.ytvis_json}
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_VEVAL_API_FROM_JSON_NP
+ _partial_: true
+
+ transforms: ${scratch.video_transforms_val}
+ max_ann_per_img: 100000 # filtered in transforms
+ max_val_queries: 100000
+ multiplier: 1
+ load_segmentation: true
+ training: false
+
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: True
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: ytvis_val
+ with_seg_masks: true
+
+
+ model:
+ _target_: sam3.model_builder.build_sam3_video_model
+ bpe_path: ${paths.bpe_path}
+ has_presence_token: True
+ geo_encoder_use_img_cross_attn: True
+ apply_temporal_disambiguation: False
+
+ meters:
+ val:
+ ytvis_val:
+ pred_file: # key
+ _target_: sam3.eval.ytvis_eval.YTVISResultsWriter
+ dump_file: ${launcher.experiment_log_dir}/preds/${paths.dump_file_name}.json
+ postprocessor: ${scratch.vid_mask_postprocessor}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+
+ optim:
+ amp:
+ enabled: True
+ amp_dtype: bfloat16
+
+
+ checkpoint:
+ save_dir: ${launcher.experiment_log_dir}/checkpoints
+ save_freq: 0 # 0 means only the last checkpoint is saved.
+
+
+ logging:
+ tensorboard_writer:
+ _target_: sam3.train.utils.logger.make_tensorboard_logger
+ log_dir: ${launcher.experiment_log_dir}/tensorboard
+ flush_secs: 120
+ should_log: True
+ wandb_writer: null
+ log_dir: ${launcher.experiment_log_dir}/logs/
+ log_freq: 10
+
+# ============================================================================
+# Launcher and Submitit Configuration
+# ============================================================================
+
+launcher:
+ num_nodes: 8
+ gpus_per_node: 8
+ experiment_log_dir: ${paths.experiment_log_dir}
+ multiprocessing_context: forkserver
+
+submitit:
+ account: null
+ partition: null
+ qos: null
+ timeout_hour: 72
+ use_cluster: True
+ cpus_per_task: 10
+ port_range: [10000, 65000]
+ constraint: null
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_bdd100k.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_bdd100k.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..e5587cfb76237bfa6db8b5467632b2691f876cdf
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_bdd100k.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_bdd100k/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_bdd100k_merged_test.json
+ img_path: ${paths.silver_img_path}/bdd100k/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_bdd100k
+
+ meters:
+ val:
+ silver_bdd100k: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_bdd100k
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
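+
+# Note: the other silver image eval configs in this folder follow this exact template; only the
+# paths (experiment_log_dir, coco_gt, img_path) and the dataset key (dict_key, meters entry,
+# dump_dir) change, so adapting to a new silver dataset only requires editing those values.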
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_droid.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_droid.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..c0d62341ba915d5a04f9fc4d88d057aed15848f7
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_droid.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_droid/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_droid_merged_test.json
+ img_path: ${paths.silver_img_path}/droid/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_droid
+
+ meters:
+ val:
+ silver_droid: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_droid
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_ego4d.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_ego4d.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5a036d93d44a093755462cf748b2ed66a1e8a4f
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_ego4d.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_ego4d/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_ego4d_merged_test.json
+ img_path: ${paths.silver_img_path}/ego4d/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_ego4d
+
+ meters:
+ val:
+ silver_ego4d: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_ego4d
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_fathomnet.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_fathomnet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b15d0c82328171d8ed0c9d4b52c35477813ca389
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_fathomnet.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_fathomnet/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_fathomnet_test.json
+ img_path: ${paths.silver_img_path}/fathomnet/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_fathomnet
+
+ meters:
+ val:
+ silver_fathomnet: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_fathomnet
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_food_rec.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_food_rec.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..5158ff551e5d2babb1100ba1978e0da4613bac8d
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_food_rec.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_food_rec/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_food_rec_merged_test.json
+ img_path: ${paths.silver_img_path}/food_rec/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_food_rec
+
+ meters:
+ val:
+ silver_food_rec: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_food_rec
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_geode.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_geode.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..08f159fe9bc80072d8bd4a95f911bc70a555588d
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_geode.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_geode/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_geode_merged_test.json
+ img_path: ${paths.silver_img_path}/geode/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_geode
+
+ meters:
+ val:
+ silver_geode: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_geode
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_inaturalist.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_inaturalist.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..1d56d9758d8cb5711911b60278fe454c975a8456
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_inaturalist.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_inaturalist/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_inaturalist_merged_test.json
+ img_path: ${paths.silver_img_path}/inaturalist/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_inaturalist
+
+ meters:
+ val:
+ silver_inaturalist: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_inaturalist
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_nga.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_nga.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..b2de0afed4289272ca36634a911a7a1d38e03aa3
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_nga.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_nga_art/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_nga_art_merged_test.json
+ img_path: ${paths.silver_img_path}/nga/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_nga_art
+
+ meters:
+ val:
+ silver_nga_art: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_nga_art
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_sav.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_sav.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..7ebbb0f2bbee9b06221a46d362b8ca719bed9b4b
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_sav.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override paths here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_sav/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_sav_merged_test.json
+ img_path: ${paths.silver_img_path}/sav/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_sav
+
+ meters:
+ val:
+ silver_sav: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_sav
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
diff --git a/sam3/train/configs/silver_image_evals/sam3_silver_image_yt1b.yaml b/sam3/train/configs/silver_image_evals/sam3_silver_image_yt1b.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..901bd3a050f4041364b36299fa01648ce576d0af
--- /dev/null
+++ b/sam3/train/configs/silver_image_evals/sam3_silver_image_yt1b.yaml
@@ -0,0 +1,64 @@
+# @package _global_
+defaults:
+ - /configs/eval_base.yaml
+ - _self_
+
+# ============================================================================
+# Paths Configuration (you can override paths here, but no further changes should be needed if eval_base.yaml is correct)
+# ============================================================================
+paths:
+ experiment_log_dir: ${paths.base_experiment_log_dir}/silver_yt1b/
+ coco_gt: ${paths.base_annotation_path_silver}/silver_yt1b_merged_test.json
+ img_path: ${paths.silver_img_path}/yt1b/
+
+
+
+# ============================================================================
+# Trainer Configuration
+# ============================================================================
+
+trainer:
+ data:
+ val:
+ _target_: sam3.train.data.torch_dataset.TorchDataset
+ dataset:
+ _target_: sam3.train.data.sam3_image_dataset.Sam3ImageDataset
+ coco_json_loader:
+ _target_: sam3.train.data.coco_json_loaders.SAM3_EVAL_API_FROM_JSON_NP
+ _partial_: true
+ img_folder: ${paths.img_path}
+ ann_file: ${paths.coco_gt}
+ transforms: ${scratch.base_val_transform}
+ max_ann_per_img: 100000
+ multiplier: 1
+ training: false
+
+ shuffle: False
+ batch_size: ${scratch.val_batch_size}
+ num_workers: ${scratch.num_val_workers}
+ pin_memory: False
+ drop_last: False
+ collate_fn:
+ _target_: sam3.train.data.collator.collate_fn_api
+ _partial_: true
+ repeats: ${scratch.hybrid_repeats}
+ dict_key: silver_yt1b
+
+ meters:
+ val:
+ silver_yt1b: # this key matches the "dict_key" in the dataloader's collate function
+ cgf1:
+ _target_: sam3.eval.coco_writer.PredictionDumper
+ iou_type: "segm"
+ dump_dir: ${launcher.experiment_log_dir}/dumps/silver_yt1b
+ merge_predictions: True
+ postprocessor: ${scratch.mask_postprocessor_thresholded}
+ gather_pred_via_filesys: ${scratch.gather_pred_via_filesys}
+ maxdets: 1000000 # no limit
+ pred_file_evaluators:
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "bbox"
+ - _target_: sam3.eval.cgf1_eval.CGF1Evaluator
+ gt_path: ${paths.coco_gt}
+ iou_type: "segm"
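
All four configs rely on `${paths.*}` interpolations whose roots are expected to come from `eval_base.yaml`. The hedged OmegaConf sketch below (placeholder root paths, no Hydra composition) shows how these interpolations resolve once the roots are defined.

```python
# Illustration only: the root values are placeholders standing in for whatever
# eval_base.yaml defines; the derived entries mirror the yt1b config above.
from omegaconf import OmegaConf

cfg = OmegaConf.create(
    {
        "paths": {
            "base_experiment_log_dir": "/logs",                  # placeholder root
            "base_annotation_path_silver": "/data/silver/anns",  # placeholder root
            "silver_img_path": "/data/silver/images",            # placeholder root
            "experiment_log_dir": "${paths.base_experiment_log_dir}/silver_yt1b/",
            "coco_gt": "${paths.base_annotation_path_silver}/silver_yt1b_merged_test.json",
            "img_path": "${paths.silver_img_path}/yt1b/",
        }
    }
)

# Interpolations resolve on access:
print(cfg.paths.coco_gt)  # /data/silver/anns/silver_yt1b_merged_test.json
```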
diff --git a/sam3/train/data/__init__.py b/sam3/train/data/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/train/data/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/train/data/coco_json_loaders.py b/sam3/train/data/coco_json_loaders.py
new file mode 100644
index 0000000000000000000000000000000000000000..4bdf4878f9e95f765184d804ec65d7cce69160ef
--- /dev/null
+++ b/sam3/train/data/coco_json_loaders.py
@@ -0,0 +1,465 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import json
+from collections import defaultdict
+from typing import Dict, List, Tuple
+
+import torch
+from pycocotools import mask as mask_util
+
+
+# ============================================================================
+# Utility Functions
+# ============================================================================
+
+
+def convert_boxlist_to_normalized_tensor(box_list, image_width, image_height):
+ """
+ Converts a list of bounding boxes to a normalized PyTorch tensor.
+
+ Args:
+ box_list (list of list or tuples): Each box is [x_min, y_min, x_max, y_max].
+ image_width (int or float): Width of the image.
+ image_height (int or float): Height of the image.
+
+ Returns:
+ torch.Tensor: Normalized tensor of shape (N, 4), values in [0, 1].
+ """
+ boxes = torch.tensor(box_list, dtype=torch.float32)
+ boxes[:, [0, 2]] /= image_width # x_min, x_max
+ boxes[:, [1, 3]] /= image_height # y_min, y_max
+ boxes = boxes.clamp(0, 1)
+ return boxes
+
+
+def load_coco_and_group_by_image(json_path: str) -> Tuple[List[Dict], Dict[int, str]]:
+ """
+ Load COCO JSON file and group annotations by image.
+
+ Args:
+ json_path (str): Path to COCO JSON file.
+
+ Returns:
+ Tuple containing:
+ - List of dicts with 'image' and 'annotations' keys
+ - Dict mapping category IDs to category names
+ """
+ with open(json_path, "r") as f:
+ coco = json.load(f)
+
+ images = {img["id"]: img for img in coco["images"]}
+
+ anns_by_image = defaultdict(list)
+ for ann in coco["annotations"]:
+ anns_by_image[ann["image_id"]].append(ann)
+
+ sorted_image_ids = sorted(images.keys())
+
+ grouped = []
+ for image_id in sorted_image_ids:
+ image_info = images[image_id]
+ grouped.append(
+ {"image": image_info, "annotations": anns_by_image.get(image_id, [])}
+ )
+
+ cat_id_to_name = {cat["id"]: cat["name"] for cat in coco["categories"]}
+
+ return grouped, cat_id_to_name
+
+
+def ann_to_rle(segm, im_info: Dict) -> Dict:
+ """
+ Convert annotation which can be polygons or uncompressed RLE to RLE.
+
+ Args:
+ segm: Segmentation data (polygon list or RLE dict)
+ im_info (dict): Image info containing 'height' and 'width'
+
+ Returns:
+ RLE encoded segmentation
+ """
+ h, w = im_info["height"], im_info["width"]
+
+ if isinstance(segm, list):
+ # Polygon - merge all parts into one mask RLE code
+ rles = mask_util.frPyObjects(segm, h, w)
+ rle = mask_util.merge(rles)
+ elif isinstance(segm["counts"], list):
+ # Uncompressed RLE
+ rle = mask_util.frPyObjects(segm, h, w)
+ else:
+ # Already RLE
+ rle = segm
+
+ return rle
+
+
+# ============================================================================
+# COCO Training API
+# ============================================================================
+
+
+class COCO_FROM_JSON:
+ """
+ COCO training API for loading box-only annotations from JSON.
+ Groups all annotations per image and creates queries per category.
+ """
+
+ def __init__(
+ self,
+ annotation_file,
+ prompts=None,
+ include_negatives=True,
+ category_chunk_size=None,
+ ):
+ """
+ Initialize the COCO training API.
+
+ Args:
+ annotation_file (str): Path to COCO JSON annotation file
+            prompts (str, optional): String encoding a list of {"id", "name"} dicts that
+                override the default category names used as query text
+            include_negatives (bool): Whether to include negative examples (categories with no instances)
+            category_chunk_size (int, optional): Number of categories grouped into each
+                datapoint; defaults to all categories in a single chunk
+ """
+ self._raw_data, self._cat_idx_to_text = load_coco_and_group_by_image(
+ annotation_file
+ )
+ self._sorted_cat_ids = sorted(list(self._cat_idx_to_text.keys()))
+ self.prompts = None
+ self.include_negatives = include_negatives
+ self.category_chunk_size = (
+ category_chunk_size
+ if category_chunk_size is not None
+ else len(self._sorted_cat_ids)
+ )
+ self.category_chunks = [
+ self._sorted_cat_ids[i : i + self.category_chunk_size]
+ for i in range(0, len(self._sorted_cat_ids), self.category_chunk_size)
+ ]
+ if prompts is not None:
+ prompts = eval(prompts)
+ self.prompts = {}
+ for loc_dict in prompts:
+ self.prompts[int(loc_dict["id"])] = loc_dict["name"]
+ assert len(self.prompts) == len(
+ self._sorted_cat_ids
+ ), "Number of prompts must match number of categories"
+
+ def getDatapointIds(self):
+ """Return all datapoint indices for training."""
+ return list(range(len(self._raw_data) * len(self.category_chunks)))
+
+ def loadQueriesAndAnnotationsFromDatapoint(self, idx):
+ """
+ Load queries and annotations for a specific datapoint.
+
+ Args:
+ idx (int): Datapoint index
+
+ Returns:
+ Tuple of (queries, annotations) lists
+ """
+ img_idx = idx // len(self.category_chunks)
+ chunk_idx = idx % len(self.category_chunks)
+ cat_chunk = self.category_chunks[chunk_idx]
+
+ queries = []
+ annotations = []
+
+ query_template = {
+ "id": None,
+ "original_cat_id": None,
+ "object_ids_output": None,
+ "query_text": None,
+ "query_processing_order": 0,
+ "ptr_x_query_id": None,
+ "ptr_y_query_id": None,
+ "image_id": 0, # Single image per datapoint
+ "input_box": None,
+ "input_box_label": None,
+ "input_points": None,
+ "is_exhaustive": True,
+ }
+
+ annot_template = {
+ "image_id": 0,
+ "bbox": None, # Normalized bbox in xywh
+ "area": None, # Unnormalized area
+ "segmentation": None, # RLE encoded
+ "object_id": None,
+ "is_crowd": None,
+ "id": None,
+ }
+
+ raw_annotations = self._raw_data[img_idx]["annotations"]
+ image_info = self._raw_data[img_idx]["image"]
+ width, height = image_info["width"], image_info["height"]
+
+ # Group annotations by category
+ cat_id_to_anns = defaultdict(list)
+ for ann in raw_annotations:
+ cat_id_to_anns[ann["category_id"]].append(ann)
+
+ annotations_by_cat_sorted = [
+ (cat_id, cat_id_to_anns[cat_id]) for cat_id in cat_chunk
+ ]
+
+ for cat_id, anns in annotations_by_cat_sorted:
+ if len(anns) == 0 and not self.include_negatives:
+ continue
+
+ cur_ann_ids = []
+
+ # Create annotations for this category
+ for ann in anns:
+ annotation = annot_template.copy()
+ annotation["id"] = len(annotations)
+ annotation["object_id"] = annotation["id"]
+ annotation["is_crowd"] = ann["iscrowd"]
+
+ normalized_boxes = convert_boxlist_to_normalized_tensor(
+ [ann["bbox"]], width, height
+ )
+ bbox = normalized_boxes[0]
+
+ annotation["area"] = (bbox[2] * bbox[3]).item()
+ annotation["bbox"] = bbox
+
+ if (
+ "segmentation" in ann
+ and ann["segmentation"] is not None
+ and ann["segmentation"] != []
+ ):
+ annotation["segmentation"] = ann_to_rle(
+ ann["segmentation"], im_info=image_info
+ )
+
+ annotations.append(annotation)
+ cur_ann_ids.append(annotation["id"])
+
+ # Create query for this category
+ query = query_template.copy()
+ query["id"] = len(queries)
+ query["original_cat_id"] = cat_id
+ query["query_text"] = (
+ self._cat_idx_to_text[cat_id]
+ if self.prompts is None
+ else self.prompts[cat_id]
+ )
+ query["object_ids_output"] = cur_ann_ids
+ queries.append(query)
+
+ return queries, annotations
+
+ def loadImagesFromDatapoint(self, idx):
+ """
+ Load image information for a specific datapoint.
+
+ Args:
+ idx (int): Datapoint index
+
+ Returns:
+ List containing image info dict
+ """
+ img_idx = idx // len(self.category_chunks)
+ img_data = self._raw_data[img_idx]["image"]
+ images = [
+ {
+ "id": 0,
+ "file_name": img_data["file_name"],
+ "original_img_id": img_data["id"],
+ "coco_img_id": img_data["id"],
+ }
+ ]
+ return images
+
+
+# ============================================================================
+# SAM3 Evaluation APIs
+# ============================================================================
+
+
+class SAM3_EVAL_API_FROM_JSON_NP:
+ """
+ SAM3 evaluation API for loading noun phrase queries from JSON.
+ """
+
+ def __init__(self, annotation_file):
+ """
+ Initialize the SAM3 evaluation API.
+
+ Args:
+ annotation_file (str): Path to SAM3 JSON annotation file
+ """
+ with open(annotation_file, "r") as f:
+ data = json.load(f)
+ self._image_data = data["images"]
+
+ def getDatapointIds(self):
+ """Return all datapoint indices."""
+ return list(range(len(self._image_data)))
+
+ def loadQueriesAndAnnotationsFromDatapoint(self, idx):
+ """
+ Load queries and annotations for a specific datapoint.
+
+ Args:
+ idx (int): Datapoint index
+
+ Returns:
+ Tuple of (queries, annotations) lists
+ """
+ cur_img_data = self._image_data[idx]
+ queries = []
+ annotations = []
+
+ query_template = {
+ "id": None,
+ "original_cat_id": None,
+ "object_ids_output": None,
+ "query_text": None,
+ "query_processing_order": 0,
+ "ptr_x_query_id": None,
+ "ptr_y_query_id": None,
+ "image_id": 0,
+ "input_box": None,
+ "input_box_label": None,
+ "input_points": None,
+ "is_exhaustive": True,
+ }
+
+ # Create query
+ query = query_template.copy()
+ query["id"] = len(queries)
+ query["original_cat_id"] = int(cur_img_data["queried_category"])
+ query["query_text"] = cur_img_data["text_input"]
+ query["object_ids_output"] = []
+ queries.append(query)
+
+ return queries, annotations
+
+ def loadImagesFromDatapoint(self, idx):
+ """
+ Load image information for a specific datapoint.
+
+ Args:
+ idx (int): Datapoint index
+
+ Returns:
+ List containing image info dict
+ """
+ img_data = self._image_data[idx]
+ images = [
+ {
+ "id": 0,
+ "file_name": img_data["file_name"],
+ "original_img_id": img_data["id"],
+ "coco_img_id": img_data["id"],
+ }
+ ]
+ return images
+
+
+class SAM3_VEVAL_API_FROM_JSON_NP:
+ """
+ SAM3 video evaluation API for loading noun phrase queries from JSON.
+ """
+
+ def __init__(self, annotation_file):
+ """
+ Initialize the SAM3 video evaluation API.
+
+ Args:
+ annotation_file (str): Path to SAM3 video JSON annotation file
+ """
+ with open(annotation_file, "r") as f:
+ data = json.load(f)
+
+ assert "video_np_pairs" in data, "Incorrect data format"
+
+ self._video_data = data["videos"]
+ self._video_id_to_np_ids = defaultdict(list)
+ self._cat_id_to_np = {}
+
+ for cat_dict in data["categories"]:
+ self._cat_id_to_np[cat_dict["id"]] = cat_dict["name"]
+
+ for video_np_dict in data["video_np_pairs"]:
+ self._video_id_to_np_ids[video_np_dict["video_id"]].append(
+ video_np_dict["category_id"]
+ )
+ assert (
+ self._cat_id_to_np[video_np_dict["category_id"]]
+ == video_np_dict["noun_phrase"]
+ ), "Category name does not match text input"
+
+ def getDatapointIds(self):
+ """Return all datapoint indices."""
+ return list(range(len(self._video_data)))
+
+ def loadQueriesAndAnnotationsFromDatapoint(self, idx):
+ """
+ Load queries and annotations for a specific video datapoint.
+
+ Args:
+ idx (int): Datapoint index
+
+ Returns:
+ Tuple of (queries, annotations) lists
+ """
+ cur_vid_data = self._video_data[idx]
+ queries = []
+ annotations = []
+
+ query_template = {
+ "id": None,
+ "original_cat_id": None,
+ "object_ids_output": None,
+ "query_text": None,
+ "query_processing_order": 0,
+ "ptr_x_query_id": None,
+ "ptr_y_query_id": None,
+ "image_id": 0,
+ "input_box": None,
+ "input_box_label": None,
+ "input_points": None,
+ "is_exhaustive": True,
+ }
+
+ all_np_ids = self._video_id_to_np_ids[cur_vid_data["id"]]
+
+ for np_id in all_np_ids:
+ text_input = self._cat_id_to_np[np_id]
+
+ for i, image_path in enumerate(cur_vid_data["file_names"]):
+ query = query_template.copy()
+ query["id"] = len(queries)
+ query["original_cat_id"] = np_id
+ query["query_text"] = text_input
+ query["image_id"] = i
+ query["query_processing_order"] = i
+ query["object_ids_output"] = []
+ queries.append(query)
+
+ return queries, annotations
+
+ def loadImagesFromDatapoint(self, idx):
+ """
+ Load image information for a specific video datapoint.
+
+ Args:
+ idx (int): Datapoint index
+
+ Returns:
+ List containing image info dicts for all frames
+ """
+ video_data = self._video_data[idx]
+ images = [
+ {
+ "id": i,
+ "file_name": file_name,
+ "original_img_id": video_data["id"],
+ "coco_img_id": video_data["id"],
+ }
+ for i, file_name in enumerate(video_data["file_names"])
+ ]
+ return images
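
A hypothetical walkthrough of the `COCO_FROM_JSON` indexing scheme defined above: each datapoint id encodes an (image, category-chunk) pair, and every category in the chunk becomes one query whose `object_ids_output` indexes into the returned annotations. The annotation path below is a placeholder.

```python
# Sketch of the training-API indexing; requires a real COCO-style JSON file at the
# placeholder path to actually run.
from sam3.train.data.coco_json_loaders import COCO_FROM_JSON

api = COCO_FROM_JSON("annotations/train.json", category_chunk_size=10)  # placeholder path

# With C categories and chunk size 10, each image yields ceil(C / 10) datapoints:
# datapoint idx -> (image index = idx // n_chunks, chunk index = idx % n_chunks).
ids = api.getDatapointIds()

queries, annotations = api.loadQueriesAndAnnotationsFromDatapoint(ids[0])
for q in queries:
    # One query per category in the chunk; empty object lists are kept when
    # include_negatives=True (the default).
    print(q["query_text"], len(q["object_ids_output"]))
```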
diff --git a/sam3/train/data/collator.py b/sam3/train/data/collator.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0f7e2e265a36b4a08bffeecc28df8139167f2d4
--- /dev/null
+++ b/sam3/train/data/collator.py
@@ -0,0 +1,360 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from dataclasses import fields, is_dataclass
+from typing import Any, get_args, get_origin, List, Union
+
+import torch
+
+from sam3.model.data_misc import (
+ BatchedDatapoint,
+ BatchedFindTarget,
+ BatchedInferenceMetadata,
+ FindStage,
+)
+
+from .sam3_image_dataset import Datapoint
+
+
+MyTensor = Union[torch.Tensor, List[Any]]
+
+
+def convert_my_tensors(obj):
+ def is_optional_field(field) -> bool:
+ return get_origin(field) is Union and type(None) in get_args(field)
+
+ for field in fields(obj):
+ if is_dataclass(getattr(obj, field.name)):
+ convert_my_tensors(getattr(obj, field.name))
+ continue
+
+ field_type = field.type
+ if is_optional_field(field.type):
+ field_type = Union[get_args(field.type)[:-1]] # Get the Optional field type
+
+ if field_type != MyTensor or getattr(obj, field.name) is None:
+ continue
+
+ elif len(getattr(obj, field.name)) and isinstance(
+ getattr(obj, field.name)[0], torch.Tensor
+ ):
+ stack_dim = 0
+ if field.name in [
+ "input_boxes",
+ "input_boxes_label",
+ ]:
+ stack_dim = 1
+ setattr(
+ obj,
+ field.name,
+ torch.stack(getattr(obj, field.name), dim=stack_dim).to(
+ getattr(obj, field.name + "__type")
+ ),
+ )
+ else:
+ setattr(
+ obj,
+ field.name,
+ torch.as_tensor(
+ getattr(obj, field.name), dtype=getattr(obj, field.name + "__type")
+ ),
+ )
+ return obj
+
+
+def packed_to_padded_naive(boxes_packed, num_boxes, fill_value=0):
+ """
+ Convert a packed tensor of bounding boxes to a padded tensor of bounding
+ boxes. Naive implementation using a loop.
+
+ Inputs:
+ - boxes_packed: Tensor of shape (N_1 + ... + N_B, 4)
+ - num_boxes: Tensor of shape (B,) where num_boxes[i] = N_i
+
+ Returns:
+ - boxes_padded: Tensor of shape (B, N_max, 4) where N_max = max_i N_i
+ """
+ B = num_boxes.shape[0]
+ Ns = num_boxes.tolist()
+
+ boxes_padded = boxes_packed.new_zeros(B, max(Ns), *boxes_packed.shape[1:])
+ if fill_value != 0:
+ boxes_padded[...] = fill_value
+ prev_idx = 0
+ for i in range(B):
+ next_idx = prev_idx + Ns[i]
+ boxes_padded[i, : Ns[i]] = boxes_packed[prev_idx:next_idx]
+ prev_idx = next_idx
+ return boxes_padded
+
+
+def pad_tensor_list_to_longest(
+ tensors: List[torch.Tensor], dim=0, pad_val=0
+) -> List[torch.Tensor]:
+ # Edits the list in-place
+ if not tensors:
+ return tensors
+ pad_len = max(t.shape[dim] for t in tensors)
+ for i in range(len(tensors)):
+ n_dims = len(tensors[i].shape)
+ n_right_dims = (n_dims - 1) - (n_dims + dim) % n_dims
+ n_pad = pad_len - tensors[i].shape[dim]
+ pad_tuple = tuple([0] * 2 * n_right_dims + [0, n_pad])
+ tensors[i] = torch.nn.functional.pad(tensors[i], pad_tuple, value=pad_val)
+ return tensors
+
+
+def collate_fn_api_with_chunking(
+ batch,
+ num_chunks,
+ dict_key,
+ with_seg_masks=False,
+ input_points_embedding_dim=257,
+ repeats: int = 0,
+ load_image_in_fp16: bool = False,
+):
+ assert num_chunks >= 1, "num_chunks must be >= 1"
+
+ # split the batch into num_chunks chunks
+ batch_chunks = [batch[i::num_chunks] for i in range(num_chunks)]
+
+ # collate each chunk
+ collated_chunks = [
+ collate_fn_api(
+ chunk,
+ dict_key,
+ with_seg_masks,
+ input_points_embedding_dim,
+ repeats,
+ # ptr_behaviour,
+ load_image_in_fp16,
+ )
+ for chunk in batch_chunks
+ ]
+ return collated_chunks
+
+
+def collate_fn_api(
+ batch: List[Datapoint],
+ dict_key,
+ with_seg_masks=False,
+ input_points_embedding_dim=257,
+ repeats: int = 0,
+ load_image_in_fp16: bool = False,
+):
+ # img_batch = torch.stack(sum([[img.data for img in v.images] for v in batch], []))
+ img_batch = []
+ text_batch = []
+ raw_images = None
+
+ num_stages = (
+ max(q.query_processing_order for data in batch for q in data.find_queries) + 1
+ )
+
+ stages = [
+ FindStage(
+ img_ids=[],
+ text_ids=[],
+ input_boxes=[],
+ input_boxes_label=[],
+ input_boxes_mask=[],
+ input_points=[],
+ input_points_mask=[],
+ object_ids=[],
+ )
+ for _ in range(num_stages)
+ ]
+ find_targets = [
+ BatchedFindTarget(
+ num_boxes=[],
+ boxes=[],
+ boxes_padded=[],
+ is_exhaustive=[],
+ segments=[],
+ semantic_segments=[],
+ is_valid_segment=[],
+ repeated_boxes=[],
+ object_ids=[],
+ object_ids_padded=[],
+ )
+ for _ in range(num_stages)
+ ]
+ find_metadatas = [
+ BatchedInferenceMetadata(
+ coco_image_id=[],
+ original_size=[],
+ object_id=[],
+ frame_index=[],
+ original_image_id=[],
+ original_category_id=[],
+ is_conditioning_only=[],
+ )
+ for _ in range(num_stages)
+ ]
+
+ offset_img_id = 0
+ offset_query_id = [0 for _ in range(num_stages)]
+ for i, data in enumerate(batch):
+ img_batch.extend([img.data for img in data.images])
+
+ if data.raw_images is not None:
+ if raw_images is None:
+ raw_images = []
+ raw_images.extend(data.raw_images)
+
+ # Conversion of query_ids indexing in a datapoint to query_ids indexing in a stage
+ datapoint_query_id_2_stage_query_id = []
+ for q in data.find_queries:
+ stage_id = q.query_processing_order
+ datapoint_query_id_2_stage_query_id.append(offset_query_id[stage_id])
+ offset_query_id[stage_id] += 1
+
+ for j, q in enumerate(data.find_queries):
+ stage_id = q.query_processing_order
+ stages[stage_id].img_ids.append(q.image_id + offset_img_id)
+ if q.query_text not in text_batch:
+ text_batch.append(q.query_text)
+ stages[stage_id].text_ids.append(text_batch.index(q.query_text))
+
+ assert (
+ q.inference_metadata is not None
+ ), "inference_metadata must be provided when FindQueryLoaded is created."
+ for f in fields(q.inference_metadata):
+ getattr(find_metadatas[stage_id], f.name).append(
+ getattr(q.inference_metadata, f.name)
+ )
+
+ if q.input_bbox is not None:
+ assert q.input_bbox.numel() % 4 == 0
+ assert q.input_bbox_label is not None
+ nb_boxes = q.input_bbox.numel() // 4
+ assert len(q.input_bbox_label) == nb_boxes
+ stages[stage_id].input_boxes.append(q.input_bbox.view(nb_boxes, 4))
+ stages[stage_id].input_boxes_label.append(
+ q.input_bbox_label.view(nb_boxes)
+ )
+ stages[stage_id].input_boxes_mask.append(
+ torch.zeros(nb_boxes, dtype=torch.bool)
+ )
+ else:
+ stages[stage_id].input_boxes.append(torch.zeros(0, 4))
+ stages[stage_id].input_boxes_label.append(
+ torch.zeros(0, dtype=torch.bool)
+ )
+ stages[stage_id].input_boxes_mask.append(
+ torch.ones(0, dtype=torch.bool)
+ )
+
+ if q.input_points is not None:
+ stages[stage_id].input_points.append(
+ q.input_points.squeeze(0) # Strip a trivial batch index
+ )
+ # All masks will be padded up to the longest length
+                # with 1s before final conversion to batched tensors
+ stages[stage_id].input_points_mask.append(
+ torch.zeros(q.input_points.shape[1])
+ )
+ else:
+ stages[stage_id].input_points.append(
+ torch.empty(0, input_points_embedding_dim)
+ )
+ stages[stage_id].input_points_mask.append(torch.empty(0))
+
+ current_out_boxes = []
+ current_out_object_ids = []
+ # Set the object ids referred to by this query
+ stages[stage_id].object_ids.append(q.object_ids_output)
+ for object_id in q.object_ids_output:
+ current_out_boxes.append(
+ data.images[q.image_id].objects[object_id].bbox
+ )
+ current_out_object_ids.append(object_id)
+ find_targets[stage_id].boxes.extend(current_out_boxes)
+ find_targets[stage_id].object_ids.extend(current_out_object_ids)
+ if repeats > 0:
+ for _ in range(repeats):
+ find_targets[stage_id].repeated_boxes.extend(current_out_boxes)
+ find_targets[stage_id].num_boxes.append(len(current_out_boxes))
+ find_targets[stage_id].is_exhaustive.append(q.is_exhaustive)
+
+ if with_seg_masks:
+ current_seg_mask = []
+ current_is_valid_segment = []
+ for object_id in q.object_ids_output:
+ seg_mask = data.images[q.image_id].objects[object_id].segment
+ if seg_mask is not None:
+ current_seg_mask.append(seg_mask)
+ current_is_valid_segment.append(1)
+ else:
+ dummy_mask = torch.zeros(
+ data.images[q.image_id].data.shape[-2:], dtype=torch.bool
+ )
+ current_seg_mask.append(dummy_mask)
+ current_is_valid_segment.append(0)
+ find_targets[stage_id].segments.extend(current_seg_mask)
+ find_targets[stage_id].is_valid_segment.extend(current_is_valid_segment)
+ else:
+ # We are not loading segmentation masks
+ find_targets[stage_id].segments = None
+ find_targets[stage_id].is_valid_segment = None
+
+ if q.semantic_target is not None:
+ find_targets[stage_id].semantic_segments.append(q.semantic_target)
+
+ offset_img_id += len(data.images)
+
+ # Pad input points to equal sequence lengths
+ for i in range(len(stages)):
+ stages[i].input_points = pad_tensor_list_to_longest(
+ stages[i].input_points, dim=0, pad_val=0
+ )
+ # Masked-out regions indicated by 1s.
+ stages[i].input_points_mask = pad_tensor_list_to_longest(
+ stages[i].input_points_mask, dim=0, pad_val=1
+ )
+
+ # Pad input boxes to equal sequence lengths
+ for i in range(len(stages)):
+ stages[i].input_boxes = pad_tensor_list_to_longest(
+ stages[i].input_boxes, dim=0, pad_val=0
+ )
+ stages[i].input_boxes_label = pad_tensor_list_to_longest(
+ stages[i].input_boxes_label, dim=0, pad_val=0
+ )
+ # Masked-out regions indicated by 1s.
+ stages[i].input_boxes_mask = pad_tensor_list_to_longest(
+ stages[i].input_boxes_mask, dim=0, pad_val=1
+ )
+
+ # Convert to tensors
+ for i in range(len(stages)):
+ stages[i] = convert_my_tensors(stages[i])
+ find_targets[i] = convert_my_tensors(find_targets[i])
+ find_metadatas[i] = convert_my_tensors(find_metadatas[i])
+ # get padded representation for the boxes
+ find_targets[i].boxes_padded = packed_to_padded_naive(
+ find_targets[i].boxes.view(-1, 4), find_targets[i].num_boxes
+ )
+ find_targets[i].object_ids_padded = packed_to_padded_naive(
+ find_targets[i].object_ids, find_targets[i].num_boxes, fill_value=-1
+ )
+
+ # Finalize the image batch
+ # check sizes
+ for img in img_batch[1:]:
+ assert img.shape == img_batch[0].shape, "All images must have the same size"
+ image_batch = torch.stack(img_batch)
+ if load_image_in_fp16:
+ # Optionally, cast the image tensors to fp16, which helps save GPU memory on
+ # long videos with thousands of frames (where image tensors could be several GBs)
+ image_batch = image_batch.half()
+
+ return {
+ dict_key: BatchedDatapoint(
+ img_batch=image_batch,
+ find_text_batch=text_batch,
+ find_inputs=stages,
+ find_targets=find_targets,
+ find_metadatas=find_metadatas,
+ raw_images=raw_images,
+ )
+ }
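
A small usage sketch of `packed_to_padded_naive` from the collator above: three boxes packed for a batch of two samples (2 + 1) are padded into a `(B, N_max, 4)` tensor, with zeros filling the missing slots.

```python
import torch
from sam3.train.data.collator import packed_to_padded_naive

# Three boxes packed contiguously: the first sample owns two, the second owns one.
boxes_packed = torch.tensor(
    [[0.1, 0.1, 0.4, 0.4],
     [0.2, 0.2, 0.5, 0.5],
     [0.3, 0.3, 0.6, 0.6]]
)
num_boxes = torch.tensor([2, 1])

boxes_padded = packed_to_padded_naive(boxes_packed, num_boxes)
print(boxes_padded.shape)  # torch.Size([2, 2, 4]); boxes_padded[1, 1] is all zeros
```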
diff --git a/sam3/train/data/sam3_image_dataset.py b/sam3/train/data/sam3_image_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..97efb1d15bb62a7a8d2b04df1e5edaeb958aa235
--- /dev/null
+++ b/sam3/train/data/sam3_image_dataset.py
@@ -0,0 +1,528 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Dataset class for modulated detection"""
+
+import json
+import os
+import random
+import sys
+import traceback
+from collections import Counter
+from dataclasses import dataclass
+from enum import Enum
+from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
+
+import torch
+import torch.utils.data
+import torchvision
+from decord import cpu, VideoReader
+from iopath.common.file_io import g_pathmgr
+
+from PIL import Image as PILImage
+from PIL.Image import DecompressionBombError
+
+from sam3.model.box_ops import box_xywh_to_xyxy
+from torchvision.datasets.vision import VisionDataset
+
+from .coco_json_loaders import COCO_FROM_JSON
+
+
+@dataclass
+class InferenceMetadata:
+ """Metadata required for postprocessing"""
+
+ # Coco id that corresponds to the "image" for evaluation by the coco evaluator
+ # This is used for our own "class agnostic" evaluation
+ coco_image_id: int
+
+ # id in the original dataset, such that we can use the original evaluator
+ original_image_id: int
+
+ # Original category id (if we want to use the original evaluator)
+ original_category_id: int
+
+ # Size of the raw image (height, width)
+ original_size: Tuple[int, int]
+
+ # Id of the object in the media
+ object_id: int
+
+ # Index of the frame in the media (0 if single image)
+ frame_index: int
+
+ # Whether it is for conditioning only, e.g., 0-th frame in TA is for conditioning
+ # as we assume GT available in frame 0.
+ is_conditioning_only: Optional[bool] = False
+
+
+@dataclass
+class FindQuery:
+ query_text: str
+
+ image_id: int
+
+ # In case of a find query, the list of object ids that have to be predicted
+ object_ids_output: List[int]
+
+ # This is "instance exhaustivity".
+ # true iff all instances are separable and annotated
+ # See below the slightly different "pixel exhaustivity"
+ is_exhaustive: bool
+
+ # The order in which the queries are processed (only meaningful for video)
+ query_processing_order: int = 0
+
+ # Input geometry, initially in denormalized XYXY format. Then
+ # 1. converted to normalized CxCyWH by the Normalize transform
+ input_bbox: Optional[torch.Tensor] = None
+ input_bbox_label: Optional[torch.Tensor] = None
+
+ # Only for the PVS task
+ input_points: Optional[torch.Tensor] = None
+
+ semantic_target: Optional[torch.Tensor] = None
+
+ # pixel exhaustivity: true iff the union of all segments (including crowds)
+ # covers every pixel belonging to the target class
+ # Note that instance_exhaustive implies pixel_exhaustive
+ is_pixel_exhaustive: Optional[bool] = None
+
+
+@dataclass
+class FindQueryLoaded(FindQuery):
+ # Must have default value since FindQuery has entries with default values
+ inference_metadata: Optional[InferenceMetadata] = None
+
+
+@dataclass
+class Object:
+ # Initially in denormalized XYXY format, gets converted to normalized CxCyWH by the Normalize transform
+ bbox: torch.Tensor
+ area: float
+
+ # Id of the object in the media
+ object_id: Optional[int] = -1
+
+ # Index of the frame in the media (0 if single image)
+ frame_index: Optional[int] = -1
+
+ segment: Optional[Union[torch.Tensor, dict]] = None # RLE dict or binary mask
+
+ is_crowd: bool = False
+
+ source: Optional[str] = None
+
+
+@dataclass
+class Image:
+ data: Union[torch.Tensor, PILImage.Image]
+ objects: List[Object]
+ size: Tuple[int, int] # (height, width)
+
+ # For blurring augmentation
+ blurring_mask: Optional[Dict[str, Any]] = None
+
+
+@dataclass
+class Datapoint:
+ """Refers to an image/video and all its annotations"""
+
+ find_queries: List[FindQueryLoaded]
+ images: List[Image]
+ raw_images: Optional[List[PILImage.Image]] = None
+
+
+class CustomCocoDetectionAPI(VisionDataset):
+ """`MS Coco Detection `_ Dataset.
+
+ Args:
+ root (string): Root directory where images are downloaded to.
+ annFile (string): Path to json annotation file.
+ transform (callable, optional): A function/transform that takes in an PIL image
+ and returns a transformed version. E.g, ``transforms.ToTensor``
+ target_transform (callable, optional): A function/transform that takes in the
+ target and transforms it.
+ transforms (callable, optional): A function/transform that takes input sample and its target as entry
+ and returns a transformed version.
+ """
+
+ def __init__(
+ self,
+ root: str,
+ annFile: str,
+ load_segmentation: bool,
+ fix_fname: bool = False,
+ training: bool = True,
+ blurring_masks_path: Optional[str] = None,
+ use_caching: bool = True,
+ zstd_dict_path=None,
+ filter_query=None,
+ coco_json_loader: Callable = COCO_FROM_JSON,
+ limit_ids: int = None,
+ ) -> None:
+ super().__init__(root)
+
+ self.annFile = annFile
+ self.use_caching = use_caching
+ self.zstd_dict_path = zstd_dict_path
+
+ self.curr_epoch = 0 # Used in case data loader behavior changes across epochs
+ self.load_segmentation = load_segmentation
+ self.fix_fname = fix_fname
+ self.filter_query = filter_query
+
+ self.coco = None
+ self.coco_json_loader = coco_json_loader
+ self.limit_ids = limit_ids
+ self.set_sharded_annotation_file(0)
+ self.training = training
+ self.blurring_masks_path = blurring_masks_path
+
+ def _load_images(
+ self, datapoint_id: int, img_ids_to_load: Optional[Set[int]] = None
+ ) -> Tuple[List[Tuple[int, PILImage.Image]], List[Dict[str, Any]]]:
+ all_images = []
+ all_img_metadata = []
+ for current_meta in self.coco.loadImagesFromDatapoint(datapoint_id):
+ img_id = current_meta["id"]
+ if img_ids_to_load is not None and img_id not in img_ids_to_load:
+ continue
+ if self.fix_fname:
+ current_meta["file_name"] = current_meta["file_name"].split("/")[-1]
+ path = current_meta["file_name"]
+ if self.blurring_masks_path is not None:
+ mask_fname = os.path.basename(path).replace(".jpg", "-mask.json")
+ mask_path = os.path.join(self.blurring_masks_path, mask_fname)
+ if os.path.exists(mask_path):
+ with open(mask_path, "r") as fopen:
+ current_meta["blurring_mask"] = json.load(fopen)
+
+ all_img_metadata.append(current_meta)
+ path = os.path.join(self.root, path)
+ try:
+ if ".mp4" in path and path[-4:] == ".mp4":
+ # Going to load a video frame
+ video_path, frame = path.split("@")
+ video = VideoReader(video_path, ctx=cpu(0))
+ # Convert to PIL image
+ all_images.append(
+ (
+ img_id,
+ torchvision.transforms.ToPILImage()(
+ video[int(frame)].asnumpy()
+ ),
+ )
+ )
+ else:
+ with g_pathmgr.open(path, "rb") as fopen:
+ all_images.append((img_id, PILImage.open(fopen).convert("RGB")))
+ except FileNotFoundError as e:
+ print(f"File not found: {path} from dataset: {self.annFile}")
+ raise e
+
+ return all_images, all_img_metadata
+
+ def set_curr_epoch(self, epoch: int):
+ self.curr_epoch = epoch
+
+ def set_epoch(self, epoch: int):
+ pass
+
+ def set_sharded_annotation_file(self, data_epoch: int):
+ if self.coco is not None:
+ return
+
+ assert g_pathmgr.isfile(
+ self.annFile
+ ), f"please provide valid annotation file. Missing: {self.annFile}"
+ annFile = g_pathmgr.get_local_path(self.annFile)
+
+ if self.coco is not None:
+ del self.coco
+
+ self.coco = self.coco_json_loader(annFile)
+ # Use a torch tensor here to optimize memory usage when using several dataloaders
+ ids_list = list(sorted(self.coco.getDatapointIds()))
+ if self.limit_ids is not None:
+ local_random = random.Random(len(ids_list))
+ local_random.shuffle(ids_list)
+ ids_list = ids_list[: self.limit_ids]
+ self.ids = torch.as_tensor(ids_list, dtype=torch.long)
+
+ def __getitem__(self, index: int) -> Datapoint:
+ return self._load_datapoint(index)
+
+ def _load_datapoint(self, index: int) -> Datapoint:
+ """A separate method for easy overriding in subclasses."""
+ id = self.ids[index].item()
+ pil_images, img_metadata = self._load_images(id)
+ queries, annotations = self.coco.loadQueriesAndAnnotationsFromDatapoint(id)
+ return self.load_queries(pil_images, annotations, queries, img_metadata)
+
+ def load_queries(self, pil_images, annotations, queries, img_metadata):
+ """Transform the raw image and queries into a Datapoint sample."""
+ images: List[Image] = []
+ id2index_img = {}
+ id2index_obj = {}
+ id2index_find_query = {}
+ id2imsize = {}
+ assert len(pil_images) == len(img_metadata)
+ for i in range(len(pil_images)):
+ w, h = pil_images[i][1].size
+ blurring_mask = None
+ if "blurring_mask" in img_metadata[i]:
+ blurring_mask = img_metadata[i]["blurring_mask"]
+ images.append(
+ Image(
+ data=pil_images[i][1],
+ objects=[],
+ size=(h, w),
+ blurring_mask=blurring_mask,
+ )
+ )
+ id2index_img[pil_images[i][0]] = i
+ id2imsize[pil_images[i][0]] = (h, w)
+
+ for annotation in annotations:
+ image_id = id2index_img[annotation["image_id"]]
+ bbox = box_xywh_to_xyxy(torch.as_tensor(annotation["bbox"])).view(1, 4)
+ h, w = id2imsize[annotation["image_id"]]
+ bbox[:, 0::2].mul_(w).clamp_(min=0, max=w)
+ bbox[:, 1::2].mul_(h).clamp_(min=0, max=h)
+ segment = None
+ if self.load_segmentation and "segmentation" in annotation:
+ # We're not decoding the RLE here, a transform will do it lazily later
+ segment = annotation["segmentation"]
+ images[image_id].objects.append(
+ Object(
+ bbox=bbox[0],
+ area=annotation["area"],
+ object_id=(
+ annotation["object_id"] if "object_id" in annotation else -1
+ ),
+ frame_index=(
+ annotation["frame_index"] if "frame_index" in annotation else -1
+ ),
+ segment=segment,
+ is_crowd=(
+ annotation["is_crowd"] if "is_crowd" in annotation else None
+ ),
+ source=annotation["source"] if "source" in annotation else "",
+ )
+ )
+ id2index_obj[annotation["id"]] = len(images[image_id].objects) - 1
+
+ find_queries = []
+ stage2num_queries = Counter()
+ for i, query in enumerate(queries):
+ stage2num_queries[query["query_processing_order"]] += 1
+ id2index_find_query[query["id"]] = i
+
+ # Sanity check: all the stages should have the same number of queries
+ if len(stage2num_queries) == 0:
+ num_queries_per_stage = 0
+ else:
+ num_queries_per_stage = stage2num_queries.most_common(1)[0][1]
+ for stage, num_queries in stage2num_queries.items():
+ assert (
+ num_queries == num_queries_per_stage
+ ), f"Number of queries in stage {stage} is {num_queries}, expected {num_queries_per_stage}"
+
+ for query_id, query in enumerate(queries):
+ h, w = id2imsize[query["image_id"]]
+ if (
+ "input_box" in query
+ and query["input_box"] is not None
+ and len(query["input_box"]) > 0
+ ):
+ bbox = box_xywh_to_xyxy(torch.as_tensor(query["input_box"])).view(-1, 4)
+ bbox[:, 0::2].mul_(w).clamp_(min=0, max=w)
+ bbox[:, 1::2].mul_(h).clamp_(min=0, max=h)
+ if "input_box_label" in query and query["input_box_label"] is not None:
+ bbox_label = torch.as_tensor(
+ query["input_box_label"], dtype=torch.long
+ ).view(-1)
+ assert len(bbox_label) == len(bbox)
+ else:
+ # assume the boxes are positives
+ bbox_label = torch.ones(len(bbox), dtype=torch.long)
+ else:
+ bbox = None
+ bbox_label = None
+
+ if "input_points" in query and query["input_points"] is not None:
+ points = torch.as_tensor(query["input_points"]).view(1, -1, 3)
+ points[:, :, 0:1].mul_(w).clamp_(min=0, max=w)
+ points[:, :, 1:2].mul_(h).clamp_(min=0, max=h)
+ else:
+ points = None
+
+ try:
+ original_image_id = int(
+ img_metadata[id2index_img[query["image_id"]]]["original_img_id"]
+ )
+ except ValueError:
+ original_image_id = -1
+
+ try:
+ img_metadata_query = img_metadata[id2index_img[query["image_id"]]]
+ coco_image_id = (
+ int(img_metadata_query["coco_img_id"])
+ if "coco_img_id" in img_metadata_query
+ else query["id"]
+ )
+ except KeyError:
+ coco_image_id = -1
+
+ try:
+ original_category_id = int(query["original_cat_id"])
+ except (ValueError, KeyError):
+ original_category_id = -1
+
+ # For evaluation, we associate the ids of the object to be tracked to the query
+ if query["object_ids_output"]:
+ obj_id = query["object_ids_output"][0]
+ obj_idx = id2index_obj[obj_id]
+ image_idx = id2index_img[query["image_id"]]
+ object_id = images[image_idx].objects[obj_idx].object_id
+ frame_index = images[image_idx].objects[obj_idx].frame_index
+ else:
+ object_id = -1
+ frame_index = -1
+
+ find_queries.append(
+ FindQueryLoaded(
+ # id=query["id"],
+ # query_type=qtype,
+ query_text=(
+ query["query_text"] if query["query_text"] is not None else ""
+ ),
+ image_id=id2index_img[query["image_id"]],
+ input_bbox=bbox,
+ input_bbox_label=bbox_label,
+ input_points=points,
+ object_ids_output=[
+ id2index_obj[obj_id] for obj_id in query["object_ids_output"]
+ ],
+ is_exhaustive=query["is_exhaustive"],
+ is_pixel_exhaustive=(
+ query["is_pixel_exhaustive"]
+ if "is_pixel_exhaustive" in query
+ else (
+ query["is_exhaustive"] if query["is_exhaustive"] else None
+ )
+ ),
+ query_processing_order=query["query_processing_order"],
+ inference_metadata=InferenceMetadata(
+ coco_image_id=-1 if self.training else coco_image_id,
+ original_image_id=(-1 if self.training else original_image_id),
+ frame_index=frame_index,
+ original_category_id=original_category_id,
+ original_size=(h, w),
+ object_id=object_id,
+ ),
+ )
+ )
+
+ return Datapoint(
+ find_queries=find_queries,
+ images=images,
+ raw_images=[p[1] for p in pil_images],
+ )
+
+ def __len__(self) -> int:
+ return len(self.ids)
+
+
+class Sam3ImageDataset(CustomCocoDetectionAPI):
+ def __init__(
+ self,
+ img_folder,
+ ann_file,
+ transforms,
+ max_ann_per_img: int,
+ multiplier: int,
+ training: bool,
+ load_segmentation: bool = False,
+ max_train_queries: int = 81,
+ max_val_queries: int = 300,
+ fix_fname: bool = False,
+ is_sharded_annotation_dir: bool = False,
+ blurring_masks_path: Optional[str] = None,
+ use_caching: bool = True,
+ zstd_dict_path=None,
+ filter_query=None,
+ coco_json_loader: Callable = COCO_FROM_JSON,
+ limit_ids: int = None,
+ ):
+ super(Sam3ImageDataset, self).__init__(
+ img_folder,
+ ann_file,
+ fix_fname=fix_fname,
+ load_segmentation=load_segmentation,
+ training=training,
+ blurring_masks_path=blurring_masks_path,
+ use_caching=use_caching,
+ zstd_dict_path=zstd_dict_path,
+ filter_query=filter_query,
+ coco_json_loader=coco_json_loader,
+ limit_ids=limit_ids,
+ )
+
+ self._transforms = transforms
+ self.training = training
+ self.max_ann_per_img = max_ann_per_img
+ self.max_train_queries = max_train_queries
+ self.max_val_queries = max_val_queries
+
+ self.repeat_factors = torch.ones(len(self.ids), dtype=torch.float32)
+
+ self.repeat_factors *= multiplier
+ print(f"Raw dataset length = {len(self.ids)}")
+
+ self._MAX_RETRIES = 100
+
+ def __getitem__(self, idx):
+ return self.__orig_getitem__(idx)
+
+ def __orig_getitem__(self, idx):
+ for _ in range(self._MAX_RETRIES):
+ try:
+ datapoint = super(Sam3ImageDataset, self).__getitem__(idx)
+
+ # This can be done better by filtering the offending find queries
+ # However, this requires care:
+ # - Delete any find/get query that may depend on the deleted one
+ # - Re-compute the indexes in the pointers to account for the deleted finds
+ for q in datapoint.find_queries:
+ if len(q.object_ids_output) > self.max_ann_per_img:
+ raise DecompressionBombError(
+ f"Too many outputs ({len(q.object_ids_output)})"
+ )
+
+ max_queries = (
+ self.max_train_queries if self.training else self.max_val_queries
+ )
+
+ if len(datapoint.find_queries) > max_queries:
+ raise DecompressionBombError(
+ f"Too many find queries ({len(datapoint.find_queries)})"
+ )
+
+ if len(datapoint.find_queries) == 0:
+ raise DecompressionBombError("No find queries")
+ for transform in self._transforms:
+ datapoint = transform(datapoint, epoch=self.curr_epoch)
+
+ break
+ except (DecompressionBombError, OSError, ValueError) as error:
+ sys.stderr.write(f"ERROR: got loading error on datapoint {idx}\n")
+ sys.stderr.write(f"Exception: {error}\n")
+ sys.stderr.write(traceback.format_exc())
+ idx = (idx + 1) % len(self)
+ else:
+ raise RuntimeError(
+ f"Failed {self._MAX_RETRIES} times trying to load an image."
+ )
+
+ return datapoint
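
For reference, a minimal construction of the validation dataset that the silver-eval configs describe, mirroring their `trainer.data.val.dataset` node. Paths are placeholders and the identity transform stands in for `${scratch.base_val_transform}`; this is a sketch, not a supported entry point.

```python
from sam3.train.data.coco_json_loaders import SAM3_EVAL_API_FROM_JSON_NP
from sam3.train.data.sam3_image_dataset import Sam3ImageDataset

def identity_transform(datapoint, epoch=0):
    # Stand-in for the real val transforms, which resize/normalize the images.
    return datapoint

dataset = Sam3ImageDataset(
    img_folder="/data/silver/inaturalist",                        # placeholder for ${paths.img_path}
    ann_file="/data/silver/silver_inaturalist_merged_test.json",  # placeholder for ${paths.coco_gt}
    transforms=[identity_transform],
    max_ann_per_img=100_000,
    multiplier=1,
    training=False,
    coco_json_loader=SAM3_EVAL_API_FROM_JSON_NP,
)

datapoint = dataset[0]  # a Datapoint with one image and its text query
```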
diff --git a/sam3/train/data/sam3_video_dataset.py b/sam3/train/data/sam3_video_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ee8a4f82f1f37e598626ecde31742df9776ea12
--- /dev/null
+++ b/sam3/train/data/sam3_video_dataset.py
@@ -0,0 +1,327 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import copy
+
+import io
+import json
+import logging
+import math
+import os
+import pickle
+import random
+import sys
+from typing import Any, Dict, List, Optional, Set, Tuple, Union
+
+import torch
+import torchvision
+
+# from decord import cpu, VideoReader
+
+from iopath.common.file_io import PathManager
+from PIL import Image as PILImage
+
+from .sam3_image_dataset import Datapoint, Sam3ImageDataset
+
+
+SEED = 42
+
+
+class VideoGroundingDataset(Sam3ImageDataset):
+ def __init__(
+ self,
+ num_stages_sample: int = 4,
+ stage_stride_min: int = 1,
+ stage_stride_max: int = 5,
+ random_reverse_time_axis: bool = True,
+ is_tiling_single_image: bool = False,
+        # By default, we remove those find queries with geometric inputs (input_box or input_points)
+ # when creating synthetic videos from frames (since they are not *video-level* text prompts).
+ # If we need them later, we can sample them on-the-fly via transforms or inside the model.
+ tile_img_keep_find_queries_with_geo_inputs: bool = False,
+ tile_img_keep_get_queries: bool = False,
+ # the maximum number of find queries (for each frame) to keep in a video; if the datapoint
+ # contains more queries per frame than this limit, we subsample them to avoid OOM errors
+ max_query_num: int = -1, # the default -1 means no limit
+ # whether to override the "is_exhaustive" flag of the loaded find queries to True
+        # (by default, our video datasets are ingested with is_exhaustive=False, since YTVIS-format
+        # annotations don't include an "is_exhaustive" flag; this means that those unmatched (negative)
+ # detection queries or tracking queries do not receive a classification loss given that we have
+ # weak_loss=True in IABCEMdetr -- this could lead to false positives for both image detection
+ # and video association.)
+ override_query_is_exhaustive_to_true: bool = False,
+ # the maximum number of masklets in a video; if the datapoint contains more masklets
+ # than this limit, we skip the datapoint to avoid OOM errors (this is useful for
+ # training with large videos that contain many objects)
+ max_masklet_num_in_video: int = 300, # 300 masklets is usually OK to avoid OOM
+ **kwargs,
+ ):
+ """
+ Loading video grounding data
+
+ Video frame sampling parameters (for training only):
+ - num_stages_sample: number of frames to sample from the video during training
+ - stage_stride_min: minimum stride between sampled frames during training
+ - stage_stride_max: maximum stride between sampled frames during training (if it's
+ greater than stage_stride_min, the actual stride is sampled uniformly between min
+ and max; during inference, we always use all frames in the video with stride=1)
+ - random_reverse_time_axis: whether to randomly invert the video's temporal axis
+ (i.e. playing it backwards) during training
+ """
+ super().__init__(**kwargs)
+ assert num_stages_sample >= 1
+ assert stage_stride_min >= 1
+ assert stage_stride_max >= stage_stride_min
+ self.num_stages_sample = num_stages_sample
+ self.stage_stride_min = stage_stride_min
+ self.stage_stride_max = stage_stride_max
+ self.random_reverse_time_axis = random_reverse_time_axis
+ self.is_tiling_single_image = is_tiling_single_image
+ self.tile_img_keep_find_queries_with_geo_inputs = (
+ tile_img_keep_find_queries_with_geo_inputs
+ )
+ self.tile_img_keep_get_queries = tile_img_keep_get_queries
+ self.max_query_num = max_query_num
+ self.override_query_is_exhaustive_to_true = override_query_is_exhaustive_to_true
+ self.max_masklet_num_in_video = max_masklet_num_in_video
+ self.rng = random.Random()
+ self.set_curr_epoch(0)
+
+ def set_curr_epoch(self, epoch: int):
+ super().set_curr_epoch(epoch)
+ self.rng.seed(SEED + epoch)
+
+ def _load_datapoint(self, index: int) -> Datapoint:
+ id = self.ids[index].item()
+ queries, annotations = self.coco.loadQueriesAndAnnotationsFromDatapoint(id)
+
+ # we subsample the video frames during training
+ if self.training and not self.is_tiling_single_image:
+ # pick a random stride for sampling query stages (`randint` includes both ends)
+ stage_stride = self.rng.randint(
+ self.stage_stride_min, self.stage_stride_max
+ )
+ stage_ids_to_keep = self._sample_stage_ids(
+ queries, self.num_stages_sample, stage_stride
+ )
+ # filter the queries and annotations to keep only the selected stages
+ # (also remap the stage ids so that they are contiguous and start from 0)
+ reverse_time_axis = (
+ self.rng.random() < 0.5 if self.random_reverse_time_axis else False
+ )
+ queries, annotations, kept_img_ids = self._filter_query_and_anns(
+ queries,
+ annotations,
+ stage_ids_to_keep,
+ remap_stage_id=True,
+ reverse_time_axis=reverse_time_axis,
+ )
+ pil_images, img_metadata = self._load_images(id, kept_img_ids)
+ if reverse_time_axis:
+ # reverse the temporal ordering of the images and their metadata
+ # so that the image order matches the query order
+ pil_images = pil_images[::-1]
+ img_metadata = img_metadata[::-1]
+ else:
+ pil_images, img_metadata = self._load_images(id)
+
+ # check that all the images have the same image size (they are expected
+ # to have the same image size since they are frames from the same video)
+ assert all(p.size == pil_images[0][1].size for _, p in pil_images)
+
+ queries.sort(key=lambda q: q["query_processing_order"])
+ if self.override_query_is_exhaustive_to_true:
+ for query in queries:
+ query["is_exhaustive"] = True
+ datapoint = self.load_queries(pil_images, annotations, queries, img_metadata)
+
+ # skip datapoints with too many masklets to avoid OOM errors
+ num_masklets_in_video = len(datapoint.images[0].objects)
+ if num_masklets_in_video > self.max_masklet_num_in_video > 0:
+ logging.warning(
+ f"Datapoint {id} has ({num_masklets_in_video=}), exceeding "
+ f"the maximum allowed ({self.max_masklet_num_in_video}). "
+ "Skipping this datapoint."
+ )
+ next_index = (index + 1) % len(self)
+ return self._load_datapoint(next_index) # move to the next datapoint
+
+ if self.is_tiling_single_image:
+ datapoint = self._tile_single_image_data(datapoint, self.num_stages_sample)
+ if self.max_query_num > 0:
+ datapoint = self._subsample_queries(datapoint, self.max_query_num)
+
+ # ensure that all find queries have the same processing order as their image id
+ for query in datapoint.find_queries:
+ assert query.image_id == query.query_processing_order, (
+ f"find query has inconsistent image_id and "
+ f"query_processing_order: {query.image_id=} vs "
+ f"{query.query_processing_order=}"
+ )
+ return datapoint
+
+ def _sample_stage_ids(self, queries, num_stages_sample, stage_stride):
+ """Sample a subset of stage ids from all queries."""
+ # Later we can perhaps turn it into a Sampler class to be more flexible.
+ all_stage_ids = sorted(set(q["query_processing_order"] for q in queries))
+ num_stages_total = len(all_stage_ids)
+ if num_stages_total < num_stages_sample:
+ raise ValueError("Not enough stages to sample")
+
+ # the difference in index between the first and the last sampled stage ids
+ b_e_gap = (num_stages_sample - 1) * stage_stride
+ if b_e_gap > num_stages_total - 1:
+            # In this case, it's not possible to sample with the provided stride,
+ # so we use the maximum possible stride.
+ prev_stage_stride = stage_stride
+ stage_stride = math.floor((num_stages_total - 1) / (num_stages_sample - 1))
+ logging.info(
+ f"lowering stride from {prev_stage_stride} to {stage_stride} to "
+ f"sample {num_stages_sample} stages (from {num_stages_total} total)"
+ )
+ b_e_gap = (num_stages_sample - 1) * stage_stride
+
+ # randomly select a starting stage id (`randint` includes both ends)
+ b_max = len(all_stage_ids) - 1 - b_e_gap
+ b = self.rng.randint(0, b_max)
+ e = b + b_e_gap
+ stage_ids_to_keep = all_stage_ids[b : e + 1 : stage_stride]
+ return stage_ids_to_keep
+
+ def _filter_query_and_anns(
+ self, queries, annotations, stage_ids_to_keep, remap_stage_id, reverse_time_axis
+ ):
+ """Filter queries and annotations to only keep those in `stage_ids_to_keep`."""
+ stage_ids_to_keep = set(stage_ids_to_keep)
+ kept_img_ids = set()
+ kept_stage_ids = set()
+
+ # Filter queries -- keep those queries with stage_id in `stage_ids_to_keep`
+ filtered_queries = []
+ for query in queries:
+ input_box = query.get("input_box", None)
+ input_points = query.get("input_points", None)
+ has_geo_input = input_box is not None or input_points is not None
+ if has_geo_input and not self.tile_img_keep_find_queries_with_geo_inputs:
+ continue
+ stage_id = query["query_processing_order"]
+ if stage_id in stage_ids_to_keep:
+ kept_img_ids.add(query["image_id"])
+ kept_stage_ids.add(stage_id)
+ filtered_queries.append(query)
+ # Check that all frames in `stage_ids_to_keep` are present after filtering
+ all_frame_present = kept_stage_ids == stage_ids_to_keep
+ assert all_frame_present, f"{kept_stage_ids=} vs {stage_ids_to_keep=}"
+ if remap_stage_id:
+ # Remap those kept stage ids to be contiguous and starting from 0
+ old_stage_ids = sorted(kept_stage_ids, reverse=reverse_time_axis)
+ stage_id_old2new = {old: new for new, old in enumerate(old_stage_ids)}
+ for query in filtered_queries:
+ ptr_x_is_empty = query["ptr_x_query_id"] in [None, -1]
+ ptr_y_is_empty = query["ptr_y_query_id"] in [None, -1]
+ assert (
+ ptr_x_is_empty and ptr_y_is_empty
+ ), "Remapping stage ids is not supported for queries with non-empty ptr_x or ptr_y pointers"
+ query["query_processing_order"] = stage_id_old2new[
+ query["query_processing_order"]
+ ]
+
+ # Filter annotations -- keep those annotations with image_id in `kept_img_ids`
+ filtered_annotations = [
+ ann for ann in annotations if ann["image_id"] in kept_img_ids
+ ]
+
+ return filtered_queries, filtered_annotations, kept_img_ids
+
+ def _tile_single_image_data(self, datapoint: Datapoint, num_stages_sample: int):
+ """
+ Tile a single image and its queries to simulate video frames. The output is a
+ datapoint with *identical video frames* (i.e. the same static image) and needs
+ further transforms (e.g. affine) to get video frames with different content.
+ """
+ # tile `images: List[Image]`
+ assert len(datapoint.images) == 1, "Expected only one single image"
+ tiled_images = [
+ copy.deepcopy(datapoint.images[0]) for _ in range(num_stages_sample)
+ ]
+ for stage_id, img in enumerate(tiled_images):
+ for obj in img.objects:
+ obj.frame_index = stage_id
+
+ # tile `raw_images: Optional[List[PILImage.Image]] = None`
+ tiled_raw_images = None
+ if datapoint.raw_images is not None:
+ assert len(datapoint.raw_images) == 1, "Expected only one single image"
+ tiled_raw_images = [
+ datapoint.raw_images[0].copy() for _ in range(num_stages_sample)
+ ]
+
+ # tile `find_queries: List[FindQueryLoaded]`
+ tiled_find_queries_per_stage = [[] for _ in range(num_stages_sample)]
+ for query in datapoint.find_queries:
+ assert query.image_id == 0
+ assert query.query_processing_order == 0
+ # check and make sure that a query doesn't contain pointers or references
+ # to other queries (that cannot be tiled)
+            assert getattr(query, "ptr_x", None) is None
+            assert getattr(query, "ptr_y", None) is None
+            assert getattr(query, "ptr_mem", None) is None
+ # assert query.wkdata_qid is None
+ # assert query.other_positive_qids is None
+ # assert query.negative_qids is None
+ has_geo_input = (
+ query.input_bbox is not None or query.input_points is not None
+ )
+ if has_geo_input and not self.tile_img_keep_find_queries_with_geo_inputs:
+ continue
+ for stage_id in range(num_stages_sample):
+ # copy the query and update the image_id
+ new_query = copy.deepcopy(query)
+ new_query.image_id = stage_id
+ new_query.query_processing_order = stage_id
+ if new_query.inference_metadata is not None:
+ new_query.inference_metadata.frame_index = stage_id
+ tiled_find_queries_per_stage[stage_id].append(new_query)
+
+ tiled_find_queries = sum(tiled_find_queries_per_stage, [])
+
+        # tile `get_queries: List[GetQuery]` -- we skip them for now, since they involve
+        # a pointer to a find query that is complicated to tile and there is no imminent
+        # use case for them in the video grounding task
+ if self.tile_img_keep_get_queries:
+ raise NotImplementedError("Tiling get queries is not implemented yet")
+ else:
+ tiled_get_queries = []
+
+ return Datapoint(
+ images=tiled_images,
+ raw_images=tiled_raw_images,
+ find_queries=tiled_find_queries,
+ get_queries=tiled_get_queries,
+ )
+
+ def _subsample_queries(self, datapoint: Datapoint, max_query_num: int):
+ """Subsample to keep at most `max_query_num` queries per frame in a datapoint."""
+ # aggregate the find queries per stage
+ num_frames = max(q.query_processing_order for q in datapoint.find_queries) + 1
+ find_queries_per_stage = [[] for _ in range(num_frames)]
+ for query in datapoint.find_queries:
+ find_queries_per_stage[query.query_processing_order].append(query)
+
+ # verify that all the stages have the same number of queries
+ num_queries_per_stage = len(find_queries_per_stage[0])
+ for queries in find_queries_per_stage:
+ assert len(queries) == num_queries_per_stage
+ if max_query_num <= 0 or num_queries_per_stage <= max_query_num:
+ return datapoint
+
+ # subsample the queries to keep only `max_query_num` queries
+ sampled_inds = self.rng.sample(range(num_queries_per_stage), max_query_num)
+ sampled_find_queries_per_stage = [
+ [queries[idx] for idx in sampled_inds] for queries in find_queries_per_stage
+ ]
+ sampled_find_queries = sum(sampled_find_queries_per_stage, [])
+ return Datapoint(
+ images=datapoint.images,
+ raw_images=datapoint.raw_images,
+ find_queries=sampled_find_queries,
+ get_queries=datapoint.get_queries,
+ )
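+
+
+# Minimal standalone sketch of the per-frame subsampling idea above (illustrative
+# only; plain lists stand in for the `Datapoint` / query objects, and the function
+# name is made up): the same sampled indices are applied to every frame, so a query
+# kept on one frame is kept on all frames.
+def _example_consistent_subsample(queries_per_frame, max_query_num, seed=0):
+    import random
+
+    rng = random.Random(seed)
+    num_queries = len(queries_per_frame[0])
+    if max_query_num <= 0 or num_queries <= max_query_num:
+        return queries_per_frame
+    sampled_inds = rng.sample(range(num_queries), max_query_num)
+    return [[frame[idx] for idx in sampled_inds] for frame in queries_per_frame]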
diff --git a/sam3/train/data/torch_dataset.py b/sam3/train/data/torch_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..05556171c0decff7c110803afeb1226b1a157e48
--- /dev/null
+++ b/sam3/train/data/torch_dataset.py
@@ -0,0 +1,52 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Callable, Iterable, Optional
+
+from torch.utils.data import DataLoader, Dataset, DistributedSampler, IterableDataset
+
+
+class TorchDataset:
+ def __init__(
+ self,
+ dataset: Dataset,
+ batch_size: int,
+ num_workers: int,
+ shuffle: bool,
+ pin_memory: bool,
+ drop_last: bool,
+ collate_fn: Optional[Callable] = None,
+ worker_init_fn: Optional[Callable] = None,
+ enable_distributed_sampler=True,
+ ) -> None:
+ self.dataset = dataset
+ self.batch_size = batch_size
+ self.num_workers = num_workers
+ self.shuffle = shuffle
+ self.pin_memory = pin_memory
+ self.drop_last = drop_last
+ self.collate_fn = collate_fn
+ self.worker_init_fn = worker_init_fn
+ assert not isinstance(self.dataset, IterableDataset), "Not supported yet"
+ if enable_distributed_sampler:
+ self.sampler = DistributedSampler(self.dataset, shuffle=self.shuffle)
+ else:
+ self.sampler = None
+
+ def get_loader(self, epoch) -> Iterable:
+ if self.sampler:
+ self.sampler.set_epoch(epoch)
+ if hasattr(self.dataset, "epoch"):
+ self.dataset.epoch = epoch
+ if hasattr(self.dataset, "set_epoch"):
+ self.dataset.set_epoch(epoch)
+
+ return DataLoader(
+ self.dataset,
+ batch_size=self.batch_size,
+ num_workers=self.num_workers,
+ pin_memory=self.pin_memory,
+ drop_last=self.drop_last,
+ sampler=self.sampler,
+ collate_fn=self.collate_fn,
+ worker_init_fn=self.worker_init_fn,
+ )
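+
+
+# Usage sketch (illustrative only; the toy dataset and printed shapes are assumed).
+# The distributed sampler is disabled here because `DistributedSampler` requires an
+# initialized process group; in actual training `enable_distributed_sampler=True`
+# is the intended path.
+if __name__ == "__main__":
+    import torch
+    from torch.utils.data import TensorDataset
+
+    toy_dataset = TensorDataset(torch.arange(16).float().unsqueeze(1))
+    wrapper = TorchDataset(
+        dataset=toy_dataset,
+        batch_size=4,
+        num_workers=0,
+        shuffle=False,
+        pin_memory=False,
+        drop_last=False,
+        enable_distributed_sampler=False,
+    )
+    for batch in wrapper.get_loader(epoch=0):
+        print(batch[0].shape)  # torch.Size([4, 1])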
diff --git a/sam3/train/loss/__init__.py b/sam3/train/loss/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/train/loss/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/train/loss/loss_fns.py b/sam3/train/loss/loss_fns.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b61d7f59f9b9bd2c155d1e118954d6215fefefe
--- /dev/null
+++ b/sam3/train/loss/loss_fns.py
@@ -0,0 +1,1319 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+import warnings
+
+import torch
+import torch.distributed
+import torch.nn.functional as F
+import torchmetrics
+
+from sam3.model import box_ops
+
+from sam3.model.data_misc import interpolate
+
+from sam3.train.loss.sigmoid_focal_loss import (
+ triton_sigmoid_focal_loss,
+ triton_sigmoid_focal_loss_reduce,
+)
+from torch import nn
+
+from .mask_sampling import (
+ calculate_uncertainty,
+ get_uncertain_point_coords_with_randomness,
+ point_sample,
+)
+
+
+CORE_LOSS_KEY = "core_loss"
+
+
+def instance_masks_to_semantic_masks(
+ instance_masks: torch.Tensor, num_instances: torch.Tensor
+) -> torch.Tensor:
+ """This function converts instance masks to semantic masks.
+    It accepts a collapsed batch of instance masks (i.e. all instance masks are concatenated in a single tensor) and
+ the number of instances in each image of the batch.
+ It returns a mask with the same spatial dimensions as the input instance masks, where for each batch element the
+ semantic mask is the union of all the instance masks in the batch element.
+
+    If for a given batch element there are no instances (i.e. num_instances[i] == 0), the corresponding semantic mask will be a tensor of zeros.
+
+ Args:
+ instance_masks (torch.Tensor): A tensor of shape (N, H, W) where N is the number of instances in the batch.
+ num_instances (torch.Tensor): A tensor of shape (B,) where B is the batch size. It contains the number of instances
+ in each image of the batch.
+
+ Returns:
+ torch.Tensor: A tensor of shape (B, H, W) where B is the batch size and H, W are the spatial dimensions of the
+ input instance masks.
+ """
+ if num_instances.sum() == 0:
+ # all negative batch, create a tensor of zeros (B, 1, 1)
+ return num_instances.unsqueeze(-1).unsqueeze(-1)
+
+ masks_per_query = torch.split(instance_masks, num_instances.tolist())
+
+ return torch.stack([torch.any(masks, dim=0) for masks in masks_per_query], dim=0)
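+
+
+# Illustrative example for the conversion above (a minimal sketch; the tensor
+# values and helper name are made up): two images, where the first has two
+# instances and the second has none, collapse into per-image union masks.
+def _example_instance_to_semantic():
+    instance_masks = torch.zeros(2, 4, 4, dtype=torch.bool)
+    instance_masks[0, :2, :2] = True  # first instance of image 0
+    instance_masks[1, 2:, 2:] = True  # second instance of image 0
+    num_instances = torch.tensor([2, 0])
+    semantic = instance_masks_to_semantic_masks(instance_masks, num_instances)
+    # semantic[0] is the union of the two instance masks; semantic[1] is all False
+    return semantic  # shape (2, 4, 4)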
+
+
+@torch.no_grad()
+def accuracy(output, target, topk=(1,)):
+ """Computes the precision@k for the specified values of k"""
+ if target.numel() == 0:
+ return [torch.zeros([], device=output.device)]
+ maxk = max(topk)
+ batch_size = target.size(0)
+
+ _, pred = output.topk(maxk, 1, True, True)
+ pred = pred.t()
+ correct = pred.eq(target.view(1, -1).expand_as(pred))
+
+ res = []
+ for k in topk:
+ correct_k = correct[:k].view(-1).float().sum(0)
+ res.append(correct_k.mul_(100.0 / batch_size))
+ return res
+
+
+def dice_loss(inputs, targets, num_boxes, loss_on_multimask=False, reduce=True):
+ """
+ Compute the DICE loss, similar to generalized IOU for masks
+ Args:
+ inputs: A float tensor of arbitrary shape.
+ The predictions for each example.
+ targets: A float tensor with the same shape as inputs. Stores the binary
+ classification label for each element in inputs
+ (0 for the negative class and 1 for the positive class).
+ """
+ try:
+ loss = _dice_loss(inputs, targets, num_boxes, loss_on_multimask, reduce)
+ except torch.OutOfMemoryError:
+ logging.error("GPU OOM, computing dice loss on CPU")
+ # try to recover from GPU OOM by moving tensors to CPU and computing loss there
+ orig_device = inputs.device
+ inputs = inputs.cpu()
+ targets = targets.cpu()
+ if isinstance(num_boxes, torch.Tensor):
+ num_boxes = num_boxes.cpu()
+ loss = _dice_loss(inputs, targets, num_boxes, loss_on_multimask, reduce)
+ loss = loss.to(orig_device)
+
+ return loss
+
+
+def _dice_loss(inputs, targets, num_boxes, loss_on_multimask=False, reduce=True):
+ inputs = inputs.sigmoid()
+ if loss_on_multimask:
+ # inputs and targets are [N, M, H, W] where M corresponds to multiple predicted masks
+ assert inputs.dim() == 4 and targets.dim() == 4
+ # flatten spatial dimension while keeping multimask channel dimension
+ inputs = inputs.flatten(2)
+ targets = targets.flatten(2)
+ numerator = 2 * (inputs * targets).sum(-1)
+ else:
+ inputs = inputs.flatten(1)
+ numerator = 2 * (inputs * targets).sum(1)
+ denominator = inputs.sum(-1) + targets.sum(-1)
+ loss = 1 - (numerator + 1) / (denominator + 1)
+ if loss_on_multimask:
+ return loss / num_boxes
+ if not reduce:
+ return loss
+ return loss.sum() / num_boxes
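+
+
+# Quick sanity-check sketch for the dice loss above (illustrative values). The
+# inputs are already flattened to (N, H*W), matching how the `Masks` loss below
+# calls it: logits that match the target give a loss near 0, logits that predict
+# the complement give a loss near 1.
+def _example_dice_loss():
+    target = torch.zeros(1, 64)
+    target[:, :32] = 1.0
+    good_logits = (target * 2 - 1) * 20.0  # large positive logits on target pixels, large negative elsewhere
+    bad_logits = -good_logits
+    low = dice_loss(good_logits, target, num_boxes=1)  # close to 0
+    high = dice_loss(bad_logits, target, num_boxes=1)  # close to 1
+    return low, high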
+
+
+def sigmoid_focal_loss(
+ inputs,
+ targets,
+ num_boxes,
+ alpha: float = 0.25,
+ gamma: float = 2,
+ loss_on_multimask=False,
+ reduce=True,
+ triton=True,
+):
+ """
+ Loss used in RetinaNet for dense detection: https://arxiv.org/abs/1708.02002.
+ Args:
+ inputs: A float tensor of arbitrary shape.
+ The predictions for each example.
+ targets: A float tensor with the same shape as inputs. Stores the binary
+ classification label for each element in inputs
+ (0 for the negative class and 1 for the positive class).
+        alpha: (optional) Weighting factor in range (0, 1) to balance
+               positive vs negative examples, or -1 for no weighting. Defaults to 0.25.
+ gamma: Exponent of the modulating factor (1 - p_t) to
+ balance easy vs hard examples.
+ Returns:
+ Loss tensor
+ """
+ if not (0 <= alpha <= 1) and triton:
+ raise RuntimeError(f"Alpha should be in [0,1], got {alpha}")
+ if triton:
+ if reduce and not loss_on_multimask:
+ loss = triton_sigmoid_focal_loss_reduce(inputs, targets, alpha, gamma)
+ return loss / (num_boxes * inputs.shape[1])
+
+ loss = triton_sigmoid_focal_loss(inputs, targets, alpha, gamma)
+ else:
+ prob = inputs.sigmoid()
+ ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
+ p_t = prob * targets + (1 - prob) * (1 - targets)
+ loss = ce_loss * ((1 - p_t) ** gamma)
+
+ if alpha >= 0:
+ alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
+ loss = alpha_t * loss
+
+ if not reduce:
+ return loss
+
+ if loss_on_multimask:
+ # loss is [N, M, H, W] where M corresponds to multiple predicted masks
+ assert loss.dim() == 4
+ return loss.flatten(2).mean(-1) / num_boxes # average over spatial dims
+ return loss.mean(1).sum() / num_boxes
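+
+
+# Illustrative sketch of the focal loss above on toy logits (made-up values);
+# `triton=False` keeps it on the pure-PyTorch path so the sketch also runs
+# without the triton kernel.
+def _example_sigmoid_focal_loss():
+    logits = torch.tensor([[4.0, -4.0, 0.5, -0.5]])
+    targets = torch.tensor([[1.0, 0.0, 1.0, 0.0]])
+    # well-classified elements (the first two) are down-weighted by the (1 - p_t)**gamma factor
+    loss = sigmoid_focal_loss(logits, targets, num_boxes=1, alpha=0.25, gamma=2, triton=False)
+    return loss  # per-element mean over dim 1, summed over the batch, divided by num_boxes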
+
+
+def iou_loss(
+ inputs, targets, pred_ious, num_boxes, loss_on_multimask=False, use_l1_loss=False
+):
+ """MSE loss between predicted IoUs and actual IoUs between inputs and targets."""
+ assert inputs.dim() == 4 and targets.dim() == 4
+ pred_mask = inputs.flatten(2) > 0
+ gt_mask = targets.flatten(2) > 0
+ area_i = torch.sum(pred_mask & gt_mask, dim=-1).float()
+ area_u = torch.sum(pred_mask | gt_mask, dim=-1).float()
+ actual_ious = area_i / torch.clamp(area_u, min=1.0)
+
+ if use_l1_loss:
+ loss = F.l1_loss(pred_ious, actual_ious, reduction="none")
+ else:
+ loss = F.mse_loss(pred_ious, actual_ious, reduction="none")
+ if loss_on_multimask:
+ return loss / num_boxes
+ return loss.sum() / num_boxes
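+
+
+# Illustrative sketch for the IoU-prediction loss above (made-up shapes and
+# values): a few candidate masks per example, with the predicted IoUs regressed
+# towards the actual IoUs of the thresholded masks.
+def _example_iou_loss():
+    src = torch.randn(2, 3, 16, 16)  # (N, M, H, W) multimask logits
+    tgt = (torch.randn(2, 3, 16, 16) > 0).float()
+    pred_ious = torch.rand(2, 3)  # predicted IoU per candidate mask
+    loss = iou_loss(src, tgt, pred_ious, num_boxes=2, loss_on_multimask=True)
+    return loss  # shape (2, 3): per-mask MSE against the actual IoU, divided by num_boxes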
+
+
+@torch.jit.script
+def _contrastive_align(logits, positive_map):
+ positive_logits = -logits.masked_fill(~positive_map, 0)
+ negative_logits = logits # .masked_fill(positive_map, -1000000)
+
+ boxes_with_pos = positive_map.any(2)
+ pos_term = positive_logits.sum(2)
+ neg_term = negative_logits.logsumexp(2)
+
+ nb_pos = positive_map.sum(2) + 1e-6
+
+ box_to_token_loss = (
+ (pos_term / nb_pos + neg_term).masked_fill(~boxes_with_pos, 0).sum()
+ )
+
+ tokens_with_pos = positive_map.any(1)
+ pos_term = positive_logits.sum(1)
+ neg_term = negative_logits.logsumexp(1)
+
+ nb_pos = positive_map.sum(1) + 1e-6
+
+ tokens_to_boxes_loss = (
+ (pos_term / nb_pos + neg_term).masked_fill(~tokens_with_pos, 0).sum()
+ )
+ return (box_to_token_loss + tokens_to_boxes_loss) / 2
+
+
+def _get_src_permutation_idx(indices):
+ # permute predictions following indices
+ batch_idx = torch.cat(
+ [torch.full_like(src, i) for i, (src, _) in enumerate(indices)]
+ )
+ src_idx = torch.cat([src for (src, _) in indices])
+ return batch_idx, src_idx
+
+
+class LossWithWeights(nn.Module):
+ def __init__(self, weight_dict, compute_aux, supports_o2m_loss=True):
+ super().__init__()
+ # weights for each computed loss key (those losses not in weight_dict
+ # will not be aggregated in the final reduced core loss)
+ self.weight_dict = weight_dict if weight_dict is not None else {}
+ # whether this loss will be applied on auxiliary outputs
+ self.compute_aux = compute_aux
+ self.supports_o2m_loss = supports_o2m_loss
+ self.target_keys = []
+
+ def forward(self, *args, is_aux=False, **kwargs):
+ if is_aux and not self.compute_aux:
+ return {CORE_LOSS_KEY: 0.0}
+ losses = self.get_loss(*args, **kwargs)
+ losses[CORE_LOSS_KEY] = self.reduce_loss(losses)
+ return losses
+
+ def get_loss(self, **kwargs):
+ raise NotImplementedError()
+
+ def reduce_loss(self, losses):
+ reduced_loss = 0.0
+ for loss_key, weight in self.weight_dict.items():
+ if loss_key not in losses:
+ raise ValueError(f"{type(self)} doesn't compute {loss_key}")
+ if weight != 0:
+ reduced_loss += losses[loss_key] * weight
+
+ return reduced_loss
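+
+
+# Minimal sketch of the LossWithWeights pattern above (the subclass, its loss keys
+# and the toy tensors are made up): `get_loss` returns named losses, and only the
+# keys listed in `weight_dict` are aggregated into CORE_LOSS_KEY by `reduce_loss`.
+class _ExampleL1Loss(LossWithWeights):
+    def __init__(self, weight_dict=None):
+        super().__init__(weight_dict, compute_aux=False)
+
+    def get_loss(self, outputs, targets):
+        err = (outputs - targets).abs()
+        return {"loss_l1": err.mean(), "max_err": err.max()}
+
+
+def _example_loss_with_weights():
+    loss_fn = _ExampleL1Loss(weight_dict={"loss_l1": 2.0})
+    out = loss_fn(torch.ones(4), torch.zeros(4))
+    # out has "loss_l1", "max_err" (reported but unweighted) and CORE_LOSS_KEY == 2.0 * loss_l1
+    return out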
+
+
+class IABCEMdetr(LossWithWeights):
+ def __init__(
+ self,
+ pos_weight,
+ weight_dict=None,
+ compute_aux=True,
+ gamma=0,
+ weak_loss=True,
+ alpha=0.25,
+ pad_n_queries=None,
+ pad_scale_pos=1.0,
+ use_separate_loss_for_det_and_trk=False,
+ num_det_queries=None,
+ det_exhaustive_loss_scale_pos=1.0,
+ det_exhaustive_loss_scale_neg=1.0,
+ det_non_exhaustive_loss_scale_pos=1.0,
+ det_non_exhaustive_loss_scale_neg=1.0,
+ trk_loss_scale_pos=1.0,
+ trk_loss_scale_neg=1.0,
+ no_loss_for_fp_propagation=False,
+ apply_loss_to_det_queries_in_video_grounding=True,
+ use_presence=False,
+ use_presence_semgseg=False, # If True, use presence scores from the semgseg head.
+ presence_alpha=0.5,
+ presence_gamma=0.0,
+ pos_focal: bool = False, # for box scores, use focal loss for positives as well
+ ):
+ super().__init__(weight_dict, compute_aux)
+ self.pos_weight = pos_weight
+ self.gamma = gamma
+ self.weak_loss = weak_loss
+ self.alpha = alpha
+ self.target_keys.append("boxes_xyxy")
+ self.no_loss_for_fp_propagation = no_loss_for_fp_propagation
+ if self.weak_loss:
+ self.target_keys.append("is_exhaustive")
+        # NOTE: This is a hacky solution to keep the same CE loss scale across datasets where the model might predict a different number of object queries for different tasks.
+        # If not None, we assume there are pad_n_queries object queries in total.
+ # For example, if the model predicts only 1 object query and pad_n_queries=100, we pad the predictions with 99 zero preds.
+ # Currently this only affects the BCE loss and not the F1 score.
+ self.pad_n_queries = pad_n_queries
+ self.pad_scale_pos = pad_scale_pos
+ if self.pad_scale_pos != 1.0:
+ assert self.pad_n_queries is not None
+ # whether to use presence scores
+ self.use_presence = use_presence
+ self.use_presence_semgseg = use_presence_semgseg
+ if self.use_presence_semgseg:
+ assert self.use_presence
+ self.presence_alpha = presence_alpha
+ self.presence_gamma = presence_gamma
+ self.pos_focal = pos_focal
+
+ # Decoupled loss for detection and tracking queries
+ self.apply_loss_to_det_queries_in_video_grounding = (
+ apply_loss_to_det_queries_in_video_grounding
+ )
+ self.use_separate_loss_for_det_and_trk = use_separate_loss_for_det_and_trk
+ if num_det_queries is not None:
+            logging.warning("note: setting num_det_queries is no longer needed")
+ if self.use_separate_loss_for_det_and_trk:
+ assert not self.weak_loss, "Do not use weak_loss in this case -- set separate loss for detection and tracking queries instead"
+ self.det_exhaustive_loss_scale_pos = det_exhaustive_loss_scale_pos
+ self.det_exhaustive_loss_scale_neg = det_exhaustive_loss_scale_neg
+ self.det_non_exhaustive_loss_scale_pos = det_non_exhaustive_loss_scale_pos
+ self.det_non_exhaustive_loss_scale_neg = det_non_exhaustive_loss_scale_neg
+ self.trk_loss_scale_pos = trk_loss_scale_pos
+ self.trk_loss_scale_neg = trk_loss_scale_neg
+ else:
+ assert (
+ det_exhaustive_loss_scale_pos == 1.0
+ and det_exhaustive_loss_scale_neg == 1.0
+ and det_non_exhaustive_loss_scale_pos == 1.0
+ and det_non_exhaustive_loss_scale_neg == 1.0
+ and trk_loss_scale_pos == 1.0
+ and trk_loss_scale_neg == 1.0
+ ), "If not using separate loss for detection and tracking queries, separate detection and tracking loss scales should all be 1.0"
+
+ def get_loss(self, outputs, targets, indices, num_boxes):
+ assert len(outputs["pred_logits"].shape) > 2, "Incorrect predicted logits shape"
+ assert outputs["pred_logits"].shape[-1] == 1, "Incorrect predicted logits shape"
+ src_logits = outputs["pred_logits"].squeeze(-1)
+ prob = src_logits.sigmoid()
+
+ with torch.no_grad():
+ target_classes = torch.full(
+ src_logits.shape[:2],
+ 0,
+ dtype=torch.float,
+ device=src_logits.device,
+ )
+ target_classes[(indices[0], indices[1])] = 1
+ src_boxes_xyxy = outputs["pred_boxes_xyxy"][(indices[0], indices[1])]
+ target_boxes_giou = (
+ targets["boxes_xyxy"][indices[2]]
+ if indices[2] is not None
+ else targets["boxes_xyxy"]
+ )
+
+ iou = box_ops.fast_diag_box_iou(src_boxes_xyxy, target_boxes_giou)
+ t = prob[(indices[0], indices[1])] ** self.alpha * iou ** (1 - self.alpha)
+ t = torch.clamp(t, 0.01).detach()
+ positive_target_classes = target_classes.clone()
+ positive_target_classes[(indices[0], indices[1])] = t
+
+ # Soft loss on positives
+ if self.pos_focal:
+ loss_bce = sigmoid_focal_loss(
+ src_logits.contiguous(),
+ positive_target_classes,
+ num_boxes=1,
+ alpha=0.5,
+ gamma=self.gamma,
+ reduce=False,
+ )
+ else:
+ loss_bce = F.binary_cross_entropy_with_logits(
+ src_logits, positive_target_classes, reduction="none"
+ )
+ loss_bce = loss_bce * target_classes * self.pos_weight
+
+ if (
+ self.pad_n_queries is not None
+ and isinstance(self.pad_n_queries, int)
+ and loss_bce.size(1) < self.pad_n_queries
+ ):
+ loss_bce = loss_bce * self.pad_scale_pos
+ # Negatives
+ loss_bce = loss_bce + F.binary_cross_entropy_with_logits(
+ src_logits, target_classes, reduction="none"
+ ) * (1 - target_classes) * (prob**self.gamma)
+
+ # Optionally, not applying IABCEMdetr loss to detection queries in video.
+ is_video_grounding = outputs.get("is_video_grounding_batch", False)
+ if is_video_grounding and not self.apply_loss_to_det_queries_in_video_grounding:
+ Q_det = outputs["Q_det"]
+ loss_bce[:, :Q_det] *= 0.0
+ presence_loss = torch.tensor(0.0, device=src_logits.device)
+ presence_dec_acc = torch.tensor(0.0, device=src_logits.device)
+ if self.use_presence:
+            # no classification loss for individual tokens if there is no target GT
+ # cannot directly use targets["num_boxes"] to check if some
+ # GT box exists as there may be dummy boxes for "invisible objects"
+ # in video grounding data
+
+ gt_padded_object_ids = targets["object_ids_padded"] # (B, H)
+ gt_padded_boxes = targets["boxes_padded"] # (B, H, 4) shape, CxCyWH
+ gt_padded_is_visible = (
+ (gt_padded_object_ids >= 0)
+ & (gt_padded_boxes[..., 2] > 0) # width > 0
+ & (gt_padded_boxes[..., 3] > 0) # height > 0
+ )
+ keep_loss = (gt_padded_is_visible.sum(dim=-1)[..., None] != 0).float()
+
+ loss_bce = loss_bce * keep_loss
+
+ if self.use_presence_semgseg:
+                # no loss here; it has its own separate loss computation
+ assert "presence_logit_dec" not in outputs
+ elif "presence_logit_dec" in outputs:
+ presence_logits = outputs["presence_logit_dec"].view_as(keep_loss)
+ bs = presence_logits.shape[0]
+ presence_loss = sigmoid_focal_loss(
+ presence_logits,
+ keep_loss,
+ # not num_boxes, but we'll use it to normalize by bs
+ num_boxes=bs,
+ alpha=self.presence_alpha,
+ gamma=self.presence_gamma,
+ )
+ pred = (presence_logits.sigmoid() > 0.5).float()
+ presence_dec_acc = (pred == keep_loss).float().mean()
+ else:
+ # for o2m, nothing to do
+ pass
+
+ if self.weak_loss:
+ assert not self.use_separate_loss_for_det_and_trk, "Do not use weak_loss in this case -- set separate loss for detection and tracking queries instead"
+
+ # nullify the negative loss for the non-exhaustive classes
+ assert loss_bce.shape[0] == targets["is_exhaustive"].shape[0]
+ assert targets["is_exhaustive"].ndim == 1
+
+ loss_mask = (~targets["is_exhaustive"]).view(-1, 1).expand_as(loss_bce)
+ # restrict the mask to the negative supervision
+ loss_mask = loss_mask & (target_classes < 0.5)
+ loss_mask = ~loss_mask
+ # Mask the loss
+ loss_bce = loss_bce * loss_mask.float()
+ # Average
+ loss_bce = loss_bce.sum() / (loss_mask.sum() + 1e-6)
+ else:
+ # apply separate loss weights to detection and tracking queries
+ if self.use_separate_loss_for_det_and_trk:
+ Q_det = outputs["Q_det"]
+ assert loss_bce.size(1) >= Q_det
+ is_positive = target_classes > 0.5
+ is_positive_det = is_positive[:, :Q_det]
+ is_positive_trk = is_positive[:, Q_det:]
+ assert loss_bce.size(0) == targets["is_exhaustive"].size(0)
+ is_exhaustive = targets["is_exhaustive"].unsqueeze(1).bool()
+ loss_scales = torch.zeros_like(loss_bce)
+ # detection query loss weights
+ loss_scales[:, :Q_det] = (
+ (is_exhaustive & is_positive_det).float()
+ * self.det_exhaustive_loss_scale_pos
+ + (is_exhaustive & ~is_positive_det).float()
+ * self.det_exhaustive_loss_scale_neg
+ + (~is_exhaustive & is_positive_det).float()
+ * self.det_non_exhaustive_loss_scale_pos
+ + (~is_exhaustive & ~is_positive_det).float()
+ * self.det_non_exhaustive_loss_scale_neg
+ )
+ # tracking query weights
+ loss_scales[:, Q_det:] = (
+ is_positive_trk.float() * self.trk_loss_scale_pos
+ + (~is_positive_trk).float() * self.trk_loss_scale_neg
+ )
+ # apply the loss weights
+
+                # an id of -2 means this query comes from false-positive propagation; we don't apply the loss to it
+ if self.no_loss_for_fp_propagation:
+ is_original_queries = outputs["pred_old_obj_ids"] != -2
+ loss_scales *= (is_exhaustive | is_original_queries).float()
+
+ loss_bce = loss_bce * loss_scales
+
+ if self.pad_n_queries is None or loss_bce.size(1) >= self.pad_n_queries:
+ loss_bce = loss_bce.mean()
+ else:
+ assert isinstance(self.pad_n_queries, int)
+ assert (
+ loss_bce.size(1) < self.pad_n_queries
+ ), f"The number of predictions is more than the expected total after padding. Got {loss_bce.size(1)} predictions."
+ loss_bce = loss_bce.sum() / (self.pad_n_queries * loss_bce.size(0))
+
+ bce_f1 = torchmetrics.functional.f1_score(
+ src_logits.sigmoid().flatten(),
+ target=target_classes.flatten().long(),
+ task="binary",
+ )
+
+ losses = {
+ "loss_ce": loss_bce,
+ "ce_f1": bce_f1,
+ "presence_loss": presence_loss,
+ "presence_dec_acc": presence_dec_acc,
+ }
+ return losses
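+
+
+# Small sketch of the IoU-aware soft target used above for matched queries
+# (illustrative numbers; the helper name is made up): the positive target blends
+# the predicted probability and the box IoU as t = p**alpha * iou**(1 - alpha),
+# clamped to at least 0.01.
+def _example_iou_aware_target(alpha=0.25):
+    prob = torch.tensor([0.9, 0.2, 0.05])  # predicted scores of matched queries
+    iou = torch.tensor([0.8, 0.9, 0.1])  # IoU of their boxes with the matched GT boxes
+    t = torch.clamp(prob**alpha * iou ** (1 - alpha), 0.01)
+    return t  # a confident query with an accurate box keeps a target close to 1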
+
+
+class Boxes(LossWithWeights):
+ def __init__(
+ self,
+ weight_dict=None,
+ compute_aux=True,
+ apply_loss_to_det_queries_in_video_grounding=True,
+ ):
+ super().__init__(weight_dict, compute_aux)
+ self.apply_loss_to_det_queries_in_video_grounding = (
+ apply_loss_to_det_queries_in_video_grounding
+ )
+ self.target_keys.extend(["boxes", "boxes_xyxy"])
+
+ def get_loss(self, outputs, targets, indices, num_boxes):
+ """Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss
+ targets dicts must contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]
+        The target boxes are expected in format (center_x, center_y, w, h), normalized by the image size.
+ """
+ # Optionally, not applying Boxes loss to detection queries in video.
+ is_video_grounding = outputs.get("is_video_grounding_batch", False)
+ if is_video_grounding and not self.apply_loss_to_det_queries_in_video_grounding:
+ indices = _keep_only_trk_queries_in_match_inds(
+ indices, Q_det=outputs["Q_det"]
+ )
+
+ assert "pred_boxes" in outputs
+ # idx = self._get_src_permutation_idx(indices)
+ src_boxes = outputs["pred_boxes"][(indices[0], indices[1])]
+ src_boxes_xyxy = outputs["pred_boxes_xyxy"][(indices[0], indices[1])]
+ target_boxes = (
+ targets["boxes"] if indices[2] is None else targets["boxes"][indices[2]]
+ )
+ target_boxes_giou = (
+ targets["boxes_xyxy"]
+ if indices[2] is None
+ else targets["boxes_xyxy"][indices[2]]
+ )
+
+ loss_bbox = F.l1_loss(src_boxes, target_boxes, reduction="none")
+
+ losses = {}
+ losses["loss_bbox"] = loss_bbox.sum() / num_boxes
+
+ loss_giou = 1 - box_ops.fast_diag_generalized_box_iou(
+ src_boxes_xyxy, target_boxes_giou
+ )
+ losses["loss_giou"] = loss_giou.sum() / num_boxes
+ return losses
+
+
+class Masks(LossWithWeights):
+ def __init__(
+ self,
+ weight_dict=None,
+ compute_aux=False,
+ focal_alpha=0.25,
+ focal_gamma=2,
+ num_sample_points=None,
+ oversample_ratio=None,
+ importance_sample_ratio=None,
+ apply_loss_to_det_queries_in_video_grounding=True,
+ ):
+ super().__init__(weight_dict, compute_aux)
+ if compute_aux:
+ warnings.warn("Masks loss usually shouldn't be applied to aux outputs")
+ self.focal_alpha = focal_alpha
+ self.focal_gamma = focal_gamma
+ self.num_sample_points = num_sample_points
+ self.oversample_ratio = oversample_ratio
+ self.importance_sample_ratio = importance_sample_ratio
+ self.apply_loss_to_det_queries_in_video_grounding = (
+ apply_loss_to_det_queries_in_video_grounding
+ )
+ self.target_keys.extend(["masks", "is_valid_mask"])
+
+ def _sampled_loss(self, src_masks, target_masks, num_boxes):
+ assert len(src_masks.shape) == 3 and len(target_masks.shape) == 3
+ src_masks = src_masks[:, None]
+ target_masks = target_masks[:, None]
+ with torch.no_grad():
+ # Sample point_coords
+ point_coords = get_uncertain_point_coords_with_randomness(
+ src_masks,
+ calculate_uncertainty,
+ self.num_sample_points,
+ self.oversample_ratio,
+ self.importance_sample_ratio,
+ )
+
+ # get GT labels
+ sampled_target_masks = point_sample(
+ target_masks,
+ point_coords,
+ align_corners=False,
+ ).squeeze(1)
+
+ sampled_src_masks = point_sample(
+ src_masks,
+ point_coords,
+ align_corners=False,
+ ).squeeze(1)
+
+ losses = {
+ "loss_mask": sigmoid_focal_loss(
+ sampled_src_masks,
+ sampled_target_masks,
+ num_boxes,
+ alpha=self.focal_alpha,
+ gamma=self.focal_gamma,
+ ),
+ "loss_dice": dice_loss(sampled_src_masks, sampled_target_masks, num_boxes),
+ }
+ # Not needed for backward
+ del src_masks
+ del target_masks
+
+ return losses
+
+ def get_loss(self, outputs, targets, indices, num_boxes):
+ """Compute the losses related to the masks: the focal loss and the dice loss.
+ targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w]
+ """
+ assert "pred_masks" in outputs
+ assert "is_valid_mask" in targets
+ # Optionally, not applying Masks loss to detection queries in video.
+ is_video_grounding = outputs.get("is_video_grounding_batch", False)
+ if is_video_grounding and not self.apply_loss_to_det_queries_in_video_grounding:
+ indices = _keep_only_trk_queries_in_match_inds(
+ indices, Q_det=outputs["Q_det"]
+ )
+
+ src_masks = outputs["pred_masks"]
+
+ # Dataset doesn't have segmentation masks
+ if targets["masks"] is None:
+ return {
+ "loss_mask": torch.tensor(0.0, device=src_masks.device),
+ "loss_dice": torch.tensor(0.0, device=src_masks.device),
+ }
+
+ target_masks = (
+ targets["masks"] if indices[2] is None else targets["masks"][indices[2]]
+ )
+ target_masks = target_masks.to(src_masks)
+ keep = (
+ targets["is_valid_mask"]
+ if indices[2] is None
+ else targets["is_valid_mask"][indices[2]]
+ )
+
+ src_masks = src_masks[(indices[0], indices[1])]
+
+ # Remove invalid masks from loss
+ src_masks = src_masks[keep]
+ target_masks = target_masks[keep]
+
+ if self.num_sample_points is not None:
+ # Compute loss on sampled points for the Mask
+ losses = self._sampled_loss(src_masks, target_masks, num_boxes)
+
+ else:
+ # upsample predictions to the target size
+ if target_masks.shape[0] == 0 and src_masks.shape[0] == 0:
+ src_masks = src_masks.flatten(1)
+ target_masks = target_masks.reshape(src_masks.shape)
+ else:
+ if len(src_masks.shape) == 3:
+ src_masks = src_masks[:, None]
+ if src_masks.dtype == torch.bfloat16:
+ # Bilinear interpolation does not support bf16
+ src_masks = src_masks.to(dtype=torch.float32)
+ src_masks = interpolate(
+ src_masks,
+ size=target_masks.shape[-2:],
+ mode="bilinear",
+ align_corners=False,
+ )
+ src_masks = src_masks[:, 0].flatten(1)
+ target_masks = target_masks.flatten(1)
+
+ losses = {
+ "loss_mask": sigmoid_focal_loss(
+ src_masks,
+ target_masks,
+ num_boxes,
+ alpha=self.focal_alpha,
+ gamma=self.focal_gamma,
+ ),
+ "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
+ }
+
+ return losses
+
+
+# class MultiStepIteractiveMasks(LossWithWeights):
+# def __init__(
+# self,
+# weight_dict=None,
+# compute_aux=False,
+# focal_alpha=0.25,
+# focal_gamma=2,
+# ):
+# warnings.warn(
+# "MultiStepIteractiveMasks is deprecated. Please use MultiStepMultiMasksAndIous",
+# DeprecationWarning,
+# )
+# super().__init__(weight_dict, compute_aux)
+# self.focal_alpha = focal_alpha
+# self.focal_gamma = focal_gamma
+# self.target_keys.extend(["masks"])
+
+# def get_loss(self, outputs, targets, indices, num_boxes):
+# """Compute the losses related to the masks: the focal loss and the dice loss.
+# targets dicts must contain the key "masks" containing a tensor of dim [nb_target_boxes, h, w]
+
+# Unlike `Masks`, here the "multistep_pred_masks" can have multiple channels, each
+# corresponding to one iterative prediction step in SAM-style training. We treat each
+# channel as a mask prediction and sum the loss across channels.
+# """
+# src_masks = outputs["multistep_pred_masks"]
+# target_masks = targets["masks"]
+# assert src_masks.size(0) == target_masks.size(0)
+# assert src_masks.dim() == 4
+# assert target_masks.dim() == 3
+
+# # tile target_masks according to the number of
+# # channels `src_masks`.
+# num_steps = src_masks.size(1)
+# target_masks = target_masks.unsqueeze(1).to(src_masks.dtype)
+# if num_steps > 1:
+# target_masks = target_masks.repeat(1, num_steps, 1, 1)
+
+# # resize `src_masks` to target mask resolution
+# if src_masks.shape != target_masks.shape:
+# src_masks = interpolate(
+# src_masks,
+# size=target_masks.shape[-2:],
+# mode="bilinear",
+# align_corners=False,
+# )
+# assert src_masks.shape == target_masks.shape
+
+# # flatten the multiple steps in to the batch dimension
+# src_masks = src_masks.flatten(0, 1).flatten(1)
+# target_masks = target_masks.flatten(0, 1).flatten(1)
+# losses = {
+# "loss_mask": sigmoid_focal_loss(
+# src_masks,
+# target_masks,
+# num_boxes,
+# alpha=self.focal_alpha,
+# gamma=self.focal_gamma,
+# ),
+# "loss_dice": dice_loss(src_masks, target_masks, num_boxes),
+# }
+
+# return losses
+
+
+# class MultiStepMultiMasksAndIous(LossWithWeights):
+# def __init__(
+# self,
+# weight_dict=None,
+# compute_aux=False,
+# focal_alpha=0.25,
+# focal_gamma=2,
+# # if True, back-prop on all predicted ious
+# # not just the one with lowest loss_combo
+# supervise_all_iou=False,
+# # Less slack vs MSE loss in [-1, 1] error range
+# iou_use_l1_loss=False,
+# # Settings for obj score prediction
+# pred_obj_scores=False,
+# focal_gamma_obj_score=0.0,
+# focal_alpha_obj_score=-1,
+# ):
+# super().__init__(weight_dict, compute_aux)
+# self.focal_alpha = focal_alpha
+# self.focal_gamma = focal_gamma
+# self.target_keys.extend(["masks"])
+# assert "loss_mask" in self.weight_dict
+# assert "loss_dice" in self.weight_dict
+# assert "loss_iou" in self.weight_dict
+# if "loss_class" not in self.weight_dict:
+# self.weight_dict["loss_class"] = 0.0
+# self.focal_alpha_obj_score = focal_alpha_obj_score
+# self.focal_gamma_obj_score = focal_gamma_obj_score
+# self.supervise_all_iou = supervise_all_iou
+# self.iou_use_l1_loss = iou_use_l1_loss
+# self.pred_obj_scores = pred_obj_scores
+
+# def get_loss(self, outputs, targets, indices, num_boxes):
+# """
+# Compute the losses related to the masks: the focal loss and the dice loss.
+# and also the MSE loss between predicted IoUs and actual IoUs.
+
+# Here "multistep_pred_multimasks_high_res" is a list of multimasks (tensors
+# of shape [N, M, H, W], where M could be 1 or larger, corresponding to
+# one or multiple predicted masks from a click.
+
+# We back-propagate focal, dice and iou losses only on the prediction channel
+# with the lowest focal+dice loss between predicted mask and ground-truth.
+# """
+
+# target_masks = targets["masks"].unsqueeze(1).float()
+# assert target_masks.dim() == 4 # [N, 1, H, W]
+# src_masks_list = outputs["multistep_pred_multimasks_high_res"]
+# ious_list = outputs["multistep_pred_ious"]
+# object_score_logits_list = outputs["multistep_object_score_logits"]
+
+# assert len(src_masks_list) == len(ious_list)
+# assert len(object_score_logits_list) == len(ious_list)
+
+# # Remove invalid masks from loss
+# keep = targets["is_valid_mask"]
+# target_masks = target_masks[keep]
+
+# # accumulate the loss over prediction steps
+# losses = {"loss_mask": 0, "loss_dice": 0, "loss_iou": 0, "loss_class": 0}
+# for src_masks, ious, object_score_logits in zip(
+# src_masks_list, ious_list, object_score_logits_list
+# ):
+# object_score_logits = object_score_logits[keep]
+# ious = ious[keep]
+# src_masks = src_masks[keep]
+# self._update_losses(
+# losses, src_masks, target_masks, ious, num_boxes, object_score_logits
+# )
+# return losses
+
+# def _update_losses(
+# self, losses, src_masks, target_masks, ious, num_boxes, object_score_logits
+# ):
+# target_masks = target_masks.expand_as(src_masks)
+# # get focal, dice and iou loss on all output masks in a prediction step
+# loss_multimask = sigmoid_focal_loss(
+# src_masks,
+# target_masks,
+# num_boxes,
+# alpha=self.focal_alpha,
+# gamma=self.focal_gamma,
+# loss_on_multimask=True,
+# triton=False, # only use triton if alpha > 0
+# )
+# loss_multidice = dice_loss(
+# src_masks, target_masks, num_boxes, loss_on_multimask=True
+# )
+# if not self.pred_obj_scores:
+# loss_class = torch.tensor(
+# 0.0, dtype=loss_multimask.dtype, device=loss_multimask.device
+# )
+# target_obj = torch.ones(
+# loss_multimask.shape[0],
+# 1,
+# dtype=loss_multimask.dtype,
+# device=loss_multimask.device,
+# )
+# else:
+# target_obj = torch.any((target_masks[:, 0] > 0).flatten(1), dim=-1)[
+# ..., None
+# ].float()
+# loss_class = sigmoid_focal_loss(
+# object_score_logits,
+# target_obj,
+# num_boxes,
+# alpha=self.focal_alpha_obj_score,
+# gamma=self.focal_gamma_obj_score,
+# triton=False,
+# )
+
+# loss_multiiou = iou_loss(
+# src_masks,
+# target_masks,
+# ious,
+# num_boxes,
+# loss_on_multimask=True,
+# use_l1_loss=self.iou_use_l1_loss,
+# )
+# assert loss_multimask.dim() == 2
+# assert loss_multidice.dim() == 2
+# assert loss_multiiou.dim() == 2
+# if loss_multimask.size(1) > 1:
+# # take the mask indices with the smallest focal + dice loss for back propagation
+# loss_combo = (
+# loss_multimask * self.weight_dict["loss_mask"]
+# + loss_multidice * self.weight_dict["loss_dice"]
+# )
+# best_loss_inds = torch.argmin(loss_combo, dim=-1)
+# batch_inds = torch.arange(loss_combo.size(0), device=loss_combo.device)
+# loss_mask = loss_multimask[batch_inds, best_loss_inds].unsqueeze(1)
+# loss_dice = loss_multidice[batch_inds, best_loss_inds].unsqueeze(1)
+# # calculate the iou prediction and slot losses only in the index
+# # with the minimum loss for each mask (to be consistent w/ SAM)
+# if self.supervise_all_iou:
+# loss_iou = loss_multiiou.mean(dim=-1).unsqueeze(1)
+# else:
+# loss_iou = loss_multiiou[batch_inds, best_loss_inds].unsqueeze(1)
+# else:
+# loss_mask = loss_multimask
+# loss_dice = loss_multidice
+# loss_iou = loss_multiiou
+
+# # backprop focal, dice and iou loss only if obj present
+# loss_mask = loss_mask * target_obj
+# loss_dice = loss_dice * target_obj
+# loss_iou = loss_iou * target_obj
+
+# # sum over batch dimension (note that the losses are already divided by num_boxes)
+# losses["loss_mask"] += loss_mask.sum()
+# losses["loss_dice"] += loss_dice.sum()
+# losses["loss_iou"] += loss_iou.sum()
+# losses["loss_class"] += loss_class
+
+
+# class TextCriterion(LossWithWeights):
+# def __init__(
+# self,
+# pad_token,
+# max_seq_len=100,
+# weight_dict=None,
+# compute_aux=False,
+# ):
+# super().__init__(weight_dict, compute_aux)
+# self.pad_token = pad_token
+# self.max_seq_len = max_seq_len
+# self.in_lengths = None
+
+# def get_loss(self, outputs, **kwargs):
+# nb_tokens = outputs["captioning_tokenized_target"].input_ids.numel()
+# bs, seq_len = outputs["captioning_tokenized_target"].input_ids.shape
+# ce = F.cross_entropy(
+# outputs["captioning_pred_text"].flatten(0, -2),
+# outputs["captioning_tokenized_target"].input_ids.flatten(),
+# ignore_index=self.pad_token,
+# reduction="sum",
+# )
+
+# not_pad = (
+# outputs["captioning_tokenized_target"]
+# .input_ids.reshape(-1)
+# .ne(self.pad_token)
+# )
+
+# if nb_tokens > 0:
+# nb_non_pad = not_pad.numel()
+# ce = ce / nb_non_pad
+
+# preds = outputs["captioning_pred_text"].flatten(0, -2).argmax(-1)[not_pad]
+# targets = outputs["captioning_tokenized_target"].input_ids.flatten()[not_pad]
+# correct = preds == targets
+# correct = correct.sum() / (correct.numel() + 1e-5)
+
+# correct_sequence_level = torch.all(
+# (
+# outputs["captioning_pred_text"]
+# .flatten(0, -2)
+# .argmax(-1)
+# .reshape(bs, seq_len)
+# == outputs["captioning_tokenized_target"].input_ids
+# )
+# | (~not_pad).view(bs, seq_len),
+# dim=1,
+# )
+# seq_level_acc = correct_sequence_level.float().mean()
+
+# return {"loss_text": ce, "text_acc": correct, "text_seq_acc": seq_level_acc}
+
+
+def segment_miou(source, target):
+ """Compute the mean IoU between two sets of masks"""
+ assert source.shape == target.shape, "The two masks must have the same shape"
+ assert source.ndim == 3, "The masks must be 3D"
+
+ valid_targets = (target.sum(dim=(1, 2)) > 0).sum()
+ if valid_targets == 0:
+ return torch.tensor(1.0, device=source.device)
+ intersection = (source.bool() & target.bool()).sum(dim=(1, 2))
+ union = (source.bool() | target.bool()).sum(dim=(1, 2))
+ iou = intersection / (union + 1e-8)
+ return iou.sum() / valid_targets
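+
+
+# Illustrative sketch for segment_miou above (made-up masks): the IoU is averaged
+# only over non-empty targets, and a batch with no non-empty target returns 1.0.
+def _example_segment_miou():
+    target = torch.zeros(2, 4, 4, dtype=torch.bool)
+    target[0, :2, :] = True  # only the first target is non-empty
+    source = torch.zeros(2, 4, 4, dtype=torch.bool)
+    source[0, :1, :] = True  # covers half of the first target
+    miou = segment_miou(source, target)
+    return miou  # 0.5 -- IoU of 0.5 on the first pair; the empty target is ignored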
+
+
+class SemanticSegCriterion(LossWithWeights):
+ def __init__(
+ self,
+ weight_dict,
+ focal: bool = False,
+ focal_alpha: float = 0.6,
+ focal_gamma: float = 1.6,
+ downsample: bool = True,
+ presence_head: bool = False,
+        # Option to turn off the presence loss if some other component
+        # (e.g. the decoder) is already computing it -- in which case
+        # we can still set presence_head to True so that
+        # losses are not propagated to the masks when there is no GT mask
+ presence_loss: bool = True,
+ ):
+ super().__init__(weight_dict, False)
+ self.focal = focal
+ self.focal_alpha = focal_alpha
+ self.focal_gamma = focal_gamma
+ self.downsample = downsample
+ self.presence_head = presence_head
+ self.presence_loss = presence_loss
+
+ def get_loss(self, out_dict, targets):
+ outputs = out_dict["semantic_seg"]
+ presence_logit = out_dict["presence_logit"]
+ if (
+ "semantic_masks" in targets
+ and targets["semantic_masks"] is not None
+ and targets["semantic_masks"].size(0) > 0
+ ):
+ semantic_targets = targets["semantic_masks"]
+ with torch.no_grad():
+ if self.downsample:
+ # downsample targets to the size of predictions
+ size = outputs.shape[-2:]
+ semantic_targets = (
+ F.interpolate(
+ semantic_targets.float().unsqueeze(1),
+ size=size,
+ mode="bilinear",
+ align_corners=False,
+ )
+ .squeeze(1)
+ .bool()
+ )
+ else:
+ with torch.no_grad():
+ if self.downsample:
+ # downsample targets to the size of predictions
+ size = outputs.shape[-2:]
+ segments = (
+ F.interpolate(
+ targets["masks"].float().unsqueeze(1),
+ size=size,
+ mode="bilinear",
+ align_corners=False,
+ )
+ .squeeze(1)
+ .bool()
+ )
+ else:
+ segments = targets["masks"].bool()
+
+ # the annotations are for instance segmentation, so we merge them to get semantic segmentation
+ semantic_targets = instance_masks_to_semantic_masks(
+ segments, targets["num_boxes"]
+ )
+
+ if not self.downsample:
+ # upsample predictions to the target size
+ size = semantic_targets.shape[-2:]
+ outputs = F.interpolate(
+ outputs.float(),
+ size=size,
+ mode="bilinear",
+ align_corners=False,
+ )
+
+ if self.focal:
+ loss = sigmoid_focal_loss(
+ outputs.squeeze(1).flatten(-2),
+ semantic_targets.float().flatten(-2),
+ num_boxes=len(semantic_targets),
+ alpha=self.focal_alpha,
+ gamma=self.focal_gamma,
+ reduce=not self.presence_head,
+ )
+ if self.presence_head:
+ loss = loss.mean(1)
+ else:
+ loss = F.binary_cross_entropy_with_logits(
+ outputs.squeeze(1),
+ semantic_targets.float(),
+ reduction="none" if self.presence_head else "mean",
+ )
+ if self.presence_head:
+ loss = loss.flatten(1).mean(1)
+
+ loss_dice = dice_loss(
+ outputs.squeeze(1).flatten(1),
+ semantic_targets.flatten(1),
+ len(semantic_targets),
+ reduce=not self.presence_head,
+ )
+
+ miou = segment_miou(outputs.sigmoid().squeeze(1) > 0.5, semantic_targets)
+
+ loss_dict = {}
+
+ if self.presence_head:
+ presence_target = semantic_targets.flatten(1).any(-1)
+ if self.presence_loss:
+ loss_presence = F.binary_cross_entropy_with_logits(
+ presence_logit.flatten(),
+ presence_target.float(),
+ )
+ presence_acc = (
+ ((presence_logit.flatten().sigmoid() > 0.5) == presence_target)
+ .float()
+ .mean()
+ )
+ else:
+ # Dummy values
+ loss_presence = torch.tensor(0.0, device=loss.device)
+ # Whichever component is computing the presence loss,
+ # should also track presence_acc
+ presence_acc = torch.tensor(0.0, device=loss.device)
+
+ loss_dict["loss_semantic_presence"] = loss_presence
+ loss_dict["presence_acc"] = presence_acc
+
+ # reduce the other losses, skipping the negative ones
+ bs = loss.shape[0]
+ assert presence_target.numel() == bs
+
+ mask = presence_target
+ nb_valid = presence_target.sum().item()
+
+ loss = (loss * mask.float()).sum() / (nb_valid + 1e-6)
+ loss_dice = (loss_dice * mask.float()).sum() / (nb_valid + 1e-6)
+
+ loss_dict.update(
+ {
+ "loss_semantic_seg": loss,
+ "loss_semantic_dice": loss_dice,
+ "miou_semantic_seg": miou,
+ }
+ )
+
+ return loss_dict
+
+
+class Det2TrkAssoc(LossWithWeights):
+ def __init__(
+ self,
+ weight_dict,
+ use_fp_loss=False,
+ fp_loss_on_exhaustive_only=True,
+ treat_fp_as_new_obj=False,
+ ):
+ super().__init__(weight_dict, compute_aux=False)
+ self.use_fp_loss = use_fp_loss
+ self.fp_loss_on_exhaustive_only = fp_loss_on_exhaustive_only
+ self.treat_fp_as_new_obj = treat_fp_as_new_obj
+ if self.use_fp_loss:
+ self.target_keys.append("is_exhaustive")
+
+ def get_loss(self, outputs, targets, indices, num_boxes):
+ det2trk_assoc_logits = outputs["det2trk_assoc_logits"]
+ device = det2trk_assoc_logits.device
+ B, Q_det, Q_trk_plus_2 = det2trk_assoc_logits.shape
+ assert Q_trk_plus_2 >= 2
+ Q_trk = Q_trk_plus_2 - 2
+
+ # We only apply association losses to those detection queries that either match
+ # a GT instance or have score > 0 (i.e. those TP, FN and FP detection queries)
+ matched_object_ids = outputs["matched_object_ids"]
+ assert matched_object_ids.shape == (B, Q_det + Q_trk)
+ matched_obj_ids_det = matched_object_ids[:, :Q_det]
+ matched_obj_ids_trk = matched_object_ids[:, Q_det:]
+ det_is_matched_to_gt = matched_obj_ids_det >= 0
+ trk_is_matched_to_gt = matched_obj_ids_trk >= 0
+
+ # note: -1 label is ignored in the (softmax) cross_entropy loss below
+ det2trk_assoc_labels = -torch.ones(B, Q_det, dtype=torch.long, device=device)
+ # a) If a detection query is matched to a same object ID as a tracking query,
+ # we assign it the index of the tracking query as a label
+ det_is_same_obj_id_as_trk = (
+ det_is_matched_to_gt[:, :, None]
+ & trk_is_matched_to_gt[:, None, :]
+ & (matched_obj_ids_det[:, :, None] == matched_obj_ids_trk[:, None, :])
+ )
+ batch_idx, det_idx, trk_idx = det_is_same_obj_id_as_trk.nonzero(as_tuple=True)
+ det2trk_assoc_labels[batch_idx, det_idx] = trk_idx
+
+ # b) If a detection query is matched to GT but not to any tracking query,
+ # we assign it a "new_object" label
+ det_is_new_obj = det_is_matched_to_gt & ~det_is_same_obj_id_as_trk.any(dim=-1)
+ det2trk_assoc_labels[det_is_new_obj] = Q_trk
+
+        # c) If a detection query is not matched to GT but has score > 0,
+ # we assign it a "false_positive" label
+ if self.use_fp_loss:
+ det_is_above_thresh = outputs["pred_logits"][:, :Q_det].squeeze(2) > 0
+ det_is_fp = ~det_is_matched_to_gt & det_is_above_thresh
+ if self.treat_fp_as_new_obj:
+ det2trk_assoc_labels[det_is_fp] = Q_trk
+ else:
+ if self.fp_loss_on_exhaustive_only:
+ # only count FP detections on batches that are exhaustively annotated
+ det_is_fp &= targets["is_exhaustive"].unsqueeze(1).bool()
+ det2trk_assoc_labels[det_is_fp] = Q_trk + 1
+
+ # softmax cross-entropy loss for detection-to-tracking association
+ loss_det2trk_assoc = F.cross_entropy(
+ input=det2trk_assoc_logits.flatten(0, 1), # (B * Q_det, Q_trk + 2)
+ target=det2trk_assoc_labels.flatten(0, 1), # (B * Q_det)
+ ignore_index=-1,
+ reduction="none",
+ ).view(B, Q_det)
+        # skip det2trk association loss on frames w/o any (non-padding) tracking queries
+ frame_has_valid_trk = trk_is_matched_to_gt.any(dim=-1, keepdims=True) # (B, 1)
+ loss_det2trk_assoc = loss_det2trk_assoc * frame_has_valid_trk.float()
+
+ loss_det2trk_assoc = loss_det2trk_assoc.sum() / (B * num_boxes)
+ return {"loss_det2trk_assoc": loss_det2trk_assoc}
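+
+
+# Standalone sketch of the detection-to-tracking label assignment above
+# (illustrative object ids; the false-positive case c) is omitted): a detection
+# matched to the same GT object as a tracking query gets that tracking query's
+# index, a detection matched to GT but to no tracking query gets the "new object"
+# label Q_trk, and unmatched detections keep the ignored label -1.
+def _example_det2trk_labels():
+    matched_obj_ids_det = torch.tensor([[5, 7, -1]])  # (B=1, Q_det=3)
+    matched_obj_ids_trk = torch.tensor([[7, 9]])  # (B=1, Q_trk=2)
+    Q_trk = matched_obj_ids_trk.shape[1]
+    det_matched = matched_obj_ids_det >= 0
+    trk_matched = matched_obj_ids_trk >= 0
+    same_obj = (
+        det_matched[:, :, None]
+        & trk_matched[:, None, :]
+        & (matched_obj_ids_det[:, :, None] == matched_obj_ids_trk[:, None, :])
+    )
+    labels = -torch.ones_like(matched_obj_ids_det)
+    batch_idx, det_idx, trk_idx = same_obj.nonzero(as_tuple=True)
+    labels[batch_idx, det_idx] = trk_idx
+    labels[det_matched & ~same_obj.any(dim=-1)] = Q_trk
+    return labels  # tensor([[2, 0, -1]]): new object, tracked by trk query 0, ignored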
+
+
+class TrackingByDetectionAssoc(LossWithWeights):
+ def __init__(self, weight_dict):
+ super().__init__(weight_dict, compute_aux=False, supports_o2m_loss=False)
+ assert "loss_det2trk_assoc" in self.weight_dict
+ assert "loss_trk2det_assoc" in self.weight_dict
+
+ def get_loss(self, outputs, targets, indices, num_boxes):
+ # Part A: gather object id matching between detection and tracking
+ det2trk_assoc_logits = outputs["det2trk_assoc_logits"] # (B, Q_det+1, Q_trk+1)
+ B, Q_det_plus_1, Q_trk_plus_1 = det2trk_assoc_logits.shape
+ assert Q_det_plus_1 >= 1 and Q_trk_plus_1 >= 1
+ Q_det = Q_det_plus_1 - 1
+ Q_trk = Q_trk_plus_1 - 1
+ device = det2trk_assoc_logits.device
+
+ matched_obj_ids_det = outputs["matched_object_ids"]
+ assert matched_obj_ids_det.shape == (B, Q_det)
+ det_is_matched_to_gt = matched_obj_ids_det >= 0
+ matched_obj_ids_trk = outputs["prev_trk_object_ids"]
+ assert matched_obj_ids_trk.shape == (B, Q_trk)
+ trk_is_matched_to_gt = matched_obj_ids_trk >= 0
+ frame_has_valid_trk = trk_is_matched_to_gt.any(dim=-1, keepdims=True) # (B, 1)
+
+ # check whether a detection object is the same as a tracking object
+ det_is_same_obj_id_as_trk = (
+ det_is_matched_to_gt[:, :, None]
+ & trk_is_matched_to_gt[:, None, :]
+ & (matched_obj_ids_det[:, :, None] == matched_obj_ids_trk[:, None, :])
+ ) # (B, Q_det, Q_trk)
+ # there should be at most one match for each detection and each previous tracked object
+ torch._assert_async(torch.all(det_is_same_obj_id_as_trk.sum(dim=2) <= 1))
+ torch._assert_async(torch.all(det_is_same_obj_id_as_trk.sum(dim=1) <= 1))
+ batch_idx, det_idx, trk_idx = det_is_same_obj_id_as_trk.nonzero(as_tuple=True)
+
+ # Part B: Detection-to-tracking association loss
+ # assign detection-to-tracking labels (note: -1 label is ignored in the loss below)
+ det2trk_assoc_labels = -torch.ones(B, Q_det, dtype=torch.long, device=device)
+ det2trk_assoc_labels[batch_idx, det_idx] = trk_idx
+ # if a detection is matched to GT but not to any tracking, assign it a "new-object" label
+ det_is_new_obj = det_is_matched_to_gt & ~det_is_same_obj_id_as_trk.any(dim=2)
+ det2trk_assoc_labels[det_is_new_obj] = Q_trk # "Q_trk" label is "new-object"
+
+ # softmax cross-entropy loss for detection-to-tracking association
+ loss_det2trk_assoc = F.cross_entropy(
+ input=det2trk_assoc_logits[:, :-1].flatten(0, 1), # (B*Q_det, Q_trk+1)
+ target=det2trk_assoc_labels.flatten(0, 1), # (B*Q_det)
+ ignore_index=-1,
+ reduction="none",
+ ).view(B, Q_det)
+        # skip det2trk association loss on frames w/o any (non-padding) tracking queries
+ loss_det2trk_assoc = loss_det2trk_assoc * frame_has_valid_trk.float()
+ loss_det2trk_assoc = loss_det2trk_assoc.sum() / (B * num_boxes)
+ loss_dict = {"loss_det2trk_assoc": loss_det2trk_assoc}
+
+ # Part C: tracking-to-detection association loss
+ trk2det_assoc_logits = det2trk_assoc_logits.transpose(1, 2)
+ assert trk2det_assoc_logits.shape == (B, Q_trk + 1, Q_det + 1)
+ # assign tracking-to-detection labels (note: -1 label is ignored in the loss below)
+ trk2det_assoc_labels = -torch.ones(B, Q_trk, dtype=torch.long, device=device)
+ trk2det_assoc_labels[batch_idx, trk_idx] = det_idx
+        # if a tracking query is matched to GT but not to any detection, assign it an "occluded" label
+ trk_is_occluded = trk_is_matched_to_gt & ~det_is_same_obj_id_as_trk.any(dim=1)
+ trk2det_assoc_labels[trk_is_occluded] = Q_det # "Q_det" label is "occluded"
+
+ # softmax cross-entropy loss for tracking-to-detection association
+ loss_trk2det_assoc = F.cross_entropy(
+ input=trk2det_assoc_logits[:, :-1].flatten(0, 1), # (B*Q_trk, Q_det+1)
+ target=trk2det_assoc_labels.flatten(0, 1), # (B*Q_trk)
+ ignore_index=-1,
+ reduction="none",
+ ).view(B, Q_trk)
+ # skip trk2det association loss on frames w/o any (non-padding) tracking queries
+ loss_trk2det_assoc = loss_trk2det_assoc * frame_has_valid_trk.float()
+ loss_trk2det_assoc = loss_trk2det_assoc.sum() / (B * num_boxes)
+ loss_dict["loss_trk2det_assoc"] = loss_trk2det_assoc
+
+ return loss_dict
+
+
+def _keep_only_trk_queries_in_match_inds(inds, Q_det):
+ """Keep only the tracking query indices in the indices tuple"""
+ batch_idx, src_idx, tgt_idx = inds
+ if batch_idx.numel() == 0:
+ return (batch_idx, src_idx, tgt_idx) # empty indices, nothing to filter
+
+ # keep only the tracking query indices
+ is_trk_query = src_idx >= Q_det
+ batch_idx_trk = batch_idx[is_trk_query]
+ src_idx_trk = src_idx[is_trk_query]
+ tgt_idx_trk = tgt_idx[is_trk_query] if tgt_idx is not None else None
+ return (batch_idx_trk, src_idx_trk, tgt_idx_trk)
diff --git a/sam3/train/loss/mask_sampling.py b/sam3/train/loss/mask_sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..aeba3fe7ff3ede863a3b3135b036220074010243
--- /dev/null
+++ b/sam3/train/loss/mask_sampling.py
@@ -0,0 +1,113 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+from typing import Callable
+
+import torch
+from torch.nn import functional as F
+
+
+# Adapted from https://github.com/facebookresearch/detectron2/blob/main/projects/PointRend/point_rend/point_features.py
+def point_sample(input, point_coords, **kwargs):
+ """
+ A wrapper around :function:`torch.nn.functional.grid_sample` to support 3D point_coords tensors.
+ Unlike :function:`torch.nn.functional.grid_sample` it assumes `point_coords` to lie inside
+ [0, 1] x [0, 1] square.
+
+ Args:
+ input (Tensor): A tensor of shape (N, C, H, W) that contains features map on a H x W grid.
+ point_coords (Tensor): A tensor of shape (N, P, 2) or (N, Hgrid, Wgrid, 2) that contains
+ [0, 1] x [0, 1] normalized point coordinates.
+
+ Returns:
+ output (Tensor): A tensor of shape (N, C, P) or (N, C, Hgrid, Wgrid) that contains
+ features for points in `point_coords`. The features are obtained via bilinear
+            interpolation from `input` the same way as :function:`torch.nn.functional.grid_sample`.
+ """
+ add_dim = False
+ if point_coords.dim() == 3:
+ add_dim = True
+ point_coords = point_coords.unsqueeze(2)
+ normalized_point_coords = 2.0 * point_coords - 1.0 # Normalize to [-1,1]
+ output = F.grid_sample(input, normalized_point_coords, **kwargs)
+ if add_dim:
+ output = output.squeeze(3)
+ return output
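+
+
+# Illustrative sketch for point_sample above (made-up feature map): coordinates in
+# [0, 1] x [0, 1] are remapped internally to grid_sample's [-1, 1] range, so (0.5, 0.5)
+# samples the center of the feature map.
+def _example_point_sample():
+    feat = torch.arange(16.0).view(1, 1, 4, 4)  # (N=1, C=1, H=4, W=4)
+    coords = torch.tensor([[[0.5, 0.5], [0.0, 0.0]]])  # (N=1, P=2, 2) in [0, 1]
+    out = point_sample(feat, coords, align_corners=False)
+    return out  # shape (1, 1, 2): bilinear samples at the center and near the top-left corner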
+
+
+# Adapted from https://github.com/facebookresearch/detectron2/blob/main/projects/PointRend/point_rend/point_features.py
+def get_uncertain_point_coords_with_randomness(
+ logits: torch.Tensor,
+ uncertainty_func: Callable,
+ num_points: int,
+ oversample_ratio: int,
+ importance_sample_ratio: float,
+) -> torch.Tensor:
+ """
+    Sample points in [0, 1] x [0, 1] coordinate space based on their uncertainty. The uncertainties
+    are calculated for each point with the 'uncertainty_func' function, which takes the point's logit
+    prediction as input.
+ See PointRend paper for details.
+
+ Args:
+ logits (Tensor): A tensor of shape (N, C, Hmask, Wmask) or (N, 1, Hmask, Wmask) for
+ class-specific or class-agnostic prediction.
+ uncertainty_func: A function that takes a Tensor of shape (N, C, P) or (N, 1, P) that
+ contains logit predictions for P points and returns their uncertainties as a Tensor of
+ shape (N, 1, P).
+ num_points (int): The number of points P to sample.
+ oversample_ratio (int): Oversampling parameter.
+        importance_sample_ratio (float): Ratio of points that are sampled via importance sampling.
+
+ Returns:
+ point_coords (Tensor): A tensor of shape (N, P, 2) that contains the coordinates of P
+ sampled points.
+ """
+ assert oversample_ratio >= 1
+ assert importance_sample_ratio <= 1 and importance_sample_ratio >= 0
+ num_boxes = logits.shape[0]
+ num_sampled = int(num_points * oversample_ratio)
+ point_coords = torch.rand(num_boxes, num_sampled, 2, device=logits.device)
+ point_logits = point_sample(logits, point_coords, align_corners=False)
+ # It is crucial to calculate uncertainty based on the sampled prediction value for the points.
+ # Calculating uncertainties of the predictions first and sampling them for points leads
+ # to incorrect results.
+ # To illustrate this: assume uncertainty_func(logits)=-abs(logits), a sampled point between
+ # two predictions with -1 and 1 logits has 0 logits, and therefore 0 uncertainty value.
+ # However, if we calculate uncertainties for the predictions first,
+ # both will have -1 uncertainty, and the sampled point will get -1 uncertainty.
+ point_uncertainties = uncertainty_func(point_logits)
+ num_uncertain_points = int(importance_sample_ratio * num_points)
+ num_random_points = num_points - num_uncertain_points
+ idx = torch.topk(point_uncertainties[:, 0, :], k=num_uncertain_points, dim=1)[1]
+ # Flatten the indices
+ shift = num_sampled * torch.arange(
+ num_boxes, dtype=torch.long, device=logits.device
+ )
+ idx += shift[:, None]
+ point_coords = point_coords.view(-1, 2)[idx.view(-1), :].view(
+ num_boxes, num_uncertain_points, 2
+ )
+ if num_random_points > 0:
+ point_coords = torch.cat(
+ [
+ point_coords,
+ torch.rand(num_boxes, num_random_points, 2, device=logits.device),
+ ],
+ dim=1,
+ )
+ return point_coords
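+
+
+# Illustrative sketch (made-up logits; the uncertainty function is passed as a
+# lambda here, mirroring `calculate_uncertainty` below): of the 64 requested
+# points, 75% are importance-sampled near the decision boundary and the rest
+# are drawn uniformly at random.
+def _example_uncertain_point_sampling():
+    logits = torch.randn(2, 1, 32, 32)  # (N, 1, Hmask, Wmask) mask logits
+    coords = get_uncertain_point_coords_with_randomness(
+        logits,
+        uncertainty_func=lambda point_logits: -point_logits.abs(),
+        num_points=64,
+        oversample_ratio=3,
+        importance_sample_ratio=0.75,
+    )
+    return coords  # shape (2, 64, 2), point coordinates in [0, 1] x [0, 1]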
+
+
+# Adapted from https://github.com/facebookresearch/Mask2Former/blob/main/mask2former/modeling/criterion.py
+def calculate_uncertainty(logits: torch.Tensor) -> torch.Tensor:
+ """
+    Estimates uncertainty as the L1 distance between 0.0 and the logit prediction.
+ Args:
+ logits (Tensor): A tensor of shape (R, 1, ...) for class-agnostic
+ predicted masks
+ Returns:
+ scores (Tensor): A tensor of shape (R, 1, ...) that contains uncertainty scores with
+ the most uncertain locations having the highest uncertainty score.
+ """
+ assert logits.shape[1] == 1
+ return -(torch.abs(logits))
diff --git a/sam3/train/loss/sam3_loss.py b/sam3/train/loss/sam3_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..7ef59dc9e00206aa51efb44391e81ea5d3d665e9
--- /dev/null
+++ b/sam3/train/loss/sam3_loss.py
@@ -0,0 +1,203 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import torch
+
+from sam3.model.model_misc import SAM3Output
+
+from sam3.train.utils.distributed import get_world_size
+
+from .loss_fns import CORE_LOSS_KEY, Det2TrkAssoc, Masks
+
+
+class DummyLoss(torch.nn.Module):
+ """A dummy loss that always returns 0 (as a placeholder for eval)"""
+
+ def __init__(
+ self,
+ core_loss_key: str = CORE_LOSS_KEY,
+ device: str = "cuda",
+ **kwargs,
+ ):
+ super().__init__()
+ self.core_loss_key = core_loss_key
+ self.device = torch.device(device)
+
+ def forward(self, *args, **kwargs):
+ return {self.core_loss_key: torch.tensor(0.0, device=self.device)}
+
+ def accumulate(self, out_dict):
+ """
+ Called by iterative losses.
+ """
+ if self.core_loss_key not in out_dict:
+ out_dict[self.core_loss_key] = torch.tensor(0.0, device=self.device)
+ return out_dict
+
+
+class Sam3LossWrapper(torch.nn.Module):
+ def __init__(
+ self,
+ loss_fns_find,
+ normalization="global",
+ matcher=None,
+ o2m_matcher=None,
+ o2m_weight=1.0,
+ use_o2m_matcher_on_o2m_aux=True,
+ loss_fn_semantic_seg=None,
+ normalize_by_valid_object_num=False,
+ normalize_by_stage_num=False,
+ scale_by_find_batch_size=False,
+ ):
+ super().__init__()
+ self.loss_fns_find = loss_fns_find
+ assert normalization in ["global", "local", "none"]
+ self.normalization = normalization
+ self.normalize_by_valid_object_num = normalize_by_valid_object_num
+ self.normalize_by_stage_num = normalize_by_stage_num
+ self.matcher = matcher
+ self.o2m_matcher = o2m_matcher
+ self.o2m_weight = o2m_weight
+ # whether to use the o2m matcher on the o2m queries in auxiliary outputs
+ self.use_o2m_matcher_on_o2m_aux = use_o2m_matcher_on_o2m_aux
+ self.loss_fn_semantic_seg = loss_fn_semantic_seg
+ self.scale_by_find_batch_size = scale_by_find_batch_size
+
+ def _get_num_boxes(self, targets):
+ # the average number of target boxes for loss normalization
+ if self.normalize_by_valid_object_num:
+ # valid boxes are those with non-zero height and width
+ # (while padded invisible boxes have zero width and height)
+ boxes_hw = targets["boxes"].view(-1, 4) # cx, cy, w, h
+ num_boxes = (boxes_hw[:, 2:] > 0).all(dim=-1).sum().float()
+ else:
+ num_boxes = targets["num_boxes"].sum().float()
+ if self.normalization == "global":
+ torch.distributed.all_reduce(num_boxes)
+ num_boxes = torch.clamp(num_boxes / get_world_size(), min=1)
+ elif self.normalization == "local":
+ num_boxes = torch.clamp(num_boxes, min=1)
+ elif self.normalization == "none":
+ num_boxes = 1
+ return num_boxes
+
+ def compute_loss(self, nested_out, targets):
+ num_boxes = self._get_num_boxes(targets)
+ o2m_out_is_valid = nested_out.get("o2m_out_is_valid", None)
+ o2m_target_is_valid_padded = nested_out.get("o2m_target_is_valid_padded", None)
+
+ # Get a list of outputs, including auxiliary and first stage outputs
+ output_list = [(nested_out, "", False)] # (out, suffix, is_aux)
+ if "aux_outputs" in nested_out:
+ output_list.extend(
+ (aux_out, f"_aux_{i}", True)
+ for i, aux_out in enumerate(nested_out["aux_outputs"])
+ )
+ if "first_stage" in nested_out:
+ output_list.append((nested_out["first_stage"], "_fs", True))
+
+ # Compute all the requested losses
+ losses = {}
+ total_core_loss = 0.0
+ for out, suffix, is_aux in output_list:
+ # o2o matcher indices need to be computed by the model (as the video model requires
+ # a specific way of matching free and locked indices beyond just calling the matcher)
+ indices = out["indices"]
+ has_o2m_out = "pred_logits_o2m" in out
+ if has_o2m_out:
+ o2m_out = {
+ k[: -len("_o2m")]: v for k, v in out.items() if k.endswith("_o2m")
+ }
+ # o2m targets are the same as the o2o targets (assuming repeat=1)
+ o2m_targets = targets
+ if self.use_o2m_matcher_on_o2m_aux or not is_aux:
+ o2m_indices = self.o2m_matcher(
+ o2m_out,
+ o2m_targets,
+ out_is_valid=o2m_out_is_valid,
+ target_is_valid_padded=o2m_target_is_valid_padded,
+ )
+ else:
+ o2m_indices = self.matcher(
+ o2m_out,
+ o2m_targets,
+ out_is_valid=o2m_out_is_valid,
+ target_is_valid_padded=o2m_target_is_valid_padded,
+ )
+
+ for loss_fn in self.loss_fns_find:
+ l_dict = loss_fn(
+ outputs=out,
+ targets=targets,
+ indices=indices,
+ num_boxes=num_boxes,
+ is_aux=is_aux,
+ )
+ total_core_loss += l_dict.pop(CORE_LOSS_KEY)
+ losses.update({f"{k}{suffix}": v for k, v in l_dict.items()})
+
+ compute_o2m_loss = has_o2m_out
+ # special handling to allow turning off the mask loss in o2m
+ # (to be compatible with the original implementation)
+ if isinstance(loss_fn, Masks):
+ compute_o2m_loss = compute_o2m_loss and "pred_masks" in o2m_out
+ if isinstance(loss_fn, Det2TrkAssoc):
+ compute_o2m_loss = False # Det2TrkAssoc does not support o2m
+ if compute_o2m_loss:
+ l_dict = loss_fn(
+ outputs=o2m_out,
+ targets=o2m_targets,
+ indices=o2m_indices,
+ num_boxes=num_boxes,
+ is_aux=is_aux,
+ )
+ for k in l_dict:
+ l_dict[k] *= self.o2m_weight
+ total_core_loss += l_dict.pop(CORE_LOSS_KEY)
+ losses.update({f"{k}{suffix}_o2m": v for k, v in l_dict.items()})
+
+ losses[CORE_LOSS_KEY] = total_core_loss
+ return losses
+
+ def forward(self, find_stages: SAM3Output, find_targets):
+ if find_stages.loss_stages is not None:
+ find_targets = [find_targets[i] for i in find_stages.loss_stages]
+ with SAM3Output.iteration_mode(
+ find_stages, iter_mode=SAM3Output.IterMode.ALL_STEPS_PER_STAGE
+ ) as find_stages:
+ assert len(find_stages) == len(find_targets)
+ total_losses = {}
+ for stage_outputs, stage_targets in zip(find_stages, find_targets):
+ stage_targets = [stage_targets] * len(stage_outputs)
+ # If there are multiple steps within a stage, compute the loss for all of them (e.g. interactivity)
+ for outputs, targets in zip(stage_outputs, stage_targets):
+ cur_losses = self.compute_loss(outputs, targets)
+
+ if self.loss_fn_semantic_seg is not None:
+ cur_losses_semantic = self.loss_fn_semantic_seg(
+ outputs, targets
+ )
+ cur_losses[CORE_LOSS_KEY] += cur_losses_semantic.pop(
+ CORE_LOSS_KEY
+ )
+ # make sure the semantic losses don't overlap with the find losses
+ assert set(cur_losses).isdisjoint(set(cur_losses_semantic))
+ cur_losses.update(cur_losses_semantic)
+
+ # Optionally, normalize the loss by the number of find stages (training video frames) so that
+ # image batches and video batches have similar loss scales. (Otherwise video batches would
+ # have a much higher loss scale due to summing the losses over all the find stages.)
+ if self.normalize_by_stage_num:
+ cur_losses[CORE_LOSS_KEY] /= len(find_stages)
+
+ if self.scale_by_find_batch_size:
+ bs = targets["num_boxes"].shape[0]
+ # sqrt scaling based on the "effective" batch size
+ cur_losses[CORE_LOSS_KEY] *= bs**0.5
+
+ for k, v in cur_losses.items():
+ if k not in total_losses:
+ total_losses[k] = v
+ else:
+ total_losses[k] += v
+
+ return total_losses
diff --git a/sam3/train/loss/sigmoid_focal_loss.py b/sam3/train/loss/sigmoid_focal_loss.py
new file mode 100644
index 0000000000000000000000000000000000000000..15e6db43d48488f35d5ba28ea13d47348315ff46
--- /dev/null
+++ b/sam3/train/loss/sigmoid_focal_loss.py
@@ -0,0 +1,321 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Triton kernel for faster and memory efficient sigmoid focal loss"""
+
+import torch
+import triton
+import triton.language as tl
+from torch._inductor.runtime.triton_helpers import libdevice
+
+"""
+
+The sigmoid focal loss is defined as:
+
+ prob = inputs.sigmoid()
+ ce_loss = F.binary_cross_entropy_with_logits(inputs, targets, reduction="none")
+ p_t = prob * targets + (1 - prob) * (1 - targets)
+ alpha_t = alpha * targets + (1 - alpha) * (1 - targets)
+ loss = alpha_t * ce_loss * ((1 - p_t) ** gamma)
+
+Where alpha and gamma are scalar parameters, inputs are the logits, targets the float targets.
+
+We implement two versions of the sigmoid focal loss: with and without sum reduction.
+The reduced version fuses the reduction into the kernel so that the per-element loss is never materialized.
+This can help save a bit of peak memory.
+
+The reduced version is implemented using somewhat of a hack. PyTorch's generated kernels usually do the point-wise operation in a first kernel and implement the reduction in a second kernel launched on a grid of size 1, where the reduction happens as a for loop inside the Triton kernel.
+Since we want to fuse those two kernels, that is not an option: we would have to launch the whole kernel on a grid of size 1, which is obviously inefficient.
+On the other hand, typical CUDA reduction algorithms (e.g. a reduction tree) are hard to implement in Triton due to the lack of thread synchronization primitives.
+We settle for a version that abuses Triton's atomic_add, having programs simply add into shared accumulation slots.
+Doing this with a single accumulation slot would create a massive bottleneck on the semaphore for that one memory location. So instead, we create M reduction locations; each program adds its partial sum to slot pid % M, and the Python code finally sums over the M partial results.
+M = 32 works fine in benchmarking tests. The forward pass is a tiny bit slower than the non-reduced kernel, but the backward breaks even due to one less memory allocation.
+"""
+
+
+@triton.jit
+def _inner_focal_loss_fwd(inputs, targets, alpha, gamma):
+ inv_targets = 1 - targets
+ # Sigmoid
+ sig = tl.sigmoid(inputs)
+
+ # Binary cross entropy with logits
+ # In practice, we want the following:
+ # bce_loss = -targets * tl.log(sig) - (1 - targets) * tl.log(1 - sig)
+ # However, the above is not numerically stable.
+ # We're also not directly taking the sum here, so the usual log-sum-exp trick doesn't apply
+ # The bce can be reformulated, after algebraic manipulation, to
+ # bce_loss = log(1 + exp(-x)) + x * (1-y)
+ # This is still not stable, because for large (-x) the exponential will blow up.
+ # We'll use the following alternate formulation:
+ # bce_loss = max(x, 0) - x * y + log(1 + exp(-abs(x)))
+ # Let's show that it's equivalent:
+ # Case x>=0: abs(x) = x , max(x, 0) = x
+ # so we get x - x * y + log(1 + exp(-x)) which is equivalent
+ # Case x<0: abs(x) = -x, max(x, 0) = 0
+ # we have log(1 + exp(-abs(x))) = log(1 + exp(x)) = log(exp(x)(1 + exp(-x))) = x+log(1 + exp(-x))
+ # plugging it in, we get
+ # 0 - x * y + x + log(1 + exp(-x)), which is also equivalent
+ # Note that this is stable because the exponent is now guaranteed to be non-positive.
+ max_val = tl.clamp(inputs, min=0, max=1e9)
+ bce_loss = max_val - inputs * targets + tl.log(1 + tl.exp(-tl.abs(inputs)))
+
+ # Modulating factor
+ p_t = sig * targets + (1 - sig) * inv_targets
+ mod_factor = libdevice.pow(1 - p_t, gamma)
+
+ # Alpha factor
+ alpha_t = alpha * targets + (1 - alpha) * inv_targets
+
+ # Final loss calculation
+ return alpha_t * mod_factor * bce_loss
+
+
+# Non-reduced version
+@triton.jit
+def sigmoid_focal_loss_fwd_kernel(
+ inputs_ptr,
+ targets_ptr,
+ loss_ptr,
+ alpha: float,
+ gamma: float,
+ n_elements: int,
+ BLOCK_SIZE: tl.constexpr,
+):
+ pid = tl.program_id(axis=0)
+ block_start = pid * BLOCK_SIZE
+ offset = block_start + tl.arange(0, BLOCK_SIZE)
+ mask = offset < n_elements
+
+ # Load data
+ inputs = tl.load(inputs_ptr + offset, mask=mask).to(tl.float32)
+ targets = tl.load(targets_ptr + offset, mask=mask)
+
+ final_loss = _inner_focal_loss_fwd(inputs, targets, alpha, gamma)
+
+ # Store result
+ tl.store(loss_ptr + offset, final_loss, mask=mask)
+
+
+# version with reduction
+@triton.jit
+def sigmoid_focal_loss_fwd_kernel_reduce(
+ inputs_ptr,
+ targets_ptr,
+ loss_ptr,
+ alpha: float,
+ gamma: float,
+ n_elements: int,
+ BLOCK_SIZE: tl.constexpr,
+ REDUCE_SIZE: tl.constexpr,
+):
+ pid = tl.program_id(axis=0)
+ block_start = pid * BLOCK_SIZE
+ reduce_loc = pid % REDUCE_SIZE
+ offset = block_start + tl.arange(0, BLOCK_SIZE)
+ mask = offset < n_elements
+ # Load data
+ inputs = tl.load(inputs_ptr + offset, mask=mask).to(tl.float32)
+ targets = tl.load(targets_ptr + offset, mask=mask)
+
+ final_loss = _inner_focal_loss_fwd(inputs, targets, alpha, gamma) * mask
+
+ fl = tl.sum(final_loss)
+
+ # Store result
+ tl.atomic_add(loss_ptr + reduce_loc, fl)
+
+
+@triton.jit
+def _inner_focal_loss_bwd(inputs, targets, alpha, gamma):
+ inv_targets = 1 - targets
+
+ # Recompute forward
+ max_val = tl.clamp(inputs, min=0, max=1e9)
+ bce_loss = max_val - inputs * targets + tl.log(1 + tl.exp(-tl.abs(inputs)))
+
+ # Sigmoid
+ sig = tl.sigmoid(inputs)
+ inv_sig = 1 - sig
+
+ # Modulating factor
+ p_t = sig * targets + inv_sig * inv_targets
+ tmp = libdevice.pow(1 - p_t, gamma - 1)
+ mod_factor = tmp * (1 - p_t)
+
+ # Alpha factor
+ alpha_t = alpha * targets + (1 - alpha) * inv_targets
+
+ # Now computing the derivatives
+ d_pt = (2 * targets - 1) * sig * inv_sig
+ d_mod_factor = -gamma * d_pt * tmp
+
+ d_bce_loss = sig - targets
+
+ return alpha_t * (d_bce_loss * mod_factor + d_mod_factor * bce_loss)
+
+
+@triton.jit
+def sigmoid_focal_loss_bwd_kernel(
+ inputs_ptr,
+ targets_ptr,
+ grad_inputs_ptr,
+ grad_out_ptr,
+ alpha: float,
+ gamma: float,
+ n_elements: int,
+ BLOCK_SIZE: tl.constexpr,
+):
+ pid = tl.program_id(axis=0)
+ block_start = pid * BLOCK_SIZE
+ offset = block_start + tl.arange(0, BLOCK_SIZE)
+ mask = offset < n_elements
+ input_ptrs = inputs_ptr + offset
+ target_ptrs = targets_ptr + offset
+ grad_input_ptrs = grad_inputs_ptr + offset
+ grad_out_ptrs = grad_out_ptr + offset
+ # Load data
+ inputs = tl.load(input_ptrs, mask=mask).to(tl.float32)
+ targets = tl.load(target_ptrs, mask=mask)
+ grad_out = tl.load(grad_out_ptrs, mask=mask)
+ d_loss = grad_out * _inner_focal_loss_bwd(inputs, targets, alpha, gamma)
+ tl.store(grad_input_ptrs, d_loss, mask=mask)
+
+
+@triton.jit
+def sigmoid_focal_loss_bwd_kernel_reduce(
+ inputs_ptr,
+ targets_ptr,
+ grad_inputs_ptr,
+ grad_out_ptr,
+ alpha: float,
+ gamma: float,
+ n_elements: int,
+ BLOCK_SIZE: tl.constexpr,
+):
+ # The only difference is that the gradient is now a single scalar
+ pid = tl.program_id(axis=0)
+ block_start = pid * BLOCK_SIZE
+ offset = block_start + tl.arange(0, BLOCK_SIZE)
+ mask = offset < n_elements
+ input_ptrs = inputs_ptr + offset
+ target_ptrs = targets_ptr + offset
+ grad_input_ptrs = grad_inputs_ptr + offset
+ # Load data
+ inputs = tl.load(input_ptrs, mask=mask).to(tl.float32)
+ targets = tl.load(target_ptrs, mask=mask)
+ grad_out = tl.load(grad_out_ptr)
+ d_loss = grad_out * _inner_focal_loss_bwd(inputs, targets, alpha, gamma)
+ tl.store(grad_input_ptrs, d_loss, mask=mask)
+
+
+class SigmoidFocalLoss(torch.autograd.Function):
+ BLOCK_SIZE = 256
+
+ @staticmethod
+ def forward(ctx, inputs, targets, alpha=0.25, gamma=2):
+ n_elements = inputs.numel()
+ assert targets.numel() == n_elements
+ input_shape = inputs.shape
+ inputs = inputs.view(-1).contiguous()
+ targets = targets.view(-1).contiguous()
+ loss = torch.empty(inputs.shape, dtype=torch.float32, device=inputs.device)
+ grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
+ sigmoid_focal_loss_fwd_kernel[grid](
+ inputs, targets, loss, alpha, gamma, n_elements, SigmoidFocalLoss.BLOCK_SIZE
+ )
+ ctx.save_for_backward(inputs.view(input_shape), targets.view(input_shape))
+ ctx.alpha = alpha
+ ctx.gamma = gamma
+ return loss.view(input_shape)
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ inputs, targets = ctx.saved_tensors
+ alpha = ctx.alpha
+ gamma = ctx.gamma
+ n_elements = inputs.numel()
+ input_shape = inputs.shape
+ grad_inputs = torch.empty(
+ inputs.shape, dtype=grad_output.dtype, device=grad_output.device
+ )
+ inputs_ptr = inputs.view(-1).contiguous()
+ targets_ptr = targets.view(-1).contiguous()
+ grad_output_ptr = grad_output.view(-1).contiguous()
+ grad_inputs_ptr = grad_inputs
+ assert grad_output.numel() == n_elements
+ grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
+ sigmoid_focal_loss_bwd_kernel[grid](
+ inputs_ptr,
+ targets_ptr,
+ grad_inputs_ptr,
+ grad_output_ptr,
+ alpha,
+ gamma,
+ n_elements,
+ SigmoidFocalLoss.BLOCK_SIZE,
+ )
+ return grad_inputs.view(input_shape), None, None, None
+
+
+triton_sigmoid_focal_loss = SigmoidFocalLoss.apply
+
+
+class SigmoidFocalLossReduced(torch.autograd.Function):
+ BLOCK_SIZE = 256
+ REDUCE_SIZE = 32
+
+ @staticmethod
+ def forward(ctx, inputs, targets, alpha=0.25, gamma=2):
+ n_elements = inputs.numel()
+ input_shape = inputs.shape
+ inputs = inputs.view(-1).contiguous()
+ targets = targets.view(-1).contiguous()
+ loss = torch.zeros(
+ SigmoidFocalLossReduced.REDUCE_SIZE,
+ device=inputs.device,
+ dtype=torch.float32,
+ )
+ grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
+ sigmoid_focal_loss_fwd_kernel_reduce[grid](
+ inputs,
+ targets,
+ loss,
+ alpha,
+ gamma,
+ n_elements,
+ SigmoidFocalLossReduced.BLOCK_SIZE,
+ SigmoidFocalLossReduced.REDUCE_SIZE,
+ )
+ ctx.save_for_backward(inputs.view(input_shape), targets.view(input_shape))
+ ctx.alpha = alpha
+ ctx.gamma = gamma
+ return loss.sum()
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ inputs, targets = ctx.saved_tensors
+ alpha = ctx.alpha
+ gamma = ctx.gamma
+ n_elements = inputs.numel()
+ input_shape = inputs.shape
+ grad_inputs = torch.empty(
+ inputs.shape, dtype=grad_output.dtype, device=grad_output.device
+ )
+ inputs_ptr = inputs.view(-1).contiguous()
+ targets_ptr = targets.reshape(-1).contiguous()
+ assert grad_output.numel() == 1
+ grid = lambda meta: (triton.cdiv(n_elements, meta["BLOCK_SIZE"]),)
+ sigmoid_focal_loss_bwd_kernel_reduce[grid](
+ inputs_ptr,
+ targets_ptr,
+ grad_inputs,
+ grad_output,
+ alpha,
+ gamma,
+ n_elements,
+ SigmoidFocalLossReduced.BLOCK_SIZE,
+ )
+ return grad_inputs.view(input_shape), None, None, None
+
+
+triton_sigmoid_focal_loss_reduce = SigmoidFocalLossReduced.apply
diff --git a/sam3/train/masks_ops.py b/sam3/train/masks_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9d2fd7c23396722a738a92393b92621943708dd
--- /dev/null
+++ b/sam3/train/masks_ops.py
@@ -0,0 +1,272 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""Utilities for masks manipulation"""
+
+import numpy as np
+import pycocotools.mask as maskUtils
+import torch
+from pycocotools import mask as mask_util
+
+
+def instance_masks_to_semantic_masks(
+ instance_masks: torch.Tensor, num_instances: torch.Tensor
+) -> torch.Tensor:
+ """This function converts instance masks to semantic masks.
+ It accepts a collapsed batch of instance masks (i.e. all instance masks are concatenated into a single tensor) and
+ the number of instances in each image of the batch.
+ It returns a mask with the same spatial dimensions as the input instance masks, where for each batch element the
+ semantic mask is the union of all the instance masks in the batch element.
+
+ If for a given batch element there are no instances (i.e. num_instances[i] == 0), the corresponding semantic mask will be a tensor of zeros.
+
+ Args:
+ instance_masks (torch.Tensor): A tensor of shape (N, H, W) where N is the number of instances in the batch.
+ num_instances (torch.Tensor): A tensor of shape (B,) where B is the batch size. It contains the number of instances
+ in each image of the batch.
+
+ Returns:
+ torch.Tensor: A tensor of shape (B, H, W) where B is the batch size and H, W are the spatial dimensions of the
+ input instance masks.
+ """
+
+ masks_per_query = torch.split(instance_masks, num_instances.tolist())
+
+ return torch.stack([torch.any(masks, dim=0) for masks in masks_per_query], dim=0)
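+
+# Illustrative usage (a small sketch with made-up shapes):
+#
+#   instance_masks = torch.zeros(3, 4, 4, dtype=torch.bool)
+#   instance_masks[0, 0, 0] = True  # instance 1 of image 0
+#   instance_masks[1, 1, 1] = True  # instance 2 of image 0
+#   instance_masks[2, 2, 2] = True  # instance 1 of image 1
+#   num_instances = torch.tensor([2, 1])
+#   semantic = instance_masks_to_semantic_masks(instance_masks, num_instances)
+#   # semantic.shape == (2, 4, 4); semantic[0] is the union of the first two masks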
+
+
+def mask_intersection(masks1, masks2, block_size=16):
+ """Compute the intersection of two sets of masks, without blowing the memory"""
+
+ assert masks1.shape[1:] == masks2.shape[1:]
+ assert masks1.dtype == torch.bool and masks2.dtype == torch.bool
+
+ result = torch.zeros(
+ masks1.shape[0], masks2.shape[0], device=masks1.device, dtype=torch.long
+ )
+ for i in range(0, masks1.shape[0], block_size):
+ for j in range(0, masks2.shape[0], block_size):
+ intersection = (
+ (masks1[i : i + block_size, None] * masks2[None, j : j + block_size])
+ .flatten(-2)
+ .sum(-1)
+ )
+ result[i : i + block_size, j : j + block_size] = intersection
+ return result
+
+
+def mask_iom(masks1, masks2):
+ """
+ Similar to IoU, except the denominator is the area of the smaller of the two masks
+ """
+ assert masks1.shape[1:] == masks2.shape[1:]
+ assert masks1.dtype == torch.bool and masks2.dtype == torch.bool
+
+ # intersection = (masks1[:, None] * masks2[None]).flatten(-2).sum(-1)
+ intersection = mask_intersection(masks1, masks2)
+ area1 = masks1.flatten(-2).sum(-1)
+ area2 = masks2.flatten(-2).sum(-1)
+ min_area = torch.min(area1[:, None], area2[None, :])
+ return intersection / (min_area + 1e-8)
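+
+# Worked example (illustrative): a mask fully contained in another gives IoM close to 1
+# even though its IoU is only 0.5:
+#
+#   big = torch.zeros(1, 4, 4, dtype=torch.bool); big[0, :2, :] = True    # area 8
+#   small = torch.zeros(1, 4, 4, dtype=torch.bool); small[0, 0, :] = True # area 4
+#   mask_iom(big, small)  # intersection 4 / min(8, 4) -> ~1.0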
+
+
+def compute_boundary(seg):
+ """
+ Adapted from https://github.com/JonathonLuiten/TrackEval/blob/master/trackeval/metrics/j_and_f.py#L148
+ Return a 1pix wide boundary of the given mask
+ """
+ assert seg.ndim >= 2
+ e = torch.zeros_like(seg)
+ s = torch.zeros_like(seg)
+ se = torch.zeros_like(seg)
+
+ e[..., :, :-1] = seg[..., :, 1:]
+ s[..., :-1, :] = seg[..., 1:, :]
+ se[..., :-1, :-1] = seg[..., 1:, 1:]
+
+ b = seg ^ e | seg ^ s | seg ^ se
+ b[..., -1, :] = seg[..., -1, :] ^ e[..., -1, :]
+ b[..., :, -1] = seg[..., :, -1] ^ s[..., :, -1]
+ b[..., -1, -1] = 0
+ return b
+
+
+def dilation(mask, kernel_size):
+ """
+ Implements the dilation operation. If the input is on cpu, we call the cv2 version.
+ Otherwise, we implement it using a convolution
+
+ The kernel is assumed to be a square kernel
+
+ """
+
+ assert mask.ndim == 3
+ kernel_size = int(kernel_size)
+ assert (
+ kernel_size % 2 == 1
+ ), f"Dilation expects a odd kernel size, got {kernel_size}"
+
+ if mask.is_cuda:
+ m = mask.unsqueeze(1).to(torch.float16)
+ k = torch.ones(1, 1, kernel_size, 1, dtype=m.dtype, device=m.device)
+
+ result = torch.nn.functional.conv2d(m, k, padding="same")
+ result = torch.nn.functional.conv2d(result, k.transpose(-1, -2), padding="same")
+ return result.view_as(mask) > 0
+
+ all_masks = mask.view(-1, mask.size(-2), mask.size(-1)).numpy().astype(np.uint8)
+ kernel = np.ones((kernel_size, kernel_size), dtype=np.uint8)
+
+ import cv2
+
+ processed = [torch.from_numpy(cv2.dilate(m, kernel)) for m in all_masks]
+ return torch.stack(processed).view_as(mask).to(mask)
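+
+# Illustrative usage: a single foreground pixel dilated with a 3x3 kernel becomes a 3x3
+# block (the same result is expected from the cv2 path on CPU and the conv path on GPU):
+#
+#   m = torch.zeros(1, 5, 5, dtype=torch.bool)
+#   m[0, 2, 2] = True
+#   d = dilation(m, kernel_size=3)
+#   # d[0, 1:4, 1:4] is all True; every other pixel stays False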
+
+
+def compute_F_measure(
+ gt_boundary_rle, gt_dilated_boundary_rle, dt_boundary_rle, dt_dilated_boundary_rle
+):
+ """Adapted from https://github.com/JonathonLuiten/TrackEval/blob/master/trackeval/metrics/j_and_f.py#L207
+
+ Assumes the boundary and dilated boundaries have already been computed and converted to RLE
+ """
+ gt_match = maskUtils.merge([gt_boundary_rle, dt_dilated_boundary_rle], True)
+ dt_match = maskUtils.merge([dt_boundary_rle, gt_dilated_boundary_rle], True)
+
+ n_dt = maskUtils.area(dt_boundary_rle)
+ n_gt = maskUtils.area(gt_boundary_rle)
+ # Compute precision and recall
+ if n_dt == 0 and n_gt > 0:
+ precision = 1
+ recall = 0
+ elif n_dt > 0 and n_gt == 0:
+ precision = 0
+ recall = 1
+ elif n_dt == 0 and n_gt == 0:
+ precision = 1
+ recall = 1
+ else:
+ precision = maskUtils.area(dt_match) / float(n_dt)
+ recall = maskUtils.area(gt_match) / float(n_gt)
+
+ # Compute F measure
+ if precision + recall == 0:
+ f_val = 0
+ else:
+ f_val = 2 * precision * recall / (precision + recall)
+
+ return f_val
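+
+# Worked example (illustrative): if 80% of the predicted boundary lies within the dilated
+# GT boundary (precision = 0.8) and 50% of the GT boundary lies within the dilated
+# predicted boundary (recall = 0.5), then
+#   F = 2 * 0.8 * 0.5 / (0.8 + 0.5) = 0.8 / 1.3 ≈ 0.615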
+
+
+@torch.no_grad()
+def rle_encode(orig_mask, return_areas=False):
+ """Encodes a collection of masks in RLE format
+
+ This function emulates the behavior of the COCO API's encode function, but
+ is executed partially on the GPU for faster execution.
+
+ Args:
+ orig_mask (torch.Tensor): A mask of shape (N, H, W) with dtype=torch.bool
+ return_areas (bool): If True, add the areas of the masks as a part of
+ the RLE output dict under the "area" key. Default is False.
+
+ Returns:
+ list[dict]: The RLE-encoded masks, one COCO-style RLE dict per input mask
+ """
+ assert orig_mask.ndim == 3, "Mask must be of shape (N, H, W)"
+ assert orig_mask.dtype == torch.bool, "Mask must have dtype=torch.bool"
+
+ if orig_mask.numel() == 0:
+ return []
+
+ # First, transpose the spatial dimensions.
+ # This is necessary because the COCO API uses Fortran order
+ mask = orig_mask.transpose(1, 2)
+
+ # Flatten the mask
+ flat_mask = mask.reshape(mask.shape[0], -1)
+ if return_areas:
+ mask_areas = flat_mask.sum(-1).tolist()
+ # Find the indices where the mask changes
+ differences = torch.ones(
+ mask.shape[0], flat_mask.shape[1] + 1, device=mask.device, dtype=torch.bool
+ )
+ differences[:, 1:-1] = flat_mask[:, :-1] != flat_mask[:, 1:]
+ differences[:, 0] = flat_mask[:, 0]
+ _, change_indices = torch.where(differences)
+
+ try:
+ boundaries = torch.cumsum(differences.sum(-1), 0).cpu()
+ except RuntimeError as _:
+ boundaries = torch.cumsum(differences.cpu().sum(-1), 0)
+
+ change_indices_clone = change_indices.clone()
+ # First pass computes the RLEs on GPU, in a flatten format
+ for i in range(mask.shape[0]):
+ # Get the change indices for this batch item
+ beg = 0 if i == 0 else boundaries[i - 1].item()
+ end = boundaries[i].item()
+ change_indices[beg + 1 : end] -= change_indices_clone[beg : end - 1]
+
+ # Now we can split the RLES of each batch item, and convert them to strings
+ # No more gpu at this point
+ change_indices = change_indices.tolist()
+
+ batch_rles = []
+ # Process each mask in the batch separately
+ for i in range(mask.shape[0]):
+ beg = 0 if i == 0 else boundaries[i - 1].item()
+ end = boundaries[i].item()
+ run_lengths = change_indices[beg:end]
+
+ uncompressed_rle = {"counts": run_lengths, "size": list(orig_mask.shape[1:])}
+ h, w = uncompressed_rle["size"]
+ rle = mask_util.frPyObjects(uncompressed_rle, h, w)
+ rle["counts"] = rle["counts"].decode("utf-8")
+ if return_areas:
+ rle["area"] = mask_areas[i]
+ batch_rles.append(rle)
+
+ return batch_rles
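+
+# Illustrative consistency check (an expectation to be verified in tests, not a guarantee):
+# the output is intended to match pycocotools' Fortran-order encoding, e.g.
+#
+#   masks = torch.zeros(2, 6, 6, dtype=torch.bool)
+#   masks[0, 1:4, 2:5] = True
+#   masks[1, 0, :] = True
+#   rles = rle_encode(masks)
+#   ref = [
+#       mask_util.encode(np.asfortranarray(m.numpy().astype(np.uint8))) for m in masks
+#   ]
+#   # rles[i]["counts"] is expected to equal ref[i]["counts"].decode("utf-8")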
+
+
+def robust_rle_encode(masks):
+ """Encodes a collection of masks in RLE format. Uses the gpu version fist, falls back to the cpu version if it fails"""
+
+ assert masks.ndim == 3, "Mask must be of shape (N, H, W)"
+ assert masks.dtype == torch.bool, "Mask must have dtype=torch.bool"
+
+ try:
+ return rle_encode(masks)
+ except RuntimeError as _:
+ masks = masks.cpu().numpy()
+ rles = [
+ mask_util.encode(
+ np.array(mask[:, :, np.newaxis], dtype=np.uint8, order="F")
+ )[0]
+ for mask in masks
+ ]
+ for rle in rles:
+ rle["counts"] = rle["counts"].decode("utf-8")
+ return rles
+
+
+def ann_to_rle(segm, im_info):
+ """Convert annotation which can be polygons, uncompressed RLE to RLE.
+ Args:
+ ann (dict) : annotation object
+ Returns:
+ ann (rle)
+ """
+ h, w = im_info["height"], im_info["width"]
+ if isinstance(segm, list):
+ # polygon -- a single object might consist of multiple parts
+ # we merge all parts into one mask rle code
+ rles = mask_util.frPyObjects(segm, h, w)
+ rle = mask_util.merge(rles)
+ elif isinstance(segm["counts"], list):
+ # uncompressed RLE
+ rle = mask_util.frPyObjects(segm, h, w)
+ else:
+ # rle
+ rle = segm
+ return rle
diff --git a/sam3/train/matcher.py b/sam3/train/matcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..b0b8d62a5279c0694fa079d004269f50996f76a1
--- /dev/null
+++ b/sam3/train/matcher.py
@@ -0,0 +1,806 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+Modules to compute the matching cost and solve the corresponding LSAP.
+"""
+
+import numpy as np
+import torch
+
+from sam3.model.box_ops import box_cxcywh_to_xyxy, box_iou, generalized_box_iou
+from scipy.optimize import linear_sum_assignment
+from torch import nn
+
+
+def _do_matching(cost, repeats=1, return_tgt_indices=False, do_filtering=False):
+ if repeats > 1:
+ cost = np.tile(cost, (1, repeats))
+
+ i, j = linear_sum_assignment(cost)
+ if do_filtering:
+ # filter out invalid entries (i.e. those with cost > 1e8)
+ valid_thresh = 1e8
+ valid_ijs = [(ii, jj) for ii, jj in zip(i, j) if cost[ii, jj] < valid_thresh]
+ i, j = zip(*valid_ijs) if len(valid_ijs) > 0 else ([], [])
+ i, j = np.array(i, dtype=np.int64), np.array(j, dtype=np.int64)
+ if return_tgt_indices:
+ return i, j
+ order = np.argsort(j)
+ return i[order]
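+
+# Note on `repeats` (added remark): with repeats > 1, the columns of the cost matrix are
+# tiled so that each target can absorb up to `repeats` predictions (one-to-many matching).
+# For example, a cost matrix of shape (5 predictions, 2 targets) with repeats=2 becomes
+# (5, 4), where columns 0 and 2 both correspond to target 0.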
+
+
+class HungarianMatcher(nn.Module):
+ """This class computes an assignment between the targets and the predictions of the network
+
+ For efficiency reasons, the targets don't include the no_object. Because of this, in general,
+ there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
+ while the others are un-matched (and thus treated as non-objects).
+ """
+
+ def __init__(
+ self,
+ cost_class: float = 1,
+ cost_bbox: float = 1,
+ cost_giou: float = 1,
+ focal_loss: bool = False,
+ focal_alpha: float = 0.25,
+ focal_gamma: float = 2,
+ ):
+ """Creates the matcher
+
+ Params:
+ cost_class: This is the relative weight of the classification error in the matching cost
+ cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost
+ cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost
+ """
+ super().__init__()
+ self.cost_class = cost_class
+ self.cost_bbox = cost_bbox
+ self.cost_giou = cost_giou
+ self.norm = nn.Sigmoid() if focal_loss else nn.Softmax(-1)
+ assert (
+ cost_class != 0 or cost_bbox != 0 or cost_giou != 0
+ ), "all costs cant be 0"
+ self.focal_loss = focal_loss
+ self.focal_alpha = focal_alpha
+ self.focal_gamma = focal_gamma
+
+ @torch.no_grad()
+ def forward(self, outputs, batched_targets):
+ """Performs the matching
+
+ Params:
+ outputs: This is a dict that contains at least these entries:
+ "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
+ "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
+
+ targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
+ "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
+ objects in the target) containing the class labels
+ "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
+
+ Returns:
+ A list of size batch_size, containing tuples of (index_i, index_j) where:
+ - index_i is the indices of the selected predictions (in order)
+ - index_j is the indices of the corresponding selected targets (in order)
+ For each batch element, it holds:
+ len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
+ """
+ bs, num_queries = outputs["pred_logits"].shape[:2]
+
+ # We flatten to compute the cost matrices in a batch
+ out_prob = self.norm(
+ outputs["pred_logits"].flatten(0, 1)
+ ) # [batch_size * num_queries, num_classes]
+ out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
+
+ # Also concat the target labels and boxes
+ tgt_bbox = batched_targets["boxes"]
+
+ if "positive_map" in batched_targets:
+ # In this case we have a multi-hot target
+ positive_map = batched_targets["positive_map"]
+ assert len(tgt_bbox) == len(positive_map)
+
+ if self.focal_loss:
+ positive_map = positive_map > 1e-4
+ alpha = self.focal_alpha
+ gamma = self.focal_gamma
+ neg_cost_class = (
+ (1 - alpha) * (out_prob**gamma) * (-(1 - out_prob + 1e-8).log())
+ )
+ pos_cost_class = (
+ alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
+ )
+ cost_class = (
+ (pos_cost_class - neg_cost_class).unsqueeze(1)
+ * positive_map.unsqueeze(0)
+ ).sum(-1)
+ else:
+ # Compute the soft-cross entropy between the predicted token alignment and the GT one for each box
+ cost_class = -(out_prob.unsqueeze(1) * positive_map.unsqueeze(0)).sum(
+ -1
+ )
+ else:
+ # In this case we are doing a "standard" cross entropy
+ tgt_ids = batched_targets["labels"]
+ assert len(tgt_bbox) == len(tgt_ids)
+
+ if self.focal_loss:
+ alpha = self.focal_alpha
+ gamma = self.focal_gamma
+ neg_cost_class = (
+ (1 - alpha) * (out_prob**gamma) * (-(1 - out_prob + 1e-8).log())
+ )
+ pos_cost_class = (
+ alpha * ((1 - out_prob) ** gamma) * (-(out_prob + 1e-8).log())
+ )
+ cost_class = pos_cost_class[:, tgt_ids] - neg_cost_class[:, tgt_ids]
+ else:
+ # Compute the classification cost. Contrary to the loss, we don't use the NLL,
+ # but approximate it by 1 - proba[target class].
+ # The 1 is a constant that doesn't change the matching, it can be omitted.
+ cost_class = -out_prob[:, tgt_ids]
+
+ # Compute the L1 cost between boxes
+ cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
+ assert cost_class.shape == cost_bbox.shape
+
+ # Compute the giou cost between boxes
+ cost_giou = -generalized_box_iou(
+ box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
+ )
+
+ # Final cost matrix
+ C = (
+ self.cost_bbox * cost_bbox
+ + self.cost_class * cost_class
+ + self.cost_giou * cost_giou
+ )
+ C = C.view(bs, num_queries, -1).cpu().numpy()
+
+ sizes = torch.cumsum(batched_targets["num_boxes"], -1)[:-1]
+ costs = [c[i] for i, c in enumerate(np.split(C, sizes.cpu().numpy(), axis=-1))]
+ indices = [_do_matching(c) for c in costs]
+ batch_idx = torch.as_tensor(
+ sum([[i] * len(src) for i, src in enumerate(indices)], []), dtype=torch.long
+ )
+ src_idx = torch.from_numpy(np.concatenate(indices)).long()
+ return batch_idx, src_idx
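+
+# Illustrative usage of HungarianMatcher (the cost weights below are arbitrary examples):
+#
+#   matcher = HungarianMatcher(cost_class=1, cost_bbox=5, cost_giou=2)
+#   outputs = {
+#       "pred_logits": torch.randn(1, 3, 4),  # batch of 1, 3 queries, 4 classes
+#       "pred_boxes": torch.rand(1, 3, 4),    # cxcywh in [0, 1]
+#   }
+#   batched_targets = {
+#       "labels": torch.tensor([0, 2]),
+#       "boxes": torch.rand(2, 4),
+#       "num_boxes": torch.tensor([2]),
+#   }
+#   batch_idx, src_idx = matcher(outputs, batched_targets)
+#   # batch_idx == tensor([0, 0]); src_idx lists the two queries matched to the targets,
+#   # ordered by target index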
+
+
+class BinaryHungarianMatcher(nn.Module):
+ """This class computes an assignment between the targets and the predictions of the network
+
+ For efficiency reasons, the targets don't include the no_object. Because of this, in general,
+ there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
+ while the others are un-matched (and thus treated as non-objects).
+ """
+
+ def __init__(
+ self,
+ cost_class: float = 1,
+ cost_bbox: float = 1,
+ cost_giou: float = 1,
+ ):
+ """Creates the matcher
+
+ Params:
+ cost_class: This is the relative weight of the classification error in the matching cost
+ cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost
+ cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost
+ """
+ super().__init__()
+ self.cost_class = cost_class
+ self.cost_bbox = cost_bbox
+ self.cost_giou = cost_giou
+ self.norm = nn.Sigmoid()
+ assert (
+ cost_class != 0 or cost_bbox != 0 or cost_giou != 0
+ ), "all costs cant be 0"
+
+ @torch.no_grad()
+ def forward(self, outputs, batched_targets, repeats=0, repeat_batch=1):
+ """Performs the matching
+
+ Params:
+ outputs: This is a dict that contains at least these entries:
+ "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
+ "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
+
+ targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
+ "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
+ objects in the target) containing the class labels
+ "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
+
+ Returns:
+ A list of size batch_size, containing tuples of (index_i, index_j) where:
+ - index_i is the indices of the selected predictions (in order)
+ - index_j is the indices of the corresponding selected targets (in order)
+ For each batch element, it holds:
+ len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
+ """
+ if repeat_batch != 1:
+ raise NotImplementedError("please use BinaryHungarianMatcherV2 instead")
+
+ bs, num_queries = outputs["pred_logits"].shape[:2]
+
+ # We flatten to compute the cost matrices in a batch
+ out_prob = self.norm(outputs["pred_logits"].flatten(0, 1)).squeeze(
+ -1
+ ) # [batch_size * num_queries]
+ out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
+
+ # Also concat the target labels and boxes
+ tgt_bbox = batched_targets["boxes"]
+
+ # Compute the L1 cost between boxes
+ cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
+
+ cost_class = -out_prob.unsqueeze(-1).expand_as(cost_bbox)
+
+ assert cost_class.shape == cost_bbox.shape
+
+ # Compute the giou cost between boxes
+ cost_giou = -generalized_box_iou(
+ box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
+ )
+
+ # Final cost matrix
+ C = (
+ self.cost_bbox * cost_bbox
+ + self.cost_class * cost_class
+ + self.cost_giou * cost_giou
+ )
+ C = C.view(bs, num_queries, -1).cpu().numpy()
+
+ sizes = torch.cumsum(batched_targets["num_boxes"], -1)[:-1]
+ costs = [c[i] for i, c in enumerate(np.split(C, sizes.cpu().numpy(), axis=-1))]
+ return_tgt_indices = False
+ for c in costs:
+ n_targ = c.shape[1]
+ if repeats > 1:
+ n_targ *= repeats
+ if c.shape[0] < n_targ:
+ return_tgt_indices = True
+ break
+ if return_tgt_indices:
+ indices, tgt_indices = zip(
+ *(
+ _do_matching(
+ c, repeats=repeats, return_tgt_indices=return_tgt_indices
+ )
+ for c in costs
+ )
+ )
+ tgt_indices = list(tgt_indices)
+ for i in range(1, len(tgt_indices)):
+ tgt_indices[i] += sizes[i - 1].item()
+ tgt_idx = torch.from_numpy(np.concatenate(tgt_indices)).long()
+ else:
+ indices = [_do_matching(c, repeats=repeats) for c in costs]
+ tgt_idx = None
+
+ batch_idx = torch.as_tensor(
+ sum([[i] * len(src) for i, src in enumerate(indices)], []), dtype=torch.long
+ )
+ src_idx = torch.from_numpy(np.concatenate(indices)).long()
+ return batch_idx, src_idx, tgt_idx
+
+
+class BinaryFocalHungarianMatcher(nn.Module):
+ """This class computes an assignment between the targets and the predictions of the network
+
+ For efficiency reasons, the targets don't include the no_object. Because of this, in general,
+ there are more predictions than targets. In this case, we do a 1-to-1 matching of the best predictions,
+ while the others are un-matched (and thus treated as non-objects).
+ """
+
+ def __init__(
+ self,
+ cost_class: float = 1,
+ cost_bbox: float = 1,
+ cost_giou: float = 1,
+ alpha: float = 0.25,
+ gamma: float = 2.0,
+ stable: bool = False,
+ ):
+ """Creates the matcher
+
+ Params:
+ cost_class: This is the relative weight of the classification error in the matching cost
+ cost_bbox: This is the relative weight of the L1 error of the bounding box coordinates in the matching cost
+ cost_giou: This is the relative weight of the giou loss of the bounding box in the matching cost
+ """
+ super().__init__()
+ self.cost_class = cost_class
+ self.cost_bbox = cost_bbox
+ self.cost_giou = cost_giou
+ self.norm = nn.Sigmoid()
+ self.alpha = alpha
+ self.gamma = gamma
+ self.stable = stable
+ assert (
+ cost_class != 0 or cost_bbox != 0 or cost_giou != 0
+ ), "all costs cant be 0"
+
+ @torch.no_grad()
+ def forward(self, outputs, batched_targets, repeats=1, repeat_batch=1):
+ """Performs the matching
+
+ Params:
+ outputs: This is a dict that contains at least these entries:
+ "pred_logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
+ "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates
+
+ targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
+ "labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
+ objects in the target) containing the class labels
+ "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates
+
+ Returns:
+ A list of size batch_size, containing tuples of (index_i, index_j) where:
+ - index_i is the indices of the selected predictions (in order)
+ - index_j is the indices of the corresponding selected targets (in order)
+ For each batch element, it holds:
+ len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
+ """
+ if repeat_batch != 1:
+ raise NotImplementedError("please use BinaryHungarianMatcherV2 instead")
+
+ bs, num_queries = outputs["pred_logits"].shape[:2]
+
+ # We flatten to compute the cost matrices in a batch
+ out_score = outputs["pred_logits"].flatten(0, 1).squeeze(-1)
+ out_prob = self.norm(out_score) # [batch_size * num_queries]
+ out_bbox = outputs["pred_boxes"].flatten(0, 1) # [batch_size * num_queries, 4]
+
+ # Also concat the target labels and boxes
+ tgt_bbox = batched_targets["boxes"]
+
+ # Compute the L1 cost between boxes
+ cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
+
+ # Compute the giou cost between boxes
+ cost_giou = -generalized_box_iou(
+ box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
+ )
+
+ # cost_class = -out_prob.unsqueeze(-1).expand_as(cost_bbox)
+ if self.stable:
+ rescaled_giou = (-cost_giou + 1) / 2
+ out_prob = out_prob.unsqueeze(-1).expand_as(cost_bbox) * rescaled_giou
+ cost_class = -self.alpha * (1 - out_prob) ** self.gamma * torch.log(
+ out_prob
+ ) + (1 - self.alpha) * out_prob**self.gamma * torch.log(1 - out_prob)
+ else:
+ # directly computing log sigmoid (more numerically stable)
+ log_out_prob = torch.nn.functional.logsigmoid(out_score)
+ log_one_minus_out_prob = torch.nn.functional.logsigmoid(-out_score)
+ cost_class = (
+ -self.alpha * (1 - out_prob) ** self.gamma * log_out_prob
+ + (1 - self.alpha) * out_prob**self.gamma * log_one_minus_out_prob
+ )
+ if not self.stable:
+ cost_class = cost_class.unsqueeze(-1).expand_as(cost_bbox)
+
+ assert cost_class.shape == cost_bbox.shape
+
+ # Final cost matrix
+ C = (
+ self.cost_bbox * cost_bbox
+ + self.cost_class * cost_class
+ + self.cost_giou * cost_giou
+ )
+ C = C.view(bs, num_queries, -1).cpu().numpy()
+
+ sizes = torch.cumsum(batched_targets["num_boxes"], -1)[:-1]
+ costs = [c[i] for i, c in enumerate(np.split(C, sizes.cpu().numpy(), axis=-1))]
+ return_tgt_indices = False
+ for c in costs:
+ n_targ = c.shape[1]
+ if repeats > 1:
+ n_targ *= repeats
+ if c.shape[0] < n_targ:
+ return_tgt_indices = True
+ break
+ if return_tgt_indices:
+ indices, tgt_indices = zip(
+ *(
+ _do_matching(
+ c, repeats=repeats, return_tgt_indices=return_tgt_indices
+ )
+ for c in costs
+ )
+ )
+ tgt_indices = list(tgt_indices)
+ for i in range(1, len(tgt_indices)):
+ tgt_indices[i] += sizes[i - 1].item()
+ tgt_idx = torch.from_numpy(np.concatenate(tgt_indices)).long()
+ else:
+ indices = [_do_matching(c, repeats=repeats) for c in costs]
+ tgt_idx = None
+
+ batch_idx = torch.as_tensor(
+ sum([[i] * len(src) for i, src in enumerate(indices)], []), dtype=torch.long
+ )
+ src_idx = torch.from_numpy(np.concatenate(indices)).long()
+ return batch_idx, src_idx, tgt_idx
+
+
+class BinaryHungarianMatcherV2(nn.Module):
+ """
+ This class computes an assignment between the targets and the predictions
+ of the network
+
+ For efficiency reasons, the targets don't include the no_object. Because of
+ this, in general, there are more predictions than targets. In this case, we
+ do a 1-to-1 matching of the best predictions, while the others are
+ un-matched (and thus treated as non-objects).
+
+ This is a more efficient implementation of BinaryHungarianMatcher.
+ """
+
+ def __init__(
+ self,
+ cost_class: float = 1,
+ cost_bbox: float = 1,
+ cost_giou: float = 1,
+ focal: bool = False,
+ alpha: float = 0.25,
+ gamma: float = 2.0,
+ stable: bool = False,
+ remove_samples_with_0_gt: bool = True,
+ ):
+ """
+ Creates the matcher
+
+ Params:
+ - cost_class: Relative weight of the classification error in the
+ matching cost
+ - cost_bbox: Relative weight of the L1 error of the bounding box
+ coordinates in the matching cost
+ - cost_giou: This is the relative weight of the giou loss of the
+ bounding box in the matching cost
+ """
+ super().__init__()
+ self.cost_class = cost_class
+ self.cost_bbox = cost_bbox
+ self.cost_giou = cost_giou
+ self.norm = nn.Sigmoid()
+ assert (
+ cost_class != 0 or cost_bbox != 0 or cost_giou != 0
+ ), "all costs cant be 0"
+ self.focal = focal
+ if focal:
+ self.alpha = alpha
+ self.gamma = gamma
+ self.stable = stable
+ self.remove_samples_with_0_gt = remove_samples_with_0_gt
+
+ @torch.no_grad()
+ def forward(
+ self,
+ outputs,
+ batched_targets,
+ repeats=1,
+ repeat_batch=1,
+ out_is_valid=None,
+ target_is_valid_padded=None,
+ ):
+ """
+ Performs the matching. The inputs and outputs are the same as
+ BinaryHungarianMatcher.forward, except for the optional validity masks
+ and the padded "boxes_padded" entry of batched_targets.
+
+ Inputs:
+ - outputs: A dict with the following keys:
+ - "pred_logits": Tensor of shape (batch_size, num_queries, 1) with
+ classification logits
+ - "pred_boxes": Tensor of shape (batch_size, num_queries, 4) with
+ predicted box coordinates in cxcywh format.
+ - batched_targets: A dict of targets. There may be a variable number of
+ targets per batch entry; suppose that there are T_b targets for batch
+ entry 0 <= b < batch_size. It should have the following keys:
+ - "boxes": Tensor of shape (sum_b T_b, 4) giving ground-truth boxes
+ in cxcywh format for all batch entries packed into a single tensor
+ - "num_boxes": int64 Tensor of shape (batch_size,) giving the number
+ of ground-truth boxes per batch entry: num_boxes[b] = T_b
+ - "_boxes_padded": Tensor of shape (batch_size, max_b T_b, 4) giving
+ a padded version of ground-truth boxes. If this is not present then
+ it will be computed from batched_targets["boxes"] instead, but
+ caching it here can improve performance for repeated calls with the
+ same targets.
+ - out_is_valid: If not None, it should be a boolean tensor of shape
+ (batch_size, num_queries) indicating which predictions are valid.
+ Invalid predictions are ignored during matching and won't appear in
+ the output indices.
+ - target_is_valid_padded: If not None, it should be a boolean tensor of
+ shape (batch_size, max_num_gt_boxes) in padded format indicating
+ which GT boxes are valid. Invalid GT boxes are ignored during matching
+ and won't appear in the output indices.
+
+ Returns:
+ A list of size batch_size, containing tuples of (idx_i, idx_j):
+ - idx_i is the indices of the selected predictions (in order)
+ - idx_j is the indices of the corresponding selected targets
+ (in order)
+ For each batch element, it holds:
+ len(index_i) = len(index_j)
+ = min(num_queries, num_target_boxes)
+ """
+ _, num_queries = outputs["pred_logits"].shape[:2]
+
+ out_score = outputs["pred_logits"].squeeze(-1) # (B, Q)
+ out_bbox = outputs["pred_boxes"] # (B, Q, 4))
+
+ device = out_score.device
+
+ num_boxes = batched_targets["num_boxes"].cpu()
+ # Get a padded version of target boxes (as precomputed in the collator).
+ # It should work for both repeat==1 (o2o) and repeat>1 (o2m) matching.
+ tgt_bbox = batched_targets["boxes_padded"]
+ if self.remove_samples_with_0_gt:
+ # keep only samples w/ at least 1 GT box in targets (num_boxes and tgt_bbox)
+ batch_keep = num_boxes > 0
+ num_boxes = num_boxes[batch_keep]
+ tgt_bbox = tgt_bbox[batch_keep]
+ if target_is_valid_padded is not None:
+ target_is_valid_padded = target_is_valid_padded[batch_keep]
+ # Repeat the targets (for the case of batched aux outputs in the matcher)
+ if repeat_batch > 1:
+ # In this case, out_prob and out_bbox will be a concatenation of
+ # both final and auxiliary outputs, so we also repeat the targets
+ num_boxes = num_boxes.repeat(repeat_batch)
+ tgt_bbox = tgt_bbox.repeat(repeat_batch, 1, 1)
+ if target_is_valid_padded is not None:
+ target_is_valid_padded = target_is_valid_padded.repeat(repeat_batch, 1)
+
+ # keep only samples w/ at least 1 GT box in outputs
+ if self.remove_samples_with_0_gt:
+ if repeat_batch > 1:
+ batch_keep = batch_keep.repeat(repeat_batch)
+ out_score = out_score[batch_keep]
+ out_bbox = out_bbox[batch_keep]
+ if out_is_valid is not None:
+ out_is_valid = out_is_valid[batch_keep]
+ assert out_bbox.shape[0] == tgt_bbox.shape[0]
+ assert out_bbox.shape[0] == num_boxes.shape[0]
+
+ # Compute the L1 cost between boxes
+ cost_bbox = torch.cdist(out_bbox, tgt_bbox, p=1)
+
+ # Compute the giou cost between boxes
+ cost_giou = -generalized_box_iou(
+ box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox)
+ )
+
+ out_prob = self.norm(out_score)
+ if not self.focal:
+ cost_class = -out_prob.unsqueeze(-1).expand_as(cost_bbox)
+ else:
+ if self.stable:
+ rescaled_giou = (-cost_giou + 1) / 2
+ out_prob = out_prob.unsqueeze(-1).expand_as(cost_bbox) * rescaled_giou
+ cost_class = -self.alpha * (1 - out_prob) ** self.gamma * torch.log(
+ out_prob
+ ) + (1 - self.alpha) * out_prob**self.gamma * torch.log(1 - out_prob)
+ else:
+ # directly computing log sigmoid (more numerically stable)
+ log_out_prob = torch.nn.functional.logsigmoid(out_score)
+ log_one_minus_out_prob = torch.nn.functional.logsigmoid(-out_score)
+ cost_class = (
+ -self.alpha * (1 - out_prob) ** self.gamma * log_out_prob
+ + (1 - self.alpha) * out_prob**self.gamma * log_one_minus_out_prob
+ )
+ if not self.stable:
+ cost_class = cost_class.unsqueeze(-1).expand_as(cost_bbox)
+
+ assert cost_class.shape == cost_bbox.shape
+
+ # Final cost matrix
+ C = (
+ self.cost_bbox * cost_bbox
+ + self.cost_class * cost_class
+ + self.cost_giou * cost_giou
+ )
+ # assign a very high cost (1e9) to invalid outputs and targets, so that we can
+ # filter them out (in `_do_matching`) from bipartite matching results
+ do_filtering = out_is_valid is not None or target_is_valid_padded is not None
+ if out_is_valid is not None:
+ C = torch.where(out_is_valid[:, :, None], C, 1e9)
+ if target_is_valid_padded is not None:
+ C = torch.where(target_is_valid_padded[:, None, :], C, 1e9)
+ C = C.cpu().numpy()
+ costs = [C[i, :, :s] for i, s in enumerate(num_boxes.tolist())]
+ return_tgt_indices = (
+ do_filtering or torch.any(num_queries < num_boxes * max(repeats, 1)).item()
+ )
+ if len(costs) == 0:
+ # We have size 0 in the batch dimension, so we return empty matching indices
+ # (note that this can happen due to `remove_samples_with_0_gt=True` even if
+ # the original input batch size is not 0, when all queries have empty GTs).
+ indices = []
+ tgt_idx = torch.zeros(0).long().to(device) if return_tgt_indices else None
+ elif return_tgt_indices:
+ indices, tgt_indices = zip(
+ *(
+ _do_matching(
+ c,
+ repeats=repeats,
+ return_tgt_indices=return_tgt_indices,
+ do_filtering=do_filtering,
+ )
+ for c in costs
+ )
+ )
+ tgt_indices = list(tgt_indices)
+ sizes = torch.cumsum(num_boxes, -1)[:-1]
+ for i in range(1, len(tgt_indices)):
+ tgt_indices[i] += sizes[i - 1].item()
+ tgt_idx = torch.from_numpy(np.concatenate(tgt_indices)).long().to(device)
+ else:
+ indices = [
+ _do_matching(c, repeats=repeats, do_filtering=do_filtering)
+ for c in costs
+ ]
+ tgt_idx = None
+
+ if self.remove_samples_with_0_gt:
+ kept_inds = batch_keep.nonzero().squeeze(1)
+ batch_idx = torch.as_tensor(
+ sum([[kept_inds[i]] * len(src) for i, src in enumerate(indices)], []),
+ dtype=torch.long,
+ device=device,
+ )
+ else:
+ batch_idx = torch.as_tensor(
+ sum([[i] * len(src) for i, src in enumerate(indices)], []),
+ dtype=torch.long,
+ device=device,
+ )
+
+ # indices could be an empty list (since we remove samples w/ 0 GT boxes)
+ if len(indices) > 0:
+ src_idx = torch.from_numpy(np.concatenate(indices)).long().to(device)
+ else:
+ src_idx = torch.empty(0, dtype=torch.long, device=device)
+ return batch_idx, src_idx, tgt_idx
+
+
+class BinaryOneToManyMatcher(nn.Module):
+ """
+ This class computes a greedy assignment between the targets and the predictions of the network.
+ In this formulation, several predictions can be assigned to each target, but each prediction can be assigned to
+ at most one target.
+
+ See DAC-Detr for details
+ """
+
+ def __init__(
+ self,
+ alpha: float = 0.3,
+ threshold: float = 0.4,
+ topk: int = 6,
+ ):
+ """
+ Creates the matcher
+
+ Params:
+ alpha: relative balancing between classification and localization
+ threshold: threshold used to select positive predictions
+ topk: number of top scoring predictions to consider
+ """
+ super().__init__()
+ self.norm = nn.Sigmoid()
+ self.alpha = alpha
+ self.threshold = threshold
+ self.topk = topk
+
+ @torch.no_grad()
+ def forward(
+ self,
+ outputs,
+ batched_targets,
+ repeats=1,
+ repeat_batch=1,
+ out_is_valid=None,
+ target_is_valid_padded=None,
+ ):
+ """
+ Performs the matching. The inputs and outputs are the same as
+ BinaryHungarianMatcher.forward
+
+ Inputs:
+ - outputs: A dict with the following keys:
+ - "pred_logits": Tensor of shape (batch_size, num_queries, 1) with
+ classification logits
+ - "pred_boxes": Tensor of shape (batch_size, num_queries, 4) with
+ predicted box coordinates in cxcywh format.
+ - batched_targets: A dict of targets. There may be a variable number of
+ targets per batch entry; suppose that there are T_b targets for batch
+ entry 0 <= b < batch_size. It should have the following keys:
+ - "num_boxes": int64 Tensor of shape (batch_size,) giving the number
+ of ground-truth boxes per batch entry: num_boxes[b] = T_b
+ - "_boxes_padded": Tensor of shape (batch_size, max_b T_b, 4) giving
+ a padded version of ground-truth boxes. If this is not present then
+ it will be computed from batched_targets["boxes"] instead, but
+ caching it here can improve performance for repeated calls with the
+ same targets.
+ - out_is_valid: If not None, it should be a boolean tensor of shape
+ (batch_size, num_queries) indicating which predictions are valid.
+ Invalid predictions are ignored during matching and won't appear in
+ the output indices.
+ - target_is_valid_padded: If not None, it should be a boolean tensor of
+ shape (batch_size, max_num_gt_boxes) in padded format indicating
+ which GT boxes are valid. Invalid GT boxes are ignored during matching
+ and won't appear in the output indices.
+ Returns:
+ A tuple (batch_idx, src_idx, tgt_idx) of flattened index tensors:
+ - batch_idx: batch index of each match
+ - src_idx: index of the matched prediction within its batch entry
+ - tgt_idx: index of the matched target, offset into the packed
+ "boxes" tensor across the batch
+ Unlike the Hungarian matchers above, each target may be matched to
+ several predictions, so the number of matches is not bounded by the
+ number of targets.
+ """
+ assert repeats <= 1 and repeat_batch <= 1
+ bs, num_queries = outputs["pred_logits"].shape[:2]
+
+ out_prob = self.norm(outputs["pred_logits"]).squeeze(-1) # (B, Q)
+ out_bbox = outputs["pred_boxes"] # (B, Q, 4))
+
+ num_boxes = batched_targets["num_boxes"]
+
+ # Get a padded version of target boxes (as precomputed in the collator).
+ tgt_bbox = batched_targets["boxes_padded"]
+ assert len(tgt_bbox) == bs
+ num_targets = tgt_bbox.shape[1]
+ if num_targets == 0:
+ return (
+ torch.empty(0, dtype=torch.long, device=out_prob.device),
+ torch.empty(0, dtype=torch.long, device=out_prob.device),
+ torch.empty(0, dtype=torch.long, device=out_prob.device),
+ )
+
+ iou, _ = box_iou(box_cxcywh_to_xyxy(out_bbox), box_cxcywh_to_xyxy(tgt_bbox))
+
+ assert iou.shape == (bs, num_queries, num_targets)
+
+ # Final cost matrix (higher is better in `C`; this is unlike the case
+ # of BinaryHungarianMatcherV2 above where lower is better in its `C`)
+ C = self.alpha * out_prob.unsqueeze(-1) + (1 - self.alpha) * iou
+ if out_is_valid is not None:
+ C = torch.where(out_is_valid[:, :, None], C, -1e9)
+ if target_is_valid_padded is not None:
+ C = torch.where(target_is_valid_padded[:, None, :], C, -1e9)
+
+ # Selecting topk predictions
+ matches = C > torch.quantile(
+ C, 1 - self.topk / num_queries, dim=1, keepdim=True
+ )
+
+ # Selecting predictions above threshold
+ matches = matches & (C > self.threshold)
+ if out_is_valid is not None:
+ matches = matches & out_is_valid[:, :, None]
+ if target_is_valid_padded is not None:
+ matches = matches & target_is_valid_padded[:, None, :]
+
+ # Removing padding
+ matches = matches & (
+ torch.arange(0, num_targets, device=num_boxes.device)[None]
+ < num_boxes[:, None]
+ ).unsqueeze(1)
+
+ batch_idx, src_idx, tgt_idx = torch.nonzero(matches, as_tuple=True)
+
+ cum_num_boxes = torch.cat(
+ [
+ torch.zeros(1, dtype=num_boxes.dtype, device=num_boxes.device),
+ num_boxes.cumsum(-1)[:-1],
+ ]
+ )
+ tgt_idx += cum_num_boxes[batch_idx]
+
+ return batch_idx, src_idx, tgt_idx
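+
+# Note on the top-k selection in BinaryOneToManyMatcher (added remark): the quantile
+# threshold `torch.quantile(C, 1 - topk / num_queries, dim=1, keepdim=True)` keeps roughly
+# the `topk` highest-scoring predictions per target. For example, with num_queries=100 and
+# topk=6, only entries above the 0.94-quantile of their column survive, i.e. about the 6
+# best predictions for that target, before the absolute `threshold` and validity filters
+# are applied.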
diff --git a/sam3/train/nms_helper.py b/sam3/train/nms_helper.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd5b6dc7b3f28878494bf06ac7ec71c879253304
--- /dev/null
+++ b/sam3/train/nms_helper.py
@@ -0,0 +1,306 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import warnings
+from typing import Dict, List
+
+import numpy as np
+
+# Check if Numba is available
+HAS_NUMBA = False
+try:
+ import numba as nb
+
+ HAS_NUMBA = True
+except ImportError:
+ warnings.warn(
+ "Numba not found. Using slower pure Python implementations.", UserWarning
+ )
+
+
+# -------------------- Helper Functions --------------------
+def is_zero_box(bbox: list) -> bool:
+ """Check if bounding box is invalid"""
+ if bbox is None:
+ return True
+ return all(x <= 0 for x in bbox[:4]) or len(bbox) < 4
+
+
+def convert_bbox_format(bbox: list) -> List[float]:
+ """Convert bbox from (x,y,w,h) to (x1,y1,x2,y2)"""
+ x, y, w, h = bbox
+ return [x, y, x + w, y + h]
+
+
+# -------------------- Track-level NMS --------------------
+def process_track_level_nms(video_groups: Dict, nms_threshold: float) -> Dict:
+ """Apply track-level NMS to all videos"""
+ for video_id, tracks in video_groups.items():
+ track_detections = []
+
+ # Process tracks
+ for track_idx, track in enumerate(tracks):
+ if not track["bboxes"]:
+ continue
+
+ converted_bboxes = []
+ valid_frames = []
+ for bbox in track["bboxes"]:
+ if bbox and not is_zero_box(bbox):
+ converted_bboxes.append(convert_bbox_format(bbox))
+ valid_frames.append(True)
+ else:
+ converted_bboxes.append([np.nan] * 4)
+ valid_frames.append(False)
+
+ if any(valid_frames):
+ track_detections.append(
+ {
+ "track_idx": track_idx,
+ "bboxes": np.array(converted_bboxes, dtype=np.float32),
+ "score": track["score"],
+ }
+ )
+
+ # Apply NMS
+ if track_detections:
+ scores = np.array([d["score"] for d in track_detections], dtype=np.float32)
+ keep = apply_track_nms(track_detections, scores, nms_threshold)
+
+ # Suppress non-kept tracks
+ for idx, track in enumerate(track_detections):
+ if idx not in keep:
+ tracks[track["track_idx"]]["bboxes"] = [None] * len(track["bboxes"])
+
+ return video_groups
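+
+# Illustrative input format (a toy sketch): `video_groups` maps a video id to a list of
+# tracks; each track stores one (x, y, w, h) box (or None) per frame plus a confidence score.
+#
+#   video_groups = {
+#       "vid0": [
+#           {"bboxes": [[10, 10, 20, 20], [11, 10, 20, 20]], "score": 0.9},
+#           {"bboxes": [[10, 11, 20, 20], [12, 10, 20, 20]], "score": 0.4},
+#       ]
+#   }
+#   video_groups = process_track_level_nms(video_groups, nms_threshold=0.5)
+#   # The two tracks overlap heavily in both frames, so the lower-scoring track's
+#   # bboxes are all replaced with None.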
+
+
+# -------------------- Frame-level NMS --------------------
+def process_frame_level_nms(video_groups: Dict, nms_threshold: float) -> Dict:
+ """Apply frame-level NMS to all videos"""
+ for video_id, tracks in video_groups.items():
+ if not tracks:
+ continue
+
+ num_frames = len(tracks[0]["bboxes"])
+
+ for frame_idx in range(num_frames):
+ frame_detections = []
+
+ # Collect valid detections
+ for track_idx, track in enumerate(tracks):
+ bbox = track["bboxes"][frame_idx]
+ if bbox and not is_zero_box(bbox):
+ frame_detections.append(
+ {
+ "track_idx": track_idx,
+ "bbox": np.array(
+ convert_bbox_format(bbox), dtype=np.float32
+ ),
+ "score": track["score"],
+ }
+ )
+
+ # Apply NMS
+ if frame_detections:
+ bboxes = np.stack([d["bbox"] for d in frame_detections])
+ scores = np.array(
+ [d["score"] for d in frame_detections], dtype=np.float32
+ )
+ keep = apply_frame_nms(bboxes, scores, nms_threshold)
+
+ # Suppress non-kept detections
+ for i, d in enumerate(frame_detections):
+ if i not in keep:
+ tracks[d["track_idx"]]["bboxes"][frame_idx] = None
+
+ return video_groups
+
+
+# Track-level NMS helpers ------------------------------------------------------
+def compute_track_iou_matrix(
+ bboxes_stacked: np.ndarray, valid_masks: np.ndarray, areas: np.ndarray
+) -> np.ndarray:
+ """IoU matrix computation for track-level NMS with fallback to pure Python"""
+ num_tracks = bboxes_stacked.shape[0]
+ iou_matrix = np.zeros((num_tracks, num_tracks), dtype=np.float32)
+ if HAS_NUMBA:
+ iou_matrix = _compute_track_iou_matrix_numba(bboxes_stacked, valid_masks, areas)
+ else:
+ # Pure Python implementation
+ for i in range(num_tracks):
+ for j in range(i + 1, num_tracks):
+ valid_ij = valid_masks[i] & valid_masks[j]
+ if not valid_ij.any():
+ continue
+ bboxes_i = bboxes_stacked[i, valid_ij]
+ bboxes_j = bboxes_stacked[j, valid_ij]
+ area_i = areas[i, valid_ij]
+ area_j = areas[j, valid_ij]
+ inter_total = 0.0
+ union_total = 0.0
+ for k in range(bboxes_i.shape[0]):
+ x1 = max(bboxes_i[k, 0], bboxes_j[k, 0])
+ y1 = max(bboxes_i[k, 1], bboxes_j[k, 1])
+ x2 = min(bboxes_i[k, 2], bboxes_j[k, 2])
+ y2 = min(bboxes_i[k, 3], bboxes_j[k, 3])
+ inter = max(0, x2 - x1) * max(0, y2 - y1)
+ union = area_i[k] + area_j[k] - inter
+ inter_total += inter
+ union_total += union
+ if union_total > 0:
+ iou_matrix[i, j] = inter_total / union_total
+ iou_matrix[j, i] = iou_matrix[i, j]
+ return iou_matrix
+
+
+if HAS_NUMBA:
+
+ @nb.jit(nopython=True, parallel=True)
+ def _compute_track_iou_matrix_numba(bboxes_stacked, valid_masks, areas):
+ """Numba-optimized IoU matrix computation for track-level NMS"""
+ num_tracks = bboxes_stacked.shape[0]
+ iou_matrix = np.zeros((num_tracks, num_tracks), dtype=np.float32)
+ for i in nb.prange(num_tracks):
+ for j in range(i + 1, num_tracks):
+ valid_ij = valid_masks[i] & valid_masks[j]
+ if not valid_ij.any():
+ continue
+ bboxes_i = bboxes_stacked[i, valid_ij]
+ bboxes_j = bboxes_stacked[j, valid_ij]
+ area_i = areas[i, valid_ij]
+ area_j = areas[j, valid_ij]
+ inter_total = 0.0
+ union_total = 0.0
+ for k in range(bboxes_i.shape[0]):
+ x1 = max(bboxes_i[k, 0], bboxes_j[k, 0])
+ y1 = max(bboxes_i[k, 1], bboxes_j[k, 1])
+ x2 = min(bboxes_i[k, 2], bboxes_j[k, 2])
+ y2 = min(bboxes_i[k, 3], bboxes_j[k, 3])
+ inter = max(0, x2 - x1) * max(0, y2 - y1)
+ union = area_i[k] + area_j[k] - inter
+ inter_total += inter
+ union_total += union
+ if union_total > 0:
+ iou_matrix[i, j] = inter_total / union_total
+ iou_matrix[j, i] = iou_matrix[i, j]
+ return iou_matrix
+
+
+def apply_track_nms(
+ track_detections: List[dict], scores: np.ndarray, nms_threshold: float
+) -> List[int]:
+ """Vectorized track-level NMS implementation"""
+ if not track_detections:
+ return []
+ bboxes_stacked = np.stack([d["bboxes"] for d in track_detections], axis=0)
+ valid_masks = ~np.isnan(bboxes_stacked).any(axis=2)
+ areas = (bboxes_stacked[:, :, 2] - bboxes_stacked[:, :, 0]) * (
+ bboxes_stacked[:, :, 3] - bboxes_stacked[:, :, 1]
+ )
+ areas[~valid_masks] = 0
+ iou_matrix = compute_track_iou_matrix(bboxes_stacked, valid_masks, areas)
+ keep = []
+ order = np.argsort(-scores)
+ suppress = np.zeros(len(track_detections), dtype=bool)
+ for i in range(len(order)):
+ if not suppress[order[i]]:
+ keep.append(order[i])
+ suppress[order[i:]] = suppress[order[i:]] | (
+ iou_matrix[order[i], order[i:]] >= nms_threshold
+ )
+ return keep
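+
+
+# Illustrative usage sketch (not part of the original module): track-level NMS
+# aggregates IoU over all co-valid frames (sum of intersections / sum of unions)
+# before comparing against the threshold. With two hypothetical tracks sharing the
+# same box in every frame and a third disjoint track:
+#   track_detections = [
+#       {"track_idx": 0, "bboxes": np.array([[0, 0, 10, 10]] * 3, np.float32), "score": 0.9},
+#       {"track_idx": 1, "bboxes": np.array([[0, 0, 10, 10]] * 3, np.float32), "score": 0.8},
+#       {"track_idx": 2, "bboxes": np.array([[50, 50, 60, 60]] * 3, np.float32), "score": 0.7},
+#   ]
+#   scores = np.array([0.9, 0.8, 0.7], np.float32)
+#   apply_track_nms(track_detections, scores, nms_threshold=0.5)  # -> [0, 2]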
+
+
+# Frame-level NMS helpers ------------------------------------------------------
+def compute_frame_ious(bbox: np.ndarray, bboxes: np.ndarray) -> np.ndarray:
+ """IoU computation for frame-level NMS with fallback to pure Python"""
+ if HAS_NUMBA:
+ return _compute_frame_ious_numba(bbox, bboxes)
+ else:
+ # Pure Python implementation
+ ious = np.zeros(len(bboxes), dtype=np.float32)
+ for i in range(len(bboxes)):
+ x1 = max(bbox[0], bboxes[i, 0])
+ y1 = max(bbox[1], bboxes[i, 1])
+ x2 = min(bbox[2], bboxes[i, 2])
+ y2 = min(bbox[3], bboxes[i, 3])
+
+ inter = max(0, x2 - x1) * max(0, y2 - y1)
+ area1 = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
+ area2 = (bboxes[i, 2] - bboxes[i, 0]) * (bboxes[i, 3] - bboxes[i, 1])
+ union = area1 + area2 - inter
+
+ ious[i] = inter / union if union > 0 else 0.0
+ return ious
+
+
+if HAS_NUMBA:
+
+ @nb.jit(nopython=True, parallel=True)
+ def _compute_frame_ious_numba(bbox, bboxes):
+ """Numba-optimized IoU computation"""
+ ious = np.zeros(len(bboxes), dtype=np.float32)
+ for i in nb.prange(len(bboxes)):
+ x1 = max(bbox[0], bboxes[i, 0])
+ y1 = max(bbox[1], bboxes[i, 1])
+ x2 = min(bbox[2], bboxes[i, 2])
+ y2 = min(bbox[3], bboxes[i, 3])
+
+ inter = max(0, x2 - x1) * max(0, y2 - y1)
+ area1 = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
+ area2 = (bboxes[i, 2] - bboxes[i, 0]) * (bboxes[i, 3] - bboxes[i, 1])
+ union = area1 + area2 - inter
+
+ ious[i] = inter / union if union > 0 else 0.0
+ return ious
+
+
+def apply_frame_nms(
+ bboxes: np.ndarray, scores: np.ndarray, nms_threshold: float
+) -> List[int]:
+ """Frame-level NMS implementation with fallback to pure Python"""
+ if HAS_NUMBA:
+ return _apply_frame_nms_numba(bboxes, scores, nms_threshold)
+ else:
+ # Pure Python implementation
+ order = np.argsort(-scores)
+ keep = []
+ suppress = np.zeros(len(bboxes), dtype=bool)
+
+ for i in range(len(order)):
+ if not suppress[order[i]]:
+ keep.append(order[i])
+ current_bbox = bboxes[order[i]]
+
+ remaining_bboxes = bboxes[order[i + 1 :]]
+ if len(remaining_bboxes) > 0: # Check if there are any remaining boxes
+ ious = compute_frame_ious(current_bbox, remaining_bboxes)
+ suppress[order[i + 1 :]] = suppress[order[i + 1 :]] | (
+ ious >= nms_threshold
+ )
+
+ return keep
+
+
+if HAS_NUMBA:
+
+ @nb.jit(nopython=True)
+ def _apply_frame_nms_numba(bboxes, scores, nms_threshold):
+ """Numba-optimized NMS implementation"""
+ order = np.argsort(-scores)
+ keep = []
+ suppress = np.zeros(len(bboxes), dtype=nb.boolean)
+
+ for i in range(len(order)):
+ if not suppress[order[i]]:
+ keep.append(order[i])
+ current_bbox = bboxes[order[i]]
+
+ if i + 1 < len(order): # Check bounds
+ ious = _compute_frame_ious_numba(
+ current_bbox, bboxes[order[i + 1 :]]
+ )
+ suppress[order[i + 1 :]] = suppress[order[i + 1 :]] | (
+ ious >= nms_threshold
+ )
+
+ return keep
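+
+
+# Illustrative usage sketch (not part of the original module): frame-level NMS on
+# three hypothetical boxes in (x1, y1, x2, y2) format. Boxes 0 and 1 overlap with
+# IoU ~0.68, so the lower-scoring box 1 is suppressed at threshold 0.5:
+#   boxes = np.array([[0, 0, 10, 10], [1, 1, 11, 11], [20, 20, 30, 30]], np.float32)
+#   scores = np.array([0.9, 0.8, 0.7], np.float32)
+#   apply_frame_nms(boxes, scores, nms_threshold=0.5)  # -> keeps indices [0, 2]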
diff --git a/sam3/train/optim/__init__.py b/sam3/train/optim/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/train/optim/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/train/optim/optimizer.py b/sam3/train/optim/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..d401b98f2fff967cdbe5bf5c9db7c78f21b30503
--- /dev/null
+++ b/sam3/train/optim/optimizer.py
@@ -0,0 +1,498 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import fnmatch
+import inspect
+import itertools
+import logging
+import types
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ Iterable,
+ List,
+ Mapping,
+ Optional,
+ Set,
+ Tuple,
+ Type,
+ Union,
+)
+
+import hydra
+
+import torch
+import torch.nn as nn
+from omegaconf import DictConfig
+from torch import Tensor
+
+
+class Optimizer:
+ def __init__(self, optimizer, schedulers=None) -> None:
+ self.optimizer = optimizer
+ self.schedulers = schedulers
+ self._validate_optimizer_schedulers()
+ self.step_schedulers(0.0, 0)
+
+ def _validate_optimizer_schedulers(self):
+ if self.schedulers is None:
+ return
+ for _, set_of_schedulers in enumerate(self.schedulers):
+ for option, _ in set_of_schedulers.items():
+ assert option in self.optimizer.defaults, (
+ "Optimizer option "
+ f"{option} not found in {self.optimizer}. Valid options are "
+ f"{self.optimizer.defaults.keys()}"
+ )
+
+ def step_schedulers(self, where: float, step: int) -> None:
+ if self.schedulers is None:
+ return
+ for i, param_group in enumerate(self.optimizer.param_groups):
+ for option, scheduler in self.schedulers[i].items():
+ if "step" in inspect.signature(scheduler.__call__).parameters:
+ new_value = scheduler(step=step, where=where)
+ elif (
+ hasattr(scheduler, "scheduler")
+ and "step"
+ in inspect.signature(scheduler.scheduler.__call__).parameters
+ ):
+ # To handle ValueScaler wrappers
+ new_value = scheduler(step=step, where=where)
+ else:
+ new_value = scheduler(where)
+ param_group[option] = new_value
+
+ def step(self, where, step, closure=None):
+ self.step_schedulers(where, step)
+ return self.optimizer.step(closure)
+
+ def zero_grad(self, *args, **kwargs):
+ return self.optimizer.zero_grad(*args, **kwargs)
+
+
+def set_default_parameters(
+ scheduler_cfgs: List[DictConfig], all_parameter_names: Set[str]
+) -> None:
+ """Set up the "default" scheduler with the right parameters.
+
+ Args:
+        scheduler_cfgs: A list of scheduler configs, where each scheduler also
+ specifies which parameters it applies to, based on the names of parameters
+ or the class of the modules. At most one scheduler is allowed to skip this
+ specification, which is used as a "default" specification for any remaining
+ parameters.
+ all_parameter_names: Names of all the parameters to consider.
+ """
+ constraints = [
+ scheduler_cfg.parameter_names
+ for scheduler_cfg in scheduler_cfgs
+ if scheduler_cfg.parameter_names is not None
+ ]
+ if len(constraints) == 0:
+ default_params = set(all_parameter_names)
+ else:
+ default_params = all_parameter_names - set.union(*constraints)
+ default_count = 0
+ for scheduler_cfg in scheduler_cfgs:
+ if scheduler_cfg.parameter_names is None:
+ scheduler_cfg.parameter_names = default_params
+ default_count += 1
+ assert default_count <= 1, "Only one scheduler per option can be default"
+ if default_count == 0:
+ # No default scheduler specified, add a default, but without any scheduler
+ # for that option
+ scheduler_cfgs.append({"parameter_names": default_params})
+
+
+def name_constraints_to_parameters(
+ param_constraints: List[Set[str]], named_parameters: Dict[str, Tensor]
+) -> List[torch.nn.Parameter]:
+ """Return parameters which match the intersection of parameter constraints.
+
+ Note that this returns the parameters themselves, not their names.
+
+ Args:
+ param_constraints: A list, with each element being a set of allowed parameters.
+ named_parameters: Mapping from a parameter name to the parameter itself.
+
+ Returns:
+ A list containing the parameters which overlap with _each_ constraint set from
+ param_constraints.
+ """
+ matching_names = set.intersection(*param_constraints)
+ return [value for name, value in named_parameters.items() if name in matching_names]
+
+
+def map_scheduler_cfgs_to_param_groups(
+ all_scheduler_cfgs: Iterable[List[Dict]],
+ named_parameters: Dict[str, Tensor],
+) -> Tuple[List[Dict[Any, Any]], List[Dict[str, List[torch.nn.Parameter]]]]:
+ """Produce parameter groups corresponding to all the scheduler configs.
+
+ Takes all the scheduler configs, each of which applies to a specific optimizer
+ option (like "lr" or "weight_decay") and has a set of parameter names which it
+ applies to, and produces a final set of param groups where each param group
+ covers all the options which apply to a particular set of parameters.
+
+ Args:
+ all_scheduler_cfgs: All the scheduler configs covering every option.
+ named_parameters: Mapping from a parameter name to the parameter itself.
+ Returns:
+ Tuple of lists of schedulers and param_groups, where schedulers[i]
+ applies to param_groups[i].
+ """
+
+ scheduler_cfgs_per_param_group = itertools.product(*all_scheduler_cfgs)
+ schedulers = []
+ param_groups = []
+ for scheduler_cfgs in scheduler_cfgs_per_param_group:
+ param_constraints = [
+ scheduler_cfg["parameter_names"] for scheduler_cfg in scheduler_cfgs
+ ]
+ matching_parameters = name_constraints_to_parameters(
+ param_constraints, named_parameters
+ )
+ if len(matching_parameters) == 0: # If no overlap of parameters, skip
+ continue
+ schedulers_for_group = {
+ scheduler_cfg["option"]: scheduler_cfg["scheduler"]
+ for scheduler_cfg in scheduler_cfgs
+ if "option" in scheduler_cfg
+ }
+ schedulers.append(schedulers_for_group)
+ param_groups.append({"params": matching_parameters})
+ return schedulers, param_groups
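+
+
+# Illustrative sketch of map_scheduler_cfgs_to_param_groups (assumed config shape, not
+# taken from this repo's configs): with two "lr" scheduler configs (one constrained to
+# "backbone.*", one default) and a single default "weight_decay" config, the cartesian
+# product above yields two param groups, one per combination of constraints:
+#   {"lr": backbone_lr_sched, "weight_decay": wd_sched}  -> parameters matching "backbone.*"
+#   {"lr": default_lr_sched,  "weight_decay": wd_sched}  -> all remaining parameters
+# and param_groups[i] holds exactly the parameters covered by schedulers[i].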
+
+
+def validate_param_group_params(param_groups: List[Dict], model: nn.Module):
+ """Check that the param groups are non-overlapping and cover all the parameters.
+
+ Args:
+ param_groups: List of all param groups
+ model: Model to validate against. The check ensures that all the model
+ parameters are part of param_groups
+ """
+ for pg in param_groups:
+ # no param should be repeated within a group
+ assert len(pg["params"]) == len(set(pg["params"]))
+ parameters = [set(param_group["params"]) for param_group in param_groups]
+ model_parameters = {parameter for _, parameter in model.named_parameters()}
+ for p1, p2 in itertools.permutations(parameters, 2):
+ assert p1.isdisjoint(p2), "Scheduler generated param_groups should be disjoint"
+ assert set.union(*parameters) == model_parameters, (
+ "Scheduler generated param_groups must include all parameters of the model."
+ f" Found {len(set.union(*parameters))} params whereas model has"
+ f" {len(model_parameters)} params"
+ )
+
+
+def unix_module_cls_pattern_to_parameter_names(
+ filter_module_cls_names: List[str],
+ module_cls_to_param_names: Dict[Type, str],
+) -> Union[None, Set[str]]:
+ """Returns param names which pass the filters specified in filter_module_cls_names.
+
+ Args:
+ filter_module_cls_names: A list of filter strings containing class names, like
+ ["torch.nn.LayerNorm", "torch.nn.BatchNorm2d"]
+ module_cls_to_param_names: Mapping from module classes to the parameter names
+ they contain. See `get_module_cls_to_param_names`.
+ """
+ if filter_module_cls_names is None:
+ return set()
+ allowed_parameter_names = []
+ for module_cls_name in filter_module_cls_names:
+ module_cls = hydra.utils.get_class(module_cls_name)
+ if module_cls not in module_cls_to_param_names:
+ raise AssertionError(
+ f"module_cls_name {module_cls_name} does not "
+ "match any classes in the model"
+ )
+ matching_parameters = module_cls_to_param_names[module_cls]
+ assert (
+ len(matching_parameters) > 0
+ ), f"module_cls_name {module_cls_name} does not contain any parameters in the model"
+ logging.info(
+ f"Matches for module_cls_name [{module_cls_name}]: {matching_parameters} "
+ )
+ allowed_parameter_names.append(matching_parameters)
+ return set.union(*allowed_parameter_names)
+
+
+def unix_param_pattern_to_parameter_names(
+ filter_param_names: Optional[List[str]],
+ parameter_names: Dict[str, torch.Tensor],
+) -> Union[None, Set[str]]:
+ """Returns param names which pass the filters specified in filter_param_names.
+
+ Args:
+ filter_param_names: A list of unix-style filter strings with optional
+ wildcards, like ["block.2.*", "block.2.linear.weight"]
+        parameter_names: Mapping from parameter names to the parameters themselves;
+            only the keys are used for the unix-pattern matching.
+ """
+
+ if filter_param_names is None:
+ return set()
+ allowed_parameter_names = []
+ for param_name in filter_param_names:
+ matching_parameters = set(fnmatch.filter(parameter_names, param_name))
+ assert (
+ len(matching_parameters) >= 1
+ ), f"param_name {param_name} does not match any parameters in the model"
+ logging.info(f"Matches for param_name [{param_name}]: {matching_parameters}")
+ allowed_parameter_names.append(matching_parameters)
+ return set.union(*allowed_parameter_names)
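+
+
+# Illustrative sketch of unix_param_pattern_to_parameter_names (hypothetical names):
+# with parameter_names containing "backbone.blocks.0.attn.qkv.weight" and
+# "head.linear.bias", a filter like ["backbone.blocks.*"] matches only the first key,
+# while ["*.bias"] matches only the second; the union of matches across all filters
+# is returned.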
+
+
+def _unix_pattern_to_parameter_names(
+ scheduler_cfg: DictConfig,
+ parameter_names: Set[str],
+ module_cls_to_param_names: Dict[Type, str],
+) -> Union[None, Set[str]]:
+ """Returns param names which pass the filters specified in scheduler_cfg.
+
+    Args:
+        scheduler_cfg: The config for the scheduler
+        parameter_names: The set of all parameter names which will be filtered
+        module_cls_to_param_names: Mapping from module classes to the parameter names
+            they contain. See `get_module_cls_to_param_names`.
+    """
+ if "param_names" not in scheduler_cfg and "module_cls_names" not in scheduler_cfg:
+ return None
+ return unix_param_pattern_to_parameter_names(
+ scheduler_cfg.get("param_names"), parameter_names
+ ).union(
+ unix_module_cls_pattern_to_parameter_names(
+ scheduler_cfg.get("module_cls_names"), module_cls_to_param_names
+ )
+ )
+
+
+def get_module_cls_to_param_names(
+ model: nn.Module, param_allowlist: Set[str] = None
+) -> Dict[Type, str]:
+ """Produce a mapping from all the modules classes to the names of parames they own.
+
+ Only counts a parameter as part of the immediate parent module, i.e. recursive
+ parents do not count.
+
+ Args:
+ model: Model to iterate over
+ param_allowlist: If specified, only these param names will be processed
+ """
+
+ module_cls_to_params = {}
+ for module_name, module in model.named_modules():
+ module_cls = type(module)
+ module_cls_to_params.setdefault(module_cls, set())
+ for param_name, _ in module.named_parameters(recurse=False):
+ full_param_name = get_full_parameter_name(module_name, param_name)
+ if param_allowlist is None or full_param_name in param_allowlist:
+ module_cls_to_params[module_cls].add(full_param_name)
+ return module_cls_to_params
+
+
+def construct_optimizer(
+ model: torch.nn.Module,
+ optimizer_conf: Any,
+ options_conf: Mapping[str, List] = None,
+ param_group_modifiers_conf: List[Callable] = None,
+ param_allowlist: Optional[Set[str]] = None,
+ validate_param_groups=True,
+) -> Optimizer:
+ """
+    Constructs a stochastic gradient descent or ADAM (or AdamW) optimizer with
+    momentum, i.e. constructs a torch.optim.Optimizer with support for zero weight
+    decay on BatchNorm parameters and/or not updating 1-D parameters, based on the config.
+
+    Supports wrapping the optimizer with Layer-wise Adaptive Rate Scaling
+    (LARS): https://arxiv.org/abs/1708.03888
+
+    Args:
+        model: model whose parameters will be optimized (with SGD, ADAM, etc.).
+        optimizer_conf: Hydra config describing a partial torch optimizer like SGD or
+            ADAM, still missing the params argument which this function provides to
+            produce the final optimizer
+        options_conf: Mapping from optimizer options (e.g. "lr", "weight_decay") to
+            lists of scheduler configs; if empty, a single param group containing all
+            allowed parameters is used
+        param_group_modifiers_conf: Optional user-specified functions which can modify
+            the final scheduler configs before the optimizer's param groups are built
+        param_allowlist: The parameters to optimize. Parameters which are not part of
+            this allowlist will be skipped.
+        validate_param_groups: If enabled, validates that the produced param_groups
+            don't overlap and cover all the model parameters.
+    """
+ if param_allowlist is None:
+ param_allowlist = {name for name, _ in model.named_parameters()}
+
+ named_parameters = {
+ name: param
+ for name, param in model.named_parameters()
+ if name in param_allowlist
+ }
+
+ if not options_conf:
+ optimizer = hydra.utils.instantiate(optimizer_conf, named_parameters.values())
+ return Optimizer(optimizer)
+
+ all_parameter_names = {
+ name for name, _ in model.named_parameters() if name in param_allowlist
+ }
+ module_cls_to_all_param_names = get_module_cls_to_param_names(
+ model, param_allowlist
+ )
+
+ scheduler_cfgs_per_option = hydra.utils.instantiate(options_conf)
+ all_scheduler_cfgs = []
+ for option, scheduler_cfgs in scheduler_cfgs_per_option.items():
+ for config in scheduler_cfgs:
+ config.option = option
+ config.parameter_names = _unix_pattern_to_parameter_names(
+ config, all_parameter_names, module_cls_to_all_param_names
+ )
+ set_default_parameters(scheduler_cfgs, all_parameter_names)
+ all_scheduler_cfgs.append(scheduler_cfgs)
+
+ if param_group_modifiers_conf:
+ for custom_param_modifier in param_group_modifiers_conf:
+ custom_param_modifier = hydra.utils.instantiate(custom_param_modifier)
+ all_scheduler_cfgs = custom_param_modifier(
+ scheduler_cfgs=all_scheduler_cfgs, model=model
+ )
+ schedulers, param_groups = map_scheduler_cfgs_to_param_groups(
+ all_scheduler_cfgs, named_parameters
+ )
+ if validate_param_groups:
+ validate_param_group_params(param_groups, model)
+ optimizer = hydra.utils.instantiate(optimizer_conf, param_groups)
+ return Optimizer(optimizer, schedulers)
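+
+
+# Illustrative options_conf shape for construct_optimizer (an assumption for
+# documentation purposes; the actual Hydra configs in this repo may differ, and in
+# practice these are OmegaConf configs instantiated from YAML rather than plain dicts).
+# Each optimizer option maps to a list of scheduler configs, and at most one config per
+# option may omit param_names / module_cls_names to act as the default group:
+#   options_conf = {
+#       "lr": [
+#           {"scheduler": backbone_lr_sched, "param_names": ["backbone.*"]},
+#           {"scheduler": default_lr_sched},
+#       ],
+#       "weight_decay": [{"scheduler": wd_sched}],
+#   }
+#   optim = construct_optimizer(model, optimizer_conf, options_conf)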
+
+
+def get_full_parameter_name(module_name, param_name):
+ if module_name == "":
+ return param_name
+ return f"{module_name}.{param_name}"
+
+
+class GradientClipper:
+ """
+    Gradient clipping utility that works with DDP
+ """
+
+ def __init__(self, max_norm: float = 1.0, norm_type: int = 2):
+ assert isinstance(max_norm, (int, float)) or max_norm is None
+ self.max_norm = max_norm if max_norm is None else float(max_norm)
+ self.norm_type = norm_type
+
+ def __call__(self, model: nn.Module):
+ if self.max_norm is None:
+ return # no-op
+
+ nn.utils.clip_grad_norm_(
+ model.parameters(), max_norm=self.max_norm, norm_type=self.norm_type
+ )
+
+
+class ValueScaler:
+ def __init__(self, scheduler, mult_val: float):
+ self.scheduler = scheduler
+ self.mult_val = mult_val
+
+ def __call__(self, *args, **kwargs):
+ val = self.scheduler(*args, **kwargs)
+ return val * self.mult_val
+
+
+def rgetattr(obj, rattrs: str = None):
+ """
+ Like getattr(), but supports dotted notation for nested objects.
+ rattrs is a str of form 'attr1.attr2', returns obj.attr1.attr2
+ """
+ if rattrs is None:
+ return obj
+ attrs = rattrs.split(".")
+ for attr in attrs:
+ obj = getattr(obj, attr)
+ return obj
+
+
+def layer_decay_param_modifier(
+ scheduler_cfgs: List[List[Dict]],
+ model,
+ layer_decay_value: float,
+ layer_decay_min: Optional[float] = None,
+ apply_to: Optional[str] = None,
+ overrides: List[Dict] = (),
+) -> List[List[Dict]]:
+ """
+ Args
+ - scheduler_cfgs: a list of omegaconf.ListConfigs.
+ Each element in the list is a omegaconfg.DictConfig with the following structure
+ {
+ "scheduler":
+ "option": possible options are "lr", "weight_decay" etc.
+ "parameter_names": Set of str indicating param names that this scheduler applies to
+ }
+ - model: a model that implements a method `get_layer_id` that maps layer_name to an integer and
+ and a method get_num_layers.
+ Alternatively, use apply_to argument to select a specific component of the model.
+ - layer_decay_value: float
+ - layer_decay_min: min val for layer decay
+ - apply_to: optional arg to select which component of the model to apply the the layer decay modifier to
+ - overrides: to manually override lr for specific patterns. Is a list of dicts. Each dict, has keys "pattern", "value".
+ Returns
+ - scheduler_configs: same structure as the input, elements can be modified
+ """
+ model = rgetattr(model, apply_to)
+ num_layers = model.get_num_layers() + 1
+ layer_decays = [
+ layer_decay_value ** (num_layers - i) for i in range(num_layers + 1)
+ ]
+ if layer_decay_min is not None:
+ layer_decays = [max(val, layer_decay_min) for val in layer_decays]
+ final_scheduler_cfgs = []
+ # scheduler_cfgs is a list of lists
+ for scheduler_cfg_group in scheduler_cfgs:
+ curr_cfg_group = []
+ # scheduler_cfg_group is a list of dictionaries
+ for scheduler_cfg in scheduler_cfg_group:
+ if scheduler_cfg["option"] != "lr":
+ curr_cfg_group.append(scheduler_cfg)
+ continue
+ # Need sorted so that the list of parameter names is deterministic and consistent
+ # across re-runs of this job. Else it was causing issues with loading the optimizer
+ # state during a job restart
+ parameter_names = sorted(scheduler_cfg["parameter_names"])
+
+ # Only want one cfg group per layer
+ layer_cfg_groups = {}
+ for param_name in parameter_names:
+ layer_id = num_layers
+ this_scale = layer_decays[layer_id]
+ if param_name.startswith(apply_to):
+ layer_id = model.get_layer_id(param_name)
+ this_scale = layer_decays[layer_id]
+ # Overrides
+ for override in overrides:
+ if fnmatch.fnmatchcase(param_name, override["pattern"]):
+ this_scale = float(override["value"])
+ layer_id = override["pattern"]
+ break
+
+ if layer_id not in layer_cfg_groups:
+ curr_param = {
+ "option": scheduler_cfg["option"],
+ "scheduler": ValueScaler(
+ scheduler_cfg["scheduler"], this_scale
+ ),
+ "parameter_names": {param_name},
+ }
+ else:
+ curr_param = layer_cfg_groups[layer_id]
+ curr_param["parameter_names"].add(param_name)
+ layer_cfg_groups[layer_id] = curr_param
+
+ for layer_cfg in layer_cfg_groups.values():
+ curr_cfg_group.append(layer_cfg)
+
+ final_scheduler_cfgs.append(curr_cfg_group)
+ return final_scheduler_cfgs
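+
+
+# Illustrative numbers for layer_decay_param_modifier (assuming layer_decay_value=0.9
+# and model.get_num_layers() == 3, so num_layers == 4): layer_decays ==
+# [0.9**4, 0.9**3, 0.9**2, 0.9, 1.0]. A parameter assigned layer_id 0 has its lr
+# scheduler wrapped in ValueScaler with scale 0.9**4, while parameters outside
+# `apply_to` default to layer_id == num_layers and scale 1.0.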
diff --git a/sam3/train/optim/schedulers.py b/sam3/train/optim/schedulers.py
new file mode 100644
index 0000000000000000000000000000000000000000..59da840f1d0a854e816710273f341c471490b788
--- /dev/null
+++ b/sam3/train/optim/schedulers.py
@@ -0,0 +1,41 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import math
+
+
+class InverseSquareRootParamScheduler:
+ def __init__(
+ self,
+ base_lr: float,
+ warmup_steps: int,
+ cooldown_steps: int,
+ timescale: int,
+ ):
+ self.base_lr = base_lr
+ self.warmup_steps = warmup_steps
+ self.cooldown_steps = cooldown_steps
+ self.timescale = timescale
+
+ def __call__(self, step: int, where: float):
+ lr = self.base_lr
+
+        if where > 0:
+            total_steps = step / where
+        else:
+            total_steps = 1
+
+ shift = self.timescale - self.warmup_steps
+ if self.warmup_steps < step:
+ lr = lr / math.sqrt((step + shift) / self.timescale)
+
+ if self.warmup_steps:
+ lr = lr * min(1.0, step / self.warmup_steps)
+ if self.cooldown_steps:
+ lr = lr * min(1.0, (total_steps - step) / self.cooldown_steps)
+
+ return lr
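+
+
+# Illustrative values (assuming base_lr=1.0, warmup_steps=100, cooldown_steps=0,
+# timescale=100): the lr ramps linearly to base_lr over the first 100 steps
+# (e.g. 0.5 at step 50), then decays as base_lr / sqrt(step / timescale),
+# e.g. ~0.5 at step 400 and ~0.25 at step 1600.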
diff --git a/sam3/train/train.py b/sam3/train/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..b3e995c5e615f1c9bd8c6c8a1f93611166443e33
--- /dev/null
+++ b/sam3/train/train.py
@@ -0,0 +1,339 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+import os
+import random
+import sys
+import traceback
+from argparse import ArgumentParser
+from copy import deepcopy
+
+import submitit
+import torch
+
+from hydra import compose, initialize_config_module
+from hydra.utils import instantiate
+
+from iopath.common.file_io import g_pathmgr
+from omegaconf import OmegaConf
+
+from sam3.train.utils.train_utils import makedir, register_omegaconf_resolvers
+from tqdm import tqdm
+
+
+os.environ["HYDRA_FULL_ERROR"] = "1"
+
+
+class SlurmEvent:
+ QUEUED = "QUEUED"
+ START = "START"
+ FINISH = "FINISH"
+ JOB_ERROR = "JOB_ERROR"
+ SLURM_SIGNAL = "SLURM_SIGNAL"
+
+
+def handle_custom_resolving(cfg):
+ # We'll resolve the config here, so we can catch mistakes early.
+ # However, we need to pass the un-resolved config to the launcher
+ # (because DVC resolving needs to be done on the node it will run on)
+ # First, do a copy without triggering resolving
+ cfg_resolved = OmegaConf.to_container(cfg, resolve=False)
+ cfg_resolved = OmegaConf.create(cfg_resolved)
+ return cfg_resolved
+
+
+def single_proc_run(local_rank, main_port, cfg, world_size):
+ """Single GPU process"""
+ os.environ["MASTER_ADDR"] = "localhost"
+ os.environ["MASTER_PORT"] = str(main_port)
+ os.environ["RANK"] = str(local_rank)
+ os.environ["LOCAL_RANK"] = str(local_rank)
+ os.environ["WORLD_SIZE"] = str(world_size)
+ try:
+ register_omegaconf_resolvers()
+ except Exception as e:
+ logging.info(e)
+
+ trainer = instantiate(cfg.trainer, _recursive_=False)
+ trainer.run()
+
+
+def single_node_runner(cfg, main_port: int):
+ assert cfg.launcher.num_nodes == 1
+ # assert cfg.launcher.gpus_per_node == 1
+ num_proc = cfg.launcher.gpus_per_node
+ torch.multiprocessing.set_start_method(
+ "spawn"
+ ) # CUDA runtime does not support `fork`
+ if num_proc == 1:
+ # directly call single_proc so we can easily set breakpoints
+ # mp.spawn does not let us set breakpoints
+ single_proc_run(local_rank=0, main_port=main_port, cfg=cfg, world_size=num_proc)
+ else:
+ mp_runner = torch.multiprocessing.start_processes
+ args = (main_port, cfg, num_proc)
+ # Note: using "fork" below, "spawn" causes time and error regressions. Using
+ # spawn changes the default multiprocessing context to spawn, which doesn't
+ # interact well with the dataloaders (likely due to the use of OpenCV).
+ mp_runner(single_proc_run, args=args, nprocs=num_proc, start_method="spawn")
+
+
+def format_exception(e: Exception, limit=20):
+ traceback_str = "".join(traceback.format_tb(e.__traceback__, limit=limit))
+ return f"{type(e).__name__}: {e}\nTraceback:\n{traceback_str}"
+
+
+class SubmititRunner(submitit.helpers.Checkpointable):
+ """A callable which is passed to submitit to launch the jobs."""
+
+ def __init__(self, port, cfg):
+ self.cfg = cfg
+ self.port = port
+ self.has_setup = False
+
+ def run_trainer(self):
+ job_env = submitit.JobEnvironment()
+ # Need to add this again so the hydra.job.set_env PYTHONPATH
+ # is also set when launching jobs.
+ add_pythonpath_to_sys_path()
+ os.environ["MASTER_ADDR"] = job_env.hostnames[0]
+ os.environ["MASTER_PORT"] = str(self.port)
+ os.environ["RANK"] = str(job_env.global_rank)
+ os.environ["LOCAL_RANK"] = str(job_env.local_rank)
+ os.environ["WORLD_SIZE"] = str(job_env.num_tasks)
+
+ register_omegaconf_resolvers()
+ cfg_resolved = OmegaConf.to_container(self.cfg, resolve=False)
+ cfg_resolved = OmegaConf.create(cfg_resolved)
+
+ trainer = instantiate(cfg_resolved.trainer, _recursive_=False)
+ trainer.run()
+
+ def __call__(self):
+ job_env = submitit.JobEnvironment()
+ self.setup_job_info(job_env.job_id, job_env.global_rank)
+ try:
+ self.run_trainer()
+ except Exception as e:
+            # Log the exception, then re-raise it so the failure is propagated to submitit.
+ message = format_exception(e)
+ logging.error(message)
+ raise e
+
+ def setup_job_info(self, job_id, rank):
+ """Set up slurm job info"""
+ self.job_info = {
+ "job_id": job_id,
+ "rank": rank,
+ "cluster": self.cfg.get("cluster", None),
+ "experiment_log_dir": self.cfg.launcher.experiment_log_dir,
+ }
+
+ self.has_setup = True
+
+
+def add_pythonpath_to_sys_path():
+ if "PYTHONPATH" not in os.environ or not os.environ["PYTHONPATH"]:
+ return
+ sys.path = os.environ["PYTHONPATH"].split(":") + sys.path
+
+
+def main(args) -> None:
+ cfg = compose(config_name=args.config)
+ if cfg.launcher.experiment_log_dir is None:
+ cfg.launcher.experiment_log_dir = os.path.join(
+ os.getcwd(), "sam3_logs", args.config
+ )
+ print("###################### Train App Config ####################")
+ print(OmegaConf.to_yaml(cfg))
+ print("############################################################")
+
+ add_pythonpath_to_sys_path()
+ makedir(cfg.launcher.experiment_log_dir)
+ with g_pathmgr.open(
+ os.path.join(cfg.launcher.experiment_log_dir, "config.yaml"), "w"
+ ) as f:
+ f.write(OmegaConf.to_yaml(cfg))
+
+ cfg_resolved = OmegaConf.to_container(cfg, resolve=False)
+ cfg_resolved = OmegaConf.create(cfg_resolved)
+
+ with g_pathmgr.open(
+ os.path.join(cfg.launcher.experiment_log_dir, "config_resolved.yaml"), "w"
+ ) as f:
+ f.write(OmegaConf.to_yaml(cfg_resolved, resolve=True))
+
+ submitit_conf = cfg.get("submitit", None)
+ assert submitit_conf is not None, "Missing submitit config"
+
+ experiment_log_dir = cfg.launcher.experiment_log_dir
+ print(f"Experiment Log Dir:\n{experiment_log_dir}")
+ submitit_dir = os.path.join(experiment_log_dir, "submitit_logs")
+
+ # Prioritize cmd line args
+ cfg.launcher.gpus_per_node = (
+ args.num_gpus if args.num_gpus is not None else cfg.launcher.gpus_per_node
+ )
+ cfg.launcher.num_nodes = (
+ args.num_nodes if args.num_nodes is not None else cfg.launcher.num_nodes
+ )
+ submitit_conf.use_cluster = (
+ args.use_cluster if args.use_cluster is not None else submitit_conf.use_cluster
+ )
+ if submitit_conf.use_cluster:
+ executor = submitit.AutoExecutor(folder=submitit_dir)
+ submitit_conf.partition = (
+ args.partition
+ if args.partition is not None
+ else submitit_conf.get("partition", None)
+ )
+ submitit_conf.account = (
+ args.account
+ if args.account is not None
+ else submitit_conf.get("account", None)
+ )
+ submitit_conf.qos = (
+ args.qos if args.qos is not None else submitit_conf.get("qos", None)
+ )
+ job_kwargs = {
+ "timeout_min": 60 * submitit_conf.timeout_hour,
+ "name": (
+ submitit_conf.name if hasattr(submitit_conf, "name") else args.config
+ ),
+ "slurm_partition": submitit_conf.partition,
+ "gpus_per_node": cfg.launcher.gpus_per_node,
+ "tasks_per_node": cfg.launcher.gpus_per_node, # one task per GPU
+ "cpus_per_task": submitit_conf.cpus_per_task,
+ "nodes": cfg.launcher.num_nodes,
+ "slurm_additional_parameters": {
+ "exclude": " ".join(submitit_conf.get("exclude_nodes", [])),
+ },
+ }
+ if "include_nodes" in submitit_conf:
+ assert (
+ len(submitit_conf["include_nodes"]) >= cfg.launcher.num_nodes
+ ), "Not enough nodes"
+ job_kwargs["slurm_additional_parameters"]["nodelist"] = " ".join(
+ submitit_conf["include_nodes"]
+ )
+ if submitit_conf.account is not None:
+ job_kwargs["slurm_additional_parameters"]["account"] = submitit_conf.account
+ if submitit_conf.qos is not None:
+ job_kwargs["slurm_additional_parameters"]["qos"] = submitit_conf.qos
+
+ if submitit_conf.get("mem_gb", None) is not None:
+ job_kwargs["mem_gb"] = submitit_conf.mem_gb
+ elif submitit_conf.get("mem", None) is not None:
+ job_kwargs["slurm_mem"] = submitit_conf.mem
+
+ if submitit_conf.get("constraints", None) is not None:
+ job_kwargs["slurm_constraint"] = submitit_conf.constraints
+
+ if submitit_conf.get("comment", None) is not None:
+ job_kwargs["slurm_comment"] = submitit_conf.comment
+
+ # Supports only cpu-bind option within srun_args. New options can be added here
+ if submitit_conf.get("srun_args", None) is not None:
+ job_kwargs["slurm_srun_args"] = []
+ if submitit_conf.srun_args.get("cpu_bind", None) is not None:
+ job_kwargs["slurm_srun_args"].extend(
+ ["--cpu-bind", submitit_conf.srun_args.cpu_bind]
+ )
+
+ print("###################### SLURM Config ####################")
+ print(job_kwargs)
+ print("##########################################")
+ executor.update_parameters(**job_kwargs)
+
+ if (
+ "job_array" in submitit_conf
+ and submitit_conf.job_array.get("num_tasks", -1) > 0
+ ):
+ num_tasks = submitit_conf.job_array.num_tasks
+ job_array_config_dir = os.path.join(
+ cfg.launcher.experiment_log_dir, "job_array_configs"
+ )
+ makedir(job_array_config_dir)
+
+ job_indices = range(num_tasks)
+ ports = random.sample(
+ range(submitit_conf.port_range[0], submitit_conf.port_range[1] + 1),
+ k=len(job_indices),
+ )
+
+ jobs_runners_configs = []
+ with executor.batch():
+ task_index = 0
+ for indices, main_port in tqdm(zip(job_indices, ports)):
+ curr_cfg = deepcopy(cfg)
+ curr_cfg.submitit.job_array["task_index"] = task_index
+ curr_cfg_resolved = handle_custom_resolving(cfg)
+ runner = SubmititRunner(main_port, curr_cfg)
+ job = executor.submit(runner)
+ jobs_runners_configs.append(
+ (job, runner, curr_cfg, curr_cfg_resolved)
+ )
+ task_index += 1
+
+ for job, runner, job_cfg, job_cfg_resolved in jobs_runners_configs:
+ print("Submitit Job ID:", job.job_id)
+
+ # Save job specific config
+ job_array_config_file = os.path.join(
+ job_array_config_dir, "{}.config.yaml".format(job.job_id)
+ )
+ with g_pathmgr.open(job_array_config_file, "w") as f:
+ f.write(OmegaConf.to_yaml(job_cfg))
+
+ job_array_config_resolved_file = os.path.join(
+ job_array_config_dir, "{}.config_resolved.yaml".format(job.job_id)
+ )
+ with g_pathmgr.open(job_array_config_resolved_file, "w") as f:
+ f.write(OmegaConf.to_yaml(job_cfg_resolved, resolve=True))
+
+ runner.setup_job_info(job.job_id, rank=0)
+ # runner.log_event(event_type=SlurmEvent.QUEUED)
+ else:
+ main_port = random.randint(
+ submitit_conf.port_range[0], submitit_conf.port_range[1]
+ )
+ runner = SubmititRunner(main_port, cfg)
+ job = executor.submit(runner)
+ print(f"Submitit Job ID: {job.job_id}")
+ runner.setup_job_info(job.job_id, rank=0)
+
+ else:
+ cfg.launcher.num_nodes = 1
+ main_port = random.randint(
+ submitit_conf.port_range[0], submitit_conf.port_range[1]
+ )
+ single_node_runner(cfg, main_port)
+
+
+if __name__ == "__main__":
+ initialize_config_module("sam3.train", version_base="1.2")
+ parser = ArgumentParser()
+ parser.add_argument(
+ "-c",
+ "--config",
+ required=True,
+ type=str,
+ help="path to config file (e.g. configs/roboflow_v100_full_ft_100_images.yaml)",
+ )
+ parser.add_argument(
+ "--use-cluster",
+ type=int,
+ default=None,
+ help="whether to launch on a cluster, 0: run locally, 1: run on a cluster",
+ )
+ parser.add_argument("--partition", type=str, default=None, help="SLURM partition")
+ parser.add_argument("--account", type=str, default=None, help="SLURM account")
+ parser.add_argument("--qos", type=str, default=None, help="SLURM qos")
+ parser.add_argument(
+ "--num-gpus", type=int, default=None, help="number of GPUS per node"
+ )
+ parser.add_argument("--num-nodes", type=int, default=None, help="Number of nodes")
+ args = parser.parse_args()
+ args.use_cluster = bool(args.use_cluster) if args.use_cluster is not None else None
+ register_omegaconf_resolvers()
+ main(args)
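+
+
+# Example launches (illustrative; config names and SLURM values are placeholders):
+#   python sam3/train/train.py -c configs/my_experiment.yaml --use-cluster 0 --num-gpus 8
+#   python sam3/train/train.py -c configs/my_experiment.yaml --use-cluster 1 \
+#       --partition my_partition --account my_account --num-gpus 8 --num-nodes 2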
diff --git a/sam3/train/trainer.py b/sam3/train/trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac7c1b52321ec08c69ba90b3c8bee6d428669013
--- /dev/null
+++ b/sam3/train/trainer.py
@@ -0,0 +1,1193 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import contextlib
+import fnmatch
+import gc
+import json
+import logging
+import math
+import os
+import time
+from collections import OrderedDict
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Mapping, Optional
+
+import numpy as np
+
+import torch
+import torch.distributed as dist
+import torch.nn as nn
+from hydra.utils import instantiate
+from iopath.common.file_io import g_pathmgr
+
+from sam3.model.data_misc import BatchedDatapoint
+from sam3.model.model_misc import SAM3Output
+from sam3.model.utils.misc import copy_data_to_device
+
+from sam3.train.optim.optimizer import construct_optimizer
+
+from sam3.train.utils.checkpoint_utils import (
+ assert_skipped_parameters_are_frozen,
+ exclude_params_matching_unix_pattern,
+ load_state_dict_into_model,
+ with_check_parameter_frozen,
+)
+
+from sam3.train.utils.distributed import all_reduce_max, barrier, get_rank
+
+from sam3.train.utils.logger import Logger, setup_logging
+from sam3.train.utils.train_utils import (
+ AverageMeter,
+ collect_dict_keys,
+ DurationMeter,
+ get_amp_type,
+ get_machine_local_and_dist_rank,
+ get_resume_checkpoint,
+ human_readable_time,
+ is_dist_avail_and_initialized,
+ log_env_variables,
+ makedir,
+ MemMeter,
+ Phase,
+ ProgressMeter,
+ set_seeds,
+ setup_distributed_backend,
+)
+
+
+CORE_LOSS_KEY = "core_loss"
+
+
+def unwrap_ddp_if_wrapped(model):
+ if isinstance(model, torch.nn.parallel.DistributedDataParallel):
+ return model.module
+ return model
+
+
+@dataclass
+class OptimAMPConf:
+ enabled: bool = False
+ amp_dtype: str = "float16"
+
+
+@dataclass
+class OptimConf:
+ optimizer: torch.optim.Optimizer = None
+ options: Optional[Dict[str, Any]] = None
+ param_group_modifiers: Optional[List] = None
+ amp: Optional[Dict[str, Any]] = None
+ gradient_clip: Any = None
+ gradient_logger: Any = None
+
+ def __post_init__(self):
+ # amp
+ if not isinstance(self.amp, OptimAMPConf):
+ if self.amp is None:
+ self.amp = {}
+ assert isinstance(self.amp, Mapping)
+ self.amp = OptimAMPConf(**self.amp)
+
+
+@dataclass
+class DistributedConf:
+ backend: Optional[str] = None # inferred from accelerator type
+ comms_dtype: Optional[str] = None
+ find_unused_parameters: bool = False
+ timeout_mins: int = 30
+ gradient_as_bucket_view: bool = False # PyTorch DDP default is False
+ static_graph: bool = False # PyTorch DDP default is False
+
+
+@dataclass
+class CudaConf:
+ cudnn_deterministic: bool = False
+ cudnn_benchmark: bool = True
+ allow_tf32: bool = False
+ # if not None, `matmul_allow_tf32` key will override `allow_tf32` for matmul
+ matmul_allow_tf32: Optional[bool] = None
+ # if not None, `cudnn_allow_tf32` key will override `allow_tf32` for cudnn
+ cudnn_allow_tf32: Optional[bool] = None
+
+
+@dataclass
+class CheckpointConf:
+ save_dir: str
+ save_freq: int
+ save_list: List[int] = field(default_factory=list)
+ model_weight_initializer: Any = None
+ save_best_meters: List[str] = None
+ skip_saving_parameters: List[str] = field(default_factory=list)
+ initialize_after_preemption: Optional[bool] = None
+ # if not None, training will be resumed from this checkpoint
+ resume_from: Optional[str] = None
+
+ def infer_missing(self):
+ if self.initialize_after_preemption is None:
+ with_skip_saving = len(self.skip_saving_parameters) > 0
+ self.initialize_after_preemption = with_skip_saving
+ return self
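+
+
+# Illustrative note on CheckpointConf (assumed usage): skip_saving_parameters=["teacher.*"]
+# would keep any parameter matching the unix pattern "teacher.*" out of saved checkpoints;
+# in that case infer_missing() defaults initialize_after_preemption to True so those
+# weights are re-initialized (e.g. via model_weight_initializer) when a preempted job resumes.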
+
+
+@dataclass
+class LoggingConf:
+ log_dir: str
+ log_freq: int # In iterations
+ tensorboard_writer: Any
+ log_level_primary: str = "INFO"
+ log_level_secondary: str = "ERROR"
+ log_scalar_frequency: int = 100
+ log_visual_frequency: int = 100
+ scalar_keys_to_log: Optional[Dict[str, Any]] = None
+ log_batch_stats: bool = False
+ wandb_writer: Optional[Any] = None
+
+
+class Trainer:
+ """
+ Trainer supporting the DDP training strategies.
+ """
+
+ EPSILON = 1e-8
+
+ def __init__(
+ self,
+ *, # the order of these args can change at any time, so they are keyword-only
+ data: Dict[str, Any],
+ model: Dict[str, Any],
+ logging: Dict[str, Any],
+ checkpoint: Dict[str, Any],
+ max_epochs: int,
+ mode: str = "train",
+ accelerator: str = "cuda",
+ seed_value: int = 123,
+ val_epoch_freq: int = 1,
+ distributed: Dict[str, bool] = None,
+ cuda: Dict[str, bool] = None,
+ env_variables: Optional[Dict[str, Any]] = None,
+ optim: Optional[Dict[str, Any]] = None,
+ optim_overrides: Optional[List[Dict[str, Any]]] = None,
+ meters: Optional[Dict[str, Any]] = None,
+ loss: Optional[Dict[str, Any]] = None,
+ skip_first_val: bool = False,
+ skip_saving_ckpts: bool = False,
+ empty_gpu_mem_cache_after_eval: bool = True,
+ gradient_accumulation_steps: int = 1,
+ ):
+ self._setup_env_variables(env_variables)
+ self._setup_timers()
+
+ self.data_conf = data
+ self.model_conf = model
+ self.logging_conf = LoggingConf(**logging)
+ self.checkpoint_conf = CheckpointConf(**checkpoint).infer_missing()
+ self.max_epochs = max_epochs
+ self.mode = mode
+ self.val_epoch_freq = val_epoch_freq
+ self.optim_conf = OptimConf(**optim) if optim is not None else OptimConf()
+ self.meters_conf = meters
+ self.loss_conf = loss
+ self.gradient_accumulation_steps = gradient_accumulation_steps
+ distributed = DistributedConf(**distributed or {})
+ cuda = CudaConf(**cuda or {})
+ self.where = 0.0
+
+ self.skip_first_val = skip_first_val
+ self.skip_saving_ckpts = skip_saving_ckpts
+ self.empty_gpu_mem_cache_after_eval = empty_gpu_mem_cache_after_eval
+
+ self._infer_distributed_backend_if_none(distributed, accelerator)
+
+ self._setup_device(accelerator)
+
+ self._setup_torch_dist_and_backend(cuda, distributed)
+
+ makedir(self.logging_conf.log_dir)
+ setup_logging(
+ __name__,
+ output_dir=self.logging_conf.log_dir,
+ rank=self.rank,
+ log_level_primary=self.logging_conf.log_level_primary,
+ log_level_secondary=self.logging_conf.log_level_secondary,
+ )
+
+ set_seeds(seed_value, self.max_epochs, self.distributed_rank)
+ log_env_variables()
+
+ assert (
+ is_dist_avail_and_initialized()
+ ), "Torch distributed needs to be initialized before calling the trainer."
+
+ self._setup_components() # Except Optimizer everything is setup here.
+ self._move_to_device()
+ self._construct_optimizers()
+ self._setup_dataloaders()
+
+ self.time_elapsed_meter = DurationMeter("Time Elapsed", self.device, ":.2f")
+
+ if self.checkpoint_conf.resume_from is not None:
+ assert os.path.exists(
+ self.checkpoint_conf.resume_from
+ ), f"The 'resume_from' checkpoint {self.checkpoint_conf.resume_from} does not exist!"
+ dst = os.path.join(self.checkpoint_conf.save_dir, "checkpoint.pt")
+ if self.distributed_rank == 0 and not os.path.exists(dst):
+ # Copy the "resume_from" checkpoint to the checkpoint folder
+ # if there is not a checkpoint to resume from already there
+ makedir(self.checkpoint_conf.save_dir)
+ g_pathmgr.copy(self.checkpoint_conf.resume_from, dst)
+ barrier()
+
+ self.load_checkpoint()
+ self._setup_ddp_distributed_training(distributed, accelerator)
+ barrier()
+
+ def _setup_timers(self):
+ """
+ Initializes counters for elapsed time and eta.
+ """
+ self.start_time = time.time()
+ self.ckpt_time_elapsed = 0
+ self.est_epoch_time = dict.fromkeys([Phase.TRAIN, Phase.VAL], 0)
+
+ def _get_meters(self, phase_filters=None):
+ if self.meters is None:
+ return {}
+ meters = {}
+ for phase, phase_meters in self.meters.items():
+ if phase_filters is not None and phase not in phase_filters:
+ continue
+ for key, key_meters in phase_meters.items():
+ if key_meters is None:
+ continue
+ for name, meter in key_meters.items():
+ meters[f"{phase}_{key}/{name}"] = meter
+ return meters
+
+ def _infer_distributed_backend_if_none(self, distributed_conf, accelerator):
+ if distributed_conf.backend is None:
+ distributed_conf.backend = "nccl" if accelerator == "cuda" else "gloo"
+
+ def _setup_env_variables(self, env_variables_conf) -> None:
+ if env_variables_conf is not None:
+ for variable_name, value in env_variables_conf.items():
+ os.environ[variable_name] = value
+
+ def _setup_torch_dist_and_backend(self, cuda_conf, distributed_conf) -> None:
+ if torch.cuda.is_available():
+ torch.backends.cudnn.deterministic = cuda_conf.cudnn_deterministic
+ torch.backends.cudnn.benchmark = cuda_conf.cudnn_benchmark
+ torch.backends.cuda.matmul.allow_tf32 = (
+ cuda_conf.matmul_allow_tf32
+ if cuda_conf.matmul_allow_tf32 is not None
+ else cuda_conf.allow_tf32
+ )
+ torch.backends.cudnn.allow_tf32 = (
+ cuda_conf.cudnn_allow_tf32
+ if cuda_conf.cudnn_allow_tf32 is not None
+ else cuda_conf.allow_tf32
+ )
+
+ self.rank = setup_distributed_backend(
+ distributed_conf.backend, distributed_conf.timeout_mins
+ )
+
+ def _setup_device(self, accelerator):
+ self.local_rank, self.distributed_rank = get_machine_local_and_dist_rank()
+ if accelerator == "cuda":
+ self.device = torch.device("cuda", self.local_rank)
+ torch.cuda.set_device(self.local_rank)
+ elif accelerator == "cpu":
+ self.device = torch.device("cpu")
+ else:
+ raise ValueError(f"Unsupported accelerator: {accelerator}")
+
+ def _setup_ddp_distributed_training(self, distributed_conf, accelerator):
+ assert isinstance(self.model, torch.nn.Module)
+
+ self.model = nn.parallel.DistributedDataParallel(
+ self.model,
+ device_ids=[self.local_rank] if accelerator == "cuda" else [],
+ find_unused_parameters=distributed_conf.find_unused_parameters,
+ gradient_as_bucket_view=distributed_conf.gradient_as_bucket_view,
+ static_graph=distributed_conf.static_graph,
+ )
+ if distributed_conf.comms_dtype is not None: # noqa
+ from torch.distributed.algorithms import ddp_comm_hooks
+
+ amp_type = get_amp_type(distributed_conf.comms_dtype)
+ if amp_type == torch.bfloat16:
+ hook = ddp_comm_hooks.default_hooks.bf16_compress_hook
+ logging.info("Enabling bfloat16 grad communication")
+ else:
+ hook = ddp_comm_hooks.default_hooks.fp16_compress_hook
+ logging.info("Enabling fp16 grad communication")
+ process_group = None
+ self.model.register_comm_hook(process_group, hook)
+
+ def _move_to_device(self):
+ logging.info(
+ f"Moving components to device {self.device} and local rank {self.local_rank}."
+ )
+
+ self.model.to(self.device)
+
+ logging.info(
+ f"Done moving components to device {self.device} and local rank {self.local_rank}."
+ )
+
+ def save_checkpoint(self, epoch, checkpoint_names=None):
+ if self.skip_saving_ckpts:
+ logging.info(
+ "skip_saving_ckpts is set to True. So, no checkpoints have been saved."
+ )
+ return
+ checkpoint_folder = self.checkpoint_conf.save_dir
+ makedir(checkpoint_folder)
+ if checkpoint_names is None:
+ checkpoint_names = ["checkpoint"]
+ if (
+ self.checkpoint_conf.save_freq > 0
+ and (int(epoch) % self.checkpoint_conf.save_freq == 0)
+ ) or int(epoch) in self.checkpoint_conf.save_list:
+ checkpoint_names.append(f"checkpoint_{int(epoch)}")
+
+ checkpoint_paths = []
+ for ckpt_name in checkpoint_names:
+ checkpoint_paths.append(os.path.join(checkpoint_folder, f"{ckpt_name}.pt"))
+
+ state_dict = unwrap_ddp_if_wrapped(self.model).state_dict()
+ state_dict = exclude_params_matching_unix_pattern(
+ patterns=self.checkpoint_conf.skip_saving_parameters, state_dict=state_dict
+ )
+
+ checkpoint = {
+ "model": state_dict,
+ "optimizer": self.optim.optimizer.state_dict(),
+ "epoch": epoch,
+ "loss": self.loss.state_dict(),
+ "steps": self.steps,
+ "time_elapsed": self.time_elapsed_meter.val,
+ "best_meter_values": self.best_meter_values,
+ }
+ if self.optim_conf.amp.enabled:
+ checkpoint["scaler"] = self.scaler.state_dict()
+
+ # DDP checkpoints are only saved on rank 0 (all workers are identical)
+ if self.distributed_rank != 0:
+ return
+
+ for checkpoint_path in checkpoint_paths:
+ self._save_checkpoint(checkpoint, checkpoint_path)
+
+ def _save_checkpoint(self, checkpoint, checkpoint_path):
+ """
+ Save a checkpoint while guarding against the job being killed in the middle
+ of checkpoint saving (which corrupts the checkpoint file and ruins the
+ entire training since usually only the last checkpoint is kept per run).
+
+ We first save the new checkpoint to a temp file (with a '.tmp' suffix), and
+        then move it to overwrite the old checkpoint_path.
+ """
+ checkpoint_path_tmp = f"{checkpoint_path}.tmp"
+ with g_pathmgr.open(checkpoint_path_tmp, "wb") as f:
+ torch.save(checkpoint, f)
+ # after torch.save is completed, replace the old checkpoint with the new one
+ if g_pathmgr.exists(checkpoint_path):
+ # remove the old checkpoint_path file first (otherwise g_pathmgr.mv fails)
+ g_pathmgr.rm(checkpoint_path)
+ success = g_pathmgr.mv(checkpoint_path_tmp, checkpoint_path)
+ assert success
+
+ def load_checkpoint(self):
+ ckpt_path = get_resume_checkpoint(self.checkpoint_conf.save_dir)
+ if ckpt_path is None:
+ self._init_model_state()
+ else:
+ if self.checkpoint_conf.initialize_after_preemption:
+ self._call_model_initializer()
+ self._load_resuming_checkpoint(ckpt_path)
+
+ def _init_model_state(self):
+ # Checking that parameters that won't be saved are indeed frozen
+        # We do this check here, before even saving the model, to catch errors
+        # as early as possible and not at the end of the first epoch
+ assert_skipped_parameters_are_frozen(
+ patterns=self.checkpoint_conf.skip_saving_parameters,
+ model=self.model,
+ )
+
+ # Checking that parameters that won't be saved are initialized from
+ # within the model definition, unless `initialize_after_preemption`
+        # is explicitly set to `True`. Otherwise this is a bug: after preemption,
+        # the parameters matching `skip_saving_parameters` would have random values
+ allow_init_skip_parameters = self.checkpoint_conf.initialize_after_preemption
+ with with_check_parameter_frozen(
+ patterns=self.checkpoint_conf.skip_saving_parameters,
+ model=self.model,
+ disabled=allow_init_skip_parameters,
+ ):
+ self._call_model_initializer()
+
+ def _call_model_initializer(self):
+ model_weight_initializer = instantiate(
+ self.checkpoint_conf.model_weight_initializer
+ )
+ if model_weight_initializer is not None:
+ logging.info(
+ f"Loading pretrained checkpoint from {self.checkpoint_conf.model_weight_initializer}"
+ )
+ self.model = model_weight_initializer(model=self.model)
+
+ def _load_resuming_checkpoint(self, ckpt_path: str):
+ logging.info(f"Resuming training from {ckpt_path}")
+
+ with g_pathmgr.open(ckpt_path, "rb") as f:
+ checkpoint = torch.load(f, map_location="cpu")
+ load_state_dict_into_model(
+ model=self.model,
+ state_dict=checkpoint["model"],
+ ignore_missing_keys=self.checkpoint_conf.skip_saving_parameters,
+ )
+
+ self.optim.optimizer.load_state_dict(checkpoint["optimizer"])
+ self.loss.load_state_dict(checkpoint["loss"], strict=True)
+ self.epoch = checkpoint["epoch"]
+ self.steps = checkpoint["steps"]
+ self.ckpt_time_elapsed = checkpoint.get("time_elapsed")
+
+ if self.optim_conf.amp.enabled and "scaler" in checkpoint:
+ self.scaler.load_state_dict(checkpoint["scaler"])
+
+ self.best_meter_values = checkpoint.get("best_meter_values", {})
+
+ if "train_dataset" in checkpoint and self.train_dataset is not None:
+ self.train_dataset.load_checkpoint_state(checkpoint["train_dataset"])
+
+ def is_intermediate_val_epoch(self, epoch):
+ skip_epoch = self.skip_first_val and epoch == 0
+ return (
+ epoch % self.val_epoch_freq == 0
+ and epoch < self.max_epochs - 1
+ and not skip_epoch
+ )
+
+ def _find_loss(self, key: str):
+ if key in self.loss:
+ return self.loss[key]
+
+ assert key != "all", "Loss must be specified for key='all'"
+ assert (
+ "default" in self.loss
+ ), f"Key {key} not found in losss, and no default provided"
+ return self.loss["default"]
+
+ def _find_meter(self, phase: str, key: str):
+ if key in self.meters[phase]:
+ return self.meters[phase][key]
+
+ for cand_key, meter in self.meters[phase].items():
+ if fnmatch.fnmatch(key, cand_key):
+ return meter
+ return None
+
+ def _step(
+ self,
+ batch: BatchedDatapoint,
+ model: nn.Module,
+ phase: str,
+ ):
+ key, batch = batch.popitem()
+ batch = copy_data_to_device(batch, self.device, non_blocking=True)
+
+ find_stages = model(batch)
+ find_targets = [
+ unwrap_ddp_if_wrapped(model).back_convert(x) for x in batch.find_targets
+ ]
+ batch_size = len(batch.img_batch)
+ loss = self._find_loss(key)(find_stages, find_targets)
+
+ loss_str = f"Losses/{phase}_{key}_loss"
+
+ loss_log_str = os.path.join("Step_Losses", loss_str)
+
+ # loss contains multiple sub-components we wish to log
+ step_losses = {}
+ if isinstance(loss, dict):
+ step_losses.update(
+ {f"Losses/{phase}_{key}_{k}": v for k, v in loss.items()}
+ )
+ loss = self._log_loss_detailed_and_return_core_loss(
+ loss, loss_log_str, self.steps[phase]
+ )
+
+ if self.steps[phase] % self.logging_conf.log_scalar_frequency == 0:
+ self.logger.log(
+ loss_log_str,
+ loss,
+ self.steps[phase],
+ )
+
+ self.steps[phase] += 1
+
+ ret_tuple = {loss_str: loss}, batch_size, step_losses
+
+ if phase not in self.meters:
+ return ret_tuple
+
+ meters_dict = self._find_meter(phase, key)
+ if meters_dict is None:
+ return ret_tuple
+        for _, meter in meters_dict.items():
+            meter.update(
+                find_stages=find_stages,
+                find_metadatas=batch.find_metadatas,
+                model=model,
+                batch=batch,
+                key=key,
+            )
+ # Cleanup memory
+ if isinstance(find_stages, SAM3Output):
+ for fs in find_stages:
+ for k in list(fs.keys()):
+ del fs[k]
+
+ return ret_tuple
+
+ def run(self):
+ assert self.mode in ["train", "train_only", "val"]
+ if self.mode == "train":
+ if self.epoch > 0:
+ logging.info(f"Resuming training from epoch: {self.epoch}")
+ # resuming from a checkpoint
+ if self.is_intermediate_val_epoch(self.epoch - 1):
+ logging.info("Running previous val epoch")
+ self.epoch -= 1
+ self.run_val()
+ self.epoch += 1
+ self.run_train()
+ self.run_val()
+ elif self.mode == "val":
+ self.run_val()
+ elif self.mode == "train_only":
+ self.run_train()
+
+ def _setup_dataloaders(self):
+ self.train_dataset = None
+ self.val_dataset = None
+
+ if self.mode in ["train", "val"]:
+ self.val_dataset = instantiate(self.data_conf.get(Phase.VAL, None))
+
+ if self.mode in ["train", "train_only"]:
+ self.train_dataset = instantiate(self.data_conf.train)
+
+ def run_train(self):
+ while self.epoch < self.max_epochs:
+ dataloader = self.train_dataset.get_loader(epoch=int(self.epoch))
+ barrier()
+ outs = self.train_epoch(dataloader)
+ self.logger.log_dict(outs, self.epoch) # Logged only on rank 0
+
+ # log train to text file.
+ if self.distributed_rank == 0:
+ with g_pathmgr.open(
+ os.path.join(self.logging_conf.log_dir, "train_stats.json"),
+ "a",
+ ) as f:
+ f.write(json.dumps(outs) + "\n")
+
+ # Save checkpoint before validating
+ self.save_checkpoint(self.epoch + 1)
+
+ del dataloader
+ gc.collect()
+
+            # Run val (skipped on the last epoch since it will run after the
+            # loop anyway)
+ if self.is_intermediate_val_epoch(self.epoch):
+ self.run_val()
+ if torch.cuda.is_available() and self.empty_gpu_mem_cache_after_eval:
+ # release memory buffers held by the model during eval (which typically
+                    # involves a lot more frames in video grounding than during training)
+ torch.cuda.empty_cache()
+
+ if self.distributed_rank == 0:
+ self.best_meter_values.update(self._get_trainer_state("train"))
+ with g_pathmgr.open(
+ os.path.join(self.logging_conf.log_dir, "best_stats.json"),
+ "a",
+ ) as f:
+ f.write(json.dumps(self.best_meter_values) + "\n")
+
+ self.epoch += 1
+        # epoch was incremented at the end of the loop; step back since the final val runs outside the loop
+ self.epoch -= 1
+
+ def run_val(self):
+ if not self.val_dataset:
+ return
+
+ dataloader = self.val_dataset.get_loader(epoch=int(self.epoch))
+ outs = self.val_epoch(dataloader, phase=Phase.VAL)
+ del dataloader
+ gc.collect()
+ self.logger.log_dict(outs, self.epoch) # Logged only on rank 0
+
+ if self.distributed_rank == 0:
+ with g_pathmgr.open(
+ os.path.join(self.logging_conf.log_dir, "val_stats.json"),
+ "a",
+ ) as f:
+ f.write(json.dumps(outs) + "\n")
+
+ def val_epoch(self, val_loader, phase):
+ batch_time = AverageMeter("Batch Time", self.device, ":.2f")
+ data_time = AverageMeter("Data Time", self.device, ":.2f")
+ mem = MemMeter("Mem (GB)", self.device, ":.2f")
+
+ iters_per_epoch = len(val_loader)
+
+ curr_phases = [phase]
+ curr_models = [self.model]
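+ # Phases/models are wrapped in lists so the same loop below can handle
+ # evaluating several (phase, model) pairs if needed.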
+
+ loss_names = []
+ for p in curr_phases:
+ for key in self.loss.keys():
+ loss_names.append(f"Losses/{p}_{key}_loss")
+
+ loss_mts = OrderedDict(
+ [(name, AverageMeter(name, self.device, ":.2e")) for name in loss_names]
+ )
+ extra_loss_mts = {}
+
+ for model in curr_models:
+ model.eval()
+ if hasattr(unwrap_ddp_if_wrapped(model), "on_validation_epoch_start"):
+ unwrap_ddp_if_wrapped(model).on_validation_epoch_start()
+
+ progress = ProgressMeter(
+ iters_per_epoch,
+ [batch_time, data_time, mem, self.time_elapsed_meter, *loss_mts.values()],
+ self._get_meters(curr_phases),
+ prefix="Val Epoch: [{}]".format(self.epoch),
+ )
+
+ end = time.time()
+
+ for data_iter, batch in enumerate(val_loader):
+ # measure data loading time
+ data_time.update(time.time() - end)
+
+ # batch = batch.to(self.device, non_blocking=True)
+
+ # compute output
+ with torch.no_grad():
+ with torch.amp.autocast(
+ device_type="cuda",
+ enabled=(self.optim_conf.amp.enabled if self.optim_conf else False),
+ dtype=(
+ get_amp_type(self.optim_conf.amp.amp_dtype)
+ if self.optim_conf
+ else None
+ ),
+ ):
+ for phase, model in zip(curr_phases, curr_models):
+ loss_dict, batch_size, extra_losses = self._step(
+ batch,
+ model,
+ phase,
+ )
+
+ assert len(loss_dict) == 1
+ loss_key, loss = loss_dict.popitem()
+
+ if loss_key in loss_mts:
+ loss_mts[loss_key].update(loss.item(), batch_size)
+
+ for k, v in extra_losses.items():
+ if k not in extra_loss_mts:
+ extra_loss_mts[k] = AverageMeter(k, self.device, ":.2e")
+ extra_loss_mts[k].update(v.item(), batch_size)
+
+ # measure elapsed time
+ batch_time.update(time.time() - end)
+ end = time.time()
+
+ self.time_elapsed_meter.update(
+ time.time() - self.start_time + self.ckpt_time_elapsed
+ )
+
+ if torch.cuda.is_available():
+ mem.update(reset_peak_usage=True)
+
+ if data_iter % self.logging_conf.log_freq == 0:
+ progress.display(data_iter)
+
+ if data_iter % self.logging_conf.log_scalar_frequency == 0:
+ # Log progress meters.
+ for progress_meter in progress.meters:
+ self.logger.log(
+ os.path.join("Step_Stats", phase, progress_meter.name),
+ progress_meter.val,
+ self.steps[Phase.VAL],
+ )
+
+ if data_iter % 10 == 0:
+ dist.barrier()
+
+ self.est_epoch_time[phase] = batch_time.avg * iters_per_epoch
+ self._log_timers(phase)
+ for model in curr_models:
+ if hasattr(unwrap_ddp_if_wrapped(model), "on_validation_epoch_end"):
+ unwrap_ddp_if_wrapped(model).on_validation_epoch_end()
+
+ out_dict = self._log_meters_and_save_best_ckpts(curr_phases)
+
+ for k, v in loss_mts.items():
+ out_dict[k] = v.avg
+ for k, v in extra_loss_mts.items():
+ out_dict[k] = v.avg
+
+ for phase in curr_phases:
+ out_dict.update(self._get_trainer_state(phase))
+ self._reset_meters(curr_phases)
+ logging.info(f"Meters: {out_dict}")
+ return out_dict
+
+ def _get_trainer_state(self, phase):
+ return {
+ "Trainer/where": self.where,
+ "Trainer/epoch": self.epoch,
+ f"Trainer/steps_{phase}": self.steps[phase],
+ }
+
+ def train_epoch(self, train_loader):
+ # Init stat meters
+ batch_time_meter = AverageMeter("Batch Time", self.device, ":.2f")
+ data_time_meter = AverageMeter("Data Time", self.device, ":.2f")
+ mem_meter = MemMeter("Mem (GB)", self.device, ":.2f")
+ data_times = []
+ phase = Phase.TRAIN
+
+ iters_per_epoch = len(train_loader)
+
+ loss_names = []
+ for batch_key in self.loss.keys():
+ loss_names.append(f"Losses/{phase}_{batch_key}_loss")
+
+ loss_mts = OrderedDict(
+ [(name, AverageMeter(name, self.device, ":.2e")) for name in loss_names]
+ )
+ extra_loss_mts = {}
+
+ progress = ProgressMeter(
+ iters_per_epoch,
+ [
+ batch_time_meter,
+ data_time_meter,
+ mem_meter,
+ self.time_elapsed_meter,
+ *loss_mts.values(),
+ ],
+ self._get_meters([phase]),
+ prefix="Train Epoch: [{}]".format(self.epoch),
+ )
+
+ # Model training loop
+ self.model.train()
+ end = time.time()
+
+ for data_iter, batch in enumerate(train_loader):
+ # measure data loading time
+ data_time_meter.update(time.time() - end)
+ data_times.append(data_time_meter.val)
+ # batch = batch.to(
+ # self.device, non_blocking=True
+ # ) # move tensors in a tensorclass
+
+ try:
+ self._run_step(batch, phase, loss_mts, extra_loss_mts)
+
+ # compute gradient and do optim step
+ exact_epoch = self.epoch + float(data_iter) / iters_per_epoch
+ self.where = float(exact_epoch) / self.max_epochs
+ assert self.where <= 1 + self.EPSILON
+ if self.where < 1.0:
+ self.optim.step_schedulers(
+ self.where, step=int(exact_epoch * iters_per_epoch)
+ )
+ else:
+ logging.warning(
+ f"Skipping scheduler update since the training is at the end, i.e, {self.where} of [0,1]."
+ )
+
+ # Log schedulers
+ if data_iter % self.logging_conf.log_scalar_frequency == 0:
+ for j, param_group in enumerate(self.optim.optimizer.param_groups):
+ for option in self.optim.schedulers[j]:
+ optim_prefix = (
+ "" + f"{j}_"
+ if len(self.optim.optimizer.param_groups) > 1
+ else ""
+ )
+ self.logger.log(
+ os.path.join("Optim", f"{optim_prefix}", option),
+ param_group[option],
+ self.steps[phase],
+ )
+
+ # Clipping gradients and detecting diverging gradients
+ if self.gradient_clipper is not None:
+ self.scaler.unscale_(self.optim.optimizer)
+ self.gradient_clipper(model=self.model)
+
+ if self.gradient_logger is not None:
+ self.gradient_logger(
+ self.model, rank=self.distributed_rank, where=self.where
+ )
+
+ # Optimizer step: the scaler will make sure gradients are not
+ # applied if the gradients are infinite
+ self.scaler.step(self.optim.optimizer)
+ self.scaler.update()
+
+ # measure elapsed time
+ batch_time_meter.update(time.time() - end)
+ end = time.time()
+
+ self.time_elapsed_meter.update(
+ time.time() - self.start_time + self.ckpt_time_elapsed
+ )
+
+ mem_meter.update(reset_peak_usage=True)
+ if data_iter % self.logging_conf.log_freq == 0:
+ progress.display(data_iter)
+
+ if data_iter % self.logging_conf.log_scalar_frequency == 0:
+ # Log progress meters.
+ for progress_meter in progress.meters:
+ self.logger.log(
+ os.path.join("Step_Stats", phase, progress_meter.name),
+ progress_meter.val,
+ self.steps[phase],
+ )
+
+ # Catching NaN/Inf errors in the loss
+ except FloatingPointError as e:
+ raise e
+
+ self.est_epoch_time[Phase.TRAIN] = batch_time_meter.avg * iters_per_epoch
+ self._log_timers(Phase.TRAIN)
+ self._log_sync_data_times(Phase.TRAIN, data_times)
+
+ out_dict = self._log_meters_and_save_best_ckpts([Phase.TRAIN])
+
+ for k, v in loss_mts.items():
+ out_dict[k] = v.avg
+ for k, v in extra_loss_mts.items():
+ out_dict[k] = v.avg
+ out_dict.update(self._get_trainer_state(phase))
+ logging.info(f"Losses and meters: {out_dict}")
+ self._reset_meters([phase])
+ return out_dict
+
+ def _log_sync_data_times(self, phase, data_times):
+ data_times = all_reduce_max(torch.tensor(data_times)).tolist()
+ steps = range(self.steps[phase] - len(data_times), self.steps[phase])
+ for step, data_time in zip(steps, data_times):
+ if step % self.logging_conf.log_scalar_frequency == 0:
+ self.logger.log(
+ os.path.join("Step_Stats", phase, "Data Time Synced"),
+ data_time,
+ step,
+ )
+
+ def _run_step(
+ self,
+ batch: BatchedDatapoint,
+ phase: str,
+ loss_mts: Dict[str, AverageMeter],
+ extra_loss_mts: Dict[str, AverageMeter],
+ raise_on_error: bool = True,
+ ):
+ """
+ Run the forward / backward
+ """
+
+ # It's important to set grads to None: with optimizers like Adam, zero-valued
+ # grads would still update the optimizer state (and hence the model) even when
+ # the step produces no real gradients.
+ self.optim.zero_grad(set_to_none=True)
+
+ if self.gradient_accumulation_steps > 1:
+ assert isinstance(
+ batch, list
+ ), f"Expected a list of batches, got {type(batch)}"
+ assert (
+ len(batch) == self.gradient_accumulation_steps
+ ), f"Expected {self.gradient_accumulation_steps} batches, got {len(batch)}"
+ accum_steps = len(batch)
+ else:
+ accum_steps = 1
+ batch = [batch]
+
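+ # Run forward/backward on each chunk; DDP gradient sync (no_sync) is
+ # skipped on all but the last chunk so gradients are all-reduced only
+ # once per optimizer step.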
+ for i, chunked_batch in enumerate(batch):
+ ddp_context = (
+ self.model.no_sync()
+ if i < accum_steps - 1
+ else contextlib.nullcontext()
+ )
+ with ddp_context:
+ with torch.amp.autocast(
+ device_type="cuda",
+ enabled=self.optim_conf.amp.enabled,
+ dtype=get_amp_type(self.optim_conf.amp.amp_dtype),
+ ):
+ loss_dict, batch_size, extra_losses = self._step(
+ chunked_batch,
+ self.model,
+ phase,
+ )
+
+ assert len(loss_dict) == 1
+ loss_key, loss = loss_dict.popitem()
+
+ if not math.isfinite(loss.item()):
+ error_msg = f"Loss is {loss.item()}, attempting to stop training"
+ logging.error(error_msg)
+ if raise_on_error:
+ raise FloatingPointError(error_msg)
+ else:
+ return
+
+ self.scaler.scale(loss).backward()
+ loss_mts[loss_key].update(loss.item(), batch_size)
+ for extra_loss_key, extra_loss in extra_losses.items():
+ if extra_loss_key not in extra_loss_mts:
+ extra_loss_mts[extra_loss_key] = AverageMeter(
+ extra_loss_key, self.device, ":.2e"
+ )
+ extra_loss_mts[extra_loss_key].update(extra_loss.item(), batch_size)
+
+ def _log_meters_and_save_best_ckpts(self, phases: List[str]):
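+ # Sync meters across ranks, track the best value seen for each meter, and
+ # optionally save a checkpoint for meters listed in
+ # checkpoint_conf.save_best_meters whenever they improve.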
+ logging.info("Synchronizing meters")
+ out_dict = {}
+ checkpoint_save_keys = []
+ for key, meter in self._get_meters(phases).items():
+ meter_output = meter.compute_synced()
+ is_better_check = getattr(meter, "is_better", None)
+
+ for meter_subkey, meter_value in meter_output.items():
+ out_dict[os.path.join("Meters_train", key, meter_subkey)] = meter_value
+
+ if is_better_check is None:
+ continue
+
+ tracked_meter_key = os.path.join(key, meter_subkey)
+ if tracked_meter_key not in self.best_meter_values or is_better_check(
+ meter_value,
+ self.best_meter_values[tracked_meter_key],
+ ):
+ self.best_meter_values[tracked_meter_key] = meter_value
+
+ if (
+ self.checkpoint_conf.save_best_meters is not None
+ and key in self.checkpoint_conf.save_best_meters
+ ):
+ checkpoint_save_keys.append(tracked_meter_key.replace("/", "_"))
+
+ if len(checkpoint_save_keys) > 0:
+ self.save_checkpoint(self.epoch + 1, checkpoint_save_keys)
+
+ return out_dict
+
+ def _log_timers(self, phase):
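+ # Estimate remaining wall-clock time as (remaining train epochs x avg train
+ # epoch time) + (remaining val epochs x avg val epoch time).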
+ time_remaining = 0
+ epochs_remaining = self.max_epochs - self.epoch - 1
+ val_epochs_remaining = sum(
+ n % self.val_epoch_freq == 0 for n in range(self.epoch, self.max_epochs)
+ )
+
+ # Adding the guaranteed val run at the end if val_epoch_freq doesn't coincide with
+ # the end epoch.
+ if (self.max_epochs - 1) % self.val_epoch_freq != 0:
+ val_epochs_remaining += 1
+
+ # Remove the current val run from estimate
+ if phase == Phase.VAL:
+ val_epochs_remaining -= 1
+
+ time_remaining += (
+ epochs_remaining * self.est_epoch_time[Phase.TRAIN]
+ + val_epochs_remaining * self.est_epoch_time[Phase.VAL]
+ )
+
+ self.logger.log(
+ os.path.join("Step_Stats", phase, self.time_elapsed_meter.name),
+ self.time_elapsed_meter.val,
+ self.steps[phase],
+ )
+
+ logging.info(f"Estimated time remaining: {human_readable_time(time_remaining)}")
+
+ def _reset_meters(self, phases: List[str]) -> None:
+ for meter in self._get_meters(phases).values():
+ meter.reset()
+
+ def _check_val_key_match(self, val_keys, phase):
+ if val_keys is not None:
+ # Check if there are any duplicates
+ assert len(val_keys) == len(
+ set(val_keys)
+ ), f"Duplicate keys in val datasets, keys: {val_keys}"
+
+ # Check that the keys match the meter keys
+ if self.meters_conf is not None and phase in self.meters_conf:
+ assert set(val_keys) == set(self.meters_conf[phase].keys()), (
+ f"Keys in val datasets do not match the keys in meters."
+ f"\nMissing in meters: {set(val_keys) - set(self.meters_conf[phase].keys())}"
+ f"\nMissing in val datasets: {set(self.meters_conf[phase].keys()) - set(val_keys)}"
+ )
+
+ if self.loss_conf is not None:
+ loss_keys = set(self.loss_conf.keys()) - set(["all"])
+ if "default" not in loss_keys:
+ for k in val_keys:
+ assert (
+ k in loss_keys
+ ), f"Error: key {k} is not defined in the losses, and no default is set"
+
+ def _setup_components(self):
+ # Get the keys for all the val datasets, if any
+ val_phase = Phase.VAL
+ val_keys = None
+ if self.data_conf.get(val_phase, None) is not None:
+ val_keys = collect_dict_keys(self.data_conf[val_phase])
+ # Additional checks on the sanity of the config for val datasets
+ self._check_val_key_match(val_keys, phase=val_phase)
+
+ logging.info("Setting up components: Model, loss, optim, meters etc.")
+ self.epoch = 0
+ self.steps = {Phase.TRAIN: 0, Phase.VAL: 0}
+
+ self.logger = Logger(self.logging_conf)
+
+ self.model = instantiate(self.model_conf, _convert_="all")
+ print_model_summary(self.model)
+
+ self.loss = None
+ if self.loss_conf:
+ self.loss = {
+ key: el # wrap_base_loss(el)
+ for (key, el) in instantiate(self.loss_conf, _convert_="all").items()
+ }
+ self.loss = nn.ModuleDict(self.loss)
+
+ self.meters = {}
+ self.best_meter_values = {}
+ if self.meters_conf:
+ self.meters = instantiate(self.meters_conf, _convert_="all")
+
+ self.scaler = torch.amp.GradScaler(
+ self.device,
+ enabled=self.optim_conf.amp.enabled if self.optim_conf else False,
+ )
+
+ self.gradient_clipper = (
+ instantiate(self.optim_conf.gradient_clip) if self.optim_conf else None
+ )
+ self.gradient_logger = (
+ instantiate(self.optim_conf.gradient_logger) if self.optim_conf else None
+ )
+
+ logging.info("Finished setting up components: Model, loss, optim, meters etc.")
+
+ def _construct_optimizers(self):
+ self.optim = construct_optimizer(
+ self.model,
+ self.optim_conf.optimizer,
+ self.optim_conf.options,
+ self.optim_conf.param_group_modifiers,
+ )
+
+ def _log_loss_detailed_and_return_core_loss(self, loss, loss_str, step):
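+ # `loss` maps individual loss terms plus the aggregate under CORE_LOSS_KEY;
+ # log the individual terms and return only the aggregate for backprop.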
+ core_loss = loss.pop(CORE_LOSS_KEY)
+ if step % self.logging_conf.log_scalar_frequency == 0:
+ for k in loss:
+ log_str = os.path.join(loss_str, k)
+ self.logger.log(log_str, loss[k], step)
+ return core_loss
+
+
+def print_model_summary(model: torch.nn.Module, log_dir: str = ""):
+ """
+ Prints the model and the number of parameters in the model.
+
+ Several packages (e.g. https://github.com/sksq96/pytorch-summary and
+ https://github.com/nmhkahn/torchsummaryX) render this information in a nice
+ table, but they require a concrete `input` (to also report output sizes).
+ Our models are complex, and a single representative input is too restrictive,
+ so we only log the module tree and parameter counts.
+ """
+ if get_rank() != 0:
+ return
+ param_kwargs = {}
+ trainable_parameters = sum(
+ p.numel() for p in model.parameters(**param_kwargs) if p.requires_grad
+ )
+ total_parameters = sum(p.numel() for p in model.parameters(**param_kwargs))
+ non_trainable_parameters = total_parameters - trainable_parameters
+ logging.info("==" * 10)
+ logging.info(f"Summary for model {type(model)}")
+ logging.info(f"Model is {model}")
+ logging.info(f"\tTotal parameters {get_human_readable_count(total_parameters)}")
+ logging.info(
+ f"\tTrainable parameters {get_human_readable_count(trainable_parameters)}"
+ )
+ logging.info(
+ f"\tNon-Trainable parameters {get_human_readable_count(non_trainable_parameters)}"
+ )
+ logging.info("==" * 10)
+
+ if log_dir:
+ output_fpath = os.path.join(log_dir, "model.txt")
+ with g_pathmgr.open(output_fpath, "w") as f:
+ print(model, file=f)
+
+
+PARAMETER_NUM_UNITS = [" ", "K", "M", "B", "T"]
+
+
+def get_human_readable_count(number: int) -> str:
+ """
+ Abbreviates an integer number with K, M, B, T for thousands, millions,
+ billions and trillions, respectively.
+ Examples:
+ >>> get_human_readable_count(123)
+ '123 '
+ >>> get_human_readable_count(1234) # (one thousand)
+ '1.2 K'
+ >>> get_human_readable_count(2e6) # (two million)
+ '2.0 M'
+ >>> get_human_readable_count(3e9) # (three billion)
+ '3.0 B'
+ >>> get_human_readable_count(4e14) # (four hundred trillion)
+ '400 T'
+ >>> get_human_readable_count(5e15) # (more than trillion)
+ '5,000 T'
+ Args:
+ number: a positive integer number
+ Return:
+ A string formatted according to the pattern described above.
+ """
+ assert number >= 0
+ labels = PARAMETER_NUM_UNITS
+ num_digits = int(np.floor(np.log10(number)) + 1 if number > 0 else 1)
+ num_groups = int(np.ceil(num_digits / 3))
+ num_groups = min(num_groups, len(labels)) # don't abbreviate beyond trillions
+ shift = -3 * (num_groups - 1)
+ number = number * (10**shift)
+ index = num_groups - 1
+ if index < 1 or number >= 100:
+ return f"{int(number):,d} {labels[index]}"
+ else:
+ return f"{number:,.1f} {labels[index]}"
diff --git a/sam3/train/transforms/__init__.py b/sam3/train/transforms/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/train/transforms/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/train/transforms/basic.py b/sam3/train/transforms/basic.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e8cf1f9da3ebe9da53bad8987d05232735c5f67
--- /dev/null
+++ b/sam3/train/transforms/basic.py
@@ -0,0 +1,455 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+Transforms and data augmentation for both image + bbox.
+"""
+
+import math
+import random
+from typing import Iterable
+
+import PIL
+import torch
+import torchvision.transforms as T
+import torchvision.transforms.functional as F
+
+from sam3.model.box_ops import box_xyxy_to_cxcywh
+from sam3.model.data_misc import interpolate
+
+
+def crop(image, target, region):
+ cropped_image = F.crop(image, *region)
+
+ target = target.copy()
+ i, j, h, w = region
+
+ # should we do something wrt the original size?
+ target["size"] = torch.tensor([h, w])
+
+ fields = ["labels", "area", "iscrowd", "positive_map"]
+
+ if "boxes" in target:
+ boxes = target["boxes"]
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
+ cropped_boxes = boxes - torch.as_tensor([j, i, j, i], dtype=torch.float32)
+ cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
+ cropped_boxes = cropped_boxes.clamp(min=0)
+ area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
+ target["boxes"] = cropped_boxes.reshape(-1, 4)
+ target["area"] = area
+ fields.append("boxes")
+
+ if "input_boxes" in target:
+ boxes = target["input_boxes"]
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
+ cropped_boxes = boxes - torch.as_tensor([j, i, j, i], dtype=torch.float32)
+ cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
+ cropped_boxes = cropped_boxes.clamp(min=0)
+ target["input_boxes"] = cropped_boxes.reshape(-1, 4)
+
+ if "masks" in target:
+ # FIXME should we update the area here if there are no boxes?
+ target["masks"] = target["masks"][:, i : i + h, j : j + w]
+ fields.append("masks")
+
+ # remove elements whose boxes or masks have zero area
+ if "boxes" in target or "masks" in target:
+ # favor box selection when deciding which elements to keep
+ # (this matches the previous implementation)
+ if "boxes" in target:
+ cropped_boxes = target["boxes"].reshape(-1, 2, 2)
+ keep = torch.all(cropped_boxes[:, 1, :] > cropped_boxes[:, 0, :], dim=1)
+ else:
+ keep = target["masks"].flatten(1).any(1)
+
+ for field in fields:
+ if field in target:
+ target[field] = target[field][keep]
+
+ return cropped_image, target
+
+
+def hflip(image, target):
+ flipped_image = F.hflip(image)
+
+ w, h = image.size
+
+ target = target.copy()
+ if "boxes" in target:
+ boxes = target["boxes"]
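+ # Mirror boxes about the vertical axis: the column reorder swaps x_min/x_max
+ # so that new_x_min = w - old_x_max and new_x_max = w - old_x_min.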
+ boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor(
+ [-1, 1, -1, 1]
+ ) + torch.as_tensor([w, 0, w, 0])
+ target["boxes"] = boxes
+
+ if "input_boxes" in target:
+ boxes = target["input_boxes"]
+ boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor(
+ [-1, 1, -1, 1]
+ ) + torch.as_tensor([w, 0, w, 0])
+ target["input_boxes"] = boxes
+
+ if "masks" in target:
+ target["masks"] = target["masks"].flip(-1)
+
+ if "text_input" in target:
+ text_input = (
+ target["text_input"]
+ .replace("left", "[TMP]")
+ .replace("right", "left")
+ .replace("[TMP]", "right")
+ )
+ target["text_input"] = text_input
+
+ return flipped_image, target
+
+
+def resize(image, target, size, max_size=None, square=False):
+ # size can be min_size (scalar) or (w, h) tuple
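+ # A scalar resizes the shorter side to `size` (preserving aspect ratio),
+ # with `size` shrunk if needed so the longer side never exceeds `max_size`.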
+
+ def get_size_with_aspect_ratio(image_size, size, max_size=None):
+ w, h = image_size
+ if max_size is not None:
+ min_original_size = float(min((w, h)))
+ max_original_size = float(max((w, h)))
+ if max_original_size / min_original_size * size > max_size:
+ size = int(round(max_size * min_original_size / max_original_size))
+
+ if (w <= h and w == size) or (h <= w and h == size):
+ return (h, w)
+
+ if w < h:
+ ow = size
+ oh = int(size * h / w)
+ else:
+ oh = size
+ ow = int(size * w / h)
+
+ return (oh, ow)
+
+ def get_size(image_size, size, max_size=None):
+ if isinstance(size, (list, tuple)):
+ return size[::-1]
+ else:
+ return get_size_with_aspect_ratio(image_size, size, max_size)
+
+ if square:
+ size = size, size
+ else:
+ size = get_size(image.size, size, max_size)
+ rescaled_image = F.resize(image, size)
+
+ if target is None:
+ return rescaled_image, None
+
+ ratios = tuple(
+ float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)
+ )
+ ratio_width, ratio_height = ratios
+
+ target = target.copy()
+ if "boxes" in target:
+ boxes = target["boxes"]
+ scaled_boxes = boxes * torch.as_tensor(
+ [ratio_width, ratio_height, ratio_width, ratio_height], dtype=torch.float32
+ )
+ target["boxes"] = scaled_boxes
+ if "input_boxes" in target:
+ boxes = target["input_boxes"]
+ scaled_boxes = boxes * torch.as_tensor(
+ [ratio_width, ratio_height, ratio_width, ratio_height], dtype=torch.float32
+ )
+ target["input_boxes"] = scaled_boxes
+
+ if "area" in target:
+ area = target["area"]
+ scaled_area = area * (ratio_width * ratio_height)
+ target["area"] = scaled_area
+
+ h, w = size
+ target["size"] = torch.tensor([h, w])
+
+ if "masks" in target:
+ target["masks"] = (
+ interpolate(target["masks"][:, None].float(), size, mode="nearest")[:, 0]
+ > 0.5
+ )
+
+ return rescaled_image, target
+
+
+def pad(image, target, padding):
+ if len(padding) == 2:
+ # assumes that we only pad on the right and bottom edges
+ padded_image = F.pad(image, (0, 0, padding[0], padding[1]))
+ else:
+ # left, top, right, bottom
+ padded_image = F.pad(image, (padding[0], padding[1], padding[2], padding[3]))
+ if target is None:
+ return padded_image, None
+ target = target.copy()
+
+ w, h = padded_image.size
+
+ # should we do something wrt the original size?
+ target["size"] = torch.tensor([h, w])
+ if "boxes" in target and len(padding) == 4:
+ boxes = target["boxes"]
+ boxes = boxes + torch.as_tensor(
+ [padding[0], padding[1], padding[0], padding[1]], dtype=torch.float32
+ )
+ target["boxes"] = boxes
+
+ if "input_boxes" in target and len(padding) == 4:
+ boxes = target["input_boxes"]
+ boxes = boxes + torch.as_tensor(
+ [padding[0], padding[1], padding[0], padding[1]], dtype=torch.float32
+ )
+ target["input_boxes"] = boxes
+
+ if "masks" in target:
+ if len(padding) == 2:
+ target["masks"] = torch.nn.functional.pad(
+ target["masks"], (0, padding[0], 0, padding[1])
+ )
+ else:
+ target["masks"] = torch.nn.functional.pad(
+ target["masks"], (padding[0], padding[2], padding[1], padding[3])
+ )
+ return padded_image, target
+
+
+class RandomCrop:
+ def __init__(self, size):
+ self.size = size
+
+ def __call__(self, img, target):
+ region = T.RandomCrop.get_params(img, self.size)
+ return crop(img, target, region)
+
+
+class RandomSizeCrop:
+ def __init__(self, min_size: int, max_size: int, respect_boxes: bool = False):
+ self.min_size = min_size
+ self.max_size = max_size
+ self.respect_boxes = respect_boxes # if True we can't crop a box out
+
+ def __call__(self, img: PIL.Image.Image, target: dict):
+ init_boxes = len(target["boxes"])
+ init_boxes_tensor = target["boxes"].clone()
+ if self.respect_boxes and init_boxes > 0:
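+ # Constrain the sampled crop window so that every ground-truth box keeps a
+ # nonzero intersection with it: boxes may be cut, but none may be dropped
+ # (verified by the assert on the cropped result below).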
+ minW, minH, maxW, maxH = (
+ min(img.width, self.min_size),
+ min(img.width, self.min_size),
+ min(img.width, self.max_size),
+ min(img.height, self.max_size),
+ )
+ minX, minY = (
+ target["boxes"][:, 0].max().item() + 10.0,
+ target["boxes"][:, 1].max().item() + 10.0,
+ )
+ minX = min(img.width, minX)
+ minY = min(img.height, minY)
+ maxX, maxY = (
+ target["boxes"][:, 2].min().item() - 10,
+ target["boxes"][:, 3].min().item() - 10,
+ )
+ maxX = max(0.0, maxX)
+ maxY = max(0.0, maxY)
+ minW = max(minW, minX - maxX)
+ minH = max(minH, minY - maxY)
+ w = random.uniform(minW, max(minW, maxW))
+ h = random.uniform(minH, max(minH, maxH))
+ if minX > maxX:
+ # i = random.uniform(max(0, minX - w + 1), max(maxX, max(0, minX - w + 1)))
+ i = random.uniform(max(0, minX - w), max(maxX, max(0, minX - w)))
+ else:
+ i = random.uniform(
+ max(0, minX - w + 1), max(maxX - 1, max(0, minX - w + 1))
+ )
+ if minY > maxY:
+ # j = random.uniform(max(0, minY - h + 1), max(maxY, max(0, minY - h + 1)))
+ j = random.uniform(max(0, minY - h), max(maxY, max(0, minY - h)))
+ else:
+ j = random.uniform(
+ max(0, minY - h + 1), max(maxY - 1, max(0, minY - h + 1))
+ )
+ result_img, result_target = crop(img, target, [j, i, h, w])
+ assert (
+ len(result_target["boxes"]) == init_boxes
+ ), f"img_w={img.width}\timg_h={img.height}\tminX={minX}\tminY={minY}\tmaxX={maxX}\tmaxY={maxY}\tminW={minW}\tminH={minH}\tmaxW={maxW}\tmaxH={maxH}\tw={w}\th={h}\ti={i}\tj={j}\tinit_boxes={init_boxes_tensor}\tresults={result_target['boxes']}"
+
+ return result_img, result_target
+ else:
+ w = random.randint(self.min_size, min(img.width, self.max_size))
+ h = random.randint(self.min_size, min(img.height, self.max_size))
+ region = T.RandomCrop.get_params(img, (h, w))
+ result_img, result_target = crop(img, target, region)
+ return result_img, result_target
+
+
+class CenterCrop:
+ def __init__(self, size):
+ self.size = size
+
+ def __call__(self, img, target):
+ image_width, image_height = img.size
+ crop_height, crop_width = self.size
+ crop_top = int(round((image_height - crop_height) / 2.0))
+ crop_left = int(round((image_width - crop_width) / 2.0))
+ return crop(img, target, (crop_top, crop_left, crop_height, crop_width))
+
+
+class RandomHorizontalFlip:
+ def __init__(self, p=0.5):
+ self.p = p
+
+ def __call__(self, img, target):
+ if random.random() < self.p:
+ return hflip(img, target)
+ return img, target
+
+
+class RandomResize:
+ def __init__(self, sizes, max_size=None, square=False):
+ if isinstance(sizes, int):
+ sizes = (sizes,)
+ assert isinstance(sizes, Iterable)
+ self.sizes = list(sizes)
+ self.max_size = max_size
+ self.square = square
+
+ def __call__(self, img, target=None):
+ size = random.choice(self.sizes)
+ return resize(img, target, size, self.max_size, square=self.square)
+
+
+class RandomPad:
+ def __init__(self, max_pad):
+ self.max_pad = max_pad
+
+ def __call__(self, img, target):
+ pad_x = random.randint(0, self.max_pad)
+ pad_y = random.randint(0, self.max_pad)
+ return pad(img, target, (pad_x, pad_y))
+
+
+class PadToSize:
+ def __init__(self, size):
+ self.size = size
+
+ def __call__(self, img, target):
+ w, h = img.size
+ pad_x = self.size - w
+ pad_y = self.size - h
+ assert pad_x >= 0 and pad_y >= 0
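+ # Randomly split the required horizontal/vertical padding between the two
+ # sides so the padded output is exactly size x size.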
+ pad_left = random.randint(0, pad_x)
+ pad_right = pad_x - pad_left
+ pad_top = random.randint(0, pad_y)
+ pad_bottom = pad_y - pad_top
+ return pad(img, target, (pad_left, pad_top, pad_right, pad_bottom))
+
+
+class Identity:
+ def __call__(self, img, target):
+ return img, target
+
+
+class RandomSelect:
+ """
+ Randomly selects between transforms1 and transforms2,
+ with probability p for transforms1 and (1 - p) for transforms2
+ """
+
+ def __init__(self, transforms1=None, transforms2=None, p=0.5):
+ self.transforms1 = transforms1 or Identity()
+ self.transforms2 = transforms2 or Identity()
+ self.p = p
+
+ def __call__(self, img, target):
+ if random.random() < self.p:
+ return self.transforms1(img, target)
+ return self.transforms2(img, target)
+
+
+class ToTensor:
+ def __call__(self, img, target):
+ return F.to_tensor(img), target
+
+
+class RandomErasing:
+ def __init__(self, *args, **kwargs):
+ self.eraser = T.RandomErasing(*args, **kwargs)
+
+ def __call__(self, img, target):
+ return self.eraser(img), target
+
+
+class Normalize:
+ def __init__(self, mean, std):
+ self.mean = mean
+ self.std = std
+
+ def __call__(self, image, target=None):
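+ # Normalize pixel values, then convert boxes from absolute XYXY to CxCyWH
+ # scaled to [0, 1] by the image width/height.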
+ image = F.normalize(image, mean=self.mean, std=self.std)
+ if target is None:
+ return image, None
+ target = target.copy()
+ h, w = image.shape[-2:]
+ if "boxes" in target:
+ boxes = target["boxes"]
+ boxes = box_xyxy_to_cxcywh(boxes)
+ boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
+ target["boxes"] = boxes
+ if "input_boxes" in target:
+ boxes = target["input_boxes"]
+ boxes = box_xyxy_to_cxcywh(boxes)
+ boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
+ target["input_boxes"] = boxes
+ return image, target
+
+
+class RemoveDifficult:
+ def __init__(self, enabled=False):
+ self.remove_difficult = enabled
+
+ def __call__(self, image, target=None):
+ if target is None:
+ return image, None
+ target = target.copy()
+ keep = ~target["iscrowd"].to(torch.bool) | (not self.remove_difficult)
+ if "boxes" in target:
+ target["boxes"] = target["boxes"][keep]
+ target["labels"] = target["labels"][keep]
+ target["iscrowd"] = target["iscrowd"][keep]
+ return image, target
+
+
+class Compose:
+ def __init__(self, transforms):
+ self.transforms = transforms
+
+ def __call__(self, image, target):
+ for t in self.transforms:
+ image, target = t(image, target)
+ return image, target
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ for t in self.transforms:
+ format_string += "\n"
+ format_string += " {0}".format(t)
+ format_string += "\n)"
+ return format_string
+
+
+def get_random_resize_scales(size, min_size, rounded):
+ stride = 128 if rounded else 32
+ min_size = int(stride * math.ceil(min_size / stride))
+ scales = list(range(min_size, size + 1, stride))
+ return scales
+
+
+def get_random_resize_max_size(size, ratio=5 / 3):
+ max_size = round(ratio * size)
+ return max_size
diff --git a/sam3/train/transforms/basic_for_api.py b/sam3/train/transforms/basic_for_api.py
new file mode 100644
index 0000000000000000000000000000000000000000..e0ec2af4c482116e591f27c0e6676102469367c1
--- /dev/null
+++ b/sam3/train/transforms/basic_for_api.py
@@ -0,0 +1,1396 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+"""
+Transforms and data augmentation for both image + bbox.
+"""
+
+import logging
+
+import numbers
+import random
+from collections.abc import Sequence
+from typing import Iterable
+
+import torch
+import torchvision.transforms as T
+import torchvision.transforms.functional as F
+import torchvision.transforms.v2.functional as Fv2
+
+from PIL import Image as PILImage
+
+from sam3.model.box_ops import box_xyxy_to_cxcywh, masks_to_boxes
+from sam3.train.data.sam3_image_dataset import Datapoint
+from torchvision.transforms import InterpolationMode
+
+
+def crop(
+ datapoint,
+ index,
+ region,
+ v2=False,
+ check_validity=True,
+ check_input_validity=True,
+ recompute_box_from_mask=False,
+):
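+ # `region` is (top, left, height, width). The crop is applied to the image,
+ # each object's mask and box, any per-query semantic target, and the
+ # query-level input boxes/points belonging to this image index.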
+ if v2:
+ rtop, rleft, rheight, rwidth = (int(round(r)) for r in region)
+ datapoint.images[index].data = Fv2.crop(
+ datapoint.images[index].data,
+ top=rtop,
+ left=rleft,
+ height=rheight,
+ width=rwidth,
+ )
+ else:
+ datapoint.images[index].data = F.crop(datapoint.images[index].data, *region)
+
+ i, j, h, w = region
+
+ # should we do something wrt the original size?
+ datapoint.images[index].size = (h, w)
+
+ for obj in datapoint.images[index].objects:
+ # crop the mask
+ if obj.segment is not None:
+ obj.segment = F.crop(obj.segment, int(i), int(j), int(h), int(w))
+
+ # crop the bounding box
+ if recompute_box_from_mask and obj.segment is not None:
+ # here the boxes are still in XYXY format with absolute coordinates (they are
+ # converted to CxCyWH with relative coordinates in basic_for_api.NormalizeAPI)
+ obj.bbox, obj.area = get_bbox_xyxy_abs_coords_from_mask(obj.segment)
+ else:
+ if recompute_box_from_mask and obj.segment is None and obj.area > 0:
+ logging.warning(
+ "Cannot recompute bounding box from mask since `obj.segment` is None. "
+ "Falling back to directly cropping from the input bounding box."
+ )
+ boxes = obj.bbox.view(1, 4)
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
+ cropped_boxes = boxes - torch.as_tensor([j, i, j, i], dtype=torch.float32)
+ cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
+ cropped_boxes = cropped_boxes.clamp(min=0)
+ obj.area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
+ obj.bbox = cropped_boxes.reshape(-1, 4)
+
+ for query in datapoint.find_queries:
+ if query.semantic_target is not None:
+ query.semantic_target = F.crop(
+ query.semantic_target, int(i), int(j), int(h), int(w)
+ )
+ if query.image_id == index and query.input_bbox is not None:
+ boxes = query.input_bbox
+ max_size = torch.as_tensor([w, h], dtype=torch.float32)
+ cropped_boxes = boxes - torch.as_tensor([j, i, j, i], dtype=torch.float32)
+ cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
+ cropped_boxes = cropped_boxes.clamp(min=0)
+
+ # cur_area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
+ # if check_input_validity:
+ # assert (
+ # (cur_area > 0).all().item()
+ # ), "Some input box got cropped out by the crop transform"
+
+ query.input_bbox = cropped_boxes.reshape(-1, 4)
+ if query.image_id == index and query.input_points is not None:
+ print(
+ "Warning! Point cropping with this function may lead to unexpected results"
+ )
+ points = query.input_points
+ # Unlike right-lower box edges, which are exclusive, the
+ # point must be in [0, length-1], hence the -1
+ max_size = torch.as_tensor([w, h], dtype=torch.float32) - 1
+ cropped_points = points - torch.as_tensor([j, i, 0], dtype=torch.float32)
+ cropped_points[:, :, :2] = torch.min(cropped_points[:, :, :2], max_size)
+ cropped_points[:, :, :2] = cropped_points[:, :, :2].clamp(min=0)
+ query.input_points = cropped_points
+
+ if check_validity:
+ # Check that all boxes are still valid
+ for obj in datapoint.images[index].objects:
+ assert obj.area > 0, "Box {} has no area".format(obj.bbox)
+
+ return datapoint
+
+
+def hflip(datapoint, index):
+ datapoint.images[index].data = F.hflip(datapoint.images[index].data)
+
+ w, h = datapoint.images[index].data.size
+ for obj in datapoint.images[index].objects:
+ boxes = obj.bbox.view(1, 4)
+ boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor(
+ [-1, 1, -1, 1]
+ ) + torch.as_tensor([w, 0, w, 0])
+ obj.bbox = boxes
+ if obj.segment is not None:
+ obj.segment = F.hflip(obj.segment)
+
+ for query in datapoint.find_queries:
+ if query.semantic_target is not None:
+ query.semantic_target = F.hflip(query.semantic_target)
+ if query.image_id == index and query.input_bbox is not None:
+ boxes = query.input_bbox
+ boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor(
+ [-1, 1, -1, 1]
+ ) + torch.as_tensor([w, 0, w, 0])
+ query.input_bbox = boxes
+ if query.image_id == index and query.input_points is not None:
+ points = query.input_points
+ points = points * torch.as_tensor([-1, 1, 1]) + torch.as_tensor([w, 0, 0])
+ query.input_points = points
+ return datapoint
+
+
+def get_size_with_aspect_ratio(image_size, size, max_size=None):
+ w, h = image_size
+ if max_size is not None:
+ min_original_size = float(min((w, h)))
+ max_original_size = float(max((w, h)))
+ if max_original_size / min_original_size * size > max_size:
+ size = max_size * min_original_size / max_original_size
+
+ if (w <= h and w == size) or (h <= w and h == size):
+ return (h, w)
+
+ if w < h:
+ ow = int(round(size))
+ oh = int(round(size * h / w))
+ else:
+ oh = int(round(size))
+ ow = int(round(size * w / h))
+
+ return (oh, ow)
+
+
+def resize(datapoint, index, size, max_size=None, square=False, v2=False):
+ # size can be min_size (scalar) or (w, h) tuple
+
+ def get_size(image_size, size, max_size=None):
+ if isinstance(size, (list, tuple)):
+ return size[::-1]
+ else:
+ return get_size_with_aspect_ratio(image_size, size, max_size)
+
+ if square:
+ size = size, size
+ else:
+ cur_size = (
+ datapoint.images[index].data.size()[-2:][::-1]
+ if v2
+ else datapoint.images[index].data.size
+ )
+ size = get_size(cur_size, size, max_size)
+
+ old_size = (
+ datapoint.images[index].data.size()[-2:][::-1]
+ if v2
+ else datapoint.images[index].data.size
+ )
+ if v2:
+ datapoint.images[index].data = Fv2.resize(
+ datapoint.images[index].data, size, antialias=True
+ )
+ else:
+ datapoint.images[index].data = F.resize(datapoint.images[index].data, size)
+
+ new_size = (
+ datapoint.images[index].data.size()[-2:][::-1]
+ if v2
+ else datapoint.images[index].data.size
+ )
+ ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(new_size, old_size))
+ ratio_width, ratio_height = ratios
+
+ for obj in datapoint.images[index].objects:
+ boxes = obj.bbox.view(1, 4)
+ scaled_boxes = boxes * torch.as_tensor(
+ [ratio_width, ratio_height, ratio_width, ratio_height], dtype=torch.float32
+ )
+ obj.bbox = scaled_boxes
+ obj.area *= ratio_width * ratio_height
+ if obj.segment is not None:
+ obj.segment = F.resize(obj.segment[None, None], size).squeeze()
+
+ for query in datapoint.find_queries:
+ if query.semantic_target is not None:
+ query.semantic_target = F.resize(
+ query.semantic_target[None, None], size
+ ).squeeze()
+ if query.image_id == index and query.input_bbox is not None:
+ boxes = query.input_bbox
+ scaled_boxes = boxes * torch.as_tensor(
+ [ratio_width, ratio_height, ratio_width, ratio_height],
+ dtype=torch.float32,
+ )
+ query.input_bbox = scaled_boxes
+ if query.image_id == index and query.input_points is not None:
+ points = query.input_points
+ scaled_points = points * torch.as_tensor(
+ [ratio_width, ratio_height, 1],
+ dtype=torch.float32,
+ )
+ query.input_points = scaled_points
+
+ h, w = size
+ datapoint.images[index].size = (h, w)
+ return datapoint
+
+
+def pad(datapoint, index, padding, v2=False):
+ old_h, old_w = datapoint.images[index].size
+ h, w = old_h, old_w
+ if len(padding) == 2:
+ # assumes that we only pad on the right and bottom edges
+ if v2:
+ datapoint.images[index].data = Fv2.pad(
+ datapoint.images[index].data, (0, 0, padding[0], padding[1])
+ )
+ else:
+ datapoint.images[index].data = F.pad(
+ datapoint.images[index].data, (0, 0, padding[0], padding[1])
+ )
+ h += padding[1]
+ w += padding[0]
+ else:
+ if v2:
+ # left, top, right, bottom
+ datapoint.images[index].data = Fv2.pad(
+ datapoint.images[index].data,
+ (padding[0], padding[1], padding[2], padding[3]),
+ )
+ else:
+ # left, top, right, bottom
+ datapoint.images[index].data = F.pad(
+ datapoint.images[index].data,
+ (padding[0], padding[1], padding[2], padding[3]),
+ )
+ h += padding[1] + padding[3]
+ w += padding[0] + padding[2]
+
+ datapoint.images[index].size = (h, w)
+
+ for obj in datapoint.images[index].objects:
+ if len(padding) != 2:
+ obj.bbox += torch.as_tensor(
+ [padding[0], padding[1], padding[0], padding[1]], dtype=torch.float32
+ )
+ if obj.segment is not None:
+ if v2:
+ if len(padding) == 2:
+ obj.segment = Fv2.pad(
+ obj.segment[None], (0, 0, padding[0], padding[1])
+ ).squeeze(0)
+ else:
+ obj.segment = Fv2.pad(obj.segment[None], tuple(padding)).squeeze(0)
+ else:
+ if len(padding) == 2:
+ obj.segment = F.pad(obj.segment, (0, 0, padding[0], padding[1]))
+ else:
+ obj.segment = F.pad(obj.segment, tuple(padding))
+
+ for query in datapoint.find_queries:
+ if query.semantic_target is not None:
+ if v2:
+ if len(padding) == 2:
+ query.semantic_target = Fv2.pad(
+ query.semantic_target[None, None],
+ (0, 0, padding[0], padding[1]),
+ ).squeeze()
+ else:
+ query.semantic_target = Fv2.pad(
+ query.semantic_target[None, None], tuple(padding)
+ ).squeeze()
+ else:
+ if len(padding) == 2:
+ query.semantic_target = F.pad(
+ query.semantic_target[None, None],
+ (0, 0, padding[0], padding[1]),
+ ).squeeze()
+ else:
+ query.semantic_target = F.pad(
+ query.semantic_target[None, None], tuple(padding)
+ ).squeeze()
+ if query.image_id == index and query.input_bbox is not None:
+ if len(padding) != 2:
+ query.input_bbox += torch.as_tensor(
+ [padding[0], padding[1], padding[0], padding[1]],
+ dtype=torch.float32,
+ )
+ if query.image_id == index and query.input_points is not None:
+ if len(padding) != 2:
+ query.input_points += torch.as_tensor(
+ [padding[0], padding[1], 0], dtype=torch.float32
+ )
+
+ return datapoint
+
+
+class RandomSizeCropAPI:
+ def __init__(
+ self,
+ min_size: int,
+ max_size: int,
+ respect_boxes: bool,
+ consistent_transform: bool,
+ respect_input_boxes: bool = True,
+ v2: bool = False,
+ recompute_box_from_mask: bool = False,
+ ):
+ self.min_size = min_size
+ self.max_size = max_size
+ self.respect_boxes = respect_boxes # if True we can't crop a box out
+ self.respect_input_boxes = respect_input_boxes
+ self.consistent_transform = consistent_transform
+ self.v2 = v2
+ self.recompute_box_from_mask = recompute_box_from_mask
+
+ def _sample_no_respect_boxes(self, img):
+ w = random.randint(self.min_size, min(img.width, self.max_size))
+ h = random.randint(self.min_size, min(img.height, self.max_size))
+ return T.RandomCrop.get_params(img, (h, w))
+
+ def _sample_respect_boxes(self, img, boxes, points, min_box_size=10.0):
+ """
+ Ensure that no box or point is dropped by the crop, though portions
+ of boxes may still be cut off.
+ """
+ if len(boxes) == 0 and len(points) == 0:
+ return self._sample_no_respect_boxes(img)
+
+ if self.v2:
+ img_height, img_width = img.size()[-2:]
+ else:
+ img_width, img_height = img.size
+
+ minW, minH, maxW, maxH = (
+ min(img_width, self.min_size),
+ min(img_height, self.min_size),
+ min(img_width, self.max_size),
+ min(img_height, self.max_size),
+ )
+
+ # The crop box must extend one pixel beyond the points to the bottom/right
+ # to ensure the (exclusive) crop box still contains them.
+ minX = (
+ torch.cat([boxes[:, 0] + min_box_size, points[:, 0] + 1], dim=0)
+ .max()
+ .item()
+ )
+ minY = (
+ torch.cat([boxes[:, 1] + min_box_size, points[:, 1] + 1], dim=0)
+ .max()
+ .item()
+ )
+ minX = min(img_width, minX)
+ minY = min(img_height, minY)
+ maxX = torch.cat([boxes[:, 2] - min_box_size, points[:, 0]], dim=0).min().item()
+ maxY = torch.cat([boxes[:, 3] - min_box_size, points[:, 1]], dim=0).min().item()
+ maxX = max(0.0, maxX)
+ maxY = max(0.0, maxY)
+ minW = max(minW, minX - maxX)
+ minH = max(minH, minY - maxY)
+ w = random.uniform(minW, max(minW, maxW))
+ h = random.uniform(minH, max(minH, maxH))
+ if minX > maxX:
+ # i = random.uniform(max(0, minX - w + 1), max(maxX, max(0, minX - w + 1)))
+ i = random.uniform(max(0, minX - w), max(maxX, max(0, minX - w)))
+ else:
+ i = random.uniform(
+ max(0, minX - w + 1), max(maxX - 1, max(0, minX - w + 1))
+ )
+ if minY > maxY:
+ # j = random.uniform(max(0, minY - h + 1), max(maxY, max(0, minY - h + 1)))
+ j = random.uniform(max(0, minY - h), max(maxY, max(0, minY - h)))
+ else:
+ j = random.uniform(
+ max(0, minY - h + 1), max(maxY - 1, max(0, minY - h + 1))
+ )
+
+ return [j, i, h, w]
+
+ def __call__(self, datapoint, **kwargs):
+ if self.respect_boxes or self.respect_input_boxes:
+ if self.consistent_transform:
+ # Check that all the images are the same size
+ w, h = datapoint.images[0].data.size
+ for img in datapoint.images:
+ assert img.data.size == (w, h)
+
+ all_boxes = []
+ # Getting all boxes in all the images
+ if self.respect_boxes:
+ all_boxes += [
+ obj.bbox.view(-1, 4)
+ for img in datapoint.images
+ for obj in img.objects
+ ]
+ # Get all the boxes in the find queries
+ if self.respect_input_boxes:
+ all_boxes += [
+ q.input_bbox.view(-1, 4)
+ for q in datapoint.find_queries
+ if q.input_bbox is not None
+ ]
+ if all_boxes:
+ all_boxes = torch.cat(all_boxes, 0)
+ else:
+ all_boxes = torch.empty(0, 4)
+
+ all_points = [
+ q.input_points.view(-1, 3)[:, :2]
+ for q in datapoint.find_queries
+ if q.input_points is not None
+ ]
+ if all_points:
+ all_points = torch.cat(all_points, 0)
+ else:
+ all_points = torch.empty(0, 2)
+
+ crop_param = self._sample_respect_boxes(
+ datapoint.images[0].data, all_boxes, all_points
+ )
+ for i in range(len(datapoint.images)):
+ datapoint = crop(
+ datapoint,
+ i,
+ crop_param,
+ v2=self.v2,
+ check_validity=self.respect_boxes,
+ check_input_validity=self.respect_input_boxes,
+ recompute_box_from_mask=self.recompute_box_from_mask,
+ )
+ return datapoint
+ else:
+ for i in range(len(datapoint.images)):
+ all_boxes = []
+ # Get all boxes in the current image
+ if self.respect_boxes:
+ all_boxes += [
+ obj.bbox.view(-1, 4) for obj in datapoint.images[i].objects
+ ]
+ # Get all the boxes in the find queries that correspond to this image
+ if self.respect_input_boxes:
+ all_boxes += [
+ q.input_bbox.view(-1, 4)
+ for q in datapoint.find_queries
+ if q.image_id == i and q.input_bbox is not None
+ ]
+ if all_boxes:
+ all_boxes = torch.cat(all_boxes, 0)
+ else:
+ all_boxes = torch.empty(0, 4)
+
+ all_points = [
+ q.input_points.view(-1, 3)[:, :2]
+ for q in datapoint.find_queries
+ if q.input_points is not None
+ ]
+ if all_points:
+ all_points = torch.cat(all_points, 0)
+ else:
+ all_points = torch.empty(0, 2)
+
+ crop_param = self._sample_respect_boxes(
+ datapoint.images[i].data, all_boxes, all_points
+ )
+ datapoint = crop(
+ datapoint,
+ i,
+ crop_param,
+ v2=self.v2,
+ check_validity=self.respect_boxes,
+ check_input_validity=self.respect_input_boxes,
+ recompute_box_from_mask=self.recompute_box_from_mask,
+ )
+ return datapoint
+ else:
+ if self.consistent_transform:
+ # Check that all the images are the same size
+ w, h = datapoint.images[0].data.size
+ for img in datapoint.images:
+ assert img.data.size == (w, h)
+
+ crop_param = self._sample_no_respect_boxes(datapoint.images[0].data)
+ for i in range(len(datapoint.images)):
+ datapoint = crop(
+ datapoint,
+ i,
+ crop_param,
+ v2=self.v2,
+ check_validity=self.respect_boxes,
+ check_input_validity=self.respect_input_boxes,
+ recompute_box_from_mask=self.recompute_box_from_mask,
+ )
+ return datapoint
+ else:
+ for i in range(len(datapoint.images)):
+ crop_param = self._sample_no_respect_boxes(datapoint.images[i].data)
+ datapoint = crop(
+ datapoint,
+ i,
+ crop_param,
+ v2=self.v2,
+ check_validity=self.respect_boxes,
+ check_input_validity=self.respect_input_boxes,
+ recompute_box_from_mask=self.recompute_box_from_mask,
+ )
+ return datapoint
+
+
+class CenterCropAPI:
+ def __init__(self, size, consistent_transform, recompute_box_from_mask=False):
+ self.size = size
+ self.consistent_transform = consistent_transform
+ self.recompute_box_from_mask = recompute_box_from_mask
+
+ def _sample_crop(self, image_width, image_height):
+ crop_height, crop_width = self.size
+ crop_top = int(round((image_height - crop_height) / 2.0))
+ crop_left = int(round((image_width - crop_width) / 2.0))
+ return crop_top, crop_left, crop_height, crop_width
+
+ def __call__(self, datapoint, **kwargs):
+ if self.consistent_transform:
+ # Check that all the images are the same size
+ w, h = datapoint.images[0].data.size
+ for img in datapoint.images:
+ assert img.size == (w, h)
+
+ crop_top, crop_left, crop_height, crop_width = self._sample_crop(w, h)
+ for i in range(len(datapoint.images)):
+ datapoint = crop(
+ datapoint,
+ i,
+ (crop_top, crop_left, crop_height, crop_width),
+ recompute_box_from_mask=self.recompute_box_from_mask,
+ )
+ return datapoint
+
+ for i in range(len(datapoint.images)):
+ w, h = datapoint.images[i].data.size
+ crop_top, crop_left, crop_height, crop_width = self._sample_crop(w, h)
+ datapoint = crop(
+ datapoint,
+ i,
+ (crop_top, crop_left, crop_height, crop_width),
+ recompute_box_from_mask=self.recompute_box_from_mask,
+ )
+
+ return datapoint
+
+
+class RandomHorizontalFlip:
+ def __init__(self, consistent_transform, p=0.5):
+ self.p = p
+ self.consistent_transform = consistent_transform
+
+ def __call__(self, datapoint, **kwargs):
+ if self.consistent_transform:
+ if random.random() < self.p:
+ for i in range(len(datapoint.images)):
+ datapoint = hflip(datapoint, i)
+ return datapoint
+ for i in range(len(datapoint.images)):
+ if random.random() < self.p:
+ datapoint = hflip(datapoint, i)
+ return datapoint
+
+
+class RandomResizeAPI:
+ def __init__(
+ self, sizes, consistent_transform, max_size=None, square=False, v2=False
+ ):
+ if isinstance(sizes, int):
+ sizes = (sizes,)
+ assert isinstance(sizes, Iterable)
+ self.sizes = list(sizes)
+ self.max_size = max_size
+ self.square = square
+ self.consistent_transform = consistent_transform
+ self.v2 = v2
+
+ def __call__(self, datapoint, **kwargs):
+ if self.consistent_transform:
+ size = random.choice(self.sizes)
+ for i in range(len(datapoint.images)):
+ datapoint = resize(
+ datapoint, i, size, self.max_size, square=self.square, v2=self.v2
+ )
+ return datapoint
+ for i in range(len(datapoint.images)):
+ size = random.choice(self.sizes)
+ datapoint = resize(
+ datapoint, i, size, self.max_size, square=self.square, v2=self.v2
+ )
+ return datapoint
+
+
+class ScheduledRandomResizeAPI(RandomResizeAPI):
+ def __init__(self, size_scheduler, consistent_transform, square=False):
+ self.size_scheduler = size_scheduler
+ # Just a meaningful init value for super
+ params = self.size_scheduler(epoch_num=0)
+ sizes, max_size = params["sizes"], params["max_size"]
+ super().__init__(sizes, consistent_transform, max_size=max_size, square=square)
+
+ def __call__(self, datapoint, **kwargs):
+ assert "epoch" in kwargs, "Param scheduler needs to know the current epoch"
+ params = self.size_scheduler(kwargs["epoch"])
+ sizes, max_size = params["sizes"], params["max_size"]
+ self.sizes = sizes
+ self.max_size = max_size
+ datapoint = super(ScheduledRandomResizeAPI, self).__call__(datapoint, **kwargs)
+ return datapoint
+
+
+class RandomPadAPI:
+ def __init__(self, max_pad, consistent_transform):
+ self.max_pad = max_pad
+ self.consistent_transform = consistent_transform
+
+ def _sample_pad(self):
+ pad_x = random.randint(0, self.max_pad)
+ pad_y = random.randint(0, self.max_pad)
+ return pad_x, pad_y
+
+ def __call__(self, datapoint, **kwargs):
+ if self.consistent_transform:
+ pad_x, pad_y = self._sample_pad()
+ for i in range(len(datapoint.images)):
+ datapoint = pad(datapoint, i, (pad_x, pad_y))
+ return datapoint
+
+ for i in range(len(datapoint.images)):
+ pad_x, pad_y = self._sample_pad()
+ datapoint = pad(datapoint, i, (pad_x, pad_y))
+ return datapoint
+
+
+class PadToSizeAPI:
+ def __init__(self, size, consistent_transform, bottom_right=False, v2=False):
+ self.size = size
+ self.consistent_transform = consistent_transform
+ self.v2 = v2
+ self.bottom_right = bottom_right
+
+ def _sample_pad(self, w, h):
+ pad_x = self.size - w
+ pad_y = self.size - h
+ assert pad_x >= 0 and pad_y >= 0
+ pad_left = random.randint(0, pad_x)
+ pad_right = pad_x - pad_left
+ pad_top = random.randint(0, pad_y)
+ pad_bottom = pad_y - pad_top
+ return pad_left, pad_top, pad_right, pad_bottom
+
+ def __call__(self, datapoint, **kwargs):
+ if self.consistent_transform:
+ # Check that all the images are the same size
+ w, h = datapoint.images[0].data.size
+ for img in datapoint.images:
+ assert img.size == (w, h)
+ if self.bottom_right:
+ pad_right = self.size - w
+ pad_bottom = self.size - h
+ padding = (pad_right, pad_bottom)
+ else:
+ padding = self._sample_pad(w, h)
+ for i in range(len(datapoint.images)):
+ datapoint = pad(datapoint, i, padding, v2=self.v2)
+ return datapoint
+
+ for i, img in enumerate(datapoint.images):
+ w, h = img.data.size
+ if self.bottom_right:
+ pad_right = self.size - w
+ pad_bottom = self.size - h
+ padding = (pad_right, pad_bottom)
+ else:
+ padding = self._sample_pad(w, h)
+ datapoint = pad(datapoint, i, padding, v2=self.v2)
+ return datapoint
+
+
+class RandomMosaicVideoAPI:
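+ # With probability `prob`, turns each frame into a grid_h x grid_w mosaic of
+ # downscaled copies of itself and keeps the object masks only inside one
+ # randomly chosen target cell (see random_mosaic_frame below).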
+ def __init__(self, prob=0.15, grid_h=2, grid_w=2, use_random_hflip=False):
+ self.prob = prob
+ self.grid_h = grid_h
+ self.grid_w = grid_w
+ self.use_random_hflip = use_random_hflip
+
+ def __call__(self, datapoint, **kwargs):
+ if random.random() > self.prob:
+ return datapoint
+
+ # select a random location to place the target mask in the mosaic
+ target_grid_y = random.randint(0, self.grid_h - 1)
+ target_grid_x = random.randint(0, self.grid_w - 1)
+ # whether to flip each grid in the mosaic horizontally
+ if self.use_random_hflip:
+ should_hflip = torch.rand(self.grid_h, self.grid_w) < 0.5
+ else:
+ should_hflip = torch.zeros(self.grid_h, self.grid_w, dtype=torch.bool)
+ for i in range(len(datapoint.images)):
+ datapoint = random_mosaic_frame(
+ datapoint,
+ i,
+ grid_h=self.grid_h,
+ grid_w=self.grid_w,
+ target_grid_y=target_grid_y,
+ target_grid_x=target_grid_x,
+ should_hflip=should_hflip,
+ )
+
+ return datapoint
+
+
+def random_mosaic_frame(
+ datapoint,
+ index,
+ grid_h,
+ grid_w,
+ target_grid_y,
+ target_grid_x,
+ should_hflip,
+):
+ # Step 1: downsize the images and paste them into a mosaic
+ image_data = datapoint.images[index].data
+ is_pil = isinstance(image_data, PILImage.Image)
+ if is_pil:
+ H_im = image_data.height
+ W_im = image_data.width
+ image_data_output = PILImage.new("RGB", (W_im, H_im))
+ else:
+ H_im = image_data.size(-2)
+ W_im = image_data.size(-1)
+ image_data_output = torch.zeros_like(image_data)
+
+ downsize_cache = {}
+ for grid_y in range(grid_h):
+ for grid_x in range(grid_w):
+ y_offset_b = grid_y * H_im // grid_h
+ x_offset_b = grid_x * W_im // grid_w
+ y_offset_e = (grid_y + 1) * H_im // grid_h
+ x_offset_e = (grid_x + 1) * W_im // grid_w
+ H_im_downsize = y_offset_e - y_offset_b
+ W_im_downsize = x_offset_e - x_offset_b
+
+ if (H_im_downsize, W_im_downsize) in downsize_cache:
+ image_data_downsize = downsize_cache[(H_im_downsize, W_im_downsize)]
+ else:
+ image_data_downsize = F.resize(
+ image_data,
+ size=(H_im_downsize, W_im_downsize),
+ interpolation=InterpolationMode.BILINEAR,
+ antialias=True, # antialiasing for downsizing
+ )
+ downsize_cache[(H_im_downsize, W_im_downsize)] = image_data_downsize
+ if should_hflip[grid_y, grid_x].item():
+ image_data_downsize = F.hflip(image_data_downsize)
+
+ if is_pil:
+ image_data_output.paste(image_data_downsize, (x_offset_b, y_offset_b))
+ else:
+ image_data_output[:, y_offset_b:y_offset_e, x_offset_b:x_offset_e] = (
+ image_data_downsize
+ )
+
+ datapoint.images[index].data = image_data_output
+
+ # Step 2: downsize the masks and paste them into the target grid of the mosaic
+ # (note that we don't scale input/target boxes since they are not used in TA)
+ for obj in datapoint.images[index].objects:
+ if obj.segment is None:
+ continue
+ assert obj.segment.shape == (H_im, W_im) and obj.segment.dtype == torch.uint8
+ segment_output = torch.zeros_like(obj.segment)
+
+ target_y_offset_b = target_grid_y * H_im // grid_h
+ target_x_offset_b = target_grid_x * W_im // grid_w
+ target_y_offset_e = (target_grid_y + 1) * H_im // grid_h
+ target_x_offset_e = (target_grid_x + 1) * W_im // grid_w
+ target_H_im_downsize = target_y_offset_e - target_y_offset_b
+ target_W_im_downsize = target_x_offset_e - target_x_offset_b
+
+ segment_downsize = F.resize(
+ obj.segment[None, None],
+ size=(target_H_im_downsize, target_W_im_downsize),
+ interpolation=InterpolationMode.BILINEAR,
+ antialias=True, # antialiasing for downsizing
+ )[0, 0]
+ if should_hflip[target_grid_y, target_grid_x].item():
+ segment_downsize = F.hflip(segment_downsize[None, None])[0, 0]
+
+ segment_output[
+ target_y_offset_b:target_y_offset_e, target_x_offset_b:target_x_offset_e
+ ] = segment_downsize
+ obj.segment = segment_output
+
+ return datapoint
+
+
+class ScheduledPadToSizeAPI(PadToSizeAPI):
+ def __init__(self, size_scheduler, consistent_transform):
+ self.size_scheduler = size_scheduler
+ size = self.size_scheduler(epoch_num=0)["sizes"]
+ super().__init__(size, consistent_transform)
+
+ def __call__(self, datapoint, **kwargs):
+ assert "epoch" in kwargs, "Param scheduler needs to know the current epoch"
+ params = self.size_scheduler(kwargs["epoch"])
+ self.size = params["resolution"]
+ return super(ScheduledPadToSizeAPI, self).__call__(datapoint, **kwargs)
+
+
+class IdentityAPI:
+ def __call__(self, datapoint, **kwargs):
+ return datapoint
+
+
+class RandomSelectAPI:
+ """
+ Randomly selects between transforms1 and transforms2,
+ with probability p for transforms1 and (1 - p) for transforms2
+ """
+
+ def __init__(self, transforms1=None, transforms2=None, p=0.5):
+ self.transforms1 = transforms1 or IdentityAPI()
+ self.transforms2 = transforms2 or IdentityAPI()
+ self.p = p
+
+ def __call__(self, datapoint, **kwargs):
+ if random.random() < self.p:
+ return self.transforms1(datapoint, **kwargs)
+ return self.transforms2(datapoint, **kwargs)
+
+
+class ToTensorAPI:
+ def __init__(self, v2=False):
+ self.v2 = v2
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for img in datapoint.images:
+ if self.v2:
+ img.data = Fv2.to_image_tensor(img.data)
+ # img.data = Fv2.to_dtype(img.data, torch.uint8, scale=True)
+ # img.data = Fv2.convert_image_dtype(img.data, torch.uint8)
+ else:
+ img.data = F.to_tensor(img.data)
+ return datapoint
+
+
+class NormalizeAPI:
+ def __init__(self, mean, std, v2=False):
+ self.mean = mean
+ self.std = std
+ self.v2 = v2
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for img in datapoint.images:
+ if self.v2:
+ img.data = Fv2.convert_image_dtype(img.data, torch.float32)
+ img.data = Fv2.normalize(img.data, mean=self.mean, std=self.std)
+ else:
+ img.data = F.normalize(img.data, mean=self.mean, std=self.std)
+ for obj in img.objects:
+ boxes = obj.bbox
+ cur_h, cur_w = img.data.shape[-2:]
+ boxes = box_xyxy_to_cxcywh(boxes)
+ boxes = boxes / torch.tensor(
+ [cur_w, cur_h, cur_w, cur_h], dtype=torch.float32
+ )
+ obj.bbox = boxes
+
+ for query in datapoint.find_queries:
+ if query.input_bbox is not None:
+ boxes = query.input_bbox
+ cur_h, cur_w = datapoint.images[query.image_id].data.shape[-2:]
+ boxes = box_xyxy_to_cxcywh(boxes)
+ boxes = boxes / torch.tensor(
+ [cur_w, cur_h, cur_w, cur_h], dtype=torch.float32
+ )
+ query.input_bbox = boxes
+ if query.input_points is not None:
+ points = query.input_points
+ cur_h, cur_w = datapoint.images[query.image_id].data.shape[-2:]
+ points = points / torch.tensor([cur_w, cur_h, 1.0], dtype=torch.float32)
+ query.input_points = points
+
+ return datapoint
+
+
+class ComposeAPI:
+ def __init__(self, transforms):
+ self.transforms = transforms
+
+ def __call__(self, datapoint, **kwargs):
+ for t in self.transforms:
+ datapoint = t(datapoint, **kwargs)
+ return datapoint
+
+ def __repr__(self):
+ format_string = self.__class__.__name__ + "("
+ for t in self.transforms:
+ format_string += "\n"
+ format_string += " {0}".format(t)
+ format_string += "\n)"
+ return format_string
+
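+# Illustrative usage sketch (not taken from the original training configs): every
+# *API transform above shares the `__call__(datapoint, **kwargs)` signature, so they
+# can be chained with ComposeAPI (and optionally branched with RandomSelectAPI).
+# The exact pipeline below is an assumption for illustration only:
+#
+#   transforms = ComposeAPI(
+#       transforms=[
+#           ToTensorAPI(),
+#           NormalizeAPI(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+#       ]
+#   )
+#   datapoint = transforms(datapoint, epoch=0)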
+
+class RandomGrayscale:
+ def __init__(self, consistent_transform, p=0.5):
+ self.p = p
+ self.consistent_transform = consistent_transform
+ self.Grayscale = T.Grayscale(num_output_channels=3)
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ if self.consistent_transform:
+ if random.random() < self.p:
+ for img in datapoint.images:
+ img.data = self.Grayscale(img.data)
+ return datapoint
+ for img in datapoint.images:
+ if random.random() < self.p:
+ img.data = self.Grayscale(img.data)
+ return datapoint
+
+
+class ColorJitter:
+ def __init__(self, consistent_transform, brightness, contrast, saturation, hue):
+ self.consistent_transform = consistent_transform
+ self.brightness = (
+ brightness
+ if isinstance(brightness, list)
+ else [max(0, 1 - brightness), 1 + brightness]
+ )
+ self.contrast = (
+ contrast
+ if isinstance(contrast, list)
+ else [max(0, 1 - contrast), 1 + contrast]
+ )
+ self.saturation = (
+ saturation
+ if isinstance(saturation, list)
+ else [max(0, 1 - saturation), 1 + saturation]
+ )
+ self.hue = hue if isinstance(hue, list) or hue is None else ([-hue, hue])
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ if self.consistent_transform:
+ # Create a color jitter transformation params
+ (
+ fn_idx,
+ brightness_factor,
+ contrast_factor,
+ saturation_factor,
+ hue_factor,
+ ) = T.ColorJitter.get_params(
+ self.brightness, self.contrast, self.saturation, self.hue
+ )
+ for img in datapoint.images:
+ if not self.consistent_transform:
+ (
+ fn_idx,
+ brightness_factor,
+ contrast_factor,
+ saturation_factor,
+ hue_factor,
+ ) = T.ColorJitter.get_params(
+ self.brightness, self.contrast, self.saturation, self.hue
+ )
+ for fn_id in fn_idx:
+ if fn_id == 0 and brightness_factor is not None:
+ img.data = F.adjust_brightness(img.data, brightness_factor)
+ elif fn_id == 1 and contrast_factor is not None:
+ img.data = F.adjust_contrast(img.data, contrast_factor)
+ elif fn_id == 2 and saturation_factor is not None:
+ img.data = F.adjust_saturation(img.data, saturation_factor)
+ elif fn_id == 3 and hue_factor is not None:
+ img.data = F.adjust_hue(img.data, hue_factor)
+ return datapoint
+
+
+class RandomAffine:
+ def __init__(
+ self,
+ degrees,
+ consistent_transform,
+ scale=None,
+ translate=None,
+ shear=None,
+ image_mean=(123, 116, 103),
+ log_warning=True,
+ num_tentatives=1,
+ image_interpolation="bicubic",
+ ):
+ """
+ The mask is required for this transform.
+        If consistent_transform is True, the same random affine is applied to all frames and masks.
+ """
+ self.degrees = degrees if isinstance(degrees, list) else ([-degrees, degrees])
+ self.scale = scale
+ self.shear = (
+ shear if isinstance(shear, list) else ([-shear, shear] if shear else None)
+ )
+ self.translate = translate
+ self.fill_img = image_mean
+ self.consistent_transform = consistent_transform
+ self.log_warning = log_warning
+ self.num_tentatives = num_tentatives
+
+ if image_interpolation == "bicubic":
+ self.image_interpolation = InterpolationMode.BICUBIC
+ elif image_interpolation == "bilinear":
+ self.image_interpolation = InterpolationMode.BILINEAR
+ else:
+ raise NotImplementedError
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for _tentative in range(self.num_tentatives):
+ res = self.transform_datapoint(datapoint)
+ if res is not None:
+ return res
+
+ if self.log_warning:
+ logging.warning(
+ f"Skip RandomAffine for zero-area mask in first frame after {self.num_tentatives} tentatives"
+ )
+ return datapoint
+
+ def transform_datapoint(self, datapoint: Datapoint):
+ _, height, width = F.get_dimensions(datapoint.images[0].data)
+ img_size = [width, height]
+
+ if self.consistent_transform:
+ # Create a random affine transformation
+ affine_params = T.RandomAffine.get_params(
+ degrees=self.degrees,
+ translate=self.translate,
+ scale_ranges=self.scale,
+ shears=self.shear,
+ img_size=img_size,
+ )
+
+ for img_idx, img in enumerate(datapoint.images):
+ this_masks = [
+ obj.segment.unsqueeze(0) if obj.segment is not None else None
+ for obj in img.objects
+ ]
+ if not self.consistent_transform:
+                # if not consistent, create new affine params for every frame & mask pair
+ affine_params = T.RandomAffine.get_params(
+ degrees=self.degrees,
+ translate=self.translate,
+ scale_ranges=self.scale,
+ shears=self.shear,
+ img_size=img_size,
+ )
+
+ transformed_bboxes, transformed_masks = [], []
+ for i in range(len(img.objects)):
+ if this_masks[i] is None:
+ transformed_masks.append(None)
+ # Dummy bbox for a dummy target
+ transformed_bboxes.append(torch.tensor([[0, 0, 0, 0]]))
+ else:
+ transformed_mask = F.affine(
+ this_masks[i],
+ *affine_params,
+ interpolation=InterpolationMode.NEAREST,
+ fill=0.0,
+ )
+ if img_idx == 0 and transformed_mask.max() == 0:
+ # We are dealing with a video and the object is not visible in the first frame
+ # Return the datapoint without transformation
+ return None
+ transformed_bbox = masks_to_boxes(transformed_mask)
+ transformed_bboxes.append(transformed_bbox)
+ transformed_masks.append(transformed_mask.squeeze())
+
+ for i in range(len(img.objects)):
+ img.objects[i].bbox = transformed_bboxes[i]
+ img.objects[i].segment = transformed_masks[i]
+
+ img.data = F.affine(
+ img.data,
+ *affine_params,
+ interpolation=self.image_interpolation,
+ fill=self.fill_img,
+ )
+ return datapoint
+
+
+class RandomResizedCrop:
+ def __init__(
+ self,
+ consistent_transform,
+ size,
+ scale=None,
+ ratio=None,
+ log_warning=True,
+ num_tentatives=4,
+ keep_aspect_ratio=False,
+ ):
+ """
+ The mask is required for this transform.
+        If consistent_transform is True, the same random resized crop is applied to all frames and masks.
+ """
+ if isinstance(size, numbers.Number):
+ self.size = (int(size), int(size))
+ elif isinstance(size, Sequence) and len(size) == 1:
+ self.size = (size[0], size[0])
+ elif len(size) != 2:
+ raise ValueError("Please provide only two dimensions (h, w) for size.")
+ else:
+ self.size = size
+
+ self.scale = scale if scale is not None else (0.08, 1.0)
+ self.ratio = ratio if ratio is not None else (3.0 / 4.0, 4.0 / 3.0)
+ self.consistent_transform = consistent_transform
+ self.log_warning = log_warning
+ self.num_tentatives = num_tentatives
+ self.keep_aspect_ratio = keep_aspect_ratio
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for _tentative in range(self.num_tentatives):
+ res = self.transform_datapoint(datapoint)
+ if res is not None:
+ return res
+
+ if self.log_warning:
+ logging.warning(
+ f"Skip RandomResizeCrop for zero-area mask in first frame after {self.num_tentatives} tentatives"
+ )
+ return datapoint
+
+ def transform_datapoint(self, datapoint: Datapoint):
+ if self.keep_aspect_ratio:
+ original_size = datapoint.images[0].size
+ original_ratio = original_size[1] / original_size[0]
+ ratio = [r * original_ratio for r in self.ratio]
+ else:
+ ratio = self.ratio
+
+ if self.consistent_transform:
+ # Create a random crop transformation
+ crop_params = T.RandomResizedCrop.get_params(
+ img=datapoint.images[0].data,
+ scale=self.scale,
+ ratio=ratio,
+ )
+
+ for img_idx, img in enumerate(datapoint.images):
+ if not self.consistent_transform:
+ # Create a random crop transformation
+ crop_params = T.RandomResizedCrop.get_params(
+ img=img.data,
+ scale=self.scale,
+ ratio=ratio,
+ )
+
+ this_masks = [
+ obj.segment.unsqueeze(0) if obj.segment is not None else None
+ for obj in img.objects
+ ]
+
+ transformed_bboxes, transformed_masks = [], []
+ for i in range(len(img.objects)):
+ if this_masks[i] is None:
+ transformed_masks.append(None)
+ # Dummy bbox for a dummy target
+ transformed_bboxes.append(torch.tensor([[0, 0, 0, 0]]))
+ else:
+ transformed_mask = F.resized_crop(
+ this_masks[i],
+ *crop_params,
+ size=self.size,
+ interpolation=InterpolationMode.NEAREST,
+ )
+ if img_idx == 0 and transformed_mask.max() == 0:
+ # We are dealing with a video and the object is not visible in the first frame
+ # Return the datapoint without transformation
+ return None
+ transformed_masks.append(transformed_mask.squeeze())
+ transformed_bbox = masks_to_boxes(transformed_mask)
+ transformed_bboxes.append(transformed_bbox)
+
+ # Set the new boxes and masks if all transformed masks and boxes are good.
+ for i in range(len(img.objects)):
+ img.objects[i].bbox = transformed_bboxes[i]
+ img.objects[i].segment = transformed_masks[i]
+
+ img.data = F.resized_crop(
+ img.data,
+ *crop_params,
+ size=self.size,
+ interpolation=InterpolationMode.BILINEAR,
+ )
+ return datapoint
+
+
+class ResizeToMaxIfAbove:
+    # Resize datapoint image if one of its sides is larger than max_size
+ def __init__(
+ self,
+ max_size=None,
+ ):
+ self.max_size = max_size
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ _, height, width = F.get_dimensions(datapoint.images[0].data)
+
+ if height <= self.max_size and width <= self.max_size:
+ # The original frames are small enough
+ return datapoint
+ elif height >= width:
+ new_height = self.max_size
+ new_width = int(round(self.max_size * width / height))
+ else:
+ new_height = int(round(self.max_size * height / width))
+ new_width = self.max_size
+
+ size = new_height, new_width
+
+ for index in range(len(datapoint.images)):
+ datapoint.images[index].data = F.resize(datapoint.images[index].data, size)
+
+ for obj in datapoint.images[index].objects:
+ obj.segment = F.resize(
+ obj.segment[None, None],
+ size,
+ interpolation=InterpolationMode.NEAREST,
+ ).squeeze()
+
+ h, w = size
+ datapoint.images[index].size = (h, w)
+ return datapoint
+
+
+def get_bbox_xyxy_abs_coords_from_mask(mask):
+ """Get the bounding box (XYXY format w/ absolute coordinates) of a binary mask."""
+ assert mask.dim() == 2
+ rows = torch.any(mask, dim=1)
+ cols = torch.any(mask, dim=0)
+ row_inds = rows.nonzero().view(-1)
+ col_inds = cols.nonzero().view(-1)
+ if row_inds.numel() == 0:
+ # mask is empty
+ bbox = torch.zeros(1, 4, dtype=torch.float32)
+ bbox_area = 0.0
+ else:
+ ymin, ymax = row_inds.min(), row_inds.max()
+ xmin, xmax = col_inds.min(), col_inds.max()
+ bbox = torch.tensor([xmin, ymin, xmax, ymax], dtype=torch.float32).view(1, 4)
+ bbox_area = float((ymax - ymin) * (xmax - xmin))
+ return bbox, bbox_area
+
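+# Tiny worked example (illustrative only) for get_bbox_xyxy_abs_coords_from_mask:
+#
+#   mask = torch.zeros(4, 4, dtype=torch.uint8)
+#   mask[1:3, 1:4] = 1
+#   bbox, area = get_bbox_xyxy_abs_coords_from_mask(mask)
+#   # bbox -> tensor([[1., 1., 3., 2.]]) in (xmin, ymin, xmax, ymax), area -> 2.0
+#
+# Note that the returned area is the box area (ymax - ymin) * (xmax - xmin) computed
+# from inclusive pixel indices, not the pixel count of the mask (which is 6 here).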
+
+class MotionBlur:
+ def __init__(self, kernel_size=5, consistent_transform=True, p=0.5):
+ assert kernel_size % 2 == 1, "Kernel size must be odd."
+ self.kernel_size = kernel_size
+ self.consistent_transform = consistent_transform
+ self.p = p
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ if random.random() >= self.p:
+ return datapoint
+ if self.consistent_transform:
+ # Generate a single motion blur kernel for all images
+ kernel = self._generate_motion_blur_kernel()
+ for img in datapoint.images:
+ if not self.consistent_transform:
+ # Generate a new motion blur kernel for each image
+ kernel = self._generate_motion_blur_kernel()
+ img.data = self._apply_motion_blur(img.data, kernel)
+
+ return datapoint
+
+ def _generate_motion_blur_kernel(self):
+ kernel = torch.zeros((self.kernel_size, self.kernel_size))
+ direction = random.choice(["horizontal", "vertical", "diagonal"])
+ if direction == "horizontal":
+ kernel[self.kernel_size // 2, :] = 1.0
+ elif direction == "vertical":
+ kernel[:, self.kernel_size // 2] = 1.0
+ elif direction == "diagonal":
+ for i in range(self.kernel_size):
+ kernel[i, i] = 1.0
+ kernel /= kernel.sum()
+ return kernel
+
+ def _apply_motion_blur(self, image, kernel):
+ if isinstance(image, PILImage.Image):
+ image = F.to_tensor(image)
+ channels = image.shape[0]
+ kernel = kernel.to(image.device).unsqueeze(0).unsqueeze(0)
+ blurred_image = torch.nn.functional.conv2d(
+ image.unsqueeze(0),
+ kernel.repeat(channels, 1, 1, 1),
+ padding=self.kernel_size // 2,
+ groups=channels,
+ )
+ return F.to_pil_image(blurred_image.squeeze(0))
+
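+# Illustrative example of the motion-blur kernel built above (not part of the original
+# file): for kernel_size=5 and the "horizontal" direction, the kernel is a single
+# normalized row,
+#
+#   [[0.0, 0.0, 0.0, 0.0, 0.0],
+#    [0.0, 0.0, 0.0, 0.0, 0.0],
+#    [0.2, 0.2, 0.2, 0.2, 0.2],
+#    [0.0, 0.0, 0.0, 0.0, 0.0],
+#    [0.0, 0.0, 0.0, 0.0, 0.0]]
+#
+# which is applied as a depthwise (grouped) 2D convolution over the image channels.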
+
+class LargeScaleJitter:
+ def __init__(
+ self,
+ scale_range=(0.1, 2.0),
+ aspect_ratio_range=(0.75, 1.33),
+ crop_size=(640, 640),
+ consistent_transform=True,
+ p=0.5,
+ ):
+ """
+        Args:
+ scale_range (tuple): Range of scaling factors (min_scale, max_scale).
+ aspect_ratio_range (tuple): Range of aspect ratios (min_aspect_ratio, max_aspect_ratio).
+ crop_size (tuple): Target size of the cropped region (width, height).
+ consistent_transform (bool): Whether to apply the same transformation across all frames.
+ p (float): Probability of applying the transformation.
+ """
+ self.scale_range = scale_range
+ self.aspect_ratio_range = aspect_ratio_range
+ self.crop_size = crop_size
+ self.consistent_transform = consistent_transform
+ self.p = p
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ if random.random() >= self.p:
+ return datapoint
+
+ # Sample a single scale factor and aspect ratio for all frames
+ log_ratio = torch.log(torch.tensor(self.aspect_ratio_range))
+ scale_factor = torch.empty(1).uniform_(*self.scale_range).item()
+ aspect_ratio = torch.exp(
+ torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
+ ).item()
+
+ for idx, img in enumerate(datapoint.images):
+ if not self.consistent_transform:
+ # Sample a new scale factor and aspect ratio for each frame
+ log_ratio = torch.log(torch.tensor(self.aspect_ratio_range))
+ scale_factor = torch.empty(1).uniform_(*self.scale_range).item()
+ aspect_ratio = torch.exp(
+ torch.empty(1).uniform_(log_ratio[0], log_ratio[1])
+ ).item()
+
+ # Compute the dimensions of the jittered crop
+ original_width, original_height = img.data.size
+ target_area = original_width * original_height * scale_factor
+ crop_width = int(round((target_area * aspect_ratio) ** 0.5))
+ crop_height = int(round((target_area / aspect_ratio) ** 0.5))
+
+ # Randomly select the top-left corner of the crop
+ crop_x = random.randint(0, max(0, original_width - crop_width))
+ crop_y = random.randint(0, max(0, original_height - crop_height))
+
+ # Extract the cropped region
+ datapoint = crop(datapoint, idx, (crop_x, crop_y, crop_width, crop_height))
+
+ # Resize the cropped region to the target crop size
+ datapoint = resize(datapoint, idx, self.crop_size)
+
+ return datapoint
diff --git a/sam3/train/transforms/filter_query_transforms.py b/sam3/train/transforms/filter_query_transforms.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d6708f453d34a1ba05de8a461c9aee5d0636f45
--- /dev/null
+++ b/sam3/train/transforms/filter_query_transforms.py
@@ -0,0 +1,607 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+import random
+
+from collections import defaultdict
+from typing import List, Optional, Union
+
+import torch
+
+from sam3.train.data.sam3_image_dataset import Datapoint, FindQuery, Object
+
+
+class FilterDataPointQueries:
+ find_ids_to_filter: set = None
+ get_ids_to_filter: set = None
+ obj_ids_to_filter: set = None # stored as pairs (img_id, obj_id)
+
+ def identify_queries_to_filter(self, datapoint: Datapoint) -> None:
+ """
+        Compute the set of query ids to filter out, for both find and get queries
+ """
+ raise NotImplementedError
+
+ def _do_filter_query(self, query: Union[FindQuery], query_id: int):
+ assert self.find_ids_to_filter is not None
+
+ return query_id in self.find_ids_to_filter
+
+
+class FilterQueryWithText(FilterDataPointQueries):
+ """
+    Filter out all find queries whose query text is in a specified list of excluded terms
+ """
+
+ def __init__(
+ self, exclude_find_keys: List[str] = None, exclude_get_keys: List[str] = None
+ ):
+ self.find_filter_keys = exclude_find_keys if exclude_find_keys else []
+ self.get_filter_keys = exclude_get_keys if exclude_get_keys else []
+
+ def identify_queries_to_filter(self, datapoint):
+ self.obj_ids_to_filter = set()
+ del_find_ids = []
+ del_get_ids = []
+ for i, f_q in enumerate(datapoint.find_queries):
+ if f_q.query_text in self.find_filter_keys:
+ del_find_ids.append(i)
+
+ self.find_ids_to_filter = set(del_find_ids)
+
+
+class KeepMaxNumFindQueries(FilterDataPointQueries):
+ def __init__(
+ self, max_num_find_queries: int, retain_positive_queries: bool = False
+ ):
+ self.max_num_find_queries = max_num_find_queries
+ self.retain_positive_queries = retain_positive_queries
+
+ def identify_queries_to_filter(self, datapoint: Datapoint) -> None:
+ self.obj_ids_to_filter = set()
+ num_find_queries = len(datapoint.find_queries)
+ if num_find_queries <= self.max_num_find_queries:
+ self.find_ids_to_filter = set() # keep all find queries
+ return
+
+ if not self.retain_positive_queries:
+ all_find_query_ids = list(range(num_find_queries))
+ num_queries_to_filter = max(0, num_find_queries - self.max_num_find_queries)
+ query_ids_to_filter = random.sample(
+ all_find_query_ids, k=num_queries_to_filter
+ )
+ else:
+            # keep up to max_num_find_queries positive find queries and fill
+ # the remaining slots (if any) with negative find queries
+ pos_find_ids, neg_find_ids = [], []
+ for i, f_q in enumerate(datapoint.find_queries):
+ # Negative finds return an empty list of object_ids_output
+ if len(f_q.object_ids_output) == 0:
+ neg_find_ids.append(i)
+ else:
+ pos_find_ids.append(i)
+
+ if len(pos_find_ids) >= self.max_num_find_queries:
+ # we have more positive find queries than `max_num_find_queries`,
+                # so we subsample positive find queries and remove all negative find queries
+ num_queries_to_filter = len(pos_find_ids) - self.max_num_find_queries
+ query_ids_to_filter = random.sample(
+ pos_find_ids, k=num_queries_to_filter
+ )
+ query_ids_to_filter.extend(neg_find_ids)
+ else:
+ # we have fewer positive find queries than `max_num_find_queries`
+ # so we need to fill the remaining with negative find queries
+ num_queries_to_filter = num_find_queries - self.max_num_find_queries
+ query_ids_to_filter = random.sample(
+ neg_find_ids, k=num_queries_to_filter
+ )
+
+ assert len(query_ids_to_filter) == num_find_queries - self.max_num_find_queries
+ self.find_ids_to_filter = set(query_ids_to_filter)
+
+
+class KeepMaxNumFindQueriesVideo(FilterDataPointQueries):
+ def __init__(
+ self,
+ video_mosaic_max_num_find_queries_per_frame: int,
+ retain_positive_queries: bool = False,
+ ):
+ self.video_mosaic_max_num_find_queries_per_frame = (
+ video_mosaic_max_num_find_queries_per_frame
+ )
+ self.retain_positive_queries = retain_positive_queries
+
+ def identify_queries_to_filter(self, datapoint: Datapoint) -> None:
+ self.obj_ids_to_filter = set()
+ num_find_queries = len(datapoint.find_queries)
+
+ findQueries_to_imageIds = defaultdict(list)
+ max_queries_per_frame = True
+ for i, f_q in enumerate(datapoint.find_queries):
+ findQueries_to_imageIds[f_q.image_id].append(i)
+ if (
+ len(findQueries_to_imageIds[f_q.image_id])
+ > self.video_mosaic_max_num_find_queries_per_frame
+ ):
+ max_queries_per_frame = False
+
+ if max_queries_per_frame:
+ self.find_ids_to_filter = set()
+ return
+
+ num_frames = len(findQueries_to_imageIds)
+ findQueries_0 = findQueries_to_imageIds[0]
+ num_find_queries_0 = len(findQueries_0)
+ max_num_find_queries_per_frame = (
+ self.video_mosaic_max_num_find_queries_per_frame
+ )
+ if not self.retain_positive_queries:
+ find_query_ids_0 = list(range(num_find_queries_0))
+ num_queries_to_filter = max(
+ 0, num_find_queries_0 - max_num_find_queries_per_frame
+ )
+ query_ids_to_filter_0 = random.sample(
+ find_query_ids_0, k=num_queries_to_filter
+ )
+ else:
+            # keep up to max_num_find_queries positive find queries and fill
+ # the remaining slots (if any) with negative find queries
+ pos_find_ids_0, neg_find_ids_0 = [], []
+ for i, f_q_id in enumerate(findQueries_0):
+ f_q = datapoint.find_queries[f_q_id]
+ # Negative finds return an empty list of object_ids_output
+ if len(f_q.object_ids_output) == 0:
+ neg_find_ids_0.append(i)
+ else:
+ pos_find_ids_0.append(i)
+
+ if len(pos_find_ids_0) >= max_num_find_queries_per_frame:
+ # we have more positive find queries than `max_num_find_queries`,
+                # so we subsample positive find queries and remove all negative find queries
+ num_queries_to_filter = (
+ len(pos_find_ids_0) - max_num_find_queries_per_frame
+ )
+ query_ids_to_filter_0 = random.sample(
+ pos_find_ids_0, k=num_queries_to_filter
+ )
+ query_ids_to_filter_0.extend(neg_find_ids_0)
+ else:
+ # we have fewer positive find queries than `max_num_find_queries`
+ # so we need to fill the remaining with negative find queries
+ num_queries_to_filter = (
+ num_find_queries_0 - max_num_find_queries_per_frame
+ )
+ query_ids_to_filter_0 = random.sample(
+ neg_find_ids_0, k=num_queries_to_filter
+ )
+
+        # replicate the frame-0 selection: filter the find queries at the same per-frame indices in every frame
+ query_ids_to_filter = []
+ for i in range(num_frames):
+ findQueries_i = findQueries_to_imageIds[i]
+ query_ids_to_filter.extend(
+ [findQueries_i[j] for j in query_ids_to_filter_0]
+ )
+
+ assert (
+ len(query_ids_to_filter)
+ == num_find_queries
+ - self.video_mosaic_max_num_find_queries_per_frame * num_frames
+ )
+ self.find_ids_to_filter = set(query_ids_to_filter)
+
+
+class KeepSemanticFindQueriesOnly(FilterDataPointQueries):
+ def identify_queries_to_filter(self, datapoint: Datapoint) -> None:
+ self.obj_ids_to_filter = set()
+ self.find_ids_to_filter = {
+ i for i, q in enumerate(datapoint.find_queries) if q.input_bbox is not None
+ } # filter (remove) geometric find queries (whose input_bbox is not None)
+
+ # Keep all get queries which don't depend on filtered finds
+
+
+class KeepUnaryFindQueriesOnly(FilterDataPointQueries):
+ def identify_queries_to_filter(self, datapoint: Datapoint) -> None:
+ self.obj_ids_to_filter = set()
+ self.find_ids_to_filter = set()
+
+ # Keep all get queries which don't depend on filtered finds
+
+
+class FilterZeroBoxQueries(FilterDataPointQueries):
+ """
+ Filters all find queries which predict a box with zero area
+ """
+
+ @staticmethod
+ def _is_zero_area_object(obj: Object):
+ # Check if height or width of bounding box is zero
+ bbox = obj.bbox # Assume in XYXY format
+ height = bbox[..., 3].item() - bbox[..., 1].item()
+ width = bbox[..., 2].item() - bbox[..., 0].item()
+
+ return height == 0 or width == 0
+
+ def identify_queries_to_filter(self, datapoint):
+ self.obj_ids_to_filter = set()
+
+ # Find objects with zero area
+ # Assume only one image per datapoint
+ image_objects = datapoint.images[0].objects
+ exclude_objects = {
+ obj_id
+ for obj_id, obj in enumerate(image_objects)
+ if self._is_zero_area_object(obj)
+ }
+
+ # If a query predicts an object with zero area, drop the whole find query
+ del_find_ids = []
+ for i, f_q in enumerate(datapoint.find_queries):
+ f_q_objects = set(f_q.object_ids_output)
+ if len(exclude_objects.intersection(f_q_objects)) > 0:
+ del_find_ids.append(i)
+
+ self.find_ids_to_filter = set(del_find_ids)
+
+
+class FilterFindQueriesWithTooManyOut(FilterDataPointQueries):
+ """
+ Filters all find queries which have more than a specified number of objects in the output
+ """
+
+ def __init__(self, max_num_objects: int):
+ self.max_num_objects = max_num_objects
+
+ def identify_queries_to_filter(self, datapoint):
+ self.obj_ids_to_filter = set()
+
+ # If a query predicts more than max_num_objects, drop the whole find query
+ del_find_ids = []
+ for i, f_q in enumerate(datapoint.find_queries):
+ if len(f_q.object_ids_output) > self.max_num_objects:
+ del_find_ids.append(i)
+
+ self.find_ids_to_filter = set(del_find_ids)
+
+
+class FilterEmptyTargets(FilterDataPointQueries):
+ """
+ Filters all targets which have zero area
+ """
+
+ def identify_queries_to_filter(self, datapoint):
+ self.obj_ids_to_filter = set()
+
+ for img_id in range(len(datapoint.images)):
+ for obj_id, obj in enumerate(datapoint.images[img_id].objects):
+ if obj.area < 1e-6:
+ self.obj_ids_to_filter.add((img_id, obj_id))
+ self.find_ids_to_filter = set()
+
+
+class FilterNonExhaustiveFindQueries(FilterDataPointQueries):
+ """
+ Filters all find queries which are non-exhaustive
+ """
+
+ def __init__(self, exhaustivity_type: str):
+ """
+ Args:
+ exhaustivity_type: Can be "pixel" or "instance":
+                - pixel: filter out queries whose union of segments does not cover every pixel belonging to the target class
+                - instance: filter out queries with non-separable or non-annotated instances
+            Note that instance exhaustivity implies pixel exhaustivity.
+ """
+ assert exhaustivity_type in ["pixel", "instance"]
+ self.exhaustivity_type = exhaustivity_type
+
+ def identify_queries_to_filter(self, datapoint):
+ self.obj_ids_to_filter = set()
+
+        # Drop find queries that are not exhaustive for the requested exhaustivity type
+ del_find_ids = []
+ for i, f_q in enumerate(datapoint.find_queries):
+ if self.exhaustivity_type == "instance":
+ if not f_q.is_exhaustive:
+ del_find_ids.append(i)
+ elif self.exhaustivity_type == "pixel":
+ if f_q.is_pixel_exhaustive is not None and not f_q.is_pixel_exhaustive:
+ del_find_ids.append(i)
+ else:
+ raise RuntimeError(
+ f"Unknown exhaustivity type {self.exhaustivity_type}"
+ )
+
+ self.find_ids_to_filter = set(del_find_ids)
+
+
+class FilterInvalidGeometricQueries(FilterDataPointQueries):
+ """
+ Filters geometric queries whose output got deleted (eg due to cropping)
+ """
+
+ def identify_queries_to_filter(self, datapoint):
+ self.obj_ids_to_filter = set()
+
+        # Drop geometric find queries whose output objects were all removed (e.g. by cropping)
+ del_find_ids = []
+ for i, f_q in enumerate(datapoint.find_queries):
+ if f_q.input_bbox is not None and f_q.query_text == "geometric":
+ if len(f_q.object_ids_output) == 0:
+ del_find_ids.append(i)
+ self.find_ids_to_filter = set(del_find_ids)
+
+
+class FlexibleFilterFindGetQueries:
+ def __init__(
+ self, query_filter: FilterDataPointQueries, enabled: bool = True
+ ) -> None:
+ self.query_filter = query_filter
+ self.enabled = enabled
+
+ def __call__(self, datapoint, **kwargs):
+ if not self.enabled:
+ return datapoint
+
+ # Identify all queries to filter
+ self.query_filter.identify_queries_to_filter(datapoint=datapoint)
+
+ del_find_ids = []
+ del_get_ids = []
+ for i, f_q in enumerate(datapoint.find_queries):
+ if self.query_filter._do_filter_query(f_q, i):
+ datapoint.find_queries[i] = None
+ del_find_ids.append(i)
+
+ new_find_queries = []
+ new_get_queries = []
+
+ find_old_to_new_map = {}
+ get_old_to_new_map = {}
+
+ find_counter = 0
+ get_counter = 0
+
+ for i, f_q in enumerate(datapoint.find_queries):
+ if f_q is not None:
+ find_old_to_new_map[i] = find_counter
+ find_counter += 1
+ new_find_queries.append(f_q)
+
+ start_with_zero_check = False
+ for n_f_q in new_find_queries:
+ if n_f_q.query_processing_order == 0:
+ start_with_zero_check = True
+ break
+
+ if len(new_find_queries) == 0:
+ start_with_zero_check = True
+
+ assert (
+ start_with_zero_check
+ ), "Invalid Find queries, they need to start at query_processing_order = 0"
+
+ datapoint.find_queries = new_find_queries
+
+ if len(datapoint.find_queries) == 0:
+ print("Warning: No find queries left in datapoint, this is not allowed")
+ print("Filtering function:", self.query_filter)
+ print("Datapoint:", datapoint)
+ raise ValueError
+
+ # The deletion may have removed intermediate steps, so we need to remap to make them contiguous again
+ all_stages = sorted(
+ list(set(q.query_processing_order for q in datapoint.find_queries))
+ )
+ stage_map = {qpo: i for i, qpo in enumerate(all_stages)}
+ for i in range(len(datapoint.find_queries)):
+ qpo = datapoint.find_queries[i].query_processing_order
+ datapoint.find_queries[i].query_processing_order = stage_map[qpo]
+
+ # Final step, clear up objects that are not used anymore
+ for img_id in range(len(datapoint.images)):
+ all_objects_ids = set(
+ i
+ for find in datapoint.find_queries
+ for i in find.object_ids_output
+ if find.image_id == img_id
+ )
+ unused_ids = (
+ set(range(len(datapoint.images[img_id].objects))) - all_objects_ids
+ )
+ for tgt_img_id, tgt_obj_id in self.query_filter.obj_ids_to_filter:
+ if tgt_img_id == img_id:
+ unused_ids.add(tgt_obj_id)
+
+ if len(unused_ids) > 0:
+ old_objects = datapoint.images[img_id].objects
+ object_old_to_new_map = {}
+ new_objects = []
+ for i, o in enumerate(old_objects):
+ if i not in unused_ids:
+ object_old_to_new_map[i] = len(new_objects)
+ new_objects.append(o)
+
+ datapoint.images[img_id].objects = new_objects
+
+ # Remap the outputs of the find queries
+ affected_find_queries_ids = set()
+ object_old_to_new_map_per_query = {}
+ for fid, find in enumerate(datapoint.find_queries):
+ if find.image_id == img_id:
+ old_object_ids_output = find.object_ids_output
+ object_old_to_new_map_per_query[fid] = {}
+ find.object_ids_output = []
+ for oid, old_obj_id in enumerate(old_object_ids_output):
+ if old_obj_id not in unused_ids:
+ new_obj_id = object_old_to_new_map[old_obj_id]
+ find.object_ids_output.append(new_obj_id)
+ object_old_to_new_map_per_query[fid][oid] = (
+ len(find.object_ids_output) - 1
+ )
+ affected_find_queries_ids.add(fid)
+
+ # finally remove unused images
+ all_imgs_to_keep = set()
+ for f_q in datapoint.find_queries:
+ all_imgs_to_keep.add(f_q.image_id)
+
+ old_img_id_to_new_img_id = {}
+ new_images = []
+ for img_id, img in enumerate(datapoint.images):
+ if img_id in all_imgs_to_keep:
+ old_img_id_to_new_img_id[img_id] = len(new_images)
+ new_images.append(img)
+ datapoint.images = new_images
+
+ for f_q in datapoint.find_queries:
+ f_q.image_id = old_img_id_to_new_img_id[f_q.image_id]
+
+ return datapoint
+
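+# Illustrative usage sketch (an assumption, not taken from the original configs): the
+# FilterDataPointQueries subclasses only *identify* which find queries / objects to
+# drop; FlexibleFilterFindGetQueries performs the actual deletion and remaps query,
+# object, and image ids so they stay contiguous. For example:
+#
+#   keep_at_most_8 = FlexibleFilterFindGetQueries(
+#       query_filter=KeepMaxNumFindQueries(
+#           max_num_find_queries=8, retain_positive_queries=True
+#       )
+#   )
+#   datapoint = keep_at_most_8(datapoint)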
+
+class AddPrefixSuffixToFindText:
+ """
+ Add prefix or suffix strings to find query text on the fly.
+
+ If `condition_on_text` is True, the prefix or suffix strings are only added
+    to find queries whose text is in `condition_text_list` (case-insensitive).
+ """
+
+ def __init__(
+ self,
+ prefix: Optional[str] = None,
+ suffix: Optional[str] = None,
+ condition_on_text: bool = False,
+ condition_text_list: Optional[List[str]] = None,
+ enabled: bool = True,
+ ) -> None:
+ self.prefix = prefix
+ self.suffix = suffix
+ self.condition_on_text = condition_on_text
+ if self.condition_on_text:
+ assert condition_text_list is not None
+ self.condition_text_set = {s.lower().strip() for s in condition_text_list}
+ self.enabled = enabled
+ if self.enabled:
+ logging.info(
+ f"AddPrefixSuffixToFindText: prefix={prefix}, suffix={suffix}, "
+ f"condition_on_text={condition_on_text}, condition_text_list={condition_text_list}"
+ )
+
+ def __call__(self, datapoint, **kwargs):
+ if not self.enabled:
+ return datapoint
+
+ for find in datapoint.find_queries:
+ if find.query_text == "geometric":
+ # skip geometric find queries
+ continue
+ if (
+ self.condition_on_text
+ and find.query_text.lower().strip() not in self.condition_text_set
+ ):
+ # if condition_on_text is True, skip those queries not in condition_text_set
+ continue
+
+ # add prefix and/or suffix strings to the find query text
+ if self.prefix is not None:
+ find.query_text = self.prefix + find.query_text
+ if self.suffix is not None:
+ find.query_text = find.query_text + self.suffix
+
+ return datapoint
+
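+# Small illustrative example (the strings are assumptions): with prefix="a photo of "
+# and suffix=" in the scene", a find query text "dog" becomes
+# "a photo of dog in the scene"; queries whose text is "geometric" are left untouched.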
+
+class FilterCrowds(FilterDataPointQueries):
+ def identify_queries_to_filter(self, datapoint: Datapoint) -> None:
+ """
+        Compute the set of query ids to filter out, for both find and get queries
+ """
+ self.obj_ids_to_filter = set()
+ self.find_ids_to_filter = set()
+ # self.get_ids_to_filter = set()
+ for img_id, img in enumerate(datapoint.images):
+ for obj_id, obj in enumerate(img.objects):
+ if obj.is_crowd:
+ self.obj_ids_to_filter.add((img_id, obj_id))
+
+
+class TextQueryToVisual:
+ """
+    Transform a text query into a visual query (with some probability), using one of the output targets as the prompt
+ """
+
+ def __init__(self, probability, keep_text_queries=False) -> None:
+ self.probability = probability
+ assert 0 <= probability <= 1
+ self.keep_text_queries = keep_text_queries
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for find in datapoint.find_queries:
+ if find.input_bbox is not None or find.input_points is not None:
+ # skip geometric find queries
+ continue
+
+ if len(find.object_ids_output) == 0:
+ # Can't create a visual query, skip
+ continue
+
+ if find.query_processing_order > 0:
+ # Second stage query, can't use
+ continue
+
+ if random.random() > self.probability:
+ continue
+
+ selected_vq_id = random.choice(find.object_ids_output)
+ img_id = find.image_id
+
+ find.input_bbox = datapoint.images[img_id].objects[selected_vq_id].bbox
+ find.input_bbox_label = torch.ones(1, dtype=torch.bool)
+ if not self.keep_text_queries:
+ find.query_text = "visual"
+
+ return datapoint
+
+
+class RemoveInputBoxes:
+ """
+ Remove input boxes from find queries
+ """
+
+ def __init__(self) -> None:
+ pass
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for find in datapoint.find_queries:
+ if find.input_bbox is None:
+ continue
+
+ if find.query_text == "geometric":
+ print("Warning: removing input box from geometric find query")
+
+ find.input_bbox = None
+ return datapoint
+
+
+class OverwriteTextQuery:
+ """
+ With some probability, overwrite the text query with a custom text
+ """
+
+ def __init__(self, target_text, probability=1.0) -> None:
+ self.probability = probability
+ self.target_text = target_text
+ assert 0 <= probability <= 1
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for find in datapoint.find_queries:
+ if random.random() > self.probability:
+ continue
+
+ find.query_text = self.target_text
+
+ return datapoint
diff --git a/sam3/train/transforms/point_sampling.py b/sam3/train/transforms/point_sampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..e083fde416467fb73a1321b2d6734f591a567eab
--- /dev/null
+++ b/sam3/train/transforms/point_sampling.py
@@ -0,0 +1,345 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import cv2
+import numpy as np
+import torch
+from PIL import Image as PILImage
+from pycocotools import mask as mask_util
+
+from sam3.train.data.sam3_image_dataset import Datapoint
+from torchvision.ops import masks_to_boxes
+
+
+def sample_points_from_rle(rle, n_points, mode, box=None, normalize=True):
+ """
+    Sample random points from a mask provided in COCO RLE format.
+    'mode' is in ["centered", "random_mask", "random_box"]:
+ "centered": points are sampled farthest from the mask edges and each other
+ "random_mask": points are sampled uniformly from the mask
+ "random_box": points are sampled uniformly from the annotation's box
+ 'box' must be provided if 'mode' is "random_box".
+ If 'normalize' is true, points are in [0,1], relative to mask h,w.
+ """
+ mask = np.ascontiguousarray(mask_util.decode(rle))
+ points = sample_points_from_mask(mask, n_points, mode, box)
+
+ if normalize:
+ h, w = mask.shape
+ norm = np.array([w, h, 1.0])[None, :]
+ points = points / norm
+
+ return points
+
+
+def sample_points_from_mask(mask, n_points, mode, box=None):
+ if mode == "centered":
+ points = center_positive_sample(mask, n_points)
+ elif mode == "random_mask":
+ points = uniform_positive_sample(mask, n_points)
+ elif mode == "random_box":
+ assert box is not None, "'random_box' mode requires a provided box."
+ points = uniform_sample_from_box(mask, box, n_points)
+ else:
+ raise ValueError(f"Unknown point sampling mode {mode}.")
+ return points
+
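+# Illustrative note (not part of the original file): all samplers below return an
+# array of shape (n_points, 3) whose columns are (x, y, label); label is always 1 for
+# the mask-based modes and comes from the mask value in "random_box" mode.
+# A hypothetical call:
+#
+#   mask = np.zeros((64, 64), dtype=np.uint8)
+#   mask[16:48, 16:48] = 1
+#   pts = sample_points_from_mask(mask, n_points=3, mode="random_mask")
+#   # pts.shape == (3, 3); every row has label 1 and lies inside the square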
+
+def uniform_positive_sample(mask, n_points):
+ """
+ Samples positive points uniformly from the mask. Only integer pixel
+ values are sampled.
+ """
+ # Sampling directly from the uncompressed RLE would be faster but is
+ # likely unnecessary.
+ mask_points = np.stack(np.nonzero(mask), axis=0).transpose(1, 0)
+ assert len(mask_points) > 0, "Can't sample positive points from an empty mask."
+ selected_idxs = np.random.randint(low=0, high=len(mask_points), size=n_points)
+ selected_points = mask_points[selected_idxs]
+
+ selected_points = selected_points[:, ::-1] # (y, x) -> (x, y)
+ labels = np.ones((len(selected_points), 1))
+ selected_points = np.concatenate([selected_points, labels], axis=1)
+
+ return selected_points
+
+
+def center_positive_sample(mask, n_points):
+ """
+ Samples points farthest from mask edges (by distance transform)
+ and subsequent points also farthest from each other. Each new point
+ sampled is treated as an edge for future points. Edges of the image are
+ treated as edges of the mask.
+ """
+
+ # Pad mask by one pixel on each end to assure distance transform
+ # avoids edges
+ padded_mask = np.pad(mask, 1)
+
+ points = []
+ for _ in range(n_points):
+ assert np.max(mask) > 0, "Can't sample positive points from an empty mask."
+ dist = cv2.distanceTransform(padded_mask, cv2.DIST_L2, 0)
+ point = np.unravel_index(dist.argmax(), dist.shape)
+ # Mark selected point as background so next point avoids it
+ padded_mask[point[0], point[1]] = 0
+ points.append(point[::-1]) # (y, x) -> (x, y)
+
+ points = np.stack(points, axis=0)
+ points = points - 1 # Subtract left/top padding of 1
+ labels = np.ones((len(points), 1))
+ points = np.concatenate([points, labels], axis=1)
+
+ return points
+
+
+def uniform_sample_from_box(mask, box, n_points):
+ """
+ Sample points uniformly from the provided box. The points' labels
+ are determined by the provided mask. Does not guarantee a positive
+ point is sampled. The box is assumed unnormalized in XYXY format.
+ Points are sampled at integer values.
+ """
+
+ # Since lower/right edges are exclusive, ceil can be applied to all edges
+ int_box = np.ceil(box)
+
+ x = np.random.randint(low=int_box[0], high=int_box[2], size=n_points)
+ y = np.random.randint(low=int_box[1], high=int_box[3], size=n_points)
+ labels = mask[y, x]
+ points = np.stack([x, y, labels], axis=1)
+
+ return points
+
+
+def rescale_box_xyxy(box, factor, imsize=None):
+ """
+ Rescale a box providing in unnormalized XYXY format, fixing the center.
+ If imsize is provided, clamp to the image.
+ """
+ cx, cy = (box[0] + box[2]) / 2, (box[1] + box[3]) / 2
+ w, h = box[2] - box[0], box[3] - box[1]
+
+ new_w, new_h = factor * w, factor * h
+
+ new_x0, new_y0 = cx - new_w / 2, cy - new_h / 2
+ new_x1, new_y1 = cx + new_w / 2, cy + new_h / 2
+
+ if imsize is not None:
+ new_x0 = max(min(new_x0, imsize[1]), 0)
+ new_x1 = max(min(new_x1, imsize[1]), 0)
+ new_y0 = max(min(new_y0, imsize[0]), 0)
+ new_y1 = max(min(new_y1, imsize[0]), 0)
+
+ return [new_x0, new_y0, new_x1, new_y1]
+
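+# Worked example (illustrative only) for rescale_box_xyxy: the box [10, 10, 30, 50]
+# has center (20, 30) and size (20, 40); with factor=1.5 the size becomes (30, 60),
+# so the rescaled box is [5.0, 0.0, 35.0, 60.0] (clamped to the image if imsize is given).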
+
+def noise_box(box, im_size, box_noise_std, box_noise_max, min_box_area):
+ if box_noise_std <= 0.0:
+ return box
+ noise = box_noise_std * torch.randn(size=(4,))
+ w, h = box[2] - box[0], box[3] - box[1]
+ scale_factor = torch.tensor([w, h, w, h])
+ noise = noise * scale_factor
+ if box_noise_max is not None:
+ noise = torch.clamp(noise, -box_noise_max, box_noise_max)
+ input_box = box + noise
+ # Clamp to maximum image size
+ img_clamp = torch.tensor([im_size[1], im_size[0], im_size[1], im_size[0]])
+ input_box = torch.maximum(input_box, torch.zeros_like(input_box))
+ input_box = torch.minimum(input_box, img_clamp)
+ if (input_box[2] - input_box[0]) * (input_box[3] - input_box[1]) <= min_box_area:
+ return box
+
+ return input_box
+
+
+class RandomGeometricInputsAPI:
+ """
+ For geometric queries, replaces the input box or points with a random
+ one sampled from the GT mask. Segments must be provided for objects
+ that are targets of geometric queries, and must be binary masks. Existing
+ point and box queries in the datapoint will be ignored and completely replaced.
+ Will sample points and boxes in XYXY format in absolute pixel space.
+
+ Geometry queries are currently determined by taking any query whose
+ query text is a set value.
+
+ Args:
+ num_points (int or (int, int)): how many points to sample. If a tuple,
+ sample a random number of points uniformly over the inclusive range.
+ box_chance (float): fraction of time a box is sampled. A box will replace
+ one sampled point.
+ box_noise_std (float): if greater than 0, add noise to the sampled boxes
+ with this std. Noise is relative to the length of the box side.
+ box_noise_max (int): if not none, truncate any box noise larger than this
+ in terms of absolute pixels.
+ resample_box_from_mask (bool): if True, any sampled box will be determined
+ by finding the extrema of the provided mask. If False, the bbox provided
+ in the target object will be used.
+ point_sample_mode (str): In ["centered", "random_mask", "random_box"],
+ controlling how points are sampled:
+ "centered": points are sampled farthest from the mask edges and each other
+ "random_mask": points are sampled uniformly from the mask
+ "random_box": points are sampled uniformly from the annotation's box
+ Note that "centered" may be too slow for on-line generation.
+ geometric_query_str (str): what string in query_text indicates a
+ geometry query.
+ minimum_box_area (float): sampled boxes with area this size or smaller after
+ noising will use the original box instead. It is the input's responsibility
+ to avoid original boxes that violate necessary area bounds.
+ concat_points (bool): if True, any sampled points will be added to existing
+ ones instead of replacing them.
+
+ """
+
+ def __init__(
+ self,
+ num_points,
+ box_chance,
+ box_noise_std=0.0,
+ box_noise_max=None,
+ minimum_box_area=0.0,
+ resample_box_from_mask=False,
+ point_sample_mode="random_mask",
+ sample_box_scale_factor=1.0,
+ geometric_query_str="geometric",
+ concat_points=False,
+ ):
+ self.num_points = num_points
+ if not isinstance(self.num_points, int):
+ # Convert from inclusive range to exclusive range expected by torch
+ self.num_points[1] += 1
+ self.num_points = tuple(self.num_points)
+ self.box_chance = box_chance
+ self.box_noise_std = box_noise_std
+ self.box_noise_max = box_noise_max
+ self.minimum_box_area = minimum_box_area
+ self.resample_box_from_mask = resample_box_from_mask
+ self.point_sample_mode = point_sample_mode
+ assert point_sample_mode in [
+ "centered",
+ "random_mask",
+ "random_box",
+ ], "Unknown point sample mode."
+ self.geometric_query_str = geometric_query_str
+ self.concat_points = concat_points
+ self.sample_box_scale_factor = sample_box_scale_factor
+
+ def _sample_num_points_and_if_box(self):
+ if isinstance(self.num_points, tuple):
+ n_points = torch.randint(
+ low=self.num_points[0], high=self.num_points[1], size=(1,)
+ ).item()
+ else:
+ n_points = self.num_points
+ if self.box_chance > 0.0:
+ use_box = torch.rand(size=(1,)).item() < self.box_chance
+ n_points -= int(use_box) # box stands in for one point
+ else:
+ use_box = False
+ return n_points, use_box
+
+ def _get_original_box(self, target_object):
+ if not self.resample_box_from_mask:
+ return target_object.bbox
+ mask = target_object.segment
+ return masks_to_boxes(mask[None, :, :])[0]
+
+ def _get_target_object(self, datapoint, query):
+ img = datapoint.images[query.image_id]
+ targets = query.object_ids_output
+ assert (
+ len(targets) == 1
+ ), "Geometric queries only support a single target object."
+ target_idx = targets[0]
+ return img.objects[target_idx]
+
+ def __call__(self, datapoint, **kwargs):
+ for query in datapoint.find_queries:
+ if query.query_text != self.geometric_query_str:
+ continue
+
+ target_object = self._get_target_object(datapoint, query)
+ n_points, use_box = self._sample_num_points_and_if_box()
+ box = self._get_original_box(target_object)
+
+ mask = target_object.segment
+ if n_points > 0:
+ # FIXME: The conversion to numpy and back to reuse code
+ # is awkward, but this is all in the dataloader worker anyway
+ # on CPU and so I don't think it should matter.
+ if self.sample_box_scale_factor != 1.0:
+ sample_box = rescale_box_xyxy(
+ box.numpy(), self.sample_box_scale_factor, mask.shape
+ )
+ else:
+ sample_box = box.numpy()
+ input_points = sample_points_from_mask(
+ mask.numpy(),
+ n_points,
+ self.point_sample_mode,
+ sample_box,
+ )
+ input_points = torch.as_tensor(input_points)
+ input_points = input_points[None, :, :]
+ if self.concat_points and query.input_points is not None:
+ input_points = torch.cat([query.input_points, input_points], dim=1)
+ else:
+ input_points = query.input_points if self.concat_points else None
+
+ if use_box:
+ w, h = datapoint.images[query.image_id].size
+ input_box = noise_box(
+ box,
+ (h, w),
+ box_noise_std=self.box_noise_std,
+ box_noise_max=self.box_noise_max,
+ min_box_area=self.minimum_box_area,
+ )
+ input_box = input_box[None, :]
+ else:
+ input_box = query.input_bbox if self.concat_points else None
+
+ query.input_points = input_points
+ query.input_bbox = input_box
+
+ return datapoint
+
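+# Illustrative usage sketch (parameter values are assumptions, not from the original
+# configs): sample 1-2 clicks per geometric query, replace one click with a noised GT
+# box half the time, and recompute that box from the mask extrema.
+#
+#   geometric_inputs = RandomGeometricInputsAPI(
+#       num_points=[1, 2],
+#       box_chance=0.5,
+#       box_noise_std=0.1,
+#       box_noise_max=20,
+#       resample_box_from_mask=True,
+#       point_sample_mode="random_mask",
+#   )
+#   datapoint = geometric_inputs(datapoint)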
+
+class RandomizeInputBbox:
+ """
+ Simplified version of the geometric transform that only deals with input boxes
+ """
+
+ def __init__(
+ self,
+ box_noise_std=0.0,
+ box_noise_max=None,
+ minimum_box_area=0.0,
+ ):
+ self.box_noise_std = box_noise_std
+ self.box_noise_max = box_noise_max
+ self.minimum_box_area = minimum_box_area
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for query in datapoint.find_queries:
+ if query.input_bbox is None:
+ continue
+
+ img = datapoint.images[query.image_id].data
+ if isinstance(img, PILImage.Image):
+ w, h = img.size
+ else:
+ assert isinstance(img, torch.Tensor)
+ h, w = img.shape[-2:]
+
+ for box_id in range(query.input_bbox.shape[0]):
+ query.input_bbox[box_id, :] = noise_box(
+ query.input_bbox[box_id, :].view(4),
+ (h, w),
+ box_noise_std=self.box_noise_std,
+ box_noise_max=self.box_noise_max,
+ min_box_area=self.minimum_box_area,
+ ).view(1, 4)
+
+ return datapoint
diff --git a/sam3/train/transforms/segmentation.py b/sam3/train/transforms/segmentation.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e97db0f36b46ac99ad750c3bd4b8e55810f8f15
--- /dev/null
+++ b/sam3/train/transforms/segmentation.py
@@ -0,0 +1,157 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import numpy as np
+import pycocotools.mask as mask_utils
+import torch
+
+import torchvision.transforms.functional as F
+from PIL import Image as PILImage
+
+from sam3.model.box_ops import masks_to_boxes
+
+from sam3.train.data.sam3_image_dataset import Datapoint
+
+
+class InstanceToSemantic(object):
+ """Convert instance segmentation to semantic segmentation."""
+
+ def __init__(self, delete_instance=True, use_rle=False):
+ self.delete_instance = delete_instance
+ self.use_rle = use_rle
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for fquery in datapoint.find_queries:
+ h, w = datapoint.images[fquery.image_id].size
+
+ if self.use_rle:
+ all_segs = [
+ datapoint.images[fquery.image_id].objects[obj_id].segment
+ for obj_id in fquery.object_ids_output
+ ]
+ if len(all_segs) > 0:
+ # we need to double check that all rles are the correct size
+ # Otherwise cocotools will fail silently to an empty [0,0] mask
+ for seg in all_segs:
+ assert seg["size"] == all_segs[0]["size"], (
+ "Instance segments have inconsistent sizes. "
+ f"Found sizes {seg['size']} and {all_segs[0]['size']}"
+ )
+ fquery.semantic_target = mask_utils.merge(all_segs)
+ else:
+ # There is no good way to create an empty RLE of the correct size
+ # We resort to converting an empty box to RLE
+ fquery.semantic_target = mask_utils.frPyObjects(
+ np.array([[0, 0, 0, 0]], dtype=np.float64), h, w
+ )[0]
+
+ else:
+ # `semantic_target` is uint8 and remains uint8 throughout the transforms
+ # (it contains binary 0 and 1 values just like `segment` for each object)
+ fquery.semantic_target = torch.zeros((h, w), dtype=torch.uint8)
+ for obj_id in fquery.object_ids_output:
+ segment = datapoint.images[fquery.image_id].objects[obj_id].segment
+ if segment is not None:
+ assert (
+ isinstance(segment, torch.Tensor)
+ and segment.dtype == torch.uint8
+ )
+ fquery.semantic_target |= segment
+
+ if self.delete_instance:
+ for img in datapoint.images:
+ for obj in img.objects:
+ del obj.segment
+ obj.segment = None
+
+ return datapoint
+
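+# Illustrative note (not part of the original file): with use_rle=False, the semantic
+# target of a find query is simply the pixel-wise OR of its output instance masks.
+# For two uint8 instance masks m1 and m2 of the same (h, w), the loop above computes
+# the equivalent of:
+#
+#   semantic_target = torch.zeros((h, w), dtype=torch.uint8)
+#   semantic_target |= m1
+#   semantic_target |= m2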
+
+class RecomputeBoxesFromMasks:
+ """Recompute bounding boxes from masks."""
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ for img in datapoint.images:
+ for obj in img.objects:
+ # Note: if the mask is empty, the bounding box will be undefined
+ # The empty targets should be subsequently filtered
+ obj.bbox = masks_to_boxes(obj.segment)
+ obj.area = obj.segment.sum().item()
+
+ return datapoint
+
+
+class DecodeRle:
+ """This transform decodes RLEs into binary segments.
+    Implementing it as a transform allows lazy loading. Some transforms (e.g. query filters)
+ may be deleting masks, so decoding them from the beginning is wasteful.
+
+    This transform needs to be called before any kind of geometric manipulation.
+ """
+
+ def __call__(self, datapoint: Datapoint, **kwargs):
+ imgId2size = {}
+ warning_shown = False
+ for imgId, img in enumerate(datapoint.images):
+ if isinstance(img.data, PILImage.Image):
+ img_w, img_h = img.data.size
+ elif isinstance(img.data, torch.Tensor):
+                img_h, img_w = img.data.shape[-2:]  # tensor layout is (..., H, W)
+ else:
+ raise RuntimeError(f"Unexpected image type {type(img.data)}")
+
+ imgId2size[imgId] = (img_h, img_w)
+
+ for obj in img.objects:
+ if obj.segment is not None and not isinstance(
+ obj.segment, torch.Tensor
+ ):
+ if mask_utils.area(obj.segment) == 0:
+ print("Warning, empty mask found, approximating from box")
+ obj.segment = torch.zeros(img_h, img_w, dtype=torch.uint8)
+ x1, y1, x2, y2 = obj.bbox.int().tolist()
+ obj.segment[y1 : max(y2, y1 + 1), x1 : max(x1 + 1, x2)] = 1
+ else:
+ obj.segment = mask_utils.decode(obj.segment)
+ # segment is uint8 and remains uint8 throughout the transforms
+ obj.segment = torch.tensor(obj.segment).to(torch.uint8)
+
+ if list(obj.segment.shape) != [img_h, img_w]:
+ # Should not happen often, but adding for security
+ if not warning_shown:
+ print(
+ f"Warning expected instance segmentation size to be {[img_h, img_w]} but found {list(obj.segment.shape)}"
+ )
+ # Printing only once per datapoint to avoid spam
+ warning_shown = True
+
+ obj.segment = F.resize(
+ obj.segment[None], (img_h, img_w)
+ ).squeeze(0)
+
+ assert list(obj.segment.shape) == [img_h, img_w]
+
+ warning_shown = False
+ for query in datapoint.find_queries:
+ if query.semantic_target is not None and not isinstance(
+ query.semantic_target, torch.Tensor
+ ):
+ query.semantic_target = mask_utils.decode(query.semantic_target)
+ # segment is uint8 and remains uint8 throughout the transforms
+ query.semantic_target = torch.tensor(query.semantic_target).to(
+ torch.uint8
+ )
+ if tuple(query.semantic_target.shape) != imgId2size[query.image_id]:
+ if not warning_shown:
+ print(
+ f"Warning expected semantic segmentation size to be {imgId2size[query.image_id]} but found {tuple(query.semantic_target.shape)}"
+ )
+ # Printing only once per datapoint to avoid spam
+ warning_shown = True
+
+ query.semantic_target = F.resize(
+ query.semantic_target[None], imgId2size[query.image_id]
+ ).squeeze(0)
+
+ assert tuple(query.semantic_target.shape) == imgId2size[query.image_id]
+
+ return datapoint
diff --git a/sam3/train/utils/__init__.py b/sam3/train/utils/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..46d37d2aaef52ec7de01999516a2b8c3e1fa4986
--- /dev/null
+++ b/sam3/train/utils/__init__.py
@@ -0,0 +1 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
diff --git a/sam3/train/utils/checkpoint_utils.py b/sam3/train/utils/checkpoint_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f2736adfd593d50afb91afd57e391423979d173
--- /dev/null
+++ b/sam3/train/utils/checkpoint_utils.py
@@ -0,0 +1,358 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+
+import contextlib
+import fnmatch
+import logging
+from typing import (
+ Any,
+ Callable,
+ Dict,
+ List,
+ Mapping,
+ Optional,
+ Sequence,
+ Set,
+ Tuple,
+ Union,
+)
+
+import numpy as np
+import torch
+import torch.nn as nn
+from iopath.common.file_io import g_pathmgr
+from torch.jit._script import RecursiveScriptModule
+
+
+def unix_pattern_to_parameter_names(
+ constraints: List[str], all_parameter_names: Sequence[str]
+) -> Union[None, Set[str]]:
+ """
+ Go through the list of parameter names and select those that match
+ any of the provided constraints
+ """
+ parameter_names = []
+ for param_name in constraints:
+ matching_parameters = set(fnmatch.filter(all_parameter_names, param_name))
+ assert (
+ len(matching_parameters) > 0
+ ), f"param_names {param_name} don't match any param in the given names."
+ parameter_names.append(matching_parameters)
+ return set.union(*parameter_names)
+
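+# Small illustrative example (parameter names are hypothetical): with
+# constraints=["image_encoder.*", "*.bias"] and all_parameter_names=
+# ["image_encoder.proj.weight", "decoder.fc.bias", "decoder.fc.weight"], the function
+# returns {"image_encoder.proj.weight", "decoder.fc.bias"}. A pattern that matches no
+# parameter name raises an AssertionError.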
+
+def filter_params_matching_unix_pattern(
+ patterns: List[str], state_dict: Dict[str, torch.Tensor]
+) -> Dict[str, torch.Tensor]:
+ """
+    Keep only the parameters of the state dictionary that match the provided unix patterns
+
+    Args:
+        patterns: the list of unix patterns to keep
+ state_dict: the dictionary to filter
+
+ Returns:
+ A new state dictionary
+ """
+ if len(patterns) == 0:
+ return {}
+
+ all_keys = list(state_dict.keys())
+ included_keys = unix_pattern_to_parameter_names(patterns, all_keys)
+ return {k: state_dict[k] for k in included_keys}
+
+
+def exclude_params_matching_unix_pattern(
+ patterns: List[str], state_dict: Dict[str, torch.Tensor]
+) -> Dict[str, torch.Tensor]:
+ """
+ Remove from the state dictionary the parameters matching the provided unix patterns
+
+ Args:
+ patterns: the list of unix patterns to exclude
+ state_dict: the dictionary to filter
+
+ Returns:
+ A new state dictionary
+ """
+ if len(patterns) == 0:
+ return state_dict
+
+ all_keys = list(state_dict.keys())
+ excluded_keys = unix_pattern_to_parameter_names(patterns, all_keys)
+ return {k: v for k, v in state_dict.items() if k not in excluded_keys}
+
+
+def _get_state_dict_summary(state_dict: Dict[str, torch.Tensor]):
+ keys = []
+ trace = []
+ for k, v in state_dict.items():
+ keys.append(k)
+ trace.append(v.sum().item())
+ trace = np.array(trace)[np.argsort(keys)]
+ return trace
+
+
+def assert_skipped_parameters_are_frozen(model: nn.Module, patterns: List[str]):
+ """
+ Verifies that all the parameters matching the provided patterns
+    are frozen - this acts as a safeguard when ignoring parameters
+    when saving checkpoints, in case those parameters are in fact trainable
+ """
+ if not patterns:
+ return
+
+ frozen_state_dict = filter_params_matching_unix_pattern(
+ patterns=patterns, state_dict=model.state_dict()
+ )
+ non_frozen_keys = {
+ n
+ for n, p in model.named_parameters()
+ if n in frozen_state_dict and p.requires_grad
+ }
+ if non_frozen_keys:
+ raise ValueError(
+ f"Parameters excluded with `skip_saving_parameters` should be frozen: {non_frozen_keys}"
+ )
+
+
+@contextlib.contextmanager
+def with_check_parameter_frozen(
+ model: nn.Module, patterns: List[str], disabled: bool = True
+):
+ """
+ Context manager that inspects a model surrounding a piece of code
+ and verifies if the model has been updated by this piece of code
+
+ The function will raise an exception if the model has been updated
+    on at least one of the parameters that matches one of the patterns
+
+ Args:
+ model: the model that might have been updated
+        patterns: unix patterns selecting the parameters we want to observe
+        disabled: if True, skip the check entirely
+ """
+ if not patterns or disabled:
+ yield
+ return
+
+ frozen_state_dict = filter_params_matching_unix_pattern(
+ patterns=patterns, state_dict=model.state_dict()
+ )
+ summary_before = _get_state_dict_summary(frozen_state_dict)
+
+ yield
+
+ frozen_state_dict = filter_params_matching_unix_pattern(
+ patterns=patterns, state_dict=model.state_dict()
+ )
+ summary_after = _get_state_dict_summary(frozen_state_dict)
+
+ if not np.allclose(summary_before, summary_after, atol=1e-6):
+ raise ValueError(
+ f"""
+ The `model_weight_initializer` has initialized parameters frozen with `skip_saving_parameters`.
+            You can resolve this error by either initializing those parameters from within the model definition
+            or by setting the flag `trainer.checkpoint.initialize_after_preemption` to True.
+ """
+ )
+
+
+class CkptExcludeKernel:
+ """
+ Removes the keys from the given model state_dict that match the key_pattern.
+
+ Args:
+ key_pattern: Patterns used to select the keys in the state_dict
+ that are eligible for this kernel.
+ """
+
+ def __init__(self, key_pattern: List[str]):
+ self.key_pattern = key_pattern
+
+ def __call__(self, state_dict: Dict):
+ """
+ Args:
+ state_dict: A dictionary representing the given checkpoint's state dict.
+ """
+ if len(self.key_pattern) == 0:
+ return state_dict
+ exclude_keys = unix_pattern_to_parameter_names(
+ self.key_pattern, state_dict.keys()
+ )
+ return {k: v for k, v in state_dict.items() if k not in exclude_keys}
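+
+
+# Usage sketch (illustrative only; the checkpoint keys are hypothetical):
+#
+#     kernel = CkptExcludeKernel(key_pattern=["head.*"])
+#     trimmed = kernel(state_dict={"backbone.conv.weight": w1, "head.fc.weight": w2})
+#     # -> {"backbone.conv.weight": w1}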
+
+
+def load_checkpoint(
+ path_list: List[str],
+ pick_recursive_keys: Optional[List[str]] = None,
+ map_location: str = "cpu",
+) -> Any:
+ """
+ Loads a checkpoint from the specified path.
+
+ Args:
+ path_list: A list of paths which contain the checkpoint. Each element
+ is tried (in order) until a file that exists is found. That file is then
+ used to read the checkpoint.
+ pick_recursive_keys: Picks sub dicts from the loaded checkpoint if not None.
+ For pick_recursive_keys = ["a", "b"], will return checkpoint_dict["a"]["b"]
+ map_location (str): a function, torch.device, string or a dict specifying how to
+ remap storage locations
+
+    Returns: The loaded checkpoint (possibly reduced to the selected sub-dictionary).
+ """
+ path_exists = False
+ for path in path_list:
+ if g_pathmgr.exists(path):
+ path_exists = True
+ break
+
+ if not path_exists:
+ raise ValueError(f"No path exists in {path_list}")
+
+ with g_pathmgr.open(path, "rb") as f:
+ checkpoint = torch.load(f, map_location=map_location)
+
+ logging.info(f"Loaded checkpoint from {path}")
+ if pick_recursive_keys is not None:
+ for key in pick_recursive_keys:
+ checkpoint = checkpoint[key]
+ return checkpoint
+
+
+def get_state_dict(checkpoint, ckpt_state_dict_keys):
+ if isinstance(checkpoint, RecursiveScriptModule):
+ # This is a torchscript JIT model
+ return checkpoint.state_dict()
+ pre_train_dict = checkpoint
+ for i, key in enumerate(ckpt_state_dict_keys):
+ if (isinstance(pre_train_dict, Mapping) and key not in pre_train_dict) or (
+ isinstance(pre_train_dict, Sequence) and key >= len(pre_train_dict)
+ ):
+            key_str = (
+                '["' + '"]["'.join(map(str, ckpt_state_dict_keys[:i])) + '"]'
+            )
+ raise KeyError(
+ f"'{key}' not found in checkpoint{key_str} "
+ f"with keys: {pre_train_dict.keys()}"
+ )
+ pre_train_dict = pre_train_dict[key]
+ return pre_train_dict
+
+
+def load_checkpoint_and_apply_kernels(
+ checkpoint_path: str,
+ checkpoint_kernels: List[Callable] = None,
+ ckpt_state_dict_keys: Tuple[str] = ("state_dict",),
+ map_location: str = "cpu",
+) -> Dict[str, torch.Tensor]:
+ """
+    Loads a checkpoint and applies a sequence of pre-processing kernels to its
+    state dict.
+
+ Args:
+ checkpoint_path (str): Path to the checkpoint.
+        checkpoint_kernels List(Callable): A list of checkpoint processing kernels
+            (e.g. `CkptIncludeKernel`, `CkptExcludeKernel`) applied to the state dict
+            in the given order.
+        ckpt_state_dict_keys (Tuple[str]): Keys to traverse (in order) to reach the
+            model state dict inside the checkpoint.
+ map_location (str): a function, torch.device, string or a dict specifying how to
+ remap storage locations
+
+    Returns: The checkpoint state dict after all kernels have been applied.
+ """
+ assert g_pathmgr.exists(checkpoint_path), "Checkpoint '{}' not found".format(
+ checkpoint_path
+ )
+
+ # Load the checkpoint on CPU to avoid GPU mem spike.
+ with g_pathmgr.open(checkpoint_path, "rb") as f:
+ checkpoint = torch.load(f, map_location=map_location)
+
+ pre_train_dict = get_state_dict(checkpoint, ckpt_state_dict_keys)
+
+ # Not logging into info etc since it's a huge log
+ logging.debug(
+ "Loaded Checkpoint State Dict pre-kernel application: %s"
+ % str(", ".join(list(pre_train_dict.keys())))
+ )
+ # Apply kernels
+ if checkpoint_kernels is not None:
+ for f in checkpoint_kernels:
+ pre_train_dict = f(state_dict=pre_train_dict)
+
+ logging.debug(
+ "Loaded Checkpoint State Dict Post-kernel application %s"
+ % str(", ".join(list(pre_train_dict.keys())))
+ )
+
+ return pre_train_dict
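+
+
+# Usage sketch (illustrative only; the checkpoint path and pattern are hypothetical):
+#
+#     state_dict = load_checkpoint_and_apply_kernels(
+#         checkpoint_path="/tmp/sam3_checkpoint.pt",
+#         checkpoint_kernels=[CkptExcludeKernel(key_pattern=["head.*"])],
+#         ckpt_state_dict_keys=("state_dict",),
+#     )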
+
+
+def check_load_state_dict_errors(
+ missing_keys,
+ unexpected_keys,
+ strict: bool,
+ ignore_missing_keys: List[str] = None,
+ ignore_unexpected_keys: List[str] = None,
+):
+ if ignore_missing_keys is not None and len(ignore_missing_keys) > 0:
+ ignored_keys = unix_pattern_to_parameter_names(
+ ignore_missing_keys, missing_keys
+ )
+ missing_keys = [key for key in missing_keys if key not in ignored_keys]
+
+ if ignore_unexpected_keys is not None and len(ignore_unexpected_keys) > 0:
+ ignored_unexpected_keys = unix_pattern_to_parameter_names(
+ ignore_unexpected_keys, unexpected_keys
+ )
+ unexpected_keys = [
+ key for key in unexpected_keys if key not in ignored_unexpected_keys
+ ]
+
+ err = "State key mismatch."
+ if unexpected_keys:
+ err += f" Unexpected keys: {unexpected_keys}."
+ if missing_keys:
+ err += f" Missing keys: {missing_keys}."
+
+ if unexpected_keys or missing_keys:
+ logging.warning(err)
+ if unexpected_keys or strict:
+ raise KeyError(err)
+
+
+def load_state_dict_into_model(
+ state_dict: Dict,
+ model: nn.Module,
+ strict: bool = True,
+ ignore_missing_keys: List[str] = None,
+ ignore_unexpected_keys: List[str] = None,
+ checkpoint_kernels: List[Callable] = None,
+):
+ """
+ Loads a state dict into the given model.
+
+ Args:
+ state_dict: A dictionary containing the model's
+ state dict, or a subset if strict is False
+ model: Model to load the checkpoint weights into
+ strict: raise if the state_dict has missing state keys
+        ignore_missing_keys: unix patterns of missing keys to ignore
+        ignore_unexpected_keys: unix patterns of unexpected keys to ignore
+        checkpoint_kernels: optional list of kernels to apply to the state dict before loading
+    """
+ # Apply kernels
+ if checkpoint_kernels is not None:
+ for f in checkpoint_kernels:
+ state_dict = f(state_dict=state_dict)
+ missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False)
+
+ check_load_state_dict_errors(
+ missing_keys,
+ unexpected_keys,
+ strict=strict,
+ ignore_missing_keys=ignore_missing_keys,
+ ignore_unexpected_keys=ignore_unexpected_keys,
+ )
+ return model
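+
+
+# Usage sketch (illustrative only; `model`, the path, and the ignore patterns are hypothetical):
+#
+#     state_dict = load_checkpoint_and_apply_kernels("/tmp/sam3_checkpoint.pt")
+#     model = load_state_dict_into_model(
+#         state_dict=state_dict,
+#         model=model,
+#         strict=False,
+#         ignore_missing_keys=["head.*"],
+#     )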
diff --git a/sam3/train/utils/distributed.py b/sam3/train/utils/distributed.py
new file mode 100644
index 0000000000000000000000000000000000000000..3c87a91119c1fa1ebee78cf476c8db6c737053a6
--- /dev/null
+++ b/sam3/train/utils/distributed.py
@@ -0,0 +1,585 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+
+# This source code is licensed under the license found in the
+# LICENSE file in the root directory of this source tree.
+
+import datetime
+import functools
+import io
+import logging
+import os
+import random
+import tempfile
+import time
+from typing import Any, Callable, List, Tuple
+
+import torch
+import torch.autograd as autograd
+import torch.distributed as dist
+
+
+# Default to GPU 0
+_cuda_device_index: int = 0
+
+# Setting _cuda_device_index to -1 internally implies that we should use CPU
+_CPU_DEVICE_INDEX = -1
+_PRIMARY_RANK = 0
+
+
+@functools.lru_cache()
+def _get_global_gloo_group():
+ """
+ Return a process group based on gloo backend, containing all the ranks
+ The result is cached.
+ """
+
+ if dist.get_backend() == "nccl":
+        # Increase timeout from 1800 sec to 43200 sec (12 hr) to avoid some processes
+        # being much slower than others causing a timeout (which can happen in relation
+        # to LVIS class mAP evaluation).
+ timeout = 43200
+ return dist.new_group(
+ backend="gloo",
+ timeout=datetime.timedelta(seconds=timeout),
+ )
+
+ return dist.group.WORLD
+
+
+def is_main_process():
+ """Return true if the current process is the main one"""
+ return get_rank() == 0
+
+
+def all_gather_via_filesys(data, filesys_save_dir=None, gather_to_rank_0_only=False):
+ """
+    Run all_gather on arbitrary picklable data (not necessarily tensors), similar to
+    `all_gather` below, but using the filesystem instead of collective ops.
+
+ If gather_to_rank_0_only is True, only rank 0 will load the gathered object list
+ (and other ranks will have an empty list).
+ """
+ world_size = get_world_size()
+ if world_size == 1:
+ return [data]
+
+ print("gathering via files")
+ cpu_group = _get_global_gloo_group()
+
+    # if unspecified, fall back to EXP_DIR and finally to the directory of this file
+    if filesys_save_dir is not None:
+        save_dir = filesys_save_dir
+    elif "EXP_DIR" in os.environ:
+        save_dir = os.environ["EXP_DIR"]
+    else:
+        # use the same directory where the code is stored
+        save_dir = os.path.dirname(__file__)
+ save_dir = os.path.join(save_dir, "all_gather_via_filesys")
+ if is_main_process():
+ os.makedirs(save_dir, exist_ok=True)
+
+ # use a timestamp and salt to distinguish different all_gather
+ timestamp = int(time.time()) if is_main_process() else 0
+ salt = random.randint(0, 2**31 - 1) if is_main_process() else 0
+ # broadcast the timestamp and salt across ranks
+ # (all-reduce will do the broadcasting since only rank 0 is non-zero)
+ timestamp_and_salt = torch.tensor([timestamp, salt], dtype=torch.long)
+ dist.all_reduce(timestamp_and_salt, group=cpu_group)
+ timestamp, salt = timestamp_and_salt.tolist()
+
+ # save the data to a file on the disk
+ rank_save = get_rank()
+ save_data_filename = f"data_to_gather_{timestamp}_{salt}_{rank_save}.pkl"
+ save_data_path = os.path.join(save_dir, save_data_filename)
+ assert not os.path.exists(save_data_path), f"{save_data_path} already exists"
+ torch.save(data, save_data_path)
+ dist.barrier(group=cpu_group)
+
+ # read the data from the files
+ data_list = []
+ if rank_save == 0 or not gather_to_rank_0_only:
+ for rank_load in range(world_size):
+ load_data_filename = f"data_to_gather_{timestamp}_{salt}_{rank_load}.pkl"
+ load_data_path = os.path.join(save_dir, load_data_filename)
+            assert os.path.exists(load_data_path), f"cannot read {load_data_path}"
+ data_list.append(torch.load(load_data_path, weights_only=False))
+ dist.barrier(group=cpu_group)
+
+ # delete the saved file
+ os.remove(save_data_path)
+ return data_list
+
+
+def all_gather(data, force_cpu=False, force_filesys=False, filesys_save_dir=None):
+ """
+ Run all_gather on arbitrary picklable data (not necessarily tensors)
+ Args:
+ data: any picklable object
+ Returns:
+ list[data]: list of data gathered from each rank
+ """
+
+ world_size = get_world_size()
+ if world_size == 1:
+ return [data]
+
+ if os.getenv("MDETR_FILESYS_REDUCE_RANK_0_ONLY") == "1":
+ return all_gather_via_filesys(
+ data, filesys_save_dir, gather_to_rank_0_only=True
+ )
+
+ if os.getenv("MDETR_FILESYS_REDUCE") == "1" or force_filesys:
+ return all_gather_via_filesys(data, filesys_save_dir)
+
+ cpu_group = None
+ if os.getenv("MDETR_CPU_REDUCE") == "1" or force_cpu:
+ cpu_group = _get_global_gloo_group()
+
+ buffer = io.BytesIO()
+ torch.save(data, buffer)
+ data_view = buffer.getbuffer()
+ device = "cuda" if cpu_group is None else "cpu"
+ tensor = torch.ByteTensor(data_view).to(device)
+
+ # obtain Tensor size of each rank
+ local_size = torch.tensor([tensor.numel()], device=device, dtype=torch.long)
+ size_list = [
+ torch.tensor([0], device=device, dtype=torch.long) for _ in range(world_size)
+ ]
+ if cpu_group is None:
+ dist.all_gather(size_list, local_size)
+ else:
+ print("gathering on cpu")
+ dist.all_gather(size_list, local_size, group=cpu_group)
+ size_list = [int(size.item()) for size in size_list]
+ max_size = max(size_list)
+ assert isinstance(local_size.item(), int)
+ local_size = int(local_size.item())
+
+ # receiving Tensor from all ranks
+ # we pad the tensor because torch all_gather does not support
+ # gathering tensors of different shapes
+ tensor_list = []
+ for _ in size_list:
+ tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device=device))
+ if local_size != max_size:
+ padding = torch.empty(
+ size=(max_size - local_size,), dtype=torch.uint8, device=device
+ )
+ tensor = torch.cat((tensor, padding), dim=0)
+ if cpu_group is None:
+ dist.all_gather(tensor_list, tensor)
+ else:
+ dist.all_gather(tensor_list, tensor, group=cpu_group)
+
+ data_list = []
+ for size, tensor in zip(size_list, tensor_list):
+ tensor = torch.split(tensor, [size, max_size - size], dim=0)[0]
+ buffer = io.BytesIO(tensor.cpu().numpy())
+ obj = torch.load(buffer, weights_only=False)
+ data_list.append(obj)
+
+ return data_list
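+
+
+# Usage sketch (illustrative only): gather arbitrary per-rank python objects, e.g. lists
+# of prediction dicts, so that every rank (or just rank 0) can see the full set.
+#
+#     local_results = [{"image_id": 3, "score": 0.9}]      # hypothetical payload
+#     all_results = all_gather(local_results, force_cpu=True)
+#     merged = [r for rank_results in all_results for r in rank_results]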
+
+
+def convert_to_distributed_tensor(tensor: torch.Tensor) -> Tuple[torch.Tensor, str]:
+ """
+ For some backends, such as NCCL, communication only works if the
+ tensor is on the GPU. This helper function converts to the correct
+ device and returns the tensor + original device.
+ """
+ orig_device = "cpu" if not tensor.is_cuda else "gpu"
+ if (
+ torch.distributed.is_available()
+ and torch.distributed.get_backend() == torch.distributed.Backend.NCCL
+ and not tensor.is_cuda
+ ):
+ tensor = tensor.cuda()
+ return (tensor, orig_device)
+
+
+def convert_to_normal_tensor(tensor: torch.Tensor, orig_device: str) -> torch.Tensor:
+ """
+ For some backends, such as NCCL, communication only works if the
+ tensor is on the GPU. This converts the tensor back to original device.
+ """
+ if tensor.is_cuda and orig_device == "cpu":
+ tensor = tensor.cpu()
+ return tensor
+
+
+def is_distributed_training_run() -> bool:
+ return (
+ torch.distributed.is_available()
+ and torch.distributed.is_initialized()
+ and (torch.distributed.get_world_size() > 1)
+ )
+
+
+def is_primary() -> bool:
+ """
+ Returns True if this is rank 0 of a distributed training job OR if it is
+ a single trainer job. Otherwise False.
+ """
+ return get_rank() == _PRIMARY_RANK
+
+
+def all_reduce_mean(tensor: torch.Tensor) -> torch.Tensor:
+ """
+ Wrapper over torch.distributed.all_reduce for performing mean reduction
+ of tensor over all processes.
+ """
+ return all_reduce_op(
+ tensor,
+ torch.distributed.ReduceOp.SUM,
+ lambda t: t / torch.distributed.get_world_size(),
+ )
+
+
+def all_reduce_sum(tensor: torch.Tensor) -> torch.Tensor:
+ """
+ Wrapper over torch.distributed.all_reduce for performing sum
+ reduction of tensor over all processes in both distributed /
+ non-distributed scenarios.
+ """
+ return all_reduce_op(tensor, torch.distributed.ReduceOp.SUM)
+
+
+def all_reduce_min(tensor: torch.Tensor) -> torch.Tensor:
+ """
+ Wrapper over torch.distributed.all_reduce for performing min
+ reduction of tensor over all processes in both distributed /
+ non-distributed scenarios.
+ """
+ return all_reduce_op(tensor, torch.distributed.ReduceOp.MIN)
+
+
+def all_reduce_max(tensor: torch.Tensor) -> torch.Tensor:
+ """
+    Wrapper over torch.distributed.all_reduce for performing max
+ reduction of tensor over all processes in both distributed /
+ non-distributed scenarios.
+ """
+ return all_reduce_op(tensor, torch.distributed.ReduceOp.MAX)
+
+
+def all_reduce_op(
+ tensor: torch.Tensor,
+ op: torch.distributed.ReduceOp,
+ after_op_func: Callable[[torch.Tensor], torch.Tensor] = None,
+) -> torch.Tensor:
+ """
+ Wrapper over torch.distributed.all_reduce for performing
+ reduction of tensor over all processes in both distributed /
+ non-distributed scenarios.
+ """
+ if is_distributed_training_run():
+ tensor, orig_device = convert_to_distributed_tensor(tensor)
+ torch.distributed.all_reduce(tensor, op)
+ if after_op_func is not None:
+ tensor = after_op_func(tensor)
+ tensor = convert_to_normal_tensor(tensor, orig_device)
+ return tensor
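+
+
+# Usage sketch (illustrative only):
+#
+#     loss = torch.tensor(0.37)                   # hypothetical per-rank scalar
+#     mean_loss = all_reduce_mean(loss.clone())   # averaged over ranks; no-op on a single process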
+
+
+def gather_tensors_from_all(tensor: torch.Tensor) -> List[torch.Tensor]:
+ """
+ Wrapper over torch.distributed.all_gather for performing
+ 'gather' of 'tensor' over all processes in both distributed /
+ non-distributed scenarios.
+ """
+ if tensor.ndim == 0:
+        # 0-dim tensors cannot be gathered, so unsqueeze first
+ tensor = tensor.unsqueeze(0)
+
+ if is_distributed_training_run():
+ tensor, orig_device = convert_to_distributed_tensor(tensor)
+ gathered_tensors = [
+ torch.zeros_like(tensor) for _ in range(torch.distributed.get_world_size())
+ ]
+ torch.distributed.all_gather(gathered_tensors, tensor)
+ gathered_tensors = [
+ convert_to_normal_tensor(_tensor, orig_device)
+ for _tensor in gathered_tensors
+ ]
+ else:
+ gathered_tensors = [tensor]
+
+ return gathered_tensors
+
+
+def gather_from_all(tensor: torch.Tensor) -> torch.Tensor:
+ gathered_tensors = gather_tensors_from_all(tensor)
+ gathered_tensor = torch.cat(gathered_tensors, 0)
+ return gathered_tensor
+
+
+def broadcast(tensor: torch.Tensor, src: int = 0) -> torch.Tensor:
+ """
+ Wrapper over torch.distributed.broadcast for broadcasting a tensor from the source
+ to all processes in both distributed / non-distributed scenarios.
+ """
+ if is_distributed_training_run():
+ tensor, orig_device = convert_to_distributed_tensor(tensor)
+ torch.distributed.broadcast(tensor, src)
+ tensor = convert_to_normal_tensor(tensor, orig_device)
+ return tensor
+
+
+def barrier() -> None:
+ """
+ Wrapper over torch.distributed.barrier, returns without waiting
+ if the distributed process group is not initialized instead of throwing error.
+ """
+ if not torch.distributed.is_available() or not torch.distributed.is_initialized():
+ return
+ torch.distributed.barrier()
+
+
+def get_world_size() -> int:
+ """
+ Simple wrapper for correctly getting worldsize in both distributed
+ / non-distributed settings
+ """
+ return (
+ torch.distributed.get_world_size()
+ if torch.distributed.is_available() and torch.distributed.is_initialized()
+ else 1
+ )
+
+
+def get_rank() -> int:
+ """
+ Simple wrapper for correctly getting rank in both distributed
+ / non-distributed settings
+ """
+ return (
+ torch.distributed.get_rank()
+ if torch.distributed.is_available() and torch.distributed.is_initialized()
+ else 0
+ )
+
+
+def get_primary_rank() -> int:
+ return _PRIMARY_RANK
+
+
+def set_cuda_device_index(idx: int) -> None:
+ global _cuda_device_index
+ _cuda_device_index = idx
+ torch.cuda.set_device(_cuda_device_index)
+
+
+def set_cpu_device() -> None:
+ global _cuda_device_index
+ _cuda_device_index = _CPU_DEVICE_INDEX
+
+
+def get_cuda_device_index() -> int:
+ return _cuda_device_index
+
+
+def init_distributed_data_parallel_model(
+ model: torch.nn.Module,
+ broadcast_buffers: bool = False,
+ find_unused_parameters: bool = True,
+ bucket_cap_mb: int = 25,
+) -> torch.nn.parallel.DistributedDataParallel:
+ global _cuda_device_index
+
+ if _cuda_device_index == _CPU_DEVICE_INDEX:
+ # CPU-only model, don't specify device
+ return torch.nn.parallel.DistributedDataParallel(
+ model,
+ broadcast_buffers=broadcast_buffers,
+ find_unused_parameters=find_unused_parameters,
+ bucket_cap_mb=bucket_cap_mb,
+ )
+ else:
+ # GPU model
+ return torch.nn.parallel.DistributedDataParallel(
+ model,
+ device_ids=[_cuda_device_index],
+ output_device=_cuda_device_index,
+ broadcast_buffers=broadcast_buffers,
+ find_unused_parameters=find_unused_parameters,
+ bucket_cap_mb=bucket_cap_mb,
+ )
+
+
+def broadcast_object(obj: Any, src: int = _PRIMARY_RANK, use_disk: bool = True) -> Any:
+ """Broadcast an object from a source to all workers.
+
+ Args:
+ obj: Object to broadcast, must be serializable
+ src: Source rank for broadcast (default is primary)
+ use_disk: If enabled, removes redundant CPU memory copies by writing to
+ disk
+ """
+ # Either broadcast from primary to the fleet (default),
+ # or use the src setting as the original rank
+ if get_rank() == src:
+ # Emit data
+ buffer = io.BytesIO()
+ torch.save(obj, buffer)
+ data_view = buffer.getbuffer()
+ length_tensor = torch.LongTensor([len(data_view)])
+ length_tensor = broadcast(length_tensor, src=src)
+ data_tensor = torch.ByteTensor(data_view)
+ data_tensor = broadcast(data_tensor, src=src)
+ else:
+ # Fetch from the source
+ length_tensor = torch.LongTensor([0])
+ length_tensor = broadcast(length_tensor, src=src)
+ data_tensor = torch.empty([length_tensor.item()], dtype=torch.uint8)
+ data_tensor = broadcast(data_tensor, src=src)
+ if use_disk:
+ with tempfile.TemporaryFile("r+b") as f:
+ f.write(data_tensor.numpy())
+ # remove reference to the data tensor and hope that Python garbage
+ # collects it
+ del data_tensor
+ f.seek(0)
+ obj = torch.load(f, weights_only=False)
+ else:
+ buffer = io.BytesIO(data_tensor.numpy())
+ obj = torch.load(buffer, weights_only=False)
+ return obj
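+
+
+# Usage sketch (illustrative only; the config dict is hypothetical):
+#
+#     cfg = {"lr": 1e-3, "epochs": 10} if is_primary() else None
+#     cfg = broadcast_object(cfg)   # every rank now holds rank 0's copy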
+
+
+def all_gather_tensor(tensor: torch.Tensor, world_size=None):
+ if world_size is None:
+ world_size = get_world_size()
+ # make contiguous because NCCL won't gather the tensor otherwise
+ assert tensor.is_contiguous(), f"{tensor.shape} is not contiguous!"
+ tensor, orig_device = convert_to_distributed_tensor(tensor)
+ tensor_all = [torch.ones_like(tensor) for _ in range(world_size)]
+    dist.all_gather(tensor_all, tensor, async_op=False)  # blocking all_gather
+ tensor_all = [
+ convert_to_normal_tensor(tensor, orig_device) for tensor in tensor_all
+ ]
+ return tensor_all
+
+
+def all_gather_batch(tensors: List[torch.Tensor]):
+ """
+ Performs all_gather operation on the provided tensors.
+ """
+ # Queue the gathered tensors
+ world_size = get_world_size()
+ # There is no need for reduction in the single-proc case
+ if world_size == 1:
+ return tensors
+ tensor_list = []
+ output_tensor = []
+ for tensor in tensors:
+ tensor_all = all_gather_tensor(tensor, world_size)
+ tensor_list.append(tensor_all)
+
+ for tensor_all in tensor_list:
+ output_tensor.append(torch.cat(tensor_all, dim=0))
+ return output_tensor
+
+
+class GatherLayer(autograd.Function):
+ """
+ Gather tensors from all workers with support for backward propagation:
+ This implementation does not cut the gradients as torch.distributed.all_gather does.
+ """
+
+ @staticmethod
+ def forward(ctx, x):
+ output = [torch.zeros_like(x) for _ in range(dist.get_world_size())]
+ dist.all_gather(output, x)
+ return tuple(output)
+
+ @staticmethod
+ def backward(ctx, *grads):
+ all_gradients = torch.stack(grads)
+ dist.all_reduce(all_gradients)
+ return all_gradients[dist.get_rank()]
+
+
+def all_gather_batch_with_grad(tensors):
+ """
+ Performs all_gather operation on the provided tensors.
+ Graph remains connected for backward grad computation.
+ """
+ # Queue the gathered tensors
+ world_size = get_world_size()
+ # There is no need for reduction in the single-proc case
+ if world_size == 1:
+ return tensors
+ tensor_list = []
+ output_tensor = []
+
+ for tensor in tensors:
+ tensor_all = GatherLayer.apply(tensor)
+ tensor_list.append(tensor_all)
+
+ for tensor_all in tensor_list:
+ output_tensor.append(torch.cat(tensor_all, dim=0))
+ return output_tensor
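+
+
+# Usage sketch (illustrative only), e.g. for contrastive losses that need embeddings from
+# every rank while keeping gradients flowing into the local ones:
+#
+#     # `image_emb` and `text_emb` are hypothetical per-rank tensors of shape (B, D)
+#     all_image_emb, all_text_emb = all_gather_batch_with_grad([image_emb, text_emb])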
+
+
+def unwrap_ddp_if_wrapped(model):
+ if isinstance(model, torch.nn.parallel.DistributedDataParallel):
+ return model.module
+ return model
+
+
+def create_new_process_group(group_size):
+ """
+    Creates process groups of a given `group_size` and returns the
+    process group that the current GPU participates in.
+
+ `group_size` must divide the total number of GPUs (world_size).
+
+ Modified from
+ https://github.com/NVIDIA/apex/blob/4e1ae43f7f7ac69113ef426dd15f37123f0a2ed3/apex/parallel/__init__.py#L60
+
+ Args:
+        group_size (int): number of GPUs to collaborate for sync bn
+ """
+
+ assert group_size > 0
+
+ world_size = torch.distributed.get_world_size()
+ if world_size <= 8:
+ if group_size > world_size:
+ logging.warning(
+ f"Requested group size [{group_size}] > world size [{world_size}]. "
+ "Assuming local debug run and capping it to world size."
+ )
+ group_size = world_size
+ assert world_size >= group_size
+ assert world_size % group_size == 0
+
+ group = None
+ for group_num in range(world_size // group_size):
+ group_ids = range(group_num * group_size, (group_num + 1) * group_size)
+ cur_group = torch.distributed.new_group(ranks=group_ids)
+ if torch.distributed.get_rank() // group_size == group_num:
+ group = cur_group
+ # can not drop out and return here, every process must go through creation of all subgroups
+
+ assert group is not None
+ return group
+
+
+def is_dist_avail_and_initialized():
+ if not dist.is_available():
+ return False
+ if not dist.is_initialized():
+ return False
+ return True
+
+
+def gather_to_rank_0_via_filesys(data, filesys_save_dir=None):
+ """
+ Gather any picklable data to rank 0 via filesystem, using all_gather_via_filesys.
+ """
+ return all_gather_via_filesys(data, filesys_save_dir, gather_to_rank_0_only=True)
diff --git a/sam3/train/utils/logger.py b/sam3/train/utils/logger.py
new file mode 100644
index 0000000000000000000000000000000000000000..127f6c8c93d05be6a8654f39a0c428d0d911bf13
--- /dev/null
+++ b/sam3/train/utils/logger.py
@@ -0,0 +1,241 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import atexit
+import functools
+import logging
+import sys
+import uuid
+from typing import Any, Dict, Optional, Union
+
+from hydra.utils import instantiate
+
+from iopath.common.file_io import g_pathmgr
+from numpy import ndarray
+
+from sam3.train.utils.train_utils import get_machine_local_and_dist_rank, makedir
+from torch import Tensor
+from torch.utils.tensorboard import SummaryWriter
+
+Scalar = Union[Tensor, ndarray, int, float]
+
+
+def make_tensorboard_logger(log_dir: str, **writer_kwargs: Any):
+ makedir(log_dir)
+ summary_writer_method = SummaryWriter
+ return TensorBoardLogger(
+ path=log_dir, summary_writer_method=summary_writer_method, **writer_kwargs
+ )
+
+
+class TensorBoardWriterWrapper:
+ """
+ A wrapper around a SummaryWriter object.
+ """
+
+ def __init__(
+ self,
+ path: str,
+ *args: Any,
+ filename_suffix: str = None,
+ summary_writer_method: Any = SummaryWriter,
+ **kwargs: Any,
+ ) -> None:
+ """Create a new TensorBoard logger.
+ On construction, the logger creates a new events file that logs
+ will be written to. If the environment variable `RANK` is defined,
+ logger will only log if RANK = 0.
+
+ NOTE: If using the logger with distributed training:
+ - This logger can call collective operations
+ - Logs will be written on rank 0 only
+ - Logger must be constructed synchronously *after* initializing distributed process group.
+
+ Args:
+ path (str): path to write logs to
+ *args, **kwargs: Extra arguments to pass to SummaryWriter
+ """
+ self._writer: Optional[SummaryWriter] = None
+ _, self._rank = get_machine_local_and_dist_rank()
+ self._path: str = path
+ if self._rank == 0:
+ logging.info(
+ f"TensorBoard SummaryWriter instantiated. Files will be stored in: {path}"
+ )
+ self._writer = summary_writer_method(
+ log_dir=path,
+ *args,
+ filename_suffix=filename_suffix or str(uuid.uuid4()),
+ **kwargs,
+ )
+ else:
+ logging.debug(
+ f"Not logging meters on this host because env RANK: {self._rank} != 0"
+ )
+ atexit.register(self.close)
+
+ @property
+ def writer(self) -> Optional[SummaryWriter]:
+ return self._writer
+
+ @property
+ def path(self) -> str:
+ return self._path
+
+ def flush(self) -> None:
+ """Writes pending logs to disk."""
+
+ if not self._writer:
+ return
+
+ self._writer.flush()
+
+ def close(self) -> None:
+ """Close writer, flushing pending logs to disk.
+ Logs cannot be written after `close` is called.
+ """
+
+ if not self._writer:
+ return
+
+ self._writer.close()
+ self._writer = None
+
+
+class TensorBoardLogger(TensorBoardWriterWrapper):
+ """
+ A simple logger for TensorBoard.
+ """
+
+ def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
+ """Add multiple scalar values to TensorBoard.
+
+ Args:
+ payload (dict): dictionary of tag name and scalar value
+ step (int, Optional): step value to record
+ """
+ if not self._writer:
+ return
+ for k, v in payload.items():
+ self.log(k, v, step)
+
+ def log(self, name: str, data: Scalar, step: int) -> None:
+ """Add scalar data to TensorBoard.
+
+ Args:
+ name (string): tag name used to group scalars
+ data (float/int/Tensor): scalar data to log
+ step (int, optional): step value to record
+ """
+ if not self._writer:
+ return
+ self._writer.add_scalar(name, data, global_step=step, new_style=True)
+
+ def log_hparams(
+ self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
+ ) -> None:
+ """Add hyperparameter data to TensorBoard.
+
+ Args:
+ hparams (dict): dictionary of hyperparameter names and corresponding values
+            meters (dict): dictionary of meter names and corresponding values
+ """
+ if not self._writer:
+ return
+ self._writer.add_hparams(hparams, meters)
+
+
+class Logger:
+ """
+    A logger class that can interface with multiple loggers. It currently supports
+    TensorBoard only for simplicity, but can be extended with additional loggers.
+ """
+
+ def __init__(self, logging_conf):
+ # allow turning off TensorBoard with "should_log: false" in config
+ tb_config = logging_conf.tensorboard_writer
+ tb_should_log = tb_config and tb_config.pop("should_log", True)
+ self.tb_logger = instantiate(tb_config) if tb_should_log else None
+
+ def log_dict(self, payload: Dict[str, Scalar], step: int) -> None:
+ if self.tb_logger:
+ self.tb_logger.log_dict(payload, step)
+
+ def log(self, name: str, data: Scalar, step: int) -> None:
+ if self.tb_logger:
+ self.tb_logger.log(name, data, step)
+
+ def log_hparams(
+ self, hparams: Dict[str, Scalar], meters: Dict[str, Scalar]
+ ) -> None:
+ if self.tb_logger:
+ self.tb_logger.log_hparams(hparams, meters)
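+
+
+# Usage sketch (illustrative only; the Hydra config below is hypothetical):
+#
+#     # logging_conf.tensorboard_writer:
+#     #   _target_: sam3.train.utils.logger.make_tensorboard_logger
+#     #   log_dir: /tmp/tensorboard_logs
+#     logger = Logger(logging_conf)
+#     logger.log("Losses/train_loss", 0.42, step=100)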
+
+
+# cache the opened file object, so that different calls to `setup_logger`
+# with the same file name can safely write to the same file.
+@functools.lru_cache(maxsize=None)
+def _cached_log_stream(filename):
+    # we tune the buffering value so that the logs are updated
+    # frequently.
+    log_buffer_size = 10 * 1024  # 10 KB buffer (in bytes)
+    io = g_pathmgr.open(filename, mode="a", buffering=log_buffer_size)
+ atexit.register(io.close)
+ return io
+
+
+def setup_logging(
+ name,
+ output_dir=None,
+ rank=0,
+ log_level_primary="INFO",
+ log_level_secondary="ERROR",
+):
+ """
+    Set up the various logging streams: stdout and file handlers.
+    The file handler is only set up on the master GPU (rank 0).
+ """
+ # get the filename if we want to log to the file as well
+ log_filename = None
+ if output_dir:
+ makedir(output_dir)
+ if rank == 0:
+ log_filename = f"{output_dir}/log.txt"
+
+ logger = logging.getLogger(name)
+ logger.setLevel(log_level_primary)
+
+ # create formatter
+ FORMAT = "%(levelname)s %(asctime)s %(filename)s:%(lineno)4d: %(message)s"
+ formatter = logging.Formatter(FORMAT)
+
+ # Cleanup any existing handlers
+ for h in logger.handlers:
+ logger.removeHandler(h)
+ logger.root.handlers = []
+
+ # setup the console handler
+ console_handler = logging.StreamHandler(sys.stdout)
+ console_handler.setFormatter(formatter)
+ logger.addHandler(console_handler)
+ if rank == 0:
+ console_handler.setLevel(log_level_primary)
+ else:
+ console_handler.setLevel(log_level_secondary)
+
+ # we log to file as well if user wants
+ if log_filename and rank == 0:
+ file_handler = logging.StreamHandler(_cached_log_stream(log_filename))
+ file_handler.setLevel(log_level_primary)
+ file_handler.setFormatter(formatter)
+ logger.addHandler(file_handler)
+
+ logging.root = logger
+
+
+def shutdown_logging():
+ """
+ After training is done, we ensure to shut down all the logger streams.
+ """
+ logging.info("Shutting down loggers...")
+ handlers = logging.root.handlers
+ for handler in handlers:
+ handler.close()
diff --git a/sam3/train/utils/train_utils.py b/sam3/train/utils/train_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..43a80782760a3ebb81878f985e4ac5804ba14f21
--- /dev/null
+++ b/sam3/train/utils/train_utils.py
@@ -0,0 +1,285 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+
+import logging
+import math
+import os
+import random
+import re
+from datetime import timedelta
+from typing import Optional
+
+import hydra
+
+import numpy as np
+import omegaconf
+import torch
+import torch.distributed as dist
+from iopath.common.file_io import g_pathmgr
+from omegaconf import OmegaConf
+
+
+def multiply_all(*args):
+ return np.prod(np.array(args)).item()
+
+
+def collect_dict_keys(config):
+ """This function recursively iterates through a dataset configuration, and collect all the dict_key that are defined"""
+ val_keys = []
+    # If this config points to the collate function, then it has a key
+ if "_target_" in config and re.match(r".*collate_fn.*", config["_target_"]):
+ val_keys.append(config["dict_key"])
+ else:
+ # Recursively proceed
+ for v in config.values():
+ if isinstance(v, type(config)):
+ val_keys.extend(collect_dict_keys(v))
+ elif isinstance(v, omegaconf.listconfig.ListConfig):
+ for item in v:
+ if isinstance(item, type(config)):
+ val_keys.extend(collect_dict_keys(item))
+ return val_keys
+
+
+class Phase:
+ TRAIN = "train"
+ VAL = "val"
+
+
+def register_omegaconf_resolvers():
+ OmegaConf.register_new_resolver("get_method", hydra.utils.get_method)
+ OmegaConf.register_new_resolver("get_class", hydra.utils.get_class)
+ OmegaConf.register_new_resolver("add", lambda x, y: x + y)
+ OmegaConf.register_new_resolver("times", multiply_all)
+ OmegaConf.register_new_resolver("divide", lambda x, y: x / y)
+ OmegaConf.register_new_resolver("pow", lambda x, y: x**y)
+ OmegaConf.register_new_resolver("subtract", lambda x, y: x - y)
+ OmegaConf.register_new_resolver("range", lambda x: list(range(x)))
+ OmegaConf.register_new_resolver("int", lambda x: int(x))
+ OmegaConf.register_new_resolver("ceil_int", lambda x: int(math.ceil(x)))
+ OmegaConf.register_new_resolver("merge", lambda *x: OmegaConf.merge(*x))
+ OmegaConf.register_new_resolver("string", lambda x: str(x))
+
+
+def setup_distributed_backend(backend, timeout_mins):
+ """
+ Initialize torch.distributed and set the CUDA device.
+ Expects environment variables to be set as per
+ https://pytorch.org/docs/stable/distributed.html#environment-variable-initialization
+ along with the environ variable "LOCAL_RANK" which is used to set the CUDA device.
+ """
+ # enable TORCH_NCCL_ASYNC_ERROR_HANDLING to ensure dist nccl ops time out after timeout_mins
+ # of waiting
+ os.environ["TORCH_NCCL_ASYNC_ERROR_HANDLING"] = "1"
+ logging.info(f"Setting up torch.distributed with a timeout of {timeout_mins} mins")
+ dist.init_process_group(backend=backend, timeout=timedelta(minutes=timeout_mins))
+ return dist.get_rank()
+
+
+def get_machine_local_and_dist_rank():
+ """
+ Get the distributed and local rank of the current gpu.
+ """
+    local_rank = os.environ.get("LOCAL_RANK", None)
+    distributed_rank = os.environ.get("RANK", None)
+    assert (
+        local_rank is not None and distributed_rank is not None
+    ), "Please set the RANK and LOCAL_RANK environment variables."
+    return int(local_rank), int(distributed_rank)
+
+
+def print_cfg(cfg):
+ """
+ Supports printing both Hydra DictConfig and also the AttrDict config
+ """
+ logging.info("Training with config:")
+ logging.info(OmegaConf.to_yaml(cfg))
+
+
+def set_seeds(seed_value, max_epochs, dist_rank):
+ """
+ Set the python random, numpy and torch seed for each gpu. Also set the CUDA
+    seeds if CUDA is available. This helps keep training deterministic.
+ """
+    # The pytorch sampler increments the seed by 1 every epoch, so we scale by
+    # max_epochs to keep per-rank seeds from overlapping across epochs.
+ seed_value = (seed_value + dist_rank) * max_epochs
+ logging.info(f"MACHINE SEED: {seed_value}")
+ random.seed(seed_value)
+ np.random.seed(seed_value)
+ torch.manual_seed(seed_value)
+ if torch.cuda.is_available():
+ torch.cuda.manual_seed_all(seed_value)
+
+
+def makedir(dir_path):
+ """
+ Create the directory if it does not exist.
+ """
+ is_success = False
+ try:
+ if not g_pathmgr.exists(dir_path):
+ g_pathmgr.mkdirs(dir_path)
+ is_success = True
+ except BaseException:
+ logging.info(f"Error creating directory: {dir_path}")
+ return is_success
+
+
+def is_dist_avail_and_initialized():
+ if not dist.is_available():
+ return False
+ if not dist.is_initialized():
+ return False
+ return True
+
+
+def get_amp_type(amp_type: Optional[str] = None):
+ if amp_type is None:
+ return None
+ assert amp_type in ["bfloat16", "float16"], "Invalid Amp type."
+ if amp_type == "bfloat16":
+ return torch.bfloat16
+ else:
+ return torch.float16
+
+
+def log_env_variables():
+ env_keys = sorted(list(os.environ.keys()))
+ st = ""
+ for k in env_keys:
+ v = os.environ[k]
+ st += f"{k}={v}\n"
+ logging.info("Logging ENV_VARIABLES")
+ logging.info(st)
+
+
+class AverageMeter:
+ """Computes and stores the average and current value"""
+
+ def __init__(self, name, device, fmt=":f"):
+ self.name = name
+ self.fmt = fmt
+ self.device = device
+ self.reset()
+
+ def reset(self):
+ self.val = 0
+ self.avg = 0
+ self.sum = 0
+ self.count = 0
+ self._allow_updates = True
+
+ def update(self, val, n=1):
+ self.val = val
+ self.sum += val * n
+ self.count += n
+ self.avg = self.sum / self.count
+
+ def __str__(self):
+ fmtstr = "{name}: {val" + self.fmt + "} ({avg" + self.fmt + "})"
+ return fmtstr.format(**self.__dict__)
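+
+
+# Usage sketch (illustrative only):
+#
+#     loss_meter = AverageMeter("Loss", device="cuda", fmt=":.4f")
+#     loss_meter.update(0.73, n=32)     # hypothetical per-batch value and batch size
+#     logging.info(str(loss_meter))     # e.g. "Loss: 0.7300 (0.7300)"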
+
+
+class MemMeter:
+ """Computes and stores the current, avg, and max of peak Mem usage per iteration"""
+
+ def __init__(self, name, device, fmt=":f"):
+ self.name = name
+ self.fmt = fmt
+ self.device = device
+ self.reset()
+
+ def reset(self):
+ self.val = 0 # Per iteration max usage
+ self.avg = 0 # Avg per iteration max usage
+ self.peak = 0 # Peak usage for lifetime of program
+ self.sum = 0
+ self.count = 0
+ self._allow_updates = True
+
+ def update(self, n=1, reset_peak_usage=True):
+ self.val = torch.cuda.max_memory_allocated() // 1e9
+ self.sum += self.val * n
+ self.count += n
+ self.avg = self.sum / self.count
+ self.peak = max(self.peak, self.val)
+ if reset_peak_usage:
+ torch.cuda.reset_peak_memory_stats()
+
+ def __str__(self):
+ fmtstr = (
+ "{name}: {val"
+ + self.fmt
+ + "} ({avg"
+ + self.fmt
+ + "}/{peak"
+ + self.fmt
+ + "})"
+ )
+ return fmtstr.format(**self.__dict__)
+
+
+def human_readable_time(time_seconds):
+ time = int(time_seconds)
+ minutes, seconds = divmod(time, 60)
+ hours, minutes = divmod(minutes, 60)
+ days, hours = divmod(hours, 24)
+ return f"{days:02}d {hours:02}h {minutes:02}m"
+
+
+class DurationMeter:
+ def __init__(self, name, device, fmt=":f"):
+ self.name = name
+ self.device = device
+ self.fmt = fmt
+ self.val = 0
+
+ def reset(self):
+ self.val = 0
+
+ def update(self, val):
+ self.val = val
+
+ def add(self, val):
+ self.val += val
+
+ def __str__(self):
+ return f"{self.name}: {human_readable_time(self.val)}"
+
+
+class ProgressMeter:
+ def __init__(self, num_batches, meters, real_meters, prefix=""):
+ self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
+ self.meters = meters
+ self.real_meters = real_meters
+ self.prefix = prefix
+
+ def display(self, batch, enable_print=False):
+ entries = [self.prefix + self.batch_fmtstr.format(batch)]
+ entries += [str(meter) for meter in self.meters]
+ entries += [
+ " | ".join(
+ [
+ f"{os.path.join(name, subname)}: {val:.4f}"
+ for subname, val in meter.compute().items()
+ ]
+ )
+ for name, meter in self.real_meters.items()
+ ]
+ logging.info(" | ".join(entries))
+ if enable_print:
+ print(" | ".join(entries))
+
+ def _get_batch_fmtstr(self, num_batches):
+        num_digits = len(str(num_batches))
+ fmt = "{:" + str(num_digits) + "d}"
+ return "[" + fmt + "/" + fmt.format(num_batches) + "]"
+
+
+def get_resume_checkpoint(checkpoint_save_dir):
+ if not g_pathmgr.isdir(checkpoint_save_dir):
+ return None
+ ckpt_file = os.path.join(checkpoint_save_dir, "checkpoint.pt")
+ if not g_pathmgr.isfile(ckpt_file):
+ return None
+
+ return ckpt_file
diff --git a/sam3/visualization_utils.py b/sam3/visualization_utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..73398f2bf68b0496c162859ec9249d7f6ac1e999
--- /dev/null
+++ b/sam3/visualization_utils.py
@@ -0,0 +1,941 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates. All Rights Reserved
+import json
+import os
+import subprocess
+from pathlib import Path
+
+import cv2
+import matplotlib.patches as patches
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import pycocotools.mask as mask_utils
+import torch
+from matplotlib.colors import to_rgb
+from PIL import Image
+from skimage.color import lab2rgb, rgb2lab
+from sklearn.cluster import KMeans
+from torchvision.ops import masks_to_boxes
+from tqdm import tqdm
+
+
+def generate_colors(n_colors=256, n_samples=5000):
+ # Step 1: Random RGB samples
+ np.random.seed(42)
+ rgb = np.random.rand(n_samples, 3)
+ # Step 2: Convert to LAB for perceptual uniformity
+ # print(f"Converting {n_samples} RGB samples to LAB color space...")
+ lab = rgb2lab(rgb.reshape(1, -1, 3)).reshape(-1, 3)
+ # print("Conversion to LAB complete.")
+ # Step 3: k-means clustering in LAB
+ kmeans = KMeans(n_clusters=n_colors, n_init=10)
+ # print(f"Fitting KMeans with {n_colors} clusters on {n_samples} samples...")
+ kmeans.fit(lab)
+ # print("KMeans fitting complete.")
+ centers_lab = kmeans.cluster_centers_
+ # Step 4: Convert LAB back to RGB
+ colors_rgb = lab2rgb(centers_lab.reshape(1, -1, 3)).reshape(-1, 3)
+ colors_rgb = np.clip(colors_rgb, 0, 1)
+ return colors_rgb
+
+
+COLORS = generate_colors(n_colors=128, n_samples=5000)
+
+
+def show_img_tensor(img_batch, vis_img_idx=0):
+ MEAN_IMG = np.array([0.5, 0.5, 0.5])
+ STD_IMG = np.array([0.5, 0.5, 0.5])
+ im_tensor = img_batch[vis_img_idx].detach().cpu()
+ assert im_tensor.dim() == 3
+ im_tensor = im_tensor.numpy().transpose((1, 2, 0))
+ im_tensor = (im_tensor * STD_IMG) + MEAN_IMG
+ im_tensor = np.clip(im_tensor, 0, 1)
+ plt.imshow(im_tensor)
+
+
+def draw_box_on_image(image, box, color=(0, 255, 0)):
+ """
+ Draws a rectangle on a given PIL image using the provided box coordinates in xywh format.
+ :param image: PIL.Image - The image on which to draw the rectangle.
+ :param box: tuple - A tuple (x, y, w, h) representing the top-left corner, width, and height of the rectangle.
+    :param color: tuple - A tuple (R, G, B) representing the color of the rectangle. Default is green.
+ :return: PIL.Image - The image with the rectangle drawn on it.
+ """
+ # Ensure the image is in RGB mode
+ image = image.convert("RGB")
+ # Unpack the box coordinates
+ x, y, w, h = box
+ x, y, w, h = int(x), int(y), int(w), int(h)
+ # Get the pixel data
+ pixels = image.load()
+ # Draw the top and bottom edges
+ for i in range(x, x + w):
+ pixels[i, y] = color
+ pixels[i, y + h - 1] = color
+ pixels[i, y + 1] = color
+ pixels[i, y + h] = color
+ pixels[i, y - 1] = color
+ pixels[i, y + h - 2] = color
+ # Draw the left and right edges
+ for j in range(y, y + h):
+ pixels[x, j] = color
+ pixels[x + 1, j] = color
+ pixels[x - 1, j] = color
+ pixels[x + w - 1, j] = color
+ pixels[x + w, j] = color
+ pixels[x + w - 2, j] = color
+ return image
+
+
+def plot_bbox(
+ img_height,
+ img_width,
+ box,
+ box_format="XYXY",
+ relative_coords=True,
+ color="r",
+ linestyle="solid",
+ text=None,
+ ax=None,
+):
+ if box_format == "XYXY":
+ x, y, x2, y2 = box
+ w = x2 - x
+ h = y2 - y
+ elif box_format == "XYWH":
+ x, y, w, h = box
+ elif box_format == "CxCyWH":
+ cx, cy, w, h = box
+ x = cx - w / 2
+ y = cy - h / 2
+ else:
+ raise RuntimeError(f"Invalid box_format {box_format}")
+
+ if relative_coords:
+ x *= img_width
+ w *= img_width
+ y *= img_height
+ h *= img_height
+
+ if ax is None:
+ ax = plt.gca()
+ rect = patches.Rectangle(
+ (x, y),
+ w,
+ h,
+ linewidth=1.5,
+ edgecolor=color,
+ facecolor="none",
+ linestyle=linestyle,
+ )
+ ax.add_patch(rect)
+ if text is not None:
+ facecolor = "w"
+ ax.text(
+ x,
+ y - 5,
+ text,
+ color=color,
+ weight="bold",
+ fontsize=8,
+ bbox={"facecolor": facecolor, "alpha": 0.75, "pad": 2},
+ )
+
+
+def plot_mask(mask, color="r", ax=None):
+ im_h, im_w = mask.shape
+ mask_img = np.zeros((im_h, im_w, 4), dtype=np.float32)
+ mask_img[..., :3] = to_rgb(color)
+ mask_img[..., 3] = mask * 0.5
+ # Use the provided ax or the current axis
+ if ax is None:
+ ax = plt.gca()
+ ax.imshow(mask_img)
+
+
+def normalize_bbox(bbox_xywh, img_w, img_h):
+ # Assumes bbox_xywh is in XYWH format
+ if isinstance(bbox_xywh, list):
+ assert (
+ len(bbox_xywh) == 4
+ ), "bbox_xywh list must have 4 elements. Batching not support except for torch tensors."
+ normalized_bbox = bbox_xywh.copy()
+ normalized_bbox[0] /= img_w
+ normalized_bbox[1] /= img_h
+ normalized_bbox[2] /= img_w
+ normalized_bbox[3] /= img_h
+ else:
+ assert isinstance(
+ bbox_xywh, torch.Tensor
+ ), "Only torch tensors are supported for batching."
+ normalized_bbox = bbox_xywh.clone()
+ assert (
+ normalized_bbox.size(-1) == 4
+ ), "bbox_xywh tensor must have last dimension of size 4."
+ normalized_bbox[..., 0] /= img_w
+ normalized_bbox[..., 1] /= img_h
+ normalized_bbox[..., 2] /= img_w
+ normalized_bbox[..., 3] /= img_h
+ return normalized_bbox
+
+
+def visualize_frame_output(frame_idx, video_frames, outputs, figsize=(12, 8)):
+ plt.figure(figsize=figsize)
+ plt.title(f"frame {frame_idx}")
+ img = load_frame(video_frames[frame_idx])
+ img_H, img_W, _ = img.shape
+ plt.imshow(img)
+ for i in range(len(outputs["out_probs"])):
+ box_xywh = outputs["out_boxes_xywh"][i]
+ prob = outputs["out_probs"][i]
+ obj_id = outputs["out_obj_ids"][i]
+ binary_mask = outputs["out_binary_masks"][i]
+ color = COLORS[obj_id % len(COLORS)]
+ plot_bbox(
+ img_H,
+ img_W,
+ box_xywh,
+ text=f"(id={obj_id}, {prob=:.2f})",
+ box_format="XYWH",
+ color=color,
+ )
+ plot_mask(binary_mask, color=color)
+
+
+def visualize_formatted_frame_output(
+ frame_idx,
+ video_frames,
+ outputs_list,
+ titles=None,
+ points_list=None,
+ points_labels_list=None,
+ figsize=(12, 8),
+ title_suffix="",
+ prompt_info=None,
+):
+ """Visualize up to three sets of segmentation masks on a video frame.
+
+ Args:
+ frame_idx: Frame index to visualize
+ image_files: List of image file paths
+ outputs_list: List of {frame_idx: {obj_id: mask_tensor}} or single dict {obj_id: mask_tensor}
+ titles: List of titles for each set of outputs_list
+ points_list: Optional list of point coordinates
+ points_labels_list: Optional list of point labels
+ figsize: Figure size tuple
+ save: Whether to save the visualization to file
+ output_dir: Base output directory when saving
+ scenario_name: Scenario name for organizing saved files
+ title_suffix: Additional title suffix
+ prompt_info: Dictionary with prompt information (boxes, points, etc.)
+ """
+ # Handle single output dict case
+ if isinstance(outputs_list, dict) and frame_idx in outputs_list:
+ # This is a single outputs dict with frame indices as keys
+ outputs_list = [outputs_list]
+ elif isinstance(outputs_list, dict) and not any(
+ isinstance(k, int) for k in outputs_list.keys()
+ ):
+ # This is a single frame's outputs {obj_id: mask}
+ single_frame_outputs = {frame_idx: outputs_list}
+ outputs_list = [single_frame_outputs]
+
+ num_outputs = len(outputs_list)
+ if titles is None:
+ titles = [f"Set {i+1}" for i in range(num_outputs)]
+ assert (
+ len(titles) == num_outputs
+ ), "length of `titles` should match that of `outputs_list` if not None."
+
+ _, axes = plt.subplots(1, num_outputs, figsize=figsize)
+ if num_outputs == 1:
+ axes = [axes] # Make it iterable
+
+ img = load_frame(video_frames[frame_idx])
+ img_H, img_W, _ = img.shape
+
+ for idx in range(num_outputs):
+ ax, outputs_set, ax_title = axes[idx], outputs_list[idx], titles[idx]
+ ax.set_title(f"Frame {frame_idx} - {ax_title}{title_suffix}")
+ ax.imshow(img)
+
+ if frame_idx in outputs_set:
+ _outputs = outputs_set[frame_idx]
+ else:
+ print(f"Warning: Frame {frame_idx} not found in outputs_set")
+ continue
+
+ if prompt_info and frame_idx == 0: # Show prompts on first frame
+ if "boxes" in prompt_info:
+ for box in prompt_info["boxes"]:
+ # box is in [x, y, w, h] normalized format
+ x, y, w, h = box
+ plot_bbox(
+ img_H,
+ img_W,
+ [x, y, x + w, y + h], # Convert to XYXY
+ box_format="XYXY",
+ relative_coords=True,
+ color="yellow",
+ linestyle="dashed",
+ text="PROMPT BOX",
+ ax=ax,
+ )
+
+ if "points" in prompt_info and "point_labels" in prompt_info:
+ points = np.array(prompt_info["points"])
+ labels = np.array(prompt_info["point_labels"])
+ # Convert normalized to pixel coordinates
+ points_pixel = points * np.array([img_W, img_H])
+
+ # Draw positive points (green stars)
+ pos_points = points_pixel[labels == 1]
+ if len(pos_points) > 0:
+ ax.scatter(
+ pos_points[:, 0],
+ pos_points[:, 1],
+ color="lime",
+ marker="*",
+ s=200,
+ edgecolor="white",
+ linewidth=2,
+ label="Positive Points",
+ zorder=10,
+ )
+
+ # Draw negative points (red stars)
+ neg_points = points_pixel[labels == 0]
+ if len(neg_points) > 0:
+ ax.scatter(
+ neg_points[:, 0],
+ neg_points[:, 1],
+ color="red",
+ marker="*",
+ s=200,
+ edgecolor="white",
+ linewidth=2,
+ label="Negative Points",
+ zorder=10,
+ )
+
+ objects_drawn = 0
+ for obj_id, binary_mask in _outputs.items():
+ mask_sum = (
+ binary_mask.sum()
+ if hasattr(binary_mask, "sum")
+ else np.sum(binary_mask)
+ )
+
+ if mask_sum > 0: # Only draw if mask has content
+ # Convert to torch tensor if it's not already
+ if not isinstance(binary_mask, torch.Tensor):
+ binary_mask = torch.tensor(binary_mask)
+
+ # Find bounding box from mask
+ if binary_mask.any():
+ box_xyxy = masks_to_boxes(binary_mask.unsqueeze(0)).squeeze()
+ box_xyxy = normalize_bbox(box_xyxy, img_W, img_H)
+ else:
+ # Fallback: create a small box at center
+ box_xyxy = [0.45, 0.45, 0.55, 0.55]
+
+ color = COLORS[obj_id % len(COLORS)]
+
+ plot_bbox(
+ img_H,
+ img_W,
+ box_xyxy,
+ text=f"(id={obj_id})",
+ box_format="XYXY",
+ color=color,
+ ax=ax,
+ )
+
+ # Convert back to numpy for plotting
+ mask_np = (
+ binary_mask.numpy()
+ if isinstance(binary_mask, torch.Tensor)
+ else binary_mask
+ )
+ plot_mask(mask_np, color=color, ax=ax)
+ objects_drawn += 1
+
+ if objects_drawn == 0:
+ ax.text(
+ 0.5,
+ 0.5,
+ "No objects detected",
+ transform=ax.transAxes,
+ fontsize=16,
+ ha="center",
+ va="center",
+ color="red",
+ weight="bold",
+ )
+
+ # Draw additional points if provided
+ if points_list is not None and points_list[idx] is not None:
+ show_points(
+ points_list[idx], points_labels_list[idx], ax=ax, marker_size=200
+ )
+
+ ax.axis("off")
+
+ plt.tight_layout()
+ plt.show()
+
+
+def render_masklet_frame(img, outputs, frame_idx=None, alpha=0.5):
+ """
+ Overlays masklets and bounding boxes on a single image frame.
+ Args:
+ img: np.ndarray, shape (H, W, 3), uint8 or float32 in [0,255] or [0,1]
+ outputs: dict with keys: out_boxes_xywh, out_probs, out_obj_ids, out_binary_masks
+ frame_idx: int or None, for overlaying frame index text
+ alpha: float, mask overlay alpha
+ Returns:
+ overlay: np.ndarray, shape (H, W, 3), uint8
+ """
+ if img.dtype == np.float32 or img.max() <= 1.0:
+ img = (img * 255).astype(np.uint8)
+ img = img[..., :3] # drop alpha if present
+ height, width = img.shape[:2]
+ overlay = img.copy()
+
+ for i in range(len(outputs["out_probs"])):
+ obj_id = outputs["out_obj_ids"][i]
+ color = COLORS[obj_id % len(COLORS)]
+ color255 = (color * 255).astype(np.uint8)
+ mask = outputs["out_binary_masks"][i]
+ if mask.shape != img.shape[:2]:
+ mask = cv2.resize(
+ mask.astype(np.float32),
+ (img.shape[1], img.shape[0]),
+ interpolation=cv2.INTER_NEAREST,
+ )
+ mask_bool = mask > 0.5
+ for c in range(3):
+ overlay[..., c][mask_bool] = (
+ alpha * color255[c] + (1 - alpha) * overlay[..., c][mask_bool]
+ ).astype(np.uint8)
+
+ # Draw bounding boxes and text
+ for i in range(len(outputs["out_probs"])):
+ box_xywh = outputs["out_boxes_xywh"][i]
+ obj_id = outputs["out_obj_ids"][i]
+ prob = outputs["out_probs"][i]
+ color = COLORS[obj_id % len(COLORS)]
+ color255 = tuple(int(x * 255) for x in color)
+ x, y, w, h = box_xywh
+ x1 = int(x * width)
+ y1 = int(y * height)
+ x2 = int((x + w) * width)
+ y2 = int((y + h) * height)
+ cv2.rectangle(overlay, (x1, y1), (x2, y2), color255, 2)
+ if prob is not None:
+ label = f"id={obj_id}, p={prob:.2f}"
+ else:
+ label = f"id={obj_id}"
+ cv2.putText(
+ overlay,
+ label,
+ (x1, max(y1 - 10, 0)),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 0.5,
+ color255,
+ 1,
+ cv2.LINE_AA,
+ )
+
+ # Overlay frame index at the top-left corner
+ if frame_idx is not None:
+ cv2.putText(
+ overlay,
+ f"Frame {frame_idx}",
+ (10, 30),
+ cv2.FONT_HERSHEY_SIMPLEX,
+ 1.0,
+ (255, 255, 255),
+ 2,
+ cv2.LINE_AA,
+ )
+
+ return overlay
+
+
+def save_masklet_video(video_frames, outputs, out_path, alpha=0.5, fps=10):
+ # Each outputs dict has keys: "out_boxes_xywh", "out_probs", "out_obj_ids", "out_binary_masks"
+ # video_frames: list of video frame data, same length as outputs_list
+
+ # Read first frame to get size
+ first_img = load_frame(video_frames[0])
+ height, width = first_img.shape[:2]
+ if first_img.dtype == np.float32 or first_img.max() <= 1.0:
+ first_img = (first_img * 255).astype(np.uint8)
+    # Write a temporary 'mp4v' file first; it is re-encoded with ffmpeg below for broader playback compatibility
+ fourcc = cv2.VideoWriter_fourcc(*"mp4v")
+ writer = cv2.VideoWriter("temp.mp4", fourcc, fps, (width, height))
+
+ outputs_list = [
+ (video_frames[frame_idx], frame_idx, outputs[frame_idx])
+ for frame_idx in sorted(outputs.keys())
+ ]
+
+ for frame, frame_idx, frame_outputs in tqdm(outputs_list):
+ img = load_frame(frame)
+ overlay = render_masklet_frame(
+ img, frame_outputs, frame_idx=frame_idx, alpha=alpha
+ )
+ writer.write(cv2.cvtColor(overlay, cv2.COLOR_RGB2BGR))
+
+ writer.release()
+
+ # Re-encode the video for VSCode compatibility using ffmpeg
+ subprocess.run(["ffmpeg", "-y", "-i", "temp.mp4", out_path])
+ print(f"Re-encoded video saved to {out_path}")
+
+ os.remove("temp.mp4") # Clean up temporary file
+
+
+def save_masklet_image(frame, outputs, out_path, alpha=0.5, frame_idx=None):
+ """
+ Save a single image with masklet overlays.
+ """
+ img = load_frame(frame)
+ overlay = render_masklet_frame(img, outputs, frame_idx=frame_idx, alpha=alpha)
+ Image.fromarray(overlay).save(out_path)
+ print(f"Overlay image saved to {out_path}")
+
+
+def prepare_masks_for_visualization(frame_to_output):
+    # frame_to_output --> {frame_idx: {"out_probs": np.array, "out_obj_ids": np.array, "out_binary_masks": np.array}}
+ for frame_idx, out in frame_to_output.items():
+ _processed_out = {}
+ for idx, obj_id in enumerate(out["out_obj_ids"].tolist()):
+ if out["out_binary_masks"][idx].any():
+ _processed_out[obj_id] = out["out_binary_masks"][idx]
+ frame_to_output[frame_idx] = _processed_out
+ return frame_to_output
+
+
+def convert_coco_to_masklet_format(
+ annotations, img_info, is_prediction=False, score_threshold=0.5
+):
+ """
+ Convert COCO format annotations to format expected by render_masklet_frame
+ """
+ outputs = {
+ "out_boxes_xywh": [],
+ "out_probs": [],
+ "out_obj_ids": [],
+ "out_binary_masks": [],
+ }
+
+ img_h, img_w = img_info["height"], img_info["width"]
+
+ for idx, ann in enumerate(annotations):
+ # Get bounding box in relative XYWH format
+ if "bbox" in ann:
+ bbox = ann["bbox"]
+ if max(bbox) > 1.0: # Convert absolute to relative coordinates
+ bbox = [
+ bbox[0] / img_w,
+ bbox[1] / img_h,
+ bbox[2] / img_w,
+ bbox[3] / img_h,
+ ]
+ else:
+ mask = mask_utils.decode(ann["segmentation"])
+ rows = np.any(mask, axis=1)
+ cols = np.any(mask, axis=0)
+ if np.any(rows) and np.any(cols):
+ rmin, rmax = np.where(rows)[0][[0, -1]]
+ cmin, cmax = np.where(cols)[0][[0, -1]]
+ # Convert to relative XYWH
+ bbox = [
+ cmin / img_w,
+ rmin / img_h,
+ (cmax - cmin + 1) / img_w,
+ (rmax - rmin + 1) / img_h,
+ ]
+ else:
+ bbox = [0, 0, 0, 0]
+
+ outputs["out_boxes_xywh"].append(bbox)
+
+ # Get probability/score
+ if is_prediction:
+ prob = ann["score"]
+ else:
+ prob = 1.0 # GT has no probability
+ outputs["out_probs"].append(prob)
+
+ outputs["out_obj_ids"].append(idx)
+ mask = mask_utils.decode(ann["segmentation"])
+ mask = (mask > score_threshold).astype(np.uint8)
+
+ outputs["out_binary_masks"].append(mask)
+
+ return outputs
+
+
+def save_side_by_side_visualization(img, gt_anns, pred_anns, noun_phrase):
+ """
+ Create side-by-side visualization of GT and predictions
+ """
+
+ # Create side-by-side visualization
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 7))
+
+ main_title = f"Noun phrase: '{noun_phrase}'"
+ fig.suptitle(main_title, fontsize=16, fontweight="bold")
+
+ gt_overlay = render_masklet_frame(img, gt_anns, alpha=0.5)
+ ax1.imshow(gt_overlay)
+ ax1.set_title("Ground Truth", fontsize=14, fontweight="bold")
+ ax1.axis("off")
+
+ pred_overlay = render_masklet_frame(img, pred_anns, alpha=0.5)
+ ax2.imshow(pred_overlay)
+ ax2.set_title("Predictions", fontsize=14, fontweight="bold")
+ ax2.axis("off")
+
+ plt.subplots_adjust(top=0.88)
+ plt.tight_layout()
+
+
+def bitget(val, idx):
+ return (val >> idx) & 1
+
+
+def pascal_color_map():
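+    """
+    Build a Pascal VOC-style color map (512 entries) via bit interleaving,
+    giving each object id a distinct uint8 RGB color.
+    """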
+ colormap = np.zeros((512, 3), dtype=int)
+ ind = np.arange(512, dtype=int)
+ for shift in reversed(list(range(8))):
+ for channel in range(3):
+ colormap[:, channel] |= bitget(ind, channel) << shift
+ ind >>= 3
+
+ return colormap.astype(np.uint8)
+
+
+def draw_masks_to_frame(
+ frame: np.ndarray, masks: np.ndarray, colors: np.ndarray
+) -> np.ndarray:
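+    """
+    Alpha-blend each mask onto the frame in its color and outline it with
+    nested white/black/colored contours so it stays visible on any background.
+
+    Illustrative usage (colors can e.g. come from pascal_color_map):
+        colors = pascal_color_map()[1 : len(masks) + 1]
+        frame_vis = draw_masks_to_frame(frame, masks, colors)
+    """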
+ masked_frame = frame
+ for mask, color in zip(masks, colors):
+ curr_masked_frame = np.where(mask[..., None], color, masked_frame)
+ masked_frame = cv2.addWeighted(masked_frame, 0.75, curr_masked_frame, 0.25, 0)
+
+ if int(cv2.__version__[0]) > 3:
+ contours, _ = cv2.findContours(
+ np.array(mask, dtype=np.uint8).copy(),
+ cv2.RETR_TREE,
+ cv2.CHAIN_APPROX_NONE,
+ )
+ else:
+ _, contours, _ = cv2.findContours(
+ np.array(mask, dtype=np.uint8).copy(),
+ cv2.RETR_TREE,
+ cv2.CHAIN_APPROX_NONE,
+ )
+
+ cv2.drawContours(
+ masked_frame, contours, -1, (255, 255, 255), 7
+ ) # White outer contour
+ cv2.drawContours(
+ masked_frame, contours, -1, (0, 0, 0), 5
+ ) # Black middle contour
+ cv2.drawContours(
+ masked_frame, contours, -1, color.tolist(), 3
+ ) # Original color inner contour
+ return masked_frame
+
+
+def get_annot_df(file_path: str):
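+    """Load an annotation JSON and return a dict of DataFrames per top-level key ("info" and "licenses" are kept as raw values)."""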
+ with open(file_path, "r") as f:
+ data = json.load(f)
+
+ dfs = {}
+
+ for k, v in data.items():
+ if k in ("info", "licenses"):
+ dfs[k] = v
+ continue
+ df = pd.DataFrame(v)
+ dfs[k] = df
+
+ return dfs
+
+
+def get_annot_dfs(file_list: list[str]):
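+    """Load multiple annotation files, keyed by file stem (dataset name)."""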
+ dfs = {}
+ for annot_file in tqdm(file_list):
+ dataset_name = Path(annot_file).stem
+ dfs[dataset_name] = get_annot_df(annot_file)
+
+ return dfs
+
+
+def get_media_dir(media_dir: str, dataset: str):
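+    """Resolve the frame (JPEG) directory for a given dataset name under media_dir."""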
+ if dataset in ["saco_veval_sav_test", "saco_veval_sav_val"]:
+ return os.path.join(media_dir, "saco_sav", "JPEGImages_24fps")
+ elif dataset in ["saco_veval_yt1b_test", "saco_veval_yt1b_val"]:
+ return os.path.join(media_dir, "saco_yt1b", "JPEGImages_6fps")
+ elif dataset in ["saco_veval_smartglasses_test", "saco_veval_smartglasses_val"]:
+ return os.path.join(media_dir, "saco_sg", "JPEGImages_6fps")
+ elif dataset == "sa_fari_test":
+ return os.path.join(media_dir, "sa_fari", "JPEGImages_6fps")
+ else:
+ raise ValueError(f"Dataset {dataset} not found")
+
+
+def get_all_annotations_for_frame(
+ dataset_df: pd.DataFrame, video_id: int, frame_idx: int, data_dir: str, dataset: str
+):
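+    """
+    Load one frame of a video (as RGB) together with the decoded masks and
+    noun phrases annotated for it, sorted by noun phrase. Returns
+    (frame, None, None) if the video has no annotations.
+    """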
+ media_dir = os.path.join(data_dir, "media")
+
+ # Load the annotation and video data
+ annot_df = dataset_df["annotations"]
+ video_df = dataset_df["videos"]
+
+ # Get the frame
+ video_df_current = video_df[video_df.id == video_id]
+ assert (
+ len(video_df_current) == 1
+ ), f"Expected 1 video row, got {len(video_df_current)}"
+ video_row = video_df_current.iloc[0]
+ file_name = video_row.file_names[frame_idx]
+ file_path = os.path.join(
+ get_media_dir(media_dir=media_dir, dataset=dataset), file_name
+ )
+ frame = cv2.imread(file_path)
+ frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+ # Get the masks and noun phrases annotated in this video in this frame
+ annot_df_current_video = annot_df[annot_df.video_id == video_id]
+ if len(annot_df_current_video) == 0:
+ print(f"No annotations found for video_id {video_id}")
+ return frame, None, None
+ else:
+ empty_mask = np.zeros(frame.shape[:2], dtype=np.uint8)
+ mask_np_pairs = annot_df_current_video.apply(
+ lambda row: (
+ (
+ mask_utils.decode(row.segmentations[frame_idx])
+ if row.segmentations[frame_idx]
+ else empty_mask
+ ),
+ row.noun_phrase,
+ ),
+ axis=1,
+ )
+ # sort based on noun_phrases
+ mask_np_pairs = sorted(mask_np_pairs, key=lambda x: x[1])
+ masks, noun_phrases = zip(*mask_np_pairs)
+
+ return frame, masks, noun_phrases
+
+
+def visualize_prompt_overlay(
+ frame_idx,
+ video_frames,
+ title="Prompt Visualization",
+ text_prompt=None,
+ point_prompts=None,
+ point_labels=None,
+ bounding_boxes=None,
+ box_labels=None,
+ obj_id=None,
+):
+ """Simple prompt visualization function"""
+ img = Image.fromarray(load_frame(video_frames[frame_idx]))
+ fig, ax = plt.subplots(1, figsize=(6, 4))
+ ax.imshow(img)
+
+ img_w, img_h = img.size
+
+ if text_prompt:
+ ax.text(
+ 0.02,
+ 0.98,
+ f'Text: "{text_prompt}"',
+ transform=ax.transAxes,
+ fontsize=12,
+ color="white",
+ weight="bold",
+ bbox=dict(boxstyle="round,pad=0.3", facecolor="red", alpha=0.7),
+ verticalalignment="top",
+ )
+
+ if point_prompts:
+ for i, point in enumerate(point_prompts):
+ x, y = point
+ # Convert relative to absolute coordinates
+ x_img, y_img = x * img_w, y * img_h
+
+ # Use different colors for positive/negative points
+ if point_labels and len(point_labels) > i:
+ color = "green" if point_labels[i] == 1 else "red"
+ marker = "o" if point_labels[i] == 1 else "x"
+ else:
+ color = "green"
+ marker = "o"
+
+ ax.plot(
+ x_img,
+ y_img,
+ marker=marker,
+ color=color,
+ markersize=10,
+ markeredgewidth=2,
+ markeredgecolor="white",
+ )
+ ax.text(
+ x_img + 5,
+ y_img - 5,
+ f"P{i+1}",
+ color=color,
+ fontsize=10,
+ weight="bold",
+ bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.8),
+ )
+
+ if bounding_boxes:
+ for i, box in enumerate(bounding_boxes):
+ x, y, w, h = box
+ # Convert relative to absolute coordinates
+ x_img, y_img = x * img_w, y * img_h
+ w_img, h_img = w * img_w, h * img_h
+
+ # Use different colors for positive/negative boxes
+ if box_labels and len(box_labels) > i:
+ color = "green" if box_labels[i] == 1 else "red"
+ else:
+ color = "green"
+
+ rect = patches.Rectangle(
+ (x_img, y_img),
+ w_img,
+ h_img,
+ linewidth=2,
+ edgecolor=color,
+ facecolor="none",
+ )
+ ax.add_patch(rect)
+ ax.text(
+ x_img,
+ y_img - 5,
+ f"B{i+1}",
+ color=color,
+ fontsize=10,
+ weight="bold",
+ bbox=dict(boxstyle="round,pad=0.2", facecolor="white", alpha=0.8),
+ )
+
+ # Add object ID info if provided
+ if obj_id is not None:
+ ax.text(
+ 0.02,
+ 0.02,
+ f"Object ID: {obj_id}",
+ transform=ax.transAxes,
+ fontsize=10,
+ color="white",
+ weight="bold",
+ bbox=dict(boxstyle="round,pad=0.3", facecolor="blue", alpha=0.7),
+ verticalalignment="bottom",
+ )
+
+ ax.set_title(title)
+ ax.axis("off")
+ plt.tight_layout()
+ plt.show()
+
+
+def plot_results(img, results):
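+    """Plot predicted masks and boxes (with scores) on top of a PIL image."""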
+ plt.figure(figsize=(12, 8))
+ plt.imshow(img)
+ nb_objects = len(results["scores"])
+ print(f"found {nb_objects} object(s)")
+ for i in range(nb_objects):
+ color = COLORS[i % len(COLORS)]
+ plot_mask(results["masks"][i].squeeze(0).cpu(), color=color)
+ w, h = img.size
+ prob = results["scores"][i].item()
+ plot_bbox(
+ h,
+ w,
+ results["boxes"][i].cpu(),
+ text=f"(id={i}, {prob=:.2f})",
+ box_format="XYXY",
+ color=color,
+ relative_coords=False,
+ )
+
+
+def single_visualization(img, anns, title):
+ """
+ Create a single image visualization with overlays.
+ """
+ fig, ax = plt.subplots(figsize=(7, 7))
+ fig.suptitle(title, fontsize=16, fontweight="bold")
+ overlay = render_masklet_frame(img, anns, alpha=0.5)
+ ax.imshow(overlay)
+ ax.axis("off")
+ plt.tight_layout()
+
+
+def show_mask(mask, ax, obj_id=None, random_color=False):
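+    """Overlay a single mask on a matplotlib axis, colored from the tab10 colormap by obj_id (or randomly)."""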
+ if random_color:
+ color = np.concatenate([np.random.random(3), np.array([0.6])], axis=0)
+ else:
+ cmap = plt.get_cmap("tab10")
+ cmap_idx = 0 if obj_id is None else obj_id
+ color = np.array([*cmap(cmap_idx)[:3], 0.6])
+ h, w = mask.shape[-2:]
+ mask_image = mask.reshape(h, w, 1) * color.reshape(1, 1, -1)
+ ax.imshow(mask_image)
+
+
+def show_box(box, ax):
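+    """Draw an XYXY bounding box as a green rectangle on a matplotlib axis."""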
+ x0, y0 = box[0], box[1]
+ w, h = box[2] - box[0], box[3] - box[1]
+ ax.add_patch(
+ plt.Rectangle((x0, y0), w, h, edgecolor="green", facecolor=(0, 0, 0, 0), lw=2)
+ )
+
+
+def show_points(coords, labels, ax, marker_size=375):
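+    """Scatter positive (label 1, green) and negative (label 0, red) point prompts on a matplotlib axis."""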
+ pos_points = coords[labels == 1]
+ neg_points = coords[labels == 0]
+ ax.scatter(
+ pos_points[:, 0],
+ pos_points[:, 1],
+ color="green",
+ marker="*",
+ s=marker_size,
+ edgecolor="white",
+ linewidth=1.25,
+ )
+ ax.scatter(
+ neg_points[:, 0],
+ neg_points[:, 1],
+ color="red",
+ marker="*",
+ s=marker_size,
+ edgecolor="white",
+ linewidth=1.25,
+ )
+
+
+def load_frame(frame):
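+    """Return a frame as a numpy array, accepting an ndarray, a PIL Image, or an image file path."""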
+ if isinstance(frame, np.ndarray):
+ img = frame
+ elif isinstance(frame, Image.Image):
+ img = np.array(frame)
+ elif isinstance(frame, str) and os.path.isfile(frame):
+ img = plt.imread(frame)
+ else:
+ raise ValueError(f"Invalid video frame type: {type(frame)=}")
+ return img