{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2026,2,28]],"date-time":"2026-02-28T18:38:19Z","timestamp":1772303899066,"version":"3.50.1"},"reference-count":56,"publisher":"IEEE","license":[{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-029"},{"start":{"date-parts":[[2025,5,19]],"date-time":"2025-05-19T00:00:00Z","timestamp":1747612800000},"content-version":"stm-asf","delay-in-days":0,"URL":"https:\/\/doi.org\/10.15223\/policy-037"}],"content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2025,5,19]]},"DOI":"10.1109\/icra55743.2025.11127800","type":"proceedings-article","created":{"date-parts":[[2025,9,2]],"date-time":"2025-09-02T17:28:56Z","timestamp":1756834136000},"page":"1-8","source":"Crossref","is-referenced-by-count":2,"title":["Generalizable Imitation Learning Through Pre-Trained Representations"],"prefix":"10.1109","author":[{"given":"Wei-Di","family":"Chang","sequence":"first","affiliation":[{"name":"McGill University,Samsung AI Center Montr&#x00E9;al. Center for Intelligent Machines"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Francois","family":"Hogan","sequence":"additional","affiliation":[{"name":"McGill University,Samsung AI Center Montr&#x00E9;al. Center for Intelligent Machines"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Scott","family":"Fujimoto","sequence":"additional","affiliation":[{"name":"McGill University,Samsung AI Center Montr&#x00E9;al. Center for Intelligent Machines"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"David","family":"Meger","sequence":"additional","affiliation":[{"name":"McGill University,Samsung AI Center Montr&#x00E9;al. Center for Intelligent Machines"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"given":"Gregory","family":"Dudek","sequence":"additional","affiliation":[{"name":"McGill University,Samsung AI Center Montr&#x00E9;al. Center for Intelligent Machines"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"263","reference":[{"key":"ref1","doi-asserted-by":"publisher","DOI":"10.1145\/1015330.1015430"},{"key":"ref2","first-page":"4565","article-title":"Generative adversarial imitation learning","author":"Ho","year":"2016","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref3","article-title":"Il-flow: Imitation learning from observation using normalizing flows","author":"Chang","year":"2022","journal-title":"arXiv preprint"},{"key":"ref4","first-page":"158","article-title":"Implicit behavioral cloning","volume-title":"Conference on Robot Learning","author":"Florence"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.026"},{"key":"ref6","article-title":"Imitation learning from observation through optimal transport","author":"Chang","year":"2023","journal-title":"arXiv preprint"},{"key":"ref7","article-title":"The unsurprising effectiveness of pre-trained vision models for control","volume-title":"International Conference on Machine Learning, ICML 2022","author":"Parisi","year":"2022"},{"key":"ref8","first-page":"892","article-title":"R3M: A universal visual representation for robot manipulation","volume-title":"Conference on Robot Learning, CoRL 2022, 14\u201318 December 2022, Auckland, New Zealand","volume":"205","author":"Nair"},{"key":"ref9","author":"Xiao","year":"2022","journal-title":"Masked visual pre-training for motor control"},{"key":"ref10","first-page":"655","article-title":"Where are we in the search for an artificial visual cortex for embodied intelligence?","volume":"36","author":"Majumdar","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref11","article-title":"What makes pre-trained visual representations successful for robust manipulation?","author":"Burns","year":"2023","journal-title":"arXiv preprint"},{"key":"ref12","first-page":"1183","article-title":"An unbiased look at datasets for visuo-motor pre-training","volume-title":"Conference on Robot Learning","author":"Dasari"},{"key":"ref13","article-title":"Very deep convolutional networks for large-scale image recognition","author":"Simonyan","year":"2014","journal-title":"arXiv preprint"},{"key":"ref14","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2016.90"},{"key":"ref15","doi-asserted-by":"publisher","DOI":"10.1145\/3065386"},{"key":"ref16","first-page":"894","article-title":"Cliport: What and where pathways for robotic manipulation","volume-title":"Conference on robot learning","author":"Shridhar"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV48922.2021.00951"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA46639.2022.9811809"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1162\/neco.1991.3.1.88"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461076"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2019.2956365"},{"key":"ref22","article-title":"What matters in learning from offline human demonstrations for robot manipulation","volume-title":"Conference on Robot Learning (CoRL)","author":"Mandlekar"},{"key":"ref23","first-page":"627","article-title":"A reduction of imitation learning and structured prediction to no-regret online learning","volume-title":"Proceedings of the fourteenth international conference on artificial intelligence and statistics. JMLR Workshop and Conference Proceedings","author":"Ross"},{"issue":"39","key":"ref24","first-page":"1","article-title":"End-to-end training of deep visuomotor policies","volume-title":"Journal of Machine Learning Research","volume":"17","author":"Levine","year":"2016"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1109\/IROS51168.2021.9636023"},{"key":"ref26","doi-asserted-by":"publisher","DOI":"10.1109\/LRA.2021.3059619"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2018.XIV.002"},{"key":"ref28","doi-asserted-by":"publisher","DOI":"10.2307\/j.ctt4cgngj.10"},{"key":"ref29","article-title":"Image augmentation is all you need: Regularizing deep reinforcement learning from pixels","volume-title":"International Conference on Learning Representations","author":"Yarats","year":"2021"},{"key":"ref30","article-title":"Mastering visual continuous control: Improved data-augmented reinforcement learning","volume-title":"International Conference on Learning Representations","author":"Yarats","year":"2021"},{"key":"ref31","article-title":"For sale: State-action representation learning for deep reinforcement learning","volume":"36","author":"Fujimoto","year":"2023","journal-title":"Advances in Neural Information Processing Systems"},{"key":"ref32","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2023.XIX.032"},{"key":"ref33","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01842"},{"key":"ref34","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2009.5206848"},{"key":"ref35","doi-asserted-by":"publisher","DOI":"10.1109\/IROS.2017.8202133"},{"key":"ref36","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2018.00493"},{"key":"ref37","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2019.8794224"},{"key":"ref38","doi-asserted-by":"publisher","DOI":"10.1109\/ICRA.2018.8461196"},{"key":"ref39","first-page":"979","article-title":"Graph-structured visual imitation","volume-title":"Conference on Robot Learning","author":"Sieb"},{"key":"ref40","first-page":"1199","article-title":"Viola: Object-centric imitation learning for vision-based robot manipulation","volume-title":"Conference on Robot Learning","author":"Zhu"},{"key":"ref41","article-title":"Deep object pose estimation for semantic robotic grasping of household objects","author":"Tremblay","year":"2018","journal-title":"arXiv preprint"},{"key":"ref42","doi-asserted-by":"publisher","DOI":"10.1007\/s10462-020-09888-5"},{"key":"ref43","doi-asserted-by":"publisher","DOI":"10.1016\/j.compag.2022.106716"},{"key":"ref44","first-page":"373","article-title":"Dense object nets: Learning dense visual object descriptors by and for robotic manipulation","volume-title":"Conference on Robot Learning","author":"Florence"},{"key":"ref45","article-title":"Unsupervised learning of object key-points for perception and control","volume":"32","author":"Kulkarni","year":"2019","journal-title":"Advances in neural information processing systems"},{"key":"ref46","first-page":"1094","article-title":"Meta-world: A benchmark and evaluation for multi-task and meta reinforcement learning","volume-title":"Conference on robot learning","author":"Yu"},{"key":"ref47","doi-asserted-by":"publisher","DOI":"10.1177\/02783649241276017"},{"key":"ref48","article-title":"Maniskill: Generalizable manipulation skill benchmark with large-scale demonstrations","volume-title":"Thirty-fifth Conference on Neural Information Processing Systems Datasets and Benchmarks Track (Round 2)","author":"Mu"},{"key":"ref49","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2024.XX.133"},{"key":"ref50","first-page":"4171","article-title":"Bert: Pre-training of deep bidirectional transformers for language understanding","volume-title":"Proceedings of NAACL-HLT","author":"Kenton"},{"key":"ref51","article-title":"Better plain vit baselines for imagenet-1k","author":"Beyer","year":"2022","journal-title":"arXiv preprint"},{"key":"ref52","article-title":"Learning to merge tokens in vision transformers","author":"Renggli","year":"2022","journal-title":"arXiv preprint"},{"issue":"3","key":"ref53","first-page":"4","article-title":"Deep vit features as dense visual descriptors","volume":"2","author":"Amir","year":"2021","journal-title":"arXiv preprint"},{"key":"ref54","article-title":"robosuite: A modular simulation framework and benchmark for robot learning","author":"Zhu","year":"2020","journal-title":"arXiv preprint"},{"key":"ref55","doi-asserted-by":"publisher","DOI":"10.1109\/JRA.1987.1087068"},{"key":"ref56","doi-asserted-by":"publisher","DOI":"10.15607\/RSS.2006.II.033"}],"event":{"name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","location":"Atlanta, GA, USA","start":{"date-parts":[[2025,5,19]]},"end":{"date-parts":[[2025,5,23]]}},"container-title":["2025 IEEE International Conference on Robotics and Automation (ICRA)"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx8\/11127273\/11127223\/11127800.pdf?arnumber=11127800","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,9,3]],"date-time":"2025-09-03T06:05:58Z","timestamp":1756879558000},"score":1,"resource":{"primary":{"URL":"https:\/\/ieeexplore.ieee.org\/document\/11127800\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,5,19]]},"references-count":56,"URL":"https:\/\/doi.org\/10.1109\/icra55743.2025.11127800","relation":{},"subject":[],"published":{"date-parts":[[2025,5,19]]}}}