{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:07:03Z","timestamp":1765339623687,"version":"3.46.0"},"publisher-location":"New York, NY, USA","reference-count":60,"publisher":"ACM","funder":[{"name":"National Natural Science Foundation","award":["62072382"],"award-info":[{"award-number":["62072382"]}]}],"content-domain":{"domain":["dl.acm.org"],"crossmark-restriction":true},"short-container-title":[],"published-print":{"date-parts":[[2025,10,27]]},"DOI":"10.1145\/3746027.3755733","type":"proceedings-article","created":{"date-parts":[[2025,10,25]],"date-time":"2025-10-25T06:55:00Z","timestamp":1761375300000},"page":"10506-10515","update-policy":"https:\/\/doi.org\/10.1145\/crossmark-policy","source":"Crossref","is-referenced-by-count":0,"title":["HairShifter: Consistent and High-Fidelity Video Hair Transfer via Anchor-Guided Animation"],"prefix":"10.1145","author":[{"ORCID":"https:\/\/orcid.org\/0009-0008-4049-3539","authenticated-orcid":false,"given":"Wangzheng","family":"Shi","sequence":"first","affiliation":[{"name":"School of Informatics, Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-4671-6111","authenticated-orcid":false,"given":"Yinglin","family":"Zheng","sequence":"additional","affiliation":[{"name":"School of Informatics, Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-0252-7789","authenticated-orcid":false,"given":"Yuxin","family":"Lin","sequence":"additional","affiliation":[{"name":"Informatics Institution, Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0009-0003-5626-3264","authenticated-orcid":false,"given":"Jianmin","family":"Bao","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0002-5056-0706","authenticated-orcid":false,"given":"Ming","family":"Zeng","sequence":"additional","affiliation":[{"name":"School of Informatics, Xiamen University, Xiamen, China"}],"role":[{"role":"author","vocabulary":"crossref"}]},{"ORCID":"https:\/\/orcid.org\/0000-0003-0588-9331","authenticated-orcid":false,"given":"Dong","family":"Chen","sequence":"additional","affiliation":[{"name":"Microsoft Research Asia, Beijing, China"}],"role":[{"role":"author","vocabulary":"crossref"}]}],"member":"320","published-online":{"date-parts":[[2025,10,27]]},"reference":[{"key":"e_1_3_2_1_1_1","volume-title":"Uniedit: A unified tuning-free framework for video motion and appearance editing. arXiv preprint arXiv:2402.13185","author":"Bai Jianhong","year":"2024","unstructured":"Jianhong Bai, Tianyu He, Yuchi Wang, Junliang Guo, Haoji Hu, Zuozhu Liu, and Jiang Bian. 2024. Uniedit: A unified tuning-free framework for video motion and appearance editing. arXiv preprint arXiv:2402.13185 (2024)."},{"key":"e_1_3_2_1_2_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.502"},{"key":"e_1_3_2_1_3_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02121"},{"key":"e_1_3_2_1_4_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02106"},{"key":"e_1_3_2_1_5_1","volume-title":"Flatten: optical flow-guided attention for consistent text-to-video editing. arXiv preprint arXiv:2310.05922","author":"Cong Yuren","year":"2023","unstructured":"Yuren Cong, Mengmeng Xu, Christian Simon, Shoufa Chen, Jiawei Ren, Yanping Xie, Juan-Manuel Perez-Rua, Bodo Rosenhahn, Tao Xiang, and Sen He. 2023. Flatten: optical flow-guided attention for consistent text-to-video editing. arXiv preprint arXiv:2310.05922 (2023)."},{"key":"e_1_3_2_1_6_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00482"},{"key":"e_1_3_2_1_7_1","volume-title":"Diffusion models beat gans on image synthesis. Advances in neural information processing systems","author":"Dhariwal Prafulla","year":"2021","unstructured":"Prafulla Dhariwal and Alexander Nichol. 2021. Diffusion models beat gans on image synthesis. Advances in neural information processing systems, Vol. 34 (2021), 8780-8794."},{"key":"e_1_3_2_1_8_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00812"},{"key":"e_1_3_2_1_9_1","volume-title":"Tokenflow: Consistent diffusion features for consistent video editing. arXiv preprint arXiv:2307.10373","author":"Geyer Michal","year":"2023","unstructured":"Michal Geyer, Omer Bar-Tal, Shai Bagon, and Tali Dekel. 2023. Tokenflow: Consistent diffusion features for consistent video editing. arXiv preprint arXiv:2307.10373 (2023)."},{"key":"e_1_3_2_1_10_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.00728"},{"key":"e_1_3_2_1_11_1","volume-title":"Liveportrait: Efficient portrait animation with stitching and retargeting control. arXiv preprint arXiv:2407.03168","author":"Guo Jianzhu","year":"2024","unstructured":"Jianzhu Guo, Dingyun Zhang, Xiaoqiang Liu, Zhizhou Zhong, Yuan Zhang, Pengfei Wan, and Di Zhang. 2024. Liveportrait: Efficient portrait animation with stitching and retargeting control. arXiv preprint arXiv:2407.03168 (2024)."},{"key":"e_1_3_2_1_12_1","volume-title":"Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725","author":"Guo Yuwei","year":"2023","unstructured":"Yuwei Guo, Ceyuan Yang, Anyi Rao, Zhengyang Liang, Yaohui Wang, Yu Qiao, Maneesh Agrawala, Dahua Lin, and Bo Dai. 2023. Animatediff: Animate your personalized text-to-image diffusion models without specific tuning. arXiv preprint arXiv:2307.04725 (2023)."},{"key":"e_1_3_2_1_13_1","volume-title":"Face-Adapter for Pre-trained Diffusion Models with Fine-Grained ID and Attribute Control. In European Conference on Computer Vision. Springer, 20-36","author":"Han Yue","year":"2024","unstructured":"Yue Han, Junwei Zhu, Keke He, Xu Chen, Yanhao Ge, Wei Li, Xiangtai Li, Jiangning Zhang, Chengjie Wang, and Yong Liu. 2024. Face-Adapter for Pre-trained Diffusion Models with Fine-Grained ID and Attribute Control. In European Conference on Computer Vision. Springer, 20-36."},{"key":"e_1_3_2_1_14_1","volume-title":"Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems","author":"Heusel Martin","year":"2017","unstructured":"Martin Heusel, Hubert Ramsauer, Thomas Unterthiner, Bernhard Nessler, and Sepp Hochreiter. 2017. Gans trained by a two time-scale update rule converge to a local nash equilibrium. Advances in neural information processing systems, Vol. 30 (2017)."},{"key":"e_1_3_2_1_15_1","volume-title":"Denoising diffusion probabilistic models. Advances in neural information processing systems","author":"Ho Jonathan","year":"2020","unstructured":"Jonathan Ho, Ajay Jain, and Pieter Abbeel. 2020. Denoising diffusion probabilistic models. Advances in neural information processing systems, Vol. 33 (2020), 6840-6851."},{"key":"e_1_3_2_1_16_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02108"},{"key":"e_1_3_2_1_17_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00339"},{"key":"e_1_3_2_1_18_1","volume-title":"Introvae: Introspective variational autoencoders for photographic image synthesis. Advances in neural information processing systems","author":"Huang Huaibo","year":"2018","unstructured":"Huaibo Huang, Ran He, Zhenan Sun, Tieniu Tan, et al., 2018. Introvae: Introspective variational autoencoders for photographic image synthesis. Advances in neural information processing systems, Vol. 31 (2018)."},{"key":"e_1_3_2_1_19_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52733.2024.02060"},{"key":"e_1_3_2_1_20_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2017.632"},{"key":"e_1_3_2_1_21_1","volume-title":"Ground-a-video: Zero-shot grounded video editing using text-to-image diffusion models. arXiv preprint arXiv:2310.01107","author":"Jeong Hyeonho","year":"2023","unstructured":"Hyeonho Jeong and Jong Chul Ye. 2023. Ground-a-video: Zero-shot grounded video editing using text-to-image diffusion models. arXiv preprint arXiv:2310.01107 (2023)."},{"key":"e_1_3_2_1_22_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00453"},{"key":"e_1_3_2_1_23_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.00832"},{"key":"e_1_3_2_1_24_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-19790-1_12"},{"key":"e_1_3_2_1_25_1","first-page":"22438","article-title":"Implicit warping for animation with image sets","volume":"35","author":"Mallya Arun","year":"2022","unstructured":"Arun Mallya, Ting-Chun Wang, and Ming-Yu Liu. 2022. Implicit warping for animation with image sets. Advances in Neural Information Processing Systems, Vol. 35 (2022), 22438-22450.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_26_1","first-page":"45600","article-title":"Hairfastgan: Realistic and robust hair transfer with a fast encoder-based approach","volume":"37","author":"Nikolaev Maxim","year":"2024","unstructured":"Maxim Nikolaev, Mikhail Kuznetsov, Dmitry P Vetrov, and Aibek Alanov. 2024. Hairfastgan: Realistic and robust hair transfer with a fast encoder-based approach. Advances in Neural Information Processing Systems, Vol. 37 (2024), 45600-45635.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_27_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR.2019.00244"},{"key":"e_1_3_2_1_28_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.01460"},{"key":"e_1_3_2_1_29_1","volume-title":"International conference on machine learning. PmLR, 8748-8763","author":"Radford Alec","year":"2021","unstructured":"Alec Radford, Jong Wook Kim, Chris Hallacy, Aditya Ramesh, Gabriel Goh, Sandhini Agarwal, Girish Sastry, Amanda Askell, Pamela Mishkin, Jack Clark, et al., 2021. Learning transferable visual models from natural language supervision. In International conference on machine learning. PmLR, 8748-8763."},{"key":"e_1_3_2_1_30_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01042"},{"key":"e_1_3_2_1_31_1","doi-asserted-by":"publisher","DOI":"10.1109\/TIP.2006.881959"},{"key":"e_1_3_2_1_32_1","volume-title":"First order motion model for image animation. Advances in neural information processing systems","author":"Siarohin Aliaksandr","year":"2019","unstructured":"Aliaksandr Siarohin, St\u00e9phane Lathuili\u00e8re, Sergey Tulyakov, Elisa Ricci, and Nicu Sebe. 2019. First order motion model for image animation. Advances in neural information processing systems, Vol. 32 (2019)."},{"key":"e_1_3_2_1_33_1","volume-title":"Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556","author":"Simonyan Karen","year":"2014","unstructured":"Karen Simonyan and Andrew Zisserman. 2014. Very deep convolutional networks for large-scale image recognition. arXiv preprint arXiv:1409.1556 (2014)."},{"key":"e_1_3_2_1_34_1","volume-title":"Michigan: multi-input-conditioned hair image generation for portrait editing. arXiv preprint arXiv:2010.16417","author":"Tan Zhentao","year":"2020","unstructured":"Zhentao Tan, Menglei Chai, Dongdong Chen, Jing Liao, Qi Chu, Lu Yuan, Sergey Tulyakov, and Nenghai Yu. 2020. Michigan: multi-input-conditioned hair image generation for portrait editing. arXiv preprint arXiv:2010.16417 (2020)."},{"key":"e_1_3_2_1_35_1","unstructured":"Gemini Team Rohan Anil Sebastian Borgeaud Jean-Baptiste Alayrac Jiahui Yu Radu Soricut Johan Schalkwyk Andrew M Dai Anja Hauth Katie Millican et al. 2023. Gemini: a family of highly capable multimodal models. arXiv preprint arXiv:2312.11805 (2023)."},{"key":"e_1_3_2_1_36_1","volume-title":"Karol Kurach, Raphael Marinier, Marcin Michalski, and Sylvain Gelly.","author":"Unterthiner Thomas","year":"2018","unstructured":"Thomas Unterthiner, Sjoerd Van Steenkiste, Karol Kurach, Raphael Marinier, Marcin Michalski, and Sylvain Gelly. 2018. Towards accurate generative models of video: A new metric & challenges. arXiv preprint arXiv:1812.01717 (2018)."},{"key":"e_1_3_2_1_37_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR46437.2021.00991"},{"key":"e_1_3_2_1_38_1","volume-title":"Image quality assessment: from error visibility to structural similarity","author":"Wang Zhou","year":"2004","unstructured":"Zhou Wang, Alan C Bovik, Hamid R Sheikh, and Eero P Simoncelli. 2004. Image quality assessment: from error visibility to structural similarity. IEEE transactions on image processing, Vol. 13, 4 (2004), 600-612."},{"key":"e_1_3_2_1_39_1","volume-title":"Aniportrait: Audio-driven synthesis of photorealistic portrait animation. arXiv preprint arXiv:2403.17694","author":"Wei Huawei","year":"2024","unstructured":"Huawei Wei, Zejun Yang, and Zhisheng Wang. 2024. Aniportrait: Audio-driven synthesis of photorealistic portrait animation. arXiv preprint arXiv:2403.17694 (2024)."},{"key":"e_1_3_2_1_40_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01754"},{"key":"e_1_3_2_1_41_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.02156"},{"key":"e_1_3_2_1_42_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00701"},{"key":"e_1_3_2_1_43_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00419"},{"key":"e_1_3_2_1_44_1","doi-asserted-by":"publisher","DOI":"10.1145\/3641519.3657459"},{"key":"e_1_3_2_1_45_1","volume-title":"Easyanimate: A high-performance long video generation method based on transformer architecture. arXiv preprint arXiv:2405.18991","author":"Xu Jiaqi","year":"2024","unstructured":"Jiaqi Xu, Xinyi Zou, Kunzhe Huang, Yunkuo Chen, Bo Liu, MengLi Cheng, Xing Shi, and Jun Huang. 2024. Easyanimate: A high-performance long video generation method based on transformer architecture. arXiv preprint arXiv:2405.18991 (2024)."},{"key":"e_1_3_2_1_46_1","doi-asserted-by":"publisher","DOI":"10.1145\/3581783.3612200"},{"key":"e_1_3_2_1_47_1","volume-title":"Megactor: Harness the power of raw video for vivid portrait animation. arXiv preprint arXiv:2405.20851","author":"Yang Shurong","year":"2024","unstructured":"Shurong Yang, Huadong Li, Juhao Wu, Minhao Jing, Linze Li, Renhe Ji, Jiajun Liang, and Haoqiang Fan. 2024. Megactor: Harness the power of raw video for vivid portrait animation. arXiv preprint arXiv:2405.20851 (2024)."},{"key":"e_1_3_2_1_48_1","doi-asserted-by":"publisher","DOI":"10.1145\/3610548.3618160"},{"key":"e_1_3_2_1_49_1","volume-title":"VideoGrain: Modulating Space-Time Attention for Multi-grained Video Editing. arXiv preprint arXiv:2502.17258","author":"Yang Xiangpeng","year":"2025","unstructured":"Xiangpeng Yang, Linchao Zhu, Hehe Fan, and Yi Yang. 2025. VideoGrain: Modulating Space-Time Attention for Multi-grained Video Editing. arXiv preprint arXiv:2502.17258 (2025)."},{"key":"e_1_3_2_1_50_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52729.2023.01008"},{"key":"e_1_3_2_1_51_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPRW59228.2023.00070"},{"key":"e_1_3_2_1_52_1","first-page":"5048","article-title":"HairDiffusion: Vivid Multi-Colored Hair Editing via Latent Diffusion","volume":"37","author":"Zeng Yu","year":"2024","unstructured":"Yu Zeng, Yang Zhang, Liu Jiachen, Linlin Shen, Kaijun Deng, Weizhao He, and Jinbao Wang. 2024. HairDiffusion: Vivid Multi-Colored Hair Editing via Latent Diffusion. Advances in Neural Information Processing Systems, Vol. 37 (2024), 5048-5073.","journal-title":"Advances in Neural Information Processing Systems"},{"key":"e_1_3_2_1_53_1","doi-asserted-by":"publisher","DOI":"10.1109\/ICCV51070.2023.00355"},{"key":"e_1_3_2_1_54_1","volume-title":"Controlvideo: Training-free controllable text-to-video generation. arXiv preprint arXiv:2305.13077","author":"Zhang Yabo","year":"2023","unstructured":"Yabo Zhang, Yuxiang Wei, Dongsheng Jiang, Xiaopeng Zhang, Wangmeng Zuo, and Qi Tian. 2023b. Controlvideo: Training-free controllable text-to-video generation. arXiv preprint arXiv:2305.13077 (2023)."},{"key":"e_1_3_2_1_55_1","volume-title":"Stable-hair: Real-world hair transfer via diffusion model. arXiv preprint arXiv:2407.14078.","author":"Zhang Yuxuan","year":"2024","unstructured":"Yuxuan Zhang, Qing Zhang, Yiren Song, Jichao Zhang, Hao Tang, and Jiaming Liu. 2024. Stable-hair: Real-world hair transfer via diffusion model. arXiv preprint arXiv:2407.14078."},{"key":"e_1_3_2_1_56_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.00364"},{"key":"e_1_3_2_1_57_1","doi-asserted-by":"publisher","DOI":"10.1109\/CVPR52688.2022.01814"},{"key":"e_1_3_2_1_58_1","doi-asserted-by":"publisher","DOI":"10.1007\/978-3-031-20071-7_38"},{"key":"e_1_3_2_1_59_1","volume-title":"Barbershop: Gan-based image compositing using segmentation masks. arXiv preprint arXiv:2106.01505","author":"Zhu Peihao","year":"2021","unstructured":"Peihao Zhu, Rameen Abdal, John Femiani, and Peter Wonka. 2021. Barbershop: Gan-based image compositing using segmentation masks. arXiv preprint arXiv:2106.01505 (2021)."},{"key":"e_1_3_2_1_60_1","volume-title":"European Conference on Computer Vision. Springer, 195-211","author":"Zhuang Junhao","year":"2024","unstructured":"Junhao Zhuang, Yanhong Zeng, Wenran Liu, Chun Yuan, and Kai Chen. 2024. A task is worth one word: Learning with task prompts for high-quality versatile image inpainting. In European Conference on Computer Vision. Springer, 195-211."}],"event":{"name":"MM '25: The 33rd ACM International Conference on Multimedia","sponsor":["SIGMM ACM Special Interest Group on Multimedia"],"location":"Dublin Ireland","acronym":"MM '25"},"container-title":["Proceedings of the 33rd ACM International Conference on Multimedia"],"original-title":[],"link":[{"URL":"https:\/\/dl.acm.org\/doi\/pdf\/10.1145\/3746027.3755733","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2025,12,10]],"date-time":"2025-12-10T04:03:41Z","timestamp":1765339421000},"score":1,"resource":{"primary":{"URL":"https:\/\/dl.acm.org\/doi\/10.1145\/3746027.3755733"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2025,10,27]]},"references-count":60,"alternative-id":["10.1145\/3746027.3755733","10.1145\/3746027"],"URL":"https:\/\/doi.org\/10.1145\/3746027.3755733","relation":{},"subject":[],"published":{"date-parts":[[2025,10,27]]},"assertion":[{"value":"2025-10-27","order":3,"name":"published","label":"Published","group":{"name":"publication_history","label":"Publication History"}}]}}