{"id":"https://openalex.org/W4416034211","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.1015","title":"MT-R1-Zero: Advancing LLM-based Machine Translation via R1-Zero-like Reinforcement Learning","display_name":"MT-R1-Zero: Advancing LLM-based Machine Translation via R1-Zero-like Reinforcement Learning","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416034211","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.1015"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.1015","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.1015","pdf_url":"https://aclanthology.org/2025.findings-emnlp.1015.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-emnlp.1015.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078947610","display_name":"Zhaopeng Feng","orcid":"https://orcid.org/0000-0002-6396-3184"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhaopeng Feng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003340183","display_name":"Shaosheng Cao","orcid":"https://orcid.org/0000-0002-3795-8824"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shaosheng Cao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Jiahan Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiahan Ren","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069491635","display_name":"Jianwen Su","orcid":"https://orcid.org/0000-0002-4637-1339"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiayuan Su","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103263792","display_name":"Ruizhe Chen","orcid":"https://orcid.org/0000-0001-5537-0082"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruizhe Chen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019954538","display_name":"Yan Zhang","orcid":"https://orcid.org/0000-0002-2598-8321"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yan Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110293836","display_name":"Jian Wu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jian Wu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5024343415","display_name":"Zuozhu Liu","orcid":"https://orcid.org/0000-0002-7816-502X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuozhu Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":8.7939,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.97549357,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"18685","last_page":"18702"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5863999724388123,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5863999724388123,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.039000000804662704,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.03009999915957451,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.42579999566078186},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.31929999589920044},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.3179999887943268},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.302700012922287},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.2874999940395355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6220999956130981},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5332000255584717},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.42579999566078186},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.31929999589920044},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.3179999887943268},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.2874999940395355},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.273499995470047},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.267300009727478},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.2651999890804291},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2597000002861023},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-emnlp.1015","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.1015","pdf_url":"https://aclanthology.org/2025.findings-emnlp.1015.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.1015","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.1015","pdf_url":"https://aclanthology.org/2025.findings-emnlp.1015.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G4474290519","display_name":null,"funder_award_id":"LZ23F020008","funder_id":"https://openalex.org/F4320338464","funder_display_name":"Natural Science Foundation of Zhejiang Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320338464","display_name":"Natural Science Foundation of Zhejiang Province","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416034211.pdf","grobid_xml":"https://content.openalex.org/works/W4416034211.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large-scale":[0],"reinforcement":[1],"learning":[2],"(RL)":[3],"methods":[4],"have":[5],"proven":[6],"highly":[7],"effective":[8],"in":[9],"enhancing":[10],"the":[11,57,62,89,132,171,186],"reasoning":[12,183],"abilities":[13],"of":[14,61,104,114,158,174],"large":[15],"language":[16],"models":[17,126],"(LLMs),":[18],"particularly":[19],"for":[20,66,189],"tasks":[21],"with":[22,47,123],"verifiable":[23],"solutions":[24],"such":[25,127],"as":[26,128],"mathematics":[27],"and":[28,42,130,154,164,181],"coding.However,":[29],"applying":[30],"this":[31,52],"idea":[32],"to":[33,44,79],"machine":[34],"translation":[35,84],"(MT),":[36],"where":[37],"outputs":[38],"are":[39],"flexibly":[40],"formatted":[41],"difficult":[43],"automatically":[45],"evaluate":[46],"explicit":[48],"rules,":[49],"remains":[50],"underexplored.In":[51],"work,":[53],"we":[54],"introduce":[55],"MT-R1-Zero,":[56],"first":[58],"open-source":[59],"adaptation":[60],"R1-Zero":[63,187],"RL":[64],"framework":[65],"MT":[67,149],"without":[68],"supervised":[69],"fine-tuning":[70],"or":[71],"cold-start.We":[72],"propose":[73],"a":[74,110],"rule-metric":[75],"mixed":[76],"reward":[77,165,175],"mechanism":[78],"guide":[80],"LLMs":[81],"towards":[82],"improved":[83],"quality":[85],"via":[86],"emergent":[87,182],"reasoning.On":[88],"WMT":[90],"24":[91],"English-Chinese":[92],"benchmark,":[93],"our":[94,107,141],"MT-R1-Zero-3B-Mix":[95],"achieves":[96,135],"competitive":[97],"performance,":[98],"surpassing":[99],"TowerInstruct-7B-v0.2":[100],"by":[101],"an":[102],"average":[103,112],"1.26":[105],"points.Meanwhile,":[106],"MT-R1-Zero-7B-Mix":[108],"attains":[109],"high":[111],"score":[113],"62.25":[115],"across":[116,161],"all":[117],"metrics,":[118],"placing":[119],"it":[120],"on":[121,138,147],"par":[122],"advanced":[124],"proprietary":[125],"GPT-4o":[129],"Claude-3.5-Sonnet,while":[131],"MT-R1-Zero-7B-Sem":[133],"variant":[134],"state-of-the-art":[136],"scores":[137],"semantic":[139],"metrics.Moreover,":[140],"work":[142],"exhibits":[143],"strong":[144],"generalization":[145],"capabilities":[146],"outof-distribution":[148],"tasks,":[150],"robustly":[151],"supporting":[152],"multilingual":[153],"low-resource":[155],"settings.Extensive":[156],"analysis":[157],"model":[159],"behavior":[160],"different":[162],"initializations":[163],"metrics":[166],"offers":[167],"pioneering":[168],"insight":[169],"into":[170],"critical":[172],"role":[173],"design,":[176],"LLM":[177],"adaptability,":[178],"training":[179],"dynamics,":[180],"patterns":[184],"within":[185],"paradigm":[188],"MT.":[190]},"counts_by_year":[{"year":2026,"cited_by_count":5}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
