{"id":"https://openalex.org/W4416034870","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.406","title":"MC2: A Minimum-Coverage and Dataset-Agnostic Framework for Compositional Generalization of LLMs on Semantic Parsing","display_name":"MC2: A Minimum-Coverage and Dataset-Agnostic Framework for Compositional Generalization of LLMs on Semantic Parsing","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416034870","doi":"https://doi.org/10.18653/v1/2025.findings-emnlp.406"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.406","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.406","pdf_url":"https://aclanthology.org/2025.findings-emnlp.406.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.findings-emnlp.406.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031230037","display_name":"Ziyao Xu","orcid":"https://orcid.org/0000-0002-7298-004X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ziyao Xu","raw_affiliation_strings":["MOE Key Lab of Computational Linguistics , School of Computer Science , Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MOE Key Lab of Computational Linguistics , School of Computer Science , Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108020812","display_name":"Zhe Yang","orcid":"https://orcid.org/0009-0002-1358-3420"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhe Yang","raw_affiliation_strings":["MOE Key Lab of Computational Linguistics , School of Computer Science , Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MOE Key Lab of Computational Linguistics , School of Computer Science , Peking University","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5025565222","display_name":"Houfeng Wang","orcid":"https://orcid.org/0000-0001-7130-1589"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Houfeng Wang","raw_affiliation_strings":["MOE Key Lab of Computational Linguistics , School of Computer Science , Peking University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"MOE Key Lab of Computational Linguistics , School of Computer Science , Peking University","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15757637,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"7694","last_page":"7706"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5641999840736389,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.5641999840736389,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10740000009536743,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.04349999874830246,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.652899980545044},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5837000012397766},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.36309999227523804},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.2840999960899353},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.2824000120162964}],"concepts":[{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.652899980545044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6273000240325928},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6248000264167786},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6014000177383423},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5837000012397766},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.36309999227523804},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.323199987411499},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2824000120162964},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2644999921321869},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.findings-emnlp.406","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.406","pdf_url":"https://aclanthology.org/2025.findings-emnlp.406.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.findings-emnlp.406","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.findings-emnlp.406","pdf_url":"https://aclanthology.org/2025.findings-emnlp.406.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EMNLP 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3893241370","display_name":null,"funder_award_id":"62036001","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416034870.pdf","grobid_xml":"https://content.openalex.org/works/W4416034870.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Compositional":[0],"generalization":[1,37,66,76,114,140],"is":[2,57],"one":[3],"of":[4,29,38,52,141],"the":[5,35,50,126,138,149],"important":[6],"abilities":[7],"that":[8,48,124,132],"large":[9,27],"language":[10],"models":[11],"(LLMs)":[12],"need":[13],"to":[14,33,59,108],"have":[15],"for":[16,63],"semantic":[17,41,80,145],"parsing.Previous":[18],"research":[19],"typically":[20],"relies":[21],"on":[22,40,78,103,143],"datasetspecific":[23],"designs":[24],"or":[25],"a":[26,55,60,97],"number":[28,51],"samples":[30,53,119],"in":[31,54,148],"demonstrations":[32],"improve":[34,137],"compositional":[36,65,75,113,122,139],"LLMs":[39,70,111,142],"parsing.We":[42],"revisit":[43],"this":[44,87],"issue":[45],"and":[46,99,117,129],"find":[47],"when":[49],"demonstration":[56],"limited":[58],"lower":[61],"bound":[62],"achieving":[64],"(minimum-coverage),":[67],"current":[68],"advanced":[69],"cannot":[71],"arbitrarily":[72],"achieve":[73,112],"good":[74],"generically":[77,109],"different":[79,144],"parsing":[81,146],"datasets":[82,147],"without":[83],"dataset-specific":[84],"designs.To":[85],"solve":[86],"problem,":[88],"we":[89],"propose":[90],"Multi-level":[91],"Component":[92],"Composition":[93],"(MC":[94],"2":[95,134],"),":[96],"minimum-coverage":[98,150],"datasetagnostic":[100],"framework":[101],"based":[102],"input":[104],"primitives,":[105],"which":[106],"aims":[107],"help":[110],"by":[115],"selecting":[116],"organizing":[118],"from":[120],"multiple":[121],"levels":[123],"satisfy":[125],"primitive":[127],"coverage.Experiments":[128],"analysis":[130],"show":[131],"MC":[133],"can":[135],"effectively":[136],"setting.":[151]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-08T00:00:00"}
