{"id":"https://openalex.org/W7126432811","doi":"https://doi.org/10.18653/v1/2024.findings-eacl.152","title":"DialogStudio: Towards Richest and Most Diverse Unified Dataset Collection for Conversational AI","display_name":"DialogStudio: Towards Richest and Most Diverse Unified Dataset Collection for Conversational AI","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W7126432811","doi":"https://doi.org/10.18653/v1/2024.findings-eacl.152"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2024.findings-eacl.152","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-eacl.152","pdf_url":"https://aclanthology.org/2024.findings-eacl.152.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.findings-eacl.152.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124687345","display_name":"Jianguo Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianguo Zhang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124565416","display_name":"Kun Qian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kun Qian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124676625","display_name":"Zhiwei Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhiwei Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063103006","display_name":"Shelby Heinecke","orcid":"https://orcid.org/0000-0002-8831-0753"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shelby Heinecke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124558300","display_name":"Rui Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rui Meng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124485820","display_name":"Ye Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ye Liu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124476630","display_name":"Zhou Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou Yu","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124661791","display_name":"Huan Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huan Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Silvio Savarese","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Silvio Savarese","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5114452208","display_name":"Caiming Xiong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caiming Xiong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6082,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76699102,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"2299","last_page":"2315"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2409999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2409999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.20900000631809235,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12128","display_name":"AI in Service Interactions","score":0.10440000146627426,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.3668999969959259},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.31450000405311584},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.2646999955177307},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.25870001316070557},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.25540000200271606}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5688999891281128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45320001244544983},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42320001125335693},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3668999969959259},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.31450000405311584},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.29670000076293945},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.2646999955177307},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.25540000200271606},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.23839999735355377}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.findings-eacl.152","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-eacl.152","pdf_url":"https://aclanthology.org/2024.findings-eacl.152.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.findings-eacl.152","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-eacl.152","pdf_url":"https://aclanthology.org/2024.findings-eacl.152.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics: EACL 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.5417900681495667,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7126432811.pdf","grobid_xml":"https://content.openalex.org/works/W7126432811.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"advancements":[1],"in":[2,119],"conversational":[3,12,60,109],"AI,":[4],"language":[5,58,142],"models":[6,111,150],"encounter":[7],"challenges":[8],"to":[9,103],"handle":[10],"diverse":[11,34,73],"tasks,":[13],"and":[14,22,32,64,72,78,97,116,122,133,136,149],"existing":[15],"dialogue":[16,37,62,76],"dataset":[17,114,135],"collections":[18],"often":[19],"lack":[20],"diversity":[21],"comprehensiveness.To":[23],"tackle":[24],"these":[25],"issues,":[26],"we":[27,87,107],"introduce":[28],"DialogStudio:":[29],"the":[30,83,89,113,127],"largest":[31],"most":[33],"collection":[35,49],"of":[36,85,129],"datasets,":[38,146],"unified":[39],"under":[40],"a":[41],"consistent":[42],"format":[43],"while":[44],"preserving":[45],"their":[46],"original":[47],"information.Our":[48],"encompasses":[50],"data":[51],"from":[52],"open-domain":[53],"dialogues,":[54,56,66],"task-oriented":[55],"natural":[57],"understanding,":[59],"recommendation,":[61],"summarization,":[63],"knowledge-grounded":[65],"making":[67],"it":[68],"an":[69],"incredibly":[70],"rich":[71],"resource":[74],"for":[75,91,100],"research":[77],"model":[79,143],"training.To":[80],"further":[81],"enhance":[82],"utility":[84],"DialogStudio,":[86],"identify":[88],"licenses":[90],"each":[92],"dataset,":[93],"design":[94],"external":[95],"knowledge":[96],"domain-aware":[98],"prompts":[99],"selected":[101],"dialogues":[102],"facilitate":[104],"instruction-aware":[105],"fine-tuning.Furthermore,":[106],"develop":[108],"AI":[110],"using":[112],"collection,":[115],"our":[117],"experiments":[118],"both":[120],"zero-shot":[121],"few-shot":[123],"learning":[124],"scenarios":[125],"demonstrate":[126],"superiority":[128],"Di-alogStudio.To":[130],"improve":[131],"transparency":[132],"support":[134],"task-based":[137],"research,":[138],"as":[139,141],"well":[140],"pre-training,":[144],"all":[145],"licenses,":[147],"codes,":[148],"associated":[151],"with":[152],"Di-alogStudio":[153],"are":[154],"made":[155],"publicly":[156],"accessible":[157],"1":[158],".":[159]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-13T07:54:00.901334","created_date":"2026-02-02T00:00:00"}
