{"id":"https://openalex.org/W7126246758","doi":"https://doi.org/10.48550/arxiv.2601.20882","title":"DevOps-Gym: Benchmarking AI Agents in Software DevOps Cycle","display_name":"DevOps-Gym: Benchmarking AI Agents in Software DevOps Cycle","publication_year":2026,"publication_date":"2026-01-27","ids":{"openalex":"https://openalex.org/W7126246758","doi":"https://doi.org/10.48550/arxiv.2601.20882"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2601.20882","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tang, Yuheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tang, Yuheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009692602","display_name":"Kaijie Zhu","orcid":"https://orcid.org/0000-0002-3235-188X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Kaijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061371645","display_name":"Bonan Ruan","orcid":"https://orcid.org/0009-0004-5500-6060"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruan, Bonan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124392734","display_name":"Chuqi Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chuqi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124316831","display_name":"Michael Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Michael","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124453676","display_name":"Hongwei Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Hongwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124365599","display_name":"Suyue Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Suyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Shi, Tianneng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shi, Tianneng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100607170","display_name":"Zekun Li","orcid":"https://orcid.org/0009-0000-7124-4334"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Zekun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022177364","display_name":"Christopher Kruegel","orcid":"https://orcid.org/0000-0001-5140-3414"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kruegel, Christopher","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075685499","display_name":"Giovanni Vigna","orcid":"https://orcid.org/0000-0002-3422-5369"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vigna, Giovanni","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019426968","display_name":"Dawn Song","orcid":"https://orcid.org/0000-0001-9745-6802"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Song, Dawn","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Wang, William Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, William Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124358696","display_name":"Lun Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Lun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064527453","display_name":"Yangruibo Ding","orcid":"https://orcid.org/0000-0003-3224-6876"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ding, Yangruibo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084611756","display_name":"Zhenkai Liang","orcid":"https://orcid.org/0000-0001-7138-5030"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liang, Zhenkai","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5124332178","display_name":"Wenbo Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Wenbo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":17,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.6650000214576721,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.6650000214576721,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.09929999709129333,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10639","display_name":"Advanced Software Engineering Methodologies","score":0.06930000334978104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/devops","display_name":"DevOps","score":0.941100001335144},{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5605999827384949},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.5450000166893005},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.5198000073432922},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.45249998569488525},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.42820000648498535},{"id":"https://openalex.org/keywords/software-development","display_name":"Software development","score":0.39489999413490295},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3596999943256378}],"concepts":[{"id":"https://openalex.org/C9903902","wikidata":"https://www.wikidata.org/wiki/Q3025536","display_name":"DevOps","level":3,"score":0.941100001335144},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7077999711036682},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.6717000007629395},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5605999827384949},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.5450000166893005},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5198000073432922},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.45249998569488525},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.42820000648498535},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.39489999413490295},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3596999943256378},{"id":"https://openalex.org/C152752567","wikidata":"https://www.wikidata.org/wiki/Q116877","display_name":"Code refactoring","level":3,"score":0.3555000126361847},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3271999955177307},{"id":"https://openalex.org/C180152950","wikidata":"https://www.wikidata.org/wiki/Q2904257","display_name":"Software development process","level":4,"score":0.32260000705718994},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C122944926","wikidata":"https://www.wikidata.org/wiki/Q209711","display_name":"Extreme programming","level":5,"score":0.2978000044822693},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.29420000314712524},{"id":"https://openalex.org/C135945739","wikidata":"https://www.wikidata.org/wiki/Q1211457","display_name":"Software release life cycle","level":5,"score":0.2777000069618225},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.26600000262260437},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C2982740150","wikidata":"https://www.wikidata.org/wiki/Q5249230","display_name":"Design cycle","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C117447612","wikidata":"https://www.wikidata.org/wiki/Q1412670","display_name":"Software quality","level":4,"score":0.25099998712539673}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2601.20882","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2601.20882","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.20882","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2601.20882","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.5766469836235046,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Even":[0],"though":[1],"demonstrating":[2],"extraordinary":[3],"capabilities":[4,14],"in":[5,15,33,104,121,146,172],"code":[6,27],"generation":[7,145],"and":[8,39,52,63,66,87,92,106,117,126,133,143,148,150,160,162],"software":[9,18],"issue":[10,90,141],"resolving,":[11,91],"AI":[12,80,179],"agents'":[13],"the":[16,30,74,123,167,174],"full":[17,175],"DevOps":[19,31,84,176],"cycle":[20,32,177],"are":[21],"still":[22],"unknown.":[23],"Different":[24],"from":[25,101],"pure":[26],"generation,":[28],"handling":[29],"real-world":[34,98],"software,":[35],"including":[36],"developing,":[37],"deploying,":[38],"managing,":[40],"requires":[41],"analyzing":[42],"large-scale":[43],"projects,":[44],"understanding":[45],"dynamic":[46],"program":[47],"behaviors,":[48],"leveraging":[49],"domain-specific":[50],"tools,":[51],"making":[53],"sequential":[54],"decisions.":[55],"However,":[56],"existing":[57],"benchmarks":[58],"focus":[59],"on":[60],"isolated":[61],"problems":[62],"lack":[64],"environments":[65],"tool":[67],"interfaces":[68],"for":[69,78,169],"DevOps.":[70],"We":[71,108],"introduce":[72],"DevOps-Gym,":[73],"first":[75],"end-to-end":[76],"benchmark":[77],"evaluating":[79],"agents":[81,134],"across":[82],"core":[83],"workflows:":[85],"build":[86,161],"configuration,":[88],"monitoring,":[89],"test":[93,144],"generation.":[94],"DevOps-Gym":[95],"includes":[96],"700+":[97],"tasks":[99,156],"collected":[100],"30+":[102],"projects":[103],"Java":[105,147],"Go.":[107],"develop":[109],"a":[110],"semi-automated":[111],"data":[112],"collection":[113],"mechanism":[114],"with":[115,140,178],"rigorous":[116],"non-trivial":[118],"expert":[119],"efforts":[120],"ensuring":[122],"task":[124],"coverage":[125],"quality.":[127],"Our":[128],"evaluation":[129],"of":[130],"state-of-the-art":[131],"models":[132],"reveals":[135],"fundamental":[136],"limitations:":[137],"they":[138],"struggle":[139],"resolving":[142],"Go,":[149],"remain":[151],"unable":[152],"to":[153],"handle":[154],"new":[155],"such":[157],"as":[158],"monitoring":[159],"configuration.":[163],"These":[164],"results":[165],"highlight":[166],"need":[168],"essential":[170],"research":[171],"automating":[173],"agents.":[180]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-01T00:00:00"}
