% Mining Software Repositories (MSR) papers, one entry per publication.
% Conventions used below: inproceedings entry type, one field per line,
% page ranges written with a double hyphen, month given as a bare macro.
% The "attachments" field is non-standard and is ignored by bibliography
% styles; it is kept because it records the archived PDF location.

% Pull request prioritisation tool (PRioritizer).
@inproceedings{VGZ15,
  author       = {van der Veen, Erik and Gousios, Georgios and Zaidman, Andy},
  title        = {Automatically Prioritizing Pull Requests},
  booktitle    = {Proceedings of the 12th International Conference on Mining Software Repositories},
  year         = {2015},
  month        = may,
  publisher    = {IEEE},
  organization = {IEEE},
  abstract     = {In previous work, we observed that in the pull-based development model integrators face challenges with regard to prioritizing work in the face of multiple concurrent pull requests. We present the design and initial implementation of a prototype pull request prioritisation tool called PRioritizer. PRioritizer works like a priority inbox for pull requests, recommending the top pull requests the project owner should focus on. A preliminary user study showed that PRioritizer provides functionality that GitHub is currently lacking, even though users need more insight into how the priority ranking is established to make PRioritizer really useful.},
  url          = {http://www.gousios.gr/pub/prioritizer.pdf},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/prioritizer.pdf},
}

% Pull request dataset paper.
@inproceedings{Gousios:2014:DPD:2597073.2597122,
  author       = {Gousios, Georgios and Zaidman, Andy},
  title        = {A Dataset for Pull-based Development Research},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {368--371},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Pull requests form a new method for collaborating in distributed software development. To study the pull request distributed development model, we constructed a dataset of almost 900 projects and 350,000 pull requests, including some of the largest users of pull requests on Github. In this paper, we describe how the project selection was done, we analyze the selected features and present a machine learning tool set for the R statistics environment.},
  keywords     = {Distributed software development, Empirical software engineering, msr data showcase, pull request, pull-based development},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597122},
  url          = {http://doi.acm.org/10.1145/2597073.2597122},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/pullreqs-dataset.pdf},
}

% GHTorrent data-on-demand service paper. Proper nouns in the title are
% braced so that sentence-casing styles do not lowercase them.
@inproceedings{Gousios:2014:LGG:2597073.2597126,
  author       = {Gousios, Georgios and Vasilescu, Bogdan and Serebrenik, Alexander and Zaidman, Andy},
  title        = {Lean {GHTorrent}: {GitHub} Data on Demand},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {384--387},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {In recent years, GitHub has become the largest code host in the world, with more than 5M developers collaborating across 10M repositories. Numerous popular open source projects (such as Ruby on Rails, Homebrew, Bootstrap, Django or jQuery) have chosen GitHub as their host and have migrated their code base to it. GitHub offers a tremendous research potential. For instance, it is a flagship for current open source development, a place for developers to showcase their expertise to peers or potential recruiters, and the platform where social coding features or pull requests emerged. However, GitHub data is, to date, largely underexplored. To facilitate studies of GitHub, we have created GHTorrent, a scalable, queriable, offline mirror of the data offered through the GitHub REST API. In this paper we present a novel feature of GHTorrent designed to offer customisable data dumps on demand. The new GHTorrent data-on-demand service offers users the possibility to request via a web form up-to-date GHTorrent data dumps for any collection of GitHub repositories. We hope that by offering customisable GHTorrent data dumps we will not only lower the "barrier for entry" even further for researchers interested in mining GitHub data (thus encourage researchers to intensify their mining efforts), but also enhance the replicability of GitHub studies (since a snapshot of the data on which the results were obtained can now easily accompany each study).},
  keywords     = {data on demand, dataset, github, msr data showcase},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597126},
  url          = {http://doi.acm.org/10.1145/2597073.2597126},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/lean-ghtorrent_0.pdf},
}

% Empirical study of changes made during modern code review.
@inproceedings{Beller:2014:MCR:2597073.2597082,
  author       = {Beller, Moritz and Bacchelli, Alberto and Zaidman, Andy and Juergens, Elmar},
  title        = {Modern Code Reviews in Open-source Projects: Which Problems Do They Fix?},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {202--211},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Code review is the manual assessment of source code by humans, mainly intended to identify defects and quality problems. Modern Code Review (MCR), a lightweight variant of the code inspections investigated since the 1970s, prevails today both in industry and open-source software (OSS) systems. The objective of this paper is to increase our understanding of the practical benefits that the MCR process produces on reviewed source code. To that end, we empirically explore the problems fixed through MCR in OSS systems. We manually classified over 1,400 changes taking place in reviewed code from two OSS projects into a validated categorization scheme. Surprisingly, results show that the types of changes due to the MCR process in OSS are strikingly similar to those in the industry and academic systems from literature, featuring the similar 75:25 ratio of maintainability-related to functional problems. We also reveal that 7{\textendash}35\% of review comments are discarded and that 10{\textendash}22\% of the changes are not triggered by an explicit review comment. Patterns emerged in the review data; we investigated them revealing the technical factors that influence the number of changes due to the MCR process. We found that bug-fixing tasks lead to fewer changes and tasks with more altered files and a higher code churn have more changes. Contrary to intuition, the person of the reviewer had no impact on the number of changes.},
  keywords     = {code review, defects, open source software},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597082},
  url          = {http://doi.acm.org/10.1145/2597073.2597082},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/beller.pdf},
}

% GNOME issue-handling study. The exported booktitle contained the
% proceedings title twice in a row; it is given once here.
@inproceedings{957,
  author       = {Luijten, Bart and Visser, Joost and Zaidman, Andy},
  title        = {Assessment of issue handling efficiency},
  booktitle    = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year         = {2010},
  pages        = {94--97},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  abstract     = {We mined the issue database of GNOME to assess how issues are handled. How many issues are submitted and resolved? Does the backlog grow or decrease? How fast are issues resolved? Does issue resolution speed increase or decrease over time? In which subproject are issues handled most efficiently? To answer such questions, we apply several visualization and quantification instruments to the raw issue data. In particular, we aggregate issues into four risk categories, based on their resolution time. These categories are the basis both for visualizing and ranking, which are used in concert for issue database exploration.},
  keywords     = {bug reports, bug tracking, classification, gnome, msr challenge, visualization},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463292},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/94bluijtenMSR2010.pdf},
}

% Association-rule study of production/test code co-evolution. The exported
% booktitle contained the proceedings title twice in a row; it is given
% once here.
@inproceedings{938,
  author       = {Lubsen, Zeeger and Zaidman, Andy and Pinzger, Martin},
  title        = {Using association rules to study the co-evolution of production \& test code},
  booktitle    = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year         = {2009},
  pages        = {151--154},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Vancouver, BC, Canada},
  abstract     = {Unit tests are generally acknowledged as an important aid to produce high quality code, as they provide quick feedback to developers on the correctness of their code. In order to achieve high quality, well-maintained tests are needed. Ideally, tests co-evolve with the production code to test changes as soon as possible. In this paper, we explore an approach based on association rule mining to determine whether production and test code co-evolve synchronously. Through two case studies, one with an open source and another one with an industrial software system, we show that our association rule mining approach allows one to assess the co-evolution of product and test code in a software project and, moreover, to uncover the distribution of programmer effort over pure coding, pure testing, or a more test-driven-like practice.},
  keywords     = {association rules, checkstyle, source code, unit test},
  isbn         = {978-1-4244-3493-0},
  doi          = {10.1109/MSR.2009.5069493},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/151UsingAssociation.pdf},
}