% Bibliography of studies on collaboration and mining of GitHub-hosted
% projects (2014-2016). One entry per work, one field per line.
% The abstract, keywords and attachments fields are carried as annotations;
% standard styles are expected to ignore fields they do not know.

% Conference paper, ICSE 2016.
@inproceedings{Vasilescu:2016:SLM:2884781.2884875,
  author       = {Vasilescu, Bogdan and Blincoe, Kelly and Xuan, Qi and Casalnuovo, Casey and Damian, Daniela and Devanbu, Premkumar and Filkov, Vladimir},
  title        = {The Sky is Not the Limit: Multitasking Across {GitHub} Projects},
  booktitle    = {Proceedings of the 38th International Conference on Software Engineering (ICSE 2016)},
  series       = {ICSE {\textquoteright}16},
  year         = {2016},
  pages        = {994--1005},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  keywords     = {github, multitasking, productivity},
  isbn         = {978-1-4503-3900-1},
  doi          = {10.1145/2884781.2884875},
  url          = {http://doi.acm.org/10.1145/2884781.2884875},
  abstract     = {Software development has always inherently required multitasking: developers switch between coding, reviewing, testing, designing, and meeting with colleagues. The advent of software ecosystems like GitHub has enabled something new: the ability to easily switch between projects. Developers also have social incentives to contribute to many projects; prolific contributors gain social recognition and (eventually) economic rewards. Multitasking, however, comes at a cognitive cost: frequent context-switches can lead to distraction, sub-standard work, and even greater stress. In this paper, we gather ecosystem-level data on a group of programmers working on a large collection of projects. We develop models and methods for measuring the rate and breadth of a developers{\textquoteright} context-switching behavior, and we study how context-switching affects their productivity. We also survey developers to understand the reasons for and perceptions of multitasking. We find that the most common reason for multitasking is interrelationships and dependencies between projects. Notably, we find that the rate of switching and breadth (number of projects) of a developer{\textquoteright}s work matter. Developers who work on many projects have higher productivity if they focus on few projects per day. Developers that switch projects too much during the course of a day have lower productivity as they work on more projects overall. Despite these findings, developers perceptions of the benefits of multitasking are varied.}
}

% Conference paper, MSR 2015.
@inproceedings{blincoeMSR15,
  author       = {Blincoe, Kelly and Harrison, Francis and Damian, Daniela},
  title        = {Ecosystems in {GitHub} and a Method for Ecosystem Identification using Reference Coupling},
  booktitle    = {12th Working Conference on Mining Software Repositories},
  series       = {MSR},
  year         = {2015},
  month        = may,
  publisher    = {IEEE},
  organization = {IEEE},
  url          = {http://kblincoe.github.io/publications/2015_MSR_Ecosystems_CameraReady.pdf},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/2015_MSR_Ecosystems_CameraReady.pdf},
  abstract     = {Software projects are not developed in isolation. Recent research has shifted to studying software ecosystems, communities of projects that depend on each other and are developed together. However, identifying technical dependencies at the ecosystem level can be challenging. In this paper, we propose a new method, known as reference coupling, for detecting technical dependencies between projects. The method establishes dependencies through user-specified cross-references between projects. We use our method to identify ecosystems in GitHub-hosted projects, and we identify several characteristics of the identified ecosystems. We find that most ecosystems are centered around one project and are interconnected with other ecosystems. The predominant type of ecosystems are those that develop tools to support software development. We also found that the project owners{\textquoteright} social behaviour aligns well with the technical dependencies within the ecosystem, but project contributors{\textquoteright} social behaviour does not align with these dependencies. We conclude with a discussion on future research that is enabled by our reference coupling method.}
}

% Chapter by its own authors inside an edited volume (hence incollection).
@incollection{1733,
  author       = {Blincoe, Kelly and Damian, Daniela},
  editor       = {Damiani, Ernesto and Frati, Fulvio and Riehle, Dirk and Wasserman, Anthony I.},
  title        = {Implicit Coordination: A Case Study of the {Rails} {OSS} Project},
  booktitle    = {Open Source Systems: Adoption and Impact},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {451},
  year         = {2015},
  pages        = {35--44},
  publisher    = {Springer International Publishing},
  organization = {Springer International Publishing},
  isbn         = {978-3-319-17836-3},
  doi          = {10.1007/978-3-319-17837-0_4},
  url          = {http://dx.doi.org/10.1007/978-3-319-17837-0_4},
  abstract     = {Previous studies on coordination in OSS projects have studied explicit communication. Research has theorized on the existence of coordination without direct communication or implicit coordination in OSS projects, suggesting that it contributes to their success. However, due to the intangible nature of implicit coordination, no studies have confirmed these theories. We describe how implicit coordination can now be measured in modern collaborative development environments. Through a case study of a popular OSS GitHub-hosted project, we report on how and why features that support implicit coordination are used.}
}

% Journal article; no volume or page data is recorded for it here.
@article{1796,
  author       = {Kalliamvakou, Eirini and Gousios, Georgios and Blincoe, Kelly and Singer, Leif and German, Daniel M. and Damian, Daniela},
  title        = {An in-depth study of the promises and perils of mining {GitHub}},
  journal      = {Empirical Software Engineering},
  year         = {2015},
  publisher    = {Springer},
  keywords     = {github},
  issn         = {1573-7616},
  doi          = {10.1007/s10664-015-9393-5},
  url          = {http://www.gousios.gr/pub/promises-perils-github-extended.pdf},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/promises-perils-github-extended.pdf},
  abstract     = {With over 10 million git repositories, GitHub is becoming one of the most important sources of software artifacts on the Internet. Researchers mine the information stored in GitHub{\textquoteright}s event logs to understand how its users employ the site to collaborate on software, but so far there have been no studies describing the quality and properties of the available GitHub data. We document the results of an empirical study aimed at understanding the characteristics of the repositories and users in GitHub; we see how users take advantage of GitHub{\textquoteright}s main features and how their activity is tracked on GitHub and related datasets to point out misalignment between the real and mined data. Our results indicate that while GitHub is a rich source of data on software development, mining GitHub for research purposes should take various potential perils into consideration. For example, we show that the majority of the projects are personal and inactive, and that almost 40\% of all pull requests do not appear as merged even though they were. Also, approximately half of GitHub{\textquoteright}s registered users do not have public activity, while the activity of GitHub users in repositories is not always easy to pinpoint. We use our identified perils to see if they can pose validity threats; we review selected papers from the MSR 2014 Mining Challenge and see if there are potential impacts to consider. We provide a set of recommendations for software engineering researchers on how to approach the data in GitHub.}
}

% Paper within a proceedings volume (it has authors, pages and a DOI).
% NOTE(review): booktitle is inferred from the ICSE.2015 DOI prefix; confirm.
@inproceedings{1726,
  author       = {Kalliamvakou, Eirini and Damian, Daniela and Blincoe, Kelly and Singer, Leif and German, Daniel M.},
  title        = {Open Source-Style Collaborative Development Practices in Commercial Projects Using {GitHub}},
  booktitle    = {Proceedings of the 37th International Conference on Software Engineering (ICSE 2015)},
  volume       = {1},
  year         = {2015},
  month        = may,
  pages        = {574--585},
  publisher    = {ACM/IEEE},
  keywords     = {github},
  doi          = {10.1109/ICSE.2015.74},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/icse-camera.pdf},
  abstract     = {Researchers are currently drawn to study projects hosted on GitHub due to its popularity, ease of obtaining data, and its distinctive built-in social features. GitHub has been found to create a transparent development environment, which together with a pull request-based workflow, provides a lightweight mechanism for committing, reviewing and managing code changes. These features impact how GitHub is used and the benefits it provides to teams{\textquoteright} development and collaboration. While most of the evidence we have is from GitHub{\textquoteright}s use in open source software (OSS) projects, GitHub is also used in an increasing number of commercial projects. It is unknown how GitHub supports these projects given that GitHub{\textquoteright}s workflow model does not intuitively fit the commercial development way of working. In this paper, we report findings from an online survey and interviews with GitHub users on how GitHub is used for collaboration in commercial projects. We found that many commercial projects adopted practices that are more typical of OSS projects including reduced communication, more independent work, and self-organization. We discuss how GitHub{\textquoteright}s transparency and popular workflow can promote open collaboration, allowing organizations to increase code reuse and promote knowledge sharing across their teams.}
}

% Journal article; title and names are stored in normal case so that the
% bibliography style decides capitalisation.
@article{mcdonald2014modeling,
  author       = {McDonald, Nora and Blincoe, Kelly and Petakovic, Eva and Goggins, Sean},
  title        = {Modeling Distributed Collaboration on {GitHub}},
  journal      = {Advances in Complex Systems},
  year         = {2014},
  publisher    = {World Scientific},
  abstract     = {In this paper, we apply concepts from Distributed Leadership, a theory suggesting that leadership is shared among members of an organization, to frame models of contribution that we uncover in five relatively successful open source software (OSS) projects hosted on GitHub. In this qualitative, comparative case study, we show how these projects make use of GitHub features such as pull requests (PRs). We find that projects in which member PRs are more frequently merged with the codebase experience more sustained participation. We also find that projects with higher success rates among contributors and higher contributor retention tend to have more distributed (non-centralized) practices for reviewing and processing PRs. The relationships between organizational form and GitHub practices are enabled and made visible as a result of GitHub{\textquoteright}s novel interface. Our results demonstrate specific dimensions along which these projects differ and explicate a framework that warrants testing in future studies of OSS, particularly GitHub.}
}

% Conference paper, MSR 2014.
@inproceedings{Kalliamvakou:2014:PPM:2597073.2597074,
  author       = {Kalliamvakou, Eirini and Gousios, Georgios and Blincoe, Kelly and Singer, Leif and German, Daniel M. and Damian, Daniela},
  title        = {The Promises and Perils of Mining {GitHub}},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {92--101},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  keywords     = {bias, code reviews, git, github, mining software repositories},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597074},
  url          = {http://doi.acm.org/10.1145/2597073.2597074},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/perils.pdf},
  abstract     = {With over 10 million git repositories, GitHub is becoming one of the most important source of software artifacts on the Internet. Researchers are starting to mine the information stored in GitHub{\textquoteright}s event logs, trying to understand how its users employ the site to collaborate on software. However, so far there have been no studies describing the quality and properties of the data available from GitHub. We document the results of an empirical study aimed at understanding the characteristics of the repositories in GitHub and how users take advantage of GitHub{\textquoteright}s main features---namely commits, pull requests, and issues. Our results indicate that, while GitHub is a rich source of data on software development, mining GitHub for research purposes should take various potential perils into consideration. We show, for example, that the majority of the projects are personal and inactive; that GitHub is also being used for free storage and as a Web hosting service; and that almost 40\% of all pull requests do not appear as merged, even though they were. We provide a set of recommendations for software engineering researchers on how to approach the data in GitHub.}
}

% Conference paper, MSR 2014.
@inproceedings{Sheoran:2014:UWG:2597073.2597114,
  author       = {Sheoran, Jyoti and Blincoe, Kelly and Kalliamvakou, Eirini and Damian, Daniela and Ell, Jordan},
  title        = {Understanding {``Watchers''} on {GitHub}},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {336--339},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  keywords     = {github, mining challenge, msr challenge, repositories, Software Teams, Watchers},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597114},
  url          = {http://doi.acm.org/10.1145/2597073.2597114},
  abstract     = {Users on GitHub can watch repositories to receive notifications about project activity. This introduces a new type of passive project membership. In this paper, we investigate the behavior of watchers and their contribution to the projects they watch. We find that a subset of project watchers begin contributing to the project and those contributors account for a significant percentage of contributors on the project. As contributors, watchers are more confident and contribute over a longer period of time in a more varied way than other contributors. This is likely attributable to the knowledge gained through project notifications.}
}