% Entry 1733: chapter by Blincoe and Damian in an edited Springer volume.
% Typed as incollection (not inbook) because the chapter has its own authors
% and the book has separate editors; classic styles ignore booktitle for inbook
% and warn when both author and editor are given.
% Proper nouns and acronyms in the title are braced to survive sentence casing.
% The organization field duplicates publisher and is kept only to preserve
% the exported data; standard styles ignore it for this entry type.
@incollection{1733,
  author       = {Blincoe, Kelly and Damian, Daniela},
  editor       = {Damiani, Ernesto and Frati, Fulvio and Riehle, Dirk and Wasserman, Anthony I.},
  title        = {Implicit Coordination: A Case Study of the {Rails} {OSS} Project},
  booktitle    = {Open Source Systems: Adoption and Impact},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {451},
  pages        = {35--44},
  year         = {2015},
  publisher    = {Springer International Publishing},
  organization = {Springer International Publishing},
  isbn         = {978-3-319-17836-3},
  doi          = {10.1007/978-3-319-17837-0_4},
  url          = {http://dx.doi.org/10.1007/978-3-319-17837-0_4},
  abstract     = {Previous studies on coordination in OSS projects have studied explicit communication. Research has theorized on the existence of coordination without direct communication or implicit coordination in OSS projects, suggesting that it contributes to their success. However, due to the intangible nature of implicit coordination, no studies have confirmed these theories. We describe how implicit coordination can now be measured in modern collaborative development environments. Through a case study of a popular OSS GitHub-hosted project, we report on how and why features that support implicit coordination are used.},
}

% Entry 1796: journal article in Empirical Software Engineering.
% No volume, number or pages are present in the exported record.
% The attachments field is a non-standard field from the export; standard
% styles ignore unknown fields, so it is retained unchanged.
@article{1796,
  author      = {Kalliamvakou, Eirini and Gousios, Georgios and Blincoe, Kelly and Singer, Leif and German, Daniel M. and Damian, Daniela},
  title       = {An in-depth study of the promises and perils of mining {GitHub}},
  journal     = {Empirical Software Engineering},
  year        = {2015},
  publisher   = {Springer},
  keywords    = {github},
  issn        = {1573-7616},
  doi         = {10.1007/s10664-015-9393-5},
  url         = {http://www.gousios.gr/pub/promises-perils-github-extended.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/promises-perils-github-extended.pdf},
  abstract    = {With over 10 million git repositories, GitHub is becoming one of the most important sources of software artifacts on the Internet. Researchers mine the information stored in GitHub{\textquoteright}s event logs to understand how its users employ the site to collaborate on software, but so far there have been no studies describing the quality and properties of the available GitHub data.
We document the results of an empirical study aimed at understanding the characteristics of the repositories and users in GitHub; we see how users take advantage of GitHub{\textquoteright}s main features and how their activity is tracked on GitHub and related datasets to point out misalignment between the real and mined data. Our results indicate that while GitHub is a rich source of data on software development, mining GitHub for research purposes should take various potential perils into consideration. For example, we show that the majority of the projects are personal and inactive, and that almost 40\% of all pull requests do not appear as merged even though they were. Also, approximately half of GitHub{\textquoteright}s registered users do not have public activity, while the activity of GitHub users in repositories is not always easy to pinpoint. We use our identified perils to see if they can pose validity threats; we review selected papers from the MSR 2014 Mining Challenge and see if there are potential impacts to consider. We provide a set of recommendations for software engineering researchers on how to approach the data in GitHub.},
}