% Bibliography: mining-software-repositories papers (git, Apache peer review,
% Apache mailing lists, evolutionary annotations).
% Text outside entries is ignored by BibTeX, so these lines act as comments.
% The "attachments" field is non-standard; standard styles ignore unknown
% fields, so it only serves as a pointer to the archived PDF.

% Bird et al., MSR 2009: promises and perils of mining git repositories.
@inproceedings{DBLP:conf/msr/BirdRBHGD09,
  author      = {Bird, Christian and Rigby, Peter C. and Barr, Earl T. and Hamilton, David J. and Germ{\'a}n, Daniel M. and Devanbu, Premkumar T.},
  title       = {The promises and perils of mining git},
  booktitle   = {Proceedings of the 6th International Working Conference on Mining Software Repositories, MSR 2009},
  year        = {2009},
  pages       = {1--10},
  keywords    = {dscm, git, mining, scm, source code},
  abstract    = {We are now witnessing the rapid growth of decentralized source code management (DSCM) systems, in which every developer has her own repository. DSCMs facilitate a style of collaboration in which work output can flow sideways (and privately) between collaborators, rather than always up and down (and publicly) via a central repository. Decentralization comes with both the promise of new data and the peril of its misinterpretation. We focus on git, a very popular DSCM used in high-profile projects. Decentralization, and other features of git, such as automatically recorded contributor attribution, lead to richer content histories, giving rise to new questions such as "How do contributions flow between developers to the official project repository?" However, there are pitfalls. Commits may be reordered, deleted, or edited as they move between repositories. The semantics of terms common to SCMs and DSCMs sometimes differ markedly, potentially creating confusion. For example, a commit is immediately visible to all developers in centralized SCMs, but not in DSCMs. Our goal is to help researchers interested in DSCMs avoid these and other perils when mining and analyzing git data.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/1promisePeril.pdf},
}

% Rigby, German and Storey, ICSE 2008: peer review practices in the Apache server project.
% The note holds an alternate PDF location; its tilde is escaped so that LaTeX
% does not typeset it as a non-breaking space.
@inproceedings{Rigby:2008:OSS:1368088.1368162,
  author       = {Rigby, Peter C. and German, Daniel M. and Storey, Margaret-Anne},
  title        = {Open source software peer review practices: a case study of the {Apache} server},
  booktitle    = {Proceedings of the 30th International Conference on Software Engineering (ICSE 2008)},
  series       = {ICSE '08},
  year         = {2008},
  pages        = {541--550},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  isbn         = {978-1-60558-079-1},
  doi          = {10.1145/1368088.1368162},
  url          = {http://doi.acm.org/10.1145/1368088.1368162},
  note         = {http://faculty.salisbury.edu/\textasciitilde{}xswang/Research/Papers/SERelated/testing/p541-rigby.pdf},
  keywords     = {apache, cvs, email, inspection, mining software repositories (email), open source software, peer review, version control},
  abstract     = {Peer review is seen as an important quality assurance mechanism in both industrial development and the open source software (OSS) community. The techniques for performing inspections have been well studied in industry; in OSS development, peer reviews are less well understood. We examine the two peer review techniques used by the successful, mature Apache server project: review-then-commit and commit-then-review. Using archival records of email discussion and version control repositories, we construct a series of metrics that produces measures similar to those used in traditional inspection experiments. Specifically, we measure the frequency of review, the level of participation in reviews, the size of the artifact under review, the calendar time to perform a review, and the number of reviews that find defects. We provide a comparison of the two Apache review techniques as well as a comparison of Apache review to inspection in an industrial project. We conclude that Apache reviews can be described as (1) early, frequent reviews (2) of small, independent, complete contributions (3) conducted asynchronously by a potentially large, but actually small, group of self-selected experts (4) leading to an efficient and effective peer review technique.},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/p541-rigby.pdf},
}

% Rigby and Hassan, MSR 2007: psychometric text analysis of the Apache developer mailing list.
% Braces in the title protect the acronym, the proper noun, and the capital
% after the question mark from sentence-casing styles.
@inproceedings{1008,
  author       = {Rigby, Peter C. and Hassan, Ahmed E.},
  title        = {What Can {OSS} Mailing Lists Tell Us? {A} Preliminary Psychometric Text Analysis of the {Apache} Developer Mailing List},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR'07: ICSE Workshops 2007)},
  year         = {2007},
  pages        = {23},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.35},
  keywords     = {apache, developers, email, joining, liwc, mailing lists, personality},
  abstract     = {Developer mailing lists are a rich source of information about Open Source Software (OSS) development. The unstructured nature of email makes extracting information difficult. We use a psychometrically-based linguistic analysis tool, the LIWC, to examine the Apache httpd server developer mailing list. We conduct three preliminary experiments to assess the appropriateness of this tool for information extraction from mailing lists. First, using LIWC dimensions that are correlated with the big five personality traits, we assess the personality of four top developers against a baseline for the entire mailing list. The two developers that were responsible for the major Apache releases had similar personalities. Their personalities were different from the baseline and the other developers. Second, the first and last 50 emails for two top developers who have left the project are examined. The analysis shows promise in understanding why developers join and leave a project. Third, we examine word usage on the mailing list for two major Apache releases. The differences may reflect the relative success of each release.},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300023.pdf},
}

% German, Rigby and Storey, MSR 2006: evolutionary annotations derived from change logs.
% The doi field holds the bare identifier; the resolver link lives in url.
@inproceedings{German:2006:UEA:1137983.1138020,
  author       = {German, Daniel M. and Rigby, Peter C. and Storey, Margaret-Anne},
  title        = {Using evolutionary annotations from change logs to enhance program comprehension},
  booktitle    = {Proceedings of the 2006 international workshop on Mining software repositories},
  series       = {MSR '06},
  year         = {2006},
  pages        = {159--162},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  isbn         = {1-59593-397-2},
  doi          = {10.1145/1137983.1138020},
  url          = {http://doi.acm.org/10.1145/1137983.1138020},
  keywords     = {annotations, apache, bug tracking, change history, eclipse, evolutionary, log files, mailing lists, mining software repositories, software evolution, version control},
  abstract     = {Evolutionary annotations are descriptions of how source code evolves over time. Typical source comments, given their static nature, are usually inadequate for describing how a program has evolved over time; instead, source code comments are typically a description of what a program currently does. We propose the use of evolutionary annotations as a way of describing the rationale behind changes applied to a given program (for example "These lines were added to ..."). Evolutionary annotations can assist a software developer in the understanding of how a given portion of source code works by showing him how the source has evolved into its current form. In this paper we describe a method to automatically create evolutionary annotations from change logs, defect tracking systems and mailing lists. We describe the design of a prototype for Eclipse that can filter and present these annotations alongside their corresponding source code and in workbench views. We use Apache as a test case to demonstrate the feasibility of this approach.},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/159UsingEvolutionary.pdf},
}