% Vasilescu et al., CSCW 2014: case study of R comparing the r-help mailing list with StackExchange.
% Fixed: HTML-entity residue in booktitle and keywords, missing backslashes on texttt/emph, page range.
@inproceedings{Vasilescu:2014:SQS:2531602.2531659,
  title = {How Social {Q\&A} Sites Are Changing Knowledge Sharing in Open Source Software Communities},
  booktitle = {Proceedings of the 17th ACM Conference on Computer Supported Cooperative Work \& Social Computing},
  series = {CSCW '14},
  year = {2014},
  pages = {342--354},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Historically, mailing lists have been the preferred means for coordinating development and user support activities. With the emergence and popularity growth of social Q\&A sites such as the StackExchange network (e.g., StackOverflow), this is beginning to change. Such sites offer different socio-technical incentives to their participants than mailing lists do, e.g., rich web environments to store and manage content collaboratively, or a place to showcase their knowledge and expertise more vividly to peers or potential recruiters. A key difference between StackExchange and mailing lists is gamification, i.e., StackExchange participants compete to obtain reputation points and badges. In this paper, we use a case study of R (a widely-used tool for data analysis) to investigate how mailing list participation has evolved since the launch of StackExchange. Our main contribution is the assembly of a joint data set from the two sources, in which participants in both the \texttt{r-help} mailing list and StackExchange are identifiable. This permits their activities to be linked across the two resources and also over time. With this data set we found that user support activities show a strong shift away from \texttt{r-help}. In particular, mailing list experts are migrating to StackExchange, where their behaviour is different. First, participants active both on \texttt{r-help} and on StackExchange are more active than those who focus exclusively on only one of the two. Second, they provide faster answers on StackExchange than on \texttt{r-help}, suggesting they are motivated by the \emph{gamified} environment. To our knowledge, our study is the first to directly chart the changes in behaviour of specific contributors as they migrate into gamified environments, and has important implications for knowledge management in software engineering.},
  keywords = {crowdsourced knowledge, gamification, mailing lists, open source, social Q\&A},
  isbn = {978-1-4503-2540-0},
  doi = {10.1145/2531602.2531659},
  url = {http://doi.acm.org/10.1145/2531602.2531659},
  attachments = {https://flosshub.org/sites/flosshub.org/files/cscw14.pdf},
  author = {Vasilescu, Bogdan and Serebrenik, Alexander and Devanbu, Prem and Filkov, Vladimir}
}

% Rousinopoulos et al., 2014: sentiment analysis applied to the mailing list of a FOSS project.
% Fixed: all-caps title rewritten in title case; month uses the standard macro.
@article{1745,
  title = {Sentiment Analysis of Free/Open Source Developers: Preliminary Findings from a Case Study},
  journal = {Revista Eletr{\^o}nica de Sistemas de Informa{\c c}{\~a}o},
  volume = {13},
  year = {2014},
  month = aug,
  abstract = {Software development is a human intensive activity. And as such, how developers face their tasks is of major importance. In an environment such as the one that is common in FOSS (free/open source software) projects where professionals (i.e., paid developers) share the development effort with volunteers, the morale of the development and user community is of major importance. In this paper, we present a preliminary analysis using sentiment analysis techniques to a FOSS project. We therefore mine the mailing list of a project and apply these techniques to the most relevant participants. Although the application is at this time limited, we hope that this experience can be of benefit in the future to determine situations that may affect the developers or the project, such as low productivity, developer abandonment, project forking, etc.
},
  keywords = {developer productivity, FLOSS, mailing lists, natural language processing, openSUSE, sentiment analysis, software development, software repository mining},
  doi = {10.5329/RESI.2014.1302006},
  url = {http://189.16.45.2/ojs/index.php/reinfo/article/view/1677},
  attachments = {https://flosshub.org/sites/flosshub.org/files/1677-6732-1-PB.pdf},
  author = {Rousinopoulos, Athanasios-Ilias and Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M.}
}

% Squire, 2013: replicable infrastructure for collecting, storing and cleaning project email archives.
% This is a paper with an author, so it is typed as inproceedings rather than proceedings.
% NOTE(review): booktitle was absent from the export; the RESER 2013 name below should be confirmed.
@inproceedings{1561,
  title = {A Replicable Infrastructure for Empirical Studies of Email Archives},
  booktitle = {2013 3rd International Workshop on Replication in Empirical Software Engineering Research (RESER)},
  year = {2013},
  month = oct,
  pages = {43--50},
  publisher = {IEEE},
  address = {Baltimore, MD, USA},
  abstract = {This paper describes a replicable infrastructure solution for conducting empirical software engineering studies based on email mailing list archives. Mailing list emails, such as those affiliated with free, libre, and open source software (FLOSS) projects, are currently archived in several places online, but each research team that wishes to study these email artifacts closely must design their own solution for collection, storage and cleaning of the data. Consequently, research results will be difficult to replicate, especially as the email archive for any living project will still be continually growing. This paper describes a simple, replicable infrastructure for the collection, storage, and cleaning of project email data and analyses.},
  keywords = {apache, cleaning, collection, couchdb, database, document-oriented database, email, lucene, mailing lists, nosql, replication, storage},
  isbn = {978-0-7695-5121-0},
  attachments = {https://flosshub.org/sites/flosshub.org/files/RESERv2.pdf},
  author = {Squire, Megan}
}

% Kuechler et al., OSS 2012: gender differences in mailing list subscription and posting for six FOSS projects.
% The conference name was stored in the address field and the series was fused into publisher; both are separated here.
% NOTE(review): the series name is expanded from the exported abbreviation IFIP AICT; confirm.
@inproceedings{1444,
  title = {Gender Differences in Early Free and Open Source Software Joining Process},
  booktitle = {Eighth International Conference on Open Source Systems (OSS 2012)},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {378},
  year = {2012},
  note = {"We examined subscriber logs and data for six FOSS projects..." "We examined the differences between posters and non-posters to determine the attrition rate..." "Using data from the US Census, we matched names to lists of the most common female and male names. We identified 666 users using this process."},
  month = sep,
  pages = {78--93},
  publisher = {Springer},
  abstract = {With the growth of free and open source software (FOSS) and the adoption of FOSS solutions in business and everyday life, it is important that projects serve their growingly diverse user base. The sustainability of FOSS projects relies on a constant influx of new contributors. Several large demographic surveys found that FOSS communities are very homogenous, dominated by young men, similar to the bias existing in the rest of the IT workforce. Building on previous research, we examine mailing list subscriptions and posting statistics of female FOSS participants. New participants often experience their first interaction on a FOSS project{\textquoteright}s mailing list. We explored six FOSS projects {\textendash} Buildroot, Busybox, Jaws, Parrot, uClibc, and Yum. We found a declining rate of female participation from the 8.27\% of subscribers, to 6.63\% of posters, and finally the often reported code contributor rate of 1.5\%. We found a disproportionate attrition rate among women along every step of the FOSS joining process.},
  keywords = {buildroot, busybox, diversity, email, jaws, joining process, mailing lists, parrot, uClibc, Yum},
  author = {Kuechler, Victor and Gilbertson, Claire and Jensen, Carlos}
}

% Squire, 2012: literature classification of FLOSS research papers that use email archives.
@article{1519,
  title = {How the {FLOSS} Research Community Uses Email Archives},
  journal = {International Journal of Open Source Software and Processes},
  volume = {4},
  year = {2012},
  note = {classifies 72 FLOSS papers into various analysis categories},
  month = dec,
  pages = {37--59},
  abstract = {Artifacts of the software development process, such as source code or emails between developers, are a frequent object of study in empirical software engineering literature. One of the hallmarks of free, libre, and open source software (FLOSS) projects is that the artifacts of the development process are publicly-accessible and therefore easily collected and studied. Thus, there is a long history in the FLOSS research community of using these artifacts to gain understanding about the phenomenon of open source software, which could then be compared to studies of software engineering more generally. This paper looks specifically at how the FLOSS research community has used email artifacts from free and open source projects. It provides a classification of the relevant literature using a publicly-available online repository of papers about FLOSS development using email.
The outcome of this paper is to provide a broad overview for the software engineering and FLOSS research communities of how other researchers have used FLOSS email message artifacts in their work.},
  keywords = {email, email archives, literature, mailing lists, review, survey},
  issn = {1942-3934},
  doi = {10.4018/jossp.2012010103},
  attachments = {https://flosshub.org/sites/flosshub.org/files/ijossp_v3_PREPRINT.pdf},
  author = {Squire, Megan}
}

% Barham, 2012: QA mailing lists and issue tracker data for the Mozilla group of projects.
% This is a paper with an author, so it is typed as inproceedings rather than proceedings.
% NOTE(review): booktitle was absent from the export; it is inferred from series volume 378 (OSS 2012) and should be confirmed.
@inproceedings{1457,
  title = {The Impact of Formal {QA} Practices on {FLOSS} Communities -- The Case of {Mozilla}},
  booktitle = {Eighth International Conference on Open Source Systems (OSS 2012)},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {378},
  year = {2012},
  month = sep,
  pages = {262--267},
  publisher = {Springer},
  abstract = {The number of FLOSS projects that include a QA step in the development model is increasing which suggests that a new layer may be emerging in the classic {\textquotedblleft}onion model{\textquotedblright}. This change might affect the information flow within projects and implicitly their sustainability. Communities, the essential resource of FLOSS projects, have been extensively studied but questions concerning QA remain. This paper takes a step towards answering such questions by analyzing QA mailing lists and issue tracker data for the Mozilla group of projects. Because the Bugzilla data set contains over half a million bugs, data processing and analysis is a considerable challenge for this research. The provisional conclusions are that QA activity may not be increasing steadily over time but is dependent on other factors and that the QA team and other groups of contributors form a highly connected network that doesn{\textquoteright}t contain isolates.},
  keywords = {email, information flow, mailing lists, mozilla, quality assurance, social network analysis, test},
  author = {Barham, Adina}
}

% Bacchelli et al., ICPC 2010: classifying e-mails that contain source code and extracting the code fragments.
@inproceedings{1358,
  title = {Extracting source code from e-mails},
  booktitle = {Proceedings of ICPC 2010 (18th IEEE International Conference on Program Comprehension)},
  year = {2010},
  note = {"We want to extract source code fragments from e-mail messages. To do this, we first need to select e-mails that contain source code fragments, and then we extract such fragments from the content in which they are enclosed." "we manually build a statistically significant benchmark taking sample e-mails from five unrelated open source Java software systems."},
  pages = {24--33},
  abstract = {E-mails, used by developers and system users to communicate over a broad range of topics, offer a valuable source of information. If archived, e-mails can be mined to support program comprehension activities and to provide views of a software system that are alternative and complementary to those offered by the source code. However, e-mails are written in natural language, and therefore contain noise that makes it difficult to retrieve the important data. Thus, before conducting an effective system analysis and extracting data for program comprehension, it is necessary to select the relevant messages, and to expose only the meaningful information. In this work we focus both on classifying e-mails that hold fragments of the source code of a system, and on extracting the source code pieces inside the e-mail. We devised and analyzed a number of lightweight techniques to accomplish these tasks.
To assess the validity of our techniques, we manually inspected and annotated a statistically significant number of e-mails from five unrelated open source software systems written in Java. With such a benchmark in place, we measured the effectiveness of each technique in terms of precision and recall.},
  keywords = {argouml, email, freenet, jmeter, mailing lists, mina, natural language, openjpa, source code},
  url = {http://www.inf.usi.ch/phd/bacchelli/publications.php},
  attachments = {https://flosshub.org/sites/flosshub.org/files/icpc2010.pdf},
  author = {Bacchelli, Alberto and D'Ambros, Marco and Lanza, Michele}
}

% Bird et al., 2010: LINKSTER, a tool for manual inspection and annotation of mined repository, mailing list and bug data.
% Only the tool name is brace-protected in the title; author names use the Last, First form throughout.
@inproceedings{bird2010lee,
  title = {{Linkster}: Enabling Efficient Manual Mining},
  booktitle = {Demonstration Track, Proceedings of the 17th SIGSOFT Symposium on Foundations of Software Engineering},
  year = {2010},
  note = {"LINKSTER efficiently displays, integrates, and allows inspection and annotation of information from three main sources of data: source code repositories, developer mailing lists archives, and bug tracking databases. LINKSTER requires access to a source code repository for file content and a database which contains the raw mined repository, mailing list, and bug tracking information. All notes and annotations made by the user are also recorded in the database."},
  publisher = {ACM},
  organization = {ACM},
  abstract = {While many uses of mined software engineering data are automatic in nature, some techniques and studies either require, or can be improved, by manual methods. Unfortunately, manually inspecting, analyzing, and annotating mined data can be difficult and tedious, especially when information from multiple sources must be integrated. Oddly, while there are numerous tools and frameworks for automatically mining and analyzing data, there is a dearth of tools which facilitate manual methods. To fill this void, we have developed LINKSTER, a tool which integrates data from bug databases, source code repositories, and mailing list archives to allow manual inspection and annotation. LINKSTER has already been used successfully by an OSS project lead to obtain data for one empirical study.},
  keywords = {artifacts, bug, bug tracking, data mining, email, mailing lists, open source, source code},
  attachments = {https://flosshub.org/sites/flosshub.org/files/bird2010lee.pdf},
  author = {Bird, Christian and Bachman, Adrian and Rahman, Foyzur and Bernstein, Abraham}
}

% Colaco Junior et al., MSR 2010: neurolinguistic text analysis of the Apache mailing list.
% The exported booktitle contained the conference name twice in a row; one copy is kept.
@inproceedings{962,
  title = {{OSS} developers context-specific Preferred Representational systems: A initial Neurolinguistic text analysis of the {Apache} mailing list},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {126--129},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {Open Source Software (OSS) mailing lists are used by developers to discuss software engineering tasks performed in the project. In the last years, researchers have been conducting mailing lists linguistic analyses for understanding the intricacies of OSS development. An unpublished approach for that is to use NeuroLinguistic Theory (NT). NT postulates the use of a Preferred Representational cognitive System (PRS) in specific contexts. This means that different resources and cognitive channels are used by developers in order to understand software, but what types of representational systems are the preferred by software engineers?
This paper introduces a psychometrically-based neuro-linguistic analysis tool to classify developers, and presents early results of an experiment to assess the PRS of four top developers against a baseline of the Apache server mailing list.},
  keywords = {apache, email, email archives, mailing lists, neurolinguistics},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463339},
  author = {Junior, Methanias Colaco and Mendonca, Manoel and Farias, Mario and Henrique, Paulo}
}

% Gamalielsson et al., OSCOMM 2010: response time on the Nagios-devel mailing list as a health measure.
% A stray unbalanced double quote at the end of the note was removed.
@inproceedings{1222,
  title = {Responsiveness as a measure for assessing the health of {OSS} ecosystems},
  booktitle = {Second International Workshop on Building Sustainable Open Source Communities (OSCOMM 2010)},
  year = {2010},
  note = {"Data was collected from the GMANE (gmane.org) archives of the SourceForge {\textquotedblleft}Nagios-devel{\textquotedblright} mailing list for the period from January 2004 to October 2009". response time, responsiveness.},
  month = may,
  abstract = {The health of an Open Source ecosystem is an important decision factor when considering the adoption of Open Source software or when monitoring a seeded Open Source project. In this paper we introduce responsiveness as a qualitative measure of the quality of replies within mailing lists, which can be used for assessing ecosystem health. We consider one specific metric of responsiveness in this paper, and that is the response time of follow-up messages in mailing lists. We also describe a way for characterising the nature of communication in messages with short and long response times. The approach is tested in the context of the Nagios project, and we particularly focus on the responsiveness for contributors acting in their professional roles as core developers. Our contribution is a step towards a deeper understanding of voluntary support provided in mailing lists of OSS projects.},
  keywords = {email, email archives, gmane, mailing lists, nagios, response time, sourceforge},
  attachments = {https://flosshub.org/sites/flosshub.org/files/osscomm002.pdf},
  author = {Gamalielsson, Jonas and Lundell, Bj{\"o}rn and Lings, Brian}
}

% Ibrahim et al., MSR 2010: models predicting which mailing list threads a developer will contribute to.
@inproceedings{967,
  title = {Should I contribute to this discussion?},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {181--190},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town},
  abstract = {Development mailing lists play a central role in facilitating communication in open source projects. Since these lists frequently host design and project discussions, knowledgeable contribution to these discussion threads is essential to avoid mis-communication that might slow-down the progress of a project. However, given the sheer volume of emails on these lists, it is easy to miss important discussions. To find out how developers are able to deal with mailing list discussions, we study the main factors that encourage developers to contribute to the development mailing lists. We develop personalized models to automatically identify discussion threads that a developer would contribute to based on his previous contribution behavior.
Case studies on development mailing lists of three open source projects (Apache, PostgreSQL and Python) show that the average accuracy of our models is 89-85\% and that the models vary significantly between different developers.},
  keywords = {apache, contributions, developers, email, email archives, mailing lists, postgresql, python},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463345},
  attachments = {https://flosshub.org/sites/flosshub.org/files/181ibrahim-msr2010.pdf},
  author = {Ibrahim, Walid M. and Bettenburg, Nicolas and Shihab, Emad and Adams, Bram and Hassan, Ahmed E.}
}

% Nia et al., MSR 2010: stability of social network metrics on reply-to networks built from Apache, MySQL and Perl mailing lists.
% The exported booktitle contained the conference name twice in a row; one copy is kept.
@inproceedings{969,
  title = {Validity of network analyses in Open Source Projects},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  note = {"We have mined archival records of developer mailing lists to generate reply-to social networks for the three OSS projects: Apache, MySQL, and Perl." "For each of these projects, we construct an information flow network based on messages that are sent as replies to previous messages....We use this methodology on all mined data to create a network of mailing list participants."},
  pages = {201--209},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {Social network methods are frequently used to analyze networks derived from Open Source Project communication and collaboration data. Such studies typically discover patterns in the information flow between contributors or contributions in these projects. Social network metrics have also been used to predict defect occurrence. However, such studies often ignore or side-step the issue of whether (and in what way) the metrics and networks of study are influenced by inadequate or missing data. In previous studies email archives of OSS projects have provided a useful trace of the communication and co-ordination activities of the participants. These traces have been used to construct social networks that are then subject to various types of analysis. However, during the construction of these networks, some assumptions are made, that may not always hold; this leads to incomplete, and sometimes incorrect networks. The question then becomes, do these errors affect the validity of the ensuing analysis? In this paper we specifically examine the stability of network metrics in the presence of inadequate and missing data. The issues that we study are: 1) the effect of paths with broken information flow (i.e. consecutive edges which are out of temporal order) on measures of centrality of nodes in the network, and 2) the effect of missing links on such measures. We demonstrate on three different OSS projects that while these issues do change network topology, the metrics used in the analysis are stable with respect to such changes.},
  keywords = {apache, email archives, mailing lists, missing data, mysql, perl, social networks},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463342},
  attachments = {https://flosshub.org/sites/flosshub.org/files/201NetworkAnalysis.pdf},
  author = {Nia, Roozbeh and Bird, Christian and Devanbu, Premkumar and Filkov, Vladimir}
}

% Bacchelli et al., WCRE 2009: benchmark of lightweight techniques linking ArgoUML e-mails to source code classes.
@inproceedings{1360,
  title = {Benchmarking Lightweight Techniques to Link E-Mails and Source Code},
  booktitle = {2009 16th Working Conference on Reverse Engineering},
  year = {2009},
  note = {"We present different lightweight approaches that, exploiting the specific characteristics of e-mails and the ones of the source code, are capable of establishing a bi-directional link between source code entities and e-mails" "We analyzed ArgoUML1, a UML modelling tool written in Java, developed over the course of approximately 9 years, and made available under the BSD Open Source License. We consider the release 0.28 (March 2009) that comprehends 2,197 classes. We employed the lightweight approaches to map such classes to the related e-mails in ArgoUML mailing lists. ArgoUML e-mails are stored in six mailing lists (see Table I), for a total amount of 79,175 messages" Figure 4 is helpful to understand what this does.},
  pages = {205--214},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Lille, France},
  abstract = {During the evolution of a software system, a large amount of information, which is not always directly related to the source code, is produced. Several researchers have provided evidence that the contents of mailing lists represent a valuable source of information: Through e-mails, developers discuss design decisions, ideas, known problems and bugs, etc. which are otherwise not to be found in the system. A technical challenge in this context is how to establish the missing link between free-form e-mails and the system artifacts they refer to. Although the range of approaches is vast, establishing their accuracy remains a problem, as there is no benchmark against which to compare their performance. To overcome this issue, we manually inspected a statistically significant number of e-mails pertaining to the ArgoUML system.
Based on this benchmark, we present a variety of lightweight techniques to assign e-mails to software artifacts and measure their effectiveness in terms of precision and recall.},
  keywords = {argouml, email, mailing lists},
  isbn = {978-0-7695-3867-9},
  doi = {10.1109/WCRE.2009.44},
  attachments = {https://flosshub.org/sites/flosshub.org/files/wcre2009.pdf},
  author = {Bacchelli, Alberto and D'Ambros, Marco and Lanza, Michele and Robbes, Romain}
}

% Robles et al., 2009: data sources in libre software projects and the tools for retrieving them.
% The exported month value 31/2009 is not a valid month and was dropped; restore it once the real month is known.
@article{1236,
  title = {Tools for the Study of the Usual Data Sources found in Libre Software Projects},
  journal = {International Journal of Open Source Software and Processes},
  volume = {1},
  year = {2009},
  pages = {24--45},
  abstract = {Due to the open nature of Free/Libre/Open Source software projects, researchers have gained access to a rich set of development-related information. Although this information is publicly available on the Internet, obtaining and analyzing it in a convenient way is not an easy task and many considerations have to be taken into account. In this paper we present the most important data sources that can be found in libre software projects and that are studied by the research community: source code, source code management systems, mailing lists and bug tracking systems. We will give advice for the problems that can be found when retrieving and preparing the data sources for a posterior analysis, as well as provide information about the tools that support these tasks.},
  keywords = {bug tracking systems, data sources, mailing lists, scm, tools},
  issn = {1942-3934},
  doi = {10.4018/jossp.2009010102},
  attachments = {https://flosshub.org/sites/flosshub.org/files/robles.pdf},
  author = {Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Izquierdo-Cortazar, Daniel and Herraiz, Israel}
}

% Pattison et al., MSR 2008: mentions of software entities in mailing lists versus changes to those entities.
% The doi field holds the bare identifier; the resolver form stays in url.
@inproceedings{Pattison:2008:TWP:1370750.1370776,
  title = {Talk and work: a preliminary report},
  booktitle = {Proceedings of the 2008 international working conference on Mining software repositories},
  series = {MSR '08},
  year = {2008},
  month = may,
  pages = {113--116},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Developers in Open Source Software (OSS) projects communicate using mailing lists. By convention, the mailing lists used only for task-related discussions, so they are primarily concerned with the software under development, and software process issues (releases, etc.). We focus on the discussions concerning the software, and study the frequency with which software entities (functions, methods, classes, etc) are mentioned in the mail. We find a strong, striking, cumulative relationship between this mention count in the email, and the number of times these entities are included in changes to the software. When we study the same phenomena over a series of time-intervals, the relationship is much less strong. This suggests some interesting avenues for future research.},
  keywords = {ant, apache, email, mailing lists, postgresql, python, scm, source code},
  isbn = {978-1-60558-024-1},
  doi = {10.1145/1370750.1370776},
  url = {http://doi.acm.org/10.1145/1370750.1370776},
  attachments = {https://flosshub.org/sites/flosshub.org/files/p113-pattison.pdf},
  author = {Pattison, David S. and Bird, Christian A. and Devanbu, Premkumar T.}
}

% Baysal and Malton, MSR 2007: correlating discussion archives with source code change history for LSEdit and Apache Ant.
% The exported page value was the degenerate range 7 - 7; it is a single page number.
@inproceedings{996,
  title = {Correlating Social Interactions to Release History during Software Evolution},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR'07: ICSE Workshops 2007)},
  year = {2007},
  pages = {7},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  abstract = {In this paper, we propose a method to reason about the nature of software changes by mining and correlating discussion archives. We employ an information retrieval approach to find correlation between source code change history and history of social interactions surrounding these changes. We apply our correlation method on two software systems, LSEdit and Apache Ant. The results of these exploratory case studies demonstrate the evidence of similarity between the content of free-form text emails among developers and the actual modifications in the code. We identify a set of correlation patterns between discussion and changed code vocabularies and discover that some releases referred to as minor should instead fall under the major category. These patterns can be used to give estimations about the type of a change and time needed to implement it.},
  keywords = {ant, apache, change management, developers, discussion, effort estimation, lsedit, mailing lists, scm, source code},
  isbn = {0-7695-2950-X},
  doi = {10.1109/MSR.2007.4},
  attachments = {https://flosshub.org/sites/flosshub.org/files/28300007.pdf},
  author = {Baysal, Olga and Malton, Andrew J.}
}

% Kidane and Gloor, 2007: communication patterns in Eclipse mailing lists related to performance and creativity.
@article{springerlink:10.1007/s10588-006-9006-3,
  title = {Correlating temporal communication patterns of the {Eclipse} open source community with performance and creativity},
  journal = {Computational \& Mathematical Organization Theory},
  volume = {13},
  year = {2007},
  note = {"Social network data was collected from the Eclipse component development groups{\textquoteright} online mailing lists by using the online process tool (Gloor and Zhao, 2004).
Data on bugs and enhancements for each group was collected from the Eclipse bugzilla database (Eclipse bugzilla, 2004). The social network data was analyzed with the TeCFlow tool (Gloor and Zhao, 2004)." "The study is based on data from the three main projects of the Eclipse open source development community, namely {\textquotedblleft}eclipse{\textquotedblright}, {\textquotedblleft}tools{\textquotedblright} and {\textquotedblleft}technology{\textquotedblright}. We have chosen thirty-three different component development groups for analysis." "The online process tool (online process tool, 2004) was utilized to collect communication data from their mailing list archives. The online process tool runs a robot that searches for URLs in the projects{\textquoteright} mailing list archives to compile a list of the possible URL links. It then extracts communication data as tuples in the form of {\textquotedblleft}sender, receiver, communication type, timestamp, communication contents{\textquotedblright} and stores it in the database. Further, bugs and enhancement data were collected from the Eclipse bugzilla database."},
  pages = {17--27},
  publisher = {Springer Netherlands},
  abstract = {This paper studies the temporal communication patterns of online communities of developers and users of the open source Eclipse Java development environment. It measures the productivity of each community and seeks to identify correlations that exist between group communication characteristics and productivity attributes. The study uses the TeCFlow (Temporal Communication Flow) visualizer to create movie maps of the knowledge flow by analyzing the publicly accessible Eclipse developer mailing lists as an approximation of the social networks of developers and users. Thirty-three different Eclipse communities discussing development and use of components of Eclipse such as the Java Development Tools, the different platform components, the C/C++ Development Tools and the AspectJ extension have been analyzed over a period of six months. The temporal evolution of social network variables such as betweenness centrality, density, contribution index, and degree have been computed and plotted. Productivity of each development group is measured in terms of two indices, namely performance and creativity. Performance of a group is defined as the ratio of new bugs submitted compared with bugs fixed within the same period of time. Creativity is calculated as a function of new features proposed and implemented. Preliminary results indicate that there is a correlation between attributes of social networks such as density and betweenness centrality and group productivity measures in an open source development community. We also find a positive correlation between changes over time in betweenness centrality and creativity, and a negative correlation between changes in betweenness centrality and performance.},
  keywords = {bug fixing, bugs, bugzilla, communication, creativity, developers, eclipse, email, email archives, feature requests, mailing lists, performance, productivity},
  issn = {1381-298X},
  doi = {10.1007/s10588-006-9006-3},
  url = {http://dx.doi.org/10.1007/s10588-006-9006-3},
  attachments = {https://flosshub.org/sites/flosshub.org/files/16.pdf},
  author = {Kidane, Yared and Gloor, Peter}
}

% Rigby and Hassan, MSR 2007: psychometric text analysis (LIWC) of the Apache httpd developer mailing list.
% The exported page value was the degenerate range 23 - 23; it is a single page number.
@inproceedings{1008,
  title = {What Can {OSS} Mailing Lists Tell Us? A Preliminary Psychometric Text Analysis of the {Apache} Developer Mailing List},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR'07: ICSE Workshops 2007)},
  year = {2007},
  pages = {23},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  abstract = {Developer mailing lists are a rich source of information about Open Source Software (OSS) development.
The unstructured nature of email makes extracting information difficult. We use a psychometrically-based linguistic analysis tool, the LIWC, to examine the Apache httpd server developer mailing list. We conduct three preliminary experiments to assess the appropriateness of this tool for information extraction from mailing lists. First, using LIWC dimensions that are correlated with the big five personality traits, we assess the personality of four top developers against a baseline for the entire mailing list. The two developers that were responsible for the major Apache releases had similar personalities. Their personalities were different from the baseline and the other developers. Second, the first and last 50 emails for two top developers who have left the project are examined. The analysis shows promise in understanding why developers join and leave a project. Third, we examine word usage on the mailing list for two major Apache releases. The differences may reflect the relative success of each release.}, keywords = {apache, developers, email, joining, liwc, mailing lists, personality}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.35}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300023.pdf}, author = {Peter C. Rigby and Hassan, Ahmed E.} } @conference {Tsunoda:2006:AOD:1137983.1138031, title = {Analyzing OSS developers{\textquoteright} working time using mailing lists archives}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {181{\textendash}182}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Our mining question is {\textquotedblleft}when OSS developers work?{\textquotedblright} OSS developers{\textquoteright} working time may be a good indicator to understand the development style of a project. 
(For example, if many developers work in office hour, these might be daily works in a company.)}, keywords = {developers, email, email archives, mailing lists, mining challenge, msr challenge, overtime work, postgresql, workload}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1138031}, url = {http://doi.acm.org/10.1145/1137983.1138031}, attachments = {https://flosshub.org/sites/flosshub.org/files/181AnalyzingOSS.pdf}, author = {Tsunoda, Masateru and Monden, Akito and Kakimoto, Takeshi and Kamei, Yasutaka and Matsumoto, Ken-ichi} } @conference {Bird:2006:MES:1137983.1138016, title = {Mining email social networks}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {137{\textendash}143}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Communication \& Co-ordination activities are central to large software projects, but are difficult to observe and study in traditional (closed-source, commercial) settings because of the prevalence of informal, direct communication modes. OSS projects, on the other hand, use the internet as the communication medium,and typically conduct discussions in an open, public manner. As a result, the email archives of OSS projects provide a useful trace of the communication and co-ordination activities of the participants. However, there are various challenges that must be addressed before this data can be effectively mined. Once this is done, we can construct social networks of email correspondents, and begin to address some interesting questions. These include questions relating to participation in the email; the social status of different types of OSS participants; the relationship of email activity and commit activity (in the CVS repositories) and the relationship of social status with commit activity. 
In this paper, we begin with a discussion of our infrastructure (including a novel use of Scientific Workflow software) and then discuss our approach to mining the email archives; and finally we present some preliminary results from our data analysis.}, keywords = {communication, contributions, developers, email, email archives, mailing lists, open source, social networks}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1138016}, url = {http://doi.acm.org/10.1145/1137983.1138016}, attachments = {https://flosshub.org/sites/flosshub.org/files/137MiningEmail.pdf}, author = {Christian Bird and Gourley, Alex and Devanbu, Prem and Gertz, Michael and Swaminathan, Anand} } @article {1122, title = {Understanding the Motivations, Participation, and Performance of Open Source Software Developers: A Longitudinal Study of the Apache Projects}, journal = {Management Science}, volume = {52}, year = {2006}, note = {"analyzing archival data collected from OSS project records over a period of four years" "Apart from the source and binary codes of the actual software programs, Apache products include developer websites, change logs, documentation, and developer communications in the form of e-mail archives. From these products, we extracted two types of information: each contributor{\textquoteright}s progression along the Apache career path, and each contributor{\textquoteright}s source code contributions to the project."}, month = {07/2006}, pages = {984 - 999}, abstract = {Understanding what motivates participation is a central theme in the research on open source software (OSS) development. Our study contributes by revealing how the different motivations of OSS developers are interrelated, how these motivations influence participation leading to performance, and how past performance influences subsequent motivations. 
Drawing on theories of intrinsic and extrinsic motivation, we develop a theoretical model relating the motivations, participation, and performance of OSS developers. We evaluate our model using survey and archival data collected from a longitudinal field study of software developers in the Apache projects. Our results reveal several important findings. First, we find that developers{\textquoteright} motivations are not independent but rather are related in complex ways. Being paid to contribute to Apache projects is positively related to developers{\textquoteright} status motivations but negatively related to their use-value motivations. Perhaps surprisingly, we find no evidence of diminished intrinsic motivation in the presence of extrinsic motivations; rather, status motivations enhance intrinsic motivations. Second, we find that different motivations have an impact on participation in different ways. Developers{\textquoteright} paid participation and status motivations lead to above-average contribution levels, but use-value motivations lead to below-average contribution levels, and intrinsic motivations do not significantly impact average contribution levels. Third, we find that developers{\textquoteright} contribution levels positively impact their performance rankings. Finally, our results suggest that past-performance rankings enhance developers{\textquoteright} subsequent status motivations.}, keywords = {apache, change logs, contributions, email, email archives, extrinsic motivation, intrinsic motivation, mailing lists, MOTIVATION, open source software, participation, software development performance, source code, status, Survey}, issn = {1526-5501}, doi = {10.1287/mnsc.1060.0554}, author = {Roberts, Jeffrey A. 
and Il-Horn Hann and Slaughter, Sandra A.} } @conference {German:2006:UEA:1137983.1138020, title = {Using evolutionary annotations from change logs to enhance program comprehension}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {159{\textendash}162}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Evolutionary annotations are descriptions of how source code evolves over time. Typical source comments, given their static nature, are usually inadequate for describing how a program has evolved over time; instead, source code comments are typically a description of what a program currently does. We propose the use of evolutionary annotations as a way of describing the rationale behind changes applied to a given program (for example "These lines were added to ..."). Evolutionary annotations can assist a software developer in the understanding of how a given portion of source code works by showing him how the source has evolved into its current form.In this paper we describe a method to automatically create evolutionary annotations from change logs, defect tracking systems and mailing lists. We describe the design of a prototype for Eclipse that can filter and present these annotations alongside their corresponding source code and in workbench views. We use Apache as a test case to demonstrate the feasibility of this approach.}, keywords = {annotations, apache, bug tracking, change history, eclipse, evolutionary, log files, mailing lists, mining software repositories, software evolution, version control}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1138020}, url = {http://doi.acm.org/10.1145/1137983.1138020}, attachments = {https://flosshub.org/sites/flosshub.org/files/159UsingEvolutionary.pdf}, author = {Daniel M. German and Peter C. 
Rigby and Storey, Margaret-Anne} } @article {146, title = {Community, joining, and specialization in open source software innovation: a case study}, journal = {Research Policy}, volume = {32}, number = {7}, year = {2003}, note = {first, telephone interviews "Secondly, we collected the project{\textquoteright}s public email conversations stored in the projects{\textquoteright} mailing lists which is archived on Freenet{\textquoteright}s website" "The third source of data included the history of changes to the software code available via the project{\textquoteright}s software repository within the CVS ({\textquoteleft}Concurrent Versioning System{\textquoteright}) source code management tool" "Fourthly, in order obtain contextual understanding of the project we collected publicly available documents related to open source in general and to the project in particular. Among the most important sources were the Freenet project web pages (e.g. the Frequently Asked Questions (FAQ)7), Ian Clarke{\textquoteright}s master thesis (1999), newspaper interviews with the core developers, and a technical paper (Clarke, Sandberg, Wiley, \& Hong, 2000) describing the Freenet project written by some of the developers."}, pages = {1217-1241}, abstract = {This paper develops an inductive theory of the open source software innovation process by focussing on the creation of Freenet, a project aimed at developing a decentralized and anonymous peer-to-peer electronic file sharing network. We are particularly interested in the strategies and processes by which new people join the existing community of software developers, and how they initially contribute code. Analyzing data from multiple sources on the Freenet software development process, we generate the constructs of "joining script", We are grateful to helpful comments from two anonymous reviewers. 
We also thank Chris Argyris, John Seely Brown, Eric von Hippel, Stefan Haefliger, Petra Kugler, Heike Bruch, Simon Gchter, Simon Peck, and Hari Tsoukas for helpful comments and suggestions. Ben Ho and Craig Lebowitz provided technical assistance with data importation and parsing. We would like to thank Ian Clarke and the Freenet developers for their willingness to participate in our study and providing key insights into the open source development process. Karim R. Lakhani would like to acknowledge the generous support of The Boston Consulting Group and Canada{\textquoteright}s Social Science and Humanities Research Council doctoral fellowship. Georg von Krogh and Sebastian Spaeth acknowledge the generous support from the Research Foundation at the University of St. Gallen.}, keywords = {cvs, email, email archives, freenet, INNOVATION, mailing lists, roles, source code}, doi = {http://dx.doi.org/10.1016/S0048-7333(03)00050-7}, attachments = {https://flosshub.org/sites/flosshub.org/files/krogh03.pdf}, author = {Georg von Krogh and Spaeth, S. and Karim R Lakhani} } @conference {1160, title = {Adopting OSS Methods by Adopting OSS Tools}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, abstract = {The open source movement has created and used a set of software engineering tools with features that fit the characteristics of open source development processes. To a large extent, the open source culture and methodology are conveyed to new developers via the toolset itself, and through the demonstrated usage of these tools on existing projects. The rapid and wide adoption of open source tools stands in stark contrast to the difficulties encountered in adopting traditional CASE tools. 
This paper explores the characteristics that make these tools adoptable and how adopting them may influence software development processes.}, keywords = {ant, argouml, bugzilla, cactus, cvs, developers, eclipse, emacs, email, faq, junit, mailing lists, make, netbeans, package management, rpm, scarab, subversion, teams, tools, torque, WORK}, attachments = {https://flosshub.org/sites/flosshub.org/files/Robbins.pdf}, author = {Robbins, Jason E.} }