@comment{NOTE(review): 1917 and 1918 look like conference papers (authors plus a page range) but the source data has no booktitle; add the proceedings title.}

@inproceedings{1917,
  title = {Understanding the Impressions, Motivations, and Barriers of One Time Code Contributors to {FLOSS} Projects: A Survey},
  year = {2017},
  month = may,
  pages = {187--197},
  abstract = {Successful Free/Libre Open Source Software (FLOSS) projects must attract and retain high-quality talent. Researchers have invested considerable effort in the study of core and peripheral FLOSS developers. To this point, one critical subset of developers that have not been studied are One-Time code Contributors (OTC) {\textendash} those that have had exactly one patch accepted. To understand why OTCs have not contributed another patch and provide guidance to FLOSS projects on retaining OTCs, this study seeks to understand the impressions, motivations, and barriers experienced by OTCs. We conducted an online survey of OTCs from 23 popular FLOSS projects. Based on the 184 responses received, we observed that OTCs generally have positive impressions of their FLOSS project and are driven by a variety of motivations. Most OTCs primarily made contributions to fix bugs that impeded their work and did not plan on becoming long term contributors. Furthermore, OTCs encounter a number of barriers that prevent them from continuing to contribute to the project. Based on our findings, there are some concrete actions FLOSS projects can take to increase the chances of converting OTCs into long-term contributors.},
  keywords = {newcomers, One Time Contributors, Qualitative Research, Survey},
  author = {Lee, Amanda and Carver, Jeffrey C. and Bosu, Amiangshu}
}

@inproceedings{1918,
  title = {Using Gamification to Orient and Motivate Students to Contribute to {OSS} Projects},
  year = {2017},
  month = may,
  pages = {36--42},
  abstract = {Students can benefit from contributing to Open Source Software (OSS), since they can enrich their portfolio and learn with real world projects. However, sometimes students are demotivated to contribute due to entrance barriers.
On the other hand, gamification is widely used to engage and motivate people to accomplish tasks and improve their performance. The goal of this work is to analyze the use of gamification to orient and motivate undergraduate students to overcome onboarding barriers and engage to OSS projects. To achieve this goal, we implemented four gaming elements (Quests, Points, Ranking, and Levels) in GitLab and assessed the environment by means of a study conducted with 17 students within a real OSS project (JabRef). At the end of the study, the students evaluated their experience through a questionnaire. We found that the Quest element helped to guide participants and keep them motivated and points helped by providing feedback on students{\textquoteright} performed tasks. We conclude that the gamified environment oriented the students in an attempt to make a contribution and that gamification can motivate and orient newcomers{\textquoteright} to engage to OSS projects.},
  keywords = {engagement, gamification, MOTIVATION, newcomers, students},
  author = {Diniz, Guilherme C. and Graciotto Silva, Marco A. and Gerosa, Marco and Steinmacher, Igor}
}

@incollection{1732,
  title = {How Developers Acquire {FLOSS} Skills},
  booktitle = {Open Source Systems: Adoption and Impact},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {451},
  year = {2015},
  pages = {23--32},
  publisher = {Springer International Publishing},
  abstract = {With the increasing prominence of open collaboration as found in free/libre/open source software projects and other joint production communities, potential participants need to acquire skills. How these skills are learned has received little research attention. This article presents a large-scale survey (5,309 valid responses) in which users and developers of the beta release of a popular file download application were asked which learning styles were used to acquire technical and social skills.
We find that the extent to which a person acquired the relevant skills through informal methods tends to be higher if the person is a free/libre/open source code contributor, while being a professional software developer does not have this effect. Additionally, younger participants proved more likely to make use of formal methods of learning. These insights will help individuals, commercial companies, educational institutions, governments and open collaborative projects decide how they promote learning.},
  keywords = {competencies, Informal learning, Non-formal learning, open source, Skills, Software developer},
  isbn = {978-3-319-17836-3},
  doi = {10.1007/978-3-319-17837-0_3},
  url = {http://dx.doi.org/10.1007/978-3-319-17837-0_3},
  attachments = {https://flosshub.org/sites/flosshub.org/files/oss-2015.pdf},
  author = {Barcomb, Ann and Grottke, Michael and Stauffert, Jan-Philipp and Riehle, Dirk and Jahn, Sabrina},
  editor = {Damiani, Ernesto and Frati, Fulvio and Riehle, Dirk and Wasserman, Anthony I.}
}

@inproceedings{1709,
  title = {Understanding and Supporting the Choice of an Appropriate Task to Start With In Open Source Software Communities},
  booktitle = {Hawaii International Conference on System Sciences 48},
  year = {2015},
  pages = {5299--5308},
  publisher = {IEEE},
  abstract = {Open Source Software (OSS) projects leverage the contribution of outsiders. In fact, the sustainability of many projects relies on retaining some of these newcomers. Usually these communities do not coordinate the work of the newcomers, who go to the issue trackers and self-select a task to start with. We found that {\textquotedblleft}finding a way to start{\textquotedblright} was reported as an important issue by practitioners. To further investigate this specific barrier, we conducted a qualitative analysis with data obtained from semi-structured interviews with 36 subjects from 14 different projects, including newcomers and experienced members.
We used procedures of Grounded Theory {\textendash} open and axial coding {\textendash} to analyze the data. We found that newcomers are not confident enough to choose their initial task and they need information about the tasks or direction from the community to support choosing a task more suitable for them. We also present a set of strategies identified in the literature, interviews, and state-of-the practice that can provide newcomers with such information, enabling them to be more confident when choosing their first tasks and collaborate with the community.},
  keywords = {coordination, joining process, new developer, newcomers, onboarding, open source software, task selection},
  attachments = {https://flosshub.org/sites/flosshub.org/files/HICSS-48_present.pdf},
  author = {Steinmacher, Igor and Conte, Tayana and Gerosa, Marco Aur{\'e}lio}
}

@incollection{1603,
  title = {A Performance Analysis of Wireless Mesh Networks Implementations Based on Open Source Software},
  booktitle = {Open Source Software: Mobile Open Source Technologies},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {427},
  year = {2014},
  pages = {107--110},
  publisher = {Springer Berlin Heidelberg},
  abstract = {Wireless mesh networks (WMNs) have emerged as a promising technology, capable of provide broadband connectivity at low cost. Implementations based on Open Source Software of these networks offer advantages for providing broadband networking communications in scenarios where cabling is too expensive or prohibitive such as rural environments. In this paper we evaluate the performance of small scale wireless mesh WMN routing protocols for WMNs: B.A.T.M.A.N. Advanced and the 802.11s standard. We also compare an OpenFlow controller implemented over the WMN, verifying their bandwidth, datagram loss and jitter.
},
  keywords = {network performance, Open Source Software for research and innovation, OpenFlow, OpenWRT, Wireless Mesh Networks},
  isbn = {978-3-642-55127-7},
  doi = {10.1007/978-3-642-55128-4_14},
  url = {http://dx.doi.org/10.1007/978-3-642-55128-4_14},
  author = {Armuelles Voinov, Iv{\'a}n and Cede{\~n}o, Aidelen Chung and Chung, Joaqu{\'\i}n and Gonz{\'a}lez, Grace},
  editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.}
}

@article{1745,
  title = {Sentiment Analysis of Free/Open Source Developers: Preliminary Findings from a Case Study},
  journal = {Revista Eletr{\^o}nica de Sistemas de Informa{\c c}{\~a}o},
  volume = {13},
  year = {2014},
  month = aug,
  abstract = {Software development is a human intensive activity. And as such, how developers face their tasks is of major importance. In an environment such as the one that is common in FOSS (free/open source software) projects where professionals (i.e., paid developers) share the development effort with volunteers, the morale of the development and user community is of major importance. In this paper, we present a preliminary analysis using sentiment analysis techniques to a FOSS project. We therefore mine the mailing list of a project and apply these techniques to the most relevant participants. Although the application is at this time limited, we hope that this experience can be of benefit in the future to determine situations that may affect the developers or the project, such as low productivity, developer abandonment, project forking, etc.
},
  keywords = {developer productivity, FLOSS, mailing lists, natural language processing, openSUSE, sentiment analysis, software development, software repository mining},
  doi = {10.5329/RESI.2014.1302006},
  url = {http://189.16.45.2/ojs/index.php/reinfo/article/view/1677},
  attachments = {https://flosshub.org/sites/flosshub.org/files/1677-6732-1-PB.pdf},
  author = {Rousinopoulos, Athanasios-Ilias and Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M.}
}

@incollection{1604,
  title = {Use of Open Software Tools for Data Offloading Techniques Analysis on Mobile Networks},
  booktitle = {Open Source Software: Mobile Open Source Technologies},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {427},
  year = {2014},
  pages = {111--112},
  publisher = {Springer Berlin Heidelberg},
  abstract = {This research aims to highlight the benefits of using free software based tools for studying a LTE mobile network with realistic parameters. We will overload this LTE network and offload it through data offloading techniques such as small cells and WiFi offload. For this research, discrete-event open software network simulator ns3 will be implemented. Ns3 is a network simulator based on the programming language C++, and has all the necessary libraries to simulate an LTE and WiFi network.},
  keywords = {Data Offloading, LTE, ns3, OSS for research and education, small cells, WiFi},
  isbn = {978-3-642-55127-7},
  doi = {10.1007/978-3-642-55128-4_15},
  url = {http://dx.doi.org/10.1007/978-3-642-55128-4_15},
  author = {Koo, Jos{\'e} M. and Espino, Juan P.
and Armuelles, Iv{\'a}n and Villarreal, Rub{\'e}n},
  editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.}
}

@comment{NOTE(review): 1501 and 1521 look like conference papers (authors, month, pages) but the source data has no booktitle; add the proceedings title.}

@inproceedings{1501,
  title = {The Impact of Tangled Code Changes},
  year = {2013},
  month = may,
  abstract = {When interacting with version control systems, developers often commit unrelated or loosely related code changes in a single transaction. When analyzing the version history, such tangled changes will make all changes to all modules appear related, possibly compromising the resulting analyses through noise and bias. In an investigation of five open-source JAVA projects, we found up to 15\% of all bug fixes to consist of multiple tangled changes. Using a multi-predictor approach to untangle changes, we show that on average at least 16.6\% of all source files are incorrectly associated with bug reports. We recommend better change organization to limit the impact of tangled changes.},
  keywords = {bias, data quality, history, java, mining software repositories, noise, tangled code changes, version control},
  url = {http://www.kim-herzig.de/wp-content/uploads/2013/03/msr2013-untangling.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/msr2013-untangling.pdf},
  author = {Herzig, Kim and Zeller, Andreas}
}

@inproceedings{1521,
  title = {It{\textquoteright}s Not a Bug, It{\textquoteright}s a Feature: How Misclassification Impacts Bug Prediction},
  year = {2013},
  month = may,
  pages = {392--401},
  abstract = {In a manual examination of more than 7,000 issue reports from the bug databases of five open-source projects, we found 33.8\% of all bug reports to be misclassified{\textemdash}that is, rather than referring to a code fix, they resulted in a new feature, an update to documentation, or an internal refactoring. This misclassification introduces bias in bug prediction models, confusing bugs and features: On average, 39\% of files marked as defective actually never had a bug.
We discuss the impact of this misclassification on earlier studies and recommend manual data validation for future studies.},
  keywords = {bias, bug reports, data quality, mining software repositories, noise},
  author = {Herzig, Kim and Just, Sascha and Zeller, Andreas}
}

@comment{NOTE(review): 1561 and 1515 look like conference papers but the source data has no booktitle; add the proceedings title.}

@inproceedings{1561,
  title = {A Replicable Infrastructure for Empirical Studies of Email Archives},
  year = {2013},
  month = oct,
  pages = {43--50},
  publisher = {IEEE},
  address = {Baltimore, MD, USA},
  abstract = {This paper describes a replicable infrastructure solution for conducting empirical software engineering studies based on email mailing list archives. Mailing list emails, such as those affiliated with free, libre, and open source software (FLOSS) projects, are currently archived in several places online, but each research team that wishes to study these email artifacts closely must design their own solution for collection, storage and cleaning of the data. Consequently, research results will be difficult to replicate, especially as the email archive for any living project will still be continually growing. This paper describes a simple, replicable infrastructure for the collection, storage, and cleaning of project email data and analyses.},
  keywords = {apache, cleaning, collection, couchdb, database, document-oriented database, email, lucene, mailing lists, nosql, replication, storage},
  isbn = {978-0-7695-5121-0},
  attachments = {https://flosshub.org/sites/flosshub.org/files/RESERv2.pdf},
  author = {Squire, Megan}
}

@inproceedings{1515,
  title = {Using Citation Influence to Predict Software Defects},
  year = {2013},
  month = may,
  abstract = {The software dependency network reflects structure and the developer contribution network reflects process. Previous studies have used social network properties over these networks to predict whether a software component is defect-prone. However, these studies do not consider the strengths of the dependencies in the networks.
In our approach, we use a citation influence topic model to determine dependency strengths among components and developers, analyze weak and strong dependencies separately, and apply social network properties to predict defect-prone components. In experiments on Eclipse and NetBeans, our approach has higher accuracy than prior work.},
  keywords = {eclipse, netbeans, social network},
  author = {Hu, Wei and Wong, Kenny}
}

@comment{NOTE(review): 1497 has neither an author nor a booktitle field in the source data; fill both in from the paper (see its url/attachments).}

@inproceedings{1497,
  title = {Why So Complicated? Simple Term Filtering and Weighting for Location-Based Bug Report Assignment Recommendation},
  year = {2013},
  month = may,
  pages = {2--11},
  abstract = {Large software development projects receive many bug reports and each of these reports needs to be triaged. An important step in the triage process is the assignment of the report to a developer. Most previous efforts towards improving bug report assignment have focused on using an activity-based approach. We address some of the limitations of activity-based approaches by proposing a two-phased location-based approach where bug report assignment recommendations are based on the predicted location of the bug. The proposed approach utilizes a noun extraction process on several information sources to determine bug location information and a simple term weighting scheme to provide a bug report assignment recommendation.
We found that by using a location-based approach, we achieved an accuracy of 89.41\% and 59.76\% when recommending five developers for the Eclipse and Mozilla projects, respectively.},
  keywords = {bug, bug report assignment, eclipse, File Activity Histories, Mining Software Artifacts, mozilla, Named Entity Recognition, POS Filtering},
  url = {http://www.cwu.edu/~janvik/papers/msr2013.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/msr2013.pdf}
}

@inproceedings{1357,
  title = {Content classification of developer emails},
  booktitle = {Proceedings of the 34th IEEE/ACM International Conference On Software Engineering (ICSE 2012)},
  year = {2012},
  note = {We created a web application to manually classify email content in the chosen categories. We classified a statistically significant set of emails from four java open source software (OSS) systems, used to evaluate the accuracy of our approach. The contributions of this paper are: 1) a novel approach that fuses parsing and ML techniques for classification of email lines; 2) a web application to manually classify email content; 3) the manual classification of a statistically significant sample set of emails (for a total of 67,792 lines) from mailing lists of four different software systems{\textendash}in the form of a freely available benchmark; and 4) the empirical evaluation of our approach against the benchmark},
  month = jun,
  abstract = {Emails related to the development of a software system contain information about design choices and issues encountered during the development process. Exploiting the knowledge embedded in emails with automatic tools is challenging, due to the unstructured, noisy and mixed language nature of this communication medium. Natural language text is often not well-formed and is interleaved with languages with other syntaxes, such as code or stack traces. We present an approach to classify email content at line level.
Our technique classifies email lines in five categories (i.e., text, junk, code, patch, and stack trace) to allow one to subsequently apply ad hoc analysis techniques for each category. We evaluated our approach on a statistically significant set of emails gathered from mailing lists of four unrelated open source systems.},
  keywords = {email, Emails, Empirical software engineering, mailing list, natural language, Unstructured Data Mining},
  url = {http://www.inf.usi.ch/phd/bacchelli/publications.php},
  attachments = {https://flosshub.org/sites/flosshub.org/files/icse2012.pdf},
  author = {Bacchelli, Alberto and Dal Sasso, Tommaso and D'Ambros, Marco and Lanza, Michele}
}

@comment{NOTE(review): 1455 looks like a conference paper but the source data has no booktitle; add the proceedings title. The original publisher value "IFIP AICT, Springer" was split into series + publisher -- confirm.}

@inproceedings{1455,
  title = {Open-Source Technologies Realizing Social Networks: A Multiple Descriptive Case-Study},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {378},
  year = {2012},
  month = sep,
  pages = {250--255},
  publisher = {Springer},
  abstract = {This article aims at describing the role of the open-source software phenomenon within high-tech corporations providing social networks and applications. By taking a multiple case study approach, We address what are the open-source software technological components embedded by leading social networking players, and a rich description on how those players collaborate with the open-source community. Our findings, based on a population of three commercial providers of social networks a suggest that open-source plays an important role on the technological development of their social networking platforms. An open-source technological stack for realizing social networks is proposed and several managerial issues dealing with collaboration with open-source communities are explored.},
  keywords = {entrepreneurship, facebook, netlog, social networks, spotify},
  author = {Teixeira, Jose}
}

@inproceedings{1314,
  title = {Apples vs.
oranges?},
  booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11},
  year = {2011},
  note = {"In this MSR challenge report, we compare the source code of two industrial grade Integrated Development Environments (IDE): Netbeans, developed by Oracle Corporation, and Eclipse, developed by the Eclipse Foundation. In the same spirit as [1], our goal is not to identify how similar or different they are, but to identify differences that, if not taken into consideration, might result in biased, and potentially erroneous conclusions."},
  month = may,
  pages = {246--249},
  publisher = {ACM Press},
  address = {New York, New York, USA},
  abstract = {We attempt to compare the source code of two Java IDE systems: Netbeans and Eclipse. The result of this experiment shows that many factors, if ignored, could risk a bias in the results, and we posit various observations that should be taken into consideration to minimize such risk.},
  keywords = {eclipse, netbeans, source code},
  isbn = {9781450305747},
  doi = {10.1145/1985441.1985483},
  author = {Davies, Julius and German, Daniel M.}
}

@comment{NOTE(review): 1278 looks like a conference paper but the source data has no booktitle; add the proceedings title.}

@inproceedings{1278,
  title = {Building Knowledge in Open Source Software Research in Six Years of Conferences},
  year = {2011},
  note = {RQ1. Is there any social network underlying the research production at the OSS conference series? RQ2. What are the major streams of research proposed at the OSS conference series?},
  month = oct,
  pages = {123--141},
  publisher = {Springer},
  abstract = {Since its origins, the diffusion of the OSS phenomenon and the information about it has been entrusted to the Internet and its virtual communities of developers. This public mass of data has attracted the interest of researchers and practitioners aiming at formalizing it into a body of knowledge.
To this aim, in 2005, a new series of conferences on OSS started to collect and convey OSS knowledge to the research and industrial community. Our work mines articles of the OSS conference series to understand the process of knowledge grounding and the community surrounding it. As such, we propose a semi-automated approach for a systematic mapping study on these articles. We automatically build a map of cross-citations among all the papers of the conferences and then we manually inspect the resulting clusters to identify knowledge building blocks and their mutual relationships. We found that industry-related, quality assurance, and empirical studies often originate or maintain new streams of research.},
  keywords = {Cross-citations, flossmole cited, graph, literature review, network, research, Systematic Mapping Study},
  author = {Mulazzini, Fabio and Rossi, Bruno and Russo, Barbara and Steff, Maximilian}
}

@inproceedings{1313,
  title = {Do comments explain codes adequately?},
  booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11},
  year = {2011},
  month = may,
  pages = {242--245},
  publisher = {ACM Press},
  address = {New York, New York, USA},
  abstract = {Comment lines in the software source code include descriptions of codes, usage of codes, copyrights, unused codes, comments, and so on. It is required for comments to explain the content of written code adequately, since the wrong description in the comment may causes further bug and confusion in maintenance. In this paper, we try to clarify a research question: "In which projects do comments describe the code adequately?" To answer this question, we selected the group 1 of mining challenge and used data obtained from Eclipse and Netbeans. Since it is difficult to answer the above question directly, we define the distance between codes and comments.
By utilizing the fault-prone module prediction technique, we can answer the alternative question from the data of two projects. The result shows that Eclipse project has relatively adequate comments.},
  keywords = {comments, eclipse, msr challenge, netbeans, prediction},
  isbn = {9781450305747},
  doi = {10.1145/1985441.1985482},
  author = {Mizuno, Osamu and Hirata, Yukinao}
}

@comment{NOTE(review): 1281 looks like a conference paper but the source data has no booktitle; add the proceedings title.}

@inproceedings{1281,
  title = {License Update and Migration Processes in Open Source Software Projects},
  year = {2011},
  note = {"The case studies in this report are part of an ongoing, multi-year research project discovering and modeling open source software processes. Our research methodology is ethnographically informed, applying a grounded theory to the analysis of artifacts found in OSS projects. The primary data sources in this study come from mailing list archives of the Apache and NetBeans projects."},
  month = oct,
  pages = {177--195},
  publisher = {Springer},
  abstract = {Open source software (OSS) has increasingly been the subject of research efforts. Central to this focus is the nature under which the software can be distributed, used, and modified and the causes and consequent effects on software development, usage, and distribution. At present, we have little understanding of, what happens when these licenses change, what motivates such changes, and how new licenses are created, updated, and deployed. Similarly, little attention has been paid to the agreements under which contributions are made to OSS projects and the impacts of changes to these agreements. We might also ask these same questions regarding the licenses governing how individuals and groups contribute to OSS projects.
This paper focuses on addressing these questions with case studies of processes by which the Apache Software Foundation{\textquoteright}s creation and migration to Version 2.0 of the Apache Software License and the NetBeans project{\textquoteright}s migration to the Joint Licensing Agreement.},
  keywords = {apache, case study, email, email archive, license evolution, mailing list, netbeans, open source, process},
  attachments = {https://flosshub.org/sites/flosshub.org/files/1.pdf},
  author = {Jensen, Chris and Scacchi, Walt}
}

@comment{NOTE(review): 1275 looks like a conference paper but the source data has no booktitle; add the proceedings title.}

@inproceedings{1275,
  title = {Preparing {FLOSS} for Future Network Paradigms: A Survey on {Linux} Network Management},
  year = {2011},
  month = oct,
  pages = {75--89},
  publisher = {Springer},
  abstract = {Operating system tools must fulfill the requirements generated by the advances in networking paradigms. To understand the current state of the Free, Libre and Open Source Software (FLOSS) ecosystem, we present a survey on the main tools used to manage and interact with the network, and how they are organized in Linux-based operating systems. Based on the survey results, we present a reference Linux network stack that can serve as the basis for future heterogeneous network environments, contributing towards a standardized approach in Linux.
Using this stack, and focusing on dynamic and spontaneous network interactions, we present an evolution path for network related technologies, contributing to Linux as a network research operating system and to FLOSS as a whole.},
  keywords = {linux, networking, Survey},
  author = {Matos, Alfredo and Thomson, John and Trezentos, Paulo}
}

@inproceedings{1310,
  title = {System compatibility analysis of {Eclipse} and {Netbeans} based on bug data},
  booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11},
  year = {2011},
  month = may,
  pages = {230--233},
  publisher = {ACM Press},
  address = {New York, New York, USA},
  abstract = {Eclipse and Netbeans are two top of the line Integrated Development Environments (IDEs) for Java development. Both of them provide support for a wide variety of development tasks and have a large user base. This paper provides an analysis and comparison for the compatibility and stability of Eclipse and Netbeans on the three most commonly used operating systems, Windows, Linux and Mac OS. Both IDEs are programmed in Java and use a Bugzilla issue tracker to track reported bugs and feature requests. We looked into the Bugzilla repository databases of these two IDEs, which contains the bug records and histories of these two IDEs. We used some basic data mining techniques to analyze some historical statistics of the bug data. Based on the analysis, we try to answer certain stability-comparison oriented questions in the paper, so that users can have a better idea which of these two IDEs is designed better to work on different platforms.
},
  keywords = {bug tracking system, bugzilla, eclipse, msr challenge, netbeans, version history},
  isbn = {9781450305747},
  doi = {10.1145/1985441.1985479},
  author = {Baik, Eilwoo and Devanbu, Premkumar and Wang, Xinlei (Oscar)}
}

@inproceedings{954,
  title = {Clones: What is that smell?},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {72--81},
  publisher = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {Clones are generally considered bad programming practice in software engineering folklore. They are identified as a bad smell and a major contributor to project maintenance difficulties. Clones inherently cause code bloat, thus increasing project size and maintenance costs. In this work, we try to validate the conventional wisdom empirically to see whether cloning makes code more defect prone. This paper analyses relationship between cloning and defect proneness. We find that, first, the great majority of bugs are not significantly associated with clones. Second, we find that clones may be less defect prone than non-cloned code. Finally, we find little evidence that clones with more copies are actually more error prone. Our findings do not support the claim that clones are really a "bad smell".
Perhaps we can clone, and breathe easy, at the same time.},
  keywords = {apache, bug fix revisions, bugs, clone, evolution, gimp, nautilus, scm, source code},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463343},
  attachments = {https://flosshub.org/sites/flosshub.org/files/72rahman2010cws.pdf},
  author = {Rahman, Foyzur and Bird, Christian and Devanbu, Premkumar}
}

@inproceedings{1358,
  title = {Extracting source code from e-mails},
  booktitle = {Proceedings of ICPC 2010 (18th IEEE International Conference on Program Comprehension)},
  year = {2010},
  note = {"We want to extract source code fragments from e-mail messages. To do this, we first need to select e-mails that contain source code fragments, and then we extract such fragments from the content in which they are enclosed." "we manually build a statistically significant benchmark taking sample e-mails from five unrelated open source Java software systems."},
  pages = {24--33},
  abstract = {E-mails, used by developers and system users to communicate over a broad range of topics, offer a valuable source of information. If archived, e-mails can be mined to support program comprehension activities and to provide views of a software system that are alternative and complementary to those offered by the source code. However, e-mails are written in natural language, and therefore contain noise that makes it difficult to retrieve the important data. Thus, before conducting an effective system analysis and extracting data for program comprehension, it is necessary to select the relevant messages, and to expose only the meaningful information. In this work we focus both on classifying e-mails that hold fragments of the source code of a system, and on extracting the source code pieces inside the e-mail. We devised and analyzed a number of lightweight techniques to accomplish these tasks.
To assess the validity of our techniques, we manually inspected and annotated a statistically significant number of e-mails from five unrelated open source software systems written in Java. With such a benchmark in place, we measured the effectiveness of each technique in terms of precision and recall.},
  keywords = {argouml, email, freenet, jmeter, mailing lists, mina, natural language, openjpa, source code},
  url = {http://www.inf.usi.ch/phd/bacchelli/publications.php},
  attachments = {https://flosshub.org/sites/flosshub.org/files/icpc2010.pdf},
  author = {Bacchelli, Alberto and D'Ambros, Marco and Lanza, Michele}
}

@inproceedings{962,
  title = {{OSS} developers context-specific Preferred Representational systems: A initial Neurolinguistic text analysis of the {Apache} mailing list},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {126--129},
  publisher = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {Open Source Software (OSS) mailing lists are used by developers to discuss software engineering tasks performed in the project. In the last years, researchers have been conducting mailing lists linguistic analyses for understanding the intricacies of OSS development. An unpublished approach for that is to use NeuroLinguistic Theory (NT). NT postulates the use of a Preferred Representational cognitive System (PRS) in specific contexts. This means that different resources and cognitive channels are used by developers in order to understand software, but what types of representational systems are the preferred by software engineers?
This paper introduces a psychometrically-based neuro-linguistic analysis tool to classify developers, and presents early results of an experiment to assess the PRS of four top developers against a baseline of the Apache server mailing list.},
  keywords = {apache, email, email archives, mailing lists, neurolinguistics},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463339},
  author = {Junior, Methanias Colaco and Mendonca, Manoel and Farias, Mario and Henrique, Paulo}
}

% 1222: OSCOMM 2010 workshop paper; mailing-list response time as a health measure (Nagios).
@conference{1222,
  title = {Responsiveness as a measure for assessing the health of OSS ecosystems},
  booktitle = {Second International Workshop on Building Sustainable Open Source Communities (OSCOMM 2010)},
  year = {2010},
  note = {"Data was collected from the GMANE (gmane.org) archives of the SourceForge {\textquotedblleft}Nagios-devel{\textquotedblright} mailing list for the period from January 2004 to October 2009". response time, responsiveness.},
  month = may,
  abstract = {The health of an Open Source ecosystem is an important decision factor when considering the adoption of Open Source software or when monitoring a seeded Open Source project. In this paper we introduce responsiveness as a qualitative measure of the quality of replies within mailing lists, which can be used for assessing ecosystem health. We consider one specific metric of responsiveness in this paper, and that is the response time of follow-up messages in mailing lists. We also describe a way for characterising the nature of communication in messages with short and long response times. The approach is tested in the context of the Nagios project, and we particularly focus on the responsiveness for contributors acting in their professional roles as core developers. 
Our contribution is a step towards a deeper understanding of voluntary support provided in mailing lists of OSS projects.},
  keywords = {email, email archives, gmane, mailing lists, nagios, response time, sourceforge},
  attachments = {https://flosshub.org/sites/flosshub.org/files/osscomm002.pdf},
  author = {Gamalielsson, Jonas and Lundell, Bj{\"o}rn and Lings, Brian}
}

% 953: MSR 2010 paper; process data quality versus bug counts.
@conference{953,
  title = {When process data quality affects the number of bugs: Correlations in software engineering datasets},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {62--71},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {Software engineering process information extracted from version control systems and bug tracking databases are widely used in empirical software engineering. In prior work, we showed that these data are plagued by quality deficiencies, which vary in its characteristics across projects. In addition, we showed that those deficiencies in the form of bias do impact the results of studies in empirical software engineering. While these findings affect software engineering researchers the impact on practitioners has not yet been substantiated. In this paper we, therefore, explore (i) if the process data quality and characteristics have an influence on the bug fixing process and (ii) if the process quality as measured by the process data has an influence on the product (i.e., software) quality. Specifically, we analyze six Open Source as well as two Closed Source projects and show that process data quality and characteristics have an impact on the bug fixing process: the high rate of empty commit messages in Eclipse, for example, correlates with the bug report quality. We also show that the product quality - measured by number of bugs reported - is affected by process data quality measures. 
These findings have the potential to prompt practitioners to increase the quality of their software process and its associated data quality.},
  keywords = {apache, bug reports, eclipse, gnome, log files, mozilla, netbeans, openoffice.org, version control},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463286},
  attachments = {https://flosshub.org/sites/flosshub.org/files/62bachmann-msr10.pdf},
  author = {Bachmann, Adrian and Bernstein, Abraham}
}

% 942: MSR 2009 paper; web-scraping framework linking data across repositories.
@conference{942,
  title = {On mining data across software repositories},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {171--174},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {Software repositories provide abundance of valuable information about open source projects. With the increase in the size of the data maintained by the repositories, automated extraction of such data from individual repositories, as well as of linked information across repositories, has become a necessity. In this paper we describe a framework that uses web scraping to automatically mine repositories and link information across repositories. We discuss two implementations of the framework. In the first implementation, we automatically identify and collect security problem reports from project repositories that deploy the Bugzilla bug tracker using related vulnerability information from the National Vulnerability Database. In the second, we collect security problem reports for projects that deploy the Launchpad bug tracker along with related vulnerability information from the National Vulnerability Database. We have evaluated our tool on various releases of Fedora, Ubuntu, Suse, RedHat, and Firefox projects. 
The percentage of security bugs identified using our tool is consistent with that reported by other researchers.},
  keywords = {bug reports, bugzilla, Fedora, Firefox, htmlscraper, integration, launchpad, national vulnerability database, RedHat, Suse, tracker, Ubuntu},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069498},
  attachments = {https://flosshub.org/sites/flosshub.org/files/171MiningAcrossmsr09.pdf},
  author = {Anbalagan, Prasanth and Vouk, Mladen}
}

% 926: MSR 2009 paper; concept drift observed through defect prediction quality.
@conference{926,
  title = {Tracking concept drift of software projects using defect prediction quality},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {51--60},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {Defect prediction is an important task in the mining of software repositories, but the quality of predictions varies strongly within and across software projects. In this paper we investigate the reasons why the prediction quality is so fluctuating due to the altering nature of the bug (or defect) fixing process. Therefore, we adopt the notion of a concept drift, which denotes that the defect prediction model has become unsuitable as set of influencing features has changed - usually due to a change in the underlying bug generation process (i.e., the concept). We explore four open source projects (Eclipse, OpenOffice, Netbeans and Mozilla) and construct file-level and project-level features for each of them from their respective CVS and Bugzilla repositories. We then use this data to build defect prediction models and visualize the prediction quality along the time axis. These visualizations allow us to identify concept drifts and - as a consequence - phases of stability and instability expressed in the level of defect prediction quality. 
Further, we identify those project features, which are influencing the defect prediction quality using both a tree induction-algorithm and a linear regression model. Our experiments uncover that software systems are subject to considerable concept drifts in their evolution history. Specifically, we observe that the change in number of authors editing a file and the number of defects fixed by them contribute to a project{\textquoteright}s concept drift and therefore influence the defect prediction quality. Our findings suggest that project managers using defect prediction models for decision making should be aware of the actual phase of stability or instability due to a potential concept drift.},
  keywords = {bugzilla, cvs, defect prediction, eclipse, mozilla, netbeans, openoffice},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069480},
  attachments = {https://flosshub.org/sites/flosshub.org/files/51MSR2009_0111_Ekanayake_Jayalath.pdf},
  author = {Ekanayake, Jayalath and Tappolet, Jonas and Gall, Harald C. and Bernstein, Abraham}
}

% HICSS 2009 conference paper on orphaned code and developer turnover.
% Typed as a conference paper: the venue is a conference, so it belongs in booktitle.
@inproceedings{10.1109/HICSS.2009.1014,
  title = {Using Software Archaeology to Measure Knowledge Loss in Software Projects Due to Developer Turnover},
  booktitle = {2009 42nd Hawaii International Conference on System Sciences (HICSS 2009)},
  year = {2009},
  pages = {1--10},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
  abstract = {Developer turnover can result in a major problem when developing software. When senior developers abandon a software project, they leave a knowledge gap that has to be managed. In addition, new (junior) developers require some time in order to achieve the desired level of productivity. In this paper, we present a methodology to measure the effect of knowledge loss due to developer turnover in software projects. For a given software project, we measure the quantity of code that has been authored by developers that do not belong to the current development team, which we define as orphaned code. 
Besides, we study how orphaned code is managed by the project. Our methodology is based on the concept of software archaeology, a derivation of software evolution. As case studies we have selected four FLOSS (free, libre, open source software) projects, from purely driven by volunteers to company-supported. The application of our methodology to these case studies will give insight into the turnover that these projects suffer and how they have managed it and shows that this methodology is worth being augmented in future research.},
  keywords = {attrition, case study, developers, evince, evolution, gimp, growth, knowledge collaboration, lines of code, nautilus, quality, sloc, turnover},
  isbn = {978-0-7695-3450-3},
  doi = {10.1109/HICSS.2009.1014},
  attachments = {https://flosshub.org/sites/flosshub.org/files/07-07-08.pdf},
  author = {Izquierdo-Cortazar, Daniel and Robles, Gregorio and Ortega, Felipe and Gonzalez-Barahona, Jesus M.}
}

% Journal article (Information Economics and Policy, 2008) on how maintainers share work across files.
@article{denBesten2008316,
  title = {The allocation of collaborative efforts in open-source software},
  journal = {Information Economics and Policy},
  volume = {20},
  number = {4},
  year = {2008},
  note = {"we have selected a set of 10 large open-source projects" apache, cvs, gaim, gcc, ghostscript, mozilla, netbsd, openssh, postgresql, python "Our data were extracted from logs of development activity generated by software version control systems. For each project in the selection, we extracted CVS development logs" "We notably computed for each file in the sample, and for each month in its history, the number of distinct maintainers that had committed a change during that month, and the number of commits, the blocks of code addition, each file had received during that month." 
"other variables used in the regressions are proxies for the size, age, and granularity of files; the size of a file is represented as its number of lines of code (LOCs), its age by its creation date (Youth), and its granularity by the number of functions it contains."},
  pages = {316--322},
  abstract = {The article investigates the allocation of collaborative efforts among core developers (maintainers) of open-source software by analyzing on-line development traces (logs) for a set of 10 large projects. Specifically, we investigate whether the division of labor within open-source projects is influenced by characteristics of software code. We suggest that the collaboration among maintainers tends to be influenced by different measures of code complexity. We interpret these findings by providing preliminary evidence that the organization of open-source software development would self-adapt to characteristics of the code base, in a {\textquoteright}stigmergic{\textquoteright} manner.},
  keywords = {age, apache, complexity, cvs, division of labor, functions, gaim, gcc, ghostscript, lines of code, loc, log files, mozilla, netbsd, openssh, postgresql, python, revision control, scm, size, source code, Stigmergy, version control},
  issn = {0167-6245},
  doi = {10.1016/j.infoecopol.2008.06.003},
  url = {http://www.sciencedirect.com/science/article/B6V8J-4SSG4PN-1/2/88b3824c30a31c18929d8a5ca6d64f62},
  author = {den Besten, Matthijs and Dalle, Jean-Michel and Galia, Fabrice}
}

% MSR 2008 paper introducing AMAP, an abbreviation-expansion miner for source code.
@conference{Hill:2008:AAM:1370750.1370771,
  title = {AMAP: automatically mining abbreviation expansions in programs to enhance software maintenance tools},
  booktitle = {Proceedings of the 2008 international working conference on Mining software repositories},
  series = {MSR {\textquoteright}08},
  year = {2008},
  month = may,
  pages = {79--88},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {When writing software, developers often employ abbreviations in 
identifier names. In fact, some abbreviations may never occur with the expanded word, or occur more often in the code. However, most existing program comprehension and search tools do little to address the problem of abbreviations, and therefore may miss meaningful pieces of code or relationships between software artifacts. In this paper, we present an automated approach to mining abbreviation expansions from source code to enhance software maintenance tools that utilize natural language information. Our scoped approach uses contextual information at the method, program, and general software level to automatically select the most appropriate expansion for a given abbreviation. We evaluated our approach on a set of 250 potential abbreviations and found that our scoped approach provides a 57\% improvement in accuracy over the current state of the art.},
  keywords = {automatic abbreviation expansion, azureus, itext.net, liferay, maintenance, natural language, openoffice.org, program comprehension, source code, tiger envelopes, tools},
  isbn = {978-1-60558-024-1},
  doi = {10.1145/1370750.1370771},
  url = {http://doi.acm.org/10.1145/1370750.1370771},
  attachments = {https://flosshub.org/sites/flosshub.org/files/p79-hill.pdf},
  author = {Hill, Emily and Fry, Zachary P. and Boyd, Haley and Sridhara, Giriprasad and Novikova, Yana and Pollock, Lori and Vijay-Shanker, K.}
}

% ICSE 2008 paper; duplicate bug report detection using natural language plus execution information.
@conference{Wang:2008:ADD:1368088.1368151,
  title = {An approach to detecting duplicate bug reports using natural language and execution information},
  booktitle = {Proceedings of the 30th international conference on Software engineering},
  series = {ICSE {\textquoteright}08},
  year = {2008},
  pages = {461--470},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {An open source project typically maintains an open bug repository so that bug reports from all over the world can be gathered. 
When a new bug report is submitted to the repository, a person, called a triager, examines whether it is a duplicate of an existing bug report. If it is, the triager marks it as DUPLICATE and the bug report is removed from consideration for further work. In the literature, there are approaches exploiting only natural language information to detect duplicate bug reports. In this paper we present a new approach that further involves execution information. In our approach, when a new bug report arrives, its natural language information and execution information are compared with those of the existing bug reports. Then, a small number of existing bug reports are suggested to the triager as the most similar bug reports to the new bug report. Finally, the triager examines the suggested bug reports to determine whether the new bug report duplicates an existing bug report. We calibrated our approach on a subset of the Eclipse bug repository and evaluated our approach on a subset of the Firefox bug repository. 
The experimental results show that our approach can detect 67\%-93\% of duplicate bug reports in the Firefox bug repository, compared to 43\%-72\% using natural language information alone.},
  keywords = {bug report, duplicate bug report, execution information, information retrieval, natural language},
  isbn = {978-1-60558-079-1},
  doi = {10.1145/1368088.1368151},
  url = {http://doi.acm.org/10.1145/1368088.1368151},
  author = {Wang, Xiaoyin and Zhang, Lu and Xie, Tao and Anvik, John and Sun, Jiasu}
}

% 972: MSR 2008 paper presenting infoZilla, which extracts structural elements from bug reports.
@conference{972,
  title = {Extracting structural information from bug reports},
  booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08},
  year = {2008},
  month = may,
  pages = {27--30},
  publisher = {ACM Press},
  organization = {ACM Press},
  address = {New York, New York, USA},
  abstract = {In software engineering experiments, the description of bug reports is typically treated as natural language text, although it often contains stack traces, source code, and patches. Neglecting such structural elements is a loss of valuable information; structure usually leads to a better performance of machine learning approaches. In this paper, we present a tool called infoZilla that detects structural elements from bug reports with near perfect accuracy and allows us to extract them. 
We anticipate that infoZilla can be used to leverage data from bug reports at a different granularity level that can facilitate interesting research in the future.},
  keywords = {bug reports, eclipse, enumerations, infozilla, natural language, patches, source code, stack trace},
  isbn = {978-1-60558-024-1},
  doi = {10.1145/1370750.1370757},
  attachments = {https://flosshub.org/sites/flosshub.org/files/p27-bettenburg.pdf},
  author = {Premraj, Rahul and Zimmermann, Thomas and Kim, Sunghun and Bettenburg, Nicolas}
}

% 118: 2007 record tagged NotreDameArchive; only title, year and authors are recorded.
@proceedings{118,
  title = {Design architecture, developer networks and performance of open source software projects},
  year = {2007},
  keywords = {NotreDameArchive},
  author = {Liu, Xiang and Iyer, Bala}
}

% 623: OSS2007 paper on using open source in commercial product development at Nokia.
@conference{623,
  title = {Experiences on Product Development with Open Source Software},
  booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {234},
  year = {2007},
  pages = {85--96},
  publisher = {Springer},
  organization = {Springer},
  chapter = {7},
  abstract = {This article discusses Nokia{\textquoteright}s experiences of using open source in commercial product development. It presents the development model used in the creation of mobile consumer devices and highlights the opportunities and challenges experienced. This article concludes that the main benefits come from the utilization of already available open source components, and from their quality and flexibility. It illustrates the challenges and solutions faced when mixing open and closed development models at Nokia. 
},
  keywords = {gtk, linux, nokia, tablet},
  isbn = {978-0-387-72485-0},
  doi = {10.1007/978-0-387-72486-7_7},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Experiences\%20on\%20Product\%20Development.pdf},
  author = {Jaaksi, Ari}
}

% 125: Management Science article applying Ising theory to OSS membership dynamics.
@article{125,
  title = {Membership herding and network stability in the open source community: The Ising perspective},
  journal = {Management Science},
  volume = {53},
  number = {7},
  year = {2007},
  month = jul,
  pages = {1086--1101},
  abstract = {The aim of this paper is twofold: (1) to conceptually understand membership dynamics in the open source software (OSS) community, and (2) to explore how different network characteristics (i.e., network size and connectivity) influence the stability of an OSS network. Through the lens of Ising theory, which is widely accepted in physics, we investigate basic patterns of interaction and present fresh conceptual insight into dynamic and reciprocal relations among OSS community members. We also perform computer simulations based on empirical data collected from two actual OSS communities. Key findings include: (1) membership herding is highly present when external influences (e.g., the availability of other OSS projects) are weak, but decreases significantly when external influences increase, (2) propensity for membership herding is most likely to be seen in a large network with random connectivity, and (3) for large networks, when external influences are weak, random connectivity will result in higher network strength than scale-free connectivity (as external influences increase, however, the reverse phenomenon is observed). In addition, scale-free connectivity appears to be less volatile than random connectivity in response to an increase in the strength of external influences. 
We conclude with several implications that may be of significance to OSS stakeholders in particular, and to a broader range of online communities in general.},
  keywords = {BEHAVIOR, DYNAMICS, ECONOMICS, INNOVATION, INVESTMENT, Ising theory, membership herding, MODEL, MOTIVATION, network connectivity, network stability, open source, PHASE-TRANSITION, UNCERTAINTY},
  issn = {0025-1909},
  author = {Oh, Wonseok and Jeon, Sangyong}
}

% 136: 2007 record tagged NotreDameArchive; only title, year and author are recorded.
@proceedings{136,
  title = {Open source software development and the small world phenomenon: an empirical investigation of macro level collaboration network properties on project success},
  year = {2007},
  keywords = {NotreDameArchive},
  author = {Singh, Param Vir}
}

% 144: 2007 record tagged NotreDameArchive; only title, year and authors are recorded.
@proceedings{144,
  title = {Social capital, structural holes and team composition: collaborative networks of the open source software community},
  year = {2007},
  keywords = {NotreDameArchive},
  author = {Tan, Yong and Mookerjee, Vijay and Singh, Param Vir}
}

% 91: 2006 record tagged NotreDameArchive; only title, year and authors are recorded.
@proceedings{91,
  title = {An absorptive capacity perspective of open source software development group performance},
  year = {2006},
  keywords = {NotreDameArchive},
  author = {Daniel, Sherae and Agarwal, Ritu and Stewart, Katherine}
}

% 682: OSS2006 paper on collaboration among maintainers in 10 large projects.
@conference{682,
  title = {Collaborative Maintenance in Large Open-Source Projects},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  year = {2006},
  pages = {233--244},
  publisher = {Springer},
  organization = {Springer},
  abstract = {The paper investigates collaborative work among maintainers of open source software by analyzing the logs of a set of 10 large projects. We inquire whether teamwork can be influenced by several characteristics of code. Preliminary results suggest that collaboration among maintainers in most large open-source projects seems to be positively influenced by file vintage and by Halstead volume of files, and negatively by McCabe complexity and size measured in SLOCs. 
These results could be consistent with an increased attractivity of files created early in the history of a project, and with maintainers being less attracted by more verbose code and by more complex code, although in this last case it might also reflect the fact that more complex files would be de facto more exclusive in terms of maintenance.},
  keywords = {apache, COLLABORATION, complexity, cvs, gaim, gcc, ghostscript, halstead, lines of code, loc, mccabe, mozilla, netbsd, openssh, postgresql, python, sloc},
  doi = {10.1007/0-387-34226-5_23},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Collaborative\%20Maintenance.pdf},
  author = {den Besten, Matthijs and Dalle, Jean-Michel and Galia, Fabrice}
}

% MSR 2006 paper; information-theoretic evaluation of three change prediction models.
@conference{Askari:2006:ITE:1137983.1138013,
  title = {Information theoretic evaluation of change prediction models for large-scale software},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {126--132},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In this paper, we analyze the data extracted from several open source software repositories. We observe that the change data follows a Zipf distribution. Based on the extracted data, we then develop three probabilistic models to predict which files will have changes or bugs. The first model is Maximum Likelihood Estimation (MLE), which simply counts the number of events, i.e., changes or bugs, that happen to each file and normalizes the counts to compute a probability distribution. The second model is Reflexive Exponential Decay (RED) in which we postulate that the predictive rate of modification in a file is incremented by any modification to that file and decays exponentially. The third model is called RED-Co-Change. 
With each modification to a given file, the RED-Co-Change model not only increments its predictive rate, but also increments the rate for other files that are related to the given file through previous co-changes. We then present an information-theoretic approach to evaluate the performance of different prediction models. In this approach, the closeness of model distribution to the actual unknown probability distribution of the system is measured using cross entropy. We evaluate our prediction models empirically using the proposed information-theoretic approach for six large open source systems. Based on this evaluation, we observe that of our three prediction models, the RED-Co-Change model predicts the distribution that is closest to the actual distribution for all the studied systems.},
  keywords = {bugs, change analysis, cvs, evaluation approach, file, freebsd, information theory, kde, koffice, log files, netbsd, openbsd, postgresql, prediction, prediction models, scm, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138013},
  url = {http://doi.acm.org/10.1145/1137983.1138013},
  attachments = {https://flosshub.org/sites/flosshub.org/files/126InformationTheoretic.pdf},
  author = {Askari, Mina and Holt, Ric}
}

% Management Science article on network embeddedness and open source project success.
@article{Grewal:2006:LLL:1246148.1246155,
  title = {Location, Location, Location: How Network Embeddedness Affects Project Success in Open Source Systems},
  journal = {Management Science},
  volume = {52},
  number = {7},
  year = {2006},
  month = jul,
  pages = {1043--1056},
  publisher = {INFORMS},
  address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA},
  abstract = {The community-based model for software development in open source environments is becoming a viable alternative to traditional firm-based models. 
To better understand the workings of open source environments, we examine the effects of network embeddedness---or the nature of the relationship among projects and developers---on the success of open source projects. We find that considerable heterogeneity exists in the network embeddedness of open source projects and project managers. We use a visual representation of the affiliation network of projects and developers as well as a formal statistical analysis to demonstrate this heterogeneity and to investigate how these structures differ across projects and project managers. Our main results surround the effect of this differential network embeddedness on project success. We find that network embeddedness has strong and significant effects on both technical and commercial success, but that those effects are quite complex. We use latent class regression analysis to show that multiple regimes exist and that some of the effects of network embeddedness are positive under some regimes and negative under others. We use project age and number of page views to provide insights into the direction of the effect of network embeddedness on project success. Our findings show that different aspects of network embeddedness have powerful but subtle effects on project success and suggest that this is a rich environment for further study.},
  keywords = {affiliation network, age, developers, latent class analysis, network embeddedness, open source software, page views, perl, project success, registration, sourceforge},
  issn = {0025-1909},
  doi = {10.1287/mnsc.1060.0550},
  url = {http://portal.acm.org/citation.cfm?id=1246148.1246155},
  author = {Grewal, Rajdeep and Lilien, Gary L. and Mallapragada, Girish}
}

% Journal article comparing kernel maintainability of Linux, FreeBSD, NetBSD and OpenBSD.
@article{Yu:2006:MKO:1150566.1150571,
  title = {Maintainability of the kernels of open-source operating systems: A comparison of Linux with FreeBSD, NetBSD, and OpenBSD},
  journal = {Journal of Systems and Software},
  volume = {79},
  year = {2006},
  note = {"Data regarding the number and total number of lines of code of kernel and nonkernel modules in the four operating systems are provided in Table 1" loc, kloc, number of kernel modules, number of nonkernel modules size c files .h files},
  month = jun,
  pages = {807--815},
  publisher = {Elsevier Science Inc.},
  address = {New York, NY, USA},
  abstract = {We compared and contrasted the maintainability of four open-source operating systems: Linux, FreeBSD, NetBSD, and OpenBSD. We used our categorization of common coupling in kernel-based software to highlight future maintenance problems. An unsafe definition is a definition of a global variable that can affect a kernel module if that definition is changed. For each operating system we determined a number of measures, including the number of global variables, the number of instances of global variables in the kernel and overall, as well as the number of unsafe definitions in the kernel and overall. We also computed the value of each our measures per kernel KLOC and per KLOC overall. For every measure and every ratio, Linux compared unfavorably with FreeBSD, NetBSD, and OpenBSD. Accordingly, we are concerned about the future maintainability of Linux.},
  keywords = {abiword, Common coupling, coupling, Definition-use analysis, freebsd, kernel, lines of code, linux, linux kernel, loc, Maintainability, modules, netbsd, Open-source software, openbsd, source code},
  issn = {0164-1212},
  doi = {10.1016/j.jss.2005.08.014},
  url = {http://dx.doi.org/10.1016/j.jss.2005.08.014},
  attachments = {https://flosshub.org/sites/flosshub.org/files/YuSchachChen.pdf},
  author = {Yu, Liguo and Schach, Stephen R. and Chen, Kai and Heller, Gillian Z. 
and Offutt, Jeff}
}

% 795: OSS2005 short paper on the Gild open source education software.
@conference{795,
  title = {The challenges of creating open source education software: the Gild experience},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {338--340},
  keywords = {COMMUNITY, eclipse, learning environment, novice programmers, open source, programming environment},
  url = {http://pascal.case.unibz.it/handle/2038/1539},
  author = {German, Daniel M. and Rigby, Peter and Cubranic, Davor and Storey, Margaret-Anne and Thomson, Suzanne}
}

% 102: IEEE TSE article validating Chidamber and Kemerer metrics for fault prediction on Mozilla.
@article{102,
  title = {Empirical validation of object-oriented metrics on open source software for fault prediction},
  journal = {IEEE Transactions on Software Engineering},
  volume = {31},
  number = {10},
  year = {2005},
  note = {"This paper describes how we calculated the object-oriented metrics given by Chidamber and Kemerer to illustrate how fault-proneness detection of the source code of the open source Web and e-mail suite called Mozilla can be carried out. We checked the values obtained against the number of bugs found in its bug database - called Bugzilla - using regression and machine learning methods to validate the usefulness of these metrics for fault-proneness prediction. We also compared the metrics of several versions of Mozilla to see how the predicted fault-proneness of the software system changed during its development cycle." metrics, wmc weighted methods per class, dit depth of inheritance, rfc response for a class, noc number of children, cbo coupling between object classes, cohesion, lines of code, loc, sloc chidamber and kemerer metrics},
  pages = {897--910},
  abstract = {Open source software systems are becoming increasingly important these days. Many companies are investing in open source projects and lots of them are also using such software in their own work. But, because open source software is often developed with a different management style than the industrial ones, the quality and reliability of the code needs to be studied. 
Hence, the characteristics of the source code of these projects need to be measured to obtain more information about it. This paper describes how we calculated the object-oriented metrics given by Chidamber and Kemerer to illustrate how fault-proneness detection of the source code of the open source Web and e-mail suite called Mozilla can be carried out. We checked the values obtained against the number of bugs found in its bug database - called Bugzilla - using regression and machine learning methods to validate the usefulness of these metrics for fault-proneness prediction. We also compared the metrics of several versions of Mozilla to see how the predicted fault-proneness of the software system changed during its development cycle.}, keywords = {bugs, bugzilla, cbo, defects, dit, fault-prone modules, faults, lcom, lcomn, loc, metrics, mozilla, noc, object-oriented, rfc, source code, wmc}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.115.8372\&rep=rep1\&type=pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/Gyimothy.pdf}, author = {Gyimothy, T. and Ferenc, R. and Siket, I.} } @conference {787, title = {Knowledge, Communication and Innovation: the case of Open Source Software as Open Media}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {314-316}, abstract = {The understanding of the major characteristics of Linux Operative System and, more in general, the analysis of the so-called Open Source Phenomenon, is nowadays a central issue in order to appreciate the ongoing evolution of software industry. Free/Open Source Software model may appears a {\textquotedblleft}revolution{\textquotedblright} in the way of thinking about software development, distribution and use. 
But, at a closer glance, it reveals itself more as an {\textquotedblleft}evolution{\textquotedblright} along the path of {\textquotedblleft}tinkering with software{\textquotedblright} typical of the Hacker Tech-Culture since the beginning of sixties. In fact the {\textquotedblleft}open source way{\textquotedblright} of developing software results from the hackers habit of sharing technological knowledge and it represents a perfection of this, due to the availably of new communication technologies. The aim of this research is to explain this evolution, to point out its historical, economics and technological determinants and to link it with the evolution of the {\textquotedblleft}communicational medium{\textquotedblright} used by the open source ...}, keywords = {attributive, Creative Commons, non-commercial}, url = {http://pascal.case.unibz.it/handle/2038/972}, author = {Lorenzo Benussi} } @conference {Fischer:2005:MED:1083142.1083145, title = {Mining evolution data of a product family}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {12-16}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Diversification of software assets through changing requirements impose a constant challenge on the developers and maintainers of large software systems. Recent research has addressed the mining for data in software repositories of single products ranging from fine- to coarse grained analyses. But so far, little attention has been payed to mining data about the evolution of product families. In this work, we study the evolution and commonalities of three variants of the BSD (Berkeley Software Distribution), a large open source operating system. The research questions we tackle are concerned with how to generate high level views of the system discovering and indicating evolutionary highlights. 
To process the large amount of data, we extended our previously developed approach for storing release history information to support the analysis of product families. In a case study we apply our approach on data from three different code repositories representing about 8.5GB of data and 10 years of active development.}, keywords = {bsd, change analysis, change history, cvs, evolution, freebsd, netbsd, openbsd, release history, source code, text mining}, isbn = {1-59593-123-6}, doi = {http://doi.acm.org/10.1145/1082983.1083145}, url = {http://doi.acm.org/10.1145/1082983.1083145}, attachments = {https://flosshub.org/sites/flosshub.org/files/12MiningEvolution.pdf}, author = {Fischer, Michael and Oberleitner, Johann and Ratzinger, Jacek and Gall, Harald} } @conference {899, title = {Stopping spyware at the gate: a user study of privacy, notice and spyware}, booktitle = {2005 Symposium on Usable Privacy and Security}, year = {2005}, month = {07/2005}, pages = {43-52}, publisher = {Association for Computing Machinery}, organization = {Association for Computing Machinery}, address = {Pittsburgh, PA}, keywords = {agreement,, and, Aspects,, Design,, end, EULA,, Experimentation,, Factors,, Human, Legal, license, notice,, of, privacy,, security, service,, spyware,, terms, ToS,, usability,, user}, isbn = {1-59593-178-3 }, author = {N. Good and Dhamija, R. and J. Grossklags and D. Thaw and Aronowitz, S. and D. Mulligan and J. 
Konstan} } @conference {788, title = {Structure, Cohesion, and Open Source Software Success}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {317-319}, abstract = {This paper proposes a dissertation designed to understand how the open source software (OSS) development group and its associated interest community jointly and independently impact OSS success for a single OSS project.}, keywords = {interest community, network externalities, open source software, software quality}, url = {http://pascal.case.unibz.it/handle/2038/1536}, author = {Sherae Daniel} } @proceedings {905, title = {Collaboration, Leadership, Control, and Conflict Negotiation in the NetBeans.org Community}, year = {2004}, month = {May 25}, address = {Edinburgh, Scotland, UK}, keywords = {netbeans}, attachments = {https://flosshub.org/sites/flosshub.org/files/jensen.pdf}, author = {Chris Jensen and Walt Scacchi} } @proceedings {1191, title = {Collaboration, Leadership, Control, and Conflict Negotiation in the Netbeans.org Community}, year = {2004}, pages = {48-52}, abstract = {Large open source software development communities are quickly learning that, to be successful, they must integrate efforts not only among the organizations investing developers within the community and unaffiliated volunteer contributors, but also negotiate relationships with external groups hoping to sway the social and technical direction of the community and its products. Leadership and control sharing across organizations and individuals in and between communities are common sources of conflict. Such conflict often leads to breakdowns in collaboration. 
This paper seeks to explore the negotiation of these conflicts, collaborative efforts, and leadership and control structures in the Netbeans.org community.}, keywords = {conflict, leadership, netbeans}, attachments = {https://flosshub.org/sites/flosshub.org/files/jensen_0.pdf}, author = {Chris Jensen and Walt Scacchi} } @conference {1054, title = {LASER: a lexical approach to analogy in software reuse}, booktitle = {International Workshop on Mining Software Repositories (MSR 2004)}, volume = {2004}, year = {2004}, pages = {112 - 116}, publisher = {IEE}, organization = {IEE}, address = {Edinburgh, Scotland, UK}, abstract = {Software reuse is the process of creating a software system from existing software components, rather than creating it from scratch. With the increase in size and complexity of existing software repositories, the need to provide intelligent support to the programmer becomes more pressing. An analogy is a comparison of certain similarities between things which are otherwise unlike. This concept has shown to be valuable in developing UML-level reuse techniques. In the LASER project we apply lexically-driven Analogy at the code level, rather than at the UML-level, in order to retrieve matching components from a repository of existing components. Using the lexical ontology Word-Net, we have conducted a case study to assess if class and method names in open source applications are used in a semantically meaningful way. Our results demonstrate that both hierarchical reuse and parallel reuse can be enhanced through the use of lexically-driven Analogy.}, keywords = {class, developers, functions, jrefactory, method, naming, natural language, reuse, source code, wordnet}, doi = {10.1049/ic:20040487}, attachments = {https://flosshub.org/sites/flosshub.org/files/112LASER.pdf}, author = {Amin, R. 
and Mel O Cinneide and Veale, Tony} } @proceedings {124, title = {Membership dynamics and network stability in the open-source community: the ising perspective}, year = {2004}, note = {"simulations with the empirical network data that were collected from two actual OSS communities, Linux and Hypermail." "we initially downloaded nearly 100,000 archived (between 1997 and 2003) LINUX Kernel and Hypermail newsgroup messages posted in a UNIX mailbox format" "Specific information was obtained regarding the characteristics of these two OSS communities, including the size, the number of average connections per participant, and the hierarchy of each community."}, abstract = {In this paper, we address the following two questions: (1)How does a participant{\textquoteright}s membership decision affect the others (neighbors) with whom he has collaborated over an extended period of time in an open source software (OSS) network? (2) To what extent do network characteristics (i.e, size and connectivity) mediate the impact of external factors on the OSS participants{\textquoteright} dynamic membership decisions and hence the stability of the network? From the Ising perspective, we present fresh theoretical insight into the dynamic and reciprocal membership relations between OSS participants. We also performed simulations based on empirical data that were collected from two actual OSS communities. Some of the key findings include that (1) membership herding is highly present when the external force is weak, but decreases significantly when the force increases, (2) the propensity for membership herding is most likely to be seen in a large network with a random connectivity, and (3) for large networks, at low external force a random connectivity will perform better than a scale-free counterpart in terms of the network strength. However, as the temperature (external force) increases, the reverse phenomenon is observed. 
In addition, the scale-free connectivity appears to be less volatile than with the random connectivity in response to the increase in the temperature. We conclude with several implications that may be of significance to OSS stakeholders.}, keywords = {email, email archive, hypermail, linux, mailing list, membership, membership herding, newsgroup, open source, participants, social network analysis, stakeholders, team size}, attachments = {https://flosshub.org/sites/flosshub.org/files/OhJeon.pdf}, author = {Oh, Wonseok and Jeon, Sangyong} } @conference {1160, title = {Adopting OSS Methods by Adopting OSS Tools}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, abstract = {The open source movement has created and used a set of software engineering tools with features that fit the characteristics of open source development processes. To a large extent, the open source culture and methodology are conveyed to new developers via the toolset itself, and through the demonstrated usage of these tools on existing projects. The rapid and wide adoption of open source tools stands in stark contrast to the difficulties encountered in adopting traditional CASE tools. 
This paper explores the characteristics that make these tools adoptable and how adopting them may influence software development processes.}, keywords = {ant, argouml, bugzilla, cactus, cvs, developers, eclipse, emacs, email, faq, junit, mailing lists, make, netbeans, package management, rpm, scarab, subversion, teams, tools, torque, WORK}, attachments = {https://flosshub.org/sites/flosshub.org/files/Robbins.pdf}, author = {Robbins, Jason E.} } @article {1095, title = {High Quality and Open Source Software Practices}, journal = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, note = {"We examined the publicly visible portions of these projects from November 2001 through March 2002, ...The SLOC counts for the predominate languages are shown}, month = {2002}, abstract = {Surveys suggest that, according to various metrics, the quality and dependability of today{\textquoteright}s open source software is roughly on par with commercial and government developed software. What are the prospects for advancing to much higher levels of quality in open source software? More specifically, what attributes must be possessed by quality-related interventions for them to be feasibly adoptable in open source practice? In order to identify some of these attributes, we conducted a preliminary survey of the quality practices of a number of successful open source projects. We focus, in particular, on attributes related to adoptability by the open source practitioner community.}, keywords = {apache, bug report, bug tracker, bug tracking system, feature requests, gcc, gnome, kde, lines of code, linux, loc, mozilla, netbeans, perl, position paper, python, sloc, source code, Survey, tomcat, xfree86}, attachments = {https://flosshub.org/sites/flosshub.org/files/HalloranScherlis.pdf}, author = {T. Halloran and W. 
Scherlis} } @article {98, title = {Striking a balance between trust anti control in a virtual organization: a content analysis of open source software case studies}, journal = {Information Systems Journal}, volume = {11}, number = {4}, year = {2001}, note = {"I employ secondary analysis of published case studies of OSS projects. I used several search methods to identify such case studies about OSS pro- jects. First, I searched the electronic archives of both ACM and IEEE, using terms such as {\textquoteleft}open source.{\textquoteright} " "Secondly, I searched on Bell \& Howell/Proquest{\textquoteright}s ABI/Inform, a database of acade- mic and trade publications on business and management (including technology management). Thirdly, I reviewed the {\textquoteleft}position papers{\textquoteright} of the various authors who attended the 1st Work- shop on Open-Source Software Engineering (Feller et al., 2001) for relevant case studies. Fourthly, I searched on Harvard Publishing{\textquoteright}s websites, which contain case studies published" "Finally, as I located case studies or other publi- cations about OSS projects, I followed references from them to identify other case studies. Although I found literally hundreds of publications about OSS development, there was a much smaller number of case studies"}, pages = {277-304}, abstract = {Many organization theorists have predicted the emergence of the networked or virtual firm as a model for the design of future organizations. Researchers have also emphasized the importance of trust as a necessary condition for ensuring the success of virtual organizations. This paper examines the open source software (OSS) {\textquoteright}movement{\textquoteright} as an example of a virtual organization and proposes a model that runs contrary to the belief that trust is critical for virtual organizations. 
Instead, I argue that various control mechanisms can ensure the effective performance of autonomous agents who participate in virtual organizations. Borrowing from the theory of the {\textquoteright}McDonaldization{\textquoteright} of society, I argue that, given a set of practices to ensure the control, efficiency, predictability and calculability of processes and outcomes in virtual organizations, effective performance may occur in the absence of trust. As support for my argument, I employ content analysis to examine a set of published case studies of OSS projects. My results show that, although that trust is rarely mentioned, ensuring control is an important criterion for effective performance within OSS projects. The case studies feature few references to other dimensions of {\textquoteright}McDonaldization{\textquoteright} (efficiency, predictability and calculability), however, and I conclude that the OSS movement relies on many other forms of social control and self-control, which are often unacknowledged in OSS projects. Through these implicit forms of control, OSS projects are able to secure the cooperation of the autonomous agents that participate in project teams. I conclude by extrapolating from these case studies to other virtual organizations.}, keywords = {apache, case studies, Control, fetchmail, jun, linux, linux kernel, McDonaldization, mozilla, networked organization, perl, rationalization, trust, virtual organization}, author = {Gallivan, M. J.} }