@proceedings {1910, title = {Charting the market disruptive nature of Open Source: Experiences from Sony Mobile }, year = {2017}, month = {05/2017}, pages = {175-176}, abstract = {Open Source Software (OSS) has substantial impact on how software-intensive firms develop products and deliver value to the customers. These companies need both strategic and operational support on how to adapt OSS as a part of their products and how to adjust processes and organizations to increase the benefits from OSS participation. This work presents the key insights from the journey that Sony Mobile has made from a company developing proprietary software to a respected member of OSS communities. We framed the experiences into an Open Source Maturity Model that includes two scenarios: engineering-driven and business-driven open source. We outline the most important decisions, roles, processes and implications. }, keywords = {ecosystem, poster, software business}, author = {Mols, CE and Wnuk, K} } @proceedings {1875, title = {Considering the use of walled gardens for FLOSS project communication}, year = {2017}, month = {05/2017}, abstract = {At its core, free, libre, and open source software (FLOSS) is defined by its adherence to a set of licenses that give various freedoms to the users of the software, for example the ability to use the software, to read or modify its source code, and to distribute the software to others. In addition, many FLOSS projects and developers also champion other values related to "freedom" and "openness", such as transparency, for example in communication and decision-making, or community-orientedness, for example in broadening access, collaboration, and participation. This paper explores how one increasingly common software development practice - communicating inside non-archived, third-party "walled gardens" - puts these FLOSS values into conflict. 
If communities choose to use non-archived walled gardens for communication, they may be prioritizing one type of openness (broad participation) over another (transparency). We use 18 FLOSS projects as a sample to describe how walled gardens are currently being used for intra-project communication, as well as to determine whether or not these projects provide archives of these communications. Findings will be useful to the FLOSS community as a whole as it seeks to understand the evolution and impact of its communication choices.}, keywords = {apache, chat, communication, email, free software, irc, mailing list, open source, Slack, Stack Overflow, teams, Wordpress}, doi = {10.1007/978-3-319-57735-7_1}, url = {https://link.springer.com/content/pdf/10.1007\%2F978-3-319-57735-7_1.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/preprint_0.pdf}, author = {Squire, Megan} } @proceedings {1894, title = {Digging into the Eclipse Marketplace}, volume = {496}, year = {2017}, month = {05/2017}, pages = {60-65}, publisher = {Springer}, abstract = {Eclipse is an integrated development environment that can be extended with plug-ins. Thanks to Eclipse{\textquoteright}s success, a diverse community has been established with members coming from industry, open-source projects, and others, and a marketplace with more than 1.700 different plug-ins developed. Hence, the question arises how this marketplace is composed: Who contributes plug-ins? Which plug-ins are successful? Are there common characteristics or trends? To answer these questions, extensive investigations are necessary. In this paper, we present (i) an initial approach for corresponding analyses and (ii) preliminary results. 
Overall, we aim to pave the way for further research addressing, for example, motivations to participate in, or the evolution of, open marketplaces.}, keywords = {Eclipse IDE, Eclipse Marketplace, empirical study, open source}, doi = {10.1007/978-3-319-57735-7_7}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_7}, author = {Kr{\"u}ger, J and Corr, N and Schr{\"o}ter, I and Leich, T} } @proceedings {1908, title = {How Open Source Projects use Static Code Analysis Tools in Continuous Integration Pipelines}, year = {2017}, note = {Data: the paper studies the use of ASCATs (Automated Static Code Analysis Tools) within CI pipelines of 20 popular Java open source projects hosted on GitHub and using Travis CI to support CI activities}, month = {05/2017}, pages = {334-344}, abstract = {Static analysis tools are often used by software developers to entail early detection of potential faults, vulnerabilities, code smells, or to assess the source code adherence to coding standards and guidelines. Also, their adoption within Continuous Integration (CI) pipelines has been advocated by researchers and practitioners. This paper studies the usage of static analysis tools in 20 Java open source projects hosted on GitHub and using Travis CI as continuous integration infrastructure. Specifically, we investigate (i) which tools are being used and how they are configured for the CI, (ii) what types of issues make the build fail or raise warnings, and (iii) whether, how, and after how long are broken builds and warnings resolved. Results indicate that in the analyzed projects build breakages due to static analysis tools are mainly related to adherence to coding standards, and there is also some attention to missing licenses. Build failures related to tools identifying potential bugs or vulnerabilities occur less frequently, and in some cases such tools are activated in a {\textquotedblleft}softer{\textquotedblright} mode, without making the build fail. 
Also, the study reveals that build breakages due to static analysis tools are quickly fixed by actually solving the problem, rather than by disabling the warning, and are often properly documented.}, keywords = {continuous integration, empirical study, static analysis}, doi = {10.1109/MSR.2017.2}, author = {Zampetti, Fiorella and Scalabrino, Simone and Oliveto, Rocco and Canfora, Gerardo and Di Penta, Massimiliano} } @conference {Sharma:2017:IDE:3084226.3084271, title = {Investigating Developers{\textquoteright} Email Discussions During Decision-making in Python Language Evolution}, booktitle = {Proceedings of the 21st International Conference on Evaluation and Assessment in Software Engineering}, series = {EASE{\textquoteright}17}, year = {2017}, pages = {286{\textendash}291}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Context: Open Source Software (OSS) developers use mailing lists as their main forum for discussing the evolution of a project. However, the use of mailing lists by developers for decision-making has not received much research attention. Objective: We have explored this issue by studying developers{\textquoteright} email discussions around Python Enhancement Proposals (PEPs). Method: Our dataset comprised 42,672 emails from six different mailing lists pertaining to PEP development. We performed multiple forms of analysis on these emails, involving both quantitative measures (e.g., frequency) and deeper analysis of specific PEP discussions (i.e., outlier analysis). Results: Out of three PEP types (Informational, Process and Standard Track), Standard Track PEPs attract a large amount of discussion (both in volume and average number of messages per proposal). Our study also identified specific PEP states and topics that generated a disproportionate amount of discussion. 
Conclusion: Our outcomes point to several opportunities for improving the management of an OSS team based on the knowledge generated from discussions. We have also identified several interesting avenues for future work such as identifying individuals or groups that present persuasive arguments during decision-making.}, keywords = {Decision-making, Email discussions, Python development}, isbn = {978-1-4503-4804-1}, doi = {10.1145/3084226.3084271}, url = {http://doi.acm.org/10.1145/3084226.3084271}, author = {Sharma, Pankajeshwara and Savarimuthu, Bastin Tony Roy and Stanger, Nigel and Licorish, Sherlock A. and Rainer, Austen} } @inbook {Kerzazi2017, title = {Knowledge Flows Within Open Source Software Projects: A Social Network Perspective}, booktitle = {Advances in Ubiquitous Networking 2: Proceedings of the UNet{\textquoteright}16}, year = {2017}, pages = {247{\textendash}258}, publisher = {Springer Singapore}, organization = {Springer Singapore}, address = {Singapore}, abstract = {Developing software is knowledge-intensive activity, requiring extensive technical knowledge and awareness. The abstract part of development is the social interactions that drive knowledge flows between contributors, especially for Open Source Software (OSS). This study investigated knowledge sharing and propagation from social perspective using social network analysis (SNA). We mined and analyzed the issue and review histories of three OSS from GitHub. Particular attention has been paid to the socio-interactions through comments from contributors on reviews. We aim at explaining the propagation and density of knowledge flows within contributor networks. 
The results show that review requests flow from the core contributors toward peripheral contributors and comments on reviews are in a continuous loop from the core teams to the peripherals and back; and the core contributors leverage on their awareness and technical knowledge to increase their notoriety by playing the role of communication brokers supported by comments on work items.}, keywords = {expertise, Knowledge flows, open source, SNA}, isbn = {978-981-10-1627-1}, doi = {10.1007/978-981-10-1627-1_19}, url = {http://dx.doi.org/10.1007/978-981-10-1627-1_19}, author = {Kerzazi, Noureddine and El Asri, Ikram}, editor = {El-Azouzi, Rachid and Menasche, Daniel Sadoc and Sabir, Essa{\"\i}d and De Pellegrini, Francesco and Benjillali, Mustapha} } @proceedings {1911, title = {Machine Learning-Based Detection of Open Source License Exceptions}, year = {2017}, note = {"We address these questions by first performing a large scale mining-based study... [W]e analyzed the source code of 51,754 projects written in six different programming languages (Ruby, Javascript, Python, C, C++, and C$\#$) hosted on GitHub."}, month = {05/2017}, pages = {118-129}, abstract = {From a legal perspective, software licenses govern the redistribution, reuse, and modification of software as both source and binary code. Free and Open Source Software (FOSS) licenses vary in the degree to which they are permissive or restrictive in allowing redistribution or modification under licenses different from the original one(s). In certain cases developers may modify the license by appending to it an exception to specifically allow reuse or modification under a particular condition. These exceptions are an important factor to consider for license compliance analysis since they modify the standard (and widely understood) terms of the original license. 
In this work, we first perform a large-scale empirical study on the change history of over 51k FOSS systems aimed at quantitatively investigating the prevalence of known license exceptions and identifying new ones. Subsequently, we performed a study on the detection of license exceptions by relying on machine learning. We evaluated the license exception classification with four different supervised learners and sensitivity analysis. Finally we present a categorization of license exceptions and explain their implications.}, keywords = {classifier, empirical studies, license, machine learning}, doi = {10.1109/ICSE.2017.19}, author = {Vendome, Christopher and Mario Linares-Vasquez and Bavota, Gabriele and Di Penta, Massimiliano and Daniel M. German and Poshyvanyk, Denys} } @proceedings {1914, title = {Practices and Perceptions of UML Use in Open Source Projects}, year = {2017}, month = {05/2017}, pages = {203-212}, abstract = {Context: Open Source is getting more and more collaborative with industry. At the same time, modeling is today playing a crucial role in development of, e.g., safety critical software. Goal: However, there is a lack of research about the use of modeling in Open Source. Our goal is to shed some light into the motivation and benefits of the use of modeling and its use within project teams. Method: In this study, we perform a survey among Open Source developers. We focus on projects that use the Unified Modeling Language (UML) as a representative for software modeling. Results: We received 485 answers of contributors of 458 different Open Source projects. Conclusion: Collaboration seems to be the most important motivation for using UML. It benefits new contributors and contributors who do not create models. 
Teams use UML during communication and planning of joint implementation efforts.}, keywords = {architecture documentation, communication, effectiveness of UML, github, MOTIVATION, UML}, author = {Truong Ho-Quang and Hebig, Regina and Gregorio Robles and Chaudron, Michel R. V. and Miguel Angel Fernandez} } @proceedings {1891, title = {Principled Evaluation of Strengths and Weaknesses in FLOSS Communities: A Systematic Mixed Methods Maturity Model Approach}, volume = {496}, year = {2017}, month = {05/2017}, pages = {34-46}, publisher = {Springer}, abstract = {Context: Free and Open Source Software usually results from intricate socio-technical dynamics operating in a diverse and geographically dispersed community. Understanding the fundamental underpinnings of healthy and thriving communities is of paramount importance to evaluate existing efforts and identify improvement opportunities. Objective: This paper presents a novel reference model for evaluating the maturity of FLOSS communities by mixing quantitative and qualitative methods. Method: We build upon established guidelines for Design Science research in order to devise a well-informed and expressive maturity model, describing how those methods and procedures were used in the design and development of such a model. Results: We present the model structure and functions, as well as instructions on how to instantiate it as evaluations of FLOSS communities. The use of the proposed maturity model is demonstrated in four FLOSS communities. 
Conclusion: Whilst instantiating the model may be burdensome if aiming at sketchy evaluations, results indicate our model effectively captures the maturity regardless aspects such as community size and lifetime.}, keywords = {Design science research, Discourse communities, evaluation, FLOSS communities, maturity models, Mixed methods research}, doi = {10.1007/978-3-319-57735-7_4}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_4}, author = {Andrade, S and Saraiva, F.} } @proceedings {1897, title = {Understanding the Effects of Practices on KDE Ecosystem Health}, volume = {496}, year = {2017}, month = {05/2017}, pages = {89-100}, publisher = {Springer}, abstract = {Open source software ecosystems have adjusted and evolved a set of practices over the years to support the delivery of sustainable software. However, few studies have investigated the impacts of such practices on the health of these ecosystems. In this paper, we present the results of an ethnographic-based study conducted during the Latin-American KDE users and contributors meeting (LaKademy 2015) with the goal of collecting practices used within the KDE ecosystem and understanding how they affect ecosystem health. The analysis was based on softgoal interdependency graphs adapted to represent practices and relate them to non-functional requirements and goals. Our results provide a preliminary insight to understand how KDE ecosystem community interacts, which working practices have been adopted and how they affect ecosystem health. }, keywords = {Ethnographic studies, Open source software ecosystems, Software ecosystem health, Software practices}, doi = {10.1007/978-3-319-57735-7_10}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_10}, author = {Simone da Silva Amorim and John D. 
McGregor and Eduardo Santana de Almeida and Christina von Flach Garcia Chavez} } @proceedings {1918, title = {Using Gamification to Orient and Motivate Students to Contribute to OSS Projects}, year = {2017}, month = {05/2017}, pages = {36-42}, abstract = {Students can benefit from contributing to Open Source Software (OSS), since they can enrich their portfolio and learn with real world projects. However, sometimes students are demotivated to contribute due to entrance barriers. On the other hand, gamification is widely used to engage and motivate people to accomplish tasks and improve their performance. The goal of this work is to analyze the use of gamification to orient and motivate undergraduate students to overcome onboarding barriers and engage to OSS projects. To achieve this goal, we implemented four gaming elements (Quests, Points, Ranking, and Levels) in GitLab and assessed the environment by means of a study conducted with 17 students within a real OSS project (JabRef). At the end of the study, the students evaluated their experience through a questionnaire. We found that the Quest element helped to guide participants and keep them motivated and points helped by providing feedback on students{\textquoteright} performed tasks. We conclude that the gamified environment oriented the students in an attempt to make a contribution and that gamification can motivate and orient newcomers{\textquoteright} to engage to OSS projects. }, keywords = {engagement, gamification, MOTIVATION, newcomers, students}, author = {Guilherme C. Diniz and Marco A. Graciotto Silva and Marco Gerosa and Steinmacher, Igor} } @proceedings {1854, title = {Differentiating Communication Styles of Leaders on the Linux Kernel Mailing List}, year = {2016}, note = {Slides link: https://docs.google.com/presentation/d/1_5kqOXBYwH33ayfGKCncCtCondfUYtsHSDBS3DBig6Y/edit?usp=sharing Edited to fix typo in abstract. 
New version is v3.pdf}, month = {08/2016}, publisher = {ACM}, abstract = {Much communication between developers of free, libre, and open source software (FLOSS) projects happens on email mailing lists. Geographically and temporally dispersed development teams use email as an asynchronous, centralized, persistently stored institutional memory for sharing code samples, discussing bugs, and making decisions. Email is especially important to large, mature projects, such as the Linux kernel, which has thousands of developers and a multi-layered leadership structure. In this paper, we collect and analyze data to understand the communication patterns in such a community. How do the leaders of the Linux Kernel project write in email? What are the salient features of their writing, and can we discern one leader from another? We find that there are clear written markers for two leaders who have been particularly important to recent discussions of leadership style on the Linux Kernel Mailing List (LKML): Linus Torvalds and Greg Kroah-Hartman. Furthermore, we show that it is straightforward to use a machine learning strategy to automatically differentiate these two leaders based on their writing. 
Our findings will help researchers understand how this community works, and why there is occasional controversy regarding differences in communication styles on the LKML.}, keywords = {email, flossmole, linus torvalds, linux, lkml}, attachments = {https://flosshub.org/sites/flosshub.org/files/v3_0.pdf}, author = {Schneider, Daniel and Spurlock, Scott and Squire, Megan} } @inbook {1734, title = {Examining Usability Work and Culture in OSS}, booktitle = {Open Source Systems: Adoption and Impact}, series = {IFIP Advances in Information and Communication Technology}, volume = {451}, year = {2015}, pages = {58-67}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, abstract = {Organizational culture has been recognized as an influential factor affecting the successes and failures of usability work in organizations; however, there is a lack of research on organizational culture in open source software (OSS) development. This paper shows that there are different kinds of cultures in OSS development projects and builds propositions on the relationship between culture and usability work in OSS development projects. Partly those are derived from the literature, partly from an exploratory empirical inquiry. We speculate whether there is an ideal culture type for usability work in OSS development or whether usability work should be modified to fit the different cultures of OSS development projects. 
}, keywords = {culture, empirical study, open source software, Usability}, isbn = {978-3-319-17836-3}, doi = {10.1007/978-3-319-17837-0_6}, url = {http://dx.doi.org/10.1007/978-3-319-17837-0_6}, author = {Rajanen, Mikko and Iivari, Netta}, editor = {Damiani, Ernesto and Frati, Fulvio and Dirk Riehle and Wasserman, Anthony I.} } @inbook {1736, title = {A Systematic Approach for Evaluating BPM Systems: Case Studies on Open Source and Proprietary Tools}, booktitle = {Open Source Systems: Adoption and Impact}, series = {IFIP Advances in Information and Communication Technology}, volume = {451}, year = {2015}, pages = {81-90}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, abstract = {Business Process Management Systems (BPMS) provide support for modeling, developing, deploying, executing and evaluating business processes in an organization. Selecting a BPMS is not a trivial task, not only due to the many existing alternatives, both in the open source and proprietary realms, but also because it requires a thorough evaluation of its capabilities, contextualizing them in the organizational environment in which they will be used. In this paper we present a methodology to guide the systematic evaluation of BPMS that takes into account the specific needs of each organization. It provides a list of key characteristics of BPMS which are ranked by the organization and evaluated using test cases and quantitative criteria. We also present case studies of open source and proprietary BPMS evaluations following our proposal. 
}, keywords = {Business Process Management Systems (BPMS), Evaluation methodology, Open source and proprietary BPMS, Systematic approach}, isbn = {978-3-319-17836-3}, doi = {10.1007/978-3-319-17837-0_8}, url = {http://dx.doi.org/10.1007/978-3-319-17837-0_8}, author = {Delgado, Andrea and Calegari, Daniel and Milanese, Pablo and Falcon, Renatta and Garc{\'\i}a, Esteban}, editor = {Damiani, Ernesto and Frati, Fulvio and Dirk Riehle and Wasserman, Anthony I.} } @proceedings {1752, title = {Why Power Laws? An Explanation from Fine-Grained Code Changes}, year = {2015}, note = {"The data used in this section come from four open source Java projects. We used CVSANALY to collect data from their Git repositories."}, abstract = {Throughout the years, empirical studies have found power law distributions in various measures across many software systems. However, surprisingly little is known about how they are produced. What causes these power law distributions? We offer an explanation from the perspective of fine-grained code changes. A model based on preferential attachment and self-organized criticality is proposed to simulate software evolution. 
The experiment shows that the simulation is able to render power law distributions out of fine-grained code changes, suggesting preferential attachment and self-organized criticality are the underlying mechanism causing the power law distributions in software systems.}, keywords = {Apache Maven, cvsanaly, eclipse, git, Google Guice, java, jedit, open source}, url = {https://users.soe.ucsc.edu/~linzhp/msr2015.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr2015.pdf}, author = {Zhongpeng Lin and Jim Whitehead} } @inbook {1605, title = {Crafting a Systematic Literature Review on Open-Source Platforms}, booktitle = {Open Source Software: Mobile Open Source Technologies}, series = {IFIP Advances in Information and Communication Technology}, volume = {427}, year = {2014}, pages = {113-122}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = { This working paper unveils the crafting of a systematic literature review on open-source platforms. The high-competitive mobile devices market, where several players such as Apple, Google, Nokia and Microsoft run a platforms-war with constant shifts in their technological strategies, is gaining increasing attention from scholars. It matters, then, to review previous literature on past platforms-wars, such as the ones from the PC and game-console industries, and assess its implications to the current mobile devices platforms-war. The paper starts by justifying the purpose and rationale behind this literature review on open-source platforms. The concepts of open-source software and computer-based platforms were then discussed both individually and in unison, in order to clarify the core-concept of {\textquotedblleft}open-source platform{\textquotedblright} that guides this literature review. The detailed design of the employed methodological strategy is then presented as the central part of this paper. 
The paper concludes with preliminary findings organizing previous literature on open-source platforms for the purpose of guiding future research in this area. }, keywords = {Ecosystems, FLOSS, open-source, Platforms, R\&D Management}, isbn = {978-3-642-55127-7}, doi = {10.1007/978-3-642-55128-4_16}, url = {http://dx.doi.org/10.1007/978-3-642-55128-4_16}, author = {Teixeira, Jose and Baiyere, Abayomi}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.} } @conference {Saini:2014:DMA:2597073.2597134, title = {A Dataset for Maven Artifacts and Bug Patterns Found in Them}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {416{\textendash}419}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In this paper, we present data downloaded from Maven, one of the most popular component repositories. The data includes the binaries of 186,392 components, along with source code for 161,025. We identify and organize these components into groups where each group contains all the versions of a library. In order to assess the quality of these components, we make available report generated by the FindBugs tool on 64,574 components. The information is also made available in the form of a database which stores total number, type, and priority of bug patterns found in each component, along with its defect density. We also describe how this dataset can be useful in software engineering research. 
}, keywords = {Empirical Research, Empirical software engineering, findbugs, maven, software quality}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597134}, url = {http://doi.acm.org/10.1145/2597073.2597134}, author = {Saini, Vaibhav and Sajnani, Hitesh and Ossher, Joel and Lopes, Cristina V.} } @conference {Gousios:2014:DPD:2597073.2597122, title = {A Dataset for Pull-based Development Research}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {368{\textendash}371}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Pull requests form a new method for collaborating in distributed software development. To study the pull request distributed development model, we constructed a dataset of almost 900 projects and 350,000 pull requests, including some of the largest users of pull requests on Github. In this paper, we describe how the project selection was done, we analyze the selected features and present a machine learning tool set for the R statistics environment. 
}, keywords = {Distributed software development, Empirical software engineering, msr data showcase, pull request, pull-based development}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597122}, url = {http://doi.acm.org/10.1145/2597073.2597122}, attachments = {https://flosshub.org/sites/flosshub.org/files/pullreqs-dataset.pdf}, author = {Gousios, Georgios and Zaidman, Andy} } @conference {Passos:2014:DFA:2597073.2597124, title = {A Dataset of Feature Additions and Feature Removals from the Linux Kernel}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {376{\textendash}379}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {This paper describes a dataset of feature additions and removals in the Linux kernel evolution history, spanning over seven years of kernel development. Features, in this context, denote configurable system options that users select when creating customized kernel images. The provided dataset is the largest corpus we are aware of capturing feature additions and removals, allowing researchers to assess the kernel evolution from a feature-oriented point-of-view. Furthermore, the dataset can be used to better understand how features evolve over time, and how different artifacts change as a result. One particular use of the dataset is to provide a real-world case to assess existing support for feature traceability and evolution. In this paper, we detail the dataset extraction process, the underlying database schema, and example queries. 
The dataset is directly available at our Bitbucket repository: https://bitbucket.org/lpassos/kconfigdb }, keywords = {evolution, linux, msr data showcase, Traceability, Version Control History}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597124}, url = {http://doi.acm.org/10.1145/2597073.2597124}, attachments = {https://flosshub.org/sites/flosshub.org/files/kernel.pdf}, author = {Passos, Leonardo and Czarnecki, Krzysztof} } @conference {Brunet:2014:DDD:2597073.2597115, title = {Do Developers Discuss Design?}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {340{\textendash}343}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Design is often raised in the literature as important to attaining various properties and characteristics in a software system. At least for open-source projects, it can be hard to find evidence of ongoing design work in the technical artifacts produced as part of the development. Although developers usually do not produce specific design documents, they do communicate about design in different ways. In this paper, we provide quantitative evidence that developers address design through discussions in commits, issues, and pull requests. To achieve this, we built a discussions{\textquoteright} classifier and automatically labeled 102,122 discussions from 77 projects. Based on this data, we make four observations about the projects: i) on average, 25\% of the discussions in a project are about design; ii) on average, 26\% of developers contribute to at least one design discussion; iii) only 1\% of the developers contribute to more than 15\% of the discussions in a project; and iv) these few developers who contribute to a broad range of design discussions are also the top committers in a project. 
}, keywords = {Design Discussions, empirical study, machine learning, mining challenge, msr challenge}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597115}, url = {http://doi.acm.org/10.1145/2597073.2597115}, attachments = {https://flosshub.org/sites/flosshub.org/files/brunet.pdf}, author = {Brunet, Jo{\~a}o and Murphy, Gail C. and Terra, Ricardo and Figueiredo, Jorge and Serey, Dalton} } @conference {Murgia:2014:DFE:2597073.2597086, title = {Do Developers Feel Emotions? An Exploratory Analysis of Emotions in Software Artifacts}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {262{\textendash}271}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software development is a collaborative activity in which developers interact to create and maintain a complex software system. Human collaboration inevitably evokes emotions like joy or sadness, which can affect the collaboration either positively or negatively, yet not much is known about the individual emotions and their role for software development stakeholders. In this study, we analyze whether development artifacts like issue reports carry any emotional information about software development. This is a first step towards verifying the feasibility of an automatic tool for emotion mining in software development artifacts: if humans cannot determine any emotion from a software artifact, neither can a tool. Analysis of the Apache Software Foundation issue tracking system shows that developers do express emotions (in particular gratitude, joy and sadness). However, the more context is provided about an issue report, the more human raters start to doubt and nuance their interpretation of emotions. More investigation is needed before building a fully automatic emotion mining tool. 
}, keywords = {Emotion Mining, Empirical Software Engineering, Issue Report}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597086}, url = {http://doi.acm.org/10.1145/2597073.2597086}, attachments = {https://flosshub.org/sites/flosshub.org/files/murgia.pdf}, author = {Murgia, Alessandro and Tourani, Parastou and Adams, Bram and Ortu, Marco} } @proceedings {roblesestimating, title = {Estimating Development Effort in Free/Open Source Software Projects by Mining Software Repositories: A Case Study of OpenStack}, year = {2014}, month = {05/2014}, abstract = {Because of the distributed and collaborative nature of free/open source software (FOSS) projects, the development effort invested in a project is usually unknown, even after the software has been released. However, this information is becoming of major interest, especially ---but not only--- because of the growth in the number of companies for which FOSS has become relevant for their business strategy. In this paper we present a novel approach to estimate effort by considering data from source code management repositories. We apply our model to the OpenStack project, a FOSS project with more than 1,000 authors, in which several tens of companies cooperate. 
Based on data from its repositories and together with the input from a survey answered by more than 100 developers, we show that the model offers a simple, but sound way of obtaining software development estimations with bounded margins of error.}, keywords = {effort estimation, openstack}, url = {http://gsyc.urjc.es/~grex/repro/2014-msr-effort/msr14-robles-estimating-effort.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr14-robles-estimating-effort.pdf}, author = {Gregorio Robles and Gonz{\'a}lez-Barahona, Jesus M and Cervig{\'o}n, Carlos and Capiluppi, Andrea} } @conference {Robles:2014:EDE:2597073.2597107, title = {Estimating Development Effort in Free/Open Source Software Projects by Mining Software Repositories: A Case Study of OpenStack}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {222{\textendash}231}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Because of the distributed and collaborative nature of free / open source software (FOSS) projects, the development effort invested in a project is usually unknown, even after the software has been released. However, this information is becoming of major interest, especially ---but not only--- because of the growth in the number of companies for which FOSS has become relevant for their business strategy. In this paper we present a novel approach to estimate effort by considering data from source code management repositories. We apply our model to the OpenStack project, a FOSS project with more than 1,000 authors, in which several tens of companies cooperate. Based on data from its repositories and together with the input from a survey answered by more than 100 developers, we show that the model offers a simple, but sound way of obtaining software development estimations with bounded margins of error. 
}, keywords = {effort estimation, free software, mining software repositories, open source, openstack}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597107}, url = {http://doi.acm.org/10.1145/2597073.2597107}, attachments = {https://flosshub.org/sites/flosshub.org/files/robles_0.pdf}, author = {Gregorio Robles and Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Cervig{\'o}n, Carlos and Capiluppi, Andrea and Izquierdo-Cort{\'a}zar, Daniel} } @conference {Wu:2014:EES:2556420.2556483, title = {Exploring the Ecosystem of Software Developers on GitHub and Other Platforms}, booktitle = {Proceedings of the Companion Publication of the 17th ACM Conference on Computer Supported Cooperative Work \& Social Computing}, series = {CSCW Companion {\textquoteright}14}, year = {2014}, pages = {265{\textendash}268}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {GitHub provides various social features for developers to collaborate with others. Those features are important for developers to coordinate their work (Dabbish et al., 2012; Marlow et al., 2013). We hypothesized that the social system of GitHub users was bound by system interactions such that contributing to similar code repositories would lead to users following one another on GitHub or vice versa. Using a quadratic assignment procedure (QAP) correlation, however, only a weak correlation among followship and production activities (code, issue, and wiki contributions) was found. Survey with GitHub users revealed an ecosystem on the Internet for software developers, which includes many platforms, such as Forrst, Twitter, and Hacker News, among others. Developers make social introductions and other interactions on these platforms and engage with one another on GitHub. Due to these preliminary findings, we describe GitHub as a part of a larger ecosystem of developer interactions. 
}, keywords = {ecosystem, follow, github, social connection}, isbn = {978-1-4503-2541-7}, doi = {10.1145/2556420.2556483}, url = {http://doi.acm.org/10.1145/2556420.2556483}, author = {Wu, Yu and Kropczynski, Jessica and Shih, Patrick C. and Carroll, John M.} } @conference {Robles:2014:FSD:2597073.2597129, title = {FLOSS 2013: A Survey Dataset About Free Software Contributors: Challenges for Curating, Sharing, and Combining}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {396{\textendash}399}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In this data paper we describe a data set obtained by means of performing an on-line survey to over 2,000 Free Libre Open Source Software (FLOSS) contributors. The survey includes questions related to personal characteristics (gender, age, civil status, nationality, etc.), education and level of English, professional status, dedication to FLOSS projects, reasons and motivations, involvement and goals. We describe as well the possibilities and challenges of using private information from the survey when linked with other, publicly available data sources. In this regard, an example of data sharing will be presented and legal, ethical and technical issues will be discussed. 
}, keywords = {anonymization, data combining, data sharing, ethics, free software, microdata, msr data showcase, open data, open source, privacy, Survey}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597129}, url = {http://doi.acm.org/10.1145/2597073.2597129}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr14gregorio.pdf}, author = {Gregorio Robles and Reina, Laura Arjona and Serebrenik, Alexander and Vasilescu, Bogdan and Gonz{\'a}lez-Barahona, Jes{\'u}s M.} } @proceedings {1636, title = {Free/Open Source Software projects as early MOOCs}, year = {2014}, month = {04/2014}, pages = {874-883}, abstract = {This paper presents Free/Libre/Open Source Software (FLOSS) Projects as early Massive Online Open Courses (MOOCs). Being software development a process where learning and collaboration is of major importance, FLOSS projects have in common many characteristics with MOOCs. This is because many FLOSS projects (such as Linux, Apache, GNOME or KDE, among others) are massive, they are open to anyone to participate, and are driven mainly by telematic means. We therefore present the research literature that has studied FLOSS projects from points of view that are close to learning and discuss how the FLOSS community has approached many of the issues related to acquiring knowledge and skills over the Internet and compare them to how currently MOOCs, both xMOOCs and cMOOCs, address these situations. }, keywords = {education, FLOSS, learning, mooc}, doi = {10.1109/EDUCON.2014.6826200}, author = {Robles, G. and Plaza, H. 
and Gonzalez-Barahona, J. M.} } @inbook {1607, title = {Lessons Learned from Teaching Open Source Software Development}, booktitle = {Open Source Software: Mobile Open Source Technologies}, series = {IFIP Advances in Information and Communication Technology}, volume = {427}, year = {2014}, pages = {133-142}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = { Free/Open Source Software allows students to learn valuable real world skills and experiences, as well as create a portfolio to show future employers. However, the learning curve to joining FOSS can be daunting, often leading newcomers to walk away frustrated. Universities therefore need to find ways to provide a structured introduction to students, helping them overcome the barriers to entry. This paper describes two courses taught at two universities, built around a Communities of Practice model, and the lessons learned from these. Suggestions and insights are shared for how to structure and evaluate such courses for maximum effect. }, keywords = {education, FOSS, Free/Open Source Software}, isbn = {978-3-642-55127-7}, doi = {10.1007/978-3-642-55128-4_18}, url = {http://dx.doi.org/10.1007/978-3-642-55128-4_18}, author = {Morgan, Becka and Jensen, Carlos}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.} } @conference {Eckhardt:2014:MMO:2642803.2642810, title = {The Merits of a Meritocracy in Open Source Software Ecosystems}, booktitle = {Proceedings of the 2014 European Conference on Software Architecture Workshops}, series = {ECSAW {\textquoteright}14}, year = {2014}, pages = {7:1{\textendash}7:6}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The Eclipse open source ecosystem has grown from a small internal IBM project to one of the biggest Integrated Development Environments in the market. 
Open source communities and ecosystems do not follow the standard governance strategies typically used in large organizations. A meritocracy is a frequently occurring form of governance on different levels in open ecosystems. In this paper we investigate how this form of governance influences the health of projects within the Eclipse ecosystem in terms of the amount of commits within each month. We analyzed the hierarchy of Eclipse, how merits are conceptualized within the ecosystem and the effect of the appointments of mentors and project leads on the amount of commits. From our research, we can conclude that this system is not always as fair as it seems; merits are only a benefit in some cases.}, keywords = {Ecosystem Health, Meritocracy, open source, Software ecosystems}, isbn = {978-1-4503-2778-7}, doi = {10.1145/2642803.2642810}, url = {http://doi.acm.org/10.1145/2642803.2642810}, author = {Eckhardt, Evert and Kaats, Erwin and Slinger Jansen and Alves, Carina} } @conference {Padhye:2014:SEC:2597073.2597113, title = {A Study of External Community Contribution to Open-source Projects on GitHub}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {332{\textendash}335}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Open-source software projects are primarily driven by community contribution. However, commit access to such projects{\textquoteright} software repositories is often strictly controlled. These projects prefer to solicit external participation in the form of patches or pull requests. In this paper, we analyze a set of 89 top-starred GitHub projects and their forks in order to explore the nature and distribution of such community contribution. We first classify commits (and developers) into three categories: core, external and mutant, and study the relative sizes of each of these classes through a ring-based visualization. 
We observe that projects written in mainstream scripting languages such as JavaScript and Python tend to include more external participation than projects written in upcoming languages such as Scala. We also visualize the geographic spread of these communities via geocoding. Finally, we classify the types of pull requests submitted based on their labels and observe that bug fixes are more likely to be merged into the main projects as compared to feature enhancements. }, keywords = {community participation, core committers, external contribution, mining challenge, mining software repositories, msr challenge, Open-source software, pull requests}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597113}, url = {http://doi.acm.org/10.1145/2597073.2597113}, author = {Padhye, Rohan and Mani, Senthil and Sinha, Vibha Singhal} } @conference {Teixeira:2014:UCO:2641580.2641627, title = {Understanding Coopetition in the Open-Source Arena: The Cases of WebKit and OpenStack}, booktitle = {Proceedings of The International Symposium on Open Collaboration}, series = {OpenSym {\textquoteright}14}, year = {2014}, pages = {39:1{\textendash}39:5}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In an era of software crisis, the move of firms towards distributed software development teams is being challenged by emerging collaboration issues. On this matter, the open-source phenomenon may shed some light, as successful cases on distributed collaboration in the open-source community have been recurrently reported. In our research we explore collaboration networks in the WebKit and OpenStack high-networked open-source projects, by mining their source-code version-control-systems data with Social Network Analysis (SNA). Our approach allows us to observe how key events in the industry affect open-source collaboration networks over time. 
With our findings, we highlight the explanatory power from network visualizations capturing the collaborative dynamics of high-networked software projects over time. Moreover, we argue that competing companies that sell similar products in the same market, can collaborate in the open-source community while publicly manifesting intense rivalry (e.g. Apple vs Samsung patent-wars). After integrating our findings with the current body of theoretical knowledge in management strategy, economics, strategic alliances and coopetition, we propose the novel notion of open-coopetition, where rival firms collaborate with competitors in the open-source community. We argue that classical coopetition management theories do not fully explain the competitive and collaborative issues that are simultaneously present and interconnected in the WebKit and OpenStack open-source communities. We propose the development of the novel open-coopetition theory for a better understanding on how rival-firms collaborate with competitors by open-source manners. }, keywords = {COLLABORATION, Competition, Coopetition, Ecosystems, FLOSS, Open-Coopetition, open-source, OSS, Strategic Alliances}, isbn = {978-1-4503-3016-9}, doi = {10.1145/2641580.2641627}, url = {http://doi.acm.org/10.1145/2641580.2641627}, author = {Teixeira, Jose} } @proceedings {1522, title = {Boa: A Language and Infrastructure for Analyzing Ultra-Large-Scale Software Repositories}, year = {2013}, month = {05/2013}, pages = {422-431}, abstract = {In today{\textquoteright}s software-centric world, ultra-large-scale software repositories, e.g. SourceForge (350,000+ projects), GitHub (250,000+ projects), and Google Code (250,000+ projects) are the new library of Alexandria. They contain an enormous corpus of software and information about software. Scientists and engineers alike are interested in analyzing this wealth of information both for curiosity as well as for testing important hypotheses. 
However, systematic extraction of relevant data from these repositories and analysis of such data for testing hypotheses is hard, and best left for mining software repository (MSR) experts! The goal of Boa, a domain-specific language and infrastructure described here, is to ease testing MSR-related hypotheses. We have implemented Boa and provide a web-based interface to Boa{\textquoteright}s infrastructure. Our evaluation demonstrates that Boa substantially reduces programming efforts, thus lowering the barrier to entry. We also see drastic improvements in scalability. Last but not least, reproducing an experiment conducted using Boa is just a matter of re-running small Boa programs provided by previous researchers.}, keywords = {ease of use, forge, github, google code, lower barrier to entry, mining, repository, reproducible, scalable, Software, sourceforge}, author = {Dyer, Robert and Nguyen, Hoan Anh and Rajan, Hridesh and Nguyen, Tien N.} } @proceedings {1491, title = {Communication in Open Source Software Development Mailing Lists}, year = {2013}, note = {"The entire dataset used in the experiment, including the cards, the resolved aliases, and detailed statistical results, can be downloaded from ..." http://www.st.ewi.tudelft.nl/~guzzi/oss-communication/}, month = {05/2013}, pages = {277-286}, abstract = {Open source software (OSS) development teams use electronic means, such as emails, instant messaging, or forums, to conduct open and public discussions. Researchers investigated mailing lists considering them as a hub for project communication. Prior work focused on specific aspects of emails, for example the handling of patches, traceability concerns, or social networks. This led to insights pertaining to the investigated aspects, but not to a comprehensive view of what developers communicate about. Our objective is to increase the understanding of development mailing lists communication. 
We quantitatively and qualitatively analyzed a sample of 506 email threads from the development mailing list of a major OSS project, Lucene. Our investigation reveals that implementation details are discussed only in about 35\% of the threads, and that a range of other topics is discussed. Moreover, core developers participate in less than 75\% of the threads. We observed that the development mailing list is not the main player in OSS project communication, as it also includes other channels such as the issue repository.}, keywords = {email, lucene, mailing list}, url = {http://www.st.ewi.tudelft.nl/~guzzi/downloads/Guzzi2013msr.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/Guzzi2013msr.pdf}, author = {Guzzi, Anja and Bacchelli, Alberto and Lanza, Michele and Pinzger, Martin and van Deursen, Arie} } @proceedings {1512, title = {Discovering, Reporting, and Fixing Performance Bugs}, year = {2013}, month = {05/2013}, abstract = {Software performance is critical for how users perceive the quality of software products. Performance bugs---programming errors that cause significant performance degradation---lead to poor user experience and low system throughput. Designing effective techniques to address performance bugs requires a deep understanding of how performance bugs are discovered, reported, and fixed. In this paper, we study how performance bugs are discovered, reported to developers, and fixed by developers, and compare the results with those for non-performance bugs. We study performance and non-performance bugs from three popular code bases: Eclipse JDT, Eclipse SWT, and Mozilla. First, we find little evidence that fixing performance bugs has a higher chance to introduce new functional bugs than fixing non-performance bugs, which implies that developers may not need to be overconcerned about fixing performance bugs. 
Second, although fixing performance bugs is about as error-prone as fixing non-performance bugs, fixing performance bugs is more difficult than fixing non-performance bugs, indicating that developers need better tool support for fixing performance bugs and testing performance bug patches. Third, unlike many non-performance bugs, a large percentage of performance bugs are discovered through code reasoning, not through users observing the negative effects of the bugs (e.g., performance degradation) or through profiling. The result suggests that techniques to help developers reason about performance, better test oracles, and better profiling techniques are needed for discovering performance bugs.}, keywords = {bugs, eclipse, mozilla, performance bugs}, author = {Adrian Nistor and Tian Jiang and Tan, Lin} } @proceedings {1513, title = {Improving Bug Localization using Correlations in Crash Reports}, year = {2013}, month = {05/2013}, abstract = {Nowadays, many software organizations rely on automatic problem reporting tools to collect crash reports directly from users{\textquoteright} environments. These crash reports are later grouped together into crash types. Usually, developers prioritize crash types based on the number of crash reports and file bugs for the top crash types. Because a bug can trigger a crash in different usage scenarios, different crash types are sometimes related to the same bug. Two bugs are correlated when the occurrence of one bug causes the other bug to occur. We refer to a group of crash types related to identical or correlated bugs, as a crash correlation group. In this paper, we propose three rules to identify correlated crash types automatically. We also propose an algorithm to locate and rank buggy files using crash correlation groups. 
Through an empirical study on Firefox and Eclipse, we show that the three rules can identify crash correlation groups with a precision of 100\% and a recall of 90\% for Firefox and a precision of 79\% and a recall of 65\% for Eclipse. On the top three buggy file candidates, the proposed bug localization algorithm achieves a recall of 62\% and a precision of 42\% for Firefox and a recall of 52\% and a precision of 50\% for Eclipse. On the top 10 buggy file candidates, the recall increases to 92\% for Firefox and 90\% for Eclipse. Developers can combine the proposed crash correlation rules with the new bug localization algorithm to identify and fix correlated crash types all together.}, keywords = {eclipse, Firefox}, author = {Shaohua Wang and Foutse Khomh and Ying Zou} } @proceedings {1561, title = {A Replicable Infrastructure for Empirical Studies of Email Archives}, year = {2013}, month = {10/2013}, pages = {43-50}, publisher = {IEEE}, address = {Baltimore, MD, USA}, abstract = {This paper describes a replicable infrastructure solution for conducting empirical software engineering studies based on email mailing list archives. Mailing list emails, such as those affiliated with free, libre, and open source software (FLOSS) projects, are currently archived in several places online, but each research team that wishes to study these email artifacts closely must design their own solution for collection, storage and cleaning of the data. Consequently, research results will be difficult to replicate, especially as the email archive for any living project will still be continually growing. 
This paper describes a simple, replicable infrastructure for the collection, storage, and cleaning of project email data and analyses.}, keywords = {apache, cleaning, collection, couchdb, database, document-oriented database, email, lucene, mailing lists, nosql, replication, storage}, isbn = {978-0-7695-5121-0}, attachments = {https://flosshub.org/sites/flosshub.org/files/RESERv2.pdf}, author = {Squire, Megan} } @proceedings {1515, title = {Using Citation Influence to Predict Software Defects}, year = {2013}, month = {05/2013}, abstract = {The software dependency network reflects structure and the developer contribution network reflects process. Previous studies have used social network properties over these networks to predict whether a software component is defect-prone. However, these studies do not consider the strengths of the dependencies in the networks. In our approach, we use a citation influence topic model to determine dependency strengths among components and developers, analyze weak and strong dependencies separately, and apply social network properties to predict defect-prone components. In experiments on Eclipse and NetBeans, our approach has higher accuracy than prior work.}, keywords = {eclipse, netbeans, social network}, author = {Wei Hu and Kenny Wong} } @proceedings {1497, title = {Why So Complicated? Simple Term Filtering and Weighting for Location-Based Bug Report Assignment Recommendation}, year = {2013}, month = {05/2013}, pages = {2-11}, abstract = {Large software development projects receive many bug reports and each of these reports needs to be triaged. An important step in the triage process is the assignment of the report to a developer. Most previous efforts towards improving bug report assignment have focused on using an activity-based approach. 
We address some of the limitations of activity-based approaches by proposing a two-phased location-based approach where bug report assignment recommendations are based on the predicted location of the bug. The proposed approach utilizes a noun extraction process on several information sources to determine bug location information and a simple term weighting scheme to provide a bug report assignment recommendation. We found that by using a location-based approach, we achieved an accuracy of 89.41\% and 59.76\% when recommending five developers for the Eclipse and Mozilla projects, respectively.}, keywords = {bug, bug report assignment, eclipse, File Activity Histories, Mining Software Artifacts, mozilla, Named Entity Recognition, POS Filtering}, url = {http://www.cwu.edu/~janvik/papers/msr2013.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr2013.pdf} } @conference {1357, title = {Content classification of developer emails}, booktitle = {Proceedings of the 34th IEEE/ACM International Conference On Software Engineering (ICSE 2012)}, year = {2012}, note = {We created a web application to manually classify email content in the chosen categories. We classified a statistically significant set of emails from four java open source software (OSS) systems, used to evaluate the accuracy of our approach. The contributions of this paper are: 1) a novel approach that fuses parsing and ML techniques for classification of email lines; 2) a web application to manually classify email content; 3) the manual classification of a statistically significant sample set of emails (for a total of 67,792 lines) from mailing lists of four different software systems{\textendash}in the form of a freely available benchmark; and 4) the empirical evaluation of our approach against the benchmark}, month = {06/2012}, abstract = {Emails related to the development of a software system contain information about design choices and issues encountered during the development process. 
Exploiting the knowledge embedded in emails with automatic tools is challenging, due to the unstructured, noisy and mixed language nature of this communication medium. Natural language text is often not well-formed and is interleaved with languages with other syntaxes, such as code or stack traces. We present an approach to classify email content at line level. Our technique classifies email lines in five categories (i.e., text, junk, code, patch, and stack trace) to allow one to subsequently apply ad hoc analysis techniques for each category. We evaluated our approach on a statistically significant set of emails gathered from mailing lists of four unrelated open source systems.}, keywords = {email, Emails, Empirical software engineering, mailing list, natural language, Unstructured Data Mining}, url = {http://www.inf.usi.ch/phd/bacchelli/publications.php}, attachments = {https://flosshub.org/sites/flosshub.org/files/icse2012.pdf}, author = {Bacchelli, Alberto and Dal Sasso, Tommaso and D{\textquoteright}Ambros, Marco and Lanza, Michele} } @proceedings {1462, title = {Does OSS Affect E-Government Growth? An Econometric Analysis on the Impacting Factors}, volume = {378}, year = {2012}, month = {09/2012}, pages = {292-297}, publisher = {IFIP AICT, Springer}, abstract = {Inspired by the OSS values, an increasing number of different forms of open initiatives have come to the fore. In the context of eGovernment the notion of open government has met wide acceptance among nations and became closely related to one of its goals. Open government shares with OSS the notions of collaboration, participation and transparency and many actions towards OSS into eGovernment reform policies, have been recorded worldwide. The study investigates the relationship between OSS growth and eGovernment. 
A theoretical framework of the theories of institutionalism, growth and human capital is proposed as the guiding theoretical lens to identify possible influencing factors that together with OSS are evaluated for their magnitude of impact on eGovernment growth across different economic environments.}, keywords = {eGovernment adoption, growth theory, human capital theory, institutionalism}, author = {Lakka, Spyridoula and Stamati, Teta and Martakos, Draculis} } @proceedings {1444, title = {Gender Differences in Early Free and Open Source Software Joining Process}, volume = {378}, year = {2012}, note = {"We examined subscriber logs and data for six FOSS projects..." "We examined the differences between posters and non-posters to determine the attrition rate..." "Using data from the US Census, we matched names to lists of the most common female and male names. We identified 666 users using this process."}, month = {09/2012}, pages = {78-93}, publisher = {IFIP AICT, Springer}, address = {Eighth International Conference on Open Source Systems (OSS 2012)}, abstract = {With the growth of free and open source software (FOSS) and the adoption of FOSS solutions in business and everyday life, it is important that projects serve their growingly diverse user base. The sustainability of FOSS projects relies on a constant influx of new contributors. Several large demographic surveys found that FOSS communities are very homogenous, dominated by young men, similar to the bias existing in the rest of the IT workforce. Building on previous research, we examine mailing list subscriptions and posting statistics of female FOSS participants. New participants often experience their first interaction on a FOSS project{\textquoteright}s mailing list. We explored six FOSS projects {\textendash} Buildroot, Busybox, Jaws, Parrot, uClibc, and Yum. 
We found a declining rate of female participation from the 8.27\% of subscribers, to 6.63\% of posters, and finally the often reported code contributor rate of 1.5\%. We found a disproportionate attrition rate among women along every step of the FOSS joining process.}, keywords = {buildroot, busybox, diversity, email, jaws, joining process, mailing lists, parrot, uClibc, Yum}, author = {Kuechler, Victor and Gilbertson, Claire and Jensen, Carlos} } @article {1519, title = {How the FLOSS Research Community Uses Email Archives}, journal = {International Journal of Open Source Software and Processes}, volume = {4}, year = {2012}, note = {classifies 72 FLOSS papers into various analysis categories}, month = {12/2012}, pages = {37 - 59}, abstract = {Artifacts of the software development process, such as source code or emails between developers, are a frequent object of study in empirical software engineering literature. One of the hallmarks of free, libre, and open source software (FLOSS) projects is that the artifacts of the development process are publicly-accessible and therefore easily collected and studied. Thus, there is a long history in the FLOSS research community of using these artifacts to gain understanding about the phenomenon of open source software, which could then be compared to studies of software engineering more generally. This paper looks specifically at how the FLOSS research community has used email artifacts from free and open source projects. It provides a classification of the relevant literature using a publicly-available online repository of papers about FLOSS development using email. 
The outcome of this paper is to provide a broad overview for the software engineering and FLOSS research communities of how other researchers have used FLOSS email message artifacts in their work.}, keywords = {email, email archives, literature, mailing lists, review, Survey}, issn = {1942-3934}, doi = {10.4018/jossp.2012010103}, attachments = {https://flosshub.org/sites/flosshub.org/files/ijossp_v3_PREPRINT.pdf}, author = {Squire, Megan} } @proceedings {1457, title = {The Impact of Formal {QA} Practices on {FLOSS} Communities {\textendash} The Case of {Mozilla}}, volume = {378}, year = {2012}, month = {09/2012}, pages = {262--267}, publisher = {IFIP AICT, Springer}, abstract = {The number of FLOSS projects that include a QA step in the development model is increasing which suggests that a new layer may be emerging in the classic {\textquotedblleft}onion model{\textquotedblright}. This change might affect the information flow within projects and implicitly their sustainability. Communities, the essential resource of FLOSS projects, have been extensively studied but questions concerning QA remain. This paper takes a step towards answering such questions by analyzing QA mailing lists and issue tracker data for the Mozilla group of projects. Because the Bugzilla data set contains over half a million bugs, data processing and analysis is a considerable challenge for this research. 
The provisional conclusions are that QA activity may not be increasing steadily over time but is dependent on other factors and that the QA team and other groups of contributors form a highly connected network that doesn{\textquoteright}t contain isolates.}, keywords = {email, information flow, mailing lists, mozilla, quality assurance, social network analysis, test}, author = {Barham, Adina} } @proceedings {1455, title = {Open-Source Technologies Realizing Social Networks: A Multiple Descriptive Case-Study}, volume = {378}, year = {2012}, month = {09/2012}, pages = {250--255}, publisher = {IFIP AICT, Springer}, abstract = {This article aims at describing the role of the open-source software phenomenon within high-tech corporations providing social networks and applications. By taking a multiple case study approach, we address what are the open-source software technological components embedded by leading social networking players, and a rich description on how those players collaborate with the open-source community. Our findings, based on a population of three commercial providers of social networks, suggest that open-source plays an important role on the technological development of their social networking platforms. An open-source technological stack for realizing social networks is proposed and several managerial issues dealing with collaboration with open-source communities are explored.}, keywords = {entrepreneurship, facebook, netlog, social networks, spotify}, author = {Teixeira, Jose} } @proceedings {1456, title = {A Qualitative Method for Mining Open Source Software Repositories}, volume = {378}, year = {2012}, note = {The method proposed by this study employs content analysis Krippendorff [10], a classification technique that is frequently applied to interview and focus group data. The objective of content analysis is to ask quantitative questions about qualitative data. 
The approach is similar to the grounded theory method, but differs from grounded theory in that the results are quantitative rather than qualitative: content analysis produces results such as, {\textquotedblleft}49\% of messages submitted to project mailing lists were sent by core developers.{\textquotedblright}}, month = {09/2012}, pages = {256--261}, publisher = {IFIP AICT, Springer}, abstract = {The volume of data archived in open source software project repositories makes automated, quantitative techniques attractive for extracting and analyzing information from these archives. However, many kinds of archival data include blocks of natural language text that are difficult to analyze automatically. This paper introduces a qualitative analysis method that is transparent and repeatable, leads to objective findings when dealing with qualitative data, and is efficient enough to be applied to large archives. The method was applied in a case study of developer and user forum discussions of an open source electronic medical record project. The study demonstrates that the qualitative repository mining method can be employed to derive useful results quickly yet accurately. These results would not be possible using a strictly automated approach.}, keywords = {content analysis, Electronic Medical Record, Qualitative Research}, author = {Noll, John and Seichter, Dominik and Beecham, Sarah} } @proceedings {1442, title = {Step-by-Step Strategies and Case Studies for Embedded Software Companies to Adapt to the {FOSS} Ecosystem}, volume = {378}, year = {2012}, month = {09/2012}, pages = {48--60}, abstract = {Due to the continuous expansion of the FOSS ecosystem and the introduction of high-quality FOSS, FOSS is increasingly used in consumer electronics (CE) such as smartphones, televisions, and cameras. As a result, manufacturers of CE products have developed a close relationship with the FOSS ecosystem. 
For CE product manufacturers, efficient adaptation to the FOSS ecosystem has become an essential component in their business operations. This paper will divide the activities of CE product manufacturers within the FOSS ecosystem into the following four levels: identification, adoption, compliance, and contribution. It will examine the main activities and obstacles that arise in each level. The paper will also present instances where companies have succeeded in overcoming these obstacles.}, keywords = {embedded software, FOSS ecosystem}, author = {Kim, Suhyun and Yoo, Jaehyun and Lee, Myunghwa} }
% Entry 1396: journal article (Empirical Software Engineering, 2012). It records no volume, number, or pages fields; the doi and issn fields are the only locators present.
@article {1396, title = {Studying the impact of social interactions on software quality}, journal = {Empirical Software Engineering}, year = {2012}, note = {bug databases}, abstract = {Correcting software defects accounts for a significant amount of resources in a software project. To make best use of testing efforts, researchers have studied statistical models to predict in which parts of a software system future defects are likely to occur. By studying the mathematical relations between predictor variables used in these models, researchers can form an increased understanding of the important connections between development activities and software quality. Predictor variables used in past top-performing models are largely based on source code-oriented metrics, such as lines of code or number of changes. However, source code is the end product of numerous interlaced and collaborative activities carried out by developers. Traces of such activities can be found in the various repositories used to manage development efforts. In this paper, we develop statistical models to study the impact of social interactions in a software project on software quality. These models use predictor variables based on social information mined from the issue tracking and version control repositories of two large open-source software projects. 
The results of our case studies demonstrate the impact of metrics from four different dimensions of social interaction on post-release defects. Our findings show that statistical models based on social information have a similar degree of explanatory power as traditional models. Furthermore, our results demonstrate that social information does not substitute, but rather augments traditional source code-based metrics used in defect prediction models.}, keywords = {bug tracker, eclipse, Firefox, Human Factors, measurement, metrics, software evolution, Software quality assurance}, issn = {1573-7616}, doi = {10.1007/s10664-012-9205-0}, author = {Bettenburg, Nicolas and Hassan, Ahmed E.} } @conference {bird2012www, title = {Who? What? Where? Examining Distributed Development in Two Large Open Source Projects}, booktitle = {Proceedings of the Working Conference on Mining Software Repositories}, year = {2012}, abstract = {To date, a large body of knowledge has been built up around understanding open source software development. However, there is limited research on examining levels of geographic and organizational distribution within open source software projects, despite many studies examining these same aspects in commercial contexts. We set out to fill this gap in OSS knowledge by manually collecting data for two large, mature, successful projects in an effort to assess how distributed both geographically and organizationally. Both Firefox and Eclipse have been the subject of many studies and are ubiquitous in the areas of software development and internet usage respectively. Further, both receive substantial development contributions from many companies. As such, both are worthy of study in order to understand the development processes that they use, how distributed the projects are, and what, if any, relationship distribution has with quality. 
To this end, we identified the top contributors that made 95\% of the changes over multiple major releases of Firefox and Eclipse and determined their geographic locations and organizational affiliations. We found that Firefox is very geographically distributed with over a third of its components receiving major contributions from developers on different continents, and that components that are highly distributed have no more defects than those that are not. In contrast, Eclipse is directed and developed largely by one company; with IBM making 96\% of the total commits (49\% coming from one lab in Ottawa, Canada). We further examined the distribution in each project{\textquoteright}s constituent subsystems and report the relationship of pre- and post-release defects with geographic and organizational factors.}, keywords = {eclipse, Firefox}, attachments = {https://flosshub.org/sites/flosshub.org/files/bird2012www.pdf}, author = {Bird, Christian and Nagappan, Nachiappan} } @conference {1319, title = {Who Will Remain? An Evaluation of Actual Person-Job and Person-Team Fit to Predict Developer Retention in {FLOSS} Projects}, booktitle = {45th Hawai{\textquoteright}i International Conference on System Sciences}, year = {2012}, month = {01/2012}, pages = {3446--3455}, abstract = {Many businesses and private households rely on Free Libre Open Source Software (FLOSS). Due to a lack of sustained contributors, however, most FLOSS projects do not survive. The early identification of developers who are likely to remain is thus an eminent challenge for the management of FLOSS initiatives. Previous research has shown that individuals{\textquoteright} subjective assessment is often inaccurate emphasizing the need to objectively evaluate retention behavior. Consistent with the concepts Person-Job (P-J) and Person-Team (P-T) fit from the traditional recruitment literature, we derive objective measures to predict developer retention in FLOSS projects. 
In an analysis of the contribution behavior of former Google Summer of Code (GSoC) students we reveal that the level of development experience and conversational knowledge is strongly associated with retention. Surprisingly, our analysis reveals that students with abilities that are underrepresented in the project and students with a higher academic education do not remain considerably longer.}, keywords = {email, email archives, google summer of code, kde, mailing list, students}, doi = {10.1109/HICSS.2012.644}, attachments = {https://flosshub.org/sites/flosshub.org/files/45.pdf}, author = {Schilling, A. and Laumer, S. and Weitzel, T.} } @proceedings {1288, title = {An Analysis of Author Contribution Patterns in {Eclipse Foundation} Project Source Code}, year = {2011}, note = {"We conducted an observational study on existing Eclipse projects by extracting author attribution data for Java source code files from git repositories." "Specifically, we consider entropy of source code by counting the number of lines attributed to each author."}, month = {10/2011}, pages = {269--281}, publisher = {Springer}, abstract = {Collaborative development is a key tenet of open source software, but if not properly understood and managed, it can become a liability. We examine author contribution data for the newest revision of 251,633 Java source files in 592 Eclipse projects. We use this observational data to analyze collaboration patterns within files, and to explore relationships between file size, author count, and code authorship. We calculate author entropy to characterize the contributions of multiple authors to a given file, with an eye toward understanding the degree of collaboration and the most common interaction patterns.}, keywords = {COLLABORATION, contribution, eclipse, entropy, java}, author = {Taylor, Quinn C. and Krein, Jonathan L. and MacLean, Alexander C. and Knutson, Charles D.} } @conference {1314, title = {Apples vs. 
oranges?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {"In this MSR challenge report, we compare the source code of two industrial grade Integrated Development Environments (IDE): Netbeans, developed by Oracle Corporation, and Eclipse, developed by the Eclipse Foundation. In the same spirit as [1], our goal is not to identify how similar or different they are, but to identify differences that, if not taken into consideration, might result in biased, and potentially erroneous conclusions."}, month = {05/2011}, pages = {246--249}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {We attempt to compare the source code of two Java IDE systems: Netbeans and Eclipse. The result of this experiment shows that many factors, if ignored, could risk a bias in the results, and we posit various observations that should be taken into consideration to minimize such risk. }, keywords = {eclipse, netbeans, source code}, isbn = {9781450305747}, doi = {10.1145/1985441.1985483}, author = {Davies, Julius and German, Daniel M.} } @conference {1313, title = {Do comments explain codes adequately?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, month = {05/2011}, pages = {242--245}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Comment lines in the software source code include descriptions of codes, usage of codes, copyrights, unused codes, comments, and so on. It is required for comments to explain the content of written code adequately, since the wrong description in the comment may causes further bug and confusion in maintenance. In this paper, we try to clarify a research question: "In which projects do comments describe the code adequately?" 
To answer this question, we selected the group 1 of mining challenge and used data obtained from Eclipse and Netbeans. Since it is difficult to answer the above question directly, we define the distance between codes and comments. By utilizing the fault-prone module prediction technique, we can answer the alternative question from the data of two projects. The result shows that Eclipse project has relatively adequate comments.}, keywords = {comments, eclipse, msr challenge, netbeans, prediction}, isbn = {9781450305747}, doi = {10.1145/1985441.1985482}, author = {Mizuno, Osamu and Hirata, Yukinao} } @article {1418, title = {Effort estimation of {FLOSS} projects: a study of the {Linux} kernel}, journal = {Empirical Software Engineering}, year = {2011}, pages = {1--29}, abstract = {Empirical research on Free/Libre/Open Source Software (FLOSS) has shown that developers tend to cluster around two main roles: {\textquotedblleft}core{\textquotedblright} contributors differ from {\textquotedblleft}peripheral{\textquotedblright} developers in terms of a larger number of responsibilities and a higher productivity pattern. A further, cross-cutting characterization of developers could be achieved by associating developers with {\textquotedblleft}time slots{\textquotedblright}, and different patterns of activity and effort could be associated to such slots. Such analysis, if replicated, could be used not only to compare different FLOSS communities, and to evaluate their stability and maturity, but also to determine within projects, how the effort is distributed in a given period, and to estimate future needs with respect to key points in the software life-cycle (e.g., major releases). This study analyses the activity patterns within the Linux kernel project, at first focusing on the overall distribution of effort and activity within weeks and days; then, dividing each day into three 8-hour time slots, and focusing on effort and activity around major releases. 
Such analyses have the objective of evaluating effort, productivity and types of activity globally and around major releases. They enable a comparison of these releases and patterns of effort and activities with traditional software products and processes, and in turn, the identification of company-driven projects (i.e., working mainly during office hours) among FLOSS endeavors. The results of this research show that, overall, the effort within the Linux kernel community is constant (albeit at different levels) throughout the week, signalling the need of updated estimation models, different from those used in traditional 9am{\textendash}5pm, Monday to Friday commercial companies. It also becomes evident that the activity before a release is vastly different from after a release, and that the changes show an increase in code complexity in specific time slots (notably in the late night hours), which will later require additional maintenance efforts.}, keywords = {complexity, effort estimation, Effort models, mining software repositories, open source software}, issn = {1573-7616}, doi = {10.1007/s10664-011-9191-7}, url = {http://www.springerlink.com/content/612r616k8t52m867/fulltext.html}, author = {Capiluppi, Andrea and Izquierdo-Cort{\'a}zar, Daniel} } @conference {1304, title = {Entering the circle of trust}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {"we started by analyzing 219 projects" }, month = {05/2011}, pages = {133--142}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {The success of an open-source project depends to a large degree on the proactive and constructive participation by the developer community. An important role that developers play in a project is that of a code committer. However, code-commit privilege is typically restricted to the core group of a project. 
In this paper, we study the phenomenon of the induction of external developers as code committers. The trustworthiness of an external developer is one of the key factors that determines the granting of commit privileges. Therefore, we formulate different hypotheses to explain how the trust is established in practice. To investigate our hypotheses, we developed an automated approach based on mining code repositories and bug-tracking systems. We implemented the approach and performed an empirical study, using the Eclipse projects, to test the hypotheses. Our results indicate that, most frequently, developers establish trust and credibility in a project by contributing to the project in a non-committer role. Moreover, the employing organization of a developer is another factor--although a less significant one--that influences trust.}, keywords = {bug tracking, bug tracking system, commits, committers, core, developers, eclipse, trust}, isbn = {9781450305747}, doi = {10.1145/1985441.1985462}, author = {Mani, Senthil and Sinha, Saurabh and Sinha, Vibha Singhal} } @conference {Bacchelli:2011:EEE:1985793.1985999, title = {Exploring, exposing, and exploiting emails to include human factors in software engineering}, booktitle = {Companion to the Proceedings of the 33rd International Conference on Software Engineering}, series = {ICSE {\textquoteright}11}, year = {2011}, note = {This paper is a summary of work in the field, for the doctoral consortium.}, pages = {1074--1077}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Researchers mine software repositories to support software maintenance and evolution. The analysis of the structured data, mainly source code and changes, has several benefits and offers precise results. This data, however, leaves communication in the background, and does not permit a deep investigation of the human factor, which is crucial in software engineering. 
Software repositories also archive documents, such as emails or comments, that are used to exchange knowledge among people - we call it "people-centric information." By covering this data, we include the human factor in our analysis, yet its unstructured nature makes it currently sub-exploited. Our work, by focusing on email communication and by implementing the necessary tools, investigates methods for exploring, exposing, and exploiting unstructured data. We believe it is possible to close the gap between development and communication, extract opinions, habits, and views of developers, and link implementation to its rationale; we see in a future where software analysis and development is routinely augmented with people-centric information.}, keywords = {email communication, toolset, unstructured data}, isbn = {978-1-4503-0445-0}, doi = {10.1145/1985793.1985999}, url = {http://doi.acm.org/10.1145/1985793.1985999}, author = {Bacchelli, Alberto} } @proceedings {1283, title = {Framing the Conundrum of Total Cost of Ownership of Open Source Software}, year = {2011}, month = {10/2011}, pages = {208--219}, publisher = {Springer}, abstract = {This paper reflects the results of phase I of our study on the total cost of ownership (TCO) of open source software adoption. Not only have we found TCO to be an intriguing issue but it is contentious, baffling and each company approaches it in a distinctive manner (and sometimes not at all). In effect it is a conundrum that needs unpacking before it can be explained and understood. Our paper discusses the components of TCO as total cost of ownership and total cost of acquisition (and besides). 
Using this broad dichotomy and its various components we then analyze our data to make sense of procurement decisions in relation to open source software in the public sector and private companies.}, keywords = {benefits, exit costs, open source software, software adoption, Survey, tco, total cost of ownership}, author = {Shaikh, Maha and Cornford, Tony} } @conference {1307, title = {How do developers blog?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {publishing frequency, post structure, word usage, publication patterns, content}, month = {05/2011}, pages = {123--132}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {We report on an exploratory study, which aims at understanding how software developers use social media compared to conventional development infrastructures. We analyzed the blogging and the committing behavior of 1,100 developers in four large open source communities. We observed that these communities intensively use blogs with one new entry about every 8 hours. A blog entry includes 14 times more words than a commit message. When analyzing the content of the blogs, we found that most popular topics represent high-level concepts such as functional requirements and domain concepts. Source code related topics are covered in less than 15\% of the posts. Our results also show that developers are more likely to blog after corrective engineering and management activities than after forward engineering and re-engineering activities. 
Our findings call for a hypothesis-driven research to further understand the role of social media in software engineering and integrate it into development processes and tools.}, keywords = {blog, communication, developer, eclipse, gnome, LDA, postgres, python}, isbn = {9781450305747}, doi = {10.1145/1985441.1985461}, author = {Maalej, Walid and Pagano, Dennis} } @conference {1481, title = {Joining Free/Open Source Software Communities: An Analysis of Newbies{\textquoteright} First Interactions on Project Mailing Lists}, booktitle = {2011 44th Hawaii International Conference on System Sciences (HICSS 2011)}, year = {2011}, pages = {1--10}, publisher = {IEEE}, organization = {IEEE}, address = {Kauai, HI}, abstract = {Free/Open source software (FOSS) is an important part of the IT ecosystem. Due to the voluntary nature of participation, continual recruitment is key to the growth and sustainability of these communities. It is therefore important to understand how and why potential contributors fail in the process of transitioning from user to contributor. Most newcomers, or "newbies", have their first interaction with a community through a mailing list. To understand how this first contact influences future interactions, we studied eight mailing lists across four FOSS projects: MediaWiki, GIMP, PostgreSQL, and Subversion. We analyzed discussions initiated by newbies to determine the effect of gender, nationality, politeness, helpfulness and timeliness of response. We found that nearly 80\% of newbie posts received replies, and that receiving timely responses, especially within 48 hours, was positively correlated with future participation. 
We also found that while the majority of interactions were positive, 1.5\% of responses were rude or hostile.}, keywords = {email, email archive, gimp, mailing list, mediawiki, postgresql, subversion}, isbn = {978-1-4244-9618-1}, doi = {10.1109/HICSS.2011.264}, author = {Jensen, Carlos and King, Scott and Kuechler, Victor} } @proceedings {1277, title = {Knowledge Homogeneity and Specialization in the {Apache HTTP Server} Project}, year = {2011}, note = {"Our data set consists of the commit history and email archives for the Apache HTTP Server Project, spanning sixteen years (2/27/1995 - 1/31/2011)" "we 1) mapped the committers to email records, 2) cleaned the email records to remove extraneous information, 3) identified topics of discussion in the resulting messages, and 4) constructed a social network model from committers and topics." "If specialization exists within the httpd community, we should see distinct communities develop around topics. In addition, unique groups of developers should congregate around specialized subtopics. We examined the data from both angles: topical affinity and topic communities." }, month = {10/2011}, pages = {106--122}, publisher = {Springer}, abstract = {We present an analysis of developer communication in the Apache HTTP Server project. Using topic modeling techniques we expose latent conceptual sub-communities arising from developer specialization within the greater developer population. However, we found that among the major contributors to the project, very little specialization exists. We present theories to explain this phenomenon, and suggest further research.}, keywords = {apache, commits, developer, email, email archive, LDA, mailing list, revision control, revision history, scm, social network analysis, specialization, subversion, svn}, url = {http://sequoia.cs.byu.edu/lab/files/pubs/MacLean2011a.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/MacLean2011a.pdf}, author = {MacLean, Alexander C. 
and Pratt, Landon J. and Knutson, Charles D. and Ringger, Eric K.} } @proceedings {1281, title = {License Update and Migration Processes in Open Source Software Projects}, year = {2011}, note = {"The case studies in this report are part of an ongoing, multi-year research project discovering and modeling open source software processes. Our research methodology is ethnographically informed, applying a grounded theory to the analysis of artifacts found in OSS projects. The primary data sources in this study come from mailing list archives of the Apache and NetBeans projects."}, month = {10/2011}, pages = {177--195}, publisher = {Springer}, abstract = {Open source software (OSS) has increasingly been the subject of research efforts. Central to this focus is the nature under which the software can be distributed, used, and modified and the causes and consequent effects on software development, usage, and distribution. At present, we have little understanding of, what happens when these licenses change, what motivates such changes, and how new licenses are created, updated, and deployed. Similarly, little attention has been paid to the agreements under which contributions are made to OSS projects and the impacts of changes to these agreements. We might also ask these same questions regarding the licenses governing how individuals and groups contribute to OSS projects. 
This paper focuses on addressing these questions with case studies of processes by which the Apache Software Foundation{\textquoteright}s creation and migration to Version 2.0 of the Apache Software License and the NetBeans project{\textquoteright}s migration to the Joint Licensing Agreement.}, keywords = {apache, case study, email, email archive, license evolution, mailing list, netbeans, open source, process}, attachments = {https://flosshub.org/sites/flosshub.org/files/1.pdf}, author = {Jensen, Chris and Scacchi, Walt} } @conference {1406, title = {Process Mining Software Repositories}, booktitle = {15th European Conference on Software Maintenance and Reengineering (CSMR 2011)}, year = {2011}, pages = {5--14}, publisher = {IEEE}, organization = {IEEE}, address = {Oldenburg, Germany}, abstract = {Software developers{\textquoteright} activities are in general recorded in software repositories such as version control systems, bug trackers and mail archives. While abundant information is usually present in such repositories, successful information extraction is often challenged by the necessity to simultaneously analyze different repositories and to combine the information obtained. We propose to apply process mining techniques, originally developed for business process analysis, to address this challenge. However, in order for process mining to become applicable, different software repositories should be combined, and {\textquotedblleft}related{\textquotedblright} software development events should be matched: e.g., mails sent about a file, modifications of the file and bug reports that can be traced back to it. The combination and matching of events has been implemented in FRASR (FRamework for Analyzing Software Repositories), augmenting the process mining framework ProM. 
FRASR has been successfully applied in a series of case studies addressing such aspects of the development process as roles of different developers and the way bug reports are handled.}, keywords = {amsn, email, email archives, gcc, mailing list, Process mining, software repositories}, isbn = {978-1-61284-259-2}, doi = {10.1109/CSMR.2011.5}, attachments = {https://flosshub.org/sites/flosshub.org/files/2011-03_CSMR.pdf}, author = {Poncin, Wouter and Serebrenik, Alexander and van den Brand, Mark} } @conference {1308, title = {Social interactions around cross-system bug fixings}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {"We rely on information stored in versioning database and mailing lists of both systems and develop methods to reconstruct and integrate different historical database"}, month = {05/2011}, pages = {143--152}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Cross-system bug fixing propagation is frequent among systems having similar characteristics, using a common framework, or, in general, systems with cloned source code fragments. While previous studies showed that clones tend to be properly maintained within a single system, very little is known about cross-system bug management. This paper describes an approach to mine explicitly documented cross-system bug fixings, and to relate their occurrences to social characteristics of contributors discussing through the project mailing lists--e.g., degree, betweenness, and brokerage--as well as to the contributors{\textquoteright} activity on source code. The paper reports results of an empirical study carried out on FreeBSD and OpenBSD kernels. The study shows that the phenomenon of cross-system bug fixing between these two projects occurs often, despite the limited overlap of contributors. 
The study also shows that cross-system bug fixings mainly involve contributors with the highest degree, betweenness and brokerage level, as well as contributors that change the source code more than others.}, keywords = {bug fixing, bug tracking system, committers, email, email archives, freebsd, mailing list, openbsd}, isbn = {9781450305747}, doi = {10.1145/1985441.1985463}, author = {Cerulo, Luigi and Cimitile, Marta and Di Penta, Massimiliano and Canfora, Gerardo} } @conference {1310, title = {System compatibility analysis of Eclipse and Netbeans based on bug data}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, month = {05/2011}, pages = {230--233}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Eclipse and Netbeans are two top of the line Integrated Development Environments (IDEs) for Java development. Both of them provide support for a wide variety of development tasks and have a large user base. This paper provides an analysis and comparison for the compatibility and stability of Eclipse and Netbeans on the three most commonly used operating systems, Windows, Linux and Mac OS. Both IDEs are programmed in Java and use a Bugzilla issue tracker to track reported bugs and feature requests. We looked into the Bugzilla repository databases of these two IDEs, which contains the bug records and histories of these two IDEs. We used some basic data mining techniques to analyze some historical statistics of the bug data. Based on the analysis, we try to answer certain stability-comparison oriented questions in the paper, so that users can have a better idea which of these two IDEs is designed better to work on different platforms.
}, keywords = {bug tracking system, bugzilla, eclipse, msr challenge, netbeans, version history}, isbn = {9781450305747}, doi = {10.1145/1985441.1985479}, author = {Baik, Eilwoo and Devanbu, Premkumar and Wang, Xinlei (Oscar)} } @conference {Stroulia:2011:TDS:1984665.1984670, title = {Teaching distributed software engineering with UCOSP: the undergraduate capstone open-source project}, booktitle = {Proceedings of the 2011 Community Building Workshop on Collaborative Teaching of Globally Distributed Software Development}, series = {CTGDSD {\textquoteright}11}, year = {2011}, pages = {20{\textendash}25}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software engineering courses in computer-science departments are meant to prepare students for the practice of designing, developing, understanding and maintaining software in the real world. The effectiveness of these courses have potentially a tremendous impact on the software industry, since it is through these courses that students must learn the state-of-the-art process and the tools of their eventual "trade", so that they can bring this knowledge to their job and thus advance the actual state of practice. The value of "learning software engineering" through project-based courses has long been recognized by educators and practitioners alike.
In this paper, we discuss our experience with a distributed project-based course, which infuses the students{\textquoteright} learning experience with an increased degree of realism, which, we believe, further improves the quality of their learning and advances their readiness to join the profession.}, keywords = {distributed, education, pedagogical, project-based courses, software engineering education}, isbn = {978-1-4503-0590-7}, doi = {10.1145/1984665.1984670}, url = {http://doi.acm.org/10.1145/1984665.1984670}, author = {Stroulia, Eleni and Bauer, Ken and Craig, Michelle and Reid, Karen and Wilson, Greg} } @proceedings {1286, title = {Towards Improving OSS Products Selection {\textendash} Matching Selectors and OSS Communities Perspectives}, year = {2011}, month = {10/2011}, pages = {244--258}, publisher = {Springer}, abstract = {Adopting third-party software is becoming an economical and strategic need for today organizations. A fundamental part of its successful adoption is the informed selection of products that best fit the organization needs. One of the main current problems hampering selection, specially of OSS products is the vast amount of unstructured, incomplete, evolvable and widespread information about products that highly increases the risks of taking a wrong decision. In this paper, we aim to inform and provide evidence to OSS communities that help them to envisage improvements on their information rendering strategies to satisfy industrial OSS selectors{\textquoteright} needs. Our results are from the matching between the informational needs of 23 OSS selectors from diverse software-intensive organizations, and the in-depth study of 9 OSS communities of different sizes and domains. The results evidenced specific areas of improvement that might help to enhance the industrial OSS selection practice.}, keywords = {empirical study, information rendering strategy, open source software, selection}, author = {Ayala, Claudia and Cruzes, Daniela S.
and Franch, Xavier and Conradi, Reidar} } @conference {Bougie:2011:TUT:1984701.1984707, title = {Towards understanding twitter use in software engineering: preliminary findings, ongoing challenges and future questions}, booktitle = {Proceedings of the 2nd International Workshop on Web 2.0 for Software Engineering}, series = {Web2SE {\textquoteright}11}, year = {2011}, note = {paper d/l from http://www.thechiselgroup.org/publications/content/towards-understanding-twitter-use-software-engineering-preliminary-findings-ong "From this site, we selected the top 30 individuals for the topics Linux and Eclipse. We chose these two topics based on their potential to expose "tweeters" from a large operating system community as well as an IDE development community. We also decided to investigate a project for which all committers use Twitter. Through a colleague, we were informed that the MXUnit project lists the Twitter user names for all eight of its committers. The MXUnit project [5] is a small, open source ColdFusion test framework that is written as an Eclipse plug-in."}, pages = {31{\textendash}36}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {There has been some research conducted around the motivation for the use of Twitter and the value brought by micro-blogging tools to individuals and business environments. This paper builds on our understanding of how the phenomenon affects the population which birthed the technology: Software Engineers. We find that the Software Engineering community extensively leverages Twitter{\textquoteright}s capabilities for conversation and information sharing and that use of the tool is notably different between distinct Software Engineering groups. 
Our work exposes topics for future research and outlines some of the challenges in exploring this type of data.}, keywords = {eclipse, linux, mxunit, social media, software development, twitter, web 2.0}, isbn = {978-1-4503-0595-2}, doi = {10.1145/1984701.1984707}, url = {http://doi.acm.org/10.1145/1984701.1984707}, attachments = {https://flosshub.org/sites/flosshub.org/files/WEB2SE2011.pdf}, author = {Bougie, Gargi and Starke, Jamie and Storey, Margaret-Anne and Daniel M. German} }
@conference {Rigby:2011:UBB:1985793.1985867, title = {Understanding broadcast based peer review on open source software projects}, booktitle = {Proceedings of the 33rd International Conference on Software Engineering}, series = {ICSE {\textquoteright}11}, year = {2011}, note = {http://helium.cs.uvic.ca/other/Rigby2011ICSE.pdf 5 projects}, pages = {541{\textendash}550}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software peer review has proven to be a successful technique in open source software (OSS) development. In contrast to industry, where reviews are typically assigned to specific individuals, changes are broadcast to hundreds of potentially interested stakeholders. Despite concerns that reviews may be ignored, or that discussions will deadlock because too many uninformed stakeholders are involved, we find that this approach works well in practice. In this paper, we describe an empirical study to investigate the mechanisms and behaviours that developers use to find code changes they are competent to review. We also explore how stakeholders interact with one another during the review process. We manually examine hundreds of reviews across five high profile OSS projects.
Our findings provide insights into the simple, community-wide techniques that developers use to effectively manage large quantities of reviews. The themes that emerge from our study are enriched and validated by interviewing long-serving core developers.}, keywords = {apache, case studies, email, freebsd, grounded theory, kde, linux, linux kernel, open source software, peer review, subversion}, isbn = {978-1-4503-0445-0}, doi = {10.1145/1985793.1985867}, attachments = {https://flosshub.org/sites/flosshub.org/files/Rigby2011ICSE.pdf}, author = {Peter C. Rigby and Storey, Margaret-Anne} } @article {1382, title = {Analysis of virtual communities supporting OSS projects using social network analysis}, journal = {Information and Software Technology}, volume = {52}, year = {2010}, month = {3/2010}, pages = {296--303}, abstract = {This paper analyses the behaviour of virtual communities for Open Source Software (OSS) projects. The development of OSS projects relies on virtual communities, which are built on relationships among members, being their final objective sharing knowledge and improving the underlying project. This study addresses the interactive collaboration in these kinds of communities applying social network analysis (SNA). In particular, SNA techniques will be used to identify those members playing a middle-man role among other community members. Results will illustrate the importance of this role to achieve successful virtual communities.}, keywords = {arm, email, Knowledge brokers, linux, mailing list, open source software, social network analysis, virtual communities}, issn = {09505849}, doi = {10.1016/j.infsof.2009.10.007}, url = {http://www.sciencedirect.com/science/article/pii/S0950584909001888}, author = {Toral, S. L. and Mart{\'\i}nez-Torres, M. R.
and Barrero, F.} } @conference {1256, title = {Analyzing Leadership Dynamics in Distributed Group Communication}, booktitle = {2010 43rd Hawaii International Conference on System Sciences (HICSS 2010)}, year = {2010}, note = {"Our analysis examines the communication patterns in two FLOSS development projects, Fire and Gaim" "These data were imported into a database to allow automated analysis. The Fire data set includes about 1,800 events in the user email list, 7,800 messages in the developer venues, and 1,300 events in the combined trackers, spanning a period of 54 months. The significantly larger Gaim data set included over 41,000 events in the user forum, over 30,000 events in the developer venues, and about 20,000 events in the trackers, generated over 78 months." "The dynamic network analysis was performed using a scientific workflow tool, Taverna Workbench"}, pages = {1--10}, publisher = {IEEE}, organization = {IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {We apply social network analysis (SNA) to examine the dynamics of leadership in distributed groups, specifically Free/Libre Open Source Software development projects, and its relation to group performance. Based on prior work on leadership in distributed groups, we identify leaders with those who make the highest level of contribution to the group and assess the degree of leadership by measuring centralization of communications. We compare the dynamics of leadership in two FLOSS projects, one more and one less effective. We find that in both projects, centralization was higher in developer-oriented communications venues than in user-oriented venues, suggesting higher degrees of leadership in developer venues. However, we do not find a consistent relation between centralization and effectiveness.
We suggest that SNA can instead be useful for identifying interesting periods in the history of the project, e.g., periods where the leadership of the project is in transition.}, keywords = {core, DYNAMICS, email, email archives, fire, flossmole, gaim, leadership, mailing list, project success, social network analysis, srda}, isbn = {978-1-4244-5509-6}, doi = {10.1109/HICSS.2010.62}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-06-02.pdf}, author = {Kevin Crowston and Andrea Wiggins and Howison, James} } @conference {954, title = {Clones: What is that smell?}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {72--81}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Clones are generally considered bad programming practice in software engineering folklore. They are identified as a bad smell and a major contributor to project maintenance difficulties. Clones inherently cause code bloat, thus increasing project size and maintenance costs. In this work, we try to validate the conventional wisdom empirically to see whether cloning makes code more defect prone. This paper analyses relationship between cloning and defect proneness. We find that, first, the great majority of bugs are not significantly associated with clones. Second, we find that clones may be less defect prone than non-cloned code. Finally, we find little evidence that clones with more copies are actually more error prone. Our findings do not support the claim that clones are really a "bad smell".
Perhaps we can clone, and breathe easy, at the same time.}, keywords = {apache, bug fix revisions, bugs, clone, evolution, gimp, nautilus, scm, source code}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463343}, attachments = {https://flosshub.org/sites/flosshub.org/files/72rahman2010cws.pdf}, author = {Rahman, Foyzur and Christian Bird and Devanbu, Premkumar} } @conference {960, title = {A comparative exploration of FreeBSD bug lifetimes}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {106--109}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {In this paper, we explore the viability of mining the basic data provided in bug repositories to predict bug lifetimes. We follow the method of Lucas D. Panjer as described in his paper, Predicting Eclipse Bug Lifetimes. However, in place of Eclipse data, the FreeBSD bug repository is used. We compare the predictive accuracy of five different classification algorithms applied to the two data sets. In addition, we propose future work on whether there is a more informative way of classifying bugs than is considered by current bug tracking systems.}, keywords = {bug reports, bug tracking, classification, eclipse, msr challenge, prediction}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463291}, attachments = {https://flosshub.org/sites/flosshub.org/files/106ChallengeGargi.pdf}, author = {Bougie, Gargi and Treude, Christoph and Daniel M.
German and Storey, Margaret-Anne} } @article {1330, title = {Data Mining User Activity in Free and Open Source Software (FOSS)/ Open Learning Management Systems}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {65--75}, abstract = {Free and Open Source Software (FOSS)/Open Educational Systems development projects abound in higher education today. Many universities worldwide have adopted open source software like ATutor and Moodle as an alternative to commercial or homegrown systems. The move to open source learning management systems entails many special considerations, including usage analysis facilities. The tracking of users and their activities poses major technical and analytical challenges within web-based systems. This paper examines how user activity tracking challenges are met with data mining techniques, particularly web usage mining methods, in four different open learning management systems: ATutor, LON-CAPA, Moodle, and Sakai. As examples of data mining technologies adapted within widely used systems, they represent important first steps for moving educational data mining outside the research laboratory. Moreover, as examples of different open source development contexts, exemplify the potential for programmatic integration of data mining technology processes in the future.
As open systems mature in the use of educational data mining, they move closer to the long-sought goal of achieving more interactive, personalized, adaptive learning environments online on a broad scale.}, keywords = {data mining, education, student}, issn = {1942-3934}, doi = {10.4018/jossp.2010010105}, author = {McGrath, Owen} } @article {1328, title = {Developing a Dynamic and Responsive Online Learning Environment}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {32--48}, abstract = {Charles Sturt University adopted the open source software, Sakai, as the foundation for the university{\textquoteright}s new, integrated Online Learning Environment. This study explores whether a pedagogical advantage exists in adopting such an open source learning management system. Research suggests that the community source approach to development of open source software has many inherent pedagogical advantages, but this paper examines whether this is due to the choice of open source software or simply having access to appropriate technology for learning and teaching in the 21st century. The author also addresses the challenges of the project management methodology and processes in the large-scale implementation of an open-source courseware management solution at the institutional level. Consequently, this study outlines strategies that an institution can use to harness the potential of a community source approach to software development to meet the institutional and individual user needs into the future.
}, keywords = {education, learning, sakai}, issn = {1942-3934}, doi = {10.4018/jossp.2010010103}, author = {Buchan, Janet} } @conference {961, title = {Do stack traces help developers fix bugs?}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {118--121}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {A widely shared belief in the software engineering community is that stack traces are much sought after by developers to support them in debugging. But limited empirical evidence is available to confirm the value of stack traces to developers. In this paper, we seek to provide such evidence by conducting an empirical study on the usage of stack traces by developers from the ECLIPSE project. Our results provide strong evidence to this effect and also throws light on some of the patterns in bug fixing using stack traces.
We expect the findings of our study to further emphasize the importance of adding stack traces to bug reports and that in the future, software vendors will provide more support in their products to help general users make such information available when filing bug reports.}, keywords = {bug fixing, bug report, debugging, eclipse, stack trace}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463280}, attachments = {https://flosshub.org/sites/flosshub.org/files/118-10-msr.pdf}, author = {Schr{\"o}ter, Adrian and Bettenburg, Nicolas and Premraj, Rahul} } @conference {951, title = {The evolution of ANT build systems}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {42--51}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Build systems are responsible for transforming static source code artifacts into executable software. While build systems play such a crucial role in software development and maintenance, they have been largely ignored by software evolution researchers. With a firm understanding of build system aging processes, project managers could allocate personnel and resources to build system maintenance tasks more effectively, reducing the build maintenance overhead on regular development activities. In this paper, we study the evolution of ANT build systems from two perspectives: (1) a static perspective, where we examine the build system specifications using software metrics adopted from the source code domain; and (2) a dynamic perspective where representative sample build runs are conducted and their output logs are analyzed.
Case studies of four open source ANT build systems with a combined history of 152 releases show that not only do ANT build systems evolve, but also that they need to react in an agile manner to changes in the source code.}, keywords = {ant, argouml, build, eclipse, jboss, maintenance, metrics, source code, tomcat}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463341}, attachments = {https://flosshub.org/sites/flosshub.org/files/42msr2010_mcintosh.pdf}, author = {McIntosh, Shane and Adams, Bram and Hassan, Ahmed E.} } @conference {1258, title = {Exploring Complexity in Open Source Software: Evolutionary Patterns, Antecedents, and Outcomes}, booktitle = {2010 43rd Hawaii International Conference on System Sciences (HICSS 2010)}, year = {2010}, note = {"The sample of projects was drawn from SourceForge" "projects were selected that were built with C++." "Applying the selection criteria generated a total of 108 projects for analysis" "Scientific Toolwork{\textquoteright}s Understand (version 1.4)"}, pages = {1--11}, publisher = {IEEE}, organization = {IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {Software complexity is important to researchers and managers, yet much is unknown about how complexity evolves over the life of a software application and whether different dimensions of software complexity may exhibit similar or different evolutionary patterns. Using cross-sectional and longitudinal data on a sample of 108 open source projects, this research investigated how the complexity of open source project releases varied throughout the life of the project. Functional data analysis was applied to the release histories of the projects and recurring evolutionary patterns were derived. There were projects that saw little evolution, according to their measures of size and structural complexity. However, projects that displayed some evolution often differed on the pattern of evolution depending on whether size or structural complexity was examined.
Factors that contribute to and result from the patterns of complexity were evaluated, and implications for research and practice are presented.}, keywords = {complexity, evolution, fda, life cycle, sourceforge, srda}, isbn = {978-1-4244-5509-6}, doi = {10.1109/HICSS.2010.198}, attachments = {https://flosshub.org/sites/flosshub.org/files/10-07-02.pdf}, author = {Darcy, David P. and Daniel, Sherae L. and Stewart, Katherine J.} } @conference {950, title = {An extensive comparison of bug prediction approaches}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {31--41}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Reliably predicting software defects is one of software engineering{\textquoteright}s holy grails. Researchers have devised and implemented a plethora of bug prediction approaches varying in terms of accuracy, complexity and the input data they require. However, the absence of an established benchmark makes it hard, if not impossible, to compare approaches. We present a benchmark for defect prediction, in the form of a publicly available data set consisting of several software systems, and provide an extensive comparison of the explanative and predictive power of well-known bug prediction approaches, together with novel approaches we devised.
Based on the results, we discuss the performance and stability of the approaches with respect to our benchmark and deduce a number of insights on bug prediction models.}, keywords = {apache, bug reports, eclipse, famix, lucene, mylyn, prediction, scm}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463279}, attachments = {https://flosshub.org/sites/flosshub.org/files/31dambrosLanzaRobbes31.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele and Robbes, Romain} } @conference {1358, title = {Extracting source code from e-mails}, booktitle = {Proceedings of ICPC 2010 (18th IEEE International Conference on Program Comprehension)}, year = {2010}, note = {"We want to extract source code fragments from e-mail messages. To do this, we first need to select e-mails that contain source code fragments, and then we extract such fragments from the content in which they are enclosed." "we manually build a statistically significant benchmark taking sample e-mails from five unrelated open source Java software systems." }, pages = {24--33}, abstract = {E-mails, used by developers and system users to communicate over a broad range of topics, offer a valuable source of information. If archived, e-mails can be mined to support program comprehension activities and to provide views of a software system that are alternative and complementary to those offered by the source code. However, e-mails are written in natural language, and therefore contain noise that makes it difficult to retrieve the important data. Thus, before conducting an effective system analysis and extracting data for program comprehension, it is necessary to select the relevant messages, and to expose only the meaningful information. In this work we focus both on classifying e-mails that hold fragments of the source code of a system, and on extracting the source code pieces inside the e-mail. We devised and analyzed a number of lightweight techniques to accomplish these tasks.
To assess the validity of our techniques, we manually inspected and annotated a statistically significant number of e-mails from five unrelated open source software systems written in Java. With such a benchmark in place, we measured the effectiveness of each technique in terms of precision and recall.}, keywords = {argouml, email, freenet, jmeter, mailing lists, mina, natural language, openjpa, source code}, url = {http://www.inf.usi.ch/phd/bacchelli/publications.php}, attachments = {https://flosshub.org/sites/flosshub.org/files/icpc2010.pdf}, author = {Bacchelli, Alberto and D{\textquoteright}Ambros, Marco and Lanza, Michele} } @article {1134, title = {A Fistful of Dollars: Financial Rewards, Payment Norms, and Motivation Crowding in Open Source Software Development}, year = {2010}, month = {04/2010}, institution = {ssrn}, address = {ssrn}, abstract = {Existing literature on open source software (OSS) maintains that intrinsic motivation and extrinsic financial rewards have a unidimensionally positive effect on the motivation of individual developers. Based on self-determination theory, which underlies most of these studies, we challenge this assumption. We argue that the effect of payment on both intrinsic motivation and total motivation of OSS developers is far more complex. To illustrate our point, we introduce the concept of individuals{\textquoteright} norms about payment to the field of OSS. In doing so, we are able to show that payment norms moderate the effect of payment on intrinsic motivation and total motivation. Conducting a scenario experiment, we find that intrinsic motivation decreases for individuals with norms against payment. This effect becomes even stronger when analyzing for mediation effects. Total motivation is impacted positively by payment, but the effect turns insignificant for individuals with norms for payment. 
Our findings help explain the results of previous studies in which OSS developers did not seem to be affected by motivation crowding. They further contribute to the more general debate on how to manage individuals in the absence of formal contracts. From a practical perspective, we show that financial rewards may create a management dilemma for OSS project leaders. }, keywords = {developers, experiment, financial, MOTIVATION}, author = {Oliver Alexy and Martin Leitner} } @article {1327, title = {Higher Education and FOSS for e-Learning}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {15--31}, abstract = {This paper examines the paradox of FOSS adoption in U.S. institutions of higher education, where campus-wide deployment of FOSS for e-learning lags far behind adoption for technical infrastructure applications. Drawing on the fields of organizational management, information systems, and education, the author argues that the gap between FOSS advocacy and the enterprise-wide deployment of FOSS for e-learning is a consequence of the divergent perspectives of two organizational sub-cultures{\textemdash}the technologist and the academic{\textemdash}and the extent to which those sub-cultures are likely to embrace FOSS. The author recommends (a) collaborative needs analysis/assessment prior to a go/no go adoption decision, and (b) broad dissemination of total cost of ownership (TCO) data by institutions deploying FOSS for e-learning enterprise-wide. This discussion satisfies e-learning administrators and practitioners seeking research-based, cross-disciplinary evidence about the FOSS decision-making process and also assists educators seeking to expand student knowledge of e-learning technology options.
}, keywords = {education}, issn = {1942-3934}, doi = {10.4018/jossp.2010010102}, author = {Williams van Rooij, Shahron} } @conference {965, title = {Identifying licensing of jar archives using a code-search approach}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {151 - 160}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Free and open source software strongly promotes the reuse of source code. Some open source Java components/libraries are distributed as jar archives only containing the bytecode and some additional information. For whoever wanting to integrate this jar in her own project, it is important to determine the license(s) of the code from which the jar archive was produced, as this affects the way that such component can be used. This paper proposes an automatic approach to determine the license of jar archives, combining the use of a code-search engine with the automatic classification of licenses contained in textual files enclosed in the jar. Results of an empirical study performed on 37 jars - from 17 different systems - indicate that this approach is able to successfully infer the jar licenses in over 95\% of the cases, but that in many cases the license in textual files may differ from the one of the classes contained in the jar.}, keywords = {apache, bytecode, classification, eclipse, google code, jar, java, licenses, source code}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463282}, attachments = {https://flosshub.org/sites/flosshub.org/files/151msr2010.pdf}, author = {Di Penta, Massimiliano and Daniel M. 
German and Antoniol, Giuliano} } @article {1240, title = {Impact of Programming Language Fragmentation on Developer Productivity}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, month = {04/2010}, pages = {41 - 61}, abstract = {Programmers often develop software in multiple languages. In an effort to study the effects of programming language fragmentation on productivity{\textemdash}and ultimately on a developer{\textquoteright}s problem-solving abilities{\textemdash}the authors present a metric, language entropy, for characterizing the distribution of a developer{\textquoteright}s programming efforts across multiple programming languages. This paper presents an observational study examining the project contributions of a random sample of 500 SourceForge developers. Using a random coefficients model, the authors find a statistically (alpha level of 0.001) and practically significant correlation between language entropy and the size of monthly project contributions. Results indicate that programming language fragmentation is negatively related to the total amount of code contributed by developers within SourceForge, an open source software (OSS) community.}, keywords = {commits, entropy, language entropy, programming languages, sourceforge, srda}, issn = {1942-3934}, doi = {10.4018/jossp.2010040104}, author = {Krein, Jonathan L. and MacLean, Alexander C. and Knutson, Charles D. and Delorey, Daniel P. and Eggett, Dennis L.} } @conference {bird2010lee, title = {{Linkster: Enabling Efficient Manual Mining}}, booktitle = {Demonstration Track, Proceedings of the 17th SIGSOFT Symposium on Foundations of Software Engineering}, year = {2010}, note = {"LINKSTER efficiently displays, integrates, and allows inspection and annotation of information from three main sources of data: source code repositories, developer mailing lists archives, and bug tracking databases. 
LINKSTER requires access to a source code repository for file content and a database which contains the raw mined repository, mailing list, and bug tracking information. All notes and annotations made by the user are also recorded in the database."}, publisher = {ACM}, organization = {ACM}, abstract = {While many uses of mined software engineering data are automatic in nature, some techniques and studies either require, or can be improved, by manual methods. Unfortunately, manually inspecting, analyzing, and annotating mined data can be difficult and tedious, especially when information from multiple sources must be integrated. Oddly, while there are numerous tools and frameworks for automatically mining and analyzing data, there is a dearth of tools which facilitate manual methods. To fill this void, we have developed LINKSTER, a tool which integrates data from bug databases, source code repositories, and mailing list archives to allow manual inspection and annotation. LINKSTER has already been used successfully by an OSS project lead to obtain data for one empirical study.}, keywords = {artifacts, bug, bug tracking, data mining, email, mailing lists, open source, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/bird2010lee.pdf}, author = {Christian Bird and Adrian Bachmann and Rahman, Foyzur and Bernstein, Abraham} } @conference {1218, title = {A Longitudinal Study on Collaboration Networks and Decision to Participate in a FLOSS Community}, booktitle = {5th Workshop on Public Data about Software Development (WoPDaSD 2010)}, year = {2010}, note = {"we chose as a case of study Epiphany, which is the default web browser of the GNOME graphical desktop environment" "We collected all relevant data by parsing all the bug reports in GNOME Bugzilla repository relative to Epiphany. The data collection and storing was done using Bicho (v. 0.4 rev. 
7198), a software part of the FLOSSMetric project [14]."}, abstract = {In this paper we conjecture that individual decisions of FLOSS (Free/Libre Open Source Software) developers to take on a task are influenced by network relations generated by collaboration among project members. In order to explore our conjecture we collected data on a FLOSS project team consisting of 227 developers committed since 2002 to the development of a web browser. We reconstructed 2-mode co-collaboration networks (software developer by bug) in which a tie represents an action taken by a developer in order to solve a specific bug. Co-collaboration networks were collected at five points in time during a six-month development cycle of the software. We report and discuss results of longitudinal actor-based modeling that we specify to test for the influence of local network structures on developer{\textquoteright}s decision to take action on a specific bug. The study controls for bug-specific and developer-specific characteristics that may also affect developers{\textquoteright} decisions exogenously. We also control for priority and severity levels assigned by the team to bugs in an attempt to manage voluntary contribution.}, keywords = {bicho, bug fixing, bug reports, bugzilla, COLLABORATION, developers, epiphany, flossmetrics, gnome, social network analysis}, attachments = {https://flosshub.org/sites/flosshub.org/files/wopdasd002.pdf}, author = {Guido Conaldi and Tonellato, Marco} } @conference {964, title = {Mining subclassing directives to improve framework reuse}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {141 - 150}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {To help developers in using frameworks, good documentation is crucial. 
However, it is a challenge to create high quality documentation especially of hotspots in white-box frameworks. This paper presents an approach to documentation of object-oriented white-box frameworks which mines from client code four different kinds of documentation items, which we call subclassing directives. A case study on the Eclipse JFace user-interface framework shows that the approach can improve the state of API documentation w.r.t. subclassing directives.}, keywords = {api, documentation, eclipse, frameworks, jface, source code}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463347}, attachments = {https://flosshub.org/sites/flosshub.org/files/141Mining-Subclassing-Directives-to-Improve-Framework-Reuse.pdf}, author = {Bruch, Marcel and Mezini, Mira and Monperrus, Martin} } @article {1329, title = {Open for Social}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {49 - 64}, abstract = {Online learning in K-12 and higher education has been growing rapidly, and open source software has the potential to improve the quality of e-learning. This paper describes how FOSS enables turning e-learning from a potentially restrictive framing of the education experience to an emergent and social experience. The authors identify several key elements of the FOSS model that position open source initiatives to contribute to the emergent and social nature of experience in e-learning. The authors also describe several challenges to developing FOSS in a community of educators for e-learning. These elements and challenges are illustrated in a brief case report about the development of an open source software system called Context-aware Activity Notification System (CANS). 
CANS (http://cansaware.com) is a notification system that integrates with collaborative work and learning systems and is designed around the importance of awareness of user activity, a user{\textquoteright}s social context and personal notification preferences. }, keywords = {education}, issn = {1942-3934}, doi = {10.4018/jossp.2010010104}, author = {Laffey, James and Schmidt, Matthew and Amelung, Christopher} } @conference {962, title = {OSS developers context-specific Preferred Representational systems: A initial Neurolinguistic text analysis of the Apache mailing list}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {126 - 129}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Open Source Software (OSS) mailing lists are used by developers to discuss software engineering tasks performed in the project. In the last years, researchers have been conducting mailing lists linguistic analyses for understanding the intricacies of OSS development. An unpublished approach for that is to use NeuroLinguistic Theory (NT). NT postulates the use of a Preferred Representational cognitive System (PRS) in specific contexts. This means that different resources and cognitive channels are used by developers in order to understand software, but what types of representational systems are the preferred by software engineers? 
This paper introduces a psychometrically-based neuro-linguistic analysis tool to classify developers, and presents early results of an experiment to assess the PRS of four top developers against a baseline of the Apache server mailing list.}, keywords = {apache, email, email archives, mailing lists, neurolinguistics}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463339}, author = {Junior, Methanias Colaco and Mendonca, Manoel and Farias, Mario and Henrique, Paulo} } @conference {949, title = {Predicting the severity of a reported bug}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {1 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {The severity of a reported bug is a critical factor in deciding how soon it needs to be fixed. Unfortunately, while clear guidelines exist on how to assign the severity of a bug, it remains an inherent manual process left to the person reporting the bug. In this paper we investigate whether we can accurately predict the severity of a reported bug by analyzing its textual description using text mining algorithms. 
Based on three cases drawn from the open-source community (Mozilla, Eclipse and GNOME), we conclude that given a training set of sufficient size (approximately 500 reports per severity), it is possible to predict the severity with a reasonable accuracy (both precision and recall vary between 0.65-0.75 with Mozilla and Eclipse; 0.70-0.85 in the case of GNOME).}, keywords = {bug reports, eclipse, gnome, mozilla, severity, text mining}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463284}, attachments = {https://flosshub.org/sites/flosshub.org/files/1lamkanfiDemeyer1.pdf}, author = {Lamkanfi, Ahmed and Demeyer, Serge and Giger, Emanuel and Goethals, Bart} } @conference {966, title = {Replaying IDE interactions to evaluate and improve change prediction approaches}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {161 - 170}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Change prediction helps developers by recommending program entities that will have to be changed alongside the entities currently being changed. To evaluate their accuracy, current change prediction approaches use data from versioning systems such as CVS or SVN. These data sources provide a coarse-grained view of the development history that flattens the sequence of changes in a single commit. They are thus not a valid basis for evaluation in the case of development-style prediction, where the order of the predictions has to match the order of the changes a developer makes. We propose a benchmark for the evaluation of change prediction approaches based on fine-grained change data recorded from IDE usage. Moreover, the change prediction approaches themselves can use the more accurate data to fine-tune their prediction. 
We present an evaluation procedure and use it on several change prediction approaches, both novel and from the literature, and report on the results.}, keywords = {cbse, change based software evolution, change prediction, changes, commit, cvs, development history, eclipseeye, ide, mylyn, spyware, svn}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463278}, attachments = {https://flosshub.org/sites/flosshub.org/files/161Robbes2010changePrediction.pdf}, author = {Robbes, Romain and Pollet, Damien and Lanza, Michele} } @conference {1222, title = {Responsiveness as a measure for assessing the health of OSS ecosystems}, booktitle = {Second International Workshop on Building Sustainable Open Source Communities (OSCOMM 2010)}, year = {2010}, note = {"Data was collected from the GMANE (gmane.org) archives of the SourceForge {\textquotedblleft}Nagios-devel{\textquotedblright} mailing list for the period from January 2004 to October 2009". response time, responsiveness.}, month = {05/2010}, abstract = {The health of an Open Source ecosystem is an important decision factor when considering the adoption of Open Source software or when monitoring a seeded Open Source project. In this paper we introduce responsiveness as a qualitative measure of the quality of replies within mailing lists, which can be used for assessing ecosystem health. We consider one specific metric of responsiveness in this paper, and that is the response time of follow-up messages in mailing lists. We also describe a way for characterising the nature of communication in messages with short and long response times. The approach is tested in the context of the Nagios project, and we particularly focus on the responsiveness for contributors acting in their professional roles as core developers. 
Our contribution is a step towards a deeper understanding of voluntary support provided in mailing lists of OSS projects.}, keywords = {email, email archives, gmane, mailing lists, nagios, response time, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/osscomm002.pdf}, author = {Gamalielsson, Jonas and Lundell, Bj{\"o}rn and Lings, Brian} } @conference {967, title = {Should I contribute to this discussion?}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {181 - 190}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town}, abstract = {Development mailing lists play a central role in facilitating communication in open source projects. Since these lists frequently host design and project discussions, knowledgeable contribution to these discussion threads is essential to avoid mis-communication that might slow-down the progress of a project. However, given the sheer volume of emails on these lists, it is easy to miss important discussions. To find out how developers are able to deal with mailing list discussions, we study the main factors that encourage developers to contribute to the development mailing lists. We develop personalized models to automatically identify discussion threads that a developer would contribute to based on his previous contribution behavior. 
Case studies on development mailing lists of three open source projects (Apache, PostgreSQL and Python) show that the average accuracy of our models is 89-85\% and that the models vary significantly between different developers.}, keywords = {apache, contributions, developers, email, email archives, mailing lists, postgresql, python}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463345}, attachments = {https://flosshub.org/sites/flosshub.org/files/181ibrahim-msr2010.pdf}, author = {Ibrahim, Walid M and Bettenburg, Nicolas and Shihab, Emad and Adams, Bram and Hassan, Ahmed E.} } @conference {Rastkar:2010:SSA:1806799.1806872, title = {Summarizing software artifacts: a case study of bug reports}, booktitle = {Proceedings of the 32nd ACM/IEEE International Conference on Software Engineering - Volume 1}, series = {ICSE {\textquoteright}10}, year = {2010}, pages = {505{\textendash}514}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Many software artifacts are created, maintained and evolved as part of a software development project. As software developers work on a project, they interact with existing project artifacts, performing such activities as reading previously filed bug reports in search of duplicate reports. These activities often require a developer to peruse a substantial amount of text. In this paper, we investigate whether it is possible to summarize software artifacts automatically and effectively so that developers could consult smaller summaries instead of entire artifacts. To provide focus to our investigation, we consider the generation of summaries for bug reports. We found that existing conversation-based generators can produce better results than random generators and that a generator trained specifically on bug reports can perform statistically better than existing conversation-based generators. 
We demonstrate that humans also find these generated summaries reasonable indicating that summaries might be used effectively for many tasks.}, keywords = {bug reports, eclipse, gnome, human-centric software engineering, kde, machine learning, mozilla}, isbn = {978-1-60558-719-6}, doi = {10.1145/1806799.1806872}, url = {http://doi.acm.org/10.1145/1806799.1806872}, author = {Rastkar, Sarah and Murphy, Gail C. and Murray, Gabriel} } @conference {1217, title = {Trends That Affect Temporal Analysis Using SourceForge Data}, booktitle = {5th Workshop on Public Data about Software Development (WoPDaSD 2010)}, year = {2010}, note = {"In this paper we examine some of the limitations of artifact data by specifically addressing the applicability of SourceForge data to the study of project evolution." "For our analysis we examine 9,997 Production/Stable or Maintenance phase projects stored in CVS on SourceForge and extracted in October of 2006 [5]"}, abstract = {SourceForge is a valuable source of software artifact data for researchers who study project evolution and developer behavior. However, the data exhibit patterns that may bias temporal analyses. Most notable are cliff walls in project source code repository timelines, which indicate large commits that are out of character for the given project. These cliff walls often hide significant periods of development and developer collaboration{\textemdash}a threat to studies that rely on SourceForge repository data. We demonstrate how to identify these cliff walls, discuss reasons for their appearance, and propose preliminary measures for mitigating their effects in evolution-oriented studies.}, keywords = {cliff walls, committers, cvs, evolution, growth, source code, sourceforge, time, time series}, attachments = {https://flosshub.org/sites/flosshub.org/files/wopdasd001.pdf}, author = {MacLean, Alexander C. and Pratt, Landon J. and Krein, Jonathan L. 
and Knutson, Charles D.} } @article {1387, title = {Using the DEMO methodology for modeling open source software development processes}, journal = {Information and Software Technology}, volume = {52}, year = {2010}, month = {06/2010}, pages = {656 - 671}, abstract = {Context: Open source software development (OSSD) process modeling has received increasing interest in recent years. These efforts aim to identify common elements in the development process between multiple open source software (OSS) projects. However, the complexity inherent to OSSD process modeling puts significant demands on the modeling language. Objective: In this paper, we propose that the Design and Engineering Methodology for Organizations (DEMO) may provide an interesting alternative to develop OSSD process models. DEMO exhibits two unique features within the context of OSSD process modeling. First, DEMO analyzes processes at the ontological level and provides high-level process descriptions, instead of focusing on the implementation level. Second, DEMO studies the communication patterns between human actors, instead of the sequences in which activities are performed. Method: We investigate the feasibility of using DEMO to construct OSSD process models by means of a case study. DEMO models were constructed to describe the NetBeans Requirements and Release process. In addition, the quality of these DEMO models was evaluated using a quality framework for conceptual modeling. Results: Our results showed that our DEMO models exhibited a high level of abstraction, thereby reducing the complexity of the OSSD process models. In addition, the evaluation of the models developed in this paper by using the quality framework for conceptual modeling showed that the models were of high quality. Conclusions: We have shown that the DEMO methodology can be successfully used to model OSSD processes and to obtain abstract and high-quality OSSD process models. 
However, given some potential drawbacks with respect to understandability and implementability, we primarily propose the use of DEMO within OSSD process modeling as an analysis tool that should be complemented with other techniques and models for communication and reenactment purposes.}, keywords = {DEMO, Enterprise ontology, open source software, Software process modeling}, issn = {0950-5849}, doi = {10.1016/j.infsof.2010.02.002}, url = {http://www.sciencedirect.com/science/article/pii/S0950584910000157}, author = {Huysmans, Philip and Ven, Kris and Verelst, Jan} } @conference {969, title = {Validity of network analyses in Open Source Projects}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, note = {"We have mined archival records of developer mailing lists to generate reply-to social networks for the three OSS projects: Apache, MySQL, and Perl." "For each of these projects, we construct an information flow network based on messages that are sent as replies to previous messages....We use this methodology on all mined data to create a network of mailing list participants. "}, pages = {201 - 209}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Social network methods are frequently used to analyze networks derived from Open Source Project communication and collaboration data. Such studies typically discover patterns in the information flow between contributors or contributions in these projects. Social network metrics have also been used to predict defect occurrence. However, such studies often ignore or side-step the issue of whether (and in what way) the metrics and networks of study are influenced by inadequate or missing data. In previous studies email archives of OSS projects have provided a useful trace of the communication and co-ordination activities of the participants. 
These traces have been used to construct social networks that are then subject to various types of analysis. However, during the construction of these networks, some assumptions are made, that may not always hold; this leads to incomplete, and sometimes incorrect networks. The question then becomes, do these errors affect the validity of the ensuing analysis? In this paper we specifically examine the stability of network metrics in the presence of inadequate and missing data. The issues that we study are: 1) the effect of paths with broken information flow (i.e. consecutive edges which are out of temporal order) on measures of centrality of nodes in the network, and 2) the effect of missing links on such measures. We demonstrate on three different OSS projects that while these issues do change network topology, the metrics used in the analysis are stable with respect to such changes.}, keywords = {apache, email archives, mailing lists, missing data, mysql, perl, social networks}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463342}, attachments = {https://flosshub.org/sites/flosshub.org/files/201NetworkAnalysis.pdf}, author = {Nia, Roozbeh and Christian Bird and Devanbu, Premkumar and Filkov, Vladimir} } @conference {953, title = {When process data quality affects the number of bugs: Correlations in software engineering datasets}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {62 - 71}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Software engineering process information extracted from version control systems and bug tracking databases are widely used in empirical software engineering. In prior work, we showed that these data are plagued by quality deficiencies, which vary in its characteristics across projects. 
In addition, we showed that those deficiencies in the form of bias do impact the results of studies in empirical software engineering. While these findings affect software engineering researchers the impact on practitioners has not yet been substantiated. In this paper we, therefore, explore (i) if the process data quality and characteristics have an influence on the bug fixing process and (ii) if the process quality as measured by the process data has an influence on the product (i.e., software) quality. Specifically, we analyze six Open Source as well as two Closed Source projects and show that process data quality and characteristics have an impact on the bug fixing process: the high rate of empty commit messages in Eclipse, for example, correlates with the bug report quality. We also show that the product quality - measured by number of bugs reported - is affected by process data quality measures. These findings have the potential to prompt practitioners to increase the quality of their software process and its associated data quality.}, keywords = {apache, bug reports, eclipse, gnome, log files, mozilla, netbeans, openoffice.org, version control}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463286}, attachments = {https://flosshub.org/sites/flosshub.org/files/62bachmann-msr10.pdf}, author = {Bachmann, Adrian and Bernstein, Abraham} } @conference {937, title = {Assigning bug reports using a vocabulary-based expertise model of developers}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {131 - 140}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {For popular software systems, the number of daily submitted bug reports is high. Triaging these incoming reports is a time consuming task. 
Part of the bug triage is the assignment of a report to a developer with the appropriate expertise. In this paper, we present an approach to automatically suggest developers who have the appropriate expertise for handling a bug report. We model developer expertise using the vocabulary found in their source code contributions and compare this vocabulary to the vocabulary of bug reports. We evaluate our approach by comparing the suggested experts to the persons who eventually worked on the bug. Using eight years of Eclipse development as a case study, we achieve 33.6\% top-1 precision and 71.0\% top-10 recall.}, keywords = {bug reports, bugzilla, develect, developers, eclipse, expertise, scm}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069491}, attachments = {https://flosshub.org/sites/flosshub.org/files/131AssigningBugReports.pdf}, author = {Matter, Dominique and Kuhn, Adrian and Nierstrasz, Oscar} } @conference {1360, title = {Benchmarking Lightweight Techniques to Link E-Mails and Source Code}, booktitle = {2009 16th Working Conference on Reverse Engineering}, year = {2009}, note = {"We present different lightweight approaches that, exploiting the specific characteristics of e-mails and the ones of the source code, are capable of establishing a bi-directional link between source code entities and e-mails" "We analyzed ArgoUML, a UML modelling tool written in Java, developed over the course of approximately 9 years, and made available under the BSD Open Source License. We consider the release 0.28 (March 2009) that comprehends 2,197 classes. We employed the lightweight approaches to map such classes to the related e-mails in ArgoUML mailing lists. 
ArgoUML e-mails are stored in six mailing lists (see Table I), for a total amount of 79,175 messages" Figure 4 is helpful to understand what this does.}, pages = {205 - 214}, publisher = {IEEE}, organization = {IEEE}, address = {Lille, France}, abstract = {During the evolution of a software system, a large amount of information, which is not always directly related to the source code, is produced. Several researchers have provided evidence that the contents of mailing lists represent a valuable source of information: Through e-mails, developers discuss design decisions, ideas, known problems and bugs, etc. which are otherwise not to be found in the system. A technical challenge in this context is how to establish the missing link between free-form e-mails and the system artifacts they refer to. Although the range of approaches is vast, establishing their accuracy remains a problem, as there is no benchmark against which to compare their performance. To overcome this issue, we manually inspected a statistically significant number of e-mails pertaining to the ArgoUML system. 
Based on this benchmark, we present a variety of lightweight techniques to assign e-mails to software artifacts and measure their effectiveness in terms of precision and recall.}, keywords = {argouml, email, mailing lists}, isbn = {978-0-7695-3867-9}, doi = {10.1109/WCRE.2009.44}, attachments = {https://flosshub.org/sites/flosshub.org/files/wcre2009.pdf}, author = {Bacchelli, Alberto and D{\textquoteright}Ambros, Marco and Lanza, Michele and Robbes, Romain} } @conference {941, title = {Evolution of the core team of developers in libre software projects}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {167 - 170}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {In many libre (free, open source) software projects, most of the development is performed by a relatively small number of persons, the "core team". The stability and permanence of this group of most active developers is of great importance for the evolution and sustainability of the project. In this position paper we propose a quantitative methodology to study the evolution of core teams by analyzing information from source code management repositories. The most active developers in different periods are identified, and their activity is calculated over time, looking for core team evolution patterns.}, keywords = {core, cvs, cvsanaly, developers, evolution, gimp, scm}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069497}, attachments = {https://flosshub.org/sites/flosshub.org/files/167core-evolution.pdf}, author = {Gregorio Robles and Jesus M. 
Gonzalez-Barahona and Herraiz, Israel} } @conference {1203, title = {Flat for the few, steep for the many: Structural cohesion as a measure of hierarchy in FLOSS communities}, booktitle = {4th Workshop on Public Data about Software Development (WoPDaSD 2009)}, year = {2009}, note = {"The FLOSS project selected as case study is the GNOME web browser Epiphany. The communication network was generated tracing backwards mail threads in the development mailing list. "}, month = {2009}, abstract = {A discrepancy exists between the emphasis posed by practitioners on decentralized and non-hierarchical communication in Free/Libre Open Source Software (FLOSS) communities and empirical evidence of their hierarchical structure. In order to explain this apparent paradox it is here hypothesized that in FLOSS communities local sub-groups exist and are less hierarchical, more decentralized than the whole social network to which they belong. A measure of structural cohesion based on network node connectivity is proposed as an effective method to test whether FLOSS communication networks can be decomposed in nested hierarchies of progressively less centralized sub-groups. Preliminary results from a case study that are consistent with the hypothesis are presented and discussed. 
}, keywords = {case study, email, email archives, epiphany, gnome, mailing list, social network analysis}, attachments = {https://flosshub.org/sites/flosshub.org/files/guido-conaldi-flat-for-the-few.pdf}, author = {Guido Conaldi} } @conference {936, title = {From work to word: How do software developers describe their work?}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {121--130}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Developers take notes about their work sessions, either to remember the work status and share it with collaborators, or because employers explicitly require this for project management matters. We report on an exploratory study which aims at understanding how software developers describe their work. We analyzed more than 750,000 work descriptions of about 2,000 professionals taken over 8 years in three settings. We observed several similarities in the content and time meta-data of work descriptions. Most frequent terms, such as top-30 performed activities, are used consistently. Particular templates such as {\textquotedblleft}ACTION concerning ARTIFACT because of CAUSE{\textquotedblright} occur frequently. Developers described sessions that last 30-120 min. 4-16 times a day. Maintaining diaries seems to consume between 3-6\% of the total work time, and in 10\% of the sessions, developers did not describe their work in sufficient detail. 
We argue that our results make the first step towards automatically generating work diaries for software developers.}, keywords = {apache, developers, diaries, eureka, mycomp, scm, work management system}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069490}, author = {Maalej, Walid and Happel, Hans-Jorg} } @conference {844, title = {A Jury of Your Peers: Quality, Experience and Ownership in Wikipedia}, booktitle = {The International Symposium on Wiki{\textquoteright}s and Open Collaboration}, year = {2009}, month = {10/2009}, address = {Orlando, FL}, keywords = {experience, ownership, peer, quality, review, wikipedia, wikiwork}, author = {Halfaker, A. and Kittur, N. and Kraut, R. and Riedl, J.} } @conference {944, title = {Learning from defect removals}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {179--182}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Recent research has tried to identify changes in source code repositories that fix bugs by linking these changes to reports in issue tracking systems. These changes have been traced back to the point in time when they were previously modified as a way of identifying bug introducing changes. But we observe that not all changes linked to bug tracking systems are fixing bugs; some are enhancing the code. Furthermore, not all fixes are applied at the point in the code where the bug was originally introduced. 
We flesh out these observations with a manual review of several software projects, and use this opportunity to see how many defects are in the scope of static analysis tools.}, keywords = {bug fixing, bugzilla, change management, cherry, cvs, eclipse, groovy, launching, source code, svn, text editor}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069500}, attachments = {https://flosshub.org/sites/flosshub.org/files/179LearnFromDefects-MSR09.pdf}, author = {Ayewah, Nathaniel and Pugh, William} } @conference {bird2009pat, title = {Putting it All Together: Using Socio-Technical Networks to Predict Failures}, booktitle = {Proceedings of the 17th International Symposium on Software Reliability Engineering}, year = {2009}, note = {First, we build each type of network separately and use network analysis on both to gather metrics for use in a predictive model. Second, we build a socio-technical network which combines the nodes and edges from both the dependency network and the contribution network and use metrics gathered from this network in a predictive model. We evaluate our approach by collecting data from Microsoft Windows Vista and ECLIPSE development and using logistic regression analysis.}, abstract = {Studies have shown that social factors in development organizations have a dramatic effect on software quality. Separately, program dependency information has also been used successfully to predict which software components are more fault prone. Interestingly, the influence of these two phenomena have only been studied separately. Intuition and practical experience suggests, however, that task assignment (i.e. who worked on which components and how much) and dependency structure (which components have dependencies on others) together interact to influence the quality of the resulting software. We study the influence of combined socio-technical software networks on the fault-proneness of individual software components within a system. 
The network properties of a software component in this combined network are able to predict if an entity is failure prone with greater accuracy than prior methods which use dependency or contribution information in isolation. We evaluate our approach in different settings by using it on Windows Vista and across six releases of the Eclipse development environment including using models built from one release to predict failure prone components in the next release. We compare this to previous work. In every case, our method performs as well or better and is able to more accurately identify those software components that have more post-release failures, with precision and recall rates as high as 85\%.}, keywords = {eclipse, microsoft, social network, vista, windows}, attachments = {https://flosshub.org/sites/flosshub.org/files/bird2009pat.pdf}, author = {Christian Bird and Nachiappan Nagappan and Devanbu, Premkumar and Gall, Harald and Brendan Murphy} } @article {schryen2009security, title = {Security of Open Source and Closed Source Software: An Empirical Comparison of Published Vulnerabilities}, journal = {AMCIS 2009 Proceedings}, year = {2009}, pages = {387}, abstract = {Reviewing literature on open source and closed source security reveals that the discussion is often determined by biased attitudes toward one of these development styles. The discussion specifically lacks appropriate metrics, methodology and hard data. This paper contributes to solving this problem by analyzing and comparing published vulnerabilities of eight open source software and nine closed source software packages, all of which are widely deployed. Thereby, it provides an extensive empirical analysis of vulnerabilities in terms of mean time between vulnerability disclosures, the development of disclosure over time, and the severity of vulnerabilities, and allows for validating models provided in the literature. 
The investigation reveals that (a) the mean time between vulnerability disclosures was lower for open source software in half of the cases, while the other cases show no differences, (b) in contrast to literature assumption, 14 out of 17 software packages showed a significant linear or piecewise linear correlation between time and the number of published vulnerabilities, and (c) regarding the severity of vulnerabilities, no significant differences were found between open source and closed source.}, keywords = {closed source software, empirical comparison, open source software, security, Vulnerabilities}, url = {http://epub.uni-regensburg.de/21296/1/Schryen_-_AMCIS_09_-_Security_of_open_source_and_closed_source_software_-_Web_version.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/Schryen_-_AMCIS_09_-_Security_of_open_source_and_closed_source_software_-_Web_version.pdf}, author = {Schryen, Guido} } @conference {1518, title = {Studying the use of developer IRC meetings in open source projects}, booktitle = {2009 IEEE International Conference on Software Maintenance (ICSM)}, year = {2009}, pages = {147--156}, publisher = {IEEE}, organization = {IEEE}, address = {Edmonton, AB, Canada}, abstract = {Open source developers communicate with each other via various online outlets. Thus far, mailing lists have been the main coordination mechanism. However, our previous study shows that the use of developer IRC meetings is increasing in recent years. In this paper, we perform a study on the IRC meetings of two large open source projects: the GTK+ and Evolution projects. We explore three dimensions: who participates in the meetings, what do they discuss and how do they run the meetings. 
We find (1) that a small and stable number of the participants contribute the majority of messages in meetings, (2) that there are commonly discussed topics as well as project specific topics (3) that meeting styles vary across different projects.}, keywords = {evolution, gtk, irc}, isbn = {978-1-4244-4897-5}, doi = {10.1109/ICSM.2009.5306333}, url = {http://sail.cs.queensu.ca/publications/pubs/icsm2009_shihab.pdf}, author = {Shihab, Emad and Zhen Ming Jiang and Hassan, Ahmed E.} } @article {1238, title = {Teaching Software Engineering with Free/Libre Open Source Projects}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {72--90}, abstract = {One of the major problems in software engineering education is the involvement of students in real world software projects. Industry projects are a solution, but in many cases they are hard to find and student participation can be problematic due to cultural, familiarization and other practical reasons. The abundance of Free / Libre Open Source Software (FLOSS) projects is a neat solution, offering multi-lingual, multicultural environments in virtually every application domain, at different levels of project size, maturity, organization etc. The paper analyzes how acquisition of practical experience on several basic and advanced software engineering topics can be achieved by working in a FLOSS project. The kind of skills that can be acquired are those requested by the Overview Report for Computing Curricula by ACM and topics examined are those of the Software Engineering Body of Knowledge, by IEEE. Also software engineering areas that require special care or that may not prove suitable for such treatment are identified. 
Various isolated teaching cases pertaining to this approach are presented and discussed.}, keywords = {curriculum, education, teaching, undergraduate}, issn = {1942-3934}, doi = {10.4018/jossp.2009010105}, author = {Ioannis Stamelos} } @conference {926, title = {Tracking concept drift of software projects using defect prediction quality}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {51--60}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Defect prediction is an important task in the mining of software repositories, but the quality of predictions varies strongly within and across software projects. In this paper we investigate the reasons why the prediction quality is so fluctuating due to the altering nature of the bug (or defect) fixing process. Therefore, we adopt the notion of a concept drift, which denotes that the defect prediction model has become unsuitable as set of influencing features has changed - usually due to a change in the underlying bug generation process (i.e., the concept). We explore four open source projects (Eclipse, OpenOffice, Netbeans and Mozilla) and construct file-level and project-level features for each of them from their respective CVS and Bugzilla repositories. We then use this data to build defect prediction models and visualize the prediction quality along the time axis. These visualizations allow us to identify concept drifts and - as a consequence - phases of stability and instability expressed in the level of defect prediction quality. Further, we identify those project features, which are influencing the defect prediction quality using both a tree induction-algorithm and a linear regression model. Our experiments uncover that software systems are subject to considerable concept drifts in their evolution history. 
Specifically, we observe that the change in number of authors editing a file and the number of defects fixed by them contribute to a project{\textquoteright}s concept drift and therefore influence the defect prediction quality. Our findings suggest that project managers using defect prediction models for decision making should be aware of the actual phase of stability or instability due to a potential concept drift.}, keywords = {bugzilla, cvs, defect prediction, eclipse, mozilla, netbeans, openoffice}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069480}, attachments = {https://flosshub.org/sites/flosshub.org/files/51MSR2009_0111_Ekanayake_Jayalath.pdf}, author = {Ekanayake, Jayalath and Tappolet, Jonas and Gall, Harald C. and Bernstein, Abraham} } @conference {597, title = {Using FLOSS Project Metadata in the Undergraduate Classroom}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {330--339}, publisher = {Springer}, organization = {Springer}, chapter = {29}, abstract = {This paper describes our efforts to use the large amounts of data available from public repositories of free, libre, and open source software (FLOSS) in our undergraduate classrooms to teach concepts that would have previously been taught using other types of data from other sources. 
}, keywords = {artificial intelligence, database, education, teaching, undergraduate, undergraduate research}, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_29}, attachments = {https://flosshub.org/sites/flosshub.org/files/Using\%20FLOSS\%20Project\%20Metadata.pdf}, author = {Squire, Megan and Duvall, Shannon} } @article {10.1109/HICSS.2009.1014, title = {Using Software Archaeology to Measure Knowledge Loss in Software Projects Due to Developer Turnover}, journal = {2009 42nd Hawaii International Conference on System Sciences (HICSS 2009)}, year = {2009}, pages = {1--10}, publisher = {IEEE Computer Society}, address = {Los Alamitos, CA, USA}, abstract = {Developer turnover can result in a major problem when developing software. When senior developers abandon a software project, they leave a knowledge gap that has to be managed. In addition, new (junior) developers require some time in order to achieve the desired level of productivity. In this paper, we present a methodology to measure the effect of knowledge loss due to developer turnover in software projects. For a given software project, we measure the quantity of code that has been authored by developers that do not belong to the current development team, which we define as orphaned code. Besides, we study how orphaned code is managed by the project. Our methodology is based on the concept of software archaeology, a derivation of software evolution. As case studies we have selected four FLOSS (free, libre, open source software) projects, from purely driven by volunteers to company-supported. 
The application of our methodology to these case studies will give insight into the turnover that these projects suffer and how they have managed it and shows that this methodology is worth being augmented in future research.}, keywords = {attrition, case study, developers, evince, evolution, gimp, growth, knowledge collaboration, lines of code, nautilus, quality, sloc, turnover}, isbn = {978-0-7695-3450-3}, doi = {10.1109/HICSS.2009.1014}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-07-08.pdf}, author = {Izquierdo-Cortazar, Daniel and Gregorio Robles and Ortega, Felipe and Jesus M. Gonzalez-Barahona} } @article {Xu2009151, title = {Volunteers{\textquoteright} involvement in online community based software development}, journal = {Information \& Management}, volume = {46}, number = {3}, year = {2009}, note = {"Data were collected through an online survey and by searching project archives. On Sourceforge.net, each developer was uniquely identified with a user account, and the developer{\textquoteright}s performance was assessed through the number of function points accepted by the project in a certain time period, obtained through conversion and calculation from the project{\textquoteright}s code repository. Data for other constructs were obtained from the developers{\textquoteright} response to the online survey. " "a developer{\textquoteright}s performance was measured by the number of function points made and accepted into the project during the observed time period." project age, development status, license type, number of developers}, pages = {151--158}, abstract = {We sought to gain understanding of voluntary developers{\textquoteright} involvement in open source software (OSS) projects. Data were collected from voluntary developers working on open source projects. 
Our findings indicated that a voluntary developer{\textquoteright}s involvement was very important to his or her performance and that involvement was dependent on individual motivations (personal software needs, reputation and skills gaining expectation, enjoyment in open source coding) and project community factors (leadership effectiveness, interpersonal relationship, community ideology). Our work contributes theoretically and empirically to the body of OSS research and has practical implications for OSS project management.}, keywords = {age, developers, effectiveness, function points, ideology, leadership, MOTIVATION, scm, sourceforge, status, Survey, team size, Volunteers}, issn = {0378-7206}, doi = {10.1016/j.im.2008.12.005}, url = {http://www.sciencedirect.com/science/article/B6VD0-4VP1CN0-1/2/8e1c7be4fcedd1419209c5c843ffa923}, author = {Bo Xu and Donald R. Jones and Bingjia Shao} } @conference {1205, title = {Weaving a Semantic Web across OSS repositories: a spotlight on bts-link, UDD, SWIM}, booktitle = {4th Workshop on Public Data about Software Development (WoPDaSD 2009)}, year = {2009}, note = {position paper; non-experimental}, abstract = {Several public repositories and archives of facts about libre software projects, developed either by open source communities or by research communities, have been flourishing over the Web in the recent years. These enable new analysis and support new quality assurance tasks. By using Semantic Web techniques, the databases containing data about open-source software projects development can be interconnected, hence letting OSS partakers identify resources, annotate them and further interlink them using dedicated properties, collectively designing a distributed semantic graph. Such links expressed with standard Semantic techniques are paving the way to new applications (including ones meant for {\textquotedblleft}end-users{\textquotedblright}). 
For instance this may have an impact on the way research efforts are conducted (less fragmented), and could also be used by development communities to improve Quality Assurance tasks. A goal of the research conducted within the HELIOS project, is to address bugtracker synchronization issues. For that, the potential of using Semantic Web technologies in navigating between many different bugtracker systems scattered all over the open source ecosystem is being investigated. This position paper presents some existing tools, projects and models proposed by OSS actors that are complementary to research initiatives, and that are likely to lead to useful future developments: UDD (Ultimate Debian Database) and bts-link, developed by the Debian community, and SWIM (Semantic Web enabled Issue Manager) developed by Mandriva. The HELIOS team welcomes comments on the future paths that can be considered in using the Semantic Web approach for improving these projects. }, keywords = {bts-link, bug tracker, bugzilla, debian, ecosystem, helios, mandriva, semantic Web, swim, udd}, attachments = {https://flosshub.org/sites/flosshub.org/files/HELIOS-WOPDASD-improved-Olivier.pdf}, author = {Olivier Berger and Valentin Vlasceanu and Christian Bac and Lauri{\`e}re, St{\'e}phane} } @conference {939, title = {On what basis to recommend: Changesets or interactions?}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {155--158}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Different flavours of recommendation systems have been proposed to help software developers perform software evolution tasks. A number of these recommendation systems are based on changesets. When changeset information is used, recommendations are based on only the end result of the activity undertaken to complete a task. 
In this paper, we report on an investigation that compared how recommendations based on changesets compare to recommendations based on interactions collected as a programmer performed the task that resulted in a changeset. To provide a common basis for the comparison, our investigation considered how bug reports considered similar based on changeset information compare to bug reports considered similar based on interaction information. We found that there is no direct relationship between the bug reports found similar with the different methods, suggesting that each comparison methods captures a different aspect of the problem.}, keywords = {bug reports, changeset, eclipse, mylyn}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069494}, author = {Rastkar, Sarah and Murphy, Gail C.} } @article {1338, title = {What Makes Free/Libre Open Source Software (FLOSS) Projects Successful? An Agent-Based Model of FLOSS Projects}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {1--13}, abstract = {The last few years have seen a rapid increase in the number of Free/Libre Open Source Software (FLOSS) projects. Some of these projects, such as Linux and the Apache web server, have become phenomenally successful. However, for every successful FLOSS project there are dozens of FLOSS projects which never succeed. These projects fail to attract developers and/or consumers and, as a result, never get off the ground. The aim of this research is to better understand why some FLOSS projects flourish while others wither and die. This article presents a simple agent-based model that is calibrated on key patterns of data from SourceForge, the largest online site hosting open source projects. 
The calibrated model provides insight into the conditions necessary for FLOSS success and might be used for scenario analysis of future developments of FLOSS.}, keywords = {Agent-Based Model, Emergent Properties, FLOSS, open source, Prediction Success, Simulation}, issn = {1942-3934}, doi = {10.4018/jossp.2009040101}, url = {http://www.public.asu.edu/~majansse/pubs/ijossp09.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/ijossp09.pdf}, author = {Radtke, Nicholas P. and Janssen, Marco A. and Collofello, James S.} } @conference {534, title = {Analysis of Coordination Between Developers and Users in the Apache Community}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {81--92}, publisher = {Springer}, organization = {Springer}, chapter = {7}, abstract = {Coordination is one of the keys for the success of open source software (OSS) communities because geographically distributed members need to collaborate on their work using communication tools (e.g., mailing lists, bulletin board systems, bug tracking systems, and so on). In this paper, we investigated the informal social structure among developers and users by analyzing two mailing lists of developers and users in the Apache community based on betweenness centrality, one centrality measure proposed by Freeman. From the analysis results, we found that (1) participants with high betweenness coordinated activities between developers and users and (2) some participants have been functioning as coordinators in the community for a long time. 
}, keywords = {apache, email, mailing list}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_7}, attachments = {https://flosshub.org/sites/flosshub.org/files/Analysis\%20of\%20Coordination.pdf}, author = {Kamei, Yasutaka and Matsumoto, Shinsuke and Maeshima, Hirotaka and Onishi, Yoji and Ohira, Masao and Matsumoto, Ken-ichi} } @conference {Wermelinger:2008:AEE:1370750.1370783, title = {Analyzing the evolution of eclipse plugins}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {133{\textendash}136}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Eclipse is a good example of a modern component-based complex system that is designed for long-term evolution, due to its architecture of reusable and extensible components. This paper presents our preliminary results about the evolution of Eclipse{\textquoteright}s architecture, based on a lightweight and scalable analysis of the metadata in Eclipse{\textquoteright}s sources. We find that the development of Eclipse follows a systematic process: most architectural changes take place in milestones, and maintenance releases only make exceptional changes to component dependencies. 
We also found a stable architectural core that remains since the first release.}, keywords = {architectural evolution, cvs, eclipse, metadata, msr challenge, releases, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370783}, url = {http://doi.acm.org/10.1145/1370750.1370783}, author = {Wermelinger, Michel and Yu, Yijun} } @conference {Wang:2008:ADD:1368088.1368151, title = {An approach to detecting duplicate bug reports using natural language and execution information}, booktitle = {Proceedings of the 30th international conference on Software engineering}, series = {ICSE {\textquoteright}08}, year = {2008}, pages = {461{\textendash}470}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {An open source project typically maintains an open bug repository so that bug reports from all over the world can be gathered. When a new bug report is submitted to the repository, a person, called a triager, examines whether it is a duplicate of an existing bug report. If it is, the triager marks it as DUPLICATE and the bug report is removed from consideration for further work. In the literature, there are approaches exploiting only natural language information to detect duplicate bug reports. In this paper we present a new approach that further involves execution information. In our approach, when a new bug report arrives, its natural language information and execution information are compared with those of the existing bug reports. Then, a small number of existing bug reports are suggested to the triager as the most similar bug reports to the new bug report. Finally, the triager examines the suggested bug reports to determine whether the new bug report duplicates an existing bug report. We calibrated our approach on a subset of the Eclipse bug repository and evaluated our approach on a subset of the Firefox bug repository. 
The experimental results show that our approach can detect 67\%-93\% of duplicate bug reports in the Firefox bug repository, compared to 43\%-72\% using natural language information alone.}, keywords = {bug report, duplicate bug report, execution information, information retrieval, natural language}, isbn = {978-1-60558-079-1}, doi = {10.1145/1368088.1368151}, url = {http://doi.acm.org/10.1145/1368088.1368151}, author = {Wang, Xiaoyin and Zhang, Lu and Xie, Tao and Anvik, John and Sun, Jiasu} } @conference {1211, title = {Are FLOSS developers committing to CVS/SVN as much as they are talking in mailing lists? Challenges for Integrating data from Multiple Repositories}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, month = {09/2008}, pages = {49-54}, abstract = {This paper puts forward a framework for investigating Free and Open Source Software (F/OSS) developers activities in both source code and mailing lists repositories. We used data dumps of fourteen pro jects from the FLOSSMetrics (FM) retrieval system. Our intentions are (i) to present a possible methodology, its advantages and disadvantages which can benefit future researchers using some aspects of the FM retrieval system{\textquoteright}s data dumps, and (ii) discuss our initial research results on the contributions developers make to both coding and lists activities.}, keywords = {cvs, cvsanaly, developers, email, email archives, flossmetrics, mailing list, mlstats, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/49-542008.pdf}, author = {Sowe, Sulayman K. 
and Samoladas, Ioannis and Ioannis Stamelos and Lefteris Angelis} } @conference {1210, title = {Author Entropy: A Metric for Characterization of Software Authorship Patterns}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, note = {used flossmole to get sample of SF developers}, month = {2008}, pages = {42-47}, abstract = {We propose the concept of author entropy and describe how file-level entropy measures may be used to understand and characterize authorship patterns within individual files, as well as across an entire project. As a proof of concept, we compute author entropy for 28,955 files from 33 open-source projects. We explore patterns of author entropy, identify techniques for visualizing author entropy, and propose avenues for further study. }, keywords = {developers, entropy, flossmole, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/entropy2008.pdf}, author = {Taylor, Quinn C. and Stevenson, James E. and Delorey, Daniel P. and Knutson, Charles D.} } @article {Crowston:2008, title = {Bug Fixing Practices within Free/Libre Open Source Software Development Teams}, journal = {Journal of Database Management}, volume = {19}, number = {2}, year = {2008}, note = {"Projects to be studied were selected from those hosted on SourceForge, (http://sourceforge.net/)" "we chose projects for which data we need for our analysis are publicly available, meaning a large number of bug reports" "we chose teams with more than 8 developers" "Only 140 projects of SourceForge met the first two requirements in 2002 when we drew our sample" kicq, gaim, phpmyadmin, dynapi "First, we obtained data indica- tive of the effectiveness of each project, such as its level of activity, number of downloads and development status" "we elected to use objective data about the bug- fixing process. 
Hence, the main source of data about the bug-fixing process was obtained from the archives of the bug tracking system, which is the tool used to support the bug-fixing process"}, pages = {1{\textendash}30}, abstract = {Free/libre open source software (FLOSS, e.g., Linux or Apache) is primarily developed by distributed teams. Developers contribute from around the world and coordinate their activity almost exclusively by means of email and bulletin boards, yet somehow profit from the advantages and evade the challenges of distributed software development. In this article we investigate the structure and the coordination practices adopted by development teams during the bug-fixing process, which is considered one of main areas of FLOSS project success. In particular, based on a codification of the messages recorded in the bug tracking system of four projects, we identify the accomplished tasks, the adopted coordination mechanisms, and the role undertaken by both the FLOSS development team and the FLOSS community. We conclude with suggestions for further research.}, keywords = {activity, bug tracker, bug tracking system, coordination, downloads, dynapi, effectiveness, FLOSS, gaim, kicq, phpmyadmin, project success, size, status}, issn = {1063-8016}, attachments = {https://flosshub.org/sites/flosshub.org/files/CrowstonScozziJDBM2008.pdf}, author = {Kevin Crowston and Barbara Scozzi} } @conference {1206, title = {Collecting data from distributed FOSS projects}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, note = {"We selected three projects from the initial set of projects: Linux 2.6, an operating system kernel, gimp, a graphics program, and Blender, a 3d content creation suite." 
"To acquire data from each data source, we wrote special programs based on the earlier prototypes....The first program extracts information from mailing list archives....The second program obtains bug reports from bug tracking systems....The third program obtains source code from network-accessible version control systems and runs metric calculations on it."}, month = {09/2008}, pages = {8-13}, abstract = {A key trait of Free and Open Source Software (foss) development is its distributed nature. Nevertheless, two project-level operations, the fork and the merge of program code, are among the least well understood events in the lifespan of a foss project. Some projects have explicitly adopted these operations as the primary means of concurrent development. In this study, we examine the effect of highly distributed software development, as found in the Linux kernel project, on collection and modelling of software development data. We find that distributed development calls for sophisticated temporal modelling techniques where several versions of the source code tree can exist at once. Attention must be turned towards the methods of quality assurance and peer review that projects employ to manage these parallel source trees. Our analysis indicates that two new metrics, fork rate and merge rate, could be useful for determining the role of distributed version control systems in foss projects. The study presents a preliminary data set consisting of version control and mailing list data. 
}, keywords = {bitkeeper, bug tracking system, cvs, distributed, email archive, fork rate, git, life cycle, linux, linux kernel, mailing list, merge rate, subversion, svn, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/fagerholm.pdf}, author = {Fagerholm, Fabian and Taina, Juha} } @conference {970, title = {Determinism and evolution}, booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {1-9}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {It has been proposed that software evolution follows a Self-Organized Criticality (SOC) dynamics. This fact is supported by the presence of long range correlations in the time series of the number of changes made to the source code over time. Those long range correlations imply that the current state of the project was determined time ago. In other words, the evolution of the software project is governed by a sort of determinism. But this idea seems to contradict intuition. To explore this apparent contradiction, we have performed an empirical study on a sample of 3,821 libre (free, open source) software projects, finding that their evolution projects is short range correlated. This suggests that the dynamics of software evolution may not be SOC, and therefore that the past of a project does not determine its future except for relatively short periods of time, at least for libre software.}, keywords = {changes, evolution, source code, sourceforge}, isbn = {9781605580241}, doi = {10.1145/1370750.1370752}, attachments = {https://flosshub.org/sites/flosshub.org/files/p1-herraiz.pdf}, author = {Herraiz, Israel and Gonz{\'a}lez-Barahona, Jes{\'u}s M. 
and Gregorio Robles} } @article {Koch2008345, title = {Effort modeling and programmer participation in open source software projects}, journal = {Information Economics and Policy (Empirical Issues in Open Source Software)}, volume = {20}, number = {4}, year = {2008}, note = {"Using a two-step approach, first a detailed case study on one project, GNOME, will be undertaken, then a large data set retrieved from a project hosting site, SourceForge.net, will be used to validate the results." CVS was the main source of data "e-mails sent to the different project discussion lists were identified as an additional source of information especially on communication and coordination besides the CVS-repository" basic counts were calculated for developer discussion levels}, month = {12/2008}, pages = {345 - 355}, abstract = {This paper develops models for programmer participation and effort estimation in open source software projects and employs the results to assess the efficiency of open source software creation. Successful development of such models will be important for decision makers of various kinds. We propose hypotheses based on a prior case study on manpower function and effort modeling. A large data set retrieved from a project repository is used to test these hypotheses. 
The main results are that if Norden-Rayleigh-based approaches are used, they need to be complemented in order to account for the addition of new features during a product life cycle, and that programmer-participation based effort models result in distinctly lower estimations of effort than those based on output metrics, such as lines of code.}, keywords = {cvs, developers, email, email archives, gnome, lines of code, scm, Software repository mining, source code, sourceforge}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.06.004}, url = {http://www.sciencedirect.com/science/article/B6V8J-4SSND1J-1/2/c857fa1493e19aa7fe4297dedb077b3a}, attachments = {https://flosshub.org/sites/flosshub.org/files/KochEffortModeling.pdf}, author = {Koch, Stefan} } @article {1100, title = {An Empirical Study on the Relationship Between Software Design Quality, Development Effort and Governance in Open Source Projects}, journal = {IEEE Transactions on Software Engineering}, volume = {34}, year = {2008}, note = {"empirical data from a sample of 75 major OS projects" no PDF to confirm [ms]}, month = {11/2008}, pages = {765 - 782}, abstract = {The relationship among software design quality, development effort, and governance practices is a traditional research problem. However, the extent to which consolidated results on this relationship remain valid for open source (OS) projects is an open research problem. An emerging body of literature contrasts the view of open source as an alternative to proprietary software and explains that there exists a continuum between closed and open source projects. This paper hypothesizes that as projects approach the OS end of the continuum, governance becomes less formal. In turn a less formal governance is hypothesized to require a higher-quality code as a means to facilitate coordination among developers by making the structure of code explicit and facilitate quality by removing the pressure of deadlines from contributors. 
However, a less formal governance is also hypothesized to increase development effort due to a more cumbersome coordination overhead. The verification of research hypotheses is based on empirical data from a sample of 75 major OS projects. Empirical evidence supports our hypotheses and suggests that software quality, mainly measured as coupling and inheritance, does not increase development effort, but represents an important managerial variable to implement the more open governance approach that characterizes OS projects which, in turn, increases development effort.}, keywords = {effort estimation, governance, quality, source code}, issn = {0098-5589}, doi = {10.1109/TSE.2008.68}, author = {Capra, E. and Francalanci, C. and Merlo, F.} } @article {1091, title = {Evaluating the performance of open source software projects using data envelopment analysis}, journal = {Information Management \& Computer Security}, volume = {16}, year = {2008}, note = {"The data set used for this research consists of 34 OSS software projects listed on sourceforge.net." "only highly ranked projects in the security domain were considered." "Ultimately data were collected on the 34 highest ranked security-based OSS software projects on Sourcefore.net."[sic] fields: bug (?), developers, rank, downloads, Kperdownload (?)}, pages = {449 - 462}, abstract = { Purpose {\textendash} The purpose of this paper is to develop and test a model of the relative performance of open source software (OSS) projects. Design/methodology/approach {\textendash} This paper evaluates the relative performance of OSS projects by evaluating multiple project inputs and multiple project outputs by using a data envelopment analysis (DEA) model. The DEA model produces an efficiency score for each project based on project inputs and outputs. The method of producing an efficiency score is based on the convex envelopment technology structure. 
The efficiency measure quantifies a {\textquotedblleft}distance{\textquotedblright} to an efficient frontier. Findings {\textendash} The DEA model produced an index of corresponding intensities linking an inefficient project to its benchmark efficient project(s). The inefficiency measures produced an ordering of inefficient projects. Eight projects were found to be {\textquotedblleft}efficient{\textquotedblright} and used as benchmarking projects. Research limitations/implications {\textendash} This research is limited to only security-based OSS projects. Future research on other areas of OSS projects is warranted. Practical implications {\textendash} The result of this research is a practical model that can be used by OSS project developers to evaluate the relative performance of their projects and make resource decisions. Originality/value {\textendash} This research extends the work of previous studies that have examined the relative performance of software development projects in a traditional development environment. As a result of this research, OSS projects can now be adequately benchmarked and evaluated according to project performance. 
An OSS project manager can effectively use these results to critically evaluate resources for their project and judge the relative efficiency of the resources.}, keywords = {dea, efficiency, Project performance, sourceforge}, issn = {0968-5227}, doi = {10.1108/09685220810920530}, author = {Wray, Barry and Mathieu, Richard and Teets, J.} } @conference {SGKL09, title = {Evaluating the Quality of Open Source Software}, booktitle = {Electronic Notes in Theoretical Computer Science}, volume = {233}, year = {2009}, note = {"the software source code and the associated data stored in the version control system, the bug tracking databases, the mailing lists, and the wikis allow us to evaluate quality in a transparent way" "The data collection system collects the raw data from open source projects" Mailing lists are measured in: Number of unique subscribers, Number of messages in user/support list per month, Number of messages in developers list per month, Average thread depth}, month = {03/2009}, pages = {5{\textendash}28}, publisher = {The Reengineering Forum}, organization = {The Reengineering Forum}, abstract = {Traditionally, research on quality attributes was either kept under wraps within the organization that performed it, or carried out by outsiders using narrow, black-box techniques. The emergence of open source software has changed this picture allowing us to evaluate both software products and the processes that yield them. Thus, the software source code and the associated data stored in the version control system, the bug tracking databases, the mailing lists, and the wikis allow us to evaluate quality in a transparent way. Even better, the large number of (often competing) open source projects makes it possible to contrast the quality of comparable systems serving the same domain. Furthermore, by combining historical source code snapshots with significant events, such as bug discoveries and fixes, we can further dig into the causes and effects of problems. 
Here we present motivating examples, tools, and techniques that can be used to evaluate the quality of open source (and by extension also proprietary) software. }, keywords = {bug tracking system, email, email archives, mailing list, metrics, open source, process quality attributes, product quality attributes, source code, SQO-OSS, wiki}, doi = {10.1016/j.entcs.2009.02.058}, url = {http://www.dmst.aueb.gr/dds/pubs/conf/2008-SQM-SQOOSS/html/SGKL09.html}, attachments = {https://flosshub.org/sites/flosshub.org/files/entcs-sqooss.pdf}, author = {Diomidis Spinellis and Gousios, Georgios and Vassilios Karakoidas and Panagiotis Louridas and Paul J. Adams and Samoladas, Ioannis and Ioannis Stamelos} } @conference {Alonso:2008:EIV:1370750.1370780, title = {Expertise identification and visualization from CVS}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {125{\textendash}128}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {As software evolves over time, the identification of expertise becomes an important problem. Component ownership and team awareness of such ownership are signals of solid project. Ownership and ownership awareness are also issues in open-source software (OSS) projects. Indeed, the membership in OSS projects is dynamic with team members arriving and leaving. In large open source projects, specialists who know the system very well are considered experts. How can one identify the experts in a project by mining a particular repository like the source code? Have they gotten help from other people? We provide an approach using classification of the source code tree as a path to derive the expertise of the committers. Because committers may get help from other people, we also retrieve their contributors. 
We also provide a visualization that helps to further explore the repository via committers and categories. We present a prototype implementation that describes our research using the Apache HTTP Web server project as a case study.}, keywords = {apache, classification, committers, components, contributors, expertise, expertise identification, repository, scm, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370780}, url = {http://doi.acm.org/10.1145/1370750.1370780}, attachments = {https://flosshub.org/sites/flosshub.org/files/p125-alonso.pdf}, author = {Alonso, Omar and Premkumar T. Devanbu and Gertz, Michael} } @conference {Hata:2008:EFF:1370750.1370772, title = {An extension of fault-prone filtering using precise training and a dynamic threshold}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {89{\textendash}98}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Fault-prone module detection in source code is important for assurance of software quality. Most previous fault-prone detection approaches have been based on software metrics. Such approaches, however, have difficulties in collecting the metrics and in constructing mathematical models based on the metrics. To mitigate such difficulties, we have proposed a novel approach for detecting fault-prone modules using a spam-filtering technique, named Fault-Prone Filtering. In our approach, fault-prone modules are detected in such a way that the source code modules are considered as text files and are applied to the spam filter directly. In practice, we use the training only errors procedure and apply this procedure to fault-prone. Since no pre-training is required, this procedure can be applied to an actual development field immediately. This paper describes an extension of the training only errors procedures. 
We introduce a precise unit of training, "modified lines of code," instead of methods. In addition, we introduce the dynamic threshold for classification. The result of the experiment shows that our extension leads to twice the precision with about the same recall, and improves 15\% on the best F1 measurement.}, keywords = {eclipse, fault-prone modules, spam filter, text mining}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370772}, url = {http://doi.acm.org/10.1145/1370750.1370772}, attachments = {https://flosshub.org/sites/flosshub.org/files/p89-hata.pdf}, author = {Hata, Hideaki and Mizuno, Osamu and Kikuno, Tohru} } @conference {972, title = {Extracting structural information from bug reports}, booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {27-30}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {In software engineering experiments, the description of bug reports is typically treated as natural language text, although it often contains stack traces, source code, and patches. Neglecting such structural elements is a loss of valuable information; structure usually leads to a better performance of machine learning approaches. In this paper, we present a tool called infoZilla that detects structural elements from bug reports with near perfect accuracy and allows us to extract them. 
We anticipate that infoZilla can be used to leverage data from bug reports at a different granularity level that can facilitate interesting research in the future.}, keywords = {bug reports, eclipse, enumerations, infozilla, natural language, patches, source code, stack trace}, isbn = {9781605580241}, doi = {10.1145/1370750.1370757}, attachments = {https://flosshub.org/sites/flosshub.org/files/p27-bettenburg.pdf}, author = {Bettenburg, Nicolas and Premraj, Rahul and Zimmermann, Thomas and Kim, Sunghun} } @article {GonzalezBarahona2008356, title = {Geographic origin of libre software developers}, journal = {Information Economics and Policy}, volume = {20}, number = {4}, year = {2008}, note = {Empirical Issues in Open Source Software}, pages = {356 - 363}, abstract = {This paper examines the claim that libre (free, open source) software involves global development. The anecdotal evidence is that developers usually work in teams including individuals residing in many different geographical areas, time zones and even continents and that, as a whole, the libre software community is also diverse in terms of national origin. However, its exact composition is difficult to capture, since there are few records of the geographical location of developers. Past studies have been based on surveying a limited (and sometimes biased) sample and extrapolating that sample to the global distribution of developers. In this paper we present an alternate approach in which databases are analyzed to create traces of information from which the geographical origin of developers can be inferred. Applying this technique to the SourceForge users database and the mailing lists archives from several large projects, we have estimated the geographical origin of more than one million individuals who are closely related to the libre software development process. 
The paper concludes that the result is a good proxy for the actual distribution of libre software developers working on global projects.}, keywords = {developers, email, email address, email archives, geography, mailing list, open source software, sourceforge, timezone, users}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.07.001}, url = {http://www.sciencedirect.com/science/article/B6V8J-4T3DCPK-1/2/3981dfbc523eae1d1ce65fb1f0c0edb7}, author = {Jesus M. Gonzalez-Barahona and Gregorio Robles and Roberto Andradas-Izquierdo and Rishab Aiyer Ghosh} } @article {481, title = {How Do Firms Make Use of Open Source Communities?}, journal = {Long Range Planning}, volume = {41}, number = {6}, year = {2008}, note = {"To obtain more information about the firms{\textquoteright} relationships with their respective communities, we followed mailing lists and forums over a three-month period, checking them at least three times a week. " "Firm data. Secondary sources, including annual reports, company directories, business and specialist press and homepages, were used to gather information on firms"}, month = {Dec}, pages = {629-649}, abstract = {Relying on four in-depth case studies of firms involved with open source software, we investigate how firms make use of open source communities, and how that use is associated with their business models. Three themes - accessing, aligning and assimilating - are inductively developed for how the firms relate to the external knowledge created in the communities. For each theme, we make an argument about the tactics associated with each theme and their positive and negative consequences. 
The findings are related to the literature on the open and distributed nature of innovation, and various theoretical and managerial implications are discussed.}, keywords = {case study, cendio, email, mailing list, mysql, roxen, secondary data, sot}, url = {http://www.acm.jhu.edu/~paulproteus/tmp/sdarticle.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/dahlandermagnusson2008.pdf}, author = {Linus Dahlander and M Magnusson} } @conference {Zhang:2008:ISG:1370750.1370785, title = {An initial study of the growth of eclipse defects}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {141{\textendash}144}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {We analyze the Eclipse defect data from June 2004 to November 2007, and find that the growth of the number of defects can be well modeled by polynomial functions. Furthermore, we can predict the number of future Eclipse defects based on the nature of defect growth.}, keywords = {bug reports, defect growth model, defect prediction, eclipse, msr challenge, polynomial regression}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370785}, url = {http://doi.acm.org/10.1145/1370750.1370785}, author = {Zhang, Hongyu} } @article {499, title = {The Material and Social Dynamics of Motivation: Contributions to Open Source Language Technology Development}, journal = {Science Studies}, number = {2}, year = {2008}, note = {"I conducted a two-phased qualitative analysis on multiple data (thematic phone-interviews, notes from OpenOffice.org conference 2004 meetings, historical documents, mailing list discussions, homepages, Google, blogs, real-time video and audio presentations from OpenOffice.org conference 2005)."}, abstract = {Volunteer motivation has been a central theme in Free/Libre/Open Source Software (FLOSS) literature. 
This research has been largely dominated by economists who rely in their surveys on the distinction between intrinsic and extrinsic motivations and the "hacker ethic" for profit juxtaposition. The paper argues that survey-based analytical frameworks and research designs have led to a focus on some motivational attributions at the expense of others. It then presents a case study that explores dynamic, non individualistic and content-sensitive aspects of motivations. The approach is based on socio-cultural psychology and the author{\textquoteright}s observations of a hybrid firm-community FLOSS project, OpenOffice.org. Instead of separating intrinsic motivations from extrinsic ones, it is argued that complex and changing patterns of motivations are tied to changing objects and personal histories prior to and during participation. The boundary between work and hobby in an individual{\textquoteright}s participation path is blurred and shifting.}, keywords = {contributions, developers, email, email archives, mailing list, MOTIVATION, openoffice, openoffice.org, secondary data, Volunteers}, attachments = {https://flosshub.org/sites/flosshub.org/files/Freeman.pdf}, author = {Stephanie Freeman} } @conference {Schuler:2008:MUE:1370750.1370779, title = {Mining usage expertise from version archives}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {121{\textendash}124}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In software development, there is an increasing need to find and connect developers with relevant expertise. Existing expertise recommendation systems are mostly based on variations of the Line 10 Rule: developers who changed a file most often have the most implementation expertise. 
In this paper, we introduce the concept of usage expertise, which manifests itself whenever developers are using functionality, e.g., by calling API methods. We present preliminary results for the ECLIPSE project that demonstrate that our technique allows to recommend experts for files with no or little history, identify developers with similar expertise, and measure the usage of API methods.}, keywords = {api, computer-supported cooperative work, eclipse, expertise, recommendation, scm, software repository, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370779}, url = {http://doi.acm.org/10.1145/1370750.1370779}, attachments = {https://flosshub.org/sites/flosshub.org/files/p121-schuler.pdf}, author = {Schuler, David and Zimmermann, Thomas} } @conference {Holmes:2008:NGE:1370750.1370787, title = {A newbie{\textquoteright}s guide to eclipse APIs}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {149{\textendash}152}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Eclipse has evolved from a fledgling Java IDE into a mature software ecosystem. One of the greatest benefits Eclipse provides developers is flexibility; however, this is not without cost. New Eclipse developers often find the framework to be large and confusing. Determining which parts of the framework they should be using can be a difficult task as Eclipse documentation tends to be either very high-level, focusing on the design of the framework, or low-level, focusing on specific APIs. We have developed a tool called PopCon that provides a bridge between high-level design documentation and low-level API documentation by statically analyzing a framework and several of its clients and providing a ranked list of the relative popularity of its APIs. 
We have applied PopCon to the Eclipse framework for this challenge to help newbie Eclipse developers identify some of the most relevant APIs for their tasks.}, keywords = {API popularity, documentation, eclipse, mining software repositories, module, msr challenge, PopCon, popularity}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370787}, url = {http://doi.acm.org/10.1145/1370750.1370787}, author = {Holmes, Reid and Walker, Robert J.} } @conference {Rigby:2008:OSS:1368088.1368162, title = {Open source software peer review practices: a case study of the apache server}, booktitle = {Proceedings of the 30th International Conference on Software Engineering (ICSE 2008)}, series = {ICSE {\textquoteright}08}, year = {2008}, note = {http://faculty.salisbury.edu/~xswang/Research/Papers/SERelated/testing/p541-rigby.pdf}, pages = {541{\textendash}550}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Peer review is seen as an important quality assurance mechanism in both industrial development and the open source software (OSS) community. The techniques for performing inspections have been well studied in industry; in OSS development, peer reviews are less well understood. We examine the two peer review techniques used by the successful, mature Apache server project: review-then-commit and commit-then-review. Using archival records of email discussion and version control repositories, we construct a series of metrics that produces measures similar to those used in traditional inspection experiments. Specifically, we measure the frequency of review, the level of participation in reviews, the size of the artifact under review, the calendar time to perform a review, and the number of reviews that find defects. We provide a comparison of the two Apache review techniques as well as a comparison of Apache review to inspection in an industrial project. 
We conclude that Apache reviews can be described as (1) early, frequent reviews (2) of small, independent, complete contributions (3) conducted asynchronously by a potentially large, but actually small, group of self-selected experts (4) leading to an efficient and effective peer review technique.}, keywords = {apache, cvs, email, inspection, mining software repositories (email), open source software, peer review, version control}, isbn = {978-1-60558-079-1}, doi = {10.1145/1368088.1368162}, url = {http://doi.acm.org/10.1145/1368088.1368162}, attachments = {https://flosshub.org/sites/flosshub.org/files/p541-rigby.pdf}, author = {Peter C. Rigby and Daniel M. German and Storey, Margaret-Anne} } @conference {973, title = {On the relation of refactorings and software defect prediction}, booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {35-38}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {This paper analyzes the influence of evolution activities such as refactoring on software defects. In a case study of five open source projects we used attributes of software evolution to predict defects in time periods of six months. We use versioning and issue tracking systems to extract 110 data mining features, which are separated into refactoring and non-refactoring related features. These features are used as input into classification algorithms that create prediction models for software defects. We found out that refactoring related features as well as non-refactoring related features lead to high quality prediction models. Additionally, we discovered that refactorings and defects have an inverse correlation: The number of software defects decreases, if the number of refactorings increased in the preceding time period. 
As a result, refactoring should be a significant part of both bug fixes and other evolutionary changes to reduce software defects.}, keywords = {argouml, bug fixing, bug reports, defects, evolution, jboss, liferay, prediction, refactoring, spring, weka, xdoclet}, isbn = {9781605580241}, doi = {10.1145/1370750.1370759}, attachments = {https://flosshub.org/sites/flosshub.org/files/p35-ratzinger.pdf}, author = {Ratzinger, Jacek and Sigmund, Thomas and Gall, Harald C.} } @conference {flosswp392, title = {Replication of FLOSS Research as eResearch}, booktitle = {Oxford e-Research Conference 2008}, year = {2008}, month = {May}, abstract = {We are working to introduce the ideas of eResearch to a multi-disciplinary research domain: those researchers examining Free/Libre and Open Source Software (FLOSS) and its development (Howison, Wiggins, \& Crowston, 2008).The first phase of this work focused on building a repository for data on FLOSS teams, FLOSSmole (Howison, Conklin, \& Crowston, 2006), and collaborating with other nascent data repositories in the field. Recently we have begun a second phase, which is to introduce another established principle of eResearch, that of broader collaboration through shared workflows accessing these data repositories. To provide an example of the potential value of this principle, we are replicating seminal FLOSS papers using eResearch approaches. 
This paper describes research outcomes and lessons learned from translating published literature into eResearch workflows.}, keywords = {eResearch eScience}, attachments = {https://flosshub.org/sites/flosshub.org/files/Wiggins-FLOSS-eResearch.pdf}, author = {Andrea Wiggins and James Howison and Kevin Crowston} } @article {1392, title = {Self-organization process in open-source software: An empirical study}, journal = {Information and Software Technology}, volume = {50}, year = {2008}, month = {4/2008}, pages = {361 - 374}, abstract = {Software systems must continually evolve to adapt to new functional requirements or quality requirements to remain competitive in the marketplace. However, different software systems follow different strategies to evolve, affecting both the release plan and the quality of these systems. In this paper, software evolution is considered as a self-organization process and the difference between closed-source software and open-source software is discussed in terms of self-organization. In particular, an empirical study of the evolution of Linux from version 2.4.0 to version 2.6.13 is reported. 
The study shows how open-source software systems self-organize to adapt to functional requirements and quality requirements.}, keywords = {Empirical study, evolution, linux, requirements, Self-organization, software evolution}, issn = {09505849}, doi = {10.1016/j.infsof.2007.02.018}, url = {http://www.sciencedirect.com/science/article/pii/S0950584907000225}, author = {Yu, Liguo} } @conference {WeiBgerber:2008:SPG:1370750.1370767, title = {Small patches get in!}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {67{\textendash}76}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {While there is a considerable amount of research on analyzing the change information stored in software repositories, only few researcher have looked at software changes contained in email archives in form of patches. In this paper we look at the email archives of two open source projects and answer questions like the following: How many emails contain patches? How long does it take for a patch to be accepted? Does the size of the patch influence its chances to be accepted or the duration until it gets accepted? 
Obviously, the answers to these questions can be helpful for the authors of patches, in particular because some of the answers are surprising.}, keywords = {case study, cvs, email, email archives, flac, mailing list, openafs, patch acceptance, patches, revision control, scm}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370767}, url = {http://doi.acm.org/10.1145/1370750.1370767}, attachments = {https://flosshub.org/sites/flosshub.org/files/p67-weissgerber.pdf}, author = {Wei{\ss}gerber, Peter and Neu, Daniel and Diehl, Stephan} } @conference {Siy:2008:SDW:1370750.1370784, title = {Summarizing developer work history using time series segmentation: challenge report}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {137{\textendash}140}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Temporal segmentation partitions time series data with the intent of producing more homogeneous segments. It is a technique used to preprocess data so that subsequent time series analysis on individual segments can detect trends that may not be evident when performing time series analysis on the entire dataset. This technique allows data miners to partition a large dataset without making any assumption of periodicity or any other a priori knowledge of the dataset{\textquoteright}s features. We investigate the insights that can be gained from the application of time series segmentation to software version repositories. Software version repositories from large projects contain on the order of hundreds of thousands of timestamped entries or more. It is a continuing challenge to aggregate such data so that noise is reduced and important characteristics are brought out. 
In this paper, we present a way to summarize developer work history in terms of the files they have modified over time by segmenting the CVS change data of individual Eclipse developers. We show that the files they modify tends to change significantly over time though most of them tend to work within the same directories.}, keywords = {contributions, cvs, developers, eclipse, msr challenge, temporal segmentation, time series, work history}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370784}, url = {http://doi.acm.org/10.1145/1370750.1370784}, author = {Siy, Harvey and Chundi, Parvathi and Subramaniam, Mahadevan} } @conference {Pattison:2008:TWP:1370750.1370776, title = {Talk and work: a preliminary report}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {113{\textendash}116}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Developers in Open Source Software (OSS) projects communicate using mailing lists. By convention, the mailing lists used only for task-related discussions, so they are primarily concerned with the software under development, and software process issues (releases, etc.). We focus on the discussions concerning the software, and study the frequency with which software entities (functions, methods, classes, etc) are mentioned in the mail. We find a strong, striking, cumulative relationship between this mention count in the email, and the number of times these entities are included in changes to the software. When we study the same phenomena over a series of time-intervals, the relationship is much less strong. 
This suggests some interesting avenues for future research.}, keywords = {ant, apache, email, mailing lists, postgresql, python, scm, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370776}, url = {http://doi.acm.org/10.1145/1370750.1370776}, attachments = {https://flosshub.org/sites/flosshub.org/files/p113-pattison.pdf}, author = {Pattison, David S. and Bird, Christian A. and Premkumar T. Devanbu} } @conference {Herraiz:2008:TSB:1370750.1370786, title = {Towards a simplification of the bug report form in eclipse}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {145{\textendash}148}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {We believe that the bug report form of Eclipse contains too many fields, and that for some fields, there are too many options. In this MSR challenge report, we focus in the case of the severity field. That field contains seven different levels of severity. Some of them seem very similar, and it is hard to distinguish among them. Users assign severity, and developers give priority to the reports depending on their severity. However, if users can not distinguish well among the various severity options, they will probably assign different priorities to bugs that require the same priority. We study the mean time to close bugs reported in Eclipse, and how the severity assigned by users affects this time. The results shows that classifying by time to close, there are less clusters of bugs than levels of severity. 
We therefore conclude that there is a need to make a simpler bug report form.}, keywords = {bug fixing, bug report, bug tracking system, classification, eclipse, msr challenge, severity}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370786}, url = {http://doi.acm.org/10.1145/1370750.1370786}, author = {Herraiz, Israel and Daniel M. German and Jesus M. Gonzalez-Barahona and Gregorio Robles} } @conference {Sudakrishnan:2008:UBF:1370750.1370761, title = {Understanding bug fix patterns in verilog}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {39{\textendash}42}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Today, many electronic systems are developed using a hardware description language, a kind of software that can be converted into integrated circuits or programmable logic devices. Like traditional software projects, hardware projects have bugs, and significant developer time is spent fixing them. A useful first step toward reducing bugs in hardware is developing an understanding of the frequency of different types of errors. Once the most common types are known, it is then possible to focus attention on eliminating them. As most hardware projects use software configuration management repositories, these can be mined for the textual bug fix changes. In this project, we analyze the bug fix history of four hardware projects written in Verilog and manually define 25 bug fix patterns. The frequency of each bug type is then computed for all projects. 
We find that 29 -- 55\% of the bug fix pattern instances in Verilog involve assignment statements, while 18 -- 25\% are related to if statements.}, keywords = {bug fixing, error classification, hdl, verilog, VHDL}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370761}, url = {http://doi.acm.org/10.1145/1370750.1370761}, attachments = {https://flosshub.org/sites/flosshub.org/files/p39-sudakrishnan.pdf}, author = {Sudakrishnan, Sangeetha and Madhavan, Janaki and Whitehead, Jr., E. James and Renau, Jose} } @article {138, title = {Understanding knowledge sharing activities in free/open source software projects: An empirical study}, journal = {Journal of Systems and Software}, volume = {81}, number = {3}, year = {2008}, note = {"Our study utilized data from the Debian project lists archives.... The Debian project hosts over 100 lists on all aspects related to the project. From the Debian lists archives (Debian Mailing Lists) we selected two high volume mailing lists. The following lists are analyzed in our study: {\textbullet} Debian-user. This list is specifically dedicated to help and discussion among users of Debian who speak English. {\textbullet} Debian-devel. This list is specifically dedicated to discussion about technical development topics. Our data collection period for both lists was from January 2000 to December 2005. We obtained archived mbox files of the two lists. Each file is a single text file containing one month of archived email messages. Every email message has a unique message-id, together with other identification fields defined by the Internet Message Format (RFC) 2822 (Internet Message Format, 2001)."}, pages = {431-446}, abstract = {Free/Open Source Software (F/OSS) projects are people-oriented and knowledge intensive software development environments. Many researchers focused on mailing lists to study coding activities of software developers. 
How expert software developers interact with each other and with non-developers in the use of community products have received little attention. This paper discusses the altruistic sharing of knowledge between knowledge providers and knowledge seekers in the Developer and User mailing lists of the Debian project. We analyze the posting and replying activities of the participants by counting the number of email messages they posted to the lists and the number of replies they made to questions others posted. We found out that participants interact and share their knowledge a lot, their posting activity is fairly highly correlated with their replying activity, the characteristics of posting and replying activities are different for different kinds of lists, and the knowledge sharing activity of self-organizing Free/Open Source communities could best be explained in terms of what we called "Fractal Cubic Distribution" rather than the power-law distribution mostly reported in the literature. The paper also proposes what could be researched in knowledge sharing activities in F/OSS projects mailing list and for what purpose. The research findings add to our understanding of knowledge sharing activities in F/OSS projects. (C) 2007 Elsevier Inc. All rights reserved.}, keywords = {debian, email, email archives, mailing list}, attachments = {https://flosshub.org/sites/flosshub.org/files/JSS_0.pdf}, author = {Sowe, Sulayman K. 
and Ioannis Stamelos and Lefteris Angelis} } @conference {Hindle:2008:LCT:1370750.1370773, title = {What do large commits tell us?: a taxonomical study of large commits}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {99{\textendash}108}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Research in the mining of software repositories has frequently ignored commits that include a large number of files (we call these large commits). The main goal of this paper is to understand the rationale behind large commits, and if there is anything we can learn from them. To address this goal we performed a case study that included the manual classification of large commits of nine open source projects. The contributions include a taxonomy of large commits, which are grouped according to their intention. We contrast large commits against small commits and show that large commits are more perfective while small commits are more corrective. These large commits provide us with a window on the development practices of maintenance teams.}, keywords = {boost, bug fixing, egroupware, enlightenment, evolution, firebird, large commits, maintenance, mysql, postgresql, samba, software evolution, source control system, spring}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370773}, url = {http://doi.acm.org/10.1145/1370750.1370773}, attachments = {https://flosshub.org/sites/flosshub.org/files/p99-hindle.pdf}, author = {Hindle, Abram and Daniel M. 
German and Holt, Ric} } @conference {996, title = {Correlating Social Interactions to Release History during Software Evolution}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {7 - 7}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {In this paper, we propose a method to reason about the nature of software changes by mining and correlating discussion archives. We employ an information retrieval approach to find correlation between source code change history and history of social interactions surrounding these changes. We apply our correlation method on two software systems, LSEdit and Apache Ant. The results of these exploratory case studies demonstrate the evidence of similarity between the content of free-form text emails among developers and the actual modifications in the code. We identify a set of correlation patterns between discussion and changed code vocabularies and discover that some releases referred to as minor should instead fall under the major category. These patterns can be used to give estimations about the type of a change and time needed to implement it.}, keywords = {ant, apache, change management, developers, discussion, effort estimation, lsedit, mailing lists, scm, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.4}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300007.pdf}, author = {Baysal, Olga and Malton, Andrew J.} } @article {springerlink:10.1007/s10588-006-9006-3, title = {Correlating temporal communication patterns of the Eclipse open source community with performance and creativity}, journal = {Computational \& Mathematical Organization Theory}, volume = {13}, year = {2007}, note = {"Social network data was collected from the Eclipse component development groups{\textquoteright} online mailing lists by using the online process tool (Gloor and Zhao, 2004). 
Data on bugs and enhancements for each group was collected from the Eclipse bugzilla database (Eclipse bugzilla, 2004). The social network data was analyzed with the TeCFlow tool (Gloor and Zhao, 2004)." "The study is based on data from the three main projects of the Eclipse open source development community, namely {\textquotedblleft}eclipse{\textquotedblright}, {\textquotedblleft}tools{\textquotedblright} and {\textquotedblleft}technology{\textquotedblright}. We have chosen thirty-three different component development groups for analysis." "The online process tool (online process tool, 2004) was utilized to collect communication data from their mailing list archives. The online process tool runs a robot that searches for URLs in the projects{\textquoteright} mailing list archives to compile a list of the possible URL links. It then extracts communication data as tuples in the form of {\textquotedblleft}sender, receiver, communication type, timestamp, communication contents{\textquotedblright} and stores it in the database. Further, bugs and enhancement data were collected from the Eclipse bugzilla database."}, pages = {17-27}, publisher = {Springer Netherlands}, abstract = {This paper studies the temporal communication patterns of online communities of developers and users of the open source Eclipse Java development environment. It measures the productivity of each community and seeks to identify correlations that exist between group communication characteristics and productivity attributes. The study uses the TeCFlow (Temporal Communication Flow) visualizer to create movie maps of the knowledge flow by analyzing the publicly accessible Eclipse developer mailing lists as an approximation of the social networks of developers and users. 
Thirty-three different Eclipse communities discussing development and use of components of Eclipse such as the Java Development Tools, the different platform components, the C/C++ Development Tools and the AspectJ extension have been analyzed over a period of six months. The temporal evolution of social network variables such as betweenness centrality, density, contribution index, and degree have been computed and plotted. Productivity of each development group is measured in terms of two indices, namely performance and creativity. Performance of a group is defined as the ratio of new bugs submitted compared with bugs fixed within the same period of time. Creativity is calculated as a function of new features proposed and implemented. Preliminary results indicate that there is a correlation between attributes of social networks such as density and betweenness centrality and group productivity measures in an open source development community. We also find a positive correlation between changes over time in betweenness centrality and creativity, and a negative correlation between changes in betweenness centrality and performance.}, keywords = {bug fixing, bugs, bugzilla, communication, creativity, developers, eclipse, email, email archives, feature requests, mailing lists, performance, productivity}, issn = {1381-298X}, url = {http://dx.doi.org/10.1007/s10588-006-9006-3}, attachments = {https://flosshub.org/sites/flosshub.org/files/16.pdf}, author = {Kidane, Yared and Gloor, Peter} } @conference {992, title = {Determining Implementation Expertise from Bug Reports}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {2 - 2}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {As developers work on a software product they accumulate expertise, including expertise about the code base of the software product. 
We call this type of expertise "implementation expertise". Knowing the set of developers who have implementation expertise for a software product has many important uses. This paper presents an empirical evaluation of two approaches to determining implementation expertise from the data in source and bug repositories. The expertise sets created by the approaches are compared to those provided by experts and evaluated using the measures of precision and recall. We found that both approaches are good at finding all of the appropriate developers, although they vary in how many false positives are returned.}, keywords = {bug reports, developers, eclipse, expertise, repository, scm, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.7}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300002.pdf}, author = {Anvik, John and Murphy, Gail C.} } @conference {1017, title = {Forecasting the Number of Changes in Eclipse Using Time Series Analysis}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {32 - 32}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {In order to predict the number of changes in the following months for the project Eclipse, we have applied a statistical (non-explanatory) model based on time series analysis. We have obtained the monthly number of changes in the CVS repository of Eclipse, using the CVSAnalY tool. The input to our model was the filtered series of the number of changes per month, and the output was the number of changes per month for the next three months. 
Then we aggregated the results of the three months to obtain the total number of changes in the given period in the challenge.}, keywords = {change management, cvs, cvsanaly, eclipse, prediction}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.10}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300032.pdf}, author = {Herraiz, Israel and Jesus M. Gonzalez-Barahona and Gregorio Robles} } @conference {991, title = {How Long Will It Take to Fix This Bug?}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {1}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Predicting the time and effort for a software problem has long been a difficult task. We present an approach that automatically predicts the fixing effort, i.e., the person-hours spent on fixing an issue. Our technique leverages existing issue tracking systems: given a new issue report, we use the Lucene framework to search for similar, earlier reports and use their average time as a prediction. Our approach thus allows for early effort estimation, helping in assigning issues and scheduling stable releases. We evaluated our approach using effort data from the JBoss project. 
Given a sufficient number of issues reports, our automatic predictions are close to the actual effort; for issues that are bugs, we are off by only one hour, beating naive predictions by a factor of four.}, keywords = {bug fixing, bug reports, effort estimation, jboss, lucene, prediction, time}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.13}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300001.pdf}, author = {Weiss, Cathrin and Premraj, Rahul and Zimmermann, Thomas and Zeller, Andreas} } @conference {625, title = {Innovation in Open Source Software Development: A Tale of Two Features}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {234/2007}, year = {2007}, note = {"archives of project discussion forums were then examined to determine when the feature was first proposed, how it was debated, and when it was ultimately adopted"}, month = {2007///}, pages = {109 - 120}, publisher = {Springer}, organization = {Springer}, chapter = {9}, abstract = {Open Source Software Development appears to depart radically from conventional notions of software engineering. In particular, requirements for Open Source projects seem to be asserted rather than elicited. This paper examines two features of selected open source products: {\textquotedblleft}tabbed browsing{\textquotedblright} as realized in the Firefox web browser, and {\textquotedblleft}edge magnetism{\textquotedblright} found in the Gnome desktop environment{\textquoteright}s Metacity window manager. Using archives of mailing lists and issue tracking databases, these features were traced from first mention to release, in attempt to discover the process by which requirements are proposed, adopted, and implemented in their respective Open Source projects. The results confirm the importance of user participation in Open Source projects. 
}, keywords = {email, email archives, features, Firefox, gnome, mailing list, metacity, mozilla}, isbn = {978-0-387-72485-0}, doi = {10.1007/978-0-387-72486-7_9}, attachments = {https://flosshub.org/sites/flosshub.org/files/Innovation\%20in\%20OSS.pdf}, author = {Noll, John} } @conference {1018, title = {Local and Global Recency Weighting Approach to Bug Prediction}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {33 - 33}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Finding and fixing software bugs is a challenging maintenance task, and a significant amount of effort is invested by software development companies on this issue. In this paper, we use the Eclipse project{\textquoteright}s recorded software bug history to predict occurrence of future bugs. The history contains information on when bugs have been reported and subsequently fixed.}, keywords = {bug fixing, bug reports, eclipse, maintenance, prediction}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.17}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300033.pdf}, author = {Joshi, Hemant and Zhang, Chuanlei and Ramaswamy, S. and Bayrak, Coskun} } @article {Wang:2007:MEO:1317471.1317479, title = {Measuring the evolution of open source software systems with their communities}, journal = {SIGSOFT Softw. Eng. Notes}, volume = {32}, year = {2007}, note = {"1. M1: The number of modules in software system at a series of specific moment. 2. CD: The number of developers (code contributors) at a series of specific moment. 3. MC: The correlation of M1 and CD." "We get the information of Modules from the Launchpad[1] which is a distributed collaborative infrastructure for Ubuntu and other open source software development. Ubuntu mainly grows though a manner of adding new packages. So, we treat packages as the modules we mention before. 
In Ubuntu community, some members are in charge of auditing the packages and add them to Ubuntu, here developers are refers them."}, month = {November}, publisher = {ACM}, address = {New York, NY, USA}, abstract = {Open Source Software (OSS) has become the subject of much commercial and academic interest in last few years. As traditional software, OSS also evolves to fulfill the need of stakeholders. Therefore, providing quantitative metrics for OSS evolution has also become an urgent issue. However, most existing studies of software evolution have been performed on systems developed within a single company using traditional management techniques. These metrics models are inadequate to measure OSS evolution. In this paper, we describe a preliminary evolution metrics set for evaluating OSS. The most significant novelty of this model is that it takes specific properties of Open Source Community (OSC) into consideration. In another word, we measure the evolution of OSS and OSC together. We also provide a lightweight case study on Ubuntu project using this metrics set. We find out that the Open Source Community and its members also play essential role in OSS evolution. 
We expect this metrics model can bring better understandings and explanations of phenomena in open source development and evolution.}, keywords = {evolution, launchpad, metrics, open source community, open source software, Ubuntu}, issn = {0163-5948}, doi = {10.1145/1317471.1317479}, url = {http://doi.acm.org/10.1145/1317471.1317479}, author = {Yi Wang and Defeng Guo and Shi, Huihui} } @article {125, title = {Membership herding and network stability in the open source community: The Ising perspective}, journal = {Management Science}, volume = {53}, number = {7}, year = {2007}, month = {Jul}, pages = {1086-1101}, abstract = {The aim of this paper is twofold: (1) to conceptually understand membership dynamics in the open source software (OSS) community, and (2) to explore how different network characteristics (i.e., network size and connectivity) influence the stability of an OSS network. Through the lens of Ising theory, which is widely accepted in physics, we investigate basic patterns of interaction and present fresh conceptual insight into dynamic and reciprocal relations among OSS community members. We also perform computer simulations based on empirical data collected from two actual OSS communities. Key findings include: (1) membership herding is highly present when external influences (e.g., the availability of other OSS projects) are weak, but decreases significantly when external influences increase, (2) propensity for membership herding is most likely to be seen in a large network with random connectivity, and (3) for large networks, when external influences are weak, random connectivity will result in higher network strength than scale-free connectivity (as external influences increase, however, the reverse phenomenon is observed). In addition, scale-free connectivity appears to be less volatile than random connectivity in response to an increase in the strength of external influences. 
We conclude with several implications that may be of significance to OSS stakeholders in particular, and to a broader range of online communities in general.}, keywords = {BEHAVIOR, DYNAMICS, ECONOMICS, INNOVATION, INVESTMENT, Ising theory, membership herding, MODEL, MOTIVATION, network connectivity, network stability, open source, PHASE-TRANSITION, UNCERTAINTY}, issn = {0025-1909}, author = {Oh, Wonseok and Jeon, Sangyong} } @conference {1015, title = {Mining Eclipse Developer Contributions via Author-Topic Models}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {30 - 30}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {We present the results of applying statistical author-topic models to a subset of the Eclipse 3.0 source code consisting of 2,119 source files and 700,000 lines of code from 59 developers. This technique provides an intuitive and automated framework with which to mine developer contributions and competencies from a given code base while simultaneously extracting software function in the form of topics. 
In addition to serving as a convenient summary for program function and developer activities, our study shows that topic models provide a meaningful, effective, and statistical basis for developer similarity analysis.}, keywords = {contributions, developers, eclipse, expertise, mining challenge, msr challenge, source code, topics}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.20}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300030.pdf}, author = {Linstead, Erik and Rigor, Paul and Bajracharya, Sushil and Lopes, Cristina and Baldi, Pierre} } @conference {999, title = {Mining Software Repositories with iSPARQL and a Software Evolution Ontology}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {10 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {One of the most important decisions researchers face when analyzing the evolution of software systems is the choice of a proper data analysis/exchange format. Most existing formats have to be processed with special programs written specifically for that purpose and are not easily extendible. Most scientists, therefore, use their own database(s) requiring each of them to repeat the work of writing the import/export programs to their format. We present EvoOnt, a software repository data exchange format based on the Web Ontology Language (OWL). EvoOnt includes software, release, and bug-related information. Since OWL describes the semantics of the data, EvoOnt is (1) easily extendible, (2) comes with many existing tools, and (3) allows to derive assertions through its inherent Description Logic reasoning capabilities. The paper also shows iSPARQL -- our SPARQL-based Semantic Web query engine containing similarity joins. 
Together with EvoOnt, iSPARQL can accomplish a sizable number of tasks sought in software repository mining projects, such as an assessment of the amount of change between versions or the detection of bad code smells. To illustrate the usefulness of EvoOnt (and iSPARQL), we perform a series of experiments with a real-world Java project. These show that a number of software analyses can be reduced to simple iSPARQL queries on an EvoOnt dataset.}, keywords = {database, eclipse, evoont, java, owl, semantic, sparql}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.21}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300010.pdf}, author = {Kiefer, Christoph and Bernstein, Abraham and Tappolet, Jonas} } @conference {1016, title = {Predicting Defects and Changes with Import Relations}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {31 - 31}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Lowering the number of defects and estimating the development time of a software project are two important goals of software engineering. To predict the number of defects and changes we train models with import relations. 
This enables us to decrease the number of defects by more efficient testing and to assess the effort needed in respect to the number of changes.}, keywords = {defects, eclipse, effort estimation, mining challenge, msr challenge, prediction}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.24}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300031.pdf}, author = {Schroter, Adrian} } @conference {1014, title = {Predicting Eclipse Bug Lifetimes}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {29 - 29}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {In non-trivial software development projects planning and allocation of resources is an important and difficult task. Estimation of work time to fix a bug is commonly used to support this process. This research explores the viability of using data mining tools to predict the time to fix a bug given only the basic information known at the beginning of a bug{\textquoteright}s lifetime. To address this question, a historical portion of the Eclipse Bugzilla database is used for modeling and predicting bug lifetimes. A bug history transformation process is described and several data mining models are built and tested. Interesting behaviours derived from the models are documented. 
The models can correctly predict up to 34.9\% of the bugs into a discretized log scaled lifetime class.}, keywords = {bug fixing, bugzilla, classification, eclipse, effort estimation, mining challenge, msr challenge, prediction, weka}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.25}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300029.pdf}, author = {Panjer, Lucas D.} } @conference {994, title = {Recommending Emergent Teams}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {5 - 5}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {To build successful complex software systems, developers must collaborate with each other to solve issues. To facilitate this collaboration, specialized tools, such as chat and screen sharing, are being integrated into development environments. Currently, these tools require a developer to maintain a list of other developers with whom they may wish to communicate and to determine who within this list has expertise for a specific situation. For large, dynamic projects, like several successful open-source projects, these requirements place an unreasonable burden on the developer. In this paper, we show how the structure of a team emerges from how developers change software artifacts. We introduce the Emergent Expertise Locator (EEL) that uses emergent team information to propose experts to a developer within their development environment as the developer works. 
We found that EEL produces, on average, results with higher precision and higher recall than an existing heuristic for expertise recommendation.}, keywords = {bugzilla, developers, eclipse, evolution, expertise, Firefox, teams}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.27}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300005.pdf}, author = {Minto, Shawn and Murphy, Gail C.} } @conference {1004, title = {Release Pattern Discovery via Partitioning: Methodology and Case Study}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {19 - 19}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {The development of Open Source systems produces a variety of software artifacts such as source code, version control records, bug reports, and email discussions. Since the development is distributed across different tool environments and developer practices, any analysis of project behavior must be inferred from whatever common artifacts happen to be available. In this paper, we propose an approach to characterizing a project{\textquoteright}s behavior around the time of major and minor releases; we do this by partitioning the observed activities, such as artifact check-ins, around the dates of major and minor releases, and then look for recognizable patterns. We validate this approach by means of a case study on the MySQL database system; in this case study, we found patterns which suggested MySQL was behaving consistently within itself. 
These patterns included testing and documenting that took place more before a release than after and that the rate of source code changes dipped around release time.}, keywords = {bitkeeper, bt2csv, cvs, evolution, mysql, releases, revision history, scm, softchange, version control}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.28}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300019.pdf}, author = {Hindle, Abram and Godfrey, Michael W. and Holt, Richard C.} } @article {flosswp405, title = {Self-organization of teams for free/libre open source software development}, journal = {Information and Software Technology}, volume = {49}, number = {6}, year = {2007}, note = {"First, the data from these projects that we needed for analysis had to be publicly available (ruling out projects that limit access to their email lists or trackers). Second, we chose the projects that had more than 7 members"..."projects that have attracted numerous developers beyond the initial project founders, are continuing to release software, have numerous downloads and have an active user community that provides feedback" "3 FLOSS projects were selected for analysis, namely Gaim, eGroupWare and Compiere ERP." all had sourceforge hosting Data: "The primary data used for our study were interactions on the main developer communication forum, either a developer mailing list or web-based discussion forum." Analysis: "For this project, we inductively content-analyzed developer email interactions to identify the task assignment mechanisms used in the process. We coded each instance of task assignment identified on three dimensions: who assigned the task, to whom, and how" }, pages = {564-575}, abstract = {This paper provides empirical evidence about how free/libre open source software development teams self-organize their work. 
Following a case study methodology, we examined developer interaction data from three active and successful FLOSS projects using qualitative research methods, specifically inductive content analysis, to identify the task-assignment mechanisms used by the participants. We found that "self-assignment" was the most common mechanism across three FLOSS projects. This mechanism is consistent with expectations for distributed and largely volunteer teams. We conclude by discussing whether these emergent practices can be usefully transferred to mainstream practice and indicating directions for future research.}, keywords = {case study, compiere, coordination, DESIGN, distributed teams, egroupware, email, email archives, forum, free/libre open source software development, gaim, INTERNET, mailing list, metadata, qualitative research methods, self-organizing teams, sourceforge, SYSTEMS, task assignment, WORK}, attachments = {https://flosshub.org/sites/flosshub.org/files/task_assignment_final.pdf}, author = {Kevin Crowston and Li, Qing and Kangning Wei and Eseryel, U. Yeliz and Howison, James} } @conference {993, title = {Spam Filter Based Approach for Finding Fault-Prone Software Modules}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {4 - 4}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Because of the increase of needs for spam e-mail detection, the spam filtering technique has been improved as a convenient and effective technique for text mining. We propose a novel approach to detect fault-prone modules in a way that the source code modules are considered as text files and are applied to the spam filter directly. In order to show the applicability of our approach, we conducted experimental applications using source code repositories of Java based open source developments. 
The result of experiments shows that our approach can classify more than 75\% of software modules correctly.}, keywords = {argouml, bug reports, classification, eclipse, java, modules, scm, spam, text mining}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.29}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300004.pdf}, author = {Mizuno, Osamu and Ikami, Shiro and Nakaichi, Shuya and Kikuno, Tohru} } @conference {1006, title = {Towards a Theoretical Model for Software Growth}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {21 - 21}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Software growth (and more broadly, software evolution) is usually considered in terms of size or complexity of source code. However in different studies, usually different metrics are used, which make it difficult to compare approaches and results. In addition, not all metrics are equally easy to calculate for a given source code, which leads to the question of which one is the easiest to calculate without losing too much information. To address both issues, in this paper present a comprehensive study, based on the analysis of about 700,000 C source code files, calculating several size and complexity metrics for all of them. For this sample, we have found double Pareto statistical distributions for all metrics considered, and a high correlation between any two of them. 
This would imply that any model addressing software growth should produce this Pareto distributions, and that analysis based on any of the considered metrics should show a similar pattern, provided the sample of files considered is large enough.}, keywords = {C, complexity, evolution, freebsd, growth, halstead, lines of code, loc, mccabe, metrics, scm, size, sloc, sloccount, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.31}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300021.pdf}, author = {Herraiz, Israel and Jesus M. Gonzalez-Barahona and Gregorio Robles} } @article {495, title = {Towards an understanding of FLOSS: Infrastructures, Materiality and the Digital Business Ecosystem}, journal = {Science Studies}, number = {2}, year = {2007}, abstract = {In this paper we present empirical work detailing the engagement practices of a large FLOSS project, the Digital Business Ecosystem (DBE). In common with many other FLOSS projects, the DBE project focused on the development of infrastructural software components. Infrastructures and FLOSS software exhibit multiplicity: as objects they both change and stay the same. Whilst the implications of multiplicity with respect to infrastructure have been well-documented, with respect to FLOSS, they remain under-explored. Through examining how the DBE engaged new participants we were able to explore the nature of the FLOSS software object by asking the implied question: engagement with what? We draw on recent analysis by Law and Singleton to show how the innovative yet non-existent potentiality of the DBE was as significant to engagement as its steadily growing codebase. 
We argue that acknowledging the materiality and immateriality of the FLOSS software object has important consequences for management of, and engagement with, FLOSS projects.}, keywords = {engagement, FLOSS, infrastructures, INNOVATION, materiality}, attachments = {https://flosshub.org/sites/flosshub.org/files/Darking_Whitley.pdf}, author = {Mary Darking and Edgar A Whitley} } @conference {1226, title = {Understanding the KDE Social Structure through Mining of Email Archive}, booktitle = {2nd Workshop on Public Data about Software Development (WoPDaSD 2007)}, year = {2007}, note = {"Our data source is constituted by e-mails sent to KDE mailing-lists and archived by MARC" "Two problems quickly arise: neither the e-mails addresses nor the names can be considered unique. Consequently, we used an in-depth search algorithm to put together {\textquotedblleft}name-email{\textquotedblright} couples corresponding to a same contributor. Indeed, the algorithm suggests possible merges." "There is a specific mailing list in our data set, kde-commit, which gathers automatic notifications from the revision control system (RCS)....We measure {\textquotedblleft}commit{\textquotedblright} by the number of messages sent to the {\textquotedblleft}kde-commit{\textquotedblright} mailing list. However, we did not count {\textquotedblleft}silent{\textquotedblright} commits, nor usual messages sent to this mailing list." "We measured activities done in BTS in two ways: {\textquotedblleft}bug opener{\textquotedblright} and {\textquotedblleft}non bug opener{\textquotedblright}. First, we counted the number of modifications done by the contributor who opened the concerned bug report. " }, abstract = {In order to achieve a better understanding of FLOSS social structure, we need a definition of social position. From a theoretical perspective, we propose to think the participation as a trajectory. 
Empirically, we use optimal matching to build a typology of participation trajectories based on KDE email archives. We show how these trajectories structure the community as a whole by combining these results with a social network analysis.}, keywords = {bug tracking system, bugzilla, commit, email, email archive, kde, mailing list, participation, revision control, social network analysis}, attachments = {https://flosshub.org/sites/flosshub.org/files/wopdasd_studer_et_all_full.pdf}, author = {Studer, Matthias and M{\"u}ller, Beno{\^\i}t and Ritschard, Gilbert} } @conference {1009, title = {Using Software Distributions to Understand the Relationship among Free and Open Source Software Projects}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {24 - 24}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Success in the open source software world has been measured in terms of metrics such as number of downloads, number of commits, number of lines of code, number of participants, etc. These metrics tend to discriminate towards applications that are small and tend to evolve slowly. A problem is, however, how to identify applications in these latter categories that are important. Software distributions specify the dependencies needed to build and to run a given software application. We use this information to create a dependency graph of the applications contained in such a distribution. We explore the characteristics of this graph, and use it to define some metrics to quantify the dependencies (and dependents) of a given software application. We demonstrate that some applications that are invisible to the final user (such as libraries) are widely used by end-user applications. 
This graph can be used as a proxy to measure success of small, slowly evolving free and open source software.}, keywords = {dependencies, evolution, fink, metrics}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.32}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300024.pdf}, author = {Daniel M. German} } @conference {1008, title = {What Can OSS Mailing Lists Tell Us? A Preliminary Psychometric Text Analysis of the Apache Developer Mailing List}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {23 - 23}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Developer mailing lists are a rich source of information about Open Source Software (OSS) development. The unstructured nature of email makes extracting information difficult. We use a psychometrically-based linguistic analysis tool, the LIWC, to examine the Apache httpd server developer mailing list. We conduct three preliminary experiments to assess the appropriateness of this tool for information extraction from mailing lists. First, using LIWC dimensions that are correlated with the big five personality traits, we assess the personality of four top developers against a baseline for the entire mailing list. The two developers that were responsible for the major Apache releases had similar personalities. Their personalities were different from the baseline and the other developers. Second, the first and last 50 emails for two top developers who have left the project are examined. The analysis shows promise in understanding why developers join and leave a project. Third, we examine word usage on the mailing list for two major Apache releases. 
The differences may reflect the relative success of each release.}, keywords = {apache, developers, email, joining, liwc, mailing lists, personality}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.35}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300023.pdf}, author = {Peter C. Rigby and Hassan, Ahmed E.} } @conference {Tsunoda:2006:AOD:1137983.1138031, title = {Analyzing OSS developers{\textquoteright} working time using mailing lists archives}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {181{\textendash}182}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Our mining question is {\textquotedblleft}when OSS developers work?{\textquotedblright} OSS developers{\textquoteright} working time may be a good indicator to understand the development style of a project. (For example, if many developers work in office hour, these might be daily works in a company.)}, keywords = {developers, email, email archives, mailing lists, mining challenge, msr challenge, overtime work, postgresql, workload}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138031}, url = {http://doi.acm.org/10.1145/1137983.1138031}, attachments = {https://flosshub.org/sites/flosshub.org/files/181AnalyzingOSS.pdf}, author = {Tsunoda, Masateru and Monden, Akito and Kakimoto, Takeshi and Kamei, Yasutaka and Matsumoto, Ken-ichi} } @conference {DAmbros:2006:AER:1137983.1138029, title = {Applying the evolution radar to PostgreSQL}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {177{\textendash}178}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, keywords = {cvs, documentation, evolution, evolution radar, logical coupling, makefile, mining challenge, msr challenge, postgresql, 
re-engineering, refactoring, release history, rhdb, source code, version control, visualization}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138029}, url = {http://doi.acm.org/10.1145/1137983.1138029}, attachments = {https://flosshub.org/sites/flosshub.org/files/177ApplyingEvolution.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele} } @conference {683, title = {Communication Networks in an Open Source Software Project}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {203}, year = {2006}, pages = {297 - 306}, publisher = {Springer}, organization = {Springer}, abstract = {This study explores the nature of the social network and the patterns of communication that exist in an open source software development project, the Apache HTTP (WEB) server project. Our analysis of archival data on email communications between developers in the Apache HTTP server project suggests an interesting pattern of communication. We find that the core developers self-organize into three sub-groups that communicate intensely in completing the project. Our analysis also reveals that a few prominent developers who are centrally located in the network are driving communications within the project. We identify the implications of our findings and suggest areas for further research. 
}, keywords = {apache, core, developers, email, email archive, mailing list, participation, social network analysis}, isbn = {978-0-387-34225-2}, doi = {10.1007/0-387-34226-5_30}, attachments = {https://flosshub.org/sites/flosshub.org/files/Communication\%20Networks\%20in\%20an\%20Open\%20Source.pdf}, author = {Roberts, Jeffrey and Il-Horn Hann and Sandra Slaughter} } @conference {Herraiz:2006:CSN:1116163.1116405, title = {Comparison Between SLOCs and Number of Files As Size Metrics for Software Evolution Analysis}, booktitle = {Proceedings of the Conference on Software Maintenance and Reengineering}, series = {CSMR {\textquoteright}06}, year = {2006}, pages = {206{\textendash}213}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, keywords = {empirical studies, libre software, metrics, software evolution}, isbn = {0-7695-2536-9}, url = {http://dl.acm.org/citation.cfm?id=1116163.1116405}, author = {Herraiz, Israel and Gregorio Robles and Gonzalez-Barahona, Jesus M.} } @conference {686, title = {Contributor Turnover in Libre Software Projects}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, pages = {273 - 286}, publisher = {Springer}, organization = {Springer}, abstract = {A common problem that management faces in software companies is the high instability of their staff. In libre (free, open source) software projects, the permanence of developers is also an open issue, with the potential of causing problems amplified by the self-organizing nature that most of them exhibit. Hence, human resources in libre software projects are even more difficult to manage: developers are in most cases not bound by a contract and, in addition, there is not a real management structure concerned about this problem. 
This raises some interesting questions with respect to the composition of development teams in libre software projects, and how they evolve over time. There are projects lead by their original founders (some sort of {\textquotedblleft}code gods{\textquotedblright}), while others are driven by several different developer groups over time (i.e. the project {\textquotedblleft}regenerates{\textquotedblright} itself). In this paper, we propose a quantitative methodology, based on the analysis of the activity in the source code management repositories, to study how these processes (developers leaving, developers joining) affect libre software projects. The basis of it is the analysis of the composition of the core group, the group of developers most active in a project, for several time lapses. We will apply this methodology to several large, well-known libre software projects, and show how it can be used to characterize them. In addition, we will discuss the lessons that can be learned, and the validity of our proposal. }, keywords = {apache, committers, core, cvs, cvsanaly, developers, evolution, freebsd, gimp, gnome, kde, mono, mozilla}, doi = {10.1007/0-387-34226-5_28}, attachments = {https://flosshub.org/sites/flosshub.org/files/Contributor\%20Turnover\%20in\%20Libre\%20Software\%20Projects.pdf}, author = {Gregorio Robles and Gonzalez-Barahona, Jesus} } @conference {Sager:2006:DSJ:1137983.1138000, title = {Detecting similar Java classes using tree algorithms}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {65{\textendash}71}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Similarity analysis of source code is helpful during development to provide, for instance, better support for code reuse. 
Consider a development environment that analyzes code while typing and that suggests similar code examples or existing implementations from a source code repository. Mining software repositories by means of similarity measures enables and enforces reusing existing code and reduces the developing effort needed by creating a shared knowledge base of code fragments. In information retrieval similarity measures are often used to find documents similar to a given query document. This paper extends this idea to source code repositories. It introduces our approach to detect similar Java classes in software projects using tree similarity algorithms. We show how our approach allows to find similar Java classes based on an evaluation of three tree-based similarity measures in the context of five user-defined test cases as well as a preliminary software evolution analysis of a medium-sized Java project. Initial results of our technique indicate that it (1) is indeed useful to identify similar Java classes, (2) successfully identifies the ex ante and ex post versions of refactored classes, and (3) provides some interesting insights into within-version and between-version dependencies of classes within a Java project.}, keywords = {change analysis, clones, coogle, eclipse, famix, java, similarity, software evolution, software repositories, source code, tree similarity measures}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138000}, url = {http://doi.acm.org/10.1145/1137983.1138000}, attachments = {https://flosshub.org/sites/flosshub.org/files/65Detecting.pdf}, author = {Sager, Tobias and Bernstein, Abraham and Pinzger, Martin and Kiefer, Christoph} } @conference {Amor:2006:EEC:1139113.1139116, title = {Effort Estimation by Characterizing Developer Activity}, booktitle = {Proceedings of the 2006 International Workshop on Economics Driven Software Engineering Research}, series = {EDSER {\textquoteright}06}, year = {2006}, pages = {3{\textendash}6}, publisher 
= {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {During the latest years libre (free, open source) software has gained a lot of attention from the industry. Following this interest, the research community is also studying it. For instance, many teams are performing quantitative analysis on the large quantity of data which is publicly available from the development repositories maintained by libre software projects. However, not much of this research is focused on cost or effort estimations, despite its importance (for instance, for companies developing libre software or collaborating with libre software projects), and the availability of some data which could be useful for this purpose. Our position is that classical effort estimation models can be improved from the study of these data, at least when applied to libre software. In this paper, we focus on the characterization of developer activity, which we argue can improve effort estimation. This activity can be traced with a lot of detail, and the resulting data can also be used for validation of any effort estimation model. }, keywords = {developer characterization, effort estimation, mining software repositories, open source software, software economics}, isbn = {1-59593-396-4}, doi = {10.1145/1139113.1139116}, url = {http://doi.acm.org/10.1145/1139113.1139116}, author = {Amor, Juan Jose and Gregorio Robles and Jesus M. 
Gonzalez-Barahona} } @conference {689, title = {Evolution of Open Source Communities}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, note = {"we took snapshots of its membership at regular intervals" "we chose a one year period" "we retrieve the list of core developers ordered by their number of inbound messages, as noted above."}, pages = {21 - 32}, publisher = {Springer}, organization = {Springer}, abstract = {The goal of this paper is to document the evolution of a portfolio of related open source communities over time. As a case study, we explore the subprojects of the Apache project, one of the largest and most visible open source projects. We extract the community structure from the mailing list data, and study how the subcommunities evolve, and are interrelated over time. Our analysis leads us to propose the following hypotheses about the growth of open source communities: (1) communities add new developers by a process of preferential attachment; (2) links between existing communities are also subject to preferential attachment; (3) developers will migrate between communities together with other collaborators; and (4) information flow follows project dependencies. In particular, we are concerned with the underlying factors that motivate the migration between communities, such as information flow, co-worker ties, and project dependencies. 
}, keywords = {apache, COMMUNITY, core, developers, email, email archives, mailing list, membership}, doi = {10.1007/0-387-34226-5_3}, attachments = {https://flosshub.org/sites/flosshub.org/files/Evolution\%20of\%20Open\%20Source\%20Communities.pdf}, author = {Weiss, Michael and Moroiu, Gabriella and Zhao, Ping} } @conference {DAmbros:2006:ERV:1137983.1137992, title = {The evolution radar: visualizing integrated logical coupling information}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {26{\textendash}32}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In software evolution research logical coupling has extensively been used to recover the hidden dependencies between source code artifacts. They would otherwise go lost because of the file-based nature of current versioning systems. Previous research has dealt with low-level couplings between files, leading to an explosion of data to be analyzed, or has abstracted the logical couplings to module level, leading to a loss of detailed information. In this paper we propose a visualization-based approach which integrates both file-level and module-level logical coupling information. 
This not only facilitates an in-depth analysis of the logical couplings at all granularity levels, it also leads to a precise characterization of the system modules in terms of their logical coupling dependencies.}, keywords = {change management, cvs, evolution, logical coupling, mozilla, scm, source code, thunderbird, tinderbox, visualization}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1137992}, url = {http://doi.acm.org/10.1145/1137983.1137992}, attachments = {https://flosshub.org/sites/flosshub.org/files/26TheEvolutionRadar.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele and Lungu, Mircea} } @conference {Jiang:2006:EEC:1137983.1138030, title = {Examining the evolution of code comments in PostgreSQL}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {179{\textendash}180}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {It is common, especially in large software systems, for developers to change code without updating its associated comments due to their unfamiliarity with the code or due to time constraints. This is a potential problem since outdated comments may confuse or mislead developers who perform future development. Using data recovered from CVS, we study the evolution of code comments in the PostgreSQL project. 
Our study reveals that over time the percentage of commented functions remains constant except for early fluctuation due to the commenting style of a particular active developer.},
  keywords = {code comments, comments, cvs, evolution, functions, maintenance, mining challenge, msr challenge, postgresql, software evolution, software maintenance, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138030},
  url = {http://doi.acm.org/10.1145/1137983.1138030},
  attachments = {https://flosshub.org/sites/flosshub.org/files/179ExaminingTheEvolution.pdf},
  author = {Jiang, Zhen Ming and Hassan, Ahmed E.}
}

@conference{Robles:2006:GLD:1137983.1138017,
  title = {Geographic location of developers at {SourceForge}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {144--150},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {The development of libre (free/open source) software is usually performed by geographically distributed teams. Participation in most cases is voluntary, sometimes sporadic, and often not framed by a pre-defined management structure. This means that anybody can contribute, and in principle no national origin has advantages over others, except for the differences in availability and quality of Internet connections and language. However, differences in participation across regions do exist, although there are little studies about them. In this paper we present some data which can be the basis for some of those studies. We have taken the database of users registered at SourceForge, the largest libre software development web-based platform, and have inferred their geographical locations. For this, we have applied several techniques and heuristics on the available data (mainly e-mail addresses and time zones), which are presented and discussed in detail.
The results show a snapshot of the regional distribution of SourceForge users, which may be a good proxy of the actual distribution of libre software developers. In addition, the methodology may be of interest for similar studies in other domains, when the available data is similar (as is the case of mailing lists related to software projects).},
  keywords = {distributed, email, email address, free software, geographical location, geography, libre software, mining software repositories, open source software, sourceforge, timezone},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138017},
  url = {http://doi.acm.org/10.1145/1137983.1138017},
  attachments = {https://flosshub.org/sites/flosshub.org/files/144GeographicLocation.pdf},
  author = {Robles, Gregorio and Gonzalez-Barahona, Jesus M.}
}

@article{flosswp325,
  title = {Identifying Knowledge Brokers that Yield Software Engineering Knowledge in {OSS} Projects},
  journal = {Information and Software Technology},
  volume = {48},
  number = {11},
  year = {2006},
  note = {Uses the Debian mailing lists "kde", "mentor", and "user". The collection period was from January 2001 to September 2004},
  month = nov,
  pages = {1025--1033},
  abstract = {Much research on open source software development concentrates on developer lists and other software repositories to investigate what motivates professional software developers to participate in open source software projects. Little attention has been paid to individuals who spend valuable time in lists helping participants on some mundane yet vital project activities. Using three Debian lists as a case study we investigate the impact of knowledge brokers and their associated activities in open source projects. Social network analysis was used to visualize how participants are affiliated with the lists. The network topology reveals substantial community participation. The consequence of collaborating in mundane activities for the success of open source software projects is discussed.
The direct beneficiaries of this research are in the identification of knowledge experts in open source software projects.},
  keywords = {debian, email, email archives, expertise, knowledge sharing, mailing list, project success, social network analysis},
  doi = {10.1016/j.infsof.2005.12.019},
  attachments = {https://flosshub.org/sites/flosshub.org/files/IST-Vol-48-11-2006.pdf},
  author = {Sowe, Sulayman K. and Stamelos, Ioannis and Angelis, Lefteris}
}

@article{flosswp113,
  title = {The Impact of Ideology on Effectiveness in Open Source Software Development Teams},
  journal = {MIS Quarterly},
  volume = {30},
  number = {2},
  year = {2006},
  note = {"...we collected data related to OSS projects hosted on Sourceforge (www.sourceforge.net)." "Data was collected using two surveys and from the Sourceforge website."},
  pages = {291--314},
  abstract = {The emerging work on understanding open source software has argued for the importance of understanding what leads to effectiveness in OSS development teams and has pointed to the importance of ideology. This paper develops a framework of the OSS ideology (including specific norms, beliefs, and values) and a theoretical model to show how adherence to components of the ideology impact effectiveness in OSS teams. The model is based on the idea that ideology provides clan control, which is important in OSS development settings because OSS teams generally lack formal behavioral and outcome controls. The paper hypothesizes both direct effects of ideology on OSS team effectiveness and indirect effects via influences on affective trust, cognitive trust, and communication quality. Hypotheses are tested using survey and objective data on OSS projects. Four effectiveness measures are used to capture unique aspects of effectiveness in OSS including both the extent to which a team attracts input from the community and the team{\textquoteright}s success in accomplishing project outcomes.
Results support the main thesis that OSS team members{\textquoteright} adherence to the tenets of the OSS community ideology enhances OSS team effectiveness. The study uncovers several differences in the importance of OSS norms, beliefs, and values to different kinds of OSS team effectiveness and discusses implications for theory and practice.},
  keywords = {bug fixing, bug reports, bug tracking, communication, COMMUNITY, effectiveness, feature requests, ideology, metadata, sourceforge, Survey, team effort, team size, trust},
  attachments = {https://flosshub.org/sites/flosshub.org/files/stewartgosain2.pdf},
  author = {Stewart, K. and Gosain, S.}
}

@conference{Askari:2006:ITE:1137983.1138013,
  title = {Information theoretic evaluation of change prediction models for large-scale software},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {126--132},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In this paper, we analyze the data extracted from several open source software repositories. We observe that the change data follows a Zipf distribution. Based on the extracted data, we then develop three probabilistic models to predict which files will have changes or bugs. The first model is Maximum Likelihood Estimation (MLE), which simply counts the number of events, i.e., changes or bugs, that happen to each file and normalizes the counts to compute a probability distribution. The second model is Reflexive Exponential Decay (RED) in which we postulate that the predictive rate of modification in a file is incremented by any modification to that file and decays exponentially. The third model is called RED-Co-Change. With each modification to a given file, the RED-Co-Change model not only increments its predictive rate, but also increments the rate for other files that are related to the given file through previous co-changes.
We then present an information-theoretic approach to evaluate the performance of different prediction models. In this approach, the closeness of model distribution to the actual unknown probability distribution of the system is measured using cross entropy. We evaluate our prediction models empirically using the proposed information-theoretic approach for six large open source systems. Based on this evaluation, we observe that of our three prediction models, the RED-Co-Change model predicts the distribution that is closest to the actual distribution for all the studied systems.},
  keywords = {bugs, change analysis, cvs, evaluation approach, file, freebsd, information theory, kde, koffice, log files, netbsd, openbsd, postgresql, prediction, prediction models, scm, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138013},
  url = {http://doi.acm.org/10.1145/1137983.1138013},
  attachments = {https://flosshub.org/sites/flosshub.org/files/126InformationTheoretic.pdf},
  author = {Askari, Mina and Holt, Ric}
}

@conference{1094,
  title = {Knowledge Reuse in Open Source Software: An Exploratory Study of 15 Open Source Projects},
  booktitle = {Proceedings of the 38th Annual Hawaii International Conference on System Sciences},
  year = {2005},
  note = {"In a first step, we asked developers of different open source projects to respond to a very short web-based survey."... "In a second step, we started with gathering data from 15 projects, including interviews. In parallel, the source code, CVS comments and to a certain extent email communication was analyzed to receive a dynamic, and within the limits of the method, complete picture of knowledge reuse practices." "Projects included games (Adonthell, FlightGear, Xboard), text processing (Abiword), a GNU/Linux desktop (Xfce4), an instant messenger client (Miranda), fax software (HylaFAX), a content management system (Tiki/CMS Groupware), encryption software (OpenSSL), a collaborative music system (iRATE Radio), file sharing networks (GNUnet, Mnet, Freenet), a mailing list manager (Mailman), and an mp3 encoder (Lame)." "Our data sources included interviews with key developers, source code, CVS comments, mailing lists and various Internet resources"},
  pages = {1--10},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Big Island, HI, USA},
  abstract = {To date, there is no investigation of knowledge reuse in open source software projects. This paper focuses on the forms of knowledge reuse and the factors impacting on them. It develops a theory drawn from data of 15 open source software projects and finds that the effort to search, integrate and maintain external knowledge influences the form of knowledge to be reused. Implications for firms and innovation research are discussed.},
  keywords = {cvs, email, knowledge reuse, lines of code, loc, source code, Survey},
  doi = {10.1109/HICSS.2005.378},
  url = {http://www.computer.org/csdl/proceedings/hicss/2005/2268/07/22680198b-abs.html},
  author = {von Krogh, G. and Spaeth, S. and Haefliger, S.}
}

@conference{Kim:2006:MPE:1137983.1137995,
  title = {Micro pattern evolution},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {40--46},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {When analyzing the evolution history of a software project, we wish to develop results that generalize across projects. One approach is to analyze design patterns, permitting characteristics of the evolution to be associated with patterns, instead of source code.
Traditional design patterns are generally not amenable to reliable automatic extraction from source code, yet automation is crucial for scalable evolution analysis. Instead, we analyze {\textquotedblleft}micro pattern{\textquotedblright} evolution; patterns whose abstraction level is closer to source code, and designed to be automatically extractable from Java source code or bytecode. We perform micro-pattern evolution analysis on three open source projects, ArgoUML, Columba, and jEdit to identify micro pattern frequencies, common kinds of pattern evolution, and bug-prone patterns. In all analyzed projects, we found that the micro patterns of Java classes do not change often. Common bug-prone pattern evolution kinds are {\textquoteleft}Pool {\textrightarrow} Pool{\textquoteright}, {\textquoteleft}Implementor {\textrightarrow} NONE{\textquoteright}, and {\textquoteleft}Sampler {\textrightarrow} Sampler{\textquoteright}. Among all pattern evolution kinds, {\textquoteleft}Box{\textquoteright}, {\textquoteleft}CompoundBox{\textquoteright}, {\textquoteleft}Pool{\textquoteright}, {\textquoteleft}CommonState{\textquoteright}, and {\textquoteleft}Outline{\textquoteright} micro patterns have high bug rates, but they have low frequencies and a small number of changes. The pattern evolution kinds that are bug-prone are somewhat similar across projects. The bug-prone pattern evolution kinds of two different periods of the same project are almost identical.},
  keywords = {argouml, bugs, columba, design patterns, evolution, extraction, java, jedit, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137995},
  url = {http://doi.acm.org/10.1145/1137983.1137995},
  attachments = {https://flosshub.org/sites/flosshub.org/files/40MicroPattern.pdf},
  author = {Kim, Sunghun and Pan, Kai and Whitehead, Jr., E.
James}
}

@conference{Zimmermann:2006:MAM:1137983.1138025,
  title = {Mining additions of method calls in {ArgoUML}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {169--170},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In this paper we refine the classical co-change to the addition of method calls. We use this concept to find usage patterns and to identify cross-cutting concerns for ArgoUML.},
  keywords = {argouml, change analysis, eclipse, function calls, mining challenge, msr challenge, pattern, source code, xelopes},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138025},
  url = {http://doi.acm.org/10.1145/1137983.1138025},
  attachments = {https://flosshub.org/sites/flosshub.org/files/169MiningAdditions.pdf},
  author = {Zimmermann, Thomas and Breu, Silvia and Lindig, Christian and Livshits, Benjamin}
}

@conference{Breu:2006:MEC:1137983.1138006,
  title = {Mining {Eclipse} for cross-cutting concerns},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {94--97},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Software may contain functionality that does not align with its architecture. Such cross-cutting concerns do not exist from the beginning but emerge over time. By analysing where developers add code to a program, our history-based mining identifies cross-cutting concerns in a two-step process. First, we mine CVS archives for sets of methods where a call to a specific single method was added. In a second step, such simple cross-cutting concerns are combined to complex cross-cutting concerns. To compute these efficiently, we apply formal concept analysis{\textemdash}an algebraic theory.
History-based mining scales well: we are the first to report aspects mined from an industrial-sized project like Eclipse. For example, we identified a locking concern that crosscuts 1284 methods.},
  keywords = {aspects, concept analysis, cvs, eclipse, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138006},
  url = {http://doi.acm.org/10.1145/1137983.1138006},
  attachments = {https://flosshub.org/sites/flosshub.org/files/94MiningEclipse.pdf},
  author = {Breu, Silvia and Zimmermann, Thomas and Lindig, Christian}
}

@conference{Bird:2006:MES:1137983.1138016,
  title = {Mining email social networks},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {137--143},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Communication \& Co-ordination activities are central to large software projects, but are difficult to observe and study in traditional (closed-source, commercial) settings because of the prevalence of informal, direct communication modes. OSS projects, on the other hand, use the internet as the communication medium, and typically conduct discussions in an open, public manner. As a result, the email archives of OSS projects provide a useful trace of the communication and co-ordination activities of the participants. However, there are various challenges that must be addressed before this data can be effectively mined. Once this is done, we can construct social networks of email correspondents, and begin to address some interesting questions. These include questions relating to participation in the email; the social status of different types of OSS participants; the relationship of email activity and commit activity (in the CVS repositories) and the relationship of social status with commit activity.
In this paper, we begin with a discussion of our infrastructure (including a novel use of Scientific Workflow software) and then discuss our approach to mining the email archives; and finally we present some preliminary results from our data analysis.},
  keywords = {communication, contributions, developers, email, email archives, mailing lists, open source, social networks},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138016},
  url = {http://doi.acm.org/10.1145/1137983.1138016},
  attachments = {https://flosshub.org/sites/flosshub.org/files/137MiningEmail.pdf},
  author = {Bird, Christian and Gourley, Alex and Devanbu, Prem and Gertz, Michael and Swaminathan, Anand}
}

@conference{Bird:2006:MES:1137983.1138033,
  title = {Mining email social networks in {Postgres}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {185--186},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Open Source Software (OSS) projects provide a unique opportunity to gather and analyze publicly available historical data. The Postgres SQL server, for example, has over seven years of recorded development and communication activity. We mined data from both the source code repository and the mailing list archives to examine the relationship between communication and development in Postgres. Along the way, we had to deal with the difficult challenge of resolving email aliases. We used a number of social network analysis measures and statistical techniques to analyze this data.
We present our findings in this paper.},
  keywords = {developers, email, email archives, open source, postgresql, scm, social network analysis, social networks, source code, status},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138033},
  url = {http://doi.acm.org/10.1145/1137983.1138033},
  attachments = {https://flosshub.org/sites/flosshub.org/files/185MiningEmail.pdf},
  author = {Bird, Christian and Gourley, Alex and Devanbu, Prem and Gertz, Michael and Swaminathan, Anand}
}

@conference{Robles:2006:MLS:1137983.1137986,
  title = {Mining large software compilations over time: another perspective of software evolution},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {3--9},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {With the success of libre (free, open source) software, a new type of software compilation has become increasingly common. Such compilations, often referred to as {\textquoteright}distributions{\textquoteright}, group hundreds, if not thousands, of software applications and libraries written by independent parties into an integrated system. Software compilations raise a number of questions that have not been targeted so far by software evolution, which usually focuses on the evolution of single applications. Undoubtedly, the challenges that software compilations face differ from those found in single software applications. Nevertheless, it can be assumed that both, the evolution of applications and that of software compilations, have similarities and dependencies. In this sense, we identify a dichotomy, common to that in economics, of software evolution in the small (micro-evolution) and in the large (macro-evolution). The goal of this paper is to study the evolution of a large software compilation, mining the publicly available repository of a well-known Linux distribution, Debian.
We will therefore investigate changes related to hundreds of millions of lines of code over seven years. The aspects that will be covered in this paper are size (in terms of number of packages and of number of lines of code), use of programming languages, maintenance of packages and file sizes.},
  keywords = {debian, distributions, evolution, large software collections, lines of code, loc, metrics, mining software repositories, size, sloc, sloccount, software evolution, software integrators},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137986},
  url = {http://doi.acm.org/10.1145/1137983.1137986},
  attachments = {https://flosshub.org/sites/flosshub.org/files/3miningLarge.pdf},
  author = {Robles, Gregorio and Gonzalez-Barahona, Jesus M. and Michlmayr, Martin and Amor, Juan Jose}
}

@conference{WeiBgerber:2006:MRA:1137983.1138028,
  title = {Mining refactorings in {ARGOUML}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  note = {"To see if refactorings in ARGOUML have an effect on the occurrence of new bugs and on communication between the developers, we relate the refactorings to bug reports in ISSUEZILLA respectively to mails on the developer mailing list."},
  pages = {175--176},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In this paper we combine the results of our refactoring reconstruction technique with bug, mail and release information to perform process and bug analyses of the ARGOUML CVS archive.},
  keywords = {argouml, bug tracking, bugs, cvs, email, evolution, mining challenge, msr challenge, re-engineering, refactoring, release history},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138028},
  url = {http://doi.acm.org/10.1145/1137983.1138028},
  attachments = {https://flosshub.org/sites/flosshub.org/files/175MiningRefactorings.pdf},
  author = {Wei{\ss}gerber, Peter and Diehl, Stephan and G{\"o}rg, Carsten}
}

@conference{Voinea:2006:MSR:1137983.1138024,
  title = {Mining software repositories with {CVSgrab}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {167--168},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  keywords = {argouml, cvs, cvsgrab, evolution, mining challenge, msr challenge, postgresql, software visualization, source code, team, visualization},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138024},
  url = {http://doi.acm.org/10.1145/1137983.1138024},
  attachments = {https://flosshub.org/sites/flosshub.org/files/167MiningSoftware.pdf},
  author = {Voinea, Lucian and Telea, Alexandru}
}

@article{1124,
  title = {Motivation, Governance, and the Viability of Hybrid Forms in Open Source Software Development},
  journal = {Management Science},
  volume = {52},
  year = {2006},
  note = {"Data from three primary sources informed this research (Table 1). All data were collected in 2001{\textendash}2002." Online project documentation: All publicly available project descriptions, charters, bylaws, meeting minutes, etc.
Mailing lists: Over 2,000 messages read over a three-month period prior to interviews Interviews "I read all postings to project-specific and general mailing lists for both communities for a three-month period preceding the interviews. Over 2,000 messages were posted during this period. Reading these messages allowed me to gain familiarity with the technology..."},
  month = jul,
  pages = {1000--1014},
  abstract = {Open source software projects rely on the voluntary efforts of thousands of software developers, yet we know little about why developers choose to participate in this collective development process. This paper inductively derives a framework for understanding participation from the perspective of the individual software developer based on data from two software communities with different governance structures. In both communities, a need for software-related improvements drives initial participation. The majority of participants leave the community once their needs are met, however, a small subset remains involved. For this set of developers, motives evolve over time and participation becomes a hobby. These hobbyists are critical to the long-term viability of the software code: They take on tasks that might otherwise go undone and work to maintain the simplicity and modularity of the code. Governance structures affect this evolution of motives.
Implications for firms interested in implementing hybrid strategies designed to combine the advantages of open source software development with proprietary ownership and control are discussed.},
  keywords = {email, email archives, governance, INNOVATION, interview, mailing list, MOTIVATION, open source software development, Volunteers},
  issn = {1526-5501},
  doi = {10.1287/mnsc.1060.0553},
  url = {http://faculty.washington.edu/skshah/Shah\%20-\%20Motivation,\%20Governance,\%20Hybrid\%20Forms.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Shah\%20-\%20Motivation\%2C\%20Governance\%2C\%20Hybrid\%20Forms.pdf},
  author = {Shah, Sonali K.}
}

@article{895,
  title = {Multi-Modal Modeling, Analysis and Validation of Open Source Software Development Processes},
  journal = {Intern. J. Internet Technology and Web Engineering},
  volume = {1},
  number = {3},
  year = {2006},
  pages = {49--63},
  abstract = {Understanding the context, structure, activities, and content of software development processes found in practice has been and remains a challenging problem. In the world of free/open source software development, discovering and understanding what processes are used in particular projects is important in determining how they are similar to or different from those advocated by the software engineering community. Prior studies have revealed that development processes in F/OSSD projects are different in a number of ways.
In this paper, we describe how a variety of modeling perspectives and techniques are used to elicit, analyze, and validate software development processes found in F/OSSD projects, with examples drawn from studies of the software requirements process found in the NetBeans.org project.},
  keywords = {empirical studies of software engineering, open source software development, process modeling, requirements processes, software process},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Scacchi-Jensen-Noll-Elliott-OSSC05.pdf},
  author = {Scacchi, Walt and Jensen, Chris and Noll, J. and Elliott, M.}
}

@conference{Voinea:2006:OFC:1137983.1137993,
  title = {An open framework for {CVS} repository querying, analysis and visualization},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {33--39},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {We present an open framework for visual mining of CVS software repositories. We address three aspects: data extraction, analysis and visualization. We first discuss the challenges of CVS data extraction and storage, and propose a flexible way to deal with CVS implementation inconsistencies. We next present a new technique to enrich the raw data with information about artifacts showing similar evolution.
Finally, we propose a visualization backend and show its applicability on industry-size repositories.},
  keywords = {argouml, cvs, cvsgrab, evolution visualization, postgresql, software visualization},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137993},
  url = {http://doi.acm.org/10.1145/1137983.1137993},
  author = {Voinea, Lucian and Telea, Alexandru}
}

@article{2006,
  title = {Opportunities and Challenges Applying Functional Data Analysis to the Study of Open Source Software Evolution},
  journal = {Statistical Science},
  volume = {21},
  number = {2},
  year = {2006},
  note = {"As part of a larger project, data were collected on 105 OSS projects hosted online at Sourceforge (sf.net)." "...we limited our data collection to projects that use only the Java programming language and were listed in the Internet and System Networking domains." "... only including these projects that use an OSI approved license..." "had to have posted at least one file on the Sourceforge site as of the time of our initial project selection Fall 2002" "Data were collected on the published release history of each project that met the screening criteria. Each release of each project was analyzed to calculate CplXLCoh. The size of each release was measured using a calculation of the number of lines of code (LOC)"},
  pages = {167--178},
  publisher = {Institute of Mathematical Statistics},
  abstract = {This paper explores the application of functional data analysis (FDA) as a means to study the dynamics of software evolution in the open source context. Several challenges in analyzing the data from software projects are discussed, an approach to overcoming those challenges is described, and preliminary results from the analysis of a sample of open source software (OSS) projects are provided. The results demonstrate the utility of FDA for uncovering and categorizing multiple distinct patterns of evolution in the complexity of OSS projects.
These results are promising in that they demonstrate some patterns in which the complexity of software decreased as the software grew in size, a particularly novel result. The paper reports preliminary explorations of factors that may be associated with decreasing complexity patterns in these projects. The paper concludes by describing several next steps for this research project as well as some questions for which more sophisticated analytical techniques may be needed.},
  keywords = {complexity, evolution, fda, java, lines of code, loc, release history, scm, size, sourceforge},
  issn = {0883-4237},
  url = {http://www.jstor.org/stable/27645747},
  author = {Stewart, Katherine J. and Darcy, David P. and Daniel, Sherae L.}
}

@article{1088,
  title = {Self-Organization Patterns in Wasp and Open Source Communities},
  journal = {IEEE Intelligent Systems},
  volume = {21},
  year = {2006},
  note = {"To investigate such claims, we studied an OSS community{\textquoteright}s social network from a dataset describing the email activity of 120 different software teams" "Our test data originated from Sourceforge (http://sourceforge.net), a large open source project repository, and included communities ranging from very small networks with one or two members to large networks with thousands of members." "we limited our consideration to email traffic associated with bug fixes and bug reporting. As other researchers have shown[5] this email subset allows an effective reconstruction of the software community{\textquoteright}s social network." "We thank Kevin Crowston and James Howison for making their software data publicly available."},
  month = mar,
  pages = {36--40},
  abstract = {In this paper, we conducted a comparative study of how social organization takes place in a wasp colony and OSS developer communities. Both these systems display similar global organization patterns, such as hierarchies and clear labor divisions.
As our analysis shows, both systems also define interacting agent networks with similar common features that reflect limited information sharing among agents. As far as we know, this is the first research study analyzing the patterns and functional significance of these systems{\textquoteright} weighted-interaction networks. By illuminating the extent to which self-organization is responsible for patterns such as hierarchical structure, we can gain insight into the origins of organization in OSS communities.}, keywords = {agents, decentralization, developers, email, email archives, flossmole, hierarchy, labor division, organization, self-organizing teams, social network analysis, social networks, sourceforge, teams, wasps}, issn = {1541-1672}, doi = {10.1109/MIS.2006.34}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.95.5574\&rep=rep1\&type=pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/valverde.pdf}, author = {Valverde, S. and Theraulaz, G. and Gautrais, J. and Fourcassie, V. and Sole, R.V.} } @article {1119, title = {Strategic Interaction and Knowledge Sharing in the KDE Developer Mailing List}, journal = {Management Science}, volume = {52}, year = {2006}, note = {"the threaded discussion from the K Desktop Environment (KDE) developer mailing list was used." March 15 to March 31, 2000. 128 threads selected. measures: (1) level of participation measured by number of postings made by indiv. devs in a thread (2) conversational interactivity (how long is a conversation) (3) cross-thread connectivity}, month = {07/2006}, pages = {1031 - 1042}, abstract = {In stark contrast with the fully participative "bazaar" imagery of open source software (OSS) development, some recent empirical research has pointed out that much of the OSS development is carried out by a small percentage of developers. 
This raises serious concerns that concentration of development effort on a few will limit knowledge sharing and underutilize the available resources. Using the notion of strategic interaction, this paper argues that individual developers often interact strategically with other highly resourceful developers by forming a smaller but better organized structure to intensify the types of epistemic interactions that matter most to the OSS development. A general framework of strategic interaction including participation inequality, conversational interactivity, and cross-thread connectivity is proposed to examine its impact on knowledge sharing, and validated using 128 discussion threads from the K Desktop Environment (KDE) developer mailing list. The findings indicate that strategic interaction has expanded knowledge sharing but with the caveat that extreme concentration of development could have an opposite effect. For researchers, this study dovetails the incentive logic by proposing and validating the strategic aspects of OSS participation to better understand the collective dynamics underpinning OSS development. Practitioners can use this approach to evaluate and better support existing knowledge-sharing initiatives.}, keywords = {developers, email, email archives, kde, knowledge collaboration, knowledge sharing, mailing list}, issn = {1526-5501}, doi = {10.1287/mnsc.1060.0551}, author = {Kuk, George} }

% Journal article (Management Science 52, 2006): longitudinal study of motivation, participation and performance in the Apache projects.
% The note field holds quoted excerpts describing the archival data used by the paper.
@article{1122,
  title = {Understanding the Motivations, Participation, and Performance of Open Source Software Developers: A Longitudinal Study of the {Apache} Projects},
  journal = {Management Science},
  volume = {52},
  year = {2006},
  note = {"analyzing archival data collected from OSS project records over a period of four years" "Apart from the source and binary codes of the actual software programs, Apache products include developer websites, change logs, documentation, and developer communications in the form of e-mail archives. From these products, we extracted two types of information: each contributor{\textquoteright}s progression along the Apache career path, and each contributor{\textquoteright}s source code contributions to the project."},
  month = {07/2006},
  pages = {984--999},
  abstract = {Understanding what motivates participation is a central theme in the research on open source software (OSS) development. Our study contributes by revealing how the different motivations of OSS developers are interrelated, how these motivations influence participation leading to performance, and how past performance influences subsequent motivations. Drawing on theories of intrinsic and extrinsic motivation, we develop a theoretical model relating the motivations, participation, and performance of OSS developers. We evaluate our model using survey and archival data collected from a longitudinal field study of software developers in the Apache projects. Our results reveal several important findings. First, we find that developers{\textquoteright} motivations are not independent but rather are related in complex ways. Being paid to contribute to Apache projects is positively related to developers{\textquoteright} status motivations but negatively related to their use-value motivations. Perhaps surprisingly, we find no evidence of diminished intrinsic motivation in the presence of extrinsic motivations; rather, status motivations enhance intrinsic motivations. Second, we find that different motivations have an impact on participation in different ways. Developers{\textquoteright} paid participation and status motivations lead to above-average contribution levels, but use-value motivations lead to below-average contribution levels, and intrinsic motivations do not significantly impact average contribution levels. Third, we find that developers{\textquoteright} contribution levels positively impact their performance rankings. Finally, our results suggest that past-performance rankings enhance developers{\textquoteright} subsequent status motivations.},
  keywords = {apache, change logs, contributions, email, email archives, extrinsic motivation, intrinsic motivation, mailing lists, MOTIVATION, open source software, participation, software development performance, source code, status, Survey},
  issn = {1526-5501},
  doi = {10.1287/mnsc.1060.0554},
  author = {Roberts, Jeffrey A. and Hann, Il-Horn and Slaughter, Sandra A.}
}

% Workshop paper (MSR 2006): evolutionary annotations derived from change logs, demonstrated on Apache.
% The doi field holds the bare DOI; the resolver form is kept in url.
@conference{German:2006:UEA:1137983.1138020,
  title = {Using evolutionary annotations from change logs to enhance program comprehension},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {159--162},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Evolutionary annotations are descriptions of how source code evolves over time. Typical source comments, given their static nature, are usually inadequate for describing how a program has evolved over time; instead, source code comments are typically a description of what a program currently does. We propose the use of evolutionary annotations as a way of describing the rationale behind changes applied to a given program (for example "These lines were added to ..."). Evolutionary annotations can assist a software developer in the understanding of how a given portion of source code works by showing him how the source has evolved into its current form. In this paper we describe a method to automatically create evolutionary annotations from change logs, defect tracking systems and mailing lists. We describe the design of a prototype for Eclipse that can filter and present these annotations alongside their corresponding source code and in workbench views. We use Apache as a test case to demonstrate the feasibility of this approach.},
  keywords = {annotations, apache, bug tracking, change history, eclipse, evolutionary, log files, mailing lists, mining software repositories, software evolution, version control},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138020},
  url = {http://doi.acm.org/10.1145/1137983.1138020},
  attachments = {https://flosshub.org/sites/flosshub.org/files/159UsingEvolutionary.pdf},
  author = {German, Daniel M. and Rigby, Peter C. and Storey, Margaret-Anne}
}

@conference {Anvik:2006:FTB:1134285.1134336, title = {Who should fix this bug?}, booktitle = {Proceedings of the 28th international conference on Software engineering}, series = {ICSE {\textquoteright}06}, year = {2006}, pages = {361{\textendash}370}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Open source development projects typically support an open bug repository to which both developers and users can report bugs. The reports that appear in this repository must be triaged to determine if the report is one which requires attention and if it is, which developer will be assigned the responsibility of resolving the report. Large open source developments are burdened by the rate at which new bug reports appear in the bug repository. In this paper, we present a semi-automated approach intended to ease one part of this process, the assignment of reports to a developer. Our approach applies a machine learning algorithm to the open bug repository to learn the kinds of reports each developer resolves. When a new report arrives, the classifier produced by the machine learning technique suggests a small number of developers suitable to resolve the report. With this approach, we have reached precision levels of 57\% and 64\% on the Eclipse and Firefox development projects respectively. We have also applied our approach to the gcc open source development with less positive results.
We describe the conditions under which the approach is applicable and also report on the lessons we learned about applying machine learning to repositories used in open source development.}, keywords = {bug fixing, bug report, bug report assignment, bug triage, eclipse, Firefox, gcc, issue tracking, machine learning, problem tracking}, isbn = {1-59593-375-1}, doi = {10.1145/1134285.1134336}, url = {http://doi.acm.org/10.1145/1134285.1134336}, author = {Anvik, John and Hiew, Lyndon and Murphy, Gail C.} }

% Conference paper (OSS2005): experience report on the Gild educational programming environment.
@conference{795,
  title = {The challenges of creating open source education software: the {Gild} experience},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {338--340},
  keywords = {COMMUNITY, eclipse, learning environment, novice programmers, open source, programming environment},
  url = {http://pascal.case.unibz.it/handle/2038/1539},
  author = {German, Daniel M. and Rigby, Peter and Cubranic, Davor and Storey, Margaret-Anne and Thomson, Suzanne}
}

% Paper on coordination mechanisms in three FLOSS projects (2005, Las Vegas).
% NOTE(review): exported with the proceedings entry type although it has authors and no editor or booktitle;
% standard styles may not print the authors. Presumably a conference paper - confirm the venue before retyping.
@proceedings{89,
  title = {Coordination of Free/Libre Open Source Software development},
  year = {2005},
  note = {"The data used for the study were interactions on the main developer communication venue, either a developer mailing list or online forum" sourceforge},
  address = {Las Vegas, NV, USA},
  abstract = {The apparent success of free/libre open source software (FLOSS) development projects such as Linux, Apache, and many others has raised the question, what lessons from FLOSS development can be transferred to mainstream software development? In this paper, we use coordination theory to analyze coordination mechanisms in FLOSS development and compare our analysis with existing literature on coordination in proprietary software development. We examined developer interaction data from three active and successful FLOSS projects and used content analysis to identify the coordination mechanisms used by the participants. We found that there were similarities between the FLOSS groups and the reported practices of the proprietary project in the coordination mechanisms used to manage task-task dependencies. However, we found clear differences in the coordination mechanisms used to manage task-actor dependencies. While published descriptions of proprietary software development involved an elaborate system to locate the developer who owned the relevant piece of code, we found that {\textquotedblleft}self-assignment{\textquotedblright} was the most common mechanism across three FLOSS projects. This coordination mechanism is consistent with expectations for distributed and largely volunteer teams. We conclude by discussing whether these emergent practices can be usefully transferred to mainstream practice and indicating directions for future research.},
  keywords = {case study, compiere, coordination, egroupware, email, email archives, FLOSS, gaim, mailing list},
  attachments = {https://flosshub.org/sites/flosshub.org/files/CrowstonWeiLiEseryelHowison.pdf},
  author = {Crowston, Kevin and Wei, Kangning and Li, Qing and Eseryel, U. Yeliz and Howison, James}
}

% Workshop paper (MSR 2005): heuristics for merging developer identities across repositories, applied to GNOME.
% The doi field holds the bare DOI; the resolver form is kept in url.
@conference{Robles:2005:DIM:1083142.1083162,
  title = {Developer identification methods for integrated data from various sources},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {106--110},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Studying a software project by mining data from a single repository has been a very active research field in software engineering during the last years. However, few efforts have been devoted to perform studies by integrating data from various repositories, with different kinds of information, which would, for instance, track the different activities of developers. One of the main problems of these multi-repository studies is the different identities that developers use when they interact with different tools in different contexts. This makes them appear as different entities when data is mined from different repositories (and in some cases, even from a single one). In this paper we propose an approach, based on the application of heuristics, to identify the many identities of developers in such cases, and a data structure for allowing both the anonymized distribution of information, and the tracking of identities for verification purposes. The methodology will be presented in general, and applied to the GNOME project as a case example. Privacy issues and partial merging with new data sources will also be considered and discussed.},
  keywords = {anonymization, bug tracker, developers, email, email address, gnome, identity, mailing list, privacy, source code, version control},
  isbn = {1-59593-123-6},
  doi = {10.1145/1082983.1083162},
  url = {http://doi.acm.org/10.1145/1082983.1083162},
  attachments = {https://flosshub.org/sites/flosshub.org/files/106DeveloperIdentification.pdf},
  author = {Robles, Gregorio and Gonzalez-Barahona, Jesus M.}
}

% Conference paper (ICSE 2005): service-learning projects and the open-source approach in software engineering courses.
@conference{Liu:2005:ESE:1062455.1062566,
  title = {Enriching software engineering courses with service-learning projects and the open-source approach},
  booktitle = {Proceedings of the 27th international conference on Software engineering},
  series = {ICSE {\textquoteright}05},
  year = {2005},
  pages = {613--614},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Real-world software engineers deal with complex problem. Yet many software engineering courses do not involve projects of enough complexity to give students such experience. We sense that service-learning projects, while difficult to manage and sustain, can serve a crucial role in this regard. Through trials in a senior-level software engineering course, we discovered that the open-source approach works well to enable students to work on large, multiple-term service-learning projects. We developed GROw, a cross-term, cross-team educational software process to meet the challenges of adopting complex, real-world projects in one-term courses, and to sustain service learning.},
  keywords = {education, pedagogical, service learning, software engineering education},
  isbn = {1-58113-963-2},
  doi = {10.1145/1062455.1062566},
  url = {http://doi.acm.org/10.1145/1062455.1062566},
  author = {Liu, Chang}
}

@conference {775, title = {eZPA portale e-government per enti locali}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {289-290}, abstract = {Tra i numerosi settori in cui Internet sta intervenendo con una forte carica di {\textquotedblleft}modernizzazione{\textquotedblright}, quello dell{\textquoteright}e- Government, inteso come possibilit{\`a} per tutti gli utenti, per tutti i cittadini, di gestire il rapporto con la Pubblica Amministrazione, anche on line, sta producendo gli effetti maggiormente innovativi. L{\textquoteright}occasione fornitaci dall{\textquoteright}amministrazione del Comune di Ozzano dell{\textquoteright}Emilia ha permesso di sviluppare l{\textquoteright}applicazione eZPA, scritta con software a codice sorgente aperto e rilasciata con licenza GNU-GPL (General Publish Licence), liberamente utilizzabile e modificabile. Il portale consente la fruibilit{\`a} telematica dei servizi comunali sia ai clienti esterni (cittadini e imprese), sia agli utenti interni (dipendenti e collaboratori comunali), attraverso una struttura di sportelli fisici/virtuali.
In particolare, l{\textquoteright}utilizzo della metafora di comunicazione {\textquotedblleft}Evento della vita{\textquotedblright},[1] raccomandata dalle linee guida del Ministro per l{\textquoteright}Innovazione e le Tecnologie, ha ...}, keywords = {e-government, enti locali, INTERNET, open source, pubblica amministrazione}, url = {http://pascal.case.unibz.it/handle/2038/874}, author = {Colognesi, Cristina and Guandalini, Stefano} }

% Journal article (IEEE TSE 31(6), 2005): replication case study of the FreeBSD development process, compared with Apache.
% The note field holds a quoted excerpt describing the data sources plus a download hint.
@article{DBLP:journals/tse/Dinh-TrongB05,
  title = {The {FreeBSD} Project: A Replication Case Study of Open Source Development},
  journal = {IEEE Transactions on Software Engineering},
  volume = {31},
  number = {6},
  year = {2005},
  note = {" we obtained the necessary data from the [FreeBsd] CVS repository, the bug report database, and the e-mail archive. The CVS repository contains all of the code and related documentation that is committed to the project from 1993 until the present. The bug report database contains information describing all reported problems, as well as the status (such as fixed, under test, or open) of each problem. Each bug report is called a PR and assigned a reference number. The e-mail archive contains every e-mail message exchanged between the developers since 1994." d/l: research.cs.queensu.ca/~ahmed/home/teaching/.../F06/.../free-bsd.pdf},
  pages = {481--494},
  abstract = {Case studies can help to validate claims that open source software development produces higher quality software at lower cost than traditional commercial development. One problem inherent in case studies is external validity{\textemdash}we do not know whether or not results from one case study apply to another development project. We gain or lose confidence in case study results when similar case studies are conducted on other projects. This case study of the FreeBSD project, a long-lived open source project, provides further understanding of open source development. The paper details a method for mining repositories and querying project participants to retrieve key process information. The FreeBSD development process is fairly well-defined with proscribed methods for determining developer responsibilities, dealing with enhancements and defects, and managing releases. Compared to the Apache project, FreeBSD uses 1) a smaller set of core developers{\textemdash}developers who control the code base{\textemdash}that implement a smaller percentage of the system, 2) a larger set of top developers to implement 80 percent of the system, and 3) a more well-defined testing process. FreeBSD and Apache have a similar ratio of core developers to people involved in adapting and debugging the system and people who report problems. Both systems have similar defect densities and the developers are also users in both systems.},
  keywords = {apache, bug reports, contributors, core, cvs, defect density, developers, email, email archive, freebsd, mailing list, scm, source code, users},
  doi = {10.1109/TSE.2005.73},
  attachments = {https://flosshub.org/sites/flosshub.org/files/DinhTrungBieman.pdf},
  author = {Dinh-Trong, Trung T. and Bieman, James M.}
}

% Conference paper (OSS2005): software patents versus open source in the European software industry.
@conference{786,
  title = {Future Development in the {European} Software Industry: Patentability of Computer Programs or Open Source Software?},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {311--313},
  abstract = {Economic literature has treated the patent system as an indispensable tool to incentive inventive activity and to foster diffusion of technological improvements, but recent developments have brought at the center of the stage the Open Source phenomenon which is based on completely different mechanisms among which the free disclosure of the inventive steps. This work analyzes changes that are taking place into patent literature in order to give account of the desirability of patents in software sector. In addition some ideas on empirical analysis are put forward: the possibility of measuring the relationship between patents and input of innovation process in the software sector and the influence that private firms will have on Open Source developers motivations.},
  keywords = {european, market segment, open source, os, patent, patent literature, patentability, software industry, software sector},
  url = {http://hdl.handle.net/2038/966},
  author = {Rentocchini, Francesco}
}

% Conference paper (OSS2005): the UOC international master programme in free software.
@conference{798,
  title = {An International Master Programme in Free Software in the {European Higher Education Space}},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {349--352},
  abstract = {The Universitat Oberta de Catalunya (Open University of Catalonia, UOC) offers an International Master programme in Free Software. The first edition of this master programme began on November 2003 and there are about 240 students currently enrolled at the different specialities offered by the program. In this paper, the design, the methodology and the first few conclusions drawn from this higher education experience are discussed and summarized. After this master programme was changed to accomplish with European Higher Education Space (EHES).},
  keywords = {education, free software, FS community, GNU/Linux, learning, master programme, software development, university},
  url = {http://pascal.case.unibz.it/handle/2038/713},
  author = {Meg{\'\i}as, David and Serra, Jordi and Macau, Rafael}
}

% Workshop paper (MSR 2005): LPC and cepstrum coefficients for modelling file-size histories of Linux kernel releases.
% The doi field holds the bare DOI; the resolver form is kept in url.
@conference{Antoniol:2005:LPC:1083142.1083156,
  title = {Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {74--78},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {This paper presents an approach to recover time variant information from software repositories. It is widely accepted that software evolves due to factors such as defect removal, market opportunity or adding new features. Software evolution details are stored in software repositories which often contain the changes history. On the other hand there is a lack of approaches, technologies and methods to efficiently extract and represent time dependent information. Disciplines such as signal and image processing or speech recognition adopt frequency domain representations to mitigate differences of signals evolving in time. Inspired by time-frequency duality, this paper proposes the use of Linear Predictive Coding (LPC) and Cepstrum coefficients to model time varying software artifact histories. LPC or Cepstrum allow obtaining very compact representations with linear complexity. These representations can be used to highlight components and artifacts evolved in the same way or with very similar evolution patterns. To assess the proposed approach we applied LPC and Cepstral analysis to 211 Linux kernel releases (i.e., from 1.0 to 1.3.100), to identify files with very similar size histories. The approach, the preliminary results and the lesson learned are presented in this paper.},
  keywords = {change history, data mining, evolution, files, kernel, linear predictive coding, linux, lpc, size, software evolution, source code},
  isbn = {1-59593-123-6},
  doi = {10.1145/1082983.1083156},
  url = {http://doi.acm.org/10.1145/1082983.1083156},
  attachments = {https://flosshub.org/sites/flosshub.org/files/74LinearPredictive.pdf},
  author = {Antoniol, Giuliano and Rollo, Vincenzo Fabio and Venturi, Gabriele}
}

@conference {777, title = {Live! I-Learn @ Home}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {294-295}, abstract = {In this paper we present a live CD based on GNU/Linux (Knoppix), customized in order to supply a complete working and studying environment for the students of the Corso di Studi in Informatica.
In particular, it supplies the Moodle course management system for e-learning complete of the courses of the first year. The aim is to enable the use of such resources off-line and without requiring special skills that will be achieved by studying but that newbies do not have.}, keywords = {e-learning, environment, GNU/Linux, java, open source, technology}, url = {http://pascal.case.unibz.it/handle/2038/977}, author = {Baldoni, Matteo and Baroglio, Cristina and Roversi, Luca and Grandi, Claudio} }

% Workshop paper (MSR 2005): mining the evolution of three BSD variants as a product family.
% The doi field holds the bare DOI; the resolver form is kept in url.
@conference{Fischer:2005:MED:1083142.1083145,
  title = {Mining evolution data of a product family},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {12--16},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Diversification of software assets through changing requirements impose a constant challenge on the developers and maintainers of large software systems. Recent research has addressed the mining for data in software repositories of single products ranging from fine- to coarse grained analyses. But so far, little attention has been payed to mining data about the evolution of product families. In this work, we study the evolution and commonalities of three variants of the BSD (Berkeley Software Distribution), a large open source operating system. The research questions we tackle are concerned with how to generate high level views of the system discovering and indicating evolutionary highlights. To process the large amount of data, we extended our previously developed approach for storing release history information to support the analysis of product families. In a case study we apply our approach on data from three different code repositories representing about 8.5GB of data and 10 years of active development.},
  keywords = {bsd, change analysis, change history, cvs, evolution, freebsd, netbsd, openbsd, release history, source code, text mining},
  isbn = {1-59593-123-6},
  doi = {10.1145/1082983.1083145},
  url = {http://doi.acm.org/10.1145/1082983.1083145},
  attachments = {https://flosshub.org/sites/flosshub.org/files/12MiningEvolution.pdf},
  author = {Fischer, Michael and Oberleitner, Johann and Ratzinger, Jacek and Gall, Harald}
}

% Conference paper (OSS2005): FOSS infrastructure for the IMS Learning Design eLearning specification.
@conference{793,
  title = {Open Source and {IMS Learning Design}: Building the Infrastructure for {eLearning}},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {329--333},
  abstract = {The development of open, flexible eLearning specifications has significant implications for and interactions with the FOSS movement. A short overview of eLearning specifications is provided, focusing on the difference between SCORM and Learning Design (LD). The significance of LD for FOSS is examined, and common values identified. The particular contribution made by FOSS to LD infrastructure is discussed, and the importance of reference applications described. An overview is given of the FOSS applications available, divided into design time and run time, with particular reference to LD editors and the CopperCore Learning Design engine.},
  keywords = {eLearning, eLearning specification, FOSS, infrastructure, Learning Design},
  url = {http://pascal.case.unibz.it/handle/2038/1264},
  author = {Griffiths, David and Blat, Josep and Elferink, Ray and Zondergeld, Sara}
}

% Workshop paper (SE-HPCS 2005): predicting bug-prone modules in PETSc with logistic-regression and decision-tree models.
@conference{Phadke:2005:PRM:1145319.1145337,
  title = {Predicting risky modules in open-source software for high-performance computing},
  booktitle = {Proceedings of the second international workshop on Software engineering for high performance computing system applications},
  series = {SE-HPCS {\textquoteright}05},
  year = {2005},
  pages = {60--64},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {This paper presents the position that software-quality modeling of open-source software for high-performance computing can identify modules that have a high risk of bugs. Given the source code for a recent release, a model can predict which modules are likely to have bugs, based on data from past releases. If a user knows which software modules correspond to functionality of interest, then risks to operations become apparent. If the risks are too great, the user may prefer not to upgrade to the most recent release. Of course, such predictions are never perfect. After release, bugs are discovered. Some bugs are missed by the model, and some predicted errors do not occur. A successful model will be accurate enough for informed management action at the time of the predictions. As evidence for this position, this paper summarizes a case study of the Portable Extensible Toolkit for Scientific Computation (PETSC), which is a mathematical library for high-performance computing. Data was drawn from source-code and configuration management logs. The accuracy of logistic-regression and decision-tree models indicated that the methodology is promising. The case study also illustrated several modeling issues.},
  keywords = {C4.5, decision trees, empirical case study, high performance computing, logistic regression, Open-source software, PETSc, software metrics, software quality model, software reliability},
  isbn = {1-59593-117-1},
  doi = {10.1145/1145319.1145337},
  url = {http://doi.acm.org/10.1145/1145319.1145337},
  author = {Phadke, Amit A. and Allen, Edward B.}
}

% Workshop paper (MSR 2005): SCQL, a graph model and temporal-logic query language for source control repositories.
% The doi field holds the bare DOI; the resolver form is kept in url.
@conference{Hindle:2005:SFM:1083142.1083161,
  title = {{SCQL}: a formal model and a query language for source control repositories},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {100--104},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Source Control Repositories are used in most software projects to store revisions to source code files. These repositories operate at the file level and support multiple users. A generalized formal model of source control repositories is described herein. The model is a graph in which the different entities stored in the repository become vertices and their relationships become edges. We then define SCQL, a first order, and temporal logic based query language for source control repositories. We demonstrate how SCQL can be used to specify some questions and then evaluate them using the source control repositories of five different large software projects.},
  keywords = {evolution, file, gnumeric, modperl, openssl, revision, samba, scm, source code},
  isbn = {1-59593-123-6},
  doi = {10.1145/1082983.1083161},
  url = {http://doi.acm.org/10.1145/1082983.1083161},
  attachments = {https://flosshub.org/sites/flosshub.org/files/100scql.pdf},
  author = {Hindle, Abram and German, Daniel M.}
}

@conference {Twidale:2005:SBF:1062455.1062468, title = {Silver bullet or fool{\textquoteright}s gold: supporting usability in open source software development}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {35{\textendash}35}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {At first glance it can look like Open Source Software development violates many, if not all, of the precepts of decades of careful research and teaching in Software Engineering. One could take a classic SE textbook and compare the activities elaborated and advocated in the various chapters with what is actually done in plain sight in the public logs of an OSS project in say SourceForge. For a Professor of Software Engineering this might make for rather depressing reading. Are the principles of SE being rendered obsolete? Has OSS really discovered Brooks{\textquoteright} Silver Bullet? Or is it just a flash in the pan or Fool{\textquoteright}s Gold.In this talk I will mainly look at one aspect of Open Source Development, the {\textquoteright}problem{\textquoteright} of creating usable interfaces, particularly for non-technical end-users. Any approach involves the challenge of how to coordinate distributed collaborative interface analysis and design, given that in conventional software development this is usually done in small teams and almost always face to face. Indeed all the methods in any HCI text just assume same-time same-place work and don{\textquoteright}t map to distributed work, let alone the looser mechanisms of OSS development.
Instead what is needed is a form of participatory usability involving the coordination of end users and developers in a constantly evolving redesign process.}, keywords = {course project, education, lifecycle model, pedagogical, software engineering education, software process}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062468}, url = {http://doi.acm.org/10.1145/1062455.1062468}, author = {Twidale, Michael} } @conference {717, title = {A Social Network Approach To Free/Open Source Software Simulation}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, note = {"Second, we use mailing list archives for three different projects. This gives us a more stringent network measure, since links are generated only when one developer communicates directly with another." "The mailing lists were the developers list from a well deployed database server, the general list for an text and file processing library, and the general list for a smaller F/OSS web browser."}, pages = {16-23}, abstract = {Free and Open Source Software (F/OSS) development is a complex process that is just beginning to be understood. The actual development process is frequently characterized as disparate volunteer developers collaborating to create a piece of software. The developers of F/OSS, like most software engineers, spend a significant portion of their time fostering collaboration through various channels social communication. We have analyzed several methods of communication; a social networking site, project mailing lists, and developer weblogs; to gain an understanding of the social network structure behind F/OSS projects. This social network data was used to create a model of F/OSS development that allows for multiple projects, users, and developers with varying goals and socialization methods. Using this model we have been able to replicate some of the known phenomena observed in F/OSS and provide a first step in the creation of a robust model of F/OSS. 
}, keywords = {email, mailing list, social network analysis}, url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.178.4984}, author = {Wagstrom, Patrick Adam and Herbsleb, James and Carley, Kathleen} } @article {springerlink:10.1007/s10606-005-9000-1, title = {Socialization in an Open Source Software Community: A Socio-Technical Analysis}, journal = {Computer Supported Cooperative Work (CSCW)}, volume = {14}, year = {2005}, note = {"I present the results of my analyses of participation patterns in a particular OSS project, used as a case study: Python. More precisely, I use the Open Source Project Browser to qualitatively track and analyze the trajectories of several project members who evolved (or not) into full-fledged participants. This allows me to later discuss how socialization proceeds in an OSS community such as Python" "Over the course of 2002, I progressively retrieved the entire email archive of python-dev (the developers{\textquoteright} mailing list) and the CVS source tree for the project. " "I read the entirety of the messages written by these participants and also looked at the software code they produced. There were striking similarities between their progressions over time, which I will describe shortly. Overall the trajectory of these participants reflects successful socialization in Python: an evolution from newcomer to developer." http://www2.parc.com/csl/members/nicolas/documents/JCSCW-OSS.pdf}, pages = {323-368}, publisher = {Springer Netherlands}, doi = {10.1007/s10606-005-9000-1}, abstract = {Open Source Software (OSS) development is often characterized as a fundamentally new way to develop software. Past analyses and discussions, however, have treated OSS projects and their organization mostly as a static phenomenon. Consequently, we do not know how these communities of software developers are sustained and reproduced over time through the progressive integration of new members.
To shed light on this issue I report on my analyses of socialization in a particular OSS community. In particular, I document the relationships OSS newcomers develop over time with both the social and material aspects of a project. To do so, I combine two mutually informing activities: ethnography and the use of software specially designed to visualize and explore the interacting networks of human and material resources incorporated in the email and code databases of OSS. Socialization in this community is analyzed from two perspectives: as an individual learning process and as a political process. From these analyses it appears that successful participants progressively construct identities as software craftsmen, and that this process is punctuated by specific rites of passage. Successful participants also understand the political nature of software development and progressively enroll a network of human and material allies to support their efforts. I conclude by discussing how these results could inform the design of software to support socialization in OSS projects, as well as practical implications for the future of these projects.}, keywords = {cvs, developers, email, email archive, mailing list, open source project browser, participation, python, scm, source code, team, tools}, issn = {0925-9724}, url = {http://dx.doi.org/10.1007/s10606-005-9000-1}, author = {DUCHENEAUT, NICOLAS} } @conference {Fielding:2005:SAO:1062455.1062474, title = {Software architecture in an open source world}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {43{\textendash}43}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In spite of the hype and hysteria surrounding open source software development, there is very little that can be said of open source in general. 
Open source projects range in scope from the miniscule, such as the thousands of non-maintained code dumps left behind at the end of class projects, dissertations, and failed commercial ventures, to the truly international, with thousands of developers collaborating, directly or indirectly, on a common platform. One characteristic that is shared by the largest and most successful open source projects, however, is a software architecture designed to promote anarchic collaboration through extensions while at the same time preserving centralized control over the interfaces. This talk features a survey of the state-of-the-practice in open source development in regards to software architecture, with particular emphasis on the modular extensibility interfaces within several of the most successful projects, including Apache httpd, Eclipse, Mozilla Firefox, Linux kernel, and the World Wide Web (which few people recognize as an open source project in itself). These projects fall under the general category of collaborative open source software development, which emphasizes community aspects of software engineering in order to compensate for the often-volunteer nature of core developers and take advantage of the scalability obtainable through Internet-based virtual organizations.}, keywords = {apache, collaborative open source development, eclipse, extensibility, Firefox, linux, linux kernel, loose coupling, modularity, mozilla, open source, software architecture}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062474}, url = {http://doi.acm.org/10.1145/1062455.1062474}, author = {Roy T. 
Fielding} } @conference {Hawthorne:2005:SEE:1062455.1062581, title = {Software engineering education in the era of outsourcing, distributed development, and open source software: challenges and opportunities}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {643{\textendash}644}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {As software development becomes increasingly globally distributed, and more software functions are delegated to common open source software (OSS) and commercial off-the-shelf (COTS) components, practicing software engineers face significant challenges for which current software engineering curricula may leave them inadequately prepared. A new multi-faceted distributed development model is emerging that effectively commoditizes many development activities once considered integral to software engineering, while simultaneously requiring practitioners to apply engineering principles in new and often unfamiliar contexts. We discuss the challenges that software engineers face as a direct result of outsourcing and other distributed development approaches that are increasingly being utilized by industry, and some of the key ways we need to evolve software engineering curricula to address these challenges.}, keywords = {computer science education, contextual learning, education, informatics, software engineering education}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062581}, url = {http://doi.acm.org/10.1145/1062455.1062581}, author = {Hawthorne, Matthew J. 
and Perry, Dewayne E.} } @conference {899, title = {Stopping spyware at the gate: a user study of privacy, notice and spyware}, booktitle = {2005 Symposium on Usable Privacy and Security}, year = {2005}, month = {07/2005}, pages = {43-52}, publisher = {Association for Computing Machinery}, organization = {Association for Computing Machinery}, address = {Pittsburgh, PA}, keywords = {Design, end user license agreement, EULA, Experimentation, Human Factors, Legal Aspects, notice, privacy, security and usability, spyware, terms of service, ToS}, isbn = {1-59593-178-3}, author = {N. Good and Dhamija, R. and J. Grossklags and D. Thaw and Aronowitz, S. and D. Mulligan and J. Konstan} } @conference {782, title = {Towards Supporting Agile Practice Within The Libre Software Paradigm}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {303-304}, abstract = {Individual agile methods have never been practiced as defined, in the same way that Royce{\textquoteright}s waterfall [1] model never reflected actual practice. Instead, practitioners adapted the core principles of these processes in order to suit their needs. Understanding this is key to appreciating the agile mindset. What does exist is a set of principles1 which, when followed loosely, form the agile practices. It is an important part of the agile mentality that the individuals within a project are more important that the process they follow. However, the individual methods do have their own identifying features that make them unique; for example testing must be performed before coding within eXtreme Programming (XP) [2]. However, if practitioners were to apply XP, exactly as Beck describes it, then they are probably not {\textquotedblleft}doing agile{\textquotedblright} as they may not be following the process that suits their needs best. One of the interesting features of the XP method is its requirement of a collocated team.
Th...}, keywords = {agile methods, agile practice, extreme programming, libre software, open source, XP}, url = {http://pascal.case.unibz.it/handle/2038/1546}, author = {Adams, Paul and Boldyreff, Cornelia} } @conference {797, title = {Transfering Libre Software Development Practices to the Production of Educational Resources: the Edukalibre Project}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {341-348}, abstract = {The transfer of methodologies common in libre (free, open source) sofware development to the domain of educational resources can radically change the way educational content is developed and used, enabling both educational practitioners and students to become actively involved in its creation and distribution. New software architectures and tools are needed to effectively support this process. This paper describes a platform aimed to support the creation of free, collaboratively constructed educational content on the web, which has been developed within the Edukalibre project. It provides easy access to core technologies: a control version system combined with conversion tools to produce several convenient formats for each document. Its modular architecture offers many different interfaces to the users. The Edukalibre platform is distributed as libre software.}, keywords = {educational resources, information systems, open source, software development practices}, url = {http://pascal.case.unibz.it/handle/2038/1548}, author = {Gonz{\'a}lez-Barahona, Jes{\'u}s M. 
and Chris Tebb and Vania Dimitrova and Chaparro, Diego and Romera, Teo} } @conference {Neamtiu:2005:USC:1083142.1083143, title = {Understanding source code evolution using abstract syntax tree matching}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {2-6}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Mining software repositories at the source code level can provide a greater understanding of how software evolves. We present a tool for quickly comparing the source code of different versions of a C program. The approach is based on partial abstract syntax tree matching, and can track simple changes to global variables, types and functions. These changes can characterize aspects of software evolution useful for answering higher level questions. In particular, we consider how they could be used to inform the design of a dynamic software updating system. We report results based on measurements of various versions of popular open source programs, including BIND, OpenSSH, Apache, Vsftpd and the Linux kernel.}, keywords = {abstract syntax trees, apache, bind, evolution, linux, openssh, software evolution, source code, source code analysis, vsftpd}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083143}, url = {http://doi.acm.org/10.1145/1082983.1083143}, attachments = {https://flosshub.org/sites/flosshub.org/files/2Understanding.pdf}, author = {Neamtiu, Iulian and Foster, Jeffrey S.
and Hicks, Michael} } @conference {Kim:2005:UCG:1083142.1083146, title = {Using a clone genealogy extractor for understanding and supporting evolution of code clones}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {17-23}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Programmers often create similar code snippets or reuse existing code snippets by copying and pasting. Code clones {\textemdash}syntactically and semantically similar code snippets{\textemdash}can cause problems during software maintenance because programmers may need to locate code clones and change them consistently. In this work, we investigate (1) how code clones evolve, (2) how many code clones impose maintenance challenges, and (3) what kind of tool or engineering process would be useful for maintaining code clones. Based on a formal definition of clone evolution, we built a clone genealogy tool that automatically extracts the history of code clones from a source code repository (CVS). Our clone genealogy tool enables several analyses that reveal evolutionary characteristics of code clones. 
Our initial results suggest that aggressive refactoring may not be the best solution for all code clones; thus, we propose alternative tool solutions that assist in maintaining code clones using clone genealogy information.}, keywords = {clone, clone detection, cvs, developers, evolution, maintenance, refactoring, source code}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083146}, url = {http://doi.acm.org/10.1145/1082983.1083146}, attachments = {https://flosshub.org/sites/flosshub.org/files/17Using.pdf}, author = {Kim, Miryung and Notkin, David} } @conference {799, title = {What is the significance of Open Source Software for the education and training community?}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {353-358}, abstract = {There is growing interest in the education community in the development and use of Open Source Software. This paper explains the ideas behind OSS and how what its significance is for education. It examines regional and national initiatives to promote the use of OSS. The paper considers the advantages of OSS and potential problems. It goes on to look at the idea of open content and the sharing of learning content, and provides short case studies.
The paper concludes that the major significance of OSS and open content is in the new social relations engendered and in fostering innovation in e-learning, both in application development and new pedagogies.}, keywords = {economic issues, education, open source, pedagocic impact, training}, url = {http://pascal.case.unibz.it/handle/2038/1563}, author = {Attwell, Graham} } @conference {Sliwerski:2005:CIF:1083142.1083147, title = {When do changes induce fixes?}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {24-28}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {As a software system evolves, programmers make changes that sometimes cause problems. We analyze CVS archives for fix-inducing changes{\textemdash}changes that lead to problems, indicated by fixes. We show how to automatically locate fix-inducing changes by linking a version archive (such as CVS) to a bug database (such as BUGZILLA). 
In a first investigation of the MOZILLA and ECLIPSE history, it turns out that fix-inducing changes show distinct patterns with respect to their size and the day of week they were applied.}, keywords = {bugzilla, change analysis, cvs, eclipse, evolution, mozilla}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083147}, url = {http://doi.acm.org/10.1145/1082983.1083147}, attachments = {https://flosshub.org/sites/flosshub.org/files/24WhenDoChanges.pdf}, author = {Sliwerski, J and Zimmermann, Thomas and Zeller, Andreas} } @conference {Xinyi04fourinteresting, title = {Four Interesting Ways in Which History Can Teach Us About Software}, booktitle = {Proceedings of the 2004 international workshop on Mining software repositories - MSR {\textquoteright}04}, year = {2004}, month = {05/2004}, pages = {58-62}, abstract = {In this position paper, we outline four kinds of studies that we have undertaken in trying to understand various aspects of a software system{\textquoteright}s evolutionary history. In each instance, the studies have involved detailed examination of real software systems based on {\textquotedblleft}facts{\textquotedblright} extracted from various kinds of source artifact repositories, as well as the development of accompanying tools to aid in the extraction, abstraction, and comprehension processes.
We briefly discuss the goals, results, and methodology of each approach.}, keywords = {ant, apache, change analysis, clone, clone detection, cvs, evolution, gcc, growth, kepler, linux, midworld, mycore, postgresql, source code, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/58FourInterestingWays.pdf}, author = {Michael Godfrey and Xinyi Dong and Cory Kapser and Lijie Zou} } @conference {1482, title = {Group awareness in distributed software development}, booktitle = {2004 ACM conference on Computer supported cooperative work - CSCW {\textquoteright}04}, year = {2004}, pages = {72-81}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Open-source software development projects are almost always collaborative and distributed. Despite the difficulties imposed by distance, these projects have managed to produce large, complex, and successful systems. However, there is still little known about how open-source teams manage their collaboration. In this paper we look at one aspect of this issue: how distributed developers maintain group awareness. We interviewed developers, read project communication, and looked at project artifacts from three successful open source projects. We found that distributed developers do need to maintain awareness of one another, and that they maintain both a general awareness of the entire team and more detailed knowledge of people that they plan to work with. Although there are several sources of information, this awareness is maintained primarily through text-based communication (mailing lists and chat systems). 
These textual channels have several characteristics that help to support the maintenance of awareness, as long as developers are committed to reading the lists and to making their project communication public.}, keywords = {email, email archive, mailing list}, isbn = {1581138105}, doi = {10.1145/1031607.1031621}, author = {Schneider, Kevin and Gutwin, Carl and Penner, Reagan} } @proceedings {1186, title = {How can Academic Software Research and Open Source Software Development help each other?}, year = {2004}, pages = {5-8}, abstract = {In this paper we discuss a few issues faced in coordinating, managing and implementing academic software research projects and suggest how some of these issues can be addressed by adopting tools and processes from Open Source Software Development. At the same time we also discuss how a few issues in Open Source Software Development (OSSD) projects can be addressed by adopting processes from Academic Software Research.}, keywords = {academic, education}, attachments = {https://flosshub.org/sites/flosshub.org/files/ambati6-9.pdf}, author = {Ambati, V. and Kishore, S.P.} } @proceedings {124, title = {Membership dynamics and network stability in the open-source community: the ising perspective}, year = {2004}, note = {"simulations with the empirical network data that were collected from two actual OSS communities, Linux and Hypermail." 
"we initially downloaded nearly 100,000 archived (between 1997 and 2003) LINUX Kernel and Hypermail newsgroup messages posted in a UNIX mailbox format" "Specific information was obtained regarding the characteristics of these two OSS communities, including the size, the number of average connections per participant, and the hierarchy of each community."}, abstract = {In this paper, we address the following two questions: (1)How does a participant{\textquoteright}s membership decision affect the others (neighbors) with whom he has collaborated over an extended period of time in an open source software (OSS) network? (2) To what extent do network characteristics (i.e, size and connectivity) mediate the impact of external factors on the OSS participants{\textquoteright} dynamic membership decisions and hence the stability of the network? From the Ising perspective, we present fresh theoretical insight into the dynamic and reciprocal membership relations between OSS participants. We also performed simulations based on empirical data that were collected from two actual OSS communities. Some of the key findings include that (1) membership herding is highly present when the external force is weak, but decreases significantly when the force increases, (2) the propensity for membership herding is most likely to be seen in a large network with a random connectivity, and (3) for large networks, at low external force a random connectivity will perform better than a scale-free counterpart in terms of the network strength. However, as the temperature (external force) increases, the reverse phenomenon is observed. In addition, the scale-free connectivity appears to be less volatile than with the random connectivity in response to the increase in the temperature. 
We conclude with several implications that may be of significance to OSS stakeholders.}, keywords = {email, email archive, hypermail, linux, mailing list, membership, membership herding, newsgroup, open source, participants, social network analysis, stakeholders, team size}, attachments = {https://flosshub.org/sites/flosshub.org/files/OhJeon.pdf}, author = {Oh, Wonseok and Jeon, Sangyong} } @conference {Germ04b, title = {Mining CVS repositories, the softChange experience}, booktitle = {Proc. Int{\textquoteright}l Workshop on Mining Software Repositories ({MSR})}, year = {2004}, note = {"Mailing lists. Mailing lists are an important source of information about the evolution of the project. We currently correlate MRs [modification requests] to mail messages by using the author and the date attributes of both the MR and the message."}, pages = {17{\textendash}21}, abstract = {CVS logs are a rich source of software trails (information left behind by the contributors to the development process, usually in the forms of logs). This paper describes how softChange extracts these trails, and enhances them. This paper also addresses some challenges that CVS fact extraction poses to researchers.}, keywords = {bugzilla, cvs, email archives, log files, logs, softchange}, attachments = {https://flosshub.org/sites/flosshub.org/files/17MiningCVS.pdf}, author = {German, Daniel} } @article {1077, title = {Profiling an Open Source Project Ecology and Its Programmers}, journal = {Electronic Markets}, volume = {14}, year = {2004}, month = {6/2004}, pages = {77 - 88}, abstract = {While many successful and well-known open source projects produce output of high quality, a general assessment of this development paradigm is still missing. In this paper, an online community of both small and large, successful and failed projects and their programmers is analysed mainly using the version-control data of each project, also according to their productivity and estimation of expended effort. 
As the results show, there are indeed significant differences between this cooperative development model and the commercial organization of work in the areas explored. Both open source software projects in their size and their programmers{\textquoteright} effort differ significantly, and the evolution of projects{\textquoteright} size over time seems in part to contradict the laws of software evolution proposed for commercial systems. Both the inequality of effort distribution between programmers and an increasing number of developers in a project do not lead to a decrease in productivity, opposing Brooks{\textquoteright}s Law. Effort estimation based on the COCOMO model for commercial organizations shows a large amount of effort expended for the projects, while a more general Norden-Rayleigh modeling shows a distinctly smaller expenditure. This proposes that either a highly efficient development is achieved by this self-organizing cooperative and highly decentralized form of work, or that the participation of users besides programming tasks is enormous and constitutes an economic factor of large proportions. }, keywords = {affiliation network, brooks law, cocomo, effort estimation, evolution, productivity, project success, scm, size, time, version control}, issn = {1422-8890}, doi = {10.1080/10196780410001675031}, author = {Koch, Stefan} } @conference {Gasser04researchinfrastructure, title = {Research Infrastructure for Empirical Science of F/OSS}, booktitle = {Proc. Intern. Workshop on Mining Software Repositories}, year = {2004}, pages = {12-16}, abstract = {F/OSS research faces a new and unusual situation: the traditional difficulties of gathering enough empirical data have been replaced by issues of dealing with enormous amounts of freely available data from many disparate sources (forums, code, bug reports, etc.) 
At present no means exist for assembling these data under common access points and frameworks for comparative, longitudinal, and collaborative research. Gathering and maintaining large F/OSS data collections reliably and making them usable present several research challenges. For example, current projects usually rely on {\textquotedblleft}web scraping{\textquotedblright} or on direct access to raw data from groups that generate it, and both of these methods require unique effort for each new corpus, or even for updating existing corpora. In this paper we identify several common needs and critical factors in F/OSS empirical research, and suggest orientations and recommendations for the design of a shared research infrastructure.}, keywords = {data, Data Collection, empirical, infrastructure}, attachments = {https://flosshub.org/sites/flosshub.org/files/12ResearchInfrastructure.pdf}, author = {Gasser, Les and Gabriel Ripoche and Sandusky, Robert J.} } @proceedings {1199, title = {Version Control Tools: A Collaborative Vehicle for Learning in F/OS}, year = {2004}, pages = {87-91}, abstract = {In this paper we explore how version control software participates in learning within free/open source activities (F/OS). We see F/OS in terms of a product, and a community of people engaged in the process of its development, with version control software at the centre of all three activities as they learn and innovate. Learning is analysed through the perspective provided by Bateson{\textquoteright}s Levels of Learning, a relational model that stresses collaboration and conflict as drivers of learning and showing how conflict resolution may lead to higher and more profound or significant learning.}, keywords = {education}, attachments = {https://flosshub.org/sites/flosshub.org/files/shaikh88-92.pdf}, author = {Shaikh, M. 
and Cornford, T.} } @proceedings {1175, title = {Adopting Open-Source Software Engineering in Computer Science Education}, year = {2003}, pages = {85-89}, keywords = {education}, attachments = {https://flosshub.org/sites/flosshub.org/files/85-89.pdf}, author = {Liu, C.} } @conference {German03automatingthe, title = {Automating the measurement of open source projects}, booktitle = {Proceedings of the 3rd Workshop on Open Source Software Engineering}, year = {2003}, note = {"We have chosen to use Evolution to illustrate some of the capabilities of SoftChange. We focus on the data provided by CVS logs and the CVS commit mailing list. Our data includes changes to the CVS repository from April 1998 to January 2003."}, pages = {63{\textendash}67}, abstract = {The proliferation of open source projects raises a number of vital economic, social, and software engineering questions that are subject of intense research. Based on experience analyzing numerous open source and commercial projects we propose a set of tools to support extraction and validation of software project data. Such tools would streamline empirical investigation of open source projects and make it possible to test existing and new theories about the nature of open source projects. Our soft- ware includes tools to extract and summarize information from mailing lists, CVS logs, ChangeLog files, and defect tracking databases. More importantly, it cross-links records from various data sources and identifies all contributors for a software change. 
We illustrate some of the capabilities by analyzing data from Ximian Evolution project.}, keywords = {bug reports, bug tracking, changelog, cvs, defects, evolution, log files, logs, mailing list, scm, softchange, source code, ximian, ximian evolution}, attachments = {https://flosshub.org/sites/flosshub.org/files/germanMockus2003.pdf}, author = {German, Daniel and Audris Mockus} } @article {Reinke2003707, title = {The availability of source code in relation to timely response to security vulnerabilities}, journal = {Computers \& Security}, volume = {22}, number = {8}, year = {2003}, note = {"This paper considers the effects of proprietary software versus non-proprietary software in determining the speed with which a security fix is made available" "The sources considered are the BugTraq mailing list (available at SecurityFocus.com), the CERT Coordination Center (www.cert.org), and Incidents.org (from the SANS Institute)" Note that these lists are generic topical discussion lists and not the mailing lists OF a particular open source project.}, pages = {707 - 724}, abstract = {Once a vulnerability has been found in an application or service that runs on a computer connected to the Internet, fixing that exploit in a timely fashion is of the utmost importance. There are two parts to fixing vulnerability: a party acting on behalf of the application{\textquoteright}s vendor gives instructions to fix it or makes a patch available that can be downloaded; then someone using that information fixes the computer or application in question. 
This paper considers the effects of proprietary software versus non-proprietary software in determining the speed with which a security fix is made available, since this can minimize the amount of time that the computer system remains vulnerable.}, keywords = {bugtraq, cert, email, email archives, mailing list, security, vulnerability}, issn = {0167-4048}, doi = {10.1016/S0167-4048(03)00011-7}, url = {http://www.sciencedirect.com/science/article/B6V8G-4B9CV31-C/2/a218fccfaef185af5c122f118b252703}, author = {John Reinke and Hossein Saiedian} } @conference {DBLP:conf/csmr/CapiluppiLM03, title = {Characteristics of Open Source Projects}, booktitle = {7th European Conference on Software Maintenance and Reengineering (CSMR{\textquoteright}03)}, year = {2003}, note = {"We analyze a sample of around 400 projects from a popular OS project repository. " no pdf to confirm what projects these are or what the repository is.[ms]}, pages = {317-}, abstract = {Most empirical studies about Open Source (OS) projects or products are vertical and usually deal with the flagship, successful projects. There is a substantial lack of horizontal studies to shed light on the whole population of projects, including failures. This paper presents a horizontal study aimed at characterizing OS projects. We analyze a sample of around 400 projects from a popular OS project repository. Each project is characterized by a number of attributes. We analyze these attributes statically and over time. The main results show that few projects are capable of attracting a meaningful community of developers. 
The majority of projects is made by few (in many cases one) person with a very slow pace of evolution.}, keywords = {evolution, project success, repository}, author = {Capiluppi, Andrea and Patricia Lago and Maurizio Morisio} } @article {146, title = {Community, joining, and specialization in open source software innovation: a case study}, journal = {Research Policy}, volume = {32}, number = {7}, year = {2003}, note = {first, telephone interviews "Secondly, we collected the project{\textquoteright}s public email conversations stored in the projects{\textquoteright} mailing lists which is archived on Freenet{\textquoteright}s website" "The third source of data included the history of changes to the software code available via the project{\textquoteright}s software repository within the CVS ({\textquoteleft}Concurrent Versioning System{\textquoteright}) source code management tool" "Fourthly, in order obtain contextual understanding of the project we collected publicly available documents related to open source in general and to the project in particular. Among the most important sources were the Freenet project web pages (e.g. the Frequently Asked Questions (FAQ)7), Ian Clarke{\textquoteright}s master thesis (1999), newspaper interviews with the core developers, and a technical paper (Clarke, Sandberg, Wiley, \& Hong, 2000) describing the Freenet project written by some of the developers."}, pages = {1217-1241}, abstract = {This paper develops an inductive theory of the open source software innovation process by focussing on the creation of Freenet, a project aimed at developing a decentralized and anonymous peer-to-peer electronic file sharing network. We are particularly interested in the strategies and processes by which new people join the existing community of software developers, and how they initially contribute code. 
Analyzing data from multiple sources on the Freenet software development process, we generate the constructs of "joining script", We are grateful to helpful comments from two anonymous reviewers. We also thank Chris Argyris, John Seely Brown, Eric von Hippel, Stefan Haefliger, Petra Kugler, Heike Bruch, Simon G{\"a}chter, Simon Peck, and Hari Tsoukas for helpful comments and suggestions. Ben Ho and Craig Lebowitz provided technical assistance with data importation and parsing. We would like to thank Ian Clarke and the Freenet developers for their willingness to participate in our study and providing key insights into the open source development process. Karim R. Lakhani would like to acknowledge the generous support of The Boston Consulting Group and Canada{\textquoteright}s Social Science and Humanities Research Council doctoral fellowship. Georg von Krogh and Sebastian Spaeth acknowledge the generous support from the Research Foundation at the University of St. Gallen.}, keywords = {cvs, email, email archives, freenet, INNOVATION, mailing lists, roles, source code}, doi = {10.1016/S0048-7333(03)00050-7}, attachments = {https://flosshub.org/sites/flosshub.org/files/krogh03.pdf}, author = {Georg von Krogh and Spaeth, S. and Karim R Lakhani} } @article {2003, title = {From a Firm-Based to a Community-Based Model of Knowledge Creation: The Case of the Linux Kernel Development}, journal = {Organization Science}, volume = {14}, number = {6}, year = {2003}, note = {"we study the Linux development community mainly by analyzing the artifacts that the Linux developers have produced. A key output of knowledge creation activities is the artifacts. The most important artifact, of course, is the Linux operating system source code." "Along with the source code, a "Credits" text file and a "MAINTAINERS" text file are distributed to the users." 
"An equally important artifact is the development activities archived in the Linux-kernel mailing list"..."Using the weekly Linux-kernel email archive for years 1995 to 2000 as a key source of data, we focus on people who have sent at least one email to the Linux-kernel mailing list. " "In addition, we examine the developers{\textquoteright} demographic distributions, working patterns, and motivations by analyzing the raw data from an on-line survey"}, pages = {633--649}, publisher = {INFORMS}, abstract = {We propose a new model of knowledge creation in purposeful, loosely coordinated, distributed systems, as an alternative to a firm-based one. Specifically, using the case of the Linux kernel development project, we build a model of community-based, evolutionary knowledge creation to study how thousands of talented volunteers, dispersed across organizational and geographical boundaries, collaborate via the Internet to produce a knowledge-intensive, innovative product of high quality. By comparing and contrasting the Linux model with the traditional/commercial model of software development and firm-based knowledge creation efforts, we show how the proposed model of knowledge creation expands beyond the boundary of the firm. Our model suggests that the product development process can be effectively organized as an evolutionary process of learning driven by criticism and error correction. We conclude by offering some theoretical implications of our community-based model of knowledge creation for the literature of organizational learning, community life, and the uses of knowledge in society.}, keywords = {credits, developers, email, email archives, knowledge creation, linux kernel, mailing list, maintainers, scm, source code, Survey, Volunteers}, issn = {1047-7039}, url = {http://www.jstor.org/stable/4135125}, author = {Lee, Gwendolyn K. 
and Cole, Robert E.} } @article {flosswp77, title = {Guarding the Commons: How Community Managed Software Projects Protect Their Work}, journal = {Research Policy}, volume = {32}, year = {2003}, note = {"Data was collected from three primary sources: 1) observation at project and user group meetings, technical presentations and conferences; 2) informant interviews; and 3) project data archived on the Internet that detailed project interactions and structural developments." "Project data was collected from online archives and included documents such as: mission statements, charters, bylaws, meeting minutes, and mailing list archives."}, month = {February}, pages = {1179--1198}, number = {7}, abstract = {Theorists often speculate why open source and free software project contributors give their work away. Although contributors make their work publicly available, they do not forfeit their rights to it. Community managed software projects protect their work by using several legal and normative tactics, which should not be conflated with a disregard for or neglect of intellectual property rights. These tactics allow a project{\textquoteright}s intellectual property to be publicly and freely available and yet, governable. 
Exploration of this seemingly contradictory state may provide new insight into governance models for the management of digital intellectual property.}, keywords = {Common Pool Resources, email, email archives, intellectual property, mailing list, open source, Public Goods, Software, Survey}, attachments = {https://flosshub.org/sites/flosshub.org/files/rp-omahony.pdf}, author = {Siobhan O{\textquoteright}Mahony} } @conference {Ye:2003:TUM:776816.776867, title = {Toward an understanding of the motivation of Open Source Software developers}, booktitle = {Proceedings of the 25th International Conference on Software Engineering}, series = {ICSE {\textquoteright}03}, year = {2003}, note = {"Analyzing the emails sent to the mailing list is one way of understanding the structure of the community." "Table 2 displays the number of code contributions made by members to the GIMP system and the defined roles of those contributing members. We counted the number of contributions made by each person by analyzing the change log of the system."}, pages = {419{\textendash}429}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, abstract = {An Open Source Software (OSS) project is unlikely to be successful unless there is an accompanied community that provides the platform for developers and users to collaborate. Members of such communities are volunteers whose motivation to participate and contribute is of essential importance to the success of OSS projects. In this paper, we aim to create an understanding of what motivates people to participate in OSS communities. We theorize that learning is one of the motivational forces. Our theory is grounded in the learning theory of Legitimate Peripheral Participation, and is supported by analyzing the social structure of OSS communities and the co-evolution between OSS systems and communities. 
We also discuss practical implications of our theory for creating and maintaining sustainable OSS communities as well as for software engineering research and education.}, keywords = {change log, COMMUNITY, contributions, contributors, developers, email, email archives, evolution, gimp, log files, mailing list, roles, source code}, isbn = {0-7695-1877-X}, url = {http://portal.acm.org/citation.cfm?id=776816.776867}, attachments = {https://flosshub.org/sites/flosshub.org/files/YeKishida.pdf}, author = {Ye, Yunwen and Kishida, Kouichi} } @conference {1160, title = {Adopting OSS Methods by Adopting OSS Tools}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, abstract = {The open source movement has created and used a set of software engineering tools with features that fit the characteristics of open source development processes. To a large extent, the open source culture and methodology are conveyed to new developers via the toolset itself, and through the demonstrated usage of these tools on existing projects. The rapid and wide adoption of open source tools stands in stark contrast to the difficulties encountered in adopting traditional CASE tools. This paper explores the characteristics that make these tools adoptable and how adopting them may influence software development processes.}, keywords = {ant, argouml, bugzilla, cactus, cvs, developers, eclipse, emacs, email, faq, junit, mailing lists, make, netbeans, package management, rpm, scarab, subversion, teams, tools, torque, WORK}, attachments = {https://flosshub.org/sites/flosshub.org/files/Robbins.pdf}, author = {Robbins, Jason E.} } @conference {1161, title = {The Coming Software Revolution}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, abstract = {The growing adoption of collaborative software development will change the global software industry by altering its economics of scale, location, and price. 
The growth of non-proprietary code will depress prices in the proprietary sector, and the possession of source code will enable the growth of more software industrial centers around the world; their emergence will threaten the business of current leaders. National governments which see themselves as disadvantaged or even threatened by the current software powers will encourage collaborative software development and the services that this development model drives. Only world-wide draconic legislation by the current software powers has any hope of preserving the status quo.}, keywords = {ECONOMICS, management, open source, politics}, attachments = {https://flosshub.org/sites/flosshub.org/files/Rosenberg.pdf}, author = {Rosenberg, D.K.} } @proceedings {104, title = {Economic incentives for participating in open source software projects}, year = {2002}, note = {"The data for this research come from two primary sources: Apache project archives and a targeted survey of Apache participants. Archival data are open source project artifacts such as e-mail and source code archives, source code version control meta-data, and developer Web sites" independent variables: patch, committer, work experience, programming experience, education, job switch, firm size, firm public, industry}, pages = {365{\textendash}372}, abstract = {Using the Internet as a basis for communication, collaboration, and storage of artifacts, the open source community is producing software of a quality that was previously thought to be achievable only by professional engineers following strict software development paradigms. This accomplishment is even more astounding as developers contribute to the source code without any remuneration. Open source leaders as well as academics have proposed theories about the motivation of open source developers that are rooted in diverse fields such as social psychology and anthropology. 
However, Lerner and Tirole (2000) argue that developer participation in open source projects may, in part, be explained by existing economic theory regarding career concerns. This research seeks to confirm or disconfirm the existence of economic returns to participation in open source development. Our findings suggest that greater open source participation per se, as measured in contributions made, is not associated with wage increases. However, a higher status in a merit-based ranking within the Apache Project is associated with significantly higher wages. This suggests that employers do not reward the gain in experience through open source participation as an increase in human capital. The results are also consistent with the notion that a high rank within the Apache Software Foundation is a credible signal of the productive capacity of a programmer.}, keywords = {apache, contributions, email, email archives, mailing list, organizational sponsorship, participation, patch, scm, source code, Survey, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/42.pdf}, author = {Il-Horn Hann and Jeff Roberts and Sandra Slaughter and Roy Fielding} } @article {1117, title = {Effort, co-operation and co-ordination in an open source software project: GNOME}, journal = {Information Systems Journal}, volume = {12}, year = {2002}, note = {"Also retrieved by a Perl script were the postings to the relevant discussion lists including the sender, the subject, time and complete text. For analysis of the posting behaviour of the programmers, the short name that each programmer uses for checkins had to be matched to the full name or email address used for postings. For 175 persons, this has been possible using several regular expressions with human check-up."}, month = {01/2002}, pages = {27 - 42}, abstract = {This paper presents results from research into open source projects from a software engineering perspective. 
The research methodology employed relies on public data retrieved from the CVS repository of the GNOME project and relevant discussion groups. This methodology is described, and results concerning the special characteristics of open source software development are given. These data are used for a first approach to estimating the total effort to be expended.}, keywords = {cvs, discussion, effort estimation, gnome}, issn = {1365-2575}, doi = {10.1046/j.1365-2575.2002.00110.x}, author = {Koch, Stefan and Schneider, Georg} } @article {121, title = {Two case studies of open source software development: Apache and Mozilla}, journal = {ACM Transactions on Software Engineering and Methodology}, volume = {11}, number = {3}, year = {2002}, note = {apache data sources: email, cvs, bug database regarding email: "We wrote Perl scripts to extract date, sender identity, message subject, and the message body that was further processed to obtain details on code changes and problem reports (see below). Manual inspection was used to resolve such things as multiple email addresses in cases where all automated techniques failed." (but the rest of the paper does not address this data source at all) mozilla data sources bugzilla, cvs }, pages = {309-346}, abstract = {According to its proponents, open source style software development has the capacity to compete successfully, and perhaps in many cases displace, traditional commercial development methods. In order to begin investigating such claims, we examine data from two major open source projects, the Apache web server and the Mozilla browser. By using email archives of source code change history and problem reports we quantify aspects of developer participation, core team size, code ownership, productivity, defect density, and problem resolution intervals for these OSS projects. We develop several hypotheses by comparing the Apache project with several commercial projects. 
We then test and refine several of these hypotheses, based on an analysis of Mozilla data. We conclude with thoughts about the prospects for high-performance commercial/open source process hybrids.}, keywords = {apache, bug fixing, bug reports, bugzilla, change history, core, defect density, email, email archives, mailing list, mozilla, ownership, participation, productivity, scm, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/mockusFieldingHerbsleb2002.pdf}, author = {Audris Mockus and Roy Fielding and Herbsleb, J. D.} } @conference {1157, title = {Why Do Developers Contribute to Open Source Projects? First Evidence of Economic Incentives}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, note = {"The data for this research come from two primary sources: Apache project archives and a targeted survey of Apache participants. Archival data are open source project artifacts such as email and source code archives, source code version control meta-data and developer web sites."}, abstract = {The availability of commercial quality, free software products such as the Apache HTTP (web) server or the Linux operating system has focused significant attention on the open source development process by which these products were created. One of the more perplexing aspects of open source software projects is why developers freely devote their time and energy to these projects. While many open source participants cite idealistic motives for participation, Lerner and Tirole (2000) argue that developer participation in open source projects may, in part, be explained by existing economic theory regarding career concerns. This research seeks to confirm or disconfirm the existence of economic returns to participation in open source development. Preliminary results of our empirical investigation suggest that greater open source participation per se, as measured in contributions made, does not lead to wage increases. 
However, a higher status in a merit-based ranking within the Apache Project does lead to significantly higher wages. This suggests that employers do not reward the gain in experience through open source participation as an increase in human capital. The results are also consistent with the notion that a high rank within the Apache Software Foundation is a credible signal of the productive capacity of a programmer.}, keywords = {apache, contributions, cvs, developers, ECONOMICS, email, email archives, financial, Human capital, mailing list, MOTIVATION, participation, source code, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/HannRobertsSlaughterFielding.pdf}, author = {Il-Horn Hann and Jeff Roberts and Sandra Slaughter and Roy Fielding} } @article {54, title = {Code, Culture and Cash: The Fading Altruism of Open Source Development}, journal = {First Monday}, volume = {6}, number = {12}, year = {2001}, note = {"I collected information on the country of residence for key contributors to the two projects. In the case of Linux, I relied on information located in the CREDITS file of all major kernel releases (from version 1.0 to version 2.4.9) [22]. For Gnome, I gathered developer-contact information from the project{\textquoteright}s web-site. Where information on the home-country of developers was not explicitly available, I performed private research to ascertain said information, or - in the last-case scenario - trusted information in the provided e-mail address of developers to infer home-country from domain ownership [23]. In the case of Linux, to avoid bias that might be introduced over time as developers migrated internationally, developers are continually counted as residents of the countries they were associated with when their names first entered the CREDITS file"}, abstract = {The nexus of open source development appears to have shifted to Europe over the last ten years. 
This paper explains why this trend undermines cultural arguments about "hacker ethics" and "post-scarcity" gift economies. It suggests that classical economic theory offers a more succinct explanation for the peculiar international distribution of open source development: hacking rises and falls inversely to its opportunity cost. This finding throws doubt on the Schumpeterian assumption that the efficiency of industrial systems can be measured without reference to the social institutions that bind them.}, keywords = {credits, email address, european, geography, gnome, linux}, url = {http://131.193.153.231/www/issues/issue6_12/lancashire/index.html}, attachments = {https://flosshub.org/sites/flosshub.org/files/Lancashire.pdf}, author = {David Lancashire} } @conference {Godfrey:2001:GES:602461.602482, title = {Growth, evolution, and structural change in open source software}, booktitle = {Proceedings of the 4th International Workshop on Principles of Software Evolution (IWPSE 2001)}, series = {IWPSE {\textquoteright}01}, year = {2001}, note = {"We measured [linux] system size in uncommented LOC" "We also examined the growth of several other open source systems, including the VIM text editor, Eric Raymond{\textquoteright}s fetchmail utility, and the GCC compiler suite. "}, pages = {103{\textendash}106}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Our recent work has addressed how and why software systems evolve over time, with a particular emphasis on software architecture and open source software systems [2, 3, 6]. In this position paper, we present a short summary of two recent projects. First, we have performed a case study on the evolution of the Linux kernel [3], as well as some other open source software (OSS) systems. We have found that several OSS systems appear not to obey some of "Lehman{\textquoteright}s laws" of software evolution [5, 7], and that Linux in particular is continuing to grow at a geometric rate. 
Currently, we are working on a detailed study of the evolution of one of the subsystems of the Linux kernel: the SCSI drivers subsystem. We have found that cloning, which is usually considered to be an indicator of lazy development and poor process, is quite common and is even considered to be a useful practice. Second, we are developing a tool called Beagle to aid software maintainers in understanding how large systems have changed over time. Beagle integrates data from various static analysis and metrics tools and provides a query engine as well as navigable visualizations. Of particular note, Beagle aims to provide help in modelling long term evolution of systems that have undergone architectural and structural change.}, keywords = {agile methods, beagle, cloning, evolution, fetchmail, gcc, growth, kernel, lehman{\textquoteright}s laws, lines of code, linux, linux kernel, loc, open source software, software architecture, software evolution, source code, structural change, supporting environments, vim}, isbn = {1-58113-508-4}, doi = {10.1145/602461.602482}, url = {http://doi.acm.org/10.1145/602461.602482}, attachments = {https://flosshub.org/sites/flosshub.org/files/tu2001.pdf}, author = {Michael Godfrey and Tu, Qiang} } @conference {1148, title = {Introducing a {\textquotedblleft}Street Fair{\textquotedblright} Open source Practice Within Project Based Software Engineering Courses}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, abstract = {Columbia University and USC, as well as many other universities, have software engineering courses where students work in small teams to develop a product of moderate complexity. Typically such efforts are {\textquotedblleft}closed source{\textquotedblright} where teams do not (or are forbidden) from sharing information and providing assistance to other teams. There are certainly many justifiable reasons for this such as grading uniformity. 
Yet strong drivers such as grade competition, generally thought to promote quality results (i.e. successful projects), often do not. The authors ran an experiment introducing a style of open source practice called {\textquotedblleft}street fair{\textquotedblright} with surprisingly positive results.}, keywords = {education, educational resources}, attachments = {https://flosshub.org/sites/flosshub.org/files/portkaiser.pdf}, author = {Port, D. and Kaiser, G.} } @conference {flosswp26, title = {A Case Study of Open Source Software Development: The Apache Server}, booktitle = {Proceedings of the International Conference on Software Engineering (ICSE 2000)}, year = {2000}, note = {We used the following archival sources of data: Developer email list (EMAIL). Concurrent Version Control archive (CVS). Problem reporting database (BUGDB).}, month = {June}, abstract = {According to its proponents, open source style software development has the capacity to compete successfully, and perhaps in many cases displace, traditional commercial development methods. We examine the development process of a major open source application, the Apache web server. By using email archives of source code change history and problem reports we quantify aspects of developer participation, core team size, code ownership, productivity, defect density, and problem resolution interval for this OSS project. 
This analysis reveals a unique process, which performs well on important measures.}, keywords = {apache, bug fix revisions, bugs, core, cvs, defect density, developers, email archives, participation, productivity, revision control, revision history, roles, scm, source code, team size}, attachments = {https://flosshub.org/sites/flosshub.org/files/mockusapache.pdf}, author = {Audris Mockus and Roy Fielding and Herbsleb, James} } @conference {Yamauchi:2000:CLM:358916.359004, title = {Collaboration with Lean Media: how open-source software succeeds}, booktitle = {Proceedings of the 2000 ACM conference on Computer supported cooperative work (CSCW)}, series = {CSCW {\textquoteright}00}, year = {2000}, pages = {329{\textendash}338}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Open-source software, usually created by volunteer programmers dispersed worldwide, now competes with that developed by software firms. This achievement is particularly impressive as open-source programmers rarely meet. They rely heavily on electronic media, which preclude the benefits of face-to-face contact that programmers enjoy within firms. In this paper, we describe findings that address this paradox based on observation, interviews and quantitative analyses of two open-source projects. The findings suggest that spontaneous work coordinated afterward is effective, rational organizational culture helps achieve agreement among members and communications media moderately support spontaneous work. 
These findings can imply a new model of dispersed collaboration.}, keywords = {cooperative work, cvs, distributed work, electronic media, INNOVATION, open-source, software engineering}, isbn = {1-58113-222-0}, doi = {10.1145/358916.359004}, url = {http://doi.acm.org/10.1145/358916.359004}, author = {Yamauchi, Yutaka and Yokozawa, Makoto and Shinohara, Takeshi and Ishida, Toru} } @conference {Godfrey:2000:EOS:850948.853411, title = {Evolution in Open Source Software: A Case Study}, booktitle = {Proceedings of the International Conference on Software Maintenance (ICSM{\textquoteright}00)}, series = {ICSM {\textquoteright}00}, year = {2000}, note = {"We examined 96 kernel versions..." .c files, .h files only loc, lines of code number of functions number of modules}, pages = {131{\textendash}}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, abstract = {Most studies of software evolution have been performed on systems developed within a single company using traditional management techniques. With the widespread availability of several large software systems that have been developed using an {\textquoteright}open source{\textquoteright} development approach, we now have a chance to examine these systems in detail, and see if their evolutionary narratives are significantly different from commercially developed systems. This paper summarizes our preliminary investigations into the evolution of the best known open source system: the Linux operating system kernel. Because Linux is large (over two million lines of code in the most recent version) and because its development model is not as tightly planned and managed as most industrial software processes, we had expected to find that Linux was growing more slowly as it got bigger and more complex. Instead, we have found that Linux has been growing at a super-linear rate for several years. 
In this paper, we explore the evolution of the Linux kernel both at the system level and within the major subsystems, and we discuss why we think Linux continues to exhibit such strong growth.}, keywords = {evolution, functions, growth, lines of code, linux, linux kernel, loc, source code}, isbn = {0-7695-0753-0}, url = {http://portal.acm.org/citation.cfm?id=850948.853411}, attachments = {https://flosshub.org/sites/flosshub.org/files/godfrey00.pdf}, author = {Godfrey, Michael W. and Tu, Qiang} }