% OSS 2017 paper (Springer, vol. 496): company use of open source against lock-in.
@proceedings{1896,
  title        = {Addressing Lock-in, Interoperability, and Long-Term Maintenance Challenges Through Open Source: How Can Companies Strategically Use Open Source?},
  volume       = {496},
  year         = {2017},
  month        = may,
  pages        = {80--88},
  publisher    = {Springer},
  abstract     = {This industry paper reports on how strategic use of open source in company contexts can provide effective support for addressing the fundamental challenges of lock-in, interoperability, and longevity of software and associated digital assets. The fundamental challenges and an overview of an ongoing collaborative research project are presented. Through a conceptual model for open source usage in company contexts we characterise how companies engage with open source and elaborate on how the fundamental challenges can be effectively addressed through open source usage in company contexts.},
  doi          = {10.1007/978-3-319-57735-7_9},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_9},
  author       = {Lundell, Bj{\"o}rn and Gamalielsson, Jonas and Tengblad, Stefan and Yousefi, Bahram Hooshyar and Fischer, Thomas and Johansson, Gert and Rodung, Bengt and Mattsson, Anders and Oppmark, Johan and Gustavsson, Tomas and Feist, Jonas and Landemoo, Stefan and L{\"o}nroth, Erik},
}

% Same Springer volume (496): adoption of research tools by the Debian community.
@proceedings{1901,
  title        = {Adoption of Academic Tools in Open Source Communities: The {Debian} Case Study},
  volume       = {496},
  year         = {2017},
  month        = may,
  pages        = {139--150},
  publisher    = {Springer},
  abstract     = {Component repositories play a key role in the open software ecosystem. Managing the evolution of these repositories is a challenging task, and maintainers are confronted with a number of complex issues that need automatic tools to be addressed properly. In this paper, we present an overview of 10 years of research in this field and the process leading to the adoption of our tools in a FOSS community.
We focus on the Debian distribution and in particular we look at the issues arising during the distribution lifecycle: ensuring buildability of source packages, detecting packages that cannot be installed and bootstrapping the distribution on a new architecture. We present three tools, distcheck, buildcheck and botch, that we believe of general interest for other open source component repositories. The lesson we have learned during this journey may provide useful guidance for researchers willing to see their tools broadly adopted by the community.},
  keywords     = {components, debian, distribution},
  doi          = {10.1007/978-3-319-57735-7_14},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_14},
  author       = {Abate, Pietro and Di Cosmo, Roberto},
}

% Two-page position paper on open science, version control and blockchains.
@proceedings{1916,
  title        = {Advancing Open Science with Version Control and Blockchains},
  year         = {2017},
  month        = may,
  pages        = {13--14},
  abstract     = {The scientific community is facing a crisis of reproducibility: confidence in scientific results is damaged by concerns regarding the integrity of experimental data and the analyses applied to that data. Experimental integrity can be compromised inadvertently when researchers overlook some important component of their experimental procedure, or intentionally by researchers or malicious third-parties who are biased towards ensuring a specific outcome of an experiment. The scientific community has pushed for {\textquotedblleft}open science{\textquotedblright} to add transparency to the experimental process, asking researchers to publicly register their data sets and experimental procedures. We argue that the software engineering community can leverage its expertise in tracking traceability and provenance of source code and its related artifacts to simplify data management for scientists.
Moreover, by leveraging smart contract and blockchain technologies, we believe that it is possible for such a system to guarantee end-to-end integrity of scientific data and results while supporting collaborative research.},
  keywords     = {blockchain, replication, reproducible},
  author       = {Bell, Jonathan and LaToza, Thomas D. and Baldimtsi, Foteini and Stavrou, Angelos},
}

% Springer volume 496: code authorship case study on the Linux kernel.
@proceedings{1902,
  title        = {Assessing Code Authorship: The Case of the {Linux} Kernel},
  volume       = {496},
  year         = {2017},
  month        = may,
  pages        = {151--163},
  publisher    = {Springer},
  abstract     = {Code authorship is a key information in large-scale open-source systems. Among others, it allows maintainers to assess division of work and identify key collaborators. Interestingly, open-source communities lack guidelines on how to manage authorship. This could be mitigated by setting to build an empirical body of knowledge on how authorship-related measures evolve in successful open-source communities. Towards that direction, we perform a case study on the Linux kernel.
Our results show that: (a) only a small portion of developers (26\%) makes significant contributions to the code base; (b) the distribution of the number of files per author is highly skewed{\textemdash}a small group of top-authors (3\%) is responsible for hundreds of files, while most authors (75\%) are responsible for at most 11 files; (c) most authors (62\%) have a specialist profile; (d) authors with a high number of co-authorship connections tend to collaborate with others with less connections.},
  keywords     = {code authorship, developer network, linux kernel},
  doi          = {10.1007/978-3-319-57735-7_15},
  url          = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_15},
  author       = {Avelino, Guilherme and Passos, Leonardo and Hora, Andre and Valente, Marco Tulio},
}

% Survey study of open source developers; the note field records the FLOSSmole link.
@proceedings{1819,
  title        = {Analyzing the Decision Criteria of Software Developers Based on Prospect Theory},
  year         = {2016},
  note         = {they sent a survey to developers and used flossmole to harvest the email addresses},
  month        = mar,
  abstract     = {To enhance the quality of software, many software development support tools and software development methodologies have been proposed. However, not all proposed tools and methodologies are widely used in software development. We assume that the evaluation of tools and methodologies by developers is different from the evaluation by researchers, and that this is one of the reasons why the tools and methodologies are not widely used. We analyzed the decision criteria of software developers as applied to the tools and methodologies, to clarify whether the difference exists or not. In behavioral economics, there are theories which assume people have biases, and they do not always act reasonably. In the experiment, we made a questionnaire based on behavioral economics, and collected answers from open source software developers. The results suggest that developers do not always act to maximize expected profit because of the certainty effect and ambiguity aversion.
Therefore, we should reconsider the evaluation criteria of tools such as the f-measure or AUC, which mainly focus on the expected profit.},
  keywords     = {flossmole},
  url          = {http://se-naist.jp/pman3/pman3.cgi?DOWNLOAD=579},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/saner2016-kina.pdf},
  author       = {Kina, Kanako and Tsunoda, Masateru and Tamada, Haruaki and Igaki, Hiroshi},
}

% Dataset paper describing AREVOL.
@proceedings{1759,
  title        = {An Architectural Evolution Dataset},
  year         = {2015},
  month        = may,
  publisher    = {IEEE},
  abstract     = {A good evolution process and a good architecture can greatly support the maintainability of long-lived, large software systems. We present AREVOL, a dataset for the empirical study of architectural evolution. The dataset comprises two popular systems from the same domain and using the same component model, to make comparative studies possible. Besides the original component metadata, AREVOL includes scripts to obtain simplified models that nevertheless support rich studies of architectural evolution, as the authors{\textquoteright} previous work has shown.},
  url          = {http://oro.open.ac.uk/42318/1/wermelinger15msr.pdf},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/wermelinger15msr.pdf},
  author       = {Wermelinger, Michel and Yu, Yijun},
}

% Conference paper; the "conference" entry type is only an alias of inproceedings.
@inproceedings{VGZ15,
  title        = {Automatically Prioritizing Pull Requests},
  booktitle    = {Proceedings of the 12th International Conference on Mining Software Repositories},
  year         = {2015},
  month        = may,
  publisher    = {IEEE},
  organization = {IEEE},
  abstract     = {In previous work, we observed that in the pull-based development model integrators face challenges with regard to prioritizing work in the face of multiple concurrent pull requests. We present the design and initial implementation of a prototype pull request prioritisation tool called PRioritizer. PRioritizer works like a priority inbox for pull requests, recommending the top pull requests the project owner should focus on.
A preliminary user study showed that PRioritizer provides functionality that GitHub is currently lacking, even though users need more insight into how the priority ranking is established to make PRioritizer really useful.},
  url          = {http://www.gousios.gr/pub/prioritizer.pdf},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/prioritizer.pdf},
  author       = {van der Veen, Erik and Gousios, Georgios and Zaidman, Andy},
}

% Chapter in an edited volume (author, editor and booktitle all present), hence incollection.
@incollection{1735,
  title        = {On the Availability and Effectiveness of Open Source Software for Digital Signing of {PDF} Documents},
  booktitle    = {Open Source Systems: Adoption and Impact},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {451},
  year         = {2015},
  pages        = {71--80},
  publisher    = {Springer International Publishing},
  organization = {Springer International Publishing},
  abstract     = {Digital signatures are important in order to ensure the integrity and authenticity of information communicated over the Internet involving different stakeholders within and beyond the borders of different nations. The topic has gained increased interest in the European context and there is legislation and project initiatives aiming to facilitate use and standardisation of digital signatures. Open standards and open source implementations of open standards are important means for the interoperability and long-term maintenance of software systems implementing digital signatures. In this paper we report from a study aiming to establish the availability and effectiveness of software provided under an open source license for digital signing and validation of PDF documents. Specifically, we characterise the use of digital signatures in Swedish Governmental agencies, report on the interoperability of open source and proprietary licensed software for digital signatures in PDF documents, and establish the effectiveness of software provided under an open source license for validation of digital signatures in PDF documents.
},
  isbn         = {978-3-319-17836-3},
  doi          = {10.1007/978-3-319-17837-0_7},
  url          = {http://dx.doi.org/10.1007/978-3-319-17837-0_7},
  author       = {Gamalielsson, Jonas and Jakobsson, Fredrik and Lundell, Bj{\"o}rn and Feist, Jonas and Gustavsson, Tomas and Landqvist, Fredric},
  editor       = {Damiani, Ernesto and Frati, Fulvio and Riehle, Dirk and Wasserman, Anthony I.},
}

% Chapter in an edited volume (author, editor and booktitle all present), hence incollection.
@incollection{1596,
  title        = {Adapting {SCRUM} to the {Italian Army}: Methods and (Open) Tools},
  booktitle    = {Open Source Software: Mobile Open Source Technologies},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {427},
  year         = {2014},
  pages        = {61--69},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract     = {Many software-related technologies, including software development methodologies, quality models, etc. have been developed due to the huge software needs of the Department of Defense (DoD) of the United States. Therefore, it is not surprising that the DoD is promoting open source software and agile approaches into the development processes of the defense contractors. The quality of many open source product has been demonstrated to be comparable to the close source ones and in many cases even higher and the effectiveness of agile approaches has been demonstrated in many industrial settings. Moreover, the availability of the source code makes open source products attractive for obvious reasons (e.g., security, long term maintenance, etc.). Following this trend, also the Italian Army has started using open source software and promotes its usage into the development processes of its contractors, also promoting agile approaches in many contexts focusing on the SCRUM methodology. This paper provides an overview of the SCRUM development process adopted by the Italian Army for the development of software systems using open source technologies.
},
  isbn         = {978-3-642-55127-7},
  doi          = {10.1007/978-3-642-55128-4_7},
  url          = {http://dx.doi.org/10.1007/978-3-642-55128-4_7},
  author       = {Cotugno, Franco Raffaele and Messina, Angelo},
  editor       = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.},
}

% Chapter in an edited volume (author, editor and booktitle all present), hence incollection.
@incollection{1614,
  title        = {The Agile Management of Development Projects of Software Combining {Scrum}, {Kanban} and Expert Consultation},
  booktitle    = {Open Source Software: Mobile Open Source Technologies},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {427},
  year         = {2014},
  pages        = {176--180},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract     = {At the University of Informatics Sciences (UCI), Havana, Cuba, it is found The Center of Free Solutions of Software (CESOL) who has an informatic project named {\textquotedblleft}Auditing of Source Code{\textquotedblright} (ACF). This project has as objective to develop an open source software solution to auditing the source code of several software solutions with an agile projects management. In the present investigation have been showed the experiences obtained in the mixed application of two methods of agile projects management; Kanban and Scrum, together with the method Judgment of Expert, during the stage of construction of the lifecycle of ACF, when it is was performed a quality auditing by specialists of the CALISOFT company. In the auditing were detected several errors and to resolve them was necessary to estimate efforts, time and to revalue the lifecycle of the project. Moreover, the investigation show how this method can be used as a guide for young project managers for a correct planification and how can be used as a personal organizational method.
},
  keywords     = {Agile management of projects, kanban, scrum},
  isbn         = {978-3-642-55127-7},
  doi          = {10.1007/978-3-642-55128-4_25},
  url          = {http://dx.doi.org/10.1007/978-3-642-55128-4_25},
  author       = {Febles Parker, Michel Evaristo and Fern{\'a}ndez Monte, Yusleydi},
  editor       = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.},
}

% Conference paper; the "conference" entry type is only an alias of inproceedings.
@inproceedings{Matragkas:2014:ABO:2597073.2597119,
  title        = {Analysing the {\textquoteleft}Biodiversity{\textquoteright} of Open Source Ecosystems: The {GitHub} Case},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {356--359},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {In nature the diversity of species and genes in ecological communities affects the functioning of these communities. Biologists have found out that more diverse communities appear to be more productive than less diverse communities. Moreover such communities appear to be more stable in the face of perturbations. In this paper, we draw the analogy between ecological communities and Open Source Software (OSS) ecosystems, and we investigate the diversity and structure of OSS communities. To address this question we use the MSR 2014 challenge dataset, which includes data from the top-10 software projects for the top programming languages on GitHub. Our findings show that OSS communities on GitHub consist of 3 types of users (core developers, active users, passive users). Moreover, we show that the percentage of core developers and active users does not change as the project grows and that the majority of members of large projects are passive users.},
  keywords     = {Data and knowledge visualization, data mining, mining challenge, msr challenge},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597119},
  url          = {http://doi.acm.org/10.1145/2597073.2597119},
  author       = {Matragkas, Nicholas and Williams, James R.
and Kolovos, Dimitris S. and Paige, Richard F.},
}

% Chapter in an edited volume (author, editor and booktitle all present), hence incollection.
@incollection{1595,
  title        = {Analyzing the Relationship between the License of Packages and Their Files in Free and Open Source Software},
  booktitle    = {Open Source Software: Mobile Open Source Technologies},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {427},
  year         = {2014},
  pages        = {51--60},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract     = {Free and Open Source Software (FOSS) is widely reused today. To reuse FOSS one must accept the conditions imposed by the software license under which the component is made available. This is complicated by the fact that often FOSS packages contain files from many licenses. In this paper we analyze the source code of packages in the Fedora Core Linux distribution with the goal of discovering the relationship between the license of a source package, and the license of the files it contains. For this purpose we create license inclusion graphs. Our results show that more modern reciprocal licenses such as the General Public License v3 tend to include files of less licenses than its previous versions, and that packages under an Apache License tend to contain only files under the same license.},
  isbn         = {978-3-642-55127-7},
  doi          = {10.1007/978-3-642-55128-4_6},
  url          = {http://dx.doi.org/10.1007/978-3-642-55128-4_6},
  author       = {Manabe, Yuki and German, Daniel M.
and Inoue, Katsuro},
  editor       = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.},
}

% Chapter in an edited volume (author, editor and booktitle all present), hence incollection.
@incollection{1597,
  title        = {Applying the Submission Multiple Tier ({SMT}) Matrix to Detect Impact on Developer Interest on Open Source Project Survivability},
  booktitle    = {Open Source Software: Mobile Open Source Technologies},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {427},
  year         = {2014},
  pages        = {70--75},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract     = {There is a significant relationship between project activity and developer interest on Open Source (OS) projects. Total project activity submission count number can be an indicator for gauging developer interest. The higher the project activity submission of a project is, the larger developer interest in a project. My paper proposed that applying a Submission Multiple Tier (SMT) matrix can detect the impact of developer interest on project activity. Results showed more volume of OS projects with low project activity than high. Activity submission results also showed that developers are more likely to review than correct projects, with the first priority to find and fix bugs. Further research is needed to determine the impact of project activity type on developer motivation to contribute, participate and support OS projects.
},
  isbn         = {978-3-642-55127-7},
  doi          = {10.1007/978-3-642-55128-4_8},
  url          = {http://dx.doi.org/10.1007/978-3-642-55128-4_8},
  author       = {Chua, Bee Bee},
  editor       = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.},
}

% Chapter in an edited volume (author, editor and booktitle all present), hence incollection.
@incollection{1625,
  title        = {Automation of Agricultural Irrigation System with Open Source},
  booktitle    = {Open Source Software: Mobile Open Source Technologies},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {427},
  year         = {2014},
  pages        = {232--233},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract     = {In this present job, we seek to develop a prototype of an automated agricultural irrigation system, monitored and controlled remotely. For that, we will use inexpensive tools, flexibility and support such as Arduino, XBee and Android. Arduino and XBee, will be responsible of the automating the system. Android will achieve the remote monitoring and control from anywhere in the world where there is cellular service and Internet. In this way we can give the farmer the comfort and security that he don{\textquoteright}t get with a manually controlled system.},
  keywords     = {ADK, android, Arduino, XBee},
  isbn         = {978-3-642-55127-7},
  doi          = {10.1007/978-3-642-55128-4_36},
  url          = {http://dx.doi.org/10.1007/978-3-642-55128-4_36},
  author       = {P{\'e}rez Quezada, Bladimir Jaime and Fern{\'a}ndez, Javier},
  editor       = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.},
}

% Conference paper; title stored without a trailing period because styles add their own.
@inproceedings{hannemann2013adaptive,
  title        = {An Adaptive Filter-Framework for the Quality Improvement of Open-Source Software Analysis},
  booktitle    = {Software Engineering},
  year         = {2013},
  pages        = {143--156},
  publisher    = {Citeseer},
  organization = {Citeseer},
  abstract     = {Knowledge mining in Open-Source Software (OSS) brings a great benefit for software engineering (SE).
The researchers discover, investigate, and even simulate the organization of development processes within open-source communities in order to understand the community-oriented organization and to transform its advantages into conventional SE projects. Despite a great number of different studies on OSS data, not much attention has been paid to the data filtering step so far. The noise within uncleaned data can lead to inaccurate conclusions for SE. A special challenge for data cleaning presents the variety of communicational and development infrastructures used by OSS projects. This paper presents an adaptive filter-framework supporting data cleaning and other preprocessing steps. The framework allows to combine filters in arbitrary order, defining which preprocessing steps should be performed. The filter-portfolio can be extended easily. A schema matching in case of cross-project analysis is available. Three filters - spam detection, quotation elimination and core periphery distinction - were implemented within the filter-framework. In the analysis of three large-scale OSS projects (BioJava, Biopython, BioPerl), the filtering led to a significant data modification and reduction. 
The results of text mining (sentiment analysis) and social network analysis on uncleaned and cleaned data differ significantly, confirming the importance of the data preprocessing step within OSS empirical studies.},
  url          = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.437.5602\&rep=rep1\&type=pdf$\#$page=143},
  author       = {Hannemann, Anna and Hackstein, Michael and Klamma, Ralf and Jarke, Matthias},
}

% Journal article: text mining of defect reports as an early indicator of resolution time.
@article{1642,
  title        = {All complaints are not created equal: text analysis of open source software defect reports},
  journal      = {Empirical Software Engineering},
  volume       = {18},
  year         = {2013},
  month        = feb,
  pages        = {117--138},
  abstract     = {As the use of Open Source Software (OSS) systems increases in the corporate environment, it is important to examine the maintenance process of these projects. OSS projects allow end users to directly submit reports in case of any operational issues. Timely resolution of these defect reports requires effective management of maintenance resources. This study analyzes the usefulness of the textual content of the defect reports as an early indicator of their resolution time. Text Mining techniques are used to categorize defect reports of five OSS projects. Significant variation in the defect resolution time amongst the resulting categories, for each of the sample projects, indicates that a text based classification of defect reports can be useful in early assessment of resolution time before source code level analysis. Such technique can assist in allocation of sufficient maintenance resources to targeted defects and also enable project teams to manage customer expectations regarding defect resolution times.
},
  issn         = {1573-7616},
  doi          = {10.1007/s10664-012-9197-9},
  author       = {Raja, Uzma},
}

% Comparison of developer communication on project repositories and on Twitter.
@proceedings{1552,
  title        = {Analyzing Social Behavior of Software Developers Across Different Communication Channels},
  year         = {2013},
  abstract     = {Software developers use different project repositories (i.e., mailing list, bug tracking repositories, discussion forums etc.) to interact with each other or to solve software related problems. The growing interest in the usage of social media channels (i.e., Twitter, Facebook, LinkedIn) have also attracted the open source software community and software developers to adopt an identity in order to disseminate project-related information to a wider audience. Much research has been carried out to analyze the social behavior of software developers in different project repositories but so far no one has tried to study the social communication patterns of developers in other social media channels. We in this paper presents a new dimension to the social aspects of software developers and study if the social communication patterns of software developers is different on project repositories and social media channels (i.e., Twitter).},
  keywords     = {communication, developer, social media},
  url          = {http://index.ksi.edu/conf/seke/2013/cr/296.pdf},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/iqbal_a_et_al_june_2013.pdf},
  author       = {Iqbal, Aftab and Karnstedt, M. and Hausenblas, M.},
}

% Dataset paper: commit-based social network of Apache Software Foundation developers.
@proceedings{1503,
  title        = {{Apache} Commits: Social Network Dataset},
  year         = {2013},
  month        = may,
  abstract     = {Building non-trivial software is a social endeavor. Therefore, understanding the social network of developers is key to the study of software development organizations. We present a graph representation of the commit behavior of developers within the Apache Software Foundation for 2010 and 2011. Relationships between developers in the network represent collaborative commit behavior. Several similarity and summary metrics have been pre-calculated.
The data, along with the tools that were used to create it and some further discussion, can be found at: http://sequoia.cs.byu.edu/lab/?page=artifacts/apacheGraphs},
  author       = {MacLean, Alexander C. and Knutson, Charles D.},
}

% Dataset paper: Twitter screen names of Apache project members, donated to FLOSSmole.
@proceedings{1488,
  title        = {{Apache}-Affiliated {Twitter} Screen Names: A Dataset},
  year         = {2013},
  month        = may,
  abstract     = {This paper describes a new dataset containing Twitter screen names for members of the projects affiliated with the Apache Software Foundation (ASF). The dataset includes the confirmed Twitter screen names, as well as the real name as listed on Twitter, and the user identification as used within the Apache organization. The paper also describes the process used to collect and clean this data, and shows some sample queries for learning how to use the data. The dataset has been donated to the FLOSSmole project and is available for download (https://code.google.com/p/flossmole/downloads/detail?name=apacheTwitter2013-Jan.zip) or direct querying via a database client.},
  keywords     = {apache, dataset, twitter},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/apacheTwitterPREPRINT.pdf , https://flosshub.org/sites/flosshub.org/files/MSR\%20presentation.pdf},
  author       = {Squire, Megan},
}

% Conference paper; the "conference" entry type is only an alias of inproceedings.
@inproceedings{Paschalidou:2013:ADE:2490257.2490264,
  title        = {An application of data envelopment analysis to software quality assessment},
  booktitle    = {Proceedings of the 6th Balkan Conference in Informatics},
  series       = {BCI {\textquoteright}13},
  year         = {2013},
  pages        = {228--235},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Data Envelopment Analysis (DEA) is a non-parametric technique which involves the use of linear programming methods to measure the efficiency of a homogenous set of units. These units are known as Decision Making Units (DMUs) and defined by multiple input and output data.
Efficiencies are measured relative to a piece-wise surface (efficient frontier) which envelops the data, thus justifying the name of the technique. Although DEA has been mostly used in production economics, its application in the context of software quality evaluation seems to be a promising approach. This study provides an application of DEA to assess the evolution of two open-source software projects in terms of selected metric values for successive versions of each project. What is really interesting in DEA is that a single efficiency score is calculated for each version despite the often convoluted overall picture of the metric values. According to a simplified view of DEA, there are two categories of units, the efficient (onto the efficient frontier) and the inefficient ones. Each inefficient unit is characterized by a reference set of peers which involves all the efficient units {\textquotedblleft}operating{\textquotedblright} closer to that unit. Through the consideration of the reference set of the inefficient versions of each project, the metrics that require improvement, as well as the extent of improvement, could be estimated. These results could assist software developers in identifying design issues that require further improvement. Notwithstanding the fact that there are a number of issues to be further investigated, the applicability of DEA and other operations research tools in the context of software quality might yield interesting results. 
},
  keywords     = {dea, design metrics, software evolution, software quality},
  isbn         = {978-1-4503-1851-8},
  doi          = {10.1145/2490257.2490264},
  url          = {http://doi.acm.org/10.1145/2490257.2490264},
  author       = {Paschalidou, Georgia and Stiakakis, Emmanouil and Chatzigeorgiou, Alexander},
}

% Conference paper; the exported booktitle contained the conference name twice.
@inproceedings{1559,
  title        = {Applying a Rule-Based Natural Language Classifier to Open Source Requirements: a Demonstration of Theory Exploration},
  booktitle    = {2013 46th Hawaii International Conference on System Sciences (HICSS)},
  year         = {2013},
  pages        = {3158--3167},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Wailea, HI, USA},
  abstract     = {Open source projects requirements are mostly informal, text descriptions found in requests, forums, and other correspondence. Understanding of such requirements can provide insight into the nature of open source projects. Previously, we have demonstrated the Requirements Classifier for Natural Language (RCNL), which aids in NL requirements analysis. Herein, we demonstrate how the RCNL can aid in theory building. From its application to 16 open source projects, we conjecture a simple wave theory of requirements innovation: innovations expressed in requirements appear as a wave that is reflected in a subsequent wave of features that is reflected in a subsequent wave of product downloads.
Although the theory is a conjecture, the process of its exploration demonstrates how RCNL can be used to explore theories about open source projects -- theory exploration that would otherwise be intractable because of the difficulty in analyzing NL artifacts for requirements properties.},
  isbn         = {978-0-7695-4892-0},
  doi          = {10.1109/HICSS.2013.97},
  author       = {Vlas, Radu and Robinson, William N.},
}

% Chapter with its own page range in an edited volume, hence incollection rather than book.
% The export had fused the series name and the book title into the series field.
@incollection{1542,
  title        = {Authoritative Linked Data Descriptions of {Debian} Source Packages Using {ADMS.SW}},
  booktitle    = {Open Source Software: Quality Verification},
  series       = {IFIP Advances in Information and Communication Technology},
  volume       = {404},
  year         = {2013},
  pages        = {168--181},
  publisher    = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address      = {Berlin, Heidelberg},
  abstract     = {The Debian Package Tracking System is a Web dashboard for Debian contributors and advanced users. This central tool publishes the status of subsequent releases of source packages in the Debian distribution. It has been improved to generate RDF meta-data documenting the source packages, their releases and links to other packaging artifacts, using the ADMS.SW 1.0 model. This constitutes an authoritative source of machine-readable Debian {\textquotedblleft}facts{\textquotedblright} and proposes a reference URI naming scheme for Linked Data resources about Debian packages. This should enable the interlinking of these Debian package descriptions with other ADMS.SW or DOAP descriptions of FLOSS projects available on the Semantic Web also using Linked Data principles.
This will be particularly interesting for traceability with upstream projects whose releases are packaged in Debian, derivative distributions reusing Debian source packages, or with other FLOSS distributions.}, keywords = {debian}, isbn = {978-3-642-38928-3}, issn = {1868-422X}, doi = {10.1007/978-3-642-38928-3_12}, author = {Olivier Berger and Christian Bac}, editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto} } @article {1419, title = {Adoption of free/libre open source software in public organizations: factors of impact}, journal = {Information Technology \& People}, volume = {25}, year = {2012}, pages = {156-187}, chapter = {156}, abstract = {Purpose {\textendash} In this paper the authors aim to investigate the importance of factors for the adoption of free/libre open source software (FLOSS) in the public sector. They seek to evaluate how different factors impact during the initiation and implementation phases of the adoption process. Design/methodology/approach {\textendash} The authors base the methodological approach on two exploratory case studies with a contrasting result logic. They build a multi-level framework grounded both on literature review, and feedback from stakeholders. They then apply the framework to two case studies to better frame the findings. They consider phases of adoption (initiation, implementation) and the levels of adoption (technological, organizational, environmental, individual). Findings {\textendash} In the case studies, the authors found the importance of a strong and decision-centric management board to give the impulse for the initiation phase of the process. As perceived by the stakeholders, a strong governmental support is of paramount importance to increase the adoption at the public level, although in the case studies examined the initiation stage started from the impulse of a championing management. Both case studies passed the initiation phase successfully. 
Continuous employees{\textquoteright} training, organizational objectives consensus, and business process reengineering have been found important for the implementation phase. In the case study in which these factors were not in place, the implementation phase of adoption failed. Environmental factors {\textendash} although relevant for the initiation of the adoption process {\textendash} are less significant during the actual implementation of the adoption process, as the contrasting result logic from the case studies shows. Research limitations/implications {\textendash} The study refers to two public organizations in a specific environmental setting. No causality among factors has been inferred. Quantitative objective data have been used to determine the success of adoption, for qualitative data multiple sources have been used when possible to limit threats to validity. Practical implications {\textendash} The framework can be used by stakeholders in public organizations to better frame their adoption strategies and to compare results across institutions. Lessons learnt from the case studies can be useful to drive future adoptions of FLOSS. Originality/value {\textendash} The framework combines phases of adoption and levels making it possible to frame the analysis of the case studies. It has been operationalized with a set of metrics, and with a protocol for the case studies to increase replicability value.}, doi = {10.1108/09593841211232677}, url = {http://www.emeraldinsight.com/journals.htm?articleid=17036351}, author = {Rossi, Bruno and Russo, Barbara and Succi, Giancarlo} } @article {1400, title = {Analyzing and mining a code search engine usage log}, journal = {Empirical Software Engineering}, volume = {17}, year = {2012}, month = {8/2012}, pages = {424 - 466}, abstract = {This paper presents an analysis of a year long usage log of Koders, the first commercially available Internet-Scale code search engine (http://www.koders.com). 
The usage log comprises about ten million activities from more than three million users. Analysis of the usage data shows that despite of attracting a large number of visitors, Koders has a very sparse usage and that it lacks regular usage from many of its users. When compared to Web search, search behavior in Koders showed many similar patterns. A topic modeling analysis of the usage data shows what topics users of Koders are looking for. Observations on the prevalence of these topics among the users, and observations on how search and download activities vary across topics, lead to the conclusion that users who find code search engines usable are those who already know to a high level of specificity what to look for. This paper also presents a general categorization of these topics that provides insights on the different ways code search engine users express their queries. It identifies various forms of queries in Koders{\textquoteright}s log and the kinds of results addressed by the queries. It also provides several suggestions for improvements in code search engines based on the analysis of usage, topics, and query forms. The work presented in this paper is the first of its kind that reveals several insights on the usage of an Internet-Scale code search engine.}, keywords = {code search, koders, search, search engine, topics}, issn = {1573-7616}, doi = {10.1007/s10664-010-9144-6}, author = {Bajracharya, Sushil Krishna and Lopes, Cristina Videira} } @article {1395, title = {Automated topic naming: supporting cross-project analysis of software maintenance activities}, journal = {Empirical Software Engineering}, year = {2012}, abstract = {Software repositories provide a deluge of software artifacts to analyze. Researchers have attempted to summarize, categorize, and relate these artifacts by using semi-unsupervised machine-learning algorithms, such as Latent Dirichlet Allocation (LDA). 
LDA is used for concept and topic analysis to suggest candidate word-lists or topics that describe and relate software artifacts. However, these word-lists and topics are difficult to interpret in the absence of meaningful summary labels. Current attempts to interpret topics assume manual labelling and do not use domain-specific knowledge to improve, contextualize, or describe results for the developers. We propose a solution: automated labelled topic extraction. Topics are extracted using LDA from commit-log comments recovered from source control systems. These topics are given labels from a generalizable cross-project taxonomy, consisting of non-functional requirements. Our approach was evaluated with experiments and case studies on three large-scale Relational Database Management System (RDBMS) projects: MySQL, PostgreSQL and MaxDB. The case studies show that labelled topic extraction can produce appropriate, context-sensitive labels that are relevant to these projects, and provide fresh insight into their evolving software development activities. }, keywords = {LDA, maxdb, mysql, postgresql, topics}, issn = {1573-7616}, doi = {10.1007/s10664-012-9209-9}, author = {Hindle, Abram and Ernst, Neil A. and Godfrey, Michael W. and Mylopoulos, John} } @article {1381, title = {Adopting Free/Libre/Open Source Software Practices, Techniques and Methods for Industrial Use}, journal = {Journal of the Association for Information Systems}, volume = {12}, number = {1}, year = {2011}, abstract = {Today{\textquoteright}s software companies face the challenges of highly distributed development projects and constantly changing requirements. This paper proposes the adoption of relevant Free/Libre/Open Source Software (FLOSS) practices in order to improve software development projects in industry. Many FLOSS projects have proven to be very successful, producing high quality products with steady and frequent releases. 
This study aims to identify FLOSS practices that can be adapted for the corporate environment. To achieve this goal, a framework to compare FLOSS and industrial development methodologies was created. Three successful FLOSS projects were selected as study targets (the Linux Kernel, the FreeBSD operating system, and the JBoss application server), as well as two projects from Ericsson, a large telecommunications company. Based on an analysis of these projects, FLOSS best practices were tailored to fit industrial development environments. The final results consisted of a set of key adoption opportunities that aimed to improve software quality and overall development productivity by importing best practices from the FLOSS environment. The adoption opportunities were then validated at three large corporations.}, keywords = {freebsd, jboss, linux, linux kernel}, url = {http://aisel.aisnet.org/jais/vol12/iss1/1}, author = {Torkar, Richard and Minoves, Pau and Garrig{\'o}s, Janina} } @proceedings {1285, title = {Adoption of OSS Development Practices by the Software Industry: A Survey}, year = {2011}, month = {10/2011}, pages = {233-243}, publisher = {Springer}, abstract = {The paper presents a survey of aspects related to the adoption of Open Source Software by the software industry. The aim of this study was to collect data related to practices and elements in the development process of companies that influence the trust in the quality of the product by potential adopters. The work is part of the research done inside the QualiPSo project and was carried out using a qualitative study based on a structured questionnaire focused on perceptions of experts and development practices used by companies involved in the Open Source Software industry. The results of the survey confirm intuitive concerns related to the adoption of Open Source Software as: the selection of the license, the quality issues addressed, and the development process tasks inside Open Source Software projects. 
The study uncovered specific aspects related to trust and trustworthiness of the Open Source Software development process that we did not find in previous studies as: the standards implemented by the OSS project, the project{\textquoteright}s roadmap is respected, and the communication channels that are available.}, keywords = {qualipso, Survey}, author = {Petrinja, Etiel and Sillitti, Alberto and Succi, Giancarlo} } @proceedings {1288, title = {An Analysis of Author Contribution Patterns in Eclipse Foundation Project Source Code}, year = {2011}, note = {"We conducted an observational study on existing Eclipse projects by extracting author attribution data for Java source code files from git repositories." "Specifically, we consider entropy of source code by counting the number of lines attributed to each author."}, month = {10/2011}, pages = {269-281}, publisher = {Springer}, abstract = {Collaborative development is a key tenet of open source software, but if not properly understood and managed, it can become a liability. We examine author contribution data for the newest revision of 251,633 Java source files in 592 Eclipse projects. We use this observational data to analyze collaboration patterns within files, and to explore relationships between file size, author count, and code authorship. We calculate author entropy to characterize the contributions of multiple authors to a given file, with an eye toward understanding the degree of collaboration and the most common interaction patterns.}, keywords = {COLLABORATION, contribution, eclipse, entropy, java}, author = {Taylor, Quinn C. and Krein, Jonathan L. and MacLean, Alexander C. and Knutson, Charles D.} } @conference {1314, title = {Apples vs. 
oranges?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {"In this MSR challenge report, we compare the source code of two industrial grade Integrated Development Environments (IDE): Netbeans, developed by Oracle Corporation, and Eclipse, developed by the Eclipse Foundation. In the same spirit as [1], our goal is not to identify how similar or different they are, but to identify differences that, if not taken into consideration, might result in biased, and potentially erroneous conclusions."}, month = {05/2011}, pages = {246-249}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {We attempt to compare the source code of two Java IDE systems: Netbeans and Eclipse. The result of this experiment shows that many factors, if ignored, could risk a bias in the results, and we posit various observations that should be taken into consideration to minimize such risk. }, keywords = {eclipse, netbeans, source code}, isbn = {9781450305747}, doi = {10.1145/1985441.1985483}, author = {Davies, Julius and Daniel M. German} } @proceedings {1291, title = {Applying Open Source Practices and Principles in Open Innovation: The Case of the Demola Platform}, year = {2011}, month = {10/2011}, pages = {307-311}, publisher = {Springer}, abstract = {In numerous fields, businesses have to rely on rapid development and release cycles. Variant new ideas and concepts can emerge through open innovation as the participants are not limited to the company scope. This makes open innovation an increasingly appealing option for the industry. One such open innovation platform, Demola, allows university students to work on real life industrial cases of their own interest. 
We have identified similarities with its way of operation to open source software development and find that it offers a viable motivational, organizational and collaborative solution to open innovation.}, author = {Kilamo, Terhi and Hammouda, Imed and Kairamo, Ville and Rasanen, Petri} } @article {1324, title = {Are Developers Fixing Their Own Bugs?}, journal = {International Journal of Open Source Software and Processes}, volume = {3}, year = {2011}, note = {"The analysis is focused at the level of lines of code and it uses the information stored in the source code management system"}, pages = {23 - 42}, abstract = {The process of fixing software bugs plays a key role in the maintenance activities of a software project. Ideally, code ownership and responsibility should be enforced among developers working on the same artifacts, so that those introducing buggy code could also contribute to its fix. However, especially in FLOSS projects, this mechanism is not clearly understood: in particular, it is not known whether those contributors fixing a bug are the same introducing and seeding it in the first place. This paper analyzes the comm-central FLOSS project, which hosts part of the Thunderbird, SeaMonkey, Lightning extensions and Sunbird projects from the Mozilla community. The analysis is focused at the level of lines of code and it uses the information stored in the source code management system. The results of this study show that in 80\% of the cases, the bug-fixing activity involves source code modified by at most two developers. It also emerges that the developers fixing the bug are only responsible for 3.5\% of the previous modifications to the lines affected; this implies that the other developers making changes to those lines could have made that fix. 
In most of the cases the bug fixing process in comm-central is not carried out by the same developers than those who seeded the buggy code.}, keywords = {bug fixing, developers, loc, scm}, issn = {1942-3934}, doi = {10.4018/jossp.2011040102}, author = {Izquierdo-Cortazar, Daniel and Capiluppi, Andrea and Jesus M. Gonzalez-Barahona} } @proceedings {1295, title = {Aspects of an Open Source Software Sustainable Life Cycle}, year = {2011}, month = {10/2011}, pages = {325-329}, publisher = {Springer}, abstract = {In this paper we present a literature overview about OSS sustainability, considering not only financial resources, but also community growth, source code and tools management. Based on these aspects, we define an OSS life cycle that may contribute to OSS projects sustainability.}, keywords = {Financial Resources, OSS Communities, OSS Sustainability, software maintenance}, author = {Arantes, Flavia Linhalis and Freire, Fernanda Maria Pereira} } @article {1388, title = {Adoption of open source software in software-intensive organizations {\textendash} A systematic literature review}, journal = {Information and Software Technology}, volume = {52}, year = {2010}, month = {11/2010}, pages = {1133 - 1154}, abstract = {Context Open source software (OSS) is changing the way organizations develop, acquire, use, and commercialize software. Objective This paper seeks to identify how organizations adopt OSS, classify the literature according to these ways of adopting OSS, and with a focus on software development evaluate the research on adoption of OSS in organizations. Method Based on the systematic literature review method we reviewed publications from 24 journals and seven conference and workshop proceedings, published between 1998 and 2008. From a population of 24,289 papers, we identified 112 papers that provide empirical evidence on how organizations actually adopt OSS. Results We show that adopting OSS involves more than simply using OSS products. 
We moreover provide a classification framework consisting of six distinctly different ways in which organizations adopt OSS. This framework is used to illustrate some of the opportunities and challenges organizations meet when approaching OSS, to show that OSS can be adopted successfully in different ways, and to organize and review existing research. We find that existing research on OSS adoption does not sufficiently describe the context of the organizations studied, and it fails to benefit fully from related research fields. While existing research covers a large number of topics, it contains very few closely related studies. To aid this situation, we offer directions for future research. Conclusion The implications of our findings are twofold. On the one hand, practitioners should embrace the many opportunities OSS offers, but consciously evaluate the consequences of adopting it in their own context. They may use our framework and the success stories provided by the literature in their own evaluations. On the other hand, researchers should align their work, and perform more empirical research on topics that are important to organizations. Our framework may be used to position this research and to describe the context of the organization they are studying.}, keywords = {open source software, organizations, software development, Systematic literature review}, issn = {09505849}, doi = {10.1016/j.infsof.2010.05.008}, url = {http://www.sciencedirect.com/science/article/pii/S0950584910000972}, author = {Hauge, {\O}yvind and Ayala, Claudia and Conradi, Reidar} } @article {1331, title = {An Analysis of the Adoption of Open Source Software by Local Public Administrations}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {1 - 29}, abstract = {The wide diffusion of open source software (OSS) is driving discussion among scholars on a set of issues, including its adoption by public administrations (PA). 
Previous works discussed a few factors that drive the decision to adopt OSS and did not address the potential benefits in terms of e-government that OSS may bring to PA. This paper fills these gaps. The analysis is based on the Emilia-Romagna region of Italy and studies the adoption of software (both proprietary and open source) by local PA. The results show there is increased adoption of OSS in several different domains of application, both servers and desktop clients. Among the motivations to adopt OSS, dependence on software suppliers is important. Its adoption also positively affects the variety and extent of interactivity of local public e-services.}, keywords = {government}, issn = {1942-3934}, doi = {10.4018/jossp.2010070101}, author = {Rentocchini, Francesco and Tartari, Dimitri} } @article {1382, title = {Analysis of virtual communities supporting OSS projects using social network analysis}, journal = {Information and Software Technology}, volume = {52}, year = {2010}, month = {3/2010}, pages = {296 - 303}, abstract = {This paper analyses the behaviour of virtual communities for Open Source Software (OSS) projects. The development of OSS projects relies on virtual communities, which are built on relationships among members, being their final objective sharing knowledge and improving the underlying project. This study addresses the interactive collaboration in these kinds of communities applying social network analysis (SNA). In particular, SNA techniques will be used to identify those members playing a middle-man role among other community members. Results will illustrate the importance of this role to achieve successful virtual communities.}, keywords = {arm, email, Knowledge brokers, linux, mailing list, open source software, social network analysis, virtual communities}, issn = {09505849}, doi = {10.1016/j.infsof.2009.10.007}, url = {http://www.sciencedirect.com/science/article/pii/S0950584909001888}, author = {Toral, S.L. and Mart{\'\i}nez-Torres, M.R. 
and Barrero, F.} } @conference {1256, title = {Analyzing Leadership Dynamics in Distributed Group Communication}, booktitle = {2010 43rd Hawaii International Conference on System Sciences (HICSS 2010)}, year = {2010}, note = {"Our analysis examines the communication patterns in two FLOSS development projects, Fire and Gaim" "These data were imported into a database to allow automated analysis. The Fire data set includes about 1,800 events in the user email list, 7,800 messages in the developer venues, and 1,300 events in the combined trackers, spanning a period of 54 months. The significantly larger Gaim data set included over 41,000 events in the user forum, over 30,000 events in the developer venues, and about 20,000 events in the trackers, generated over 78 months." "The dynamic network analysis was performed using a scientific workflow tool, Taverna Workbench"}, pages = {1 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {We apply social network analysis (SNA) to examine the dynamics of leadership in distributed groups, specifically Free/Libre Open Source Software development projects, and its relation to group performance. Based on prior work on leadership in distributed groups, we identify leaders with those who make the highest level of contribution to the group and assess the degree of leadership by measuring centralization of communications. We compare the dynamics of leadership in two FLOSS projects, one more and one less effective. We find that in both projects, centralization was higher in developer-oriented communications venues than in user-oriented venues, suggesting higher degrees of leadership in developer venues. However, we do not find a consistent relation between centralization and effectiveness. 
We suggest that SNA can instead be useful for identifying interesting periods in the history of the project, e.g., periods where the leadership of the project is in transition.}, keywords = {core, DYNAMICS, email, email archives, fire, flossmole, gaim, leadership, mailing list, project success, social network analysis, srda}, isbn = {978-1-4244-5509-6}, doi = {10.1109/HICSS.2010.62}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-06-02.pdf}, author = {Kevin Crowston and Andrea Wiggins and Howison, James} } @conference {957, title = {Assessment of issue handling efficiency}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {94 - 97}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {We mined the issue database of GNOME to assess how issues are handled. How many issues are submitted and resolved? Does the backlog grow or decrease? How fast are issues resolved? Does issue resolution speed increase or decrease over time? In which subproject are issues handled most efficiently? To answer such questions, we apply several visualization and quantification instruments to the raw issue data. In particular, we aggregate issues into four risk categories, based on their resolution time. 
These categories are the basis both for visualizing and ranking, which are used in concert for issue database exploration.}, keywords = {bug reports, bug tracking, classification, gnome, msr challenge, visualization}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463292}, attachments = {https://flosshub.org/sites/flosshub.org/files/94bluijtenMSR2010.pdf}, author = {Luijten, Bart and Visser, Joost and Zaidman, Andy} } @article {1816, title = {Atratividade de projetos de software livre: import{\^a}ncia te{\'o}rica e estrat{\'e}gias para administra{\c c}{\~a}o}, journal = {Revista de Administra{\c c}{\~a}o de Empresas}, volume = {50}, year = {2010}, month = {2010}, pages = {424 - 438}, abstract = {Milhares de Projetos de Software Livre (PSL) foram e continuam sendo criados na Internet. Esse cen{\'a}rio aumenta as oportunidades de colabora{\c c}{\~a}o tanto quanto acirra a concorr{\^e}ncia por usu{\'a}rios e contribuidores, que elevariam esses projetos a n{\'\i}veis superiores aos que seriam alcan{\c c}ados por seus fundadores sozinhos. E dado que o aprimoramento por meio de colabora{\c c}{\~a}o {\'e} o principal objetivo dos fundadores de PSL, a import{\^a}ncia de entender e administrar a capacidade de atrair usu{\'a}rios e contribuidores fica estabelecida. 
Para auxiliar pesquisadores e fundadores nesse desafio, o conceito de atratividade {\'e} introduzido neste artigo, que desenvolve um ferramental te{\'o}rico-gerencial sobre as causas, indicadores e consequ{\^e}ncias da atratividade, viabilizando sua administra{\c c}{\~a}o estrat{\'e}gica.}, keywords = {flossmole}, doi = {10.1590/S0034-75902010000400007}, author = {Santos Jr, Carlos Denner dos} } @conference {963, title = {Automated dependency resolution for open source software}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {130 - 140}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Opportunities for software reuse are plentiful, thanks in large part to the widespread adoption of open source processes and the availability of search engines for locating relevant artifacts. One challenge presented by open source software reuse is simply getting a newly downloaded artifact to build/run in the first place. The artifact itself likely reuses other artifacts, and so depends on their being located to function properly. While merely tedious in the individual case, this can cause serious difficulties for those seeking to study open source software. It is simply not feasible to manually resolve dependencies for thousands of projects, and many forms of analysis require declarative completeness. In this paper we present a method for automatically resolving dependencies for open source software. It works by cross-referencing a project{\textquoteright}s missing type information with a repository of candidate artifacts. We have implemented this method on top of the Sourcerer, an infrastructure for the large-scale indexing and analysis of open source code. The performance of our resolution algorithm was evaluated in two parts. 
First, for a small number of popular open source projects, we manually examined the artifacts suggested by our system to determine if they were appropriate. Second, we applied the algorithm to the 13,241 projects in the Sourcerer managed repository to evaluate the rate of resolution success. The results demonstrate the feasibility of this approach, as the algorithm located all of the required artifacts needed by 3,904 additional projects, increasing the percentage of declaratively complete projects in Sourcerer from 39\% to 69\%.}, keywords = {dependencies, java, source code, sourcerer}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463346}, author = {Ossher, Joel and Bajracharya, Sushil and Lopes, Cristina} } @article {1348, title = {An Agent Simulation Study on Conflict, Community Climate and Innovation in Open Source Communities}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {1 - 25}, abstract = {More than ever the complexity of innovation requires group efforts, as teams of scientists and engineers from diverse backgrounds work together to solve problems. One of the significant problems in understanding emergence of innovation involves how virtual innovation organizations and communities govern and coordinate to maximize innovation output. An agent simulation study is conducted to examine the impact of culture and conflict management styles on collective creativity in open source innovation systems. 
Findings suggest that decentralized coordination schemes such as emergent selection such as found in utility communities and moderate degrees of assertiveness and cooperation for conflict management result in higher incidence of innovation.}, issn = {1942-3934}, doi = {10.4018/jossp.2009100101}, author = {Yilmaz, Levent} } @conference {839, title = {Amassing and indexing a large sample of version control systems: towards the census of public source code history}, booktitle = {6th IEEE Working Conference on Mining Software Repositories}, year = {2009}, note = {Related work: "For example, FLOSSMole [8] collects the list of projects and their metadata from SourceForge, FreshMeat, Free Software Foundation (FSF), RubyForge, and ObjectWeb. " "The list of home pages from open source directories such as FSF or FLOSSMole could provide a focused set of web pages for the search" "To get the list of projects for SourceForge we used FLOSSMole project [8] that collects the list of SourceForge projects and their metadata. We use project ID, because it was used to specify the VCS URL within the SourceForge repository."}, month = {May 16{\textendash}17}, abstract = {The source code and its history represent the output and process of software development activities and are an invaluable resource for study and improvement of software development practice. While individual projects and groups of projects have been extensively analyzed, some fundamental questions, such as the spread of innovation or genealogy of the source code, can be answered only by considering the entire universe of publicly available source code and its history. We describe methods we developed over the last six years to gather, index, and update an approximation of such a universal repository for publicly accessible version control systems and for the source code inside a large corporation. While challenging, the task is achievable with limited resources. 
The bottlenecks in network bandwidth, processing, and disk access can be dealt with using inherent parallelism of the tasks and suitable tradeoffs between the amount of storage and computations, but a completely automated discovery of public version control systems may require enticing participation of the sampled projects. Such universal repository would allow studies of global properties and origins of the source code that are not possible through other means.}, keywords = {bazaar, cvs, flossmole, git, mercurial, source code, sourceforge, subversion, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/11amassing.pdf}, author = {Audris Mockus} } @conference {1809, title = {On the Analysis of Contributions from Privileged Users in Virtual Open Communities}, booktitle = {2009 42nd Hawaii International Conference on System Sciences}, year = {2009}, pages = {1 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Waikoloa, Hawaii, USA}, abstract = {Collaborative projects built around virtual communities on the Internet have gained momentum over the last decade. Nevertheless, their rapid growth rate rises some questions: which is the most effective approach to manage and organize their content creation process? Can these communities scale, controlling their projects as their size continues to grow over time? To answer these questions, we undertake a quantitative analysis of privileged users in FLOSS development projects and in Wikipedia. From our results, we conclude that the inequality level of user contributions in both types of initiatives is remarkably distinct, even though both communities present almost identical patterns regarding the number of distinct contributors per file (in FLOSS projects) or per article (in Wikipedia). 
As a result, totally open projects like Wikipedia can effectively deal with faster growing rates, while FLOSS projects may be affected by bottlenecks on committers who play critical roles.}, isbn = {978-0-7695-3450-3}, doi = {10.1109/HICSS.2009.328}, author = {Ortega, Felipe and Izquierdo-Cort{\'a}zar, Daniel and Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Gregorio Robles} } @conference {577, title = {Analysis of Open Source Software Development Iterations by Means of Burst Detection Techniques}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {83--93}, publisher = {Springer}, organization = {Springer}, chapter = {9}, abstract = {A highly efficient bug fixing process and quick release cycles are considered key properties of the open source software development methodology. In this paper, we study the relation between code activities (such as lines of code added per commit), bug fixing activities, and software release dates in a subset of open source projects. To study the phenomenon, we gathered a large data set about the evolution of 5 major open source projects. We compared activities by means of a burst detection technique to discover temporal peaks in time-series. We found quick adaptation of issue tracking activities in proximity of releases, and a distribution of coding activities across releases. Results show the importance of the application type/domain for the evaluation of the development process. 
}, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_9}, attachments = {https://flosshub.org/sites/flosshub.org/files/Analysis\%20of\%20Open\%20Source\%20Software.pdf}, author = {Rossi, Bruno and Russo, Barbara and Succi, Giancarlo} } @proceedings {847, title = {Analyzing Software Licenses in Open Architecture Software Systems}, year = {2009}, month = may, address = {Vancouver, Canada}, author = {Alspaugh, Thomas and Hazeline U. Asuncion and Walt Scacchi} } @conference {606, title = {Assessing FLOSS Communities: An Experience Report from the QualOSS Project}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {364}, publisher = {Springer}, organization = {Springer}, chapter = {38}, abstract = {This paper presents work done in the QualOSS (Quality of Open Source Software) research project, which aims at building a methodology and tools to help in the assessment of the quality of FLOSS (free, libre, open source software) endeavors. In particular, we introduce the research done to evaluate the FLOSS endeavor communities. Following the Goal-Question-Metric paradigm, QUALOSS describes goals, the associated questions and then metrics that allow to answer the questions. 
}, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_38}, attachments = {https://flosshub.org/sites/flosshub.org/files/Assessing\%20FLOSS\%20Communities.pdf}, author = {Izquierdo-Cortazar, Daniel and Gregorio Robles and Gonz{\'a}lez-Barahona, Jes{\'u}s and Deprez, Jean-Christophe} } @conference {937, title = {Assigning bug reports using a vocabulary-based expertise model of developers}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {131--140}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {For popular software systems, the number of daily submitted bug reports is high. Triaging these incoming reports is a time consuming task. Part of the bug triage is the assignment of a report to a developer with the appropriate expertise. In this paper, we present an approach to automatically suggest developers who have the appropriate expertise for handling a bug report. We model developer expertise using the vocabulary found in their source code contributions and compare this vocabulary to the vocabulary of bug reports. We evaluate our approach by comparing the suggested experts to the persons who eventually worked on the bug. 
Using eight years of Eclipse development as a case study, we achieve 33.6\% top-1 precision and 71.0\% top-10 recall.}, keywords = {bug reports, bugzilla, develect, developers, eclipse, expertise, scm}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069491}, attachments = {https://flosshub.org/sites/flosshub.org/files/131AssigningBugReports.pdf}, author = {Matter, Dominique and Kuhn, Adrian and Nierstrasz, Oscar} } @conference {605, title = {Assurance Evaluation for OSS Adoption in a Telco Context}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {363}, publisher = {Springer}, organization = {Springer}, chapter = {37}, abstract = {Software Assurance (SwA) is a complex concept that involves different stages of a software development process and may be defined differently depending on its focus, as for instance software quality, security, or dependability. In Computer Science, the term assurance is referred to all activities necessary to provide enough confidence that a software product will satisfy its users{\textquoteright} functional and non-functional requirements. }, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_37}, attachments = {https://flosshub.org/sites/flosshub.org/files/Assurance\%20Evaluation\%20for\%20OSS.pdf}, author = {Ardagna, Claudio and Banzi, Massimo and Damiani, Ernesto and El Ioini, Nabil and Frati, Fulvio} } @conference {930, title = {Author entropy vs. 
file size in the GNOME suite of applications}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {91--94}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {We present the results of a study in which author entropy was used to characterize author contributions per file. Our analysis reveals three patterns: banding in the data, uneven distribution of data across bands, and file size dependent distributions within bands. Our results suggest that when two authors contribute to a file, large files are more likely to have a dominant author than smaller files.}, keywords = {author entropy, contributions, gnome, msr challenge}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069484}, author = {Casebolt, Jason R. and Krein, Jonathan L. and MacLean, Alexander C. and Knutson, Charles D. and Delorey, Daniel P.} } @conference {943, title = {Automatic labeling of software components and their evolution using log-likelihood ratio of word frequencies in source code}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {175--178}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {As more and more open-source software components become available on the Internet we need automatic ways to label and compare them. For example, a developer who searches for reusable software must be able to quickly gain an understanding of retrieved components. This understanding cannot be gained at the level of source code due to the semantic gap between source code and the domain model. 
In this paper we present a lexical approach that uses the log-likelihood ratios of word frequencies to automatically provide labels for software components. We present a prototype implementation of our labeling/comparison algorithm and provide examples of its application. In particular, we apply the approach to detect trends in the evolution of a software system.}, keywords = {frequency, hapax, information retrieval, java, junit, keywords, labeling, source code}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069499}, attachments = {https://flosshub.org/sites/flosshub.org/files/175AutomaticLabeling.pdf}, author = {Kuhn, Adrian} } @conference {544, title = {Adoption of Open Source in the Software Industry}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {211--221}, publisher = {Springer}, organization = {Springer}, chapter = {17}, abstract = {Is Open Source Software (OSS) undergoing a transformation to a more commercially viable form? We have performed a survey to investigate the adoption of OSS in the Norwegian software industry. The survey was based on an extensive screening of software companies, with more than 700 responses. The survey results support the transformation predicted by Fitzgerald [4]. Close to 50\% of the software industry integrate OSS components into vertical solutions serving all major business sectors. In addition, more than 30\% of the 95 respondents in our survey have more than 40\% of their income from OSS related services or software. The extensive adoption of OSS in the software industry may be a precursor of the OSS adoption in other business sectors. 
}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_17}, attachments = {https://flosshub.org/sites/flosshub.org/files/Adaption\%20of\%20Open\%20Source.pdf}, author = {Hauge, {\O}yvind and S{\o}rensen, Carl-Fredrik and Conradi, Reidar} } @conference {1208, title = {Advances in the Sourceforge Research Data Archive}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, pages = {25--29}, abstract = {The SourceForge Research Data Archive (SRDA), located at http://zerlot.cse.nd.edu, is a collection of Open Source Software (OSS) data and resources [6]. Over 100 researchers worldwide use the archive for research in many fields. In this paper, we describe the recent changes, the work in progress, and future plans for making the archive easier to use and for allowing more advanced research to be done with the data available.}, keywords = {forge, forges, repositories, repository, sourceforge, srda}, attachments = {https://flosshub.org/sites/flosshub.org/files/srda2008.pdf}, author = {Van Antwerp, Matthew and Madey, Greg} } @article {denBesten2008316, title = {The allocation of collaborative efforts in open-source software}, journal = {Information Economics and Policy}, volume = {20}, number = {4}, year = {2008}, note = {"we have selected a set of 10 large open-source projects" apache, cvs, gaim, gcc, ghostscript, mozilla, netbsd, openssh, postgresql, python "Our data were extracted from logs of development activity generated by software version control systems. For each project in the selection, we extracted CVS development logs" "We notably computed for each file in the sample, and for each month in its history, the number of distinct maintainers that had committed a change during that month, and the number of commits, the blocks of code addition, each file had received during that month." 
"other variables used in the regressions are proxies for the size, age, and granularity of files; the size of a file is represented as its number of lines of code (LOCs), its age by its creation date (Youth), and its granularity by the number of functions it contains."}, pages = {316--322}, abstract = {The article investigates the allocation of collaborative efforts among core developers (maintainers) of open-source software by analyzing on-line development traces (logs) for a set of 10 large projects. Specifically, we investigate whether the division of labor within open-source projects is influenced by characteristics of software code. We suggest that the collaboration among maintainers tends to be influenced by different measures of code complexity. We interpret these findings by providing preliminary evidence that the organization of open-source software development would self-adapt to characteristics of the code base, in a {\textquoteright}stigmergic{\textquoteright} manner.}, keywords = {age, apache, complexity, cvs, division of labor, functions, gaim, gcc, ghostscript, lines of code, loc, log files, mozilla, netbsd, openssh, postgresql, python, revision control, scm, size, source code, Stigmergy, version control}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.06.003}, url = {http://www.sciencedirect.com/science/article/B6V8J-4SSG4PN-1/2/88b3824c30a31c18929d8a5ca6d64f62}, author = {den Besten, Matthijs and Jean-Michel Dalle and Galia, Fabrice} } @conference {861, title = {Altruism, Selfishness, and Destructiveness on the Social Web}, booktitle = {5th International Conference on Adaptive Hypermedia and Adaptive Web-Based Systems}, year = {2008}, month = jul, day = {29}, pages = {9--11}, publisher = {Springer-Verlag}, organization = {Springer-Verlag}, address = {Hannover, Germany}, isbn = {978-3-540-70984-8}, author = {Riedl, J.}, editor = {Nejdl, W. and Kay, J. and Pu, P. and Herder, E.} } @conference {Hill:2008:AAM:1370750.1370771, title = {AMAP: 
automatically mining abbreviation expansions in programs to enhance software maintenance tools}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {79--88}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {When writing software, developers often employ abbreviations in identifier names. In fact, some abbreviations may never occur with the expanded word, or occur more often in the code. However, most existing program comprehension and search tools do little to address the problem of abbreviations, and therefore may miss meaningful pieces of code or relationships between software artifacts. In this paper, we present an automated approach to mining abbreviation expansions from source code to enhance software maintenance tools that utilize natural language information. Our scoped approach uses contextual information at the method, program, and general software level to automatically select the most appropriate expansion for a given abbreviation. We evaluated our approach on a set of 250 potential abbreviations and found that our scoped approach provides a 57\% improvement in accuracy over the current state of the art.}, keywords = {automatic abbreviation expansion, azureus, itext.net, liferay, maintenance, natural language, openoffice.org, program comprehension, source code, tiger envelopes, tools}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370771}, url = {http://doi.acm.org/10.1145/1370750.1370771}, attachments = {https://flosshub.org/sites/flosshub.org/files/p79-hill.pdf}, author = {Hill, Emily and Fry, Zachary P. 
and Boyd, Haley and Sridhara, Giriprasad and Novikova, Yana and Pollock, Lori and Vijay-Shanker, K.} } @conference {534, title = {Analysis of Coordination Between Developers and Users in the Apache Community}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {81--92}, publisher = {Springer}, organization = {Springer}, chapter = {7}, abstract = {Coordination is one of the keys for the success of open source software (OSS) communities because geographically distributed members need to collaborate on their work using communication tools (e.g., mailing lists, bulletin board systems, bug tracking systems, and so on). In this paper, we investigated the informal social structure among developers and users by analyzing two mailing lists of developers and users in the Apache community based on betweenness centrality, one centrality measure proposed by Freeman. From the analysis results, we found that (1) participants with high betweenness coordinated activities between developers and users and (2) some participants have been functioning as coordinators in the community for a long time. 
}, keywords = {apache, email, mailing list}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_7}, attachments = {https://flosshub.org/sites/flosshub.org/files/Analysis\%20of\%20Coordination.pdf}, author = {Kamei, Yasutaka and Matsumoto, Shinsuke and Maeshima, Hirotaka and Onishi, Yoji and Ohira, Masao and Matsumoto, Ken-ichi} } @conference {Wermelinger:2008:AEE:1370750.1370783, title = {Analyzing the evolution of eclipse plugins}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {133--136}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Eclipse is a good example of a modern component-based complex system that is designed for long-term evolution, due to its architecture of reusable and extensible components. This paper presents our preliminary results about the evolution of Eclipse{\textquoteright}s architecture, based on a lightweight and scalable analysis of the metadata in Eclipse{\textquoteright}s sources. We find that the development of Eclipse follows a systematic process: most architectural changes take place in milestones, and maintenance releases only make exceptional changes to component dependencies. 
We also found a stable architectural core that remains since the first release.}, keywords = {architectural evolution, cvs, eclipse, metadata, msr challenge, releases, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370783}, url = {http://doi.acm.org/10.1145/1370750.1370783}, author = {Wermelinger, Michel and Yu, Yijun} } @conference {563, title = {Applying Open Source Development Practices Inside a Company}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {381--387}, publisher = {Springer}, organization = {Springer}, chapter = {36}, abstract = {Open Source Software development is seen as a panacea by many companies. The promise of community-style development, innovation and cost savings drive the wider adoption of OSS in companies. However, it is still difficult to institutionalize the open and agile culture of sharing innovation especially into larger departmentalized organizations. The aim of this research paper is to investigate the characteristics of one successful OSS development implementation approach limited inside a company (Inner source). Based on our data, we argue that there are possibilities for employing OSS as a new kind of development process within a company and leveraging thus the innovation potential inside the company. 
}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_36}, attachments = {https://flosshub.org/sites/flosshub.org/files/Applying\%20Open\%20Source\%20Development.pdf}, author = {Juho Lindman and Rossi, Matti and Marttiin, Pentti} } @conference {Wang:2008:ADD:1368088.1368151, title = {An approach to detecting duplicate bug reports using natural language and execution information}, booktitle = {Proceedings of the 30th international conference on Software engineering}, series = {ICSE {\textquoteright}08}, year = {2008}, pages = {461--470}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {An open source project typically maintains an open bug repository so that bug reports from all over the world can be gathered. When a new bug report is submitted to the repository, a person, called a triager, examines whether it is a duplicate of an existing bug report. If it is, the triager marks it as DUPLICATE and the bug report is removed from consideration for further work. In the literature, there are approaches exploiting only natural language information to detect duplicate bug reports. In this paper we present a new approach that further involves execution information. In our approach, when a new bug report arrives, its natural language information and execution information are compared with those of the existing bug reports. Then, a small number of existing bug reports are suggested to the triager as the most similar bug reports to the new bug report. Finally, the triager examines the suggested bug reports to determine whether the new bug report duplicates an existing bug report. We calibrated our approach on a subset of the Eclipse bug repository and evaluated our approach on a subset of the Firefox bug repository. 
The experimental results show that our approach can detect 67\%-93\% of duplicate bug reports in the Firefox bug repository, compared to 43\%-72\% using natural language information alone.}, keywords = {bug report, duplicate bug report, execution information, information retrieval, natural language}, isbn = {978-1-60558-079-1}, doi = {10.1145/1368088.1368151}, url = {http://doi.acm.org/10.1145/1368088.1368151}, author = {Wang, Xiaoyin and Zhang, Lu and Xie, Tao and Anvik, John and Sun, Jiasu} } @conference {548, title = {Archetypal Internet-Scale Source Code Searching}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {257--263}, publisher = {Springer}, organization = {Springer}, chapter = {21}, abstract = {Programmers often search for Open Source code to use in their projects. To understand how and why programmers search for source code, we conducted a web-based survey and collected data from 69 respondents, including 58 specific examples of searches. Analyzing these anecdotes, we found that they could be categorized along two orthogonal dimensions: motivation (reuse vs. reference example) and size of search target. The targets of these searches could range in size from a block (a few lines of code) to a subsystem (e.g. library or API), to an entire system. Within these six combinations of motivations and target sizes, nine repeating motifs, or archetypes, were created to characterize Internet-scale source code searching. Tools used for searching and the criteria for selecting a component are also discussed. We conclude with guidance on how these archetypes can inform better evaluation of Internet-scale code search engines, as well as the design of new features for these tools. 
}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_21}, attachments = {https://flosshub.org/sites/flosshub.org/files/Archetypal\%20Internet-Scale\%20Source.pdf}, author = {Umarji, Medha and Sim, Susan and Lopes, Crista} } @conference {1211, title = {Are FLOSS developers committing to CVS/SVN as much as they are talking in mailing lists? Challenges for Integrating data from Multiple Repositories}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, month = sep, pages = {49--54}, abstract = {This paper puts forward a framework for investigating Free and Open Source Software (F/OSS) developers activities in both source code and mailing lists repositories. We used data dumps of fourteen projects from the FLOSSMetrics (FM) retrieval system. Our intentions are (i) to present a possible methodology, its advantages and disadvantages which can benefit future researchers using some aspects of the FM retrieval system{\textquoteright}s data dumps, and (ii) discuss our initial research results on the contributions developers make to both coding and lists activities.}, keywords = {cvs, cvsanaly, developers, email, email archives, flossmetrics, mailing list, mlstats, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/49-542008.pdf}, author = {Sowe, Sulayman K. and Samoladas, Ioannis and Ioannis Stamelos and Lefteris Angelis} } @conference {556, title = {Assessing Innovation in the Software Sector: Proprietary vs. FOSS Production Mode. Preliminary Evidence from the Italian Case}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {325--331}, publisher = {Springer}, organization = {Springer}, chapter = {29}, abstract = {Innovation in the software sector is a widely debated issue. 
Which are the most important dimensions to assess innovation in this field? Can we measure innovative processes carried out by software companies and what kind of innovation do they develop? Are FOSS solutions more innovative than proprietary ones? These are the research questions we endeavor to answer in this paper providing some empirical evidence, obtained using an original methodology of collecting experts{\textquoteright} evaluations on the innovation level of 134 solutions provided by a group of Italian Small and Medium Enterprises. }, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_29}, attachments = {https://flosshub.org/sites/flosshub.org/files/Assessing\%20Innovation\%20in\%20the\%20Software\%20Sector.pdf}, author = {Dario Lorenzi and Cristina Rossi} } @conference {flosswp408, title = {Asynchronous Decision-Making in Distributed Teams}, booktitle = {Computer Supported Cooperative Work 2008}, year = {2008}, month = jul, pages = {1--2}, abstract = {Extensive use of CSCW applications can influence group decision-making practices. Unlike previous research focused on the influence of synchronous ICTs, our study examines how group decisions are made in asynchronous communication channels. Our inductive qualitative analysis of 360 decision episodes of six FLOSS projects revealed diversity in decision-making practices, which appears to be related to differences in project effectiveness and task type. We also find that standardization of procedures through CSCW tools transforms the nature of some software development work from non-routine to standard procedure.}, attachments = {https://flosshub.org/sites/flosshub.org/files/CSCW2008FLOSSposter_sub.pdf , https://flosshub.org/sites/flosshub.org/files/CSCW2008Poster11x17sub.pdf}, author = {Li, Qing and Robert Heckman and Eileen Allen and Kevin Crowston and Eseryel, U. 
Yeliz and James Howison and Andrea Wiggins} } @conference {1210, title = {Author Entropy: A Metric for Characterization of Software Authorship Patterns}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, note = {used flossmole to get sample of SF developers}, pages = {42--47}, abstract = {We propose the concept of author entropy and describe how file-level entropy measures may be used to understand and characterize authorship patterns within individual files, as well as across an entire project. As a proof of concept, we compute author entropy for 28,955 files from 33 open-source projects. We explore patterns of author entropy, identify techniques for visualizing author entropy, and propose avenues for further study. }, keywords = {developers, entropy, flossmole, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/entropy2008.pdf}, author = {Taylor, Quinn C. and Stevenson, James E. and Delorey, Daniel P. and Knutson, Charles D.} } @article {flosswp362, title = {An Analysis of Involvement of HCI Experts in Distributed Software Development: Practical Issues}, year = {2007}, month = jul, abstract = {Abstract. Traditionally, free and open source software (F/OSS) developers have focused more on the features of a specific application, most of the time ignoring the necessity of user-centric design. This has mainly stemmed from the fact that developers have little interaction with HCI studies, knowledge bases and reports. Moreover, the lack of user interface designers has resulted in a lack of awareness of this area. As a consequence, the user centric design phenomenon within F/OSS applications has been neglected. 
In this paper, we have mentioned various problems that would slow down a F/OSS project development towards a user-engineered software, and investigated the ways that HCI experts and developers interact with each other and researched bug reporting systems by means of eligibility to issue usability bugs. For the conclusion part, we have explored possible ways to achieve a user-centric design in a project with asynchronous interaction among geographically distributed developers.}, attachments = {https://flosshub.org/sites/flosshub.org/files/cetin-verzulli-sandra}, author = {G{\"o}rkem {\c C}etin and Damiano Verzulli and Sandra Frings} } @article {flosswp370, title = {Analysis of the Informal Learning Environment of FLOSS (Free / Libre Open Source Software) Communities}, year = {2007}, month = {July}, abstract = {The general concept behind Free/Libre/Open Source Software (henceforth FLOSS) is making the source code of software accessible to anyone who wants to obtain it. Binaries or executables are available via the Internet and can be {\textquoteright}freely{\textquoteright} downloaded and used. Prolific licensing agreements such as the General Public License (GPL) define the rights users have over the product. In the literature, many terms are in use to describe the FLOSS phenomenon. Notably, Free Software (FS), a term used by Free Software Foundation (FSF) and Open Source Software (OSS) used by the Open Source Initiative (OSI). In addition, Free Open Source Software (FOSS), Libre Software (LS), and Free/Libre/Open Source Software (FLOSS) are terms frequently used by researchers. In this report the term FLOSS is used to refer to users{\textquoteright} freedom to use, modify, distribute, or even sell the software with little obligations as in propriety or closed source software. When a user modifies the software, he/she can either choose to keep changes made private or altruistically return them to the FLOSS community so that everyone can benefit from his derived work. 
FLOSS development did not begin with the inception of the Linux operating system, in 1991. Rather, the concept existed since the formation of SHARE {\textendash} a working group set up to coordinate the programming work of the IBM 701, in 1952 (Sowe, 2007b). In the academic environment, software development and distribution among researchers and departments is not new, and goes back to the earliest days of software in university environments when software was developed to solve practical problems and could be freely shared. However, what is now true is that FLOSS has fundamentally changed the way we develop and distribute software. Enable by the Internet, geographically distributed individuals voluntarily contribute to a project by means of the Bazaar model (Raymond, 1999). Extensive peer collaboration allows project participants to write code, debug, test, and integrate software. Communities in various projects provide support services such as suggestions for products features, act as distributing organs, answer queries, help new members having problems with the software. FLOSS development not only exemplifies a viable software development approach, but also a model for the creation of self-learning and self-organizing communities (Sowe, et al. 2006c). FLOSS is also a virtual learning context in which both professional software developers and novice users benefit by leveraging their knowledge and information access repertoire. The context enable them to participate at their own convenience and learn (coding or other software related task) at their own pace (Sowe, et al. 2005; page 297). Furthermore, learners can conclude on the learning scope themselves (what to do) and decide on the method of acquiring the knowledge (how to do it). Knowledge acquisition is accomplished, for example, by having access to a large code-base, studying online documentation, asking more experienced members for assistance. 
In essence knowledge is acquired in FLOSS through learning by doing, which represents the drill-and-practice approach in normal constructivist environments. However, the FLOSS virtual learning context is not without its downside. Apart from physical isolation and detachment from face-to-face interaction commonly associated with virtual learning environments, learning in FLOSS requires access to Internet and moderate computer facilities. In addition, a high degree of computer literacy, reading, writing, and typing is required to participate effectively. The environment is also not conflict free. Flaming, the feeling of being ignored in a dominant discussion, disinterestedness, long delays in receiving responses from communities, the expenditure of searching through discussion archives to see issues previously raised in the communities, access rights to participate in some community activities are all major concerns in the management of the FLOSS learning context (Sowe, et al. 2005). In recent times, FLOSS is making inroads not only in business and software industries but in colleges and universities as well. There is increased interest in the FLOSS learning environment (Sowe, et al., 2004; Bacon and Dillion, 2006) and in FLOSS projects as bazaars of learning (Sowe, et al., 2006). As Faber (2002) noted, FLOSS is both an alternative teaching methodology and an educational model. The main objective of FLOSSCom is using the principles of informal learning environments of FLOSS communities to improve Information and Communication Technology (ICT) supported formal education. Despite the influence and popularity of FLOSS and the benefit inherent in its methodology, educational institutions have been slow to adapt. This can partly be due to the fact that the FLOSS environment is fundamentally different from the formal learning environment in most institutions. The focus of this report is to collate and report on the learning activities of individuals in various FLOSS communities. 
The report benefits from the experiences and expositions of various authors in order to provide a synergy and a fresh look into the learning environment of FLOSS communities. FLOSS communities, like other online communities (e.g. Community of Practice (CoPs)) have many interrelated elements that define the dynamics of the community. As such, this report is divided into 5 major sections, each focusing on a specific aspect of the learning environment of FLOSS communities with a 6th section summarizing the main findings. Section 1: The first part of the report is an introduction to FLOSS. The historical account of FLOSS is presented together with the many meanings of the term. This is followed by the demographic distribution of FLOSS participants and an explanation of what FLOSS communities are. Section 2: The next chapter examines the role of learning and knowledge exchange within the FLOSS community. It includes a description of the composition and roles of members within communities and what motivates various groups of individuals to participate in FLOSS. Section 3: After clarifying the role of learning in the FLOSS community the next chapter focuses on the content of learning processes in FLOSS. This chapter examines both, what skills are learnt and how the skills learnt within the FLOSS community are evaluated. The latter aspect is considered from the perspective of FLOSS community members as well as from the perspective of employers. Section 4: The fourth chapter examines how learning is organized within the FLOSS community. It examines the interaction between community members with regard to learning processes, the learning resources (except for technological resources) that are available and used within FLOSS communities, and the concrete learning processes. Section 5: This section looks at the technological resources used within FLOSS communities. 
FLOSS projects are almost exclusively administered online and one of the most important prerequisites for their coordination and cooperation is provided by the functionality of various communication and groupware tools. Section 6: The last section of the report proposes the project{\textquoteright}s approach to learning in FLOSS. A preliminary set of guidelines as they apply to the principles of informal learning environments of FLOSS communities to improve ICT supported formal education are offered. These preliminary guidelines will be continuously amended in the following phase of the FLOSSCom project.}, attachments = {https://flosshub.org/sites/flosshub.org/files/FLOSSCom_WP2_Phase_1_Report_v070709_1.pdf}, author = {R{\"u}diger Glott and Andreas Meiszner and Sowe, Sulayman K.} } @conference {1007, title = {Analysis of the Linux Kernel Evolution Using Code Clone Coverage}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {22}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Most studies of the evolution of software systems are based on the comparison of simple software metrics. In this paper, we present our preliminary investigation of the evolution of the Linux kernel using code-clone analysis and the code-clone coverage metrics. We examined 136 versions of the stable Linux kernel using a distributed extension of the code clone detection tool CCFinder. 
The result is shown as a heat map.}, keywords = {ccfinder, clone, cloning, kernel, linux, metrics, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.1}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300022.pdf}, author = {Livieri, Simone and Higo, Yoshiki and Matsushita, Makoto and Inoue, Katsuro} } @conference {636, title = {Authenticating from multiple authentication sources in a collaborative platform}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {234}, year = {2007}, pages = {229--234}, publisher = {Springer}, organization = {Springer}, chapter = {20}, abstract = {This paper presents a proposal to address the need for multiple authentication sources for users of collaborative work platforms. The proposed approach, developed for the needs of GET and Picolibre, relies on a generic solution that integrate groupware servers in a Shibboleth infrastructure. We have developed adapters for this integration, that we contributed to the phpGroupware project. This document should serve as a basis for discussion in order to validate the level of generality of the proposed approach. We hope that this approach can also help maintainers of other collaboration platforms, who want to integrate a park of deployed platforms with external user identification and authentication services, get a better view of solutions available with Shibboleth. 
}, isbn = {978-0-387-72485-0}, doi = {10.1007/978-0-387-72486-7_20}, attachments = {https://flosshub.org/sites/flosshub.org/files/Authenticating\%20from\%20multiple.pdf}, author = {Quang Vu Dang and Olivier Berger and Christian Bac and Hamet, Beno{\^\i}t} } @proceedings {91, title = {An absorptive capacity perspective of open source software development group performance}, year = {2006}, keywords = {NotreDameArchive}, author = {Sherae Daniel and Ritu Agarwal and Katherine Stewart} } @conference {679, title = {Adopting Open Source for Mission-Critical Applications: A Case Study on Single Sign-On}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, pages = {209--220}, publisher = {Springer}, organization = {Springer}, abstract = {In this paper, we describe a specific selection process for security-related open source code, based on a methodology aimed at evaluating open source security frameworks in general and Single-Sign-On (SSO) systems in particular. Our evaluation criteria for open source security-related software include the community{\textquoteright}s timeliness of reaction against newly discovered vulnerabilities or incidents. 
}, doi = {10.1007/0-387-34226-5_21}, attachments = {https://flosshub.org/sites/flosshub.org/files/Adapting\%20OS\%20for\%20Mission-critical\%20Applications.pdf}, author = {Ardagna, Claudio and Damiani, Ernesto and Frati, Fulvio and Reale, Salvatore} } @conference {Tsunoda:2006:AOD:1137983.1138031, title = {Analyzing OSS developers{\textquoteright} working time using mailing lists archives}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {181--182}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Our mining question is {\textquotedblleft}when OSS developers work?{\textquotedblright} OSS developers{\textquoteright} working time may be a good indicator to understand the development style of a project. (For example, if many developers work in office hour, these might be daily works in a company.)}, keywords = {developers, email, email archives, mailing lists, mining challenge, msr challenge, overtime work, postgresql, workload}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138031}, url = {http://doi.acm.org/10.1145/1137983.1138031}, attachments = {https://flosshub.org/sites/flosshub.org/files/181AnalyzingOSS.pdf}, author = {Tsunoda, Masateru and Monden, Akito and Kakimoto, Takeshi and Kamei, Yasutaka and Matsumoto, Ken-ichi} } @article {120, title = {Applying Social Network Analysis Techniques to Community-Driven Libre Software Projects}, journal = {International Journal of Information Technology and Web Engineering}, volume = {1}, number = {3}, year = {2006}, abstract = {Source code management repositories of large, long-lived libre (free, open source) software projects can be a source of valuable data about the organizational structure, evolution, and knowledge exchange in the corresponding development communities. 
Unfortunately, the sheer volume of the available information renders it almost unusable without applying methodologies which highlight the relevant information for a given aspect of the project. Such methodology is proposed in this article, based on well known concepts from the social networks analysis field, which can be used to study the relationships among developers and how they collaborate in different parts of a project. It is also applied to data mined from some well known projects (Apache, GNOME, and KDE), focusing on the characterization of their collaboration network architecture. These cases help to understand the potentials of the methodology and how it is applied, but also shows some relevant results which open new paths in the understanding of the informal organization of libre software development communities.}, keywords = {apache, conway{\textquoteright}s law, cvs, gnome, kde, scm, social network analysis, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/06_Lopez_ijitwe_sna.pdf}, author = {L{\'o}pez-Fern{\'a}ndez, L. and Gregorio Robles and Jesus M. 
Gonzalez-Barahona and Herraiz, I.} } @conference {DAmbros:2006:AER:1137983.1138029, title = {Applying the evolution radar to PostgreSQL}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {177--178}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, keywords = {cvs, documentation, evolution, evolution radar, logical coupling, makefile, mining challenge, msr challenge, postgresql, re-engineering, refactoring, release history, rhdb, source code, version control, visualization}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138029}, url = {http://doi.acm.org/10.1145/1137983.1138029}, attachments = {https://flosshub.org/sites/flosshub.org/files/177ApplyingEvolution.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele} } @conference {WeiBgerber:2006:RLE:1137983.1138011, title = {Are refactorings less error-prone than other changes?}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {112--118}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Refactorings are program transformations which should preserve the program behavior. Consequently, we expect that during phases when there are mostly refactorings in the change history of a system, only few new bugs are introduced. For our case study we analyzed the version histories of several open source systems and reconstructed the refactorings performed. Furthermore, we obtained bug reports from various sources depending on the system. 
Based on this data we identify phases when the above hypothesis holds and those when it doesn{\textquoteright}t.}, keywords = {argouml, bug reports, bugs, change history, jedit, junit, re-engineering, refactoring, reverse engineering, software evolution, version control}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138011}, url = {http://doi.acm.org/10.1145/1137983.1138011}, attachments = {https://flosshub.org/sites/flosshub.org/files/112AreRefactorings.pdf}, author = {Wei{\ss}gerber, Peter and Diehl, Stephan} } @article {85, title = {Assessing the health of open source communities}, journal = {Computer}, volume = {39}, number = {5}, year = {2006}, note = {Times Cited: 0}, pages = {89--91}, author = {Kevin Crowston and James Howison} } @conference {Ohira:2005:ACK:1083142.1083163, title = {Accelerating cross-project knowledge collaboration using collaborative filtering and social networks}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {111--115}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Vast numbers of free/open source software (F/OSS) development projects use hosting sites such as Java.net and SourceForge.net. These sites provide each project with a variety of software repositories (e.g. repositories for source code sharing, bug tracking, discussions, etc.) as a media for communication and collaboration. They tend to focus on supporting rich collaboration among members in each project. However, a majority of hosted projects are relatively small projects consisting of few developers and often need more resources for solving problems. 
In order to support cross-project knowledge collaboration in F/OSS development, we have been developing tools to collect data of projects and developers at SourceForge, and to visualize the relationship among them using the techniques of collaborative filtering and social networks. The tools help a developer identify {\textquotedblleft}who should I ask?{\textquotedblright} and {\textquotedblleft}what can I ask?{\textquotedblright} and so on. In this paper, we report a case study of applying the tools to F/OSS projects data collected from SourceForge and how effective the tools can be used for helping cross-project knowledge collaboration.}, keywords = {collaborative filtering, developers, knowledge collaboration, projects, social networks, sourceforge, visualization tool}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083163}, url = {http://doi.acm.org/10.1145/1082983.1083163}, attachments = {https://flosshub.org/sites/flosshub.org/files/111Accelerating.pdf}, author = {Ohira, Masao and Ohsugi, Naoki and Ohoka, Tetsuya and Matsumoto, Ken-ichi} } @article {flosswp233, title = {Analysing the technological history of the Open Source Phenomenon: Stories from the Free Software Evolution}, year = {2005}, month = {September}, abstract = {The Free Libre Open Source Software represents an outstanding example of open development model of technological knowledge. It has been studied in several researches that produced valuable illustrations of the way it works. Our understanding of its principal features is growing exponentially and an entire new literature on open source has been created. However there appears to be an important gap in the literature: the origin of the phenomenon. The paper attempts to tackle this issue by analyzing the long-term technological history of Free Open Source Software; the main research questions at stake are: is the phenomenon completely new? and if it is not totally new, where it comes from? 
and, more generally, how open source software developed over time? As a consequence the present work focuses primarily on the analysis of the free/open source software history of technological change over a period of almost sixty years. I adopted a multidisciplinary approach to analyse the network of relations emerging between inventions and technological innovations, as well as economic determinants and intellectual property rights regimes throughout the period considered. Thus, I attempt to investigate the origins of the phenomenon as a way of understanding its evolution.}, attachments = {https://flosshub.org/sites/flosshub.org/files/benussi.pdf}, author = {Lorenzo Benussi} } @conference {735, title = {An Analysis of COSPA - A Consortium for Open Source in the Public Administration}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {125--129}, abstract = {This paper reflects on a two-year EU funded specific research targeted project that officially began in January 2004 entitled COSPA, a Consortium for studying, evaluating and supporting the introduction of Open Source Software and Open Data Standards in the Public Administration. COSPA focuses on office automation and desktop system software and aims at rigorously measuring the effort, costs and benefits of a transition to Open Source. 
The project involves 15 European Universities and Public Administrations from Italy, Hungary, Ireland, Denmark, UK, Belgium and about sixty observers, including the University of Alberta (Canada), Victoria University of Wellington (New Zealand) and the United Nations Educational Scientific and Cultural Organisation (UNESCO).}, url = {http://pascal.case.unibz.it/handle/2038/624}, author = {Morgan, Lorraine} } @conference {Kim:2005:ASC:1083142.1083154, title = {Analysis of signature change patterns}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {1--5}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software continually changes due to performance improvements, new requirements, bug fixes, and adaptation to a changing operational environment. Common changes include modifications to data definitions, control flow, method/function signatures, and class/file relationships. Signature changes are notable because they require changes at all sites calling the modified function, and hence as a class they have more impact than other change kinds. We performed signature change analysis over software project histories to reveal multiple properties of signature changes, including their kind, frequency, and evolution patterns. These signature properties can be used to alleviate the impact of signature changes. In this paper we introduce a taxonomy of signature change kinds to categorize observed changes. 
We report multiple properties of signature changes based on an analysis of eight prominent open source projects including the Apache HTTP server, GCC, and Linux 2.5 kernel.}, keywords = {apache, gcc, kernel, linux, signature change, signature change patterns, software evolution, software evolution path, source code}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083154}, url = {http://doi.acm.org/10.1145/1082983.1083154}, attachments = {https://flosshub.org/sites/flosshub.org/files/64AnalysisOfSignature.pdf}, author = {Kim, Sunghun and Whitehead, Jr., E. James and Bevan, Jennifer} } @article {flosswp220, title = {Anarchy and Source Code - What does the Free Software Movement have to do with Anarchism?}, year = {2005}, month = {March}, abstract = {What does the hacker ethic have to do with anarchism? Why does Richard Stallman, an outstanding personality of the free software movement, described himself as an anarchist? We should not imagine the anarchists of the Free Software Movement to be like the cartoon image: A scruffy looking lunatic, with a crazy glint in his eyes and bristling with weapons. Instead of chaos, Stallman postulates a new form of order for the intellectual property in the terms of the hacker ethic - the access to knowledge should be free, decentralized, anti-bureaucratic and anti-authoritarian.}, attachments = {https://flosshub.org/sites/flosshub.org/files/imhorst.pdf}, author = {Christian Imhorst} } @article {1079, title = {Are All Open Source Projects Created Equal? Understanding the Sustainability of Open Source Software Development Model}, journal = {AMCIS 2005 Proceedings}, number = {435}, year = {2005}, note = {uses first 300 most active projects on Sourceforge: {\textquoteright}Our data sample consists of 300 open source software development projects hosted in the Sourceforge.Net. 
They are the first 300 active projects ranked by Sourceforge.Net....Our dependent variable in the model to measure the success of the projects is the number of downloads. It is an essential variable to show how successful the project is. Generally, more number of downloads means a more successful project. Independent variables include: development status, project lifespan, number of developers, number of messages in the forums, number of mailing list, number of bug report, number of patch report, number of CVS report, number of file releases and also number of news release."}, abstract = {A very intriguing question in Open Source software (OSS) development is: why there are only a few open source projects succeed, while the majority of projects never do. In this research, we examine the factors that may influence the performance of OSS projects. We particularly focus on the OSS{\textquoteright}s core developers{\textquoteright} role in the project{\textquoteright}s success. Extant research has yet to distinguish core developers and non-core developers from the community at large. The different roles of the core developers and non-core developers in OSS projects{\textquoteright} success still remain unclear. Our research contributes to the literature by separating the core developers from the development forces in general and empirically examining the core developers{\textquoteright} importance. Drawing the evidences from our extensive dataset of 300 open source projects, we demonstrated that core developers{\textquoteright} leadership and project advocation are crucial in determining the fate of the OSS projects. Our research could provide better understanding of OSS sustainability. 
It could also give practical advice to the OSS community on how to make the project successful.}, keywords = {contributors, core, developers, downloads, metadata, project success, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/LongYuan.pdf}, author = {Long, J. and Yuan, M.J.} } @conference {728, title = {Assessing the Role of Open Source Software in the European Secondary Software Sector: A Voice from Industry}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {82-87}, abstract = {This paper addresses the open source software (OSS) phenomenon from an industry perspective and reveals a number of complexities surrounding the role of OSS in the secondary software sector. It presents the research results of an international workshop which was hosted with the explicit intention of extracting the voice of key industrial stakeholders. The data was gathered and analysed using a qualitative approach which revealed the key strengths and weaknesses of OSS from an industrial perspective. This formed the foundations for developing a framework describing the emerging commercial incarnation of OSS (we refer to this as Open Source Software, Inc.) The paper concludes that the European secondary software sector recognise the benefits of leveraging OSS but are aware of key issues pertinent to such an end.}, url = {http://pascal.case.unibz.it/handle/2038/752}, author = {{\r A}gerfalk, P{\"a}r J and Deverell, Andrea and Fitzgerald, Brian and Morgan, Lorraine} } @article {flosswp193, title = {Advancing Economic Research on the Free and Open Source Software Mode of Production}, year = {2004}, month = {December}, abstract = {Early contributions to the academic literature on free/libre and open source software (F/LOSS) movements have been directed primarily at identifying the motivations that account for the sustained and often intensive involvement of many people in this non-contractual and unremunerated productive activity. 
This issue has been particularly prominent in economists{\textquoteright} contributions to the literature, and it reflects a view that widespread voluntary participation in the creation of economically valuable goods that is to be distributed without charge constitutes a significant behavioral anomaly. Undoubtedly, the motivations of F/LOSS developers deserve to be studied more intensively, but not because their behaviors are unique, or historically unprecedented. In this essay we argue that other aspects of the "open source" phenomenon are just as intriguing, if not more so, and possibly are also more consequential topics for economic analysis. We describe the re-focusing and re-direction of empirical and theoretical research in an integrated international project (based at Stanford University/SIEPR) that aims at better understanding a set of less widely discussed topics: the modes of organization, governance and performance of F/LOSS development {\textendash} viewed as a collective distributed mode of production. We discuss the significance of tackling those questions in order to assess the potentialities of the "open source way of working" as a paradigm for a broader class of knowledge and information-goods production, and conclude with proposals for the trajectory of future research along that line.}, attachments = {https://flosshub.org/sites/flosshub.org/files/davidetal.pdf}, author = {Jean-Michel Dalle} } @article {flosswp175, title = {Applying Social Network Analysis to the Information in CVS Repositories}, year = {2004}, month = {June}, abstract = {The huge quantities of data available in the CVS repositories of large, long-lived libre (free, open source) software projects, and the many interrelationships among those data offer opportunities for extracting large amounts of valuable information about their structure, evolution and internal processes. 
Unfortunately, the sheer volume of that information renders it almost unusable without applying methodologies which highlight the relevant information for a given aspect of the project. In this paper, we propose the use of a well known set of methodologies (social network analysis) for characterizing libre software projects, their evolution over time and their internal structure. In addition, we show how we have applied such methodologies to real cases, and extract some preliminary conclusions from that experience.}, attachments = {https://flosshub.org/sites/flosshub.org/files/llopez-sna-short.pdf}, author = {Luis Lopez} } @booklet {Lopez-Fernandez_applyingsocial, title = {Applying Social Network Analysis to the Information in CVS Repositories}, howpublished = {International Workshop on Mining Software Repositories (MSR 2004)}, year = {2004}, pages = {101-105}, abstract = {The huge quantities of data available in the CVS repositories of large, long-lived libre (free, open source) software projects, and the many interrelationships among those data offer opportunities for extracting large amounts of valuable information about their structure, evolution and internal processes. Unfortunately, the sheer volume of that information renders it almost unusable without applying methodologies which highlight the relevant information for a given aspect of the project. In this paper, we propose the use of a well known set of methodologies (social network analysis) for characterizing libre software projects, their evolution over time and their internal structure. 
In addition, we show how we have applied such methodologies to real cases, and extract some preliminary conclusions from that experience.}, keywords = {apache, complex networks, cvs, gnome, kde, libre software engineering, source code, source code repositories, visualization techniques, vizualization}, attachments = {https://flosshub.org/sites/flosshub.org/files/101ApplyingSocial.pdf}, author = {L{\'o}pez-Fern{\'a}ndez, L. and Gregorio Robles and Jesus M. Gonzalez-Barahona} } @article {flosswp143, title = {Appropriating the Commons: Firms in Open Source Software}, year = {2004}, month = {April}, abstract = {Firms in open source software (OSS) are active in a field encompassing all the characteristics of a public good, given the non-excludability and non-rivalry nature of OSS. As the case of OSS demonstrates, the fact that many important inputs to the innovative process are public should not be taken to mean that innovators are prevented from capturing private returns. The objective of this paper is to explore how firms appropriate returns from innovations that are created outside the boundaries of firms and in the public domain using the case of OSS. To do so, the paper draws upon an explorative multiple case study of six small firms that attempt to appropriate returns from OSS, with rich empirical evidence from various data sources. The cases illustrate how firms try a variety of approaches to appropriate adequate returns and that selling services seem to be the dominant trend. 
Firms also balance the relative inefficiency of traditional means of intellectual property right such as patents by putting greater emphasis on first mover advantages and creating network externalities.}, attachments = {https://flosshub.org/sites/flosshub.org/files/dahlander2.pdf}, author = {Linus Dahlander} } @article {flosswp138, title = {The Architecture of Cooperation: How Code Architecture Mitigates Free Riding in the Open Source Development Model}, year = {2004}, month = {January}, abstract = {We argue that the architecture of a codebase is a critical factor that lies at the heart of the open source development process. To support this argument, we define two observable properties of an architecture: (1) its modularity and (2) its option values. Developers can make informed judgments about modularity and option value from early code releases. Their judgments in turn will influence their decisions to work and to contribute their code back to the community. We go on to suggest that the core of the open source development process can be thought of as two linked games played within a codebase architecture. The first game involves the implicit exchange of effort directed at the modules and option values of a codebase; the second is a Prisoners{\textquoteright} Dilemma game triggered by the irreducible costs of communicating. The implicit exchange of effort among developers is made possible by the non-rivalrous nature of the codebase and by the modularity and option values of the codebase{\textquoteright}s architecture. This exchange creates value for all participants, both workers and free-riders. In contrast, the Prisoners{\textquoteright} Dilemma is a problem that must be surmounted if the exchanges are to take place. It can be addressed through a combination of reducing the costs of communication, providing rewards, and encouraging repeated interactions. 
Finally, the initial design and "opening up" of a codebase can be seen as a rational move by an architect who is seeking to test the environment in hopes of initiating exchanges of effort with other developers.}, attachments = {https://flosshub.org/sites/flosshub.org/files/baldwinclark.pdf}, author = {Carliss Baldwin} } @article {flosswp154, title = {Adaptive entry strategies under dominant standards: Hybrid business models in the Open Source software industry}, year = {2003}, month = {March}, abstract = {Although a growing body of literature is analysing Open Source software (OSS) issues, there is still lack of empirical data on the phenomenon and little is known about firms that enter the software industry by producing under the Open Source license scheme (Open Source firms). This paper is a contribution to fill this gap and focuses on the business models of these firms. We find significant heterogeneity among them, in particular many agents supply both proprietary and Open Source software. We present a model of adoption that studies the intra-firm diffusion of the new paradigm. Explanatory hypotheses are discussed analysing how the characteristics of the Open Source production mode and of network externalities in software demand shape the strategies of firms that entered the OSS field.}, attachments = {https://flosshub.org/sites/flosshub.org/files/bonnacorsirossigiannangeli.pdf}, author = {Andrea Bonaccorsi} } @proceedings {1165, title = {Addressing Challenges to Open Source Collaboration with the Semantic Web}, year = {2003}, pages = {9-13}, attachments = {https://flosshub.org/sites/flosshub.org/files/9-13.pdf}, author = {Ankolekar, A. and Herbsleb, J. D. 
and Sycara, K.} } @proceedings {1175, title = {Adopting Open-Source Software Engineering in Computer Science Education}, year = {2003}, pages = {85-89}, keywords = {education}, attachments = {https://flosshub.org/sites/flosshub.org/files/85-89.pdf}, author = {Liu, C.} } @article {flosswp83, title = {The Allocation of Software Development Resources in {\textquoteright}Open Source{\textquoteright} Production}, year = {2003}, month = {February}, abstract = {The paper develops a stochastic simulation model capable of describing the decentralized, micro-level decisions that allocate programming resources both within and among open source/free software (OS/FS) projects, and which critically shape their growth. The core or behavioral kernel of our simulation tool is based on dynamic "growing" trees, and incorporates the effects of the reputational reward structure of OS/FS communities as characterized by Eric S. Raymond (1998). In this regard, our line of investigation also follows recent approaches associated with studies of academic researchers in ?open science? communities. For the purposes of this first step, we mainly focus on showing the ways in which the specific norms of the reward system and organizational rules can shape emergent properties of projects, and we also point to a validation in this framework of the often adovcated, but yet mainly empirical "release early" rule.}, attachments = {https://flosshub.org/sites/flosshub.org/files/dalledavid.pdf}, author = {Jean-Michel Dalle} } @article {flosswp117, title = {Alternative Routes in the Digital World: Open Source Software in Africa}, year = {2003}, month = {September}, abstract = {Software allows people to work with computers. Operating Software controls the hardware components and application software provide tools to facilitate and support the users{\textquoteright} work. Most of the softwares are owned by private people or companies and users by licenses to use the software. 
This type of software is called proprietary or closed source software since the user purchases a license for using the product and the actual product (source code). At present Microsoft and Oracle are the biggest producers of this type software in the world. In the two decades a new approach for software development is emerging. Open Source Software movement is built on the premise that better software is produced when everyone is allowed to modify and change the software. So, in stead of selling user licenses, the product (source code) is distributed. The article discusses the differences between Open and Closed Source Software and reasons that organizations in the African context should decide to embrace the Open Source Software initiative. Several emerging initiatives promoting the use of Open Source Software are considered.}, attachments = {https://flosshub.org/sites/flosshub.org/files/reijswoudtopi.pdf}, author = {Victor Van Reijswoud} } @article {flosswp102, title = {Altruistic individuals, selfish firms? The structure of motivation in Open Source software}, year = {2003}, month = {August}, abstract = {A growing body of economic literature is addressing the incentives of the individuals that take part to the Open Source movement. However, empirical analyses focus on individual developers and neglect firms that do business with Open Source software (OSS). During 2002, we conducted a large-scale survey on 146 Italian firms supplying Open Source solutions in Italy. In this paper our data on firms? motivations are compared with data collected by the surveys made on individual programmers. 
We aim at analysing the role played by different classes of motivations (social, economic and technological) in determining the involvement of different groups of agents in Open Source}, attachments = {https://flosshub.org/sites/flosshub.org/files/bnaccorsirossimotivationshort.pdf}, author = {Andrea Bonaccorsi} } @article {flosswp104, title = {An analysis of Open Source production in Italy}, year = {2003}, month = {August}, abstract = {Final report of a survey on Italian firms that do business with Open Source software}, attachments = {https://flosshub.org/sites/flosshub.org/files/bonaccorsirossiccatenieliss.pdf}, author = {Andrea Bonaccorsi} } @article {flosswp219, title = {Assessing Free/Open Source Software Quality}, year = {2003}, month = {May}, abstract = {According to its proponents, one of the most acclaimed advantages of Free/Open Source Software (F/OSS) is its superior quality. However, this suggestion is an open issue, since there is little concrete evidence to justify whether F/OSS quality is indeed better or worse than that of proprietary software products. The general perspective of this article is to discuss the current status of F/OSS quality and to assess its performance in various aspects of quality, based on existing literature. Specifically, this article will provide some answers to various questions raised by the assertion concerning the quality of F/OSS. In this regard issues addressed in this article include the quality framework, through which F/OSS quality should be investigated and the performance of F/OSS in various quality factors within this quality framework. Answers to these issues are given by providing evidence from various research papers, empirical studies and reports based on experience about the quality of F/OSS products. 
The overall results seem to indicate that F/OSS has achieved an acceptable level of quality, although there is more to be done in order to outperform proprietary software.}, attachments = {https://flosshub.org/sites/flosshub.org/files/samoladasstamelos.pdf}, author = {Samoladas, Ioannis} } @proceedings {1174, title = {On Automatic Categorization of Open Source Software}, year = {2003}, pages = {79-83}, attachments = {https://flosshub.org/sites/flosshub.org/files/79-83.pdf}, author = {Kawaguchi, S. and Garg, P.K. and Matsushita, M. and Inoue, K.} } @proceedings {1173, title = {Automating the Discovery and Modeling of Open Source Software Development Processes}, year = {2003}, pages = {75-78}, attachments = {https://flosshub.org/sites/flosshub.org/files/75-78.pdf}, author = {Chris Jensen and Walt Scacchi} } @conference {German03automatingthe, title = {Automating the measurement of open source projects}, booktitle = {Proceedings of the 3rd Workshop on Open Source Software Engineering}, year = {2003}, note = {"We have chosen to use Evolution to illustrate some of the capabilities of SoftChange. We focus on the data provided by CVS logs and the CVS commit mailing list. Our data includes changes to the CVS repository from April 1998 to January 2003."}, pages = {63{\textendash}67}, abstract = {The proliferation of open source projects raises a number of vital economic, social, and software engineering questions that are subject of intense research. Based on experience analyzing numerous open source and commercial projects we propose a set of tools to support extraction and validation of software project data. Such tools would streamline empirical investigation of open source projects and make it possible to test existing and new theories about the nature of open source projects. Our soft- ware includes tools to extract and summarize information from mailing lists, CVS logs, ChangeLog files, and defect tracking databases. 
More importantly, it cross-links records from various data sources and identifies all contributors for a software change. We illustrate some of the capabilities by analyzing data from Ximian Evolution project.}, keywords = {bug reports, bug tracking, changelog, cvs, defects, evolution, log files, logs, mailing list, scm, softchange, source code, ximian, ximian evolution}, attachments = {https://flosshub.org/sites/flosshub.org/files/germanMockus2003.pdf}, author = {German, Daniel and Audris Mockus} } @article {Reinke2003707, title = {The availability of source code in relation to timely response to security vulnerabilities}, journal = {Computers \& Security}, volume = {22}, number = {8}, year = {2003}, note = {"This paper considers the effects of proprietary software versus non-proprietary software in determining the speed with which a security fix is made available" "The sources considered are the BugTraq mailing list (available at SecurityFocus.com), the CERT Coordination Center (www.cert.org), and Incidents.org (from the SANS Institute)" Note that these lists are generic topical discussion lists and not the mailing lists OF a particular open source project.}, pages = {707 - 724}, abstract = {Once a vulnerability has been found in an application or service that runs on a computer connected to the Internet, fixing that exploit in a timely fashion is of the utmost importance. There are two parts to fixing vulnerability: a party acting on behalf of the application{\textquoteright}s vendor gives instructions to fix it or makes a patch available that can be downloaded; then someone using that information fixes the computer or application in question. 
This paper considers the effects of proprietary software versus non-proprietary software in determining the speed with which a security fix is made available, since this can minimize the amount of time that the computer system remains vulnerable.}, keywords = {bugtraq, cert, email, email archives, mailing list, security, vulnerability}, issn = {0167-4048}, doi = {DOI: 10.1016/S0167-4048(03)00011-7}, url = {http://www.sciencedirect.com/science/article/B6V8G-4B9CV31-C/2/a218fccfaef185af5c122f118b252703}, author = {John Reinke and Hossein Saiedian} } @conference {1160, title = {Adopting OSS Methods by Adopting OSS Tools}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, abstract = {The open source movement has created and used a set of software engineering tools with features that fit the characteristics of open source development processes. To a large extent, the open source culture and methodology are conveyed to new developers via the toolset itself, and through the demonstrated usage of these tools on existing projects. The rapid and wide adoption of open source tools stands in stark contrast to the difficulties encountered in adopting traditional CASE tools. This paper explores the characteristics that make these tools adoptable and how adopting them may influence software development processes.}, keywords = {ant, argouml, bugzilla, cactus, cvs, developers, eclipse, emacs, email, faq, junit, mailing lists, make, netbeans, package management, rpm, scarab, subversion, teams, tools, torque, WORK}, attachments = {https://flosshub.org/sites/flosshub.org/files/Robbins.pdf}, author = {Robbins, Jason E.} } @article {65, title = {Analyzing cloning evolution in the Linux kernel}, journal = {Information and Software Technology}, volume = {44}, number = {13}, year = {2002}, pages = {755-765}, abstract = {Identifying code duplication in large multi-platform software systems is a challenging problem. 
This is due to a variety of reasons including the presence of high-level programming languages and structures interleaved with hardware-dependent low-level resources and assembler code, the use of GUI-based configuration scripts generating commands to compile the system, and the extremely high number of possible different configurations. This paper studies the extent and the evolution of code duplications in the Linux kernel. Linux is a large, multi-platform software system; it is based on the Open Source concept, and so there are no obstacles in discussing its implementation. In addition, it is decidedly too large to be examined manually: the current Linux kernel release (2.4.18) is about three million LOCs. Nineteen releases, from 2.4.0 to 2.4.18, were processed and analyzed, identifying code duplication among Linux subsystems by means of a metric-based approach. The obtained results support the hypothesis that the Linux system does not contain a relevant fraction of code duplication. Furthermore, code duplication tends to remain stable across releases, thus suggesting a fairly stable structure, evolving smoothly without any evidence of degradation. (C) 2002 Elsevier Science B.V. All rights reserved.}, keywords = {cvs, kernel, lines of code, linux, loc, project success, source code}, url = {web.soccerlab.polymtl.ca/~antoniol/publications/.../infsoft2002.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/infsoft2002.pdf}, author = {Antoniol, G. and Villano, U. and Merlo, E. and Di Penta, M.} }