% Bibliography entries for studies that involve FreeBSD (every entry carries the "freebsd" keyword).
% Conventions used below: one field per line; authors as "Last, First" joined by " and ";
% page ranges with "--"; months as the standard three-letter macros; DOIs stored bare
% (no resolver prefix); proper nouns in titles brace-protected against style downcasing.

@article{1381,
  author   = {Torkar, Richard and Minoves, Pau and Garrig{\'o}s, Janina},
  title    = {Adopting Free/Libre/Open Source Software Practices, Techniques and Methods for Industrial Use},
  journal  = {Journal of the Association for Information Systems},
  volume   = {12},
  number   = {1},
  year     = {2011},
  abstract = {Today{\textquoteright}s software companies face the challenges of highly distributed development projects and constantly changing requirements. This paper proposes the adoption of relevant Free/Libre/Open Source Software (FLOSS) practices in order to improve software development projects in industry. Many FLOSS projects have proven to be very successful, producing high quality products with steady and frequent releases. This study aims to identify FLOSS practices that can be adapted for the corporate environment. To achieve this goal, a framework to compare FLOSS and industrial development methodologies was created. Three successful FLOSS projects were selected as study targets (the Linux Kernel, the FreeBSD operating system, and the JBoss application server), as well as two projects from Ericsson, a large telecommunications company. Based on an analysis of these projects, FLOSS best practices were tailored to fit industrial development environments. The final results consisted of a set of key adoption opportunities that aimed to improve software quality and overall development productivity by importing best practices from the FLOSS environment. The adoption opportunities were then validated at three large corporations.},
  keywords = {freebsd, jboss, linux, linux kernel},
  url      = {http://aisel.aisnet.org/jais/vol12/iss1/1},
}

@inproceedings{1308,
  author       = {Cerulo, Luigi and Cimitile, Marta and Di Penta, Massimiliano and Canfora, Gerardo},
  title        = {Social interactions around cross-system bug fixings},
  booktitle    = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11},
  year         = {2011},
  month        = may,
  note         = {"We rely on information stored in versioning database and mailing lists of both systems and develop methods to reconstruct and integrate different historical database"},
  pages        = {143--152},
  publisher    = {ACM Press},
  organization = {ACM Press},
  address      = {New York, New York, USA},
  abstract     = {Cross-system bug fixing propagation is frequent among systems having similar characteristics, using a common framework, or, in general, systems with cloned source code fragments. While previous studies showed that clones tend to be properly maintained within a single system, very little is known about cross-system bug management. This paper describes an approach to mine explicitly documented cross-system bug fixings, and to relate their occurrences to social characteristics of contributors discussing through the project mailing lists--e.g., degree, betweenness, and brokerage--as well as to the contributors{\textquoteright} activity on source code. The paper reports results of an empirical study carried out on FreeBSD and OpenBSD kernels. The study shows that the phenomenon of cross-system bug fixing between these two projects occurs often, despite the limited overlap of contributors. The study also shows that cross-system bug fixings mainly involve contributors with the highest degree, betweenness and brokerage level, as well as contributors that change the source code more than others.},
  keywords     = {bug fixing, bug tracking system, committers, email, email archives, freebsd, mailing list, openbsd},
  isbn         = {9781450305747},
  doi          = {10.1145/1985441.1985463},
}

@inproceedings{Rigby:2011:UBB:1985793.1985867,
  author       = {Rigby, Peter C. and Storey, Margaret-Anne},
  title        = {Understanding broadcast based peer review on open source software projects},
  booktitle    = {Proceedings of the 33rd International Conference on Software Engineering},
  series       = {ICSE {\textquoteright}11},
  year         = {2011},
  note         = {http://helium.cs.uvic.ca/other/Rigby2011ICSE.pdf 5 projects},
  pages        = {541--550},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Software peer review has proven to be a successful technique in open source software (OSS) development. In contrast to industry, where reviews are typically assigned to specific individuals, changes are broadcast to hundreds of potentially interested stakeholders. Despite concerns that reviews may be ignored, or that discussions will deadlock because too many uninformed stakeholders are involved, we find that this approach works well in practice. In this paper, we describe an empirical study to investigate the mechanisms and behaviours that developers use to find code changes they are competent to review. We also explore how stakeholders interact with one another during the review process. We manually examine hundreds of reviews across five high profile OSS projects. Our findings provide insights into the simple, community-wide techniques that developers use to effectively manage large quantities of reviews. The themes that emerge from our study are enriched and validated by interviewing long-serving core developers.},
  keywords     = {apache, case studies, email, freebsd, grounded theory, kde, linux, linux kernel, open source software, peer review, subversion},
  isbn         = {978-1-4503-0445-0},
  doi          = {10.1145/1985793.1985867},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/Rigby2011ICSE.pdf},
}

@inproceedings{959,
  author       = {Sasaki, Yusuke and Yamamoto, Tetsuo and Hayase, Yasuhiro and Inoue, Katsuro},
  title        = {Finding file clones in {FreeBSD} {Ports} {Collection}},
  booktitle    = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year         = {2010},
  pages        = {102--105},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  abstract     = {In Open Source System (OSS) development, software components are often imported and reused; for this reason we might expect that files are copied in multiple projects (file clones). In this paper, we propose a file clone detection tool called FCFinder and show the analysis performed with it on the FreeBSD Ports Collection, a large OSS project collection. We found many file clones among similar or related projects, which are systematically introduced from base projects.},
  keywords     = {clone, freebsd, msr challenge, source code},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463293},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/102FreeBSDClones.pdf},
}

@inproceedings{956,
  author       = {Mauczka, Andreas and Schanes, Christian and Fankhauser, Florian and Bernhart, Mario and Grechenig, Thomas},
  title        = {Mining security changes in {FreeBSD}},
  booktitle    = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year         = {2010},
  pages        = {90--93},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  abstract     = {Current research on historical project data is rarely touching on the subject of security related information. Learning how security is treated in projects and which parts of a software are historically security relevant or prone to security changes can enhance the security strategy of a software project. We present a mining methodology for security related changes by modifying an existing method of software repository analysis. We use the gathered security changes to find out more about the nature of security in the FreeBSD project and we try to establish a link between the identified security changes and a tracker for security issues (security advisories). We give insights how security is presented in the FreeBSD project and show how the mined data and known security problems are connected.},
  keywords     = {freebsd, msr challenge, security},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463289},
}

@inproceedings{929,
  author       = {German, Daniel M. and Di Penta, Massimiliano and Gu{\'e}h{\'e}neuc, Yann-Ga{\"e}l and Antoniol, Giuliano},
  title        = {Code siblings: Technical and legal implications of copying code between applications},
  booktitle    = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year         = {2009},
  pages        = {81--90},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Vancouver, BC, Canada},
  abstract     = {Source code cloning does not happen within a single system only. It can also occur between one system and another. We use the term code sibling to refer to a code clone that evolves in a different system than the code from which it originates. Code siblings can only occur when the source code copyright owner allows it and when the conditions imposed by such license are not incompatible with the license of the destination system. In some situations copying of source code fragments are allowed - legally - in one direction, but not in the other. In this paper, we use clone detection, license mining and classification, and change history techniques to understand how code siblings - under different licenses - flow in one direction or the other between Linux and two BSD Unixes, FreeBSD and OpenBSD. Our results show that, in most cases, this migration appears to happen according to the terms of the license of the original code being copied, favoring always copying from less restrictive licenses towards more restrictive ones. We also discovered that sometimes code is inserted to the kernels from an outside source.},
  keywords     = {bsd, fossology, freebsd, linux, openbsd, source code},
  isbn         = {978-1-4244-3493-0},
  doi          = {10.1109/MSR.2009.5069483},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/81CodeSiblings.pdf},
}

@inproceedings{Chang:2008:ESC:1370750.1370766,
  author       = {Chang, Hung-Fu and Mockus, Audris},
  title        = {Evaluation of source code copy detection methods on {FreeBSD}},
  booktitle    = {Proceedings of the 2008 international working conference on Mining software repositories},
  series       = {MSR {\textquoteright}08},
  year         = {2008},
  month        = may,
  pages        = {61--66},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Studies have shown that substantial code reuse is common in open source and in commercial projects. However, the precise extent of reuse and its impact on productivity and quality are not well investigated in the open source context. Previously, we have introduced a simple-to-use method that needs only a set of file pathnames to identifies directories that share filenames and partially validated its performance on a set of closed-source projects. To evaluate this method and to improve reuse detection at the file level, we apply it and four additional file copy detection methods that utilize the underlying content of multiple versions of the source code on the FreeBSD project. The evaluation quantified unique advantages of each method and showed that the filename method detected roughly half of all reuse cases. We are still faced with a challenge to scale the content based methods to large repositories containing all versions of open source files.},
  keywords     = {clone, cloning, code copying, freebsd, version control},
  isbn         = {978-1-60558-024-1},
  doi          = {10.1145/1370750.1370766},
  url          = {http://doi.acm.org/10.1145/1370750.1370766},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/p61-chang.pdf},
}

@inproceedings{Spinellis:2008:TFK:1368088.1368140,
  author       = {Spinellis, Diomidis},
  title        = {A tale of four kernels},
  booktitle    = {Proceedings of the 30th international conference on Software engineering},
  series       = {ICSE {\textquoteright}08},
  year         = {2008},
  pages        = {381--390},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {The FreeBSD, GNU/Linux, Solaris, and Windows operating systems have kernels that provide comparable facilities. Interestingly, their code bases share almost no common parts, while their development processes vary dramatically. We analyze the source code of the four systems by collecting metrics in the areas of file organization, code structure, code style, the use of the C preprocessor, and data organization. The aggregate results indicate that across various areas and many different metrics, four systems developed using wildly different processes score comparably. This allows us to posit that the structure and internal quality attributes of a working, non-trivial software artifact will represent first and foremost the engineering requirements of its construction, with the influence of process being marginal, if any.},
  keywords     = {comparison, freebsd, linux, open source, opensolaris, proprietary software, windows, wrk},
  isbn         = {978-1-60558-079-1},
  doi          = {10.1145/1368088.1368140},
  url          = {http://doi.acm.org/10.1145/1368088.1368140},
}

@inproceedings{1006,
  author       = {Herraiz, Israel and Gonzalez-Barahona, Jesus M. and Robles, Gregorio},
  title        = {Towards a Theoretical Model for Software Growth},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year         = {2007},
  pages        = {21},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  abstract     = {Software growth (and more broadly, software evolution) is usually considered in terms of size or complexity of source code. However in different studies, usually different metrics are used, which make it difficult to compare approaches and results. In addition, not all metrics are equally easy to calculate for a given source code, which leads to the question of which one is the easiest to calculate without losing too much information. To address both issues, in this paper present a comprehensive study, based on the analysis of about 700,000 C source code files, calculating several size and complexity metrics for all of them. For this sample, we have found double Pareto statistical distributions for all metrics considered, and a high correlation between any two of them. This would imply that any model addressing software growth should produce this Pareto distributions, and that analysis based on any of the considered metrics should show a similar pattern, provided the sample of files considered is large enough.},
  keywords     = {C, complexity, evolution, freebsd, growth, halstead, lines of code, loc, mccabe, metrics, scm, size, sloc, sloccount, source code},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.31},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300021.pdf},
}

@inproceedings{686,
  author       = {Robles, Gregorio and Gonzalez-Barahona, Jesus},
  title        = {Contributor Turnover in Libre Software Projects},
  booktitle    = {OSS2006: Open Source Systems (IFIP 2.13)},
  series       = {IFIP International Federation for Information Processing},
  year         = {2006},
  pages        = {273--286},
  publisher    = {Springer},
  organization = {Springer},
  abstract     = {A common problem that management faces in software companies is the high instability of their staff. In libre (free, open source) software projects, the permanence of developers is also an open issue, with the potential of causing problems amplified by the self-organizing nature that most of them exhibit. Hence, human resources in libre software projects are even more difficult to manage: developers are in most cases not bound by a contract and, in addition, there is not a real management structure concerned about this problem. This raises some interesting questions with respect to the composition of development teams in libre software projects, and how they evolve over time. There are projects lead by their original founders (some sort of {\textquotedblleft}code gods{\textquotedblright}), while others are driven by several different developer groups over time (i.e. the project {\textquotedblleft}regenerates{\textquotedblright} itself). In this paper, we propose a quantitative methodology, based on the analysis of the activity in the source code management repositories, to study how these processes (developers leaving, developers joining) affect libre software projects. The basis of it is the analysis of the composition of the core group, the group of developers most active in a project, for several time lapses. We will apply this methodology to several large, well-known libre software projects, and show how it can be used to characterize them. In addition, we will discuss the lessons that can be learned, and the validity of our proposal.},
  keywords     = {apache, committers, core, cvs, cvsanaly, developers, evolution, freebsd, gimp, gnome, kde, mono, mozilla},
  doi          = {10.1007/0-387-34226-5_28},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/Contributor\%20Turnover\%20in\%20Libre\%20Software\%20Projects.pdf},
}

@inproceedings{Askari:2006:ITE:1137983.1138013,
  author       = {Askari, Mina and Holt, Ric},
  title        = {Information theoretic evaluation of change prediction models for large-scale software},
  booktitle    = {Proceedings of the 2006 international workshop on Mining software repositories},
  series       = {MSR {\textquoteright}06},
  year         = {2006},
  pages        = {126--132},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {In this paper, we analyze the data extracted from several open source software repositories. We observe that the change data follows a Zipf distribution. Based on the extracted data, we then develop three probabilistic models to predict which files will have changes or bugs. The first model is Maximum Likelihood Estimation (MLE), which simply counts the number of events, i.e., changes or bugs, that happen to each file and normalizes the counts to compute a probability distribution. The second model is Reflexive Exponential Decay (RED) in which we postulate that the predictive rate of modification in a file is incremented by any modification to that file and decays exponentially. The third model is called RED-Co-Change. With each modification to a given file, the RED-Co-Change model not only increments its predictive rate, but also increments the rate for other files that are related to the given file through previous co-changes. We then present an information-theoretic approach to evaluate the performance of different prediction models. In this approach, the closeness of model distribution to the actual unknown probability distribution of the system is measured using cross entropy. We evaluate our prediction models empirically using the proposed information-theoretic approach for six large open source systems. Based on this evaluation, we observe that of our three prediction models, the RED-Co-Change model predicts the distribution that is closest to the actual distribution for all the studied systems.},
  keywords     = {bugs, change analysis, cvs, evaluation approach, file, freebsd, information theory, kde, koffice, log files, netbsd, openbsd, postgresql, prediction, prediction models, scm, source code},
  isbn         = {1-59593-397-2},
  doi          = {10.1145/1137983.1138013},
  url          = {http://doi.acm.org/10.1145/1137983.1138013},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/126InformationTheoretic.pdf},
}

@article{Yu:2006:MKO:1150566.1150571,
  author      = {Yu, Liguo and Schach, Stephen R. and Chen, Kai and Heller, Gillian Z. and Offutt, Jeff},
  title       = {Maintainability of the kernels of open-source operating systems: A comparison of {Linux} with {FreeBSD}, {NetBSD}, and {OpenBSD}},
  journal     = {Journal of Systems and Software},
  volume      = {79},
  year        = {2006},
  month       = jun,
  note        = {"Data regarding the number and total number of lines of code of kernel and nonkernel modules in the four operating systems are provided in Table 1" loc, kloc, number of kernel modules, number of nonkernel modules size c files .h files},
  pages       = {807--815},
  publisher   = {Elsevier Science Inc.},
  address     = {New York, NY, USA},
  abstract    = {We compared and contrasted the maintainability of four open-source operating systems: Linux, FreeBSD, NetBSD, and OpenBSD. We used our categorization of common coupling in kernel-based software to highlight future maintenance problems. An unsafe definition is a definition of a global variable that can affect a kernel module if that definition is changed. For each operating system we determined a number of measures, including the number of global variables, the number of instances of global variables in the kernel and overall, as well as the number of unsafe definitions in the kernel and overall. We also computed the value of each our measures per kernel KLOC and per KLOC overall. For every measure and every ratio, Linux compared unfavorably with FreeBSD, NetBSD, and OpenBSD. Accordingly, we are concerned about the future maintainability of Linux.},
  keywords    = {abiword, Common coupling, coupling, Definition-use analysis, freebsd, kernel, lines of code, linux, linux kernel, loc, Maintainability, modules, netbsd, Open-source software, openbsd, source code},
  issn        = {0164-1212},
  doi         = {10.1016/j.jss.2005.08.014},
  url         = {http://dx.doi.org/10.1016/j.jss.2005.08.014},
  attachments = {https://flosshub.org/sites/flosshub.org/files/YuSchachChen.pdf},
}

@article{DBLP:journals/tse/Dinh-TrongB05,
  author      = {Dinh-Trong, Trung T. and Bieman, James M.},
  title       = {The {FreeBSD} Project: A Replication Case Study of Open Source Development},
  journal     = {IEEE Transactions on Software Engineering},
  volume      = {31},
  number      = {6},
  year        = {2005},
  note        = {" we obtained the necessary data from the [FreeBsd] CVS repository, the bug report database, and the e-mail archive. The CVS repository contains all of the code and related documentation that is committed to the project from 1993 until the present. The bug report database contains information describing all reported problems, as well as the status (such as fixed, under test, or open) of each problem. Each bug report is called a PR and assigned a reference number. The e-mail archive contains every e-mail message exchanged between the developers since 1994." d/l: research.cs.queensu.ca/~ahmed/home/teaching/.../F06/.../free-bsd.pdf},
  pages       = {481--494},
  abstract    = {Case studies can help to validate claims that open source software development produces higher quality software at lower cost than traditional commercial development. One problem inherent in case studies is external validity{\textemdash}we do not know whether or not results from one case study apply to another development project. We gain or lose confidence in case study results when similar case studies are conducted on other projects. This case study of the FreeBSD project, a long-lived open source project, provides further understanding of open source development. The paper details a method for mining repositories and querying project participants to retrieve key process information. The FreeBSD development process is fairly well-defined with proscribed methods for determining developer responsibilities, dealing with enhancements and defects, and managing releases. Compared to the Apache project, FreeBSD uses 1) a smaller set of core developers{\textemdash}developers who control the code base{\textemdash}that implement a smaller percentage of the system, 2) a larger set of top developers to implement 80 percent of the system, and 3) a more well-defined testing process. FreeBSD and Apache have a similar ratio of core developers to people involved in adapting and debugging the system and people who report problems. Both systems have similar defect densities and the developers are also users in both systems.},
  keywords    = {apache, bug reports, contributors, core, cvs, defect density, developers, email, email archive, freebsd, mailing list, scm, source code, users},
  doi         = {10.1109/TSE.2005.73},
  attachments = {https://flosshub.org/sites/flosshub.org/files/DinhTrungBieman.pdf},
}

@inproceedings{Fischer:2005:MED:1083142.1083145,
  author       = {Fischer, Michael and Oberleitner, Johann and Ratzinger, Jacek and Gall, Harald},
  title        = {Mining evolution data of a product family},
  booktitle    = {Proceedings of the 2005 international workshop on Mining software repositories},
  series       = {MSR {\textquoteright}05},
  year         = {2005},
  pages        = {12--16},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Diversification of software assets through changing requirements impose a constant challenge on the developers and maintainers of large software systems. Recent research has addressed the mining for data in software repositories of single products ranging from fine- to coarse grained analyses. But so far, little attention has been payed to mining data about the evolution of product families. In this work, we study the evolution and commonalities of three variants of the BSD (Berkeley Software Distribution), a large open source operating system. The research questions we tackle are concerned with how to generate high level views of the system discovering and indicating evolutionary highlights. To process the large amount of data, we extended our previously developed approach for storing release history information to support the analysis of product families. In a case study we apply our approach on data from three different code repositories representing about 8.5GB of data and 10 years of active development.},
  keywords     = {bsd, change analysis, change history, cvs, evolution, freebsd, netbsd, openbsd, release history, source code, text mining},
  isbn         = {1-59593-123-6},
  doi          = {10.1145/1082983.1083145},
  url          = {http://doi.acm.org/10.1145/1082983.1083145},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/12MiningEvolution.pdf},
}

@inproceedings{1248,
  author      = {Erenkrantz, J. and Taylor, R. N.},
  title       = {Supporting Distributed and Decentralized Projects: Drawing Lessons from the Open Source Community},
  booktitle   = {1st Workshop on Open Source in an Industrial Context},
  year        = {2003},
  month       = oct,
  note        = {"We begin the remainder of the paper with discussion of a survey of open source projects, showing similarities that have arisen in tool usage"},
  abstract    = {Open source projects are typically organized in a distributed and decentralized manner. These factors strongly determine the processes followed and constrain the types of tools that can be utilized. This paper explores how distribution and decentralization have affected processes and tools in existing open source projects with the goals of summarizing the lessons learned and identifying opportunities for improving both. Issues considered include decision-making, accountability, communication, awareness, rationale, managing source code, testing, and release management.},
  keywords    = {abiword, apache, debian, freebsd, kde, linux, mozilla, mysql, perl, PHP, postgresql, python, subversion, tomcat, tools},
  attachments = {https://flosshub.org/sites/flosshub.org/files/erenkrantz2003.pdf},
}