@proceedings {1902, title = {Assessing Code Authorship: The Case of the Linux Kernel}, volume = {496}, year = {2017}, month = {05/2017}, pages = {151-163}, publisher = {Springer}, abstract = {Code authorship is a key information in large-scale open-source systems. Among others, it allows maintainers to assess division of work and identify key collaborators. Interestingly, open-source communities lack guidelines on how to manage authorship. This could be mitigated by setting to build an empirical body of knowledge on how authorship-related measures evolve in successful open-source communities. Towards that direction, we perform a case study on the Linux kernel. Our results show that: (a) only a small portion of developers (26\%) makes significant contributions to the code base; (b) the distribution of the number of files per author is highly skewed{\textemdash}a small group of top-authors (3\%) is responsible for hundreds of files, while most authors (75\%) are responsible for at most 11 files; (c) most authors (62\%) have a specialist profile; (d) authors with a high number of co-authorship connections tend to collaborate with others with less connections.}, keywords = {code authorship, developer network, linux kernel}, doi = {10.1007/978-3-319-57735-7_15}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_15}, author = {Guilherme Avelino and Passos, Leonardo and Andre Hora and Marco Tulio Valente} } @conference {Murphy:2017:CEF:3017680.3017682, title = {Community Engagement with Free and Open Source Software}, booktitle = {Proceedings of the 2017 ACM SIGCSE Technical Symposium on Computer Science Education}, series = {SIGCSE {\textquoteright}17}, year = {2017}, pages = {669{\textendash}670}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {A common refrain from Senior Exit Surveys and Alumni Surveys is the desire to work on "real-world," "practical" and "hands-on" projects using industry-ready tools and 
development environments. To assuage this, institutions have moved towards adopting Free and Open Source Software (FOSS) as an avenue to provide meaningful, applied learning interventions to students. Through these experiences, students benefit from engagement with various communities including: the community of contributors to the FOSS project; the community of local software developers; the community of citizens who reside in the local area; the community of students at their institution and others; and, the community of people impacted by the FOSS project. These engagements motivate students, enhance their communication and technical skills, allow them to grow and become more confident, help them form professional networks, and provide the "real-world" projects they seek. In this panel, we will discuss our experiences in engaging students with five different types of communities as part of incorporating FOSS into our courses, focusing on how other educators can provide the same benefits to their students as well. In order to satisfy the time constraints of the panel, the last two authors will present together.}, keywords = {free and open source software (FOSS), humanitarian free and open source software (HFOSS), localized free and open source software (LFOSS), under-represented minorities (URM)}, isbn = {978-1-4503-4698-6}, doi = {10.1145/3017680.3017682}, url = {http://doi.acm.org/10.1145/3017680.3017682}, author = {Murphy, Christian and Buffardi, Kevin and Dehlinger, Josh and Lambert, Lynn and Veilleux, Nanette} } @proceedings {1907, title = {Do Software Developers Understand Open Source Licenses?}, year = {2017}, note = {Data: We report on the results of a survey that asked developers about 42 different cases of the use of code under different open source licenses. 
To make the survey tractable for developers to answer, we focused on three popular open source licenses (GNU GPL 3.0, GNU LGPL 3.0 and MPL 2.0). Findings: The survey results indicate that most of the 375 respondents to our survey struggle with understanding the interaction of open source licenses in both simple and complex software development cases}, month = {05/2017}, pages = {1-11}, abstract = {Software provided under open source licenses is widely used, from forming high-profile stand-alone applications (e.g., Mozilla Firefox) to being embedded in commercial offerings (e.g., network routers). Despite the high frequency of use of open source licenses, there has been little work about whether software developers understand the open source licenses they use. To our knowledge, only one survey has been conducted, which focused on which licenses developers choose and when they encounter problems with licensing open source software. To help fill the gap of whether or not developers understand the open source licenses they use, we conducted a survey that posed development scenarios involving three popular open source licenses (GNU GPL 3.0, GNU LGPL 3.0 and MPL 2.0) both alone and in combination. The 375 respondents to the survey, who were largely developers, gave answers consistent with those of a legal expert{\textquoteright}s opinion in 62\% of 42 cases. Although developers clearly understood cases involving one license, they struggled when multiple licenses were involved. An analysis of the quantitative and qualitative results of the study indicate a need for tool support to help guide developers in understanding this critical information attached to software components.}, keywords = {license, Survey}, doi = {10.1109/ICPC.2017.7}, author = {Almeida, Daniel A. and Murphy, Gail C. and Wilson, Greg and Hoye, Mike} } @proceedings {1895, title = {How are Developers Treating License Inconsistency Issues? 
A Case Study on License Inconsistency Evolution in FOSS Projects}, volume = {496}, year = {2017}, month = {05/2017}, pages = {69-79}, publisher = {Springer}, abstract = {A license inconsistency is the presence of two or more source files that evolved from the same original file containing different licenses. In our previous study, we have shown that license inconsistencies do exist in open source projects and may lead to potential license violation problems. In this study, we try to find out whether the issues of license inconsistencies are properly solved by analyzing two versions of a FOSS distribution{\textemdash}Debian{\textemdash}and investigate the evolution patterns of license inconsistencies. Findings are: license inconsistencies occur mostly because the original copyright owner updated the license while the reusers were still using the old version of the source files with the old license; most license inconsistencies would disappear when the reusers synchronize their project from the upstream, while some would exist permanently if reusers decide not to synchronize anymore. Legally suspicious cases have not been found yet in those Debian distributions.}, keywords = {Code clone, debian, License inconsistency, licenses, Software license}, doi = {10.1007/978-3-319-57735-7_8}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_8}, author = {Y. Wu and Manabe, Yuki and Daniel M. Germ{\'a}n and Inoue, K.} } @proceedings {1905, title = {Longitudinal Analysis of the Run-up to a Decision to Break-up (Fork) in a Community}, volume = {496}, year = {2017}, month = {05/2017}, pages = {204-217}, publisher = {Springer}, abstract = {In this paper, we use a developer-oriented statistical approach to understand what causes people in complex software development networks to decide to fork (break away), and what changes a community goes through in the run-up to a decision to break-up. Developing complex software systems is complex. Software developers interact. 
They may have the same or different goals, communication styles, or values. Interactions can be healthy or troubled. Troubled interactions cause troubled communities, that face failure. Some of these failures manifest themselves as a community split (known as forking). These failures affects many people; developers and users. Can we save troubled projects? We statistically model the longitudinal socio-grams of software developers and present early indicators and warning signs that can be used to predict an imminent break-up decision. }, keywords = {community of software developers, forks, longitudinal study}, doi = {10.1007/978-3-319-57735-7_19}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_19}, author = {Amirhosein {\textquotedblleft}Emerson{\textquotedblright} Azarbakht and Jensen, Carlos} } @proceedings {1911, title = {Machine Learning-Based Detection of Open Source License Exceptions}, year = {2017}, note = {"We address these questions by first performing a large scale mining-based study... [W]e analyzed the source code of 51,754 projects written in six different programming languages (Ruby, Javascript, Python, C, C++, and C$\#$) hosted on GitHub."}, month = {05/2017}, pages = {118-129}, abstract = {From a legal perspective, software licenses govern the redistribution, reuse, and modification of software as both source and binary code. Free and Open Source Software (FOSS) licenses vary in the degree to which they are permissive or restrictive in allowing redistribution or modification under licenses different from the original one(s). In certain cases developers may modify the license by appending to it an exception to specifically allow reuse or modification under a particular condition. These exceptions are an important factor to consider for license compliance analysis since they modify the standard (and widely understood) terms of the original license. 
In this work, we first perform a large-scale empirical study on the change history of over 51k FOSS systems aimed at quantitatively investigating the prevalence of known license exceptions and identifying new ones. Subsequently, we performed a study on the detection of license exceptions by relying on machine learning. We evaluated the license exception classification with four different supervised learners and sensitivity analysis. Finally we present a categorization of license exceptions and explain their implications.}, keywords = {classifier, empirical studies, license, machine learning}, doi = {10.1109/ICSE.2017.19}, author = {Vendome, Christopher and Mario Linares-Vasquez and Bavota, Gabriele and Di Penta, Massimiliano and Daniel M. German and Poshyvanyk, Denys} } @proceedings {1900, title = {Understanding When to Adopt a Library: A Case Study on ASF Projects}, volume = {496}, year = {2017}, month = {05/2017}, pages = {128-138}, publisher = {Springer}, abstract = {Software libraries are widely used by both industrial and open source client projects. Ideally, a client user of a library should adopt the latest version that the library project releases. However, sometimes the latest version is not better than a previous version. This is because the latest version may include additional developer effort to test and integrate all changed features. In this study, our main goal is to better understand the relationship between adoption of library versions and its release cycle. Specifically, we conducted an empirical study of release cycles for 23 libraries and how they were adopted by 415 Apache Software Foundation (ASF) client projects. Our findings show that software projects are quicker to update earlier rapid-release libraries compared to library projects with a longer release cycle. 
Moreover, results suggest that software projects are more likely to adopt the latest version of a rapid-release library compared to libraries with a longer release cycles.}, keywords = {adoption, apache, apache software foundation, libraries}, doi = {10.1007/978-3-319-57735-7_13}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_13}, author = {Ihara, Akinori and Daiki Fujibayashi and Hirohiko Suwa and Raula Gaikovina Kula and Kenichi Matsumoto} } @proceedings {1854, title = {Differentiating Communication Styles of Leaders on the Linux Kernel Mailing List}, year = {2016}, note = {Slides link: https://docs.google.com/presentation/d/1_5kqOXBYwH33ayfGKCncCtCondfUYtsHSDBS3DBig6Y/edit?usp=sharing Edited to fix typo in abstract. New version is v3.pdf}, month = {08/2016}, publisher = {ACM}, abstract = {Much communication between developers of free, libre, and open source software (FLOSS) projects happens on email mailing lists. Geographically and temporally dispersed development teams use email as an asynchronous, centralized, persistently stored institutional memory for sharing code samples, discussing bugs, and making decisions. Email is especially important to large, mature projects, such as the Linux kernel, which has thousands of developers and a multi-layered leadership structure. In this paper, we collect and analyze data to understand the communication patterns in such a community. How do the leaders of the Linux Kernel project write in email? What are the salient features of their writing, and can we discern one leader from another? We find that there are clear written markers for two leaders who have been particularly important to recent discussions of leadership style on the Linux Kernel Mailing List (LKML): Linus Torvalds and Greg Kroah-Hartman. Furthermore, we show that it is straightforward to use a machine learning strategy to automatically differentiate these two leaders based on their writing. 
Our findings will help researchers understand how this community works, and why there is occasional controversy regarding differences in communication styles on the LKML.}, keywords = {email, flossmole, linus torvalds, linux, lkml}, attachments = {https://flosshub.org/sites/flosshub.org/files/v3_0.pdf}, author = {Schneider, Daniel and Spurlock, Scott and Squire, Megan} } @article {1746, title = {Evaluation of FLOSS by Analyzing Its Software Evolution: An Example Using the Moodle Platform}, journal = {Journal of Information Technology Research}, volume = {8}, year = {2015}, month = {01/2015}, pages = {62--81}, abstract = {In today{\textquoteright}s world, management often rely on FLOSS (Free/Libre/Open Source Software) systems to run their organizations. However, the nature of FLOSS is different from the software they have been using in the last decades. Its development model is distributed, and its authors are diverse as many volunteers and companies may collaborate in the project. In this paper, we want to shed some light on how to evaluate a FLOSS system by looking at the Moodle platform, which is currently the most used learning management system among educational institutions worldwide. In contrast with other evaluation models that have been proposed so far, the one we present is based on retrieving historical information that can be obtained publicly from the Internet, allowing us to study its evolution. As a result, we will show how by using our methodology management can take informed decisions that lower the risk that organizations face when investing in a FLOSS system. }, keywords = {free software, LMS, moodle, open source, software engineering, software evaluation, software evolution}, issn = {1938-7865}, doi = {10.4018/JITR.2015010105}, attachments = {https://flosshub.org/sites/flosshub.org/files/Evaluation\%20of\%20FLOSS\%20by\%20Analyzing\%20its\%20Software\%20Evolution\%20-\%20An\%20Example\%20Using\%20the\%20Moodle\%20Platform.pdf}, author = {Macho, H{\'e}ctor J. 
and Gregorio Robles and Gonz{\'a}lez-Barahona, Jesus M} } @inbook {1741, title = {First Results About Motivation and Impact of License Changes in Open Source Projects}, booktitle = {Open Source Systems: Adoption and Impact}, series = {IFIP Advances in Information and Communication Technology}, volume = {451}, year = {2015}, pages = {137-145}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, abstract = {Free and open source software is characterized by the freedoms and criteria that are warranted by specific licenses. These licenses describe the rights and duties of the licensors and licensees. However, a licensing change may be necessary in the life of an open source project to meet legal developments or to allow the implementation of new business models. This paper examines the motivations and impacts of license changes in open source projects. After a state of the art on the subject, a set of case studies where projects changed their license is presented. Then a set of motivations to change licenses, the ways to legally make this change, the problems caused by this change and a set of benefits of the license change are discussed. }, keywords = {Business model, Contributor agreement, intellectual property, license, open source}, isbn = {978-3-319-17836-3}, doi = {10.1007/978-3-319-17837-0_13}, url = {http://dx.doi.org/10.1007/978-3-319-17837-0_13}, author = {Viseur, Robert and Gregorio Robles}, editor = {Damiani, Ernesto and Frati, Fulvio and Dirk Riehle and Wasserman, Anthony I.} } @proceedings {1729, title = {A Large Scale Study of License Usage on GitHub}, volume = {2}, year = {2015}, note = {14.9\% of projects have a license file.}, month = {05/2015}, pages = {772-774}, publisher = {ACM/IEEE}, abstract = {The open source community relies upon licensing in order to govern the distribution, modification, and reuse of existing code. 
These licenses evolve to better suit the requirements of the development communities and to cope with unaddressed or new legal issues. In this paper, we report the results of a large empirical study conducted over the change history of 16,221 open source Java projects mined from GitHub. Our study investigates how licensing usage and adoption changes over a period of ten years. We consider both the distribution of license usage within projects of a rapidly growing forge and the extent that new versions of licenses are introduced in these projects.}, keywords = {github, license}, author = {Vendome, Christopher} } @proceedings {export:232407, title = {The Uniqueness of Changes: Characteristics and Applications}, year = {2015}, month = {05/2015}, abstract = {Changes in software development come in many forms. Some changes are frequent, idiomatic, or repetitive (e.g. adding checks for nulls or logging important values) while others are unique. We hypothesize that unique changes are different from the more common similar (or non-unique) changes in important ways; they may require more expertise or represent code that is more complex or prone to mistakes. As such, these changes are worthy of study. In this paper, we present a definition of unique changes and provide a method for identifying them in software project history. Based on the results of applying our technique on the Linux kernel and two large projects at Microsoft, we present an empirical study of unique changes. We explore how prevalent unique changes are and characterize where they occur along the architecture of the project. We further investigate developers{\textquoteright} contribution towards uniqueness of changes. 
We also describe potential applications of leveraging the uniqueness of change and implement two such applications, evaluating the risk of changes based on uniqueness and providing change recommendations for non-unique changes.}, keywords = {linux kernel}, url = {http://research.microsoft.com/apps/pubs/default.aspx?id=232407}, attachments = {https://flosshub.org/sites/flosshub.org/files/MSR-TR-2014-149.pdf}, author = {Ray, Baishakhi and Meiyappan Nagappan and Christian Bird and Nachiappan Nagappan and Zimmermann, Thomas} } @conference {Passos:2014:DFA:2597073.2597124, title = {A Dataset of Feature Additions and Feature Removals from the Linux Kernel}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {376{\textendash}379}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {This paper describes a dataset of feature additions and removals in the Linux kernel evolution history, spanning over seven years of kernel development. Features, in this context, denote configurable system options that users select when creating customized kernel images. The provided dataset is the largest corpus we are aware of capturing feature additions and removals, allowing researchers to assess the kernel evolution from a feature-oriented point-of-view. Furthermore, the dataset can be used to better understand how features evolve over time, and how different artifacts change as a result. One particular use of the dataset is to provide a real-world case to assess existing support for feature traceability and evolution. In this paper, we detail the dataset extraction process, the underlying database schema, and example queries. 
The dataset is directly available at our Bitbucket repository: https://bitbucket.org/lpassos/kconfigdb }, keywords = {evolution, linux, msr data showcase, Traceability, Version Control History}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597124}, url = {http://doi.acm.org/10.1145/2597073.2597124}, attachments = {https://flosshub.org/sites/flosshub.org/files/kernel.pdf}, author = {Passos, Leonardo and Czarnecki, Krzysztof} } @proceedings {1636, title = {Free/Open Source Software projects as early MOOCs}, year = {2014}, month = {04/2014}, pages = {874-883}, abstract = {This paper presents Free/Libre/Open Source Software (FLOSS) Projects as early Massive Online Open Courses (MOOCs). Being software development a process where learning and collaboration is of major importance, FLOSS projects have in common many characteristics with MOOCs. This is because many FLOSS projects (such as Linux, Apache, GNOME or KDE, among others) are massive, they are open to anyone to participate, and are driven mainly by telematic means. We therefore present the research literature that has studied FLOSS projects from points of view that are close to learning and discuss how the FLOSS community has approached many of the issues related to acquiring knowledge and skills over the Internet and compare them to how currently MOOCs, both xMOOCs and cMOOCs, address these situations. }, keywords = {education, FLOSS, learning, mooc}, doi = {10.1109/EDUCON.2014.6826200}, author = {Robles, G. and Plaza, H. and Gonzalez-Barahona, J.M.} } @inbook {1612, title = {A Layered Approach to Managing Risks in OSS Projects}, booktitle = {Open Source Software: Mobile Open Source Technologies}, series = {IFIP Advances in Information and Communication Technology}, volume = {427}, year = {2014}, pages = {168-171}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = { In this paper, we propose a layered approach to managing risks in OSS projects. 
We define three layers: the first one for defining risk drivers by collecting and summarising available data from different data sources, including human-provided contextual information; the second layer, for converting these risk drivers into risk indicators; the third layer for assessing how these indicators impact the business of the adopting organisation. The contributions are: 1) the complexity of gathering data is isolated in one layer using appropriate techniques, 2) the context needed to interpret this data is provided by expert involvement evaluating risk scenarios and answering questionnaires in a second layer, 3) a pattern-based approach and risk reasoning techniques to link risks to business goals is proposed in the third layer. }, keywords = {Layered Model, open source, OSS, Risk Management}, isbn = {978-3-642-55127-7}, doi = {10.1007/978-3-642-55128-4_23}, url = {http://dx.doi.org/10.1007/978-3-642-55128-4_23}, author = {Franch, Xavier and Kenett, Ron and Mancinelli, Fabio and Susi, Angelo and Ameller, David and Ben-Jacob, Ron and Siena, Alberto}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.} } @conference {Guo:2014:ODC:2597073.2597094, title = {Oops! Where Did That Code Snippet Come from?}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {52{\textendash}61}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {A kernel oops is an error report that logs the status of the Linux kernel at the time of a crash. Such a report can provide valuable first-hand information for a Linux kernel maintainer to conduct postmortem debugging. Recently, a repository has been created that systematically collects kernel oopses from Linux users. However, debugging based on only the information in a kernel oops is difficult. 
We consider the initial problem of finding the offending line, i.e., the line of source code that incurs the crash. For this, we propose a novel algorithm based on approximate sequence matching, as used in bioinformatics, to automatically pinpoint the offending line based on information about nearby machine-code instructions, as found in a kernel oops. Our algorithm achieves 92\% accuracy compared to 26\% for the traditional approach of using only the oops instruction pointer.}, keywords = {debugging, linux kernel, oops, sequence alignment}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597094}, url = {http://doi.acm.org/10.1145/2597073.2597094}, attachments = {https://flosshub.org/sites/flosshub.org/files/guo.pdf}, author = {Guo, Lisong and Lawall, Julia and Muller, Gilles} } @article {thomas2014studying, title = {Studying software evolution using topic models}, journal = {Science of Computer Programming}, volume = {80}, year = {2014}, pages = {457{\textendash}479}, publisher = {Elsevier}, abstract = {Topic models are generative probabilistic models which have been applied to information retrieval to automatically organize and provide structure to a text corpus. Topic models discover topics in the corpus, which represent real world concepts by frequently cooccurring words. Recently, researchers found topics to be effective tools for structuring various software artifacts, such as source code, requirements documents, and bug reports. This research also hypothesized that using topics to describe the evolution of software repositories could be useful for maintenance and understanding tasks. However, research has yet to determine whether these automatically discovered topic evolutions describe the evolution of source code in a way that is relevant or meaningful to project stakeholders, and thus it is not clear whether topic models are a suitable tool for this task. 
In this paper, we take a first step towards evaluating topic models in the analysis of software evolution by performing a detailed manual analysis on the source code histories of two well-known and well-documented systems, JHotDraw and jEdit. We define and compute various metrics on the discovered topic evolutions and manually investigate how and why the metrics evolve over time. We find that the large majority (87\%{\textendash}89\%) of topic evolutions correspond well with actual code change activities by developers. We are thus encouraged to use topic models as tools for studying the evolution of a software system.}, keywords = {Latent Dirichlet allocation, mining software repositories, software evolution, topic model}, url = {http://sail.cs.queensu.ca/publications/pubs/Thomas-2012-SCP.pdf}, author = {Stephen W. Thomas and Adams, Bram and Hassan, Ahmed E. and Blostein, Dorothea} } @inbook {1604, title = {Use of Open Software Tools for Data Offloading Techniques Analysis on Mobile Networks}, booktitle = {Open Source Software: Mobile Open Source Technologies}, series = {IFIP Advances in Information and Communication Technology}, volume = {427}, year = {2014}, pages = {111-112}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = { This research aims to highlight the benefits of using free software based tools for studying a LTE mobile network with realistic parameters. We will overload this LTE network and offload it through data offloading techniques such as small cells and WiFi offload. For this research, discrete-event open software network simulator ns3 will be implemented. Ns3 is a network simulator based on the programming language C++, and has all the necessary libraries to simulate an LTE and WiFi network. 
}, keywords = {Data Offloading, LTE, ns3, OSS for research and education, small cells, WiFi}, isbn = {978-3-642-55127-7}, doi = {10.1007/978-3-642-55128-4_15}, url = {http://dx.doi.org/10.1007/978-3-642-55128-4_15}, author = {Koo, Jos{\'e} M. and Espino, Juan P. and Armuelles, Iv{\'a}n and Villarreal, Rub{\'e}n}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.} } @proceedings {1522, title = {Boa: A Language and Infrastructure for Analyzing Ultra-Large-Scale Software Repositories}, year = {2013}, month = {05/2013}, pages = {422-431}, abstract = {In today{\textquoteright}s software-centric world, ultra-large-scale software repositories, e.g. SourceForge (350,000+ projects), GitHub (250,000+ projects), and Google Code (250,000+ projects) are the new library of Alexandria. They contain an enormous corpus of software and information about software. Scientists and engineers alike are interested in analyzing this wealth of information both for curiosity as well as for testing important hypotheses. However, systematic extraction of relevant data from these repositories and analysis of such data for testing hypotheses is hard, and best left for mining software repository (MSR) experts! The goal of Boa, a domain-specific language and infrastructure described here, is to ease testing MSR-related hypotheses. We have implemented Boa and provide a web-based interface to Boa{\textquoteright}s infrastructure. Our evaluation demonstrates that Boa substantially reduces programming efforts, thus lowering the barrier to entry. We also see drastic improvements in scalability. 
Last but not least, reproducing an experiment conducted using Boa is just a matter of re-running small Boa programs provided by previous researchers.}, keywords = {ease of use, forge, github, google code, lower barrier to entry, mining, repository, reproducible, scalable, Software, sourceforge}, author = {Dyer, Robert and Nguyen, Hoan Anh and Rajan, Hridesh and Nguyen, Tien N.} } @proceedings {1491, title = {Communication in Open Source Software Development Mailing Lists}, year = {2013}, note = {"The entire dataset used in the experiment, including the cards, the resolved aliases, and detailed statistical results, can be downloaded from ..." http://www.st.ewi.tudelft.nl/~guzzi/oss-communication/}, month = {05/2013}, pages = {277-286}, abstract = {Open source software (OSS) development teams use electronic means, such as emails, instant messaging, or forums, to conduct open and public discussions. Researchers investigated mailing lists considering them as a hub for project communication. Prior work focused on specific aspects of emails, for example the handling of patches, traceability concerns, or social networks. This led to insights pertaining to the investigated aspects, but not to a comprehensive view of what developers communicate about. Our objective is to increase the understanding of development mailing lists communication. We quantitatively and qualitatively analyzed a sample of 506 email threads from the development mailing list of a major OSS project, Lucene. Our investigation reveals that implementation details are discussed only in about 35\% of the threads, and that a range of other topics is discussed. Moreover, core developers participate in less than 75\% of the threads. 
We observed that the development mailing list is not the main player in OSS project communication, as it also includes other channels such as the issue repository.}, keywords = {email, lucene, mailing list}, url = {http://www.st.ewi.tudelft.nl/~guzzi/downloads/Guzzi2013msr.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/Guzzi2013msr.pdf}, author = {Guzzi, Anja and Bacchelli, Alberto and Lanza, Michele and Pinzger, Martin and van Deursen, Arie} } @conference {zmzkh-icsm-2013, title = {How does Context affect the Distribution of Software Maintainability Metrics?}, booktitle = {Proceedings of the 29th IEEE International Conference on Software Maintenance}, series = {ICSM {\textquoteright}13}, year = {2013}, note = {"FLOSSMole [25] is another data source, from where we download descriptions (i.e., application domain) of SourceForge software systems. Furthermore, we download latest application domain information and monthly download data of studied software systems directly from SourceForge."}, abstract = {Software metrics have many uses, e.g., defect prediction, effort estimation, and benchmarking an organization against peers and industry standards. In all these cases, metrics may depend on the context, such as the programming language. Here we aim to investigate if the distributions of commonly used metrics do, in fact, vary with six context factors: application domain, programming language, age, lifespan, the number of changes, and the number of downloads. For this preliminary study we select 320 nontrivial software systems from SourceForge. These software systems are randomly sampled from nine popular application domains of SourceForge. We calculate 39 metrics commonly used to assess software maintainability for each software system and use Kruskal Wallis test and Mann-Whitney U test to determine if there are significant differences among the distributions with respect to each of the six context factors. 
We use Cliff{\textquoteright}s delta to measure the magnitude of the differences and find that all six context factors affect the distribution of 20 metrics and the programming language factor affects 35 metrics. We also briefly discuss how each context factor may affect the distribution of metric values. We expect our results to help software benchmarking and other software engineering methods that rely on these commonly used metrics to be tailored to a particular context.}, keywords = {benchmark, context, contextual factor, flossmole, large scale, metrics, mining software repositories, sampling, software maintainability, sourceforge, static metrics}, attachments = {https://flosshub.org/sites/flosshub.org/files/icsm2013_contextstudy.pdf}, author = {Zhang, Feng and Audris Mockus and Ying Zou and Foutse Khomh and Hassan, Ahmed E.} } @article {1553, title = {Interlinking Developer Identities within and across Open Source Projects: The Linked Data Approach}, journal = {ISRN Software Engineering}, volume = {2013}, year = {2013}, month = {2013}, pages = {1 - 12}, abstract = {Software developers use various software repositories in order to interact with each other or to solve related problems. These repositories provide a rich source of information for a wide range of tasks. However, one issue to overcome in order to make this information useful is the identification and interlinking of multiple identities of developers. In this paper, we propose a Linked Data-based methodology to interlink and integrate multiple identities of a developer found in different software repositories of a project as well as across repositories of multiple projects. Providing such interlinking will enable us to keep track of a developer{\textquoteright}s activity not only within a single project but also across multiple projects. The methodology will be presented in general and applied to 5 Apache projects as a case study. 
Further, we show that the few methods suggested so far are not always appropriate to overcome the developer identification problem.}, keywords = {developer, identity, linked data}, doi = {10.1155/2013/584731}, attachments = {https://flosshub.org/sites/flosshub.org/files/584731.pdf}, author = {Iqbal, Aftab and Hausenblas, Michael} } @article {bettenburg2013management, title = {Management of community contributions}, journal = {Empirical Software Engineering}, year = {2013}, pages = {1{\textendash}38}, publisher = {Springer}, abstract = {In recent years, many companies have realized that collaboration with a thriving user or developer community is a major factor in creating innovative technology driven by market demand. As a result, businesses have sought ways to stimulate contributions from developers outside their corporate walls, and integrate external developers into their development process. To support software companies in this process, this paper presents an empirical study on the contribution management processes of two major, successful, open source software ecosystems. We contrast a for-profit (ANDROID) system having a hybrid contribution style, with a not-for-profit (LINUX kernel) system having an open contribution style. To guide our comparisons, we base our analysis on a conceptual model of contribution management that we derived from a total of seven major open-source software systems. A quantitative comparison based on data mined from the ANDROID code review system and the LINUX kernel code review mailing lists shows that both projects have significantly different contribution management styles, suited to their respective market goals, but with individual advantages and disadvantages that are important for practitioners. Contribution management is a real-world problem that has received very little attention from the research community so far. 
Both studied systems (LINUX and ANDROID) employ different strategies and techniques for managing contributions, and both approaches are valuable examples for practitioners. Each approach has specific advantages and disadvantages that need to be carefully evaluated by practitioners when adopting a contribution management process in practice.}, keywords = {android, contribution, linux, management}, url = {http://link.springer.com/article/10.1007/s10664-013-9284-6}, author = {Bettenburg, Nicolas and Hassan, Ahmed E. and Adams, Bram and Daniel M. German} } @proceedings {1561, title = {A Replicable Infrastructure for Empirical Studies of Email Archives}, year = {2013}, month = {10/2013}, pages = {43-50}, publisher = {IEEE}, address = {Baltimore, MD, USA}, abstract = {This paper describes a replicable infrastructure solution for conducting empirical software engineering studies based on email mailing list archives. Mailing list emails, such as those affiliated with free, libre, and open source software (FLOSS) projects, are currently archived in several places online, but each research team that wishes to study these email artifacts closely must design their own solution for collection, storage and cleaning of the data. Consequently, research results will be difficult to replicate, especially as the email archive for any living project will still be continually growing. 
This paper describes a simple, replicable infrastructure for the collection, storage, and cleaning of project email data and analyses.}, keywords = {apache, cleaning, collection, couchdb, database, document-oriented database, email, lucene, mailing lists, nosql, replication, storage}, isbn = {978-0-7695-5121-0}, attachments = {https://flosshub.org/sites/flosshub.org/files/RESERv2.pdf}, author = {Squire, Megan} } @article {1395, title = {Automated topic naming: supporting cross-project analysis of software maintenance activities}, journal = {Empirical Software Engineering}, year = {2012}, abstract = {Software repositories provide a deluge of software artifacts to analyze. Researchers have attempted to summarize, categorize, and relate these artifacts by using semi-unsupervised machine-learning algorithms, such as Latent Dirichlet Allocation (LDA). LDA is used for concept and topic analysis to suggest candidate word-lists or topics that describe and relate software artifacts. However, these word-lists and topics are difficult to interpret in the absence of meaningful summary labels. Current attempts to interpret topics assume manual labelling and do not use domain-specific knowledge to improve, contextualize, or describe results for the developers. We propose a solution: automated labelled topic extraction. Topics are extracted using LDA from commit-log comments recovered from source control systems. These topics are given labels from a generalizable cross-project taxonomy, consisting of non-functional requirements. Our approach was evaluated with experiments and case studies on three large-scale Relational Database Management System (RDBMS) projects: MySQL, PostgreSQL and MaxDB. The case studies show that labelled topic extraction can produce appropriate, context-sensitive labels that are relevant to these projects, and provide fresh insight into their evolving software development activities. 
}, keywords = {LDA, maxdb, mysql, postgresql, topics}, issn = {1573-7616}, doi = {10.1007/s10664-012-9209-9}, author = {Hindle, Abram and Ernst, Neil A. and Godfrey, Michael W. and Mylopoulos, John} } @proceedings {1439, title = {A Comprehensive Study of Software Forks: Dates, Reasons and Outcomes}, volume = {378}, year = {2012}, pages = {1-14}, publisher = {IFIP AICT}, abstract = {Summary. In general it is assumed that a software product evolves within the authoring company or group of developers that develop the project. However, in some cases different groups of developers make the software evolve in different directions, a situation which is commonly known as a fork. In the case of free software, although forking is a practice that is considered as a last resort, it is inherent to the four freedoms. This paper tries to shed some light on the practice of forking. Therefore, we have identified significant forks, several hundreds in total, and have studied them in depth. Among the issues that have been analyzed for each fork is the date when the forking occurred, the reason of the fork, and the outcome of the fork, i.e., if the original or the forking project are still developed. 
Our investigation shows, among other results, that forks occur in every software domain, that they have become more frequent in recent years, and that very few forks merge with the original project.}, keywords = {forking, forks, free software, Legal, open source, social, software evolution, sustainability}, attachments = {https://flosshub.org/sites/flosshub.org/files/paper_0.pdf}, author = {Gregorio Robles and Gonz{\'a}lez-Barahona, Jes{\'u}s M.} } @conference {1316, title = {An Empirical Study of Volunteer Members{\textquoteright} Perceived Turnover in Open Source Software Projects}, booktitle = {45th Hawai{\textquoteright}i International Conference on System Sciences}, year = {2012}, note = {"After designing the questionnaire, we conducted a web-based survey by inviting developers working in sourceforge.net and launchpad.net."}, month = {01/2012}, pages = {3396-3405}, abstract = {Turnover of volunteer members and the ensuing instability bring about severe problems to open source software (OSS) projects. To better understand it, we based our study on Herzberg{\textquoteright}s two-factor theory to investigate the influence of hygiene factors on volunteer members{\textquoteright} dissatisfaction and perceived turnover. After empirically testing the research model, we found shortcomings in project regulation and administration are the key reason for volunteer members{\textquoteright} dissatisfaction, followed by future rewards and personal needs for software functionalities. By contrast, a possible lack of supportive working relationship among OSS developers was not found to be a trigger for developer dissatisfaction. Dissatisfaction was confirmed to be a significant predictor of perceived turnover. The results demonstrates generalized hygiene factors cannot unreflectively be transferred into the OSS context because volunteer members{\textquoteright} personal expectation has a weaker influence on perceived turnover than objective attributes of OSS project. 
Our study further makes suggestions for project administrators.}, keywords = {developers, launchpad, sourceforge, Survey}, author = {Yu, Yiqing and Benlian, Alexander and Hess, Thomas} } @proceedings {1377, title = {Examining Turnover in Open Source Software Projects Using a Logistic Hierarchical Linear Modeling Approach}, volume = {378}, year = {2012}, month = {09/2012}, address = {Eighth International Conference on Open Source Systems}, abstract = {Developer turnover in open source software projects is a critical and insufficiently researched problem. Previous research has focused on understanding the developer motivations to contribute using either the individual developer perspective or the project perspective. In this exploratory study we argue that because the developers are embedded in projects it is imperative to include both perspectives. We analyze turnover in open source software projects by including both individual developer level factors, as well as project specific factors. Using the Logistic Hierarchical Linear Modeling approach allows us to empirically examine the factors influencing developer turnover and also how these factors differ among developers and projects.}, keywords = {Logistic Hierarchical Linear Modeling, sourceforge, turnover}, author = {Sharma, P.N. and Hulland, J. and Daniel, S.} } @proceedings {1451, title = {Exploring the Role of Outside Organizations in Free / Open Source Software Projects}, volume = {378}, year = {2012}, month = {09/2012}, pages = {201-215}, publisher = {IFIP AICT}, abstract = {Free/Open Source Software (FOSS) projects have a reputation for being grass-roots efforts driven by individual contributors volunteering their time and effort. While this may be true for a majority of smaller projects, it is not always the case for large projects. 
As projects grow in size, importance and complexity, many come to depend on corporations, universities, NGO{\textquoteright}s and governments, for support and contributions, either financially or through seconded staff. As outside organizations get involved in projects, how does this affect their governance, transparency and direction? To study this question we gathered bug reports and commit logs for GCC and the Linux Kernel. We found that outside organizations contribute a majority of code but rarely participate in bug triaging. Therefore their code does not necessarily address the needs of others and may distort governance and direction. We conclude that projects should examine their dependence on outside organizations}, keywords = {bug reports, commit, Community sustainability, Contributor affiliation, gcc, governance, linux kernel, Participation metrics}, url = {http://research.engr.oregonstate.edu/hci/sites/research.engr.oregonstate.edu.hci/files/papers/forrest2012exploring.pdf}, author = {Forrest, Darren and Jensen, Carlos and Mohan, Nitin and Davidson, Jennifer} } @article {1519, title = {How the FLOSS Research Community Uses Email Archives}, journal = {International Journal of Open Source Software and Processes}, volume = {4}, year = {2012}, note = {classifies 72 FLOSS papers into various analysis categories}, month = {12/2012}, pages = {37 - 59}, abstract = {Artifacts of the software development process, such as source code or emails between developers, are a frequent object of study in empirical software engineering literature. One of the hallmarks of free, libre, and open source software (FLOSS) projects is that the artifacts of the development process are publicly-accessible and therefore easily collected and studied. Thus, there is a long history in the FLOSS research community of using these artifacts to gain understanding about the phenomenon of open source software, which could then be compared to studies of software engineering more generally. 
This paper looks specifically at how the FLOSS research community has used email artifacts from free and open source projects. It provides a classification of the relevant literature using a publicly-available online repository of papers about FLOSS development using email. The outcome of this paper is to provide a broad overview for the software engineering and FLOSS research communities of how other researchers have used FLOSS email message artifacts in their work}, keywords = {email, email archives, literature, mailing lists, review, Survey}, issn = {1942-3934}, doi = {10.4018/jossp.2012010103}, attachments = {https://flosshub.org/sites/flosshub.org/files/ijossp_v3_PREPRINT.pdf}, author = {Squire, Megan} } @proceedings {1467, title = {A Linguistic Analysis on How Contributors Solve Software Problems in a Distributed Context}, volume = {378}, year = {2012}, note = {"... a sample of 4109 bug reports was extracted from Bugzilla bug report repository" "The analysis of our sample allows identifying the roles of participants in this activity according to their hierarchical statutes in the community. " "we look statistically at words that people use in Bugzilla to discern differences in the discourse and representation between participants (core and periphery)"}, month = {09/2012}, pages = {322-330}, publisher = {IFIP AICT, Springer}, abstract = {There is a little understanding of distributed solving activities in Open Source communities. This study aimed to provide some insights in this way. It was applied to the context of Bugzilla, the bug tracking system of Mozilla community. This study investigated the organizational aspects of this meditated, complex and highly distributed context through a linguistic analysis method. 
The main finding of this research shows that the organization of distributed problem-solving activities in Bugzilla isn{\textquoteright}t based only on the hierarchical distribution of the work between core and periphery participants but on their implication in the interactions. This implication varies according to the status of each one participant in the community. That is why we distinguish their roles, as well as, the established modes to manage such activity.}, keywords = {bug report, bugzilla, linguistic, text mining}, author = {Masmoudi, H{\'e}la and Boughzala, Imed} } @article {1879, title = {Linux Kernel Development: How Fast it is Going, Who is Doing It, What They are Doing, and Who is Sponsoring It}, year = {2012}, month = {03/2012}, institution = {The Linux Foundation}, abstract = {The kernel which forms the core of the Linux system is the result of one of the largest cooperative software projects ever attempted. Regular 2-3 month releases deliver stable updates to Linux users, each with significant new features, added device support, and improved performance. The rate of change in the kernel is high and increasing, with between 8,000 and 12,000 patches going into each recent kernel release. These releases each contain the work of over 1,000 developers representing nearly 200 corporations. Since 2005, over 7,800 individual developers from almost 800 different companies have contributed to the kernel. The Linux kernel, thus, has become a common resource developed on a massive scale by companies which are fierce competitors in other areas. This is the fourth update of this document, which has been published roughly annually since 2008. It covers development through the 3.2 release, with an emphasis on the releases (2.6.36 to 3.2) made since the last update. It has been a busy period, with seven kernel releases created, many significant changes made, and continual growth of the kernel developer and user community. 
}, keywords = {corporate, corporations, developers, linux kernel, metrics}, attachments = {https://flosshub.org/sites/flosshub.org/files/lf_kernel_development_2012.pdf}, author = {Corbet, Jonathan and Greg Kroah-Hartman and Amanda McPherson} } @article {1381, title = {Adopting Free/Libre/Open Source Software Practices, Techniques and Methods for Industrial Use}, journal = {Journal of the Association for Information Systems}, volume = {12}, number = {1}, year = {2011}, abstract = {Today{\textquoteright}s software companies face the challenges of highly distributed development projects and constantly changing requirements. This paper proposes the adoption of relevant Free/Libre/Open Source Software (FLOSS) practices in order to improve software development projects in industry. Many FLOSS projects have proven to be very successful, producing high quality products with steady and frequent releases. This study aims to identify FLOSS practices that can be adapted for the corporate environment. To achieve this goal, a framework to compare FLOSS and industrial development methodologies was created. Three successful FLOSS projects were selected as study targets (the Linux Kernel, the FreeBSD operating system, and the JBoss application server), as well as two projects from Ericsson, a large telecommunications company. Based on an analysis of these projects, FLOSS best practices were tailored to fit industrial development environments. The final results consisted of a set of key adoption opportunities that aimed to improve software quality and overall development productivity by importing best practices from the FLOSS environment. 
The adoption opportunities were then validated at three large corporations.}, keywords = {freebsd, jboss, linux, linux kernel}, url = {http://aisel.aisnet.org/jais/vol12/iss1/1}, author = {Torkar, Richard and Minoves, Pau and Garrig{\'o}s, Janina} } @article {1324, title = {Are Developers Fixing Their Own Bugs?}, journal = {International Journal of Open Source Software and Processes}, volume = {3}, year = {2011}, note = {"The analysis is focused at the level of lines of code and it uses the information stored in the source code management system"}, pages = {23 - 42}, abstract = {The process of fixing software bugs plays a key role in the maintenance activities of a software project. Ideally, code ownership and responsibility should be enforced among developers working on the same artifacts, so that those introducing buggy code could also contribute to its fix. However, especially in FLOSS projects, this mechanism is not clearly understood: in particular, it is not known whether those contributors fixing a bug are the same introducing and seeding it in the first place. This paper analyzes the comm-central FLOSS project, which hosts part of the Thunderbird, SeaMonkey, Lightning extensions and Sunbird projects from the Mozilla community. The analysis is focused at the level of lines of code and it uses the information stored in the source code management system. The results of this study show that in 80\% of the cases, the bug-fixing activity involves source code modified by at most two developers. It also emerges that the developers fixing the bug are only responsible for 3.5\% of the previous modifications to the lines affected; this implies that the other developers making changes to those lines could have made that fix. 
In most of the cases the bug fixing process in comm-central is not carried out by the same developers than those who seeded the buggy code.}, keywords = {bug fixing, developers, loc, scm}, issn = {1942-3934}, doi = {10.4018/jossp.2011040102}, author = {Izquierdo-Cortazar, Daniel and Capiluppi, Andrea and Jesus M. Gonzalez-Barahona} } @proceedings {1278, title = {Building Knowledge in Open Source Software Research in Six Years of Conferences}, year = {2011}, note = {RQ1. Is there any social network underlying the research production at the OSS conference series? RQ2. What are the major streams of research proposed at the OSS conference series?}, month = {10/2011}, pages = {123-141}, publisher = {Springer}, abstract = {Since its origins, the diffusion of the OSS phenomenon and the information about it has been entrusted to the Internet and its virtual communities of developers. This public mass of data has attracted the interest of researchers and practitioners aiming at formalizing it into a body of knowledge. To this aim, in 2005, a new series of conferences on OSS started to collect and convey OSS knowledge to the research and industrial community. Our work mines articles of the OSS conference series to understand the process of knowledge grounding and the community surrounding it. As such, we propose a semi-automated approach for a systematic mapping study on these articles. We automatically build a map of cross-citations among all the papers of the conferences and then we manually inspect the resulting clusters to identify knowledge building blocks and their mutual relationships. 
We found that industry-related, quality assurance, and empirical studies often originate or maintain new streams of research.}, keywords = {Cross-citations, flossmole cited, graph, literature review, network, research, Systematic Mapping Study}, author = {Mulazzini, Fabio and Rossi, Bruno and Russo, Barbara and Steff, Maximilian} } @proceedings {1289, title = {Cliff Walls: An Analysis of Monolithic Commits Using Latent Dirichlet Allocation}, year = {2011}, note = {"Our data set consists of the version control logs of almost 10,000 projects from SourceForge, acquired in late 2006"}, month = {10/2011}, pages = {282-298}, publisher = {Springer}, abstract = {Artifact-based research provides a mechanism whereby researchers may study the creation of software yet avoid many of the difficulties of direct observation and experimentation. However, there are still many challenges that can affect the quality of artifact-based studies, especially those studies examining software evolution. Large commits, which we refer to as {\textquotedblleft}Cliff Walls,{\textquotedblright} are one significant threat to studies of software evolution because they do not appear to represent incremental development. We used Latent Dirichlet Allocation to extract topics from over 2 million commit log messages, taken from 10,000 SourceForge projects. The topics generated through this method were then analyzed to determine the causes of over 9,000 of the largest commits. We found that branch merges, code imports, and auto-generated documentation were significant causes of large commits. We also found that corrective maintenance tasks, such as bug fixes, did not play a significant role in the creation of large commits.}, keywords = {artifacts, commit, cvs, LDA, lines of code, log files, scm, sloc, sourceforge, version control}, author = {Pratt, Landon J. and MacLean, Alexander C. and Knutson, Charles D. 
and Ringger, Eric K.} } @conference {1216, title = {Experiences Mining Open Source Release Histories}, booktitle = {International Conference on Software and Systems Process (ICSSP 2011) }, year = {2011}, note = {"First, we selected the projects to initially target, using several criteria to get a broad picture of the open source landscape. Second, we collected the actual data, using a framework of parsers and some manual inspection. Third, we standardized and inserted the data into a database for later use." "but we plan to eventually cross reference our list of projects with existing open source project information (such as FLOSSmole) to take advantage of the work already done by other researchers." "For each release, we collected the following data: the project it belonged to, the date the release was published, the type of release, the release label (version number) and the source of the data" discussion of their difficulties "We conclude that programmatically creating a release history database from existing open source data is not trivial," "We have currently collected 1579 distinct releases from 22 different open source projects"}, month = {05/2011}, abstract = {Software releases form a critical part of the life cycle of a software project. Typically, each project produces releases in its own way, using various methods of versioning, archiving, announcing and publishing the release. Understanding the release history of a software project can shed light on the project history, as well as the release process used by that project, and how those processes change. However, many factors make automating the retrieval of release history information difficult, such as the many sources of data, a lack of relevant standards and a disparity of tools used to create releases. In spite of the large amount of raw data available, no attempt has been made to create a release history database of a large number of projects in the open source ecosystem. 
This paper presents our experiences, including the tools, techniques and pitfalls, in our early work to create a software release history database which will be of use to future researchers who want to study and model the release engineering process in greater depth.}, keywords = {doap, flossmole cited, life cycle, release engineering, release history, release management, releases}, attachments = {https://flosshub.org/sites/flosshub.org/files/icssp11short-p034-tsay.pdf}, author = {Jason Tsay and Wright, Hyrum and Perry, Dewayne} } @conference {1307, title = {How do developers blog?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {publishing frequency, post structure, word usage, publication patterns, content}, month = {05/2011}, pages = {123-132}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {We report on an exploratory study, which aims at understanding how software developers use social media compared to conventional development infrastructures. We analyzed the blogging and the committing behavior of 1,100 developers in four large open source communities. We observed that these communities intensively use blogs with one new entry about every 8 hours. A blog entry includes 14 times more words than a commit message. When analyzing the content of the blogs, we found that most popular topics represent high-level concepts such as functional requirements and domain concepts. Source code related topics are covered in less than 15\% of the posts. Our results also show that developers are more likely to blog after corrective engineering and management activities than after forward engineering and re-engineering activities. 
Our findings call for a hypothesis-driven research to further understand the role of social media in software engineering and integrate it into development processes and tools.}, keywords = {blog, communication, developer, eclipse, gnome, LDA, postgres, python}, isbn = {9781450305747}, doi = {10.1145/1985441.1985461}, author = {Maalej, Walid and Pagano, Dennis} } @conference {Sethanandha:2011:IOS:1985793.1986018, title = {Improving open source software patch contribution process: methods and tools}, booktitle = {Proceedings of the 33rd International Conference on Software Engineering}, series = {ICSE {\textquoteright}11}, year = {2011}, pages = {1134{\textendash}1135}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The patch contribution process (PCP) is very important to the sustainability of OSS projects. Nevertheless, there are several issues on patch contribution in mature OSS projects, which include time consuming process, lost and ignored patches, slow review process. These issues are recognized by researchers and OSS projects, but have not been addressed. 
In this dissertation, I apply Kanban method to guide process improvement and tools development to reduce PCP cycle time.}, keywords = {coordination, free software, kanban, lean methods, open source software, patches, productivity}, isbn = {978-1-4503-0445-0}, doi = {10.1145/1985793.1986018}, url = {http://doi.acm.org/10.1145/1985793.1986018}, author = {Bhuricha Deen Sethanandha} } @proceedings {1277, title = {Knowledge Homogeneity and Specialization in the Apache HTTP Server Project}, year = {2011}, note = {"Our data set consists of the commit history and email archives for the Apache HTTP Server Project, spanning sixteen years (2/27/1995 - 1/31/2011)" "we 1) mapped the committers to email records, 2) cleaned the email records to remove extraneous information, 3) identified topics of discussion in the resulting messages, and 4) constructed a social network model from committers and topics." "If specialization exists within the httpd community, we should see distinct communities develop around topics. In addition, unique groups of developers should congregate around specialized subtopics. We examined the data from both angles: topical affinity and topic communities." }, month = {10/2011}, pages = {106-122}, publisher = {Springer}, abstract = {We present an analysis of developer communication in the Apache HTTP Server project. Using topic modeling techniques we expose latent conceptual sub-communities arising from developer specialization within the greater developer population. However, we found that among the major contributors to the project, very little specialization exists. 
We present theories to explain this phenomenon, and suggest further research.}, keywords = {apache, commits, developer, email, email archive, LDA, mailing list, revision control, revision history, scm, social network analysis, specialization, subversion, svn}, url = {http://sequoia.cs.byu.edu/lab/files/pubs/MacLean2011a.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/MacLean2011a.pdf}, author = {MacLean, Alexander C. and Pratt, Landon J. and Knutson, Charles D. and Ringger, Eric K.} } @proceedings {1281, title = {License Update and Migration Processes in Open Source Software Projects}, year = {2011}, note = {"The case studies in this report are part of an ongoing, multi-year research project discovering and modeling open source software processes. Our research methodology is ethnographically informed, applying a grounded theory to the analysis of artifacts found in OSS projects. The primary data sources in this study come from mailing list archives of the Apache and NetBeans projects."}, month = {10/2011}, pages = {177-195}, publisher = {Springer}, abstract = {Open source software (OSS) has increasingly been the subject of research efforts. Central to this focus is the nature under which the software can be distributed, used, and modified and the causes and consequent effects on software development, usage, and distribution. At present, we have little understanding of, what happens when these licenses change, what motivates such changes, and how new licenses are created, updated, and deployed. Similarly, little attention has been paid to the agreements under which contributions are made to OSS projects and the impacts of changes to these agreements. We might also ask these same questions regarding the licenses governing how individuals and groups contribute to OSS projects. 
This paper focuses on addressing these questions with case studies of processes by which the Apache Software Foundation{\textquoteright}s creation and migration to Version 2.0 of the Apache Software License and the NetBeans project{\textquoteright}s migration to the Joint Licensing Agreement.}, keywords = {apache, case study, email, email archive, license evolution, mailing list, netbeans, open source, process}, attachments = {https://flosshub.org/sites/flosshub.org/files/1.pdf}, author = {Chris Jensen and Walt Scacchi} } @proceedings {1290, title = {Package Upgrade Robustness: An Analysis for GNU/Linux Package Management Systems}, year = {2011}, month = {10/2011}, pages = {299-306}, publisher = {Springer}, abstract = {GNU/Linux systems are today used in servers, desktops, mobile and embedded devices. One of the critical operations is the installation and maintenance of software packages in the system. Currently there are no frameworks or tools for evaluating Package Management Systems (PMSs), such as RPM, in Linux and for measuring their reliability. The authors perform an analysis of the robustness of the RPM engine and discuss some of the current limitations. This article contributes to the enhancement of Software Reliability in Linux by providing a framework and testing tools under an open source license. These tools can easily be extended to other PMSs such as DEB packages or Gentoo Portage.}, keywords = {linux, package management, rpm}, author = {Thomson, John and Guerreiro, Andre and Paulo Trezentos and Johnson, Jeff} } @proceedings {1275, title = {Preparing FLOSS for Future Network Paradigms: A Survey on Linux Network Management}, year = {2011}, month = {10/2011}, pages = {75-89}, publisher = {Springer}, abstract = {Operating system tools must fulfill the requirements generated by the advances in networking paradigms. 
To understand the current state of the Free, Libre and Open Source Software (FLOSS) ecosystem, we present a survey on the main tools used to manage and interact with the network, and how they are organized in Linux-based operating systems. Based on the survey results, we present a reference Linux network stack that can serve as the basis for future heterogeneous network environments, contributing towards a standardized approach in Linux. Using this stack, and focusing on dynamic and spontaneous network interactions, we present an evolution path for network related technologies, contributing to Linux as a network research operating system and to FLOSS as a whole.}, keywords = {linux, networking, Survey}, author = {Matos, Alfredo and Thomson, John and Paulo Trezentos} } @proceedings {1276, title = {A Review of Tool Support for User-Related Communication in FLOSS Development}, year = {2011}, note = {"We have carried out a literature review addressing communication in FLOSS projects, and contrasted the findings with Human-Computer Interaction (HCI) literature on user-developer communication."}, month = {10/2011}, pages = {90-105}, abstract = {Free/Libre/Open Source Software (FLOSS) projects rely on Internet tools for communication and in coordinating their work. Communication between developers is well supported in FLOSS projects, but user-developer communication has proven out to be challenging. This paper examines the following questions: {\textquotedblright}What kinds of means for communication exist in FLOSS projects for user-developer communication? What kinds of means should there be?{\textquotedblright} We have carried out a literature review addressing communication in FLOSS projects, and contrasted the findings with Human-Computer Interaction (HCI) literature on user-developer communication. 
HCI literature indicates that user-developer communication is needed during requirements construction, design and evaluation tasks, and HCI specialists are needed for orchestrating the communication and the user related tasks. Communication during the evaluation task is somewhat supported in FLOSS projects, but design and requirements construction are badly in need for support, even though ideas have already been presented. In addition, HCI specialists are in need of different kinds of communication support in FLOSS projects.}, keywords = {Free/Libre/Open Source Software, Human-Computer Interaction, literature review, Tool Support, User-Developer Communication}, author = {Rantalinen, Aapo and Hedberg, Henrik and Iivari, Netta} } @conference {Meneely:2011:SDN:1985793.1985832, title = {Socio-technical developer networks: should we trust our measurements?}, booktitle = {Proceedings of the 33rd International Conference on Software Engineering}, series = {ICSE {\textquoteright}11}, year = {2011}, pages = {281{\textendash}290}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software development teams must be properly structured to provide effective collaboration to produce quality software. Over the last several years, social network analysis (SNA) has emerged as a popular method for studying the collaboration and organization of people working in large software development teams. Researchers have been modeling networks of developers based on socio-technical connections found in software development artifacts. Using these developer networks, researchers have proposed several SNA metrics that can predict software quality factors and describe the team structure. But do SNA metrics measure what they purport to measure? The objective of this research is to investigate if SNA metrics represent socio-technical relationships by examining if developer networks can be corroborated with developer perceptions. 
To measure developer perceptions, we developed an online survey that is personalized to each developer of a development team based on that developer{\textquoteright}s SNA metrics. Developers answered questions about other members of the team, such as identifying their collaborators and the project experts. A total of 124 developers responded to our survey from three popular open source projects: the Linux kernel, the PHP programming language, and the Wireshark network protocol analyzer. Our results indicate that connections in the developer network are statistically associated with the collaborators whom the developers named. Our results substantiate that SNA metrics represent socio-technical relationships in open source development projects, while also clarifying how the developer network can be interpreted by researchers and practitioners.}, keywords = {developer network, developers, linux, linux kernel, PHP, social network analysis, Survey, wireshark}, isbn = {978-1-4503-0445-0}, doi = {10.1145/1985793.1985832}, url = {http://doi.acm.org/10.1145/1985793.1985832}, author = {Meneely, Andrew and Williams, Laurie} } @proceedings {1271, title = {Towards a Unified Definition of Open Source Quality}, year = {2011}, note = {"In order to answer the research question, how is quality defined in the FLOSS literature, we performed a literature review." "we searched Google Scholar for journal articles and conference papers containing the terms {\textquotedblleft}open source{\textquotedblright} and {\textquotedblleft}quality{\textquotedblright}" "This process left us with 24 papers, to which we then added 16 from the quality and defect-fixing categories in [34] that met the above stated criteria. This left us with 40 papers that defined quality and performed some form of empirical validation of that definition." "there is little consensus in the FLOSS literature when it comes to defining quality." 
defect resolution versus modularity: "Defect resolution rates (amount of defects resolved, speed of resolution) are the best way to measure a community{\textquoteright}s commitment to quality, because they recognize that FLOSS is not a static product, but ever evolving. These rates should be calculated per release, and not cumulatively, because the cycle of FLOSS evolution is the release. Researchers should be careful to only include defects and not new feature requests, duplicates, or poorly reported bugs into their calculations. Modularity is being touted as the main driver of FLOSS quality success, but it needs to be further defined and studied in order to understand how it works."}, month = {10/2011}, pages = {17-33}, publisher = {Springer}, abstract = {Software quality needs to be specified and evaluated in order to determine the success of a development project, but this is a challenge with Free/Libre Open Source Software (FLOSS) because of its permanently emergent state. This has not deterred the growth of the assumption that FLOSS is higher quality than traditionally developed software, despite mixed research results. With this literature review, we found the reason for these mixed results is that quality is being defined, measured, and evaluated differently. We report the most popular definitions, such as software structure measures, process measures, such as defect fixing, and maturity assessment models. The way researchers have built their samples has also contributed to the mixed results with different project properties being considered and ignored. Because FLOSS projects are evolving, their quality is too, and it must be measured using metrics that take into account its community{\textquoteright}s commitment to quality rather than just its software structure. 
Challenges exist in defining what constitutes a defect or bug, and the role of modularity in affecting FLOSS quality.}, keywords = {literature review, measurement, open source, quality, Software}, author = {Ruiz, Claudia and Robinson, William} } @conference {Bougie:2011:TUT:1984701.1984707, title = {Towards understanding twitter use in software engineering: preliminary findings, ongoing challenges and future questions}, booktitle = {Proceedings of the 2nd International Workshop on Web 2.0 for Software Engineering}, series = {Web2SE {\textquoteright}11}, year = {2011}, note = {paper d/l from http://www.thechiselgroup.org/publications/content/towards-understanding-twitter-use-software-engineering-preliminary-findings-ong "From this site, we selected the top 30 individuals for the topics Linux and Eclipse. We chose these two topics based on their potential to expose "tweeters" from a large operating system community as well as an IDE development community. We also decided to investigate a project for which all committers use Twitter. Through a colleague, we were informed that the MXUnit project lists the Twitter user names for all eight of its committers. The MXUnit project [5] is a small, open source ColdFusion test framework that is written as an Eclipse plug-in."}, pages = {31{\textendash}36}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {There has been some research conducted around the motivation for the use of Twitter and the value brought by micro-blogging tools to individuals and business environments. This paper builds on our understanding of how the phenomenon affects the population which birthed the technology: Software Engineers. We find that the Software Engineering community extensively leverages Twitter{\textquoteright}s capabilities for conversation and information sharing and that use of the tool is notably different between distinct Software Engineering groups. 
Our work exposes topics for future research and outlines some of the challenges in exploring this type of data.}, keywords = {eclipse, linux, mxunit, social media, software development, twitter, web 2.0}, isbn = {978-1-4503-0595-2}, doi = {10.1145/1984701.1984707}, url = {http://doi.acm.org/10.1145/1984701.1984707}, attachments = {https://flosshub.org/sites/flosshub.org/files/WEB2SE2011.pdf}, author = {Bougie, Gargi and Starke, Jamie and Storey, Margaret-Anne and Daniel M. German} } @comment {duplicate record of Bougie:2011:TUT:1984701.1984707 (repeated citation key; differs only in the attachments URL), title = {Towards understanding twitter use in software engineering: preliminary findings, ongoing challenges and future questions}, booktitle = {Proceedings of the 2nd International Workshop on Web 2.0 for Software Engineering}, series = {Web2SE {\textquoteright}11}, year = {2011}, note = {paper d/l from http://www.thechiselgroup.org/publications/content/towards-understanding-twitter-use-software-engineering-preliminary-findings-ong "From this site, we selected the top 30 individuals for the topics Linux and Eclipse. We chose these two topics based on their potential to expose "tweeters" from a large operating system community as well as an IDE development community. We also decided to investigate a project for which all committers use Twitter. Through a colleague, we were informed that the MXUnit project lists the Twitter user names for all eight of its committers. The MXUnit project [5] is a small, open source ColdFusion test framework that is written as an Eclipse plug-in."}, pages = {31{\textendash}36}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {There has been some research conducted around the motivation for the use of Twitter and the value brought by micro-blogging tools to individuals and business environments. This paper builds on our understanding of how the phenomenon affects the population which birthed the technology: Software Engineers. 
We find that the Software Engineering community extensively leverages Twitter{\textquoteright}s capabilities for conversation and information sharing and that use of the tool is notably different between distinct Software Engineering groups. Our work exposes topics for future research and outlines some of the challenges in exploring this type of data.}, keywords = {eclipse, linux, mxunit, social media, software development, twitter, web 2.0}, isbn = {978-1-4503-0595-2}, doi = {10.1145/1984701.1984707}, url = {http://doi.acm.org/10.1145/1984701.1984707}, attachments = {https://flosshub.org/sites/flosshub.org/files/WEB2SE2011_0.pdf}, author = {Bougie, Gargi and Starke, Jamie and Storey, Margaret-Anne and Daniel M. German} } @conference {Rigby:2011:UBB:1985793.1985867, title = {Understanding broadcast based peer review on open source software projects}, booktitle = {Proceedings of the 33rd International Conference on Software Engineering}, series = {ICSE {\textquoteright}11}, year = {2011}, note = {http://helium.cs.uvic.ca/other/Rigby2011ICSE.pdf 5 projects}, pages = {541{\textendash}550}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software peer review has proven to be a successful technique in open source software (OSS) development. In contrast to industry, where reviews are typically assigned to specific individuals, changes are broadcast to hundreds of potentially interested stakeholders. Despite concerns that reviews may be ignored, or that discussions will deadlock because too many uninformed stakeholders are involved, we find that this approach works well in practice. In this paper, we describe an empirical study to investigate the mechanisms and behaviours that developers use to find code changes they are competent to review. We also explore how stakeholders interact with one another during the review process. We manually examine hundreds of reviews across five high profile OSS projects. 
Our findings provide insights into the simple, community-wide techniques that developers use to effectively manage large quantities of reviews. The themes that emerge from our study are enriched and validated by interviewing long-serving core developers.}, keywords = {apache, case studies, email, freebsd, grounded theory, kde, linux, linux kernel, open source software, peer review, subversion}, isbn = {978-1-4503-0445-0}, doi = {10.1145/1985793.1985867}, attachments = {https://flosshub.org/sites/flosshub.org/files/Rigby2011ICSE.pdf}, author = {Peter C. Rigby and Storey, Margaret-Anne} } @conference {1311, title = {What topics do Firefox and Chrome contributors discuss?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, month = {05/2011}, pages = {234-237}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Firefox and Chrome are two very popular open source Web browsers, implemented in C/C++. This paper analyzes what topics were discussed in Firefox and Chrome bug reports over time. To this aim, we indexed the text contained in bug reports submitted each semester of the project history, and identified topics using Latent Dirichlet Allocation (LDA). Then, we investigated to what extent Firefox and Chrome developers/contributors discussed similar topics, either in different periods, or over the same period. 
Results indicate a non-negligible overlap of topics, mainly on issues related to page layouting, user interaction, and multimedia contents.}, keywords = {bug reports, chrome, Firefox, LDA, msr challenge}, isbn = {9781450305747}, doi = {10.1145/1985441.1985480}, author = {Zagarese, Quirino and Distante, Damiano and Di Penta, Massimiliano and Bernardi, Mario Luca and Sementa, Carmine} } @article {1382, title = {Analysis of virtual communities supporting OSS projects using social network analysis}, journal = {Information and Software Technology}, volume = {52}, year = {2010}, month = {3/2010}, pages = {296 - 303}, abstract = {This paper analyses the behaviour of virtual communities for Open Source Software (OSS) projects. The development of OSS projects relies on virtual communities, which are built on relationships among members, being their final objective sharing knowledge and improving the underlying project. This study addresses the interactive collaboration in these kinds of communities applying social network analysis (SNA). In particular, SNA techniques will be used to identify those members playing a middle-man role among other community members. Results will illustrate the importance of this role to achieve successful virtual communities.}, keywords = {arm, email, Knowledge brokers, linux, mailing list, open source software, social network analysis, virtual communities}, issn = {09505849}, doi = {10.1016/j.infsof.2009.10.007}, url = {http://www.sciencedirect.com/science/article/pii/S0950584909001888}, author = {Toral, S.L. and Mart{\'\i}nez-Torres, M.R. and Barrero, F.} } @conference {1256, title = {Analyzing Leadership Dynamics in Distributed Group Communication}, booktitle = {2010 43rd Hawaii International Conference on System Sciences (HICSS 2010)}, year = {2010}, note = {"Our analysis examines the communication patterns in two FLOSS development projects, Fire and Gaim" "These data were imported into a database to allow automated analysis. 
The Fire data set includes about 1,800 events in the user email list, 7,800 messages in the developer venues, and 1,300 events in the combined trackers, spanning a period of 54 months. The significantly larger Gaim data set included over 41,000 events in the user forum, over 30,000 events in the developer venues, and about 20,000 events in the trackers, generated over 78 months." "The dynamic network analysis was performed using a scientific workflow tool, Taverna Workbench"}, pages = {1 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {We apply social network analysis (SNA) to examine the dynamics of leadership in distributed groups, specifically Free/Libre Open Source Software development projects, and its relation to group performance. Based on prior work on leadership in distributed groups, we identify leaders with those who make the highest level of contribution to the group and assess the degree of leadership by measuring centralization of communications. We compare the dynamics of leadership in two FLOSS projects, one more and one less effective. We find that in both projects, centralization was higher in developer-oriented communications venues than in user-oriented venues, suggesting higher degrees of leadership in developer venues. However, we do not find a consistent relation between centralization and effectiveness. 
We suggest that SNA can instead be useful for identifying interesting periods in the history of the project, e.g., periods where the leadership of the project is in transition.}, keywords = {core, DYNAMICS, email, email archives, fire, flossmole, gaim, leadership, mailing list, project success, social network analysis, srda}, isbn = {978-1-4244-5509-6}, doi = {10.1109/HICSS.2010.62}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-06-02.pdf}, author = {Kevin Crowston and Andrea Wiggins and Howison, James} } @conference {948, title = {Beyond replication: An example of the potential benefits of replicability in the mining of software repositories community}, booktitle = {1st Workshop on Replication in Empirical Software Engineering Research}, year = {2010}, month = {05/2010}, keywords = {literature review, msr, replication}, author = {Gregorio Robles and Daniel M. German} } @article {1328, title = {Developing a Dynamic and Responsive Online Learning Environment}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {32 - 48}, abstract = {Charles Sturt University adopted the open source software, Sakai, as the foundation for the university{\textquoteright}s new, integrated Online Learning Environment. This study explores whether a pedagogical advantage exists in adopting such an open source learning management system. Research suggests that the community source approach to development of open source software has many inherent pedagogical advantages, but this paper examines whether this is due to the choice of open source software or simply having access to appropriate technology for learning and teaching in the 21st century. The author also addresses the challenges of the project management methodology and processes in the large-scale implementation of an open-source courseware management solution at the institutional level. 
Consequently, this study outlines strategies that an institution can use to harness the potential of a community source approach to software development to meet the institutional and individual user needs into the future. }, keywords = {education, learning, sakai}, issn = {1942-3934}, doi = {10.4018/jossp.2010010103}, author = {Buchan, Janet} } @conference {1258, title = {Exploring Complexity in Open Source Software: Evolutionary Patterns, Antecedents, and Outcomes}, booktitle = {2010 43rd Hawaii International Conference on System Sciences (HICSS 2010)}, year = {2010}, note = {"The sample of projects was drawn from SourceForge" "projects were selected that were built with C++." "Applying the selection criteria generated a total of 108 projects for analysis" "Scientific Toolwork{\textquoteright}s Understand (version 1.4)"}, pages = {1 - 11}, publisher = {IEEE}, organization = {IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {Software complexity is important to researchers and managers, yet much is unknown about how complexity evolves over the life of a software application and whether different dimensions of software complexity may exhibit similar or different evolutionary patterns. Using cross-sectional and longitudinal data on a sample of 108 open source projects, this research investigated how the complexity of open source project releases varied throughout the life of the project. Functional data analysis was applied to the release histories of the projects and recurring evolutionary patterns were derived. There were projects that saw little evolution, according to their measures of size and structural complexity. However, projects that displayed some evolution often differed on the pattern of evolution depending on whether size or structural complexity was examined. 
Factors that contribute to and result from the patterns of complexity were evaluated, and implications for research and practice are presented.}, keywords = {complexity, evolution, fda, life cycle, sourceforge, srda}, isbn = {978-1-4244-5509-6}, doi = {10.1109/HICSS.2010.198}, attachments = {https://flosshub.org/sites/flosshub.org/files/10-07-02.pdf}, author = {Darcy, David P. and Daniel, Sherae L. and Stewart, Katherine J.} } @conference {950, title = {An extensive comparison of bug prediction approaches}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {31 - 41}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Reliably predicting software defects is one of software engineering{\textquoteright}s holy grails. Researchers have devised and implemented a plethora of bug prediction approaches varying in terms of accuracy, complexity and the input data they require. However, the absence of an established benchmark makes it hard, if not impossible, to compare approaches. We present a benchmark for defect prediction, in the form of a publicly available data set consisting of several software systems, and provide an extensive comparison of the explanative and predictive power of well-known bug prediction approaches, together with novel approaches we devised. 
Based on the results, we discuss the performance and stability of the approaches with respect to our benchmark and deduce a number of insights on bug prediction models.}, keywords = {apache, bug reports, eclipse, famix, lucene, mylyn, prediction, scm}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463279}, attachments = {https://flosshub.org/sites/flosshub.org/files/31dambrosLanzaRobbes31.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele and Robbes, Romain} } @conference {965, title = {Identifying licensing of jar archives using a code-search approach}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {151 - 160}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Free and open source software strongly promotes the reuse of source code. Some open source Java components/libraries are distributed as jar archives only containing the bytecode and some additional information. For whoever wanting to integrate this jar in her own project, it is important to determine the license(s) of the code from which the jar archive was produced, as this affects the way that such component can be used. This paper proposes an automatic approach to determine the license of jar archives, combining the use of a code-search engine with the automatic classification of licenses contained in textual files enclosed in the jar. 
Results of an empirical study performed on 37 jars - from 17 different systems - indicate that this approach is able to successfully infer the jar licenses in over 95\% of the cases, but that in many cases the license in textual files may differ from the one of the classes contained in the jar.}, keywords = {apache, bytecode, classification, eclipse, google code, jar, java, licenses, source code}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463282}, attachments = {https://flosshub.org/sites/flosshub.org/files/151msr2010.pdf}, author = {Di Penta, Massimiliano and Daniel M. German and Antoniol, Giuliano} } @article {1240, title = {Impact of Programming Language Fragmentation on Developer Productivity}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, month = {04/2010}, pages = {41 - 61}, abstract = {Programmers often develop software in multiple languages. In an effort to study the effects of programming language fragmentation on productivity{\textemdash}and ultimately on a developer{\textquoteright}s problem-solving abilities{\textemdash}the authors present a metric, language entropy, for characterizing the distribution of a developer{\textquoteright}s programming efforts across multiple programming languages. This paper presents an observational study examining the project contributions of a random sample of 500 SourceForge developers. Using a random coefficients model, the authors find a statistically (alpha level of 0.001) and practically significant correlation between language entropy and the size of monthly project contributions. Results indicate that programming language fragmentation is negatively related to the total amount of code contributed by developers within SourceForge, an open source software (OSS) community.}, keywords = {commits, entropy, language entropy, programming languages, sourceforge, srda}, issn = {1942-3934}, doi = {10.4018/jossp.2010040104}, author = {Krein, Jonathan L. 
and MacLean, Alexander C. and Knutson, Charles D. and Delorey, Daniel P. and Eggett, Dennis L.} } @conference {837, title = {Lurking? Cyclopaths? A Quantitative Lifecycle Analysis of User Behavior in a Geowiki}, booktitle = {ACM Conference on Computer-Human Interaction (CHI)}, year = {2010}, month = {04/2010}, publisher = {Association for Computing Machinery}, organization = {Association for Computing Machinery}, address = {Atlanta, GA}, keywords = {geographic volunteer work, geowiki, lurking, open content, volunteered geographic information, Wiki}, author = {Panciera, K. and Priedhorsky, R. and Erickson, T. and Terveen, L.} } @conference {947, title = {Replicating MSR: A study of the potential replicability of papers published in the Mining Software Repositories proceedings}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {171 - 180}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {This paper is the result of reviewing all papers published in the proceedings of the former International Workshop on Mining Software Repositories (MSR) (2004-2006) and now Working Conference on MSR (2007-2009). We have analyzed the papers that contained any experimental analysis of software projects for their potentiality of being replicated. In this regard, three main issues have been addressed: i) the public availability of the data used as case study, ii) the public availability of the processed dataset used by researchers and iii) the public availability of the tools and scripts. A total number of 171 papers have been analyzed from the six workshops/working conferences up to date. Results show that MSR authors use in general publicly available data sources, mainly from free software repositories, but that the amount of publicly available processed datasets is very low. 
Regarding tools and scripts, for a majority of papers we have not been able to find any tool, even for papers where the authors explicitly state that they have built one. Lessons learned from the experience of reviewing the whole MSR literature and some potential solutions to lower the barriers of replicability are finally presented and discussed.}, keywords = {data, literature review, msr, replication}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463348}, url = {http://gsyc.urjc.es/~grex/msr2010}, attachments = {https://flosshub.org/sites/flosshub.org/files/171MSR_2010_69.final_.pdf}, author = {Gregorio Robles} } @conference {1259, title = {Towards an Openness Rating System for Open Source Software}, booktitle = {2010 43rd Hawaii International Conference on System Sciences (HICSS 2010)}, year = {2010}, pages = {1 - 8}, publisher = {IEEE}, organization = {IEEE}, address = {Honolulu, Hawaii, USA}, abstract = {Many open source software projects are not very open to third party developers. The point of open source is to enable anyone to fix bugs or add desired capabilities without holding them hostage to the original developers. This principle is important because an open source project{\textquoteright}s developers may be unresponsive or unable to meet third party needs, even if funding support for requested improvements is offered. This paper presents a simple rating system for evaluating the openness of software distributions. The rating system considers factors such as platform portability, documentation, licensing, and contribution policy. 
Several popular open source products are rated in order to illustrate the efficacy of the rating system.}, keywords = {alice, case study, contribution, documentation, freespire, galib, latex, license, linux, linux kernel, mediaportal, openness, openoffice, opensolaris, rating, unicon}, isbn = {978-1-4244-5509-6}, doi = {10.1109/HICSS.2010.405}, attachments = {https://flosshub.org/sites/flosshub.org/files/10-07-04.pdf}, author = {Bein, Wolfgang and Jeffery, Clinton} } @conference {953, title = {When process data quality affects the number of bugs: Correlations in software engineering datasets}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {62--71}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Software engineering process information extracted from version control systems and bug tracking databases are widely used in empirical software engineering. In prior work, we showed that these data are plagued by quality deficiencies, which vary in its characteristics across projects. In addition, we showed that those deficiencies in the form of bias do impact the results of studies in empirical software engineering. While these findings affect software engineering researchers the impact on practitioners has not yet been substantiated. In this paper we, therefore, explore (i) if the process data quality and characteristics have an influence on the bug fixing process and (ii) if the process quality as measured by the process data has an influence on the product (i.e., software) quality. Specifically, we analyze six Open Source as well as two Closed Source projects and show that process data quality and characteristics have an impact on the bug fixing process: the high rate of empty commit messages in Eclipse, for example, correlates with the bug report quality. 
We also show that the product quality - measured by number of bugs reported - is affected by process data quality measures. These findings have the potential to prompt practitioners to increase the quality of their software process and its associated data quality.}, keywords = {apache, bug reports, eclipse, gnome, log files, mozilla, netbeans, openoffice.org, version control}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463286}, attachments = {https://flosshub.org/sites/flosshub.org/files/62bachmann-msr10.pdf}, author = {Bachmann, Adrian and Bernstein, Abraham} } @conference {943, title = {Automatic labeling of software components and their evolution using log-likelihood ratio of word frequencies in source code}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {175--178}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {As more and more open-source software components become available on the Internet we need automatic ways to label and compare them. For example, a developer who searches for reusable software must be able to quickly gain an understanding of retrieved components. This understanding cannot be gained at the level of source code due to the semantic gap between source code and the domain model. In this paper we present a lexical approach that uses the log-likelihood ratios of word frequencies to automatically provide labels for software components. We present a prototype implementation of our labeling/comparison algorithm and provide examples of its application. 
In particular, we apply the approach to detect trends in the evolution of a software system.}, keywords = {frequency, hapax, information retrieval, java, junit, keywords, labeling, source code}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069499}, attachments = {https://flosshub.org/sites/flosshub.org/files/175AutomaticLabeling.pdf}, author = {Kuhn, Adrian} } @article {1237, title = {Bridging the Gap between Agile and Free Software Approaches}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {58--71}, abstract = {Agile sprints are short events where a small team collocates in order to work on particular aspects of the overall project for a short period of time. Sprinting is a process that has been observed also in Free Software projects: these two paradigms, sharing common principles and values have shown several commonalities of practice. This article evaluates the impact of sprinting on a Free Software project through the analysis of code repository logs: sprints from two Free Software projects (Plone and KDE PIM) are assessed and two hypotheses are formulated: do sprints increase productivity? Are Free Software projects more productive after sprints compared with before? The primary contribution of this article is to show how sprinting creates a large increase in productivity both during the event, and immediately after the event itself: this argues for more in-depth studies focussing on the nature of sprinting.}, keywords = {agile, kde, lines of code, loc, plone, productivity, scm, scrum, sprints, subversion}, issn = {1942-3934}, doi = {10.4018/jossp.2009010104}, author = {Paul J. 
Adams and Capiluppi, Andrea} } @conference {929, title = {Code siblings: Technical and legal implications of copying code between applications}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {81--90}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Source code cloning does not happen within a single system only. It can also occur between one system and another. We use the term code sibling to refer to a code clone that evolves in a different system than the code from which it originates. Code siblings can only occur when the source code copyright owner allows it and when the conditions imposed by such license are not incompatible with the license of the destination system. In some situations copying of source code fragments are allowed - legally - in one direction, but not in the other. In this paper, we use clone detection, license mining and classification, and change history techniques to understand how code siblings - under different licenses - flow in one direction or the other between Linux and two BSD Unixes, FreeBSD and OpenBSD. Our results show that, in most cases, this migration appears to happen according to the terms of the license of the original code being copied, favoring always copying from less restrictive licenses towards more restrictive ones. We also discovered that sometimes code is inserted to the kernels from an outside source.}, keywords = {bsd, fossology, freebsd, linux, openbsd, source code}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069483}, attachments = {https://flosshub.org/sites/flosshub.org/files/81CodeSiblings.pdf}, author = {Daniel M. 
German and Di Penta, Massimiliano and Gueheneuc, Yann-Gael and Antoniol, Giuliano} } @conference {1264, title = {The Commit Size Distribution of Open Source Software}, booktitle = {2009 42nd Hawaii International Conference on System Sciences (HICSS 2009)}, year = {2009}, note = {"We use the database of the open source analytics firm Ohloh Inc." "This article is based on a March 2008 database snapshot, which contains 9,363 completely crawled and analyzed projects covering a time frame from January 1990 to February 2008." "The Ohloh database provides the complete configuration management history of each crawled project (to the extent available on the web). Thus, every single commit action of all the projects over their entire history is available." "We measure the size of commits in this paper in source lines of code (SLoC) using Ohloh{\textquoteright}s own open source diff tool"}, pages = {1--8}, publisher = {IEEE}, organization = {IEEE}, address = {Waikoloa, Hawaii, USA}, abstract = {With the growing economic importance of open source, we need to improve our understanding of how open source software development processes work. The analysis of code contributions to open source projects is an important part of such research. In this paper we analyze the size of code contributions to more than 9,000 open source projects. We review the total distribution and distinguish three categories of code contributions using a size-based heuristic: single focused commits, aggregate team contributions, and repository refactorings. We find that both the overall distribution and the individual categories follow a power law. We also suggest that distinguishing these commit categories by size will benefit future analyses.}, keywords = {commits, configuration management, history, lines of code, sloc, source code}, isbn = {978-0-7695-3450-3}, doi = {10.1109/HICSS.2009.421}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-07-07.pdf}, author = {Arafat, O. 
and Dirk Riehle} } @article {Subramaniam:2009:DOS:1480545.1480824, title = {Determinants of open source software project success: A longitudinal study}, journal = {Decis. Support Syst.}, volume = {46}, year = {2009}, month = {January}, pages = {576{\textendash}585}, publisher = {Elsevier Science Publishers B. V.}, address = {Amsterdam, The Netherlands}, abstract = {In this paper, we investigate open source software (OSS) success using longitudinal data on OSS projects. We find that restrictive OSS licenses have an adverse impact on OSS success. On further analysis, restrictive OSS license is found to be negatively associated with developer interest, but is positively associated with the interest of non-developer users and project administrators. We also show that developer and non-developer interest in the OSS project and the project activity levels in any time period significantly affect the project success measures in subsequent time period. The implications of our findings for OSS research and practice are discussed. }, keywords = {contributors, developers, licenses, longitudinal study, Open source project, OSS, project success, restrictive, Software project success}, issn = {0167-9236}, doi = {10.1016/j.dss.2008.10.005}, url = {http://portal.acm.org/citation.cfm?id=1480545.1480824}, author = {Subramaniam, Chandrasekar and Sen, Ravi and Nelson, Matthew L.} } @conference {1204, title = {Language entropy: A metric for characterization of author programming language distribution}, booktitle = {4th Workshop on Public Data about Software Development (WoPDaSD 2009)}, year = {2009}, note = {"The data set used in this study was previously collected for a separate, but related work. It was originally extracted from the SourceForge Research Archive (SFRA), August 2006. For a detailed discussion of the data source, collection tools and processes, and summary statistics, see [6]." 
"From the initial data set we extracted a random sample of 500 developers along with descriptive details of all revisions that those developers made since the inception of the projects on which they worked. We then condensed this sample by totaling the lines of code added by each developer for each month in which that developer made at least one code submission." [6] Daniel P. Delorey, Charles D. Knutson, and Alex MacLean. Studying production phase sourceforge projects: A case study using cvs2mysql and sfra+. In Second International Workshop on Public Data about Software Development (WoPDaSD {\textquoteright}07), June 2007.}, abstract = {Programmers are often required to develop in multiple languages. In an effort to study the effects of programming language fragmentation on productivity{\textemdash}and ultimately on a programmer{\textquoteright}s problem solving abilities{\textemdash}we propose a metric, language entropy, for characterizing the distribution of an individual{\textquoteright}s development efforts across multiple programming languages. To evaluate this metric, we present an observational study examining all project contributions (through August 2006) of a random sample of 500 SourceForge developers. Using a random coefficients model, we found a statistically significant correlation (alpha level of 0.05) between language entropy and the size of monthly project contributions (measured in lines of code added). Our results indicate that language entropy is a good candidate for characterizing author programming language distribution.}, keywords = {contributions, developers, language entropy, lines of code, loc, multiple languages, programming languages, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/LanguageEntropy-JonathanKrein.pdf}, author = {Krein, Jonathan L. and MacLean, Alexander C. and Delorey, Daniel P. and Knutson, Charles D. 
and Eggett, Dennis L.} } @conference {944, title = {Learning from defect removals}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {179--182}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Recent research has tried to identify changes in source code repositories that fix bugs by linking these changes to reports in issue tracking systems. These changes have been traced back to the point in time when they were previously modified as a way of identifying bug introducing changes. But we observe that not all changes linked to bug tracking systems are fixing bugs; some are enhancing the code. Furthermore, not all fixes are applied at the point in the code where the bug was originally introduced. We flesh out these observations with a manual review of several software projects, and use this opportunity to see how many defects are in the scope of static analysis tools.}, keywords = {bug fixing, bugzilla, change management, cherry, cvs, eclipse, groovy, launching, source code, svn, text editor}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069500}, attachments = {https://flosshub.org/sites/flosshub.org/files/179LearnFromDefects-MSR09.pdf}, author = {Ayewah, Nathaniel and Pugh, William} } @conference {942, title = {On mining data across software repositories}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {171--174}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Software repositories provide abundance of valuable information about open source projects. 
With the increase in the size of the data maintained by the repositories, automated extraction of such data from individual repositories, as well as of linked information across repositories, has become a necessity. In this paper we describe a framework that uses web scraping to automatically mine repositories and link information across repositories. We discuss two implementations of the framework. In the first implementation, we automatically identify and collect security problem reports from project repositories that deploy the Bugzilla bug tracker using related vulnerability information from the National Vulnerability Database. In the second, we collect security problem reports for projects that deploy the Launchpad bug tracker along with related vulnerability information from the National Vulnerability Database. We have evaluated our tool on various releases of Fedora, Ubuntu, Suse, RedHat, and Firefox projects. The percentage of security bugs identified using our tool is consistent with that reported by other researchers.}, keywords = {bug reports, bugzilla, Fedora, Firefox, htmlscraper, integration, launchpad, national vulnerability database, RedHat, Suse, tracker, Ubuntu}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069498}, attachments = {https://flosshub.org/sites/flosshub.org/files/171MiningAcrossmsr09.pdf}, author = {Anbalagan, Prasanth and Vouk, Mladen} } @conference {935, title = {Mining search topics from a code search engine usage log}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {111--120}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {We present a topic modeling analysis of a year long usage log of Koders, one of the major commercial code search engines. 
This analysis contributes to the understanding of what users of code search engines are looking for. Observations on the prevalence of these topics among the users, and on how search and download activities vary across topics, leads to the conclusion that users who find code search engines usable are those who already know to a high level of specificity what to look for. This paper presents a general categorization of these topics that provides insights on the different ways code search engine users express their queries. The findings support the conclusion that existing code search engines provide only a subset of the various information needs of the users when compared to the categories of queries they look at.}, keywords = {analysis, black duck, koders, log, logfile, search, source code}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069489}, author = {Bajracharya, Sushil and Lopes, Cristina} } @article {Fang:2009:USP:1554441.1554443, title = {Understanding Sustained Participation in Open Source Software Projects}, journal = {J. Manage. Inf. Syst.}, volume = {25}, year = {2009}, month = {April}, pages = {9{\textendash}50}, publisher = {M. E. Sharpe, Inc.}, address = {Armonk, NY, USA}, abstract = {Prior research into open source software (OSS) developer participation has emphasized individuals{\textquoteright} motivations for joining these volunteer communities, but it has failed to explain why people stay or leave in the long run. Building upon Lave and Wenger{\textquoteright}s theory of legitimate peripheral participation (LPP), this paper offers a longitudinal investigation of one OSS community in which sustained participation is hypothesized to be associated with the coevolution of two major elements of LPP theory: "situated learning" (the process of acting knowledgeably and purposefully in the world) and "identity construction" (the process of being identified within the community). 
To test this hypothesis, data were collected from multiple sources, including online public project documents, electronic mail messages, tracker messages, and log files. Results from qualitative analyses revealed that initial conditions to participate did not effectively predict long-term participation, but that situated learning and identity construction behaviors were positively linked to sustained participation. Furthermore, this study reveals that sustained participants distinguished themselves by consistently engaging in situated learning that both made conceptual (advising others) and practical contributions (improving the code). Implications and future research are discussed.}, keywords = {Communities Of Practice, Legitimate Peripheral Participation, Open Source Projects, Open Source Software Community, Qualitative Study}, issn = {0742-1222}, doi = {10.2753/MIS0742-1222250401}, url = {http://portal.acm.org/citation.cfm?id=1554441.1554443}, author = {Fang, Yulin and Neufeld, Derrick} } @conference {940, title = {Using Latent Dirichlet Allocation for automatic categorization of software}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {163--166}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {In this paper, we propose a technique called LACT for automatically categorizing software systems in open-source repositories. LACT is based on latent Dirichlet Allocation, an information retrieval method which is used to index and analyze source code documents as mixtures of probabilistic topics. For an initial evaluation, we performed two studies. In the first study, LACT was compared against an existing tool, MUDABlue, for classifying 41 software systems written in C into problem domain categories. 
The results indicate that LACT can automatically produce meaningful category names and yield classification results comparable to MUDABlue. In the second study, we applied LACT to 43 software systems written in different programming languages such as C/C++, Java, C\#, PHP, and Perl. The results indicate that LACT can be used effectively for the automatic categorization of software systems regardless of the underlying programming language or paradigm. Moreover, both studies indicate that LACT can identify several new categories that are based on libraries, architectures, or programming languages, which is a promising improvement as compared to manual categorization and existing techniques.}, keywords = {categorization, category mining, lact, mudablue, multiple languages, repository}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069496}, attachments = {https://flosshub.org/sites/flosshub.org/files/163MSR2009_TianPos.pdf}, author = {Tian, Kai and Revelle, Meghan and Poshyvanyk, Denys} } @conference {10.1109/HICSS.2009.1014, title = {Using Software Archaeology to Measure Knowledge Loss in Software Projects Due to Developer Turnover}, booktitle = {2009 42nd Hawaii International Conference on System Sciences (HICSS 2009)}, year = {2009}, pages = {1--10}, publisher = {IEEE Computer Society}, address = {Los Alamitos, CA, USA}, abstract = {Developer turnover can result in a major problem when developing software. When senior developers abandon a software project, they leave a knowledge gap that has to be managed. In addition, new (junior) developers require some time in order to achieve the desired level of productivity. In this paper, we present a methodology to measure the effect of knowledge loss due to developer turnover in software projects. For a given software project, we measure the quantity of code that has been authored by developers that do not belong to the current development team, which we define as orphaned code. 
Besides, we study how orphaned code is managed by the project. Our methodology is based on the concept of software archaeology, a derivation of software evolution. As case studies we have selected four FLOSS (free, libre, open source software) projects, from purely driven by volunteers to company-supported. The application of our methodology to these case studies will give insight into the turnover that these projects suffer and how they have managed it and shows that this methodology is worth being augmented in future research.}, keywords = {attrition, case study, developers, evince, evolution, gimp, growth, knowledge collaboration, lines of code, nautilus, quality, sloc, turnover}, isbn = {978-0-7695-3450-3}, doi = {10.1109/HICSS.2009.1014}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-07-08.pdf}, author = {Izquierdo-Cortazar, Daniel and Gregorio Robles and Ortega, Felipe and Jesus M. Gonzalez-Barahona} } @article {Au20099, title = {Virtual organizational learning in open source software development projects}, journal = {Information \& Management}, volume = {46}, number = {1}, year = {2009}, pages = {9--15}, abstract = {We studied virtual organizational learning in open source software (OSS) development projects. Specifically, our research focused on learning effects of OSS projects and the factors that affect the learning process. The number and percentage of resolved bugs and bug resolution time of 118 SourceForge.net OSS projects were used to measure the learning effects. Projects were characterized by project type, number and experience of developers, number of bugs, and bug resolution time. Our results provided evidence of virtual organizational learning in OSS development projects and support for several factors as determinants of performance. Team size was a significant predictor, with mid-sized project teams functioning best. 
Teams of three to seven developers exhibited the highest efficiency over time and teams of eight to 15 produced the lowest mean time for bug resolution. Increasing the percentage of bugs assigned to specific developers or boosting developer participation in other OSS projects also improved performance. Furthermore, project type introduced variability in project team performance.}, keywords = {bug fixing, bugs, learning, Project performance, sourceforge, team size, teams, virtual organization}, issn = {0378-7206}, doi = {10.1016/j.im.2008.09.004}, url = {http://www.sciencedirect.com/science/article/B6VD0-4V1D7NT-1/2/a3bbf7652c674f753398160b8f05f6e9}, author = {Yoris A. Au and Darrell Carpenter and Xiaogang Chen and Jan G. Clark} } @article {Xu2009151, title = {Volunteers{\textquoteright} involvement in online community based software development}, journal = {Information \& Management}, volume = {46}, number = {3}, year = {2009}, note = {"Data were collected through an online survey and by searching project archives. On Sourceforge.net, each developer was uniquely identified with a user account, and the developer{\textquoteright}s performance was assessed through the number of function points accepted by the project in a certain time period, obtained through conversion and calculation from the project{\textquoteright}s code repository. Data for other constructs were obtained from the developers{\textquoteright} response to the online survey. " "a developer{\textquoteright}s performance was measured by the number of function points made and accepted into the project during the observed time period." project age, development status, license type, number of developers}, pages = {151--158}, abstract = {We sought to gain understanding of voluntary developers{\textquoteright} involvement in open source software (OSS) projects. Data were collected from voluntary developers working on open source projects. 
Our findings indicated that a voluntary developer{\textquoteright}s involvement was very important to his or her performance and that involvement was dependent on individual motivations (personal software needs, reputation and skills gaining expectation, enjoyment in open source coding) and project community factors (leadership effectiveness, interpersonal relationship, community ideology). Our work contributes theoretically and empirically to the body of OSS research and has practical implications for OSS project management.}, keywords = {age, developers, effectiveness, function points, ideology, leadership, MOTIVATION, scm, sourceforge, status, Survey, team size, Volunteers}, issn = {0378-7206}, doi = {10.1016/j.im.2008.12.005}, url = {http://www.sciencedirect.com/science/article/B6VD0-4VP1CN0-1/2/8e1c7be4fcedd1419209c5c843ffa923}, author = {Bo Xu and Donald R. Jones and Bingjia Shao} } @article {denBesten2008316, title = {The allocation of collaborative efforts in open-source software}, journal = {Information Economics and Policy}, volume = {20}, number = {4}, year = {2008}, note = {"we have selected a set of 10 large open-source projects" apache, cvs, gaim, gcc, ghostscript, mozilla, netbsd, openssh, postgresql, python "Our data were extracted from logs of development activity generated by software version control systems. For each project in the selection, we extracted CVS development logs" "We notably computed for each file in the sample, and for each month in its history, the number of distinct maintainers that had committed a change during that month, and the number of commits, the blocks of code addition, each file had received during that month." 
"other variables used in the regressions are proxies for the size, age, and granularity of files; the size of a file is represented as its number of lines of code (LOCs), its age by its creation date (Youth), and its granularity by the number of functions it contains."}, pages = {316--322}, abstract = {The article investigates the allocation of collaborative efforts among core developers (maintainers) of open-source software by analyzing on-line development traces (logs) for a set of 10 large projects. Specifically, we investigate whether the division of labor within open-source projects is influenced by characteristics of software code. We suggest that the collaboration among maintainers tends to be influenced by different measures of code complexity. We interpret these findings by providing preliminary evidence that the organization of open-source software development would self-adapt to characteristics of the code base, in a {\textquoteright}stigmergic{\textquoteright} manner.}, keywords = {age, apache, complexity, cvs, division of labor, functions, gaim, gcc, ghostscript, lines of code, loc, log files, mozilla, netbsd, openssh, postgresql, python, revision control, scm, size, source code, Stigmergy, version control}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.06.003}, url = {http://www.sciencedirect.com/science/article/B6V8J-4SSG4PN-1/2/88b3824c30a31c18929d8a5ca6d64f62}, author = {den Besten, Matthijs and Jean-Michel Dalle and Galia, Fabrice} } @conference {Hill:2008:AAM:1370750.1370771, title = {AMAP: automatically mining abbreviation expansions in programs to enhance software maintenance tools}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {79{\textendash}88}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {When writing software, developers often employ abbreviations in 
identifier names. In fact, some abbreviations may never occur with the expanded word, or occur more often in the code. However, most existing program comprehension and search tools do little to address the problem of abbreviations, and therefore may miss meaningful pieces of code or relationships between software artifacts. In this paper, we present an automated approach to mining abbreviation expansions from source code to enhance software maintenance tools that utilize natural language information. Our scoped approach uses contextual information at the method, program, and general software level to automatically select the most appropriate expansion for a given abbreviation. We evaluated our approach on a set of 250 potential abbreviations and found that our scoped approach provides a 57\% improvement in accuracy over the current state of the art.}, keywords = {automatic abbreviation expansion, azureus, itext.net, liferay, maintenance, natural language, openoffice.org, program comprehension, source code, tiger envelopes, tools}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370771}, url = {http://doi.acm.org/10.1145/1370750.1370771}, attachments = {https://flosshub.org/sites/flosshub.org/files/p79-hill.pdf}, author = {Hill, Emily and Fry, Zachary P. and Boyd, Haley and Sridhara, Giriprasad and Novikova, Yana and Pollock, Lori and Vijay-Shanker, K.} } @conference {1206, title = {Collecting data from distributed FOSS projects}, booktitle = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)}, year = {2008}, note = {"We selected three projects from the initial set of projects: Linux 2.6, an operating system kernel, gimp, a graphics program, and Blender, a 3d content creation suite." 
"To acquire data from each data source, we wrote special programs based on the earlier prototypes....The first program extracts information from mailing list archives....The second program obtains bug reports from bug tracking systems....The third program obtains source code from network-accessible version control systems and runs metric calculations on it."}, pages = {8--13}, abstract = {A key trait of Free and Open Source Software (foss) development is its distributed nature. Nevertheless, two project-level operations, the fork and the merge of program code, are among the least well understood events in the lifespan of a foss project. Some projects have explicitly adopted these operations as the primary means of concurrent development. In this study, we examine the effect of highly distributed software development, as found in the Linux kernel project, on collection and modelling of software development data. We find that distributed development calls for sophisticated temporal modelling techniques where several versions of the source code tree can exist at once. Attention must be turned towards the methods of quality assurance and peer review that projects employ to manage these parallel source trees. Our analysis indicates that two new metrics, fork rate and merge rate, could be useful for determining the role of distributed version control systems in foss projects. The study presents a preliminary data set consisting of version control and mailing list data. 
}, keywords = {bitkeeper, bug tracking system, cvs, distributed, email archive, fork rate, git, life cycle, linux, linux kernel, mailing list, merge rate, subversion, svn, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/fagerholm.pdf}, author = {Fagerholm, Fabian and Taina, Juha} } @article {Koch2008345, title = {Effort modeling and programmer participation in open source software projects}, journal = {Information Economics and Policy}, issuetitle = {Empirical Issues in Open Source Software}, volume = {20}, number = {4}, year = {2008}, note = {"Using a two-step approach, first a detailed case study on one project, GNOME, will be undertaken, then a large data set retrieved from a project hosting site, SourceForge.net, will be used to validate the results." CVS was the main source of data "e-mails sent to the different project discussion lists were identified as an additional source of information especially on communication and coordination besides the CVS-repository" basic counts were calculated for developer discussion levels}, month = dec, pages = {345--355}, abstract = {This paper develops models for programmer participation and effort estimation in open source software projects and employs the results to assess the efficiency of open source software creation. Successful development of such models will be important for decision makers of various kinds. We propose hypotheses based on a prior case study on manpower function and effort modeling. A large data set retrieved from a project repository is used to test these hypotheses. 
The main results are that if Norden-Rayleigh-based approaches are used, they need to be complemented in order to account for the addition of new features during a product life cycle, and that programmer-participation based effort models result in distinctly lower estimations of effort than those based on output metrics, such as lines of code.}, keywords = {cvs, developers, email, email archives, gnome, lines of code, scm, Software repository mining, source code, sourceforge}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.06.004}, url = {http://www.sciencedirect.com/science/article/B6V8J-4SSND1J-1/2/c857fa1493e19aa7fe4297dedb077b3a}, attachments = {https://flosshub.org/sites/flosshub.org/files/KochEffortModeling.pdf}, author = {Koch, Stefan} } @article {Giuri2008305, title = {Explaining leadership in virtual teams: The case of open source software}, journal = {Information Economics and Policy}, volume = {20}, number = {4}, year = {2008}, note = {Empirical Issues in Open Source Software}, pages = {305--315}, abstract = {This paper contributes to the open source software (OSS) literature by investigating the likelihood that a participant becomes a project leader. Project leaders are key actors in a virtual community and are crucial to the success of the OSS model. Knowledge of the forces that lead to the emergence of project managers among the multitude of participants is still limited. We aim to fill this gap in the literature by analyzing the association between the roles played by an individual who is registered with a project, and a set of individual-level and project-level characteristics. In line with the theory of occupational choice elaborated by (Lazear, E.P., 2002. Entrepreneurship. NBER Working Paper No. 9109, Cambridge, Mass; Lazear, E.P., 2004. Balanced skills and entrepreneurship, American Economic Review 94, pp. 
208-211), we find that OSS project leaders possess diversified skill sets which are needed to select the inputs provided by various participants, motivate contributors, and coordinate their efforts. Specialists, like pure developers, are endowed with more focused skill sets. Moreover, we find that the degree of modularity of the development process is positively associated with the presence of project leaders. That result is consistent with the modern theory of modular production (Baldwin, C.Y., Clark, K.B., 1997. Managing in an age of modularity. Harvard Business Review September-October. pp. 84-93; Mateos-Garcia, J., Steinmueller, W.E., 2003. The Open Source Way of Working: A New Paradigm for the Division of Labour in Software Development? SPRU - Science and Technology Policy Studies. Open Source Movement Research INK Working Paper, No. 1; Aoki, M., 2004. An organizational architecture of T-form: Silicon Valley clustering and its institutional coherence. Industrial and Corporate Change 13, pp. 967-981).}, keywords = {contributors, Human capital, leadership, roles, sourceforge, team}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.06.002}, url = {http://www.sciencedirect.com/science/article/B6V8J-4SRW10C-1/2/5ce36096ba3947338962268b54a5a7a9}, author = {Giuri, Paola and Rullani, Francesco and Torrisi, Salvatore} } @conference {Gobeille:2008:FP:1370750.1370763, title = {The FOSSology project}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {47--50}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {By its nature, the availability of FOSS has given computer scientists a large body of software and software projects to analyze. 
By having available source, version control system metadata, and open project communities, much can be learned about a software project, software development and collaborative project development. The goal of the FOSSology project is to create a public, open source software repository, together with tools to facilitate analysis, storage, and sharing of open source software and its metadata. FOSSology does license detection today.}, keywords = {abiword, fossology, license analysis, licenses}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370763}, url = {http://doi.acm.org/10.1145/1370750.1370763}, attachments = {https://flosshub.org/sites/flosshub.org/files/p47-gobeille.pdf}, author = {Gobeille, Robert} } @article {1120, title = {The institutions of open source software: Examining the Debian community}, journal = {Information Economics and Policy}, volume = {20}, number = {4}, year = {2008}, note = {"using primary data from its mailing lists archives, handbooks written to inform potential and actual community members, and previous analyses of institutional evolution and political conflict" no discussion of which lists, how many, or when they were gathered...?[ms]}, month = dec, pages = {333--344}, abstract = {Free and open source software activities involve and, perhaps, evolve institutions (rules, norms and standards) that influence the formation, growth, and demise of communities. Community institutions are attractors for some individuals while discouraging other individuals from entering or continuing to participate. Their suitability may change as a community grows. This paper examines the institutions of the Debian community where issues of community identity, distribution of authority, and decentralisation have facilitated growth and development. These same institutions have also resulted in conflicts regarding community purposes and the quality and delivery of the community{\textquoteright}s output. 
We examine the institutional redesign undertaken to address these problems and derive implications for F/LOS communities and companies.}, keywords = {authority, COMMUNITY, conflict, debian, decentralization, growth, institutions, leadership}, issn = {0167-6245}, doi = {10.1016/j.infoecopol.2008.06.001}, attachments = {https://flosshub.org/sites/flosshub.org/files/The_institutions_of_open_source_software-_IR.pdf}, author = {Mateos Garcia, J. and Steinmueller, W. E.} } @conference {973, title = {On the relation of refactorings and software defect prediction}, booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08}, year = {2008}, month = may, pages = {35--38}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {This paper analyzes the influence of evolution activities such as refactoring on software defects. In a case study of five open source projects we used attributes of software evolution to predict defects in time periods of six months. We use versioning and issue tracking systems to extract 110 data mining features, which are separated into refactoring and non-refactoring related features. These features are used as input into classification algorithms that create prediction models for software defects. We found out that refactoring related features as well as non-refactoring related features lead to high quality prediction models. Additionally, we discovered that refactorings and defects have an inverse correlation: The number of software defects decreases, if the number of refactorings increased in the preceding time period. 
As a result, refactoring should be a significant part of both bug fixes and other evolutionary changes to reduce software defects.}, keywords = {argouml, bug fixing, bug reports, defects, evolution, jboss, liferay, prediction, refactoring, spring, weka, xdoclet}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370759}, attachments = {https://flosshub.org/sites/flosshub.org/files/p35-ratzinger.pdf}, author = {Ratzinger, Jacek and Sigmund, Thomas and Gall, Harald C.} } @article {1392, title = {Self-organization process in open-source software: An empirical study}, journal = {Information and Software Technology}, volume = {50}, year = {2008}, month = apr, pages = {361--374}, abstract = {Software systems must continually evolve to adapt to new functional requirements or quality requirements to remain competitive in the marketplace. However, different software systems follow different strategies to evolve, affecting both the release plan and the quality of these systems. In this paper, software evolution is considered as a self-organization process and the difference between closed-source software and open-source software is discussed in terms of self-organization. In particular, an empirical study of the evolution of Linux from version 2.4.0 to version 2.6.13 is reported. 
The study shows how open-source software systems self-organize to adapt to functional requirements and quality requirements.}, keywords = {Empirical study, evolution, linux, requirements, Self-organization, software evolution}, issn = {0950-5849}, doi = {10.1016/j.infsof.2007.02.018}, url = {http://www.sciencedirect.com/science/article/pii/S0950584907000225}, author = {Yu, Liguo} } @conference {Thummalapenta:2008:SDF:1370750.1370775, title = {SpotWeb: detecting framework hotspots via mining open source repositories on the web}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {109--112}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The essentials of modern software development (such as low cost and high efficiency) demand software developers to make intensive reuse of existing open source frameworks or libraries (generally referred as frameworks) available on the web. However, developers often face challenges in reusing these frameworks due to several factors such as the complexity and lack of proper documentation. In this paper, we propose a code-search-engine-based approach that tries to detect hotspots in a given framework by mining code examples gathered from open source repositories available on the web; these hotspots are the APIs that are frequently reused. Hotspots can serve as starting points for developers in understanding and reusing the given framework. We developed a tool, called SpotWeb, for frameworks or libraries written in Java and conducted two case studies with two open source frameworks JUnit and Log4j. 
We also show that the detected hotspots of Log4j and JUnit are consistent with their respective documentations.}, keywords = {code reuse, code search engine, frameworks, hotspots, junit, log4j, repositories}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370775}, url = {http://doi.acm.org/10.1145/1370750.1370775}, attachments = {https://flosshub.org/sites/flosshub.org/files/p109-thummalapenta.pdf}, author = {Thummalapenta, Suresh and Xie, Tao} } @conference {Spinellis:2008:TFK:1368088.1368140, title = {A tale of four kernels}, booktitle = {Proceedings of the 30th international conference on Software engineering}, series = {ICSE {\textquoteright}08}, year = {2008}, pages = {381--390}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The FreeBSD, GNU/Linux, Solaris, and Windows operating systems have kernels that provide comparable facilities. Interestingly, their code bases share almost no common parts, while their development processes vary dramatically. We analyze the source code of the four systems by collecting metrics in the areas of file organization, code structure, code style, the use of the C preprocessor, and data organization. The aggregate results indicate that across various areas and many different metrics, four systems developed using wildly different processes score comparably. 
This allows us to posit that the structure and internal quality attributes of a working, non-trivial software artifact will represent first and foremost the engineering requirements of its construction, with the influence of process being marginal, if any.}, keywords = {comparison, freebsd, linux, open source, opensolaris, proprietary software, windows, wrk}, isbn = {978-1-60558-079-1}, doi = {10.1145/1368088.1368140}, url = {http://doi.acm.org/10.1145/1368088.1368140}, author = {Spinellis, Diomidis} } @conference {Hindle:2008:LCT:1370750.1370773, title = {What do large commits tell us?: a taxonomical study of large commits}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {99--108}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Research in the mining of software repositories has frequently ignored commits that include a large number of files (we call these large commits). The main goal of this paper is to understand the rationale behind large commits, and if there is anything we can learn from them. To address this goal we performed a case study that included the manual classification of large commits of nine open source projects. The contributions include a taxonomy of large commits, which are grouped according to their intention. We contrast large commits against small commits and show that large commits are more perfective while small commits are more corrective. 
These large commits provide us with a window on the development practices of maintenance teams.}, keywords = {boost, bug fixing, egroupware, enlightenment, evolution, firebird, large commits, maintenance, mysql, postgresql, samba, software evolution, source control system, spring}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370773}, url = {http://doi.acm.org/10.1145/1370750.1370773}, attachments = {https://flosshub.org/sites/flosshub.org/files/p99-hindle.pdf}, author = {Hindle, Abram and German, Daniel M. and Holt, Ric} } @conference {1007, title = {Analysis of the Linux Kernel Evolution Using Code Clone Coverage}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {22}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Most studies of the evolution of software systems are based on the comparison of simple software metrics. In this paper, we present our preliminary investigation of the evolution of the Linux kernel using code-clone analysis and the code-clone coverage metrics. We examined 136 versions of the stable Linux kernel using a distributed extension of the code clone detection tool CCFinder. 
The result is shown as a heat map.}, keywords = {ccfinder, clone, cloning, kernel, linux, metrics, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.1}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300022.pdf}, author = {Livieri, Simone and Higo, Yoshiki and Matsushita, Makoto and Inoue, Katsuro} } @conference {1005, title = {Comparing Approaches to Mining Source Code for Call-Usage Patterns}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {20}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Two approaches for mining function-call usage patterns from source code are compared. The first approach, itemset mining, has recently been applied to this problem. The other approach, sequential-pattern mining, has not been previously applied to this problem. Here, a call-usage pattern is a composition of function calls that occur in a function definition. Both approaches look for frequently occurring patterns that represent standard usage of functions and identify possible errors. Itemset mining produces unordered patterns, i.e., sets of function calls, whereas, sequential-pattern mining produces partially ordered patterns, i.e., sequences of function calls. The trade-off between the additional ordering context given by sequential-pattern mining and the efficiency of itemset mining is investigated. The two approaches are applied to the Linux kernel v2.6.14 and results show that mining ordered patterns is worth the additional cost.}, keywords = {function calls, functions, kernel, linux, sequence, sequencing, sequential-pattern mining}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.3}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300020.pdf}, author = {Kagdi, Huzefa and Collard, Michael L. 
and Maletic, Jonathan I.} } @conference {996, title = {Correlating Social Interactions to Release History during Software Evolution}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {7}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {In this paper, we propose a method to reason about the nature of software changes by mining and correlating discussion archives. We employ an information retrieval approach to find correlation between source code change history and history of social interactions surrounding these changes. We apply our correlation method on two software systems, LSEdit and Apache Ant. The results of these exploratory case studies demonstrate the evidence of similarity between the content of free-form text emails among developers and the actual modifications in the code. We identify a set of correlation patterns between discussion and changed code vocabularies and discover that some releases referred to as minor should instead fall under the major category. 
These patterns can be used to give estimations about the type of a change and time needed to implement it.}, keywords = {ant, apache, change management, developers, discussion, effort estimation, lsedit, mailing lists, scm, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.4}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300007.pdf}, author = {Baysal, Olga and Malton, Andrew J.} } @conference {623, title = {Experiences on Product Development with Open Source Software}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {234}, year = {2007}, pages = {85--96}, publisher = {Springer}, organization = {Springer}, chapter = {7}, abstract = {This article discusses Nokia{\textquoteright}s experiences of using open source in commercial product development. It presents the development model used in the creation of mobile consumer devices and highlights the opportunities and challenges experienced. This article concludes that the main benefits come from the utilization of already available open source components, and from their quality and flexibility. It illustrates the challenges and solutions faced when mixing open and closed development models at Nokia. }, keywords = {gtk, linux, nokia, tablet}, isbn = {978-0-387-72485-0}, doi = {10.1007/978-0-387-72486-7_7}, attachments = {https://flosshub.org/sites/flosshub.org/files/Experiences\%20on\%20Product\%20Development.pdf}, author = {Jaaksi, Ari} } @conference {991, title = {How Long Will It Take to Fix This Bug?}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {1}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Predicting the time and effort for a software problem has long been a difficult task. 
We present an approach that automatically predicts the fixing effort, i.e., the person-hours spent on fixing an issue. Our technique leverages existing issue tracking systems: given a new issue report, we use the Lucene framework to search for similar, earlier reports and use their average time as a prediction. Our approach thus allows for early effort estimation, helping in assigning issues and scheduling stable releases. We evaluated our approach using effort data from the JBoss project. Given a sufficient number of issues reports, our automatic predictions are close to the actual effort; for issues that are bugs, we are off by only one hour, beating naive predictions by a factor of four.}, keywords = {bug fixing, bug reports, effort estimation, jboss, lucene, prediction, time}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.13}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300001.pdf}, author = {Weiss, Cathrin and Premraj, Rahul and Zimmermann, Thomas and Zeller, Andreas} } @conference {1001, title = {Identifying Changed Source Code Lines from Version Repositories}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {14}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Observing the evolution of software systems at different levels of granularity has been a key issue for a number of studies, aiming at predicting defects or at studying certain phenomena, such as the presence of clones or of crosscutting concerns. Versioning systems such as CVS and SVN, however, only provide information about lines added or deleted by a contributor: any change is shown as a sequence of additions and deletions. This provides an erroneous estimate of the amount of code changed. 
This paper shows how the evolution of changes at source code line level can be inferred from CVS repositories, by combining information retrieval techniques and the Levenshtein edit distance. The application of the proposed approach to the ArgoUML case study indicates a high precision and recall.}, keywords = {argouml, cvs, levenshtein, scm, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.14}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300014.pdf}, author = {Canfora, Gerardo and Cerulo, Luigi and Di Penta, Massimiliano} } @article {Wang:2007:MEO:1317471.1317479, title = {Measuring the evolution of open source software systems with their communities}, journal = {ACM SIGSOFT Software Engineering Notes}, volume = {32}, year = {2007}, note = {"1. M1: The number of modules in software system at a series of specific moment. 2. CD: The number of developers (code contributors) at a series of specific moment. 3. MC: The correlation of M1 and CD." "We get the information of Modules from the Launchpad[1] which is a distributed collaborative infrastructure for Ubuntu and other open source software development. Ubuntu mainly grows though a manner of adding new packages. So, we treat packages as the modules we mention before. In Ubuntu community, some members are in charge of auditing the packages and add them to Ubuntu, here developers are refers them."}, month = nov, publisher = {ACM}, address = {New York, NY, USA}, abstract = {Open Source Software (OSS) has become the subject of much commercial and academic interest in last few years. As traditional software, OSS also evolves to fulfill the need of stakeholders. Therefore, providing quantitative metrics for OSS evolution has also become an urgent issue. However, most existing studies of software evolution have been performed on systems developed within a single company using traditional management techniques. These metrics models are inadequate to measure OSS evolution. 
In this paper, we describe a preliminary evolution metrics set for evaluating OSS. The most significant novelty of this model is that it takes specific properties of Open Source Community (OSC) into consideration. In another word, we measure the evolution of OSS and OSC together. We also provide a lightweight case study on Ubuntu project using this metrics set. We find out that the Open Source Community and its members also play essential role in OSS evolution. We expect this metrics model can bring better understandings and explanations of phenomena in open source development and evolution.}, keywords = {evolution, launchpad, metrics, open source community, open source software, Ubuntu}, issn = {0163-5948}, doi = {10.1145/1317471.1317479}, url = {http://doi.acm.org/10.1145/1317471.1317479}, author = {Wang, Yi and Guo, Defeng and Shi, Huihui} } @article {springerlink:10.1007/s10368-007-0086-4, title = {Open source software: Motivation and restrictive licensing}, journal = {International Economics and Economic Policy}, volume = {4}, year = {2007}, note = {"We employ a unique data set consisting of 71 open source projects hosted at the SourceForge web site. The 71 projects in the sample were chosen (in January 2000)" "This sample was observed over an 18-month period from January 2002 through the middle of 2003, with data collected at 2-month intervals." "We are grateful to NERA for providing us with the data." "Although we only have data on a relatively small sample of the projects hosted SourceForge, the sample is unique because of data on lines of code as well as data on different versions of the program. The latter is a potentially important control variable, since a change in version may necessitate additional lines of code. Our data set contains information on the size of the open source projects in the form of source lines of code (SLOC). 
Using SLOC as a performance measure is not always ideal; nevertheless, this performance measure is employed in the profession and the literature.15 For our purposes, SLOC is in fact an ideal measure, because we want to measure the effort that is put into the project, rather than whether a project succeeds." }, pages = {209--225}, publisher = {Springer Berlin / Heidelberg}, abstract = {Open source software (OSS) is an economic paradox. Development of open source software is often done by unpaid volunteers and the source code is typically freely available. Surveys suggest that status, signaling, and intrinsic motivations play an important role in inducing developers to invest effort. Contribution to an OSS project is rewarded by adding one{\textquoteright}s name to the list of contributors which is publicly observable. Such incentives imply that programmers may have little incentive to contribute beyond the threshold level required for being listed as a contributor. Using a unique data set we empirically examine this hypothesis. We find that the output per contributor in open source projects is much higher when licenses are less restrictive and more commercially oriented. These results indeed suggest a status, signaling, or intrinsic motivation for participation in OSS projects with restrictive licenses.}, keywords = {contributions, contributors, developers, incentives, license analysis, licenses, lines of code, loc, MOTIVATION, restrictive, scm, size, status, version history}, issn = {1612-4804}, doi = {10.1007/s10368-007-0086-4}, url = {http://dx.doi.org/10.1007/s10368-007-0086-4}, author = {Fershtman, Chaim and Gandal, Neil} } @article {119, title = {Social network structures in open source software development teams}, journal = {Journal of Database Management}, volume = {18}, number = {2}, year = {2007}, note = {"Three projects were selected from SourceForge.net in terms of their similarities as well as their differences. 
Monthly data were extracted from the bug tracking systems in order to achieve a longitudinal view of the interaction pattern of each project. Social network analysis was used to generate the indices of social structure." no pdf}, month = apr # "--" # jun, pages = {25--40}, abstract = {Drawing on social network theories and previous studies, this research examines the dynamics of social network structures in open source software (OSS) teams. Three projects were selected from SourceForge.net in terms of their similarities as well as their differences. Monthly data were extracted from the bug tracking systems in order to achieve a longitudinal view of the interaction pattern of each project. Social network analysis was used to generate the indices of social structure. The finding suggests that the interaction pattern of OSS projects evolves from a single hub at the beginning to a core/periphery model as the projects move forward.}, keywords = {bug tracking, bugs, COMMUNITY, INNOVATION, longitudinal study, MODEL, open source, social, social network analysis, social networks, sourceforge, structure}, issn = {1063-8016}, author = {Long, Y. and Siau, K.} } @conference {1006, title = {Towards a Theoretical Model for Software Growth}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {21}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Software growth (and more broadly, software evolution) is usually considered in terms of size or complexity of source code. However in different studies, usually different metrics are used, which make it difficult to compare approaches and results. In addition, not all metrics are equally easy to calculate for a given source code, which leads to the question of which one is the easiest to calculate without losing too much information. 
To address both issues, in this paper present a comprehensive study, based on the analysis of about 700,000 C source code files, calculating several size and complexity metrics for all of them. For this sample, we have found double Pareto statistical distributions for all metrics considered, and a high correlation between any two of them. This would imply that any model addressing software growth should produce this Pareto distributions, and that analysis based on any of the considered metrics should show a similar pattern, provided the sample of files considered is large enough.}, keywords = {C, complexity, evolution, freebsd, growth, halstead, lines of code, loc, mccabe, metrics, scm, size, sloc, sloccount, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.31}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300021.pdf}, author = {Herraiz, Israel and Gonzalez-Barahona, Jesus M. and Robles, Gregorio} } @conference {1008, title = {What Can OSS Mailing Lists Tell Us? A Preliminary Psychometric Text Analysis of the Apache Developer Mailing List}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {23}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Developer mailing lists are a rich source of information about Open Source Software (OSS) development. The unstructured nature of email makes extracting information difficult. We use a psychometrically-based linguistic analysis tool, the LIWC, to examine the Apache httpd server developer mailing list. We conduct three preliminary experiments to assess the appropriateness of this tool for information extraction from mailing lists. First, using LIWC dimensions that are correlated with the big five personality traits, we assess the personality of four top developers against a baseline for the entire mailing list. 
The two developers that were responsible for the major Apache releases had similar personalities. Their personalities were different from the baseline and the other developers. Second, the first and last 50 emails for two top developers who have left the project are examined. The analysis shows promise in understanding why developers join and leave a project. Third, we examine word usage on the mailing list for two major Apache releases. The differences may reflect the relative success of each release.}, keywords = {apache, developers, email, joining, liwc, mailing lists, personality}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.35}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300023.pdf}, author = {Peter C. Rigby and Hassan, Ahmed E.} } @conference {D{\textquoteright}Ambros:2006:AER:1137983.1138029, title = {Applying the evolution radar to PostgreSQL}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {177{\textendash}178}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, keywords = {cvs, documentation, evolution, evolution radar, logical coupling, makefile, mining challenge, msr challenge, postgresql, re-engineering, refactoring, release history, rhdb, source code, version control, visualization}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138029}, url = {http://doi.acm.org/10.1145/1137983.1138029}, attachments = {https://flosshub.org/sites/flosshub.org/files/177ApplyingEvolution.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele} } @conference {682, title = {Collaborative Maintenance in Large Open-Source Projects}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, pages = {233--244}, publisher = {Springer}, organization = {Springer}, abstract = {The paper investigates collaborative work 
among maintainers of open source software by analyzing the logs of a set of 10 large projects. We inquire whether teamwork can be influenced by several characteristics of code. Preliminary results suggest that collaboration among maintainers in most large open-source projects seems to be positively influenced by file vintage and by Halstead volume of files, and negatively by McCabe complexity and size measured in SLOCs. These results could be consistent with an increased attractivity of files created early in the history of a project, and with maintainers being less attracted by more verbose code and by more complex code, although in this last case it might also reflect the fact that more complex files would be de facto more exclusive in terms of maintenance. }, keywords = {apache, COLLABORATION, complexity, cvs, gaim, gcc, ghostscript, halstead, lines of code, loc, mccabe, mozilla, netbsd, openssh, postgresql, python, sloc}, doi = {10.1007/0-387-34226-5_23}, attachments = {https://flosshub.org/sites/flosshub.org/files/Collaborative\%20Maintenance.pdf}, author = {den Besten, Matthijs and Jean-Michel Dalle and Galia, Fabrice} } @conference {Herraiz:2006:CSN:1116163.1116405, title = {Comparison Between SLOCs and Number of Files As Size Metrics for Software Evolution Analysis}, booktitle = {Proceedings of the Conference on Software Maintenance and Reengineering}, series = {CSMR {\textquoteright}06}, year = {2006}, pages = {206{\textendash}213}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, keywords = {empirical studies, libre software, metrics, software evolution}, isbn = {0-7695-2536-9}, url = {http://dl.acm.org/citation.cfm?id=1116163.1116405}, author = {Herraiz, Israel and Gregorio Robles and Jesus M. Gonzalez-Barahona} } @conference {D{\textquoteright}Ambros:2006:ERV:1137983.1137992, title = {The evolution radar: visualizing integrated logical coupling information}, booktitle = 
{Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {26{\textendash}32}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In software evolution research logical coupling has extensively been used to recover the hidden dependencies between source code artifacts. They would otherwise go lost because of the file-based nature of current versioning systems. Previous research has dealt with low-level couplings between files, leading to an explosion of data to be analyzed, or has abstracted the logical couplings to module level, leading to a loss of detailed information. In this paper we propose a visualization-based approach which integrates both file-level and module-level logical coupling information. This not only facilitates an in-depth analysis of the logical couplings at all granularity levels, it also leads to a precise characterization of the system modules in terms of their logical coupling dependencies.}, keywords = {change management, cvs, evolution, logical coupling, mozilla, scm, source code, thunderbird, tinderbox, visualization}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1137992}, url = {http://doi.acm.org/10.1145/1137983.1137992}, attachments = {https://flosshub.org/sites/flosshub.org/files/26TheEvolutionRadar.pdf}, author = {D{\textquoteright}Ambros, Marco and Lanza, Michele and Lungu, Mircea} } @conference {Robles:2006:GLD:1137983.1138017, title = {Geographic location of developers at SourceForge}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {144{\textendash}150}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The development of libre (free/open source) software is usually performed by geographically distributed teams. 
Participation in most cases is voluntary, sometimes sporadic, and often not framed by a pre-defined management structure. This means that anybody can contribute, and in principle no national origin has advantages over others, except for the differences in availability and quality of Internet connections and language. However, differences in participation across regions do exist, although there are little studies about them. In this paper we present some data which can be the basis for some of those studies. We have taken the database of users registered at SourceForge, the largest libre software development web-based platform, and have inferred their geographical locations. For this, we have applied several techniques and heuristics on the available data (mainly e-mail addresses and time zones), which are presented and discussed in detail. The results show a snapshot of the regional distribution of SourceForge users, which may be a good proxy of the actual distribution of libre software developers. In addition, the methodology may be of interest for similar studies in other domains, when the available data is similar (as is the case of mailing lists related to software projects).}, keywords = {distributed, email, email address, free software, geographical location, geography, libre software, mining software repositories, open source software, sourceforge, timezone}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138017}, url = {http://doi.acm.org/10.1145/1137983.1138017}, attachments = {https://flosshub.org/sites/flosshub.org/files/144GeographicLocation.pdf}, author = {Gregorio Robles and Jesus M. 
Gonzalez-Barahona} } @conference {Askari:2006:ITE:1137983.1138013, title = {Information theoretic evaluation of change prediction models for large-scale software}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {126{\textendash}132}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In this paper, we analyze the data extracted from several open source software repositories. We observe that the change data follows a Zipf distribution. Based on the extracted data, we then develop three probabilistic models to predict which files will have changes or bugs. The first model is Maximum Likelihood Estimation (MLE), which simply counts the number of events, i.e., changes or bugs, that happen to each file and normalizes the counts to compute a probability distribution. The second model is Reflexive Exponential Decay (RED) in which we postulate that the predictive rate of modification in a file is incremented by any modification to that file and decays exponentially. The third model is called RED-Co-Change. With each modification to a given file, the RED-Co-Change model not only increments its predictive rate, but also increments the rate for other files that are related to the given file through previous co-changes. We then present an information-theoretic approach to evaluate the performance of different prediction models. In this approach, the closeness of model distribution to the actual unknown probability distribution of the system is measured using cross entropy. We evaluate our prediction models empirically using the proposed information-theoretic approach for six large open source systems. 
Based on this evaluation, we observe that of our three prediction models, the RED-Co-Change model predicts the distribution that is closest to the actual distribution for all the studied systems.}, keywords = {bugs, change analysis, cvs, evaluation approach, file, freebsd, information theory, kde, koffice, log files, netbsd, openbsd, postgresql, prediction, prediction models, scm, source code}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138013}, url = {http://doi.acm.org/10.1145/1137983.1138013}, attachments = {https://flosshub.org/sites/flosshub.org/files/126InformationTheoretic.pdf}, author = {Askari, Mina and Holt, Ric} } @article {1232, title = {Integration of libre software applications to create a collaborative work platform for researchers at GET}, journal = {International Journal of Information Technology and Web Engineering}, volume = {1}, number = {3}, year = {2006}, month = {07/2006}, pages = {1-16}, publisher = {IGI Global}, abstract = {Libre software provides powerful applications ready to be integrated for the build-up of platforms for internal use in organizations. We describe the architecture of the collaborative work platform which we have integrated, designed for researchers at GET. 
We present the elements we have learned during this project in particular with respect to contribution to external libre projects, in order to better ensure the maintainability of the internal applications, and to phpGroupware as a framework for specific applications development.}, keywords = {collaborative work environment, contribution, free software, groupware, in-house applications, libre software, open source software, OpenLDAP, phpGroupware, PicoLibre, ProGET, Sympa, TWiki, WebDAV, wiki}, author = {Olivier Berger and Christian Bac and Benoit Hamet} } @conference {1094, title = {Knowledge Reuse in Open Source Software: An Exploratory Study of 15 Open Source Projects}, booktitle = {Proceedings of the 38th Annual Hawaii International Conference on System Sciences}, year = {2005}, note = {"In a first step, we asked developers of different open source projects to respond to a very short web-based survey."... "In a second step, we started with gathering data from 15 projects, including interviews. In parallel, the source code, CVS comments and to a certain extent email communication was analyzed to receive a dynamic, and within the limits of the method, complete picture of knowledge reuse practices." "Projects included games (Adonthell, FlightGear, Xboard), text processing (Abiword), a GNU/Linux desktop (Xfce4), an instant messenger client (Miranda), fax software (HylaFAX), a content management system (Tiki/CMS Groupware), encryption software (OpenSSL), a collaborative music system (iRATE Radio), file sharing networks (GNUnet, Mnet, Freenet), a mailing list manager (Mailman), and an mp3 encoder (Lame)." "Our data sources included interviews with key developers, source code, CVS comments, mailing lists and various Internet resources"}, pages = {1-10}, publisher = {IEEE}, organization = {IEEE}, address = {Big Island, HI, USA}, abstract = {To date, there is no investigation of knowledge reuse in open source software projects. 
This paper focuses on the forms of knowledge reuse and the factors impacting on them. It develops a theory drawn from data of 15 open source software projects and finds that the effort to search, integrate and maintain external knowledge influences the form of knowledge to be reused. Implications for firms and innovation research are discussed.}, keywords = {cvs, email, knowledge reuse, lines of code, loc, source code, Survey}, doi = {10.1109/HICSS.2005.378}, url = {http://www.computer.org/csdl/proceedings/hicss/2005/2268/07/22680198b-abs.html}, author = {von Krogh, G. and Spaeth, S. and Haefliger, S.} } @article {Grewal:2006:LLL:1246148.1246155, title = {Location, Location, Location: How Network Embeddedness Affects Project Success in Open Source Systems}, journal = {Management Science}, volume = {52}, number = {7}, year = {2006}, month = {July}, pages = {1043{\textendash}1056}, publisher = {INFORMS}, address = {Institute for Operations Research and the Management Sciences (INFORMS), Linthicum, Maryland, USA}, abstract = {The community-based model for software development in open source environments is becoming a viable alternative to traditional firm-based models. To better understand the workings of open source environments, we examine the effects of network embeddedness---or the nature of the relationship among projects and developers---on the success of open source projects. We find that considerable heterogeneity exists in the network embeddedness of open source projects and project managers. We use a visual representation of the affiliation network of projects and developers as well as a formal statistical analysis to demonstrate this heterogeneity and to investigate how these structures differ across projects and project managers. Our main results surround the effect of this differential network embeddedness on project success. 
We find that network embeddedness has strong and significant effects on both technical and commercial success, but that those effects are quite complex. We use latent class regression analysis to show that multiple regimes exist and that some of the effects of network embeddedness are positive under some regimes and negative under others. We use project age and number of page views to provide insights into the direction of the effect of network embeddedness on project success. Our findings show that different aspects of network embeddedness have powerful but subtle effects on project success and suggest that this is a rich environment for further study.}, keywords = {affiliation network, age, developers, latent class analysis, network embeddedness, open source software, page views, perl, project success, registration, sourceforge}, issn = {0025-1909}, doi = {10.1287/mnsc.1060.0550}, url = {http://portal.acm.org/citation.cfm?id=1246148.1246155}, author = {Grewal, Rajdeep and Lilien, Gary L. and Mallapragada, Girish} } @article {Yu:2006:MKO:1150566.1150571, title = {Maintainability of the kernels of open-source operating systems: A comparison of Linux with FreeBSD, NetBSD, and OpenBSD}, journal = {J. Syst. Softw.}, volume = {79}, year = {2006}, note = {"Data regarding the number and total number of lines of code of kernel and nonkernel modules in the four operating systems are provided in Table 1" loc, kloc, number of kernel modules, number of nonkernel modules size c files .h files}, month = {June}, pages = {807{\textendash}815}, publisher = {Elsevier Science Inc.}, address = {New York, NY, USA}, abstract = {We compared and contrasted the maintainability of four open-source operating systems: Linux, FreeBSD, NetBSD, and OpenBSD. We used our categorization of common coupling in kernel-based software to highlight future maintenance problems. An unsafe definition is a definition of a global variable that can affect a kernel module if that definition is changed. 
For each operating system we determined a number of measures, including the number of global variables, the number of instances of global variables in the kernel and overall, as well as the number of unsafe definitions in the kernel and overall. We also computed the value of each our measures per kernel KLOC and per KLOC overall. For every measure and every ratio, Linux compared unfavorably with FreeBSD, NetBSD, and OpenBSD. Accordingly, we are concerned about the future maintainability of Linux. }, keywords = {abiword, Common coupling, coupling, Definition-use analysis, freebsd, kernel, lines of code, linux, linux kernel, loc, Maintainability, modules, netbsd, Open-source software, openbsd, source code}, issn = {0164-1212}, doi = {10.1016/j.jss.2005.08.014}, url = {http://dx.doi.org/10.1016/j.jss.2005.08.014}, attachments = {https://flosshub.org/sites/flosshub.org/files/YuSchachChen.pdf}, author = {Yu, Liguo and Schach, Stephen R. and Chen, Kai and Heller, Gillian Z. and Offutt, Jeff} } @conference {Robles:2006:MLS:1137983.1137986, title = {Mining large software compilations over time: another perspective of software evolution}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {3{\textendash}9}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {With the success of libre (free, open source) software, a new type of software compilation has become increasingly common. Such compilations, often referred to as {\textquoteright}distributions{\textquoteright}, group hundreds, if not thousands, of software applications and libraries written by independent parties into an integrated system. Software compilations raise a number of questions that have not been targeted so far by software evolution, which usually focuses on the evolution of single applications. 
Undoubtedly, the challenges that software compilations face differ from those found in single software applications. Nevertheless, it can be assumed that both, the evolution of applications and that of software compilations, have similarities and dependencies. In this sense, we identify a dichotomy, common to that in economics, of software evolution in the small (micro-evolution) and in the large (macro-evolution). The goal of this paper is to study the evolution of a large software compilation, mining the publicly available repository of a well-known Linux distribution, Debian. We will therefore investigate changes related to hundreds of millions of lines of code over seven years. The aspects that will be covered in this paper are size (in terms of number of packages and of number of lines of code), use of programming languages, maintenance of packages and file sizes.}, keywords = {debian, distributions, evolution, large software collections, lines of code, loc, metrics, mining software repositories, size, sloc, sloccount, software evolution, software integrators}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1137986}, url = {http://doi.acm.org/10.1145/1137983.1137986}, attachments = {https://flosshub.org/sites/flosshub.org/files/3miningLarge.pdf}, author = {Gregorio Robles and Jesus M. Gonzalez-Barahona and Martin Michlmayr and Amor, Juan Jose} } @conference {Zimmermann:2006:MVA:1137983.1138001, title = {Mining version archives for co-changed lines}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {72{\textendash}75}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Files, classes, or methods have frequently been investigated in recent research on co-change. In this paper, we present a first study at the level of lines. 
To identify line changes across several versions, we define the annotation graph which captures how lines evolve over time. The annotation graph provides more fine-grained software evolution information such as life cycles of each line and related changes: "Whenever a developer changed line 1 of version.txt she also changed line 25 of Library.java."}, keywords = {change, change analysis, change management, graph, lines of code, source code}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138001}, url = {http://doi.acm.org/10.1145/1137983.1138001}, attachments = {https://flosshub.org/sites/flosshub.org/files/72MiningVersionArchives.pdf}, author = {Zimmermann, Thomas and Kim, Sunghun and Zeller, Andreas and Whitehead,Jr., E. James} } @article {2006, title = {Opportunities and Challenges Applying Functional Data Analysis to the Study of Open Source Software Evolution}, journal = {Statistical Science}, volume = {21}, number = {2}, year = {2006}, note = {"As part of a larger project, data were collected on 105 OSS projects hosted online at Sourceforge (sf.net)." "...we limited our data collection to projects that use only the Java programming language and were listed in the Internet and System Networking domains." "... only including these projects that use an OSI approved license..." "had to have posted at least one file on the Sourceforge site as of the time of our initial project selection Fall 2002" "Data were collected on the published release history of each project that met the screening criteria. Each release of each project was analyzed to calculate CplXLCoh. The size of each release was measured using a calculation of the number of lines of code (LOC)"}, pages = {167-178}, publisher = {Institute of Mathematical Statistics}, abstract = {This paper explores the application of functional data analysis (FDA) as a means to study the dynamics of software evolution in the open source context. 
Several challenges in analyzing the data from software projects are discussed, an approach to overcoming those challenges is described, and preliminary results from the analysis of a sample of open source software (OSS) projects are provided. The results demonstrate the utility of FDA for uncovering and categorizing multiple distinct patterns of evolution in the complexity of OSS projects. These results are promising in that they demonstrate some patterns in which the complexity of software decreased as the software grew in size, a particularly novel result. The paper reports preliminary explorations of factors that may be associated with decreasing complexity patterns in these projects. The paper concludes by describing several next steps for this research project as well as some questions for which more sophisticated analytical techniques may be needed.}, keywords = {complexity, evolution, fda, java, lines of code, loc, release history, scm, size, sourceforge}, issn = {08834237}, url = {http://www.jstor.org/stable/27645747}, author = {Stewart, Katherine J. and Darcy, David P. and Daniel, Sherae L.} } @conference {706, title = {Retrieving Open Source Software Licenses}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, pages = {35 - 46}, publisher = {Springer}, organization = {Springer}, abstract = {Open Source Software maintenance and reuse require identifying and comprehending the applied software licenses. This paper first characterizes software maintenance, and open source software (OSS) reuse which are particularly relevant in this context. The information needs of maintainers and reusers can be supported by reverse engineering tools at different information retrieval levels. The paper presents an automated license retrieval approach called ASLA. User needs, system architecture, tool features, and tool evaluation are presented. 
The implemented tool features support identifying source file dependencies and licenses in source files, and adding new license templates for identifying licenses. The tool is evaluated against another tool for license information extraction. ASLA requires the source code as available input but is otherwise not limited to OSS. It supports the same programming languages as GCC. License identification coverage is good and the tool is extendable. }, keywords = {gaim, license, license analysis, maintenance, mozilla, reuse}, doi = {10.1007/0-387-34226-5_4}, attachments = {https://flosshub.org/sites/flosshub.org/files/Retrieving\%20Open\%20Source\%20Software\%20Licenses.pdf}, author = {Tuunanen, Timo and Koskinen, Jussi and K{\"a}rkk{\"a}inen, Tommi} } @article {1088, title = {Self-Organization Patterns in Wasp and Open Source Communities}, journal = {IEEE Intelligent Systems}, volume = {21}, year = {2006}, note = {"To investigate such claims, we studied an OSS community{\textquoteright}s social network from a dataset describing the email activity of 120 different software teams" "Our test data originated from Sourceforge (http://sourceforge.net), a large open source project repository, and included communi- ties ranging from very small networks with one or two members to large networks with thousands of members." "we limited our consideration to email traffic associated with bug fixes and bug reporting. As other researchers have shown[5] this email subset allows an effective reconstruction of the software community{\textquoteright}s social network." "We thank Kevin Crowston and James Howison for making their software data publicly available."}, month = {03/2006}, pages = {36 - 40}, abstract = {In this paper, we conducted a comparative study of how social organization takes place in a wasp colony and OSS developer communities. Both these systems display similar global organization patterns, such as hierarchies and clear labor divisions. 
As our analysis shows, both systems also define interacting agent networks with similar common features that reflect limited information sharing among agents. As far as we know, this is the first research study analyzing the patterns and functional significance of these systems{\textquoteright} weighted-interaction networks. By illuminating the extent to which self-organization is responsible for patterns such as hierarchical structure, we can gain insight into the origins of organization in OSS communities.}, keywords = {agents, decentralization, developers, email, email archives, flossmole, hierarchy, labor division, organization, self-organizing teams, social network analysis, social networks, sourceforge, teams, wasps}, issn = {1541-1672}, doi = {10.1109/MIS.2006.34}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.95.5574\&rep=rep1\&type=pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/valverde.pdf}, author = {Valverde, S. and Theraulaz, G. and Gautrais, J. and Fourcassie, V. and Sole, R.V.} } @conference {German:2006:UEA:1137983.1138020, title = {Using evolutionary annotations from change logs to enhance program comprehension}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {159{\textendash}162}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Evolutionary annotations are descriptions of how source code evolves over time. Typical source comments, given their static nature, are usually inadequate for describing how a program has evolved over time; instead, source code comments are typically a description of what a program currently does. We propose the use of evolutionary annotations as a way of describing the rationale behind changes applied to a given program (for example "These lines were added to ..."). 
Evolutionary annotations can assist a software developer in the understanding of how a given portion of source code works by showing him how the source has evolved into its current form. In this paper we describe a method to automatically create evolutionary annotations from change logs, defect tracking systems and mailing lists. We describe the design of a prototype for Eclipse that can filter and present these annotations alongside their corresponding source code and in workbench views. We use Apache as a test case to demonstrate the feasibility of this approach.}, keywords = {annotations, apache, bug tracking, change history, eclipse, evolutionary, log files, mailing lists, mining software repositories, software evolution, version control}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1138020}, url = {http://doi.acm.org/10.1145/1137983.1138020}, attachments = {https://flosshub.org/sites/flosshub.org/files/159UsingEvolutionary.pdf}, author = {Daniel M. German and Peter C. Rigby and Storey, Margaret-Anne} } @conference {699, title = {On the Weickian Model in the Context of Open Source Software Development: Some Preliminary Insights}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, pages = {3--8}, publisher = {Springer}, organization = {Springer}, abstract = {Despite being regarded as a path-breaking model of organising, Weick{\textquoteright}s Enactment-Selection-Retention (ESR) model has been labelled too abstract a model find any practical applications. This paper attempts to show that exploration-oriented open source projects represent valuable case studies where Weick{\textquoteright}s ESR model can be applied. By taking the Linux case study as a case in point, it is argued that a qualitative analysis of micro interactions (i.e. double interacts) might reveal broad organising patterns. 
Preliminary implications in terms of coordination and knowledge making processes are discussed in the final section. }, keywords = {case study, linux}, doi = {10.1007/0-387-34226-5_1}, attachments = {https://flosshub.org/sites/flosshub.org/files/On\%20the\%20Weickian\%20Model\%20in\%20the\%20Context.pdf}, author = {Iannacci, Federico} } @conference {Kim:2005:ASC:1083142.1083154, title = {Analysis of signature change patterns}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {1{\textendash}5}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software continually changes due to performance improvements, new requirements, bug fixes, and adaptation to a changing operational environment. Common changes include modifications to data definitions, control flow, method/function signatures, and class/file relationships. Signature changes are notable because they require changes at all sites calling the modified function, and hence as a class they have more impact than other change kinds. We performed signature change analysis over software project histories to reveal multiple properties of signature changes, including their kind, frequency, and evolution patterns. These signature properties can be used to alleviate the impact of signature changes. In this paper we introduce a taxonomy of signature change kinds to categorize observed changes. 
We report multiple properties of signature changes based on an analysis of eight prominent open source projects including the Apache HTTP server, GCC, and Linux 2.5 kernel.}, keywords = {apache, gcc, kernel, linux, signature change, signature change patterns, software evolution, software evolution path, source code}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083154}, url = {http://doi.acm.org/10.1145/1082983.1083154}, attachments = {https://flosshub.org/sites/flosshub.org/files/64AnalysisOfSignature.pdf}, author = {Kim, Sunghun and Whitehead,Jr., E. James and Bevan, Jennifer} } @conference {795, title = {The challenges of creating open source education software: the Gild experience}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {338-340}, keywords = {COMMUNITY, eclipse, learning environment, novice programmers, open source, programming environment}, url = {http://pascal.case.unibz.it/handle/2038/1539}, author = {Daniel M. German and Rigby, Peter and Cubranic, Davor and Storey, Margaret-Anne and Thomson, Suzanne} } @proceedings {1517, title = {Digesting Virtual {\textquotedblleft}Geek{\textquotedblright} Culture: The Summarization of Technical Internet Relay Chats}, year = {2005}, month = {06/2005}, pages = {298-305}, address = {Ann Arbor, MI, USA}, abstract = {This paper describes a summarization system for technical chats and emails on the Linux kernel. To reflect the complexity and sophistication of the discussions, they are clustered according to subtopic structure on the sub-message level, and immediate responding pairs are identified through machine learning methods. 
A resulting summary consists of one or more mini-summaries, each on a subtopic from the discussion.}, keywords = {computational linguistics, irc, linux, summarizing}, url = {http://acl.ldc.upenn.edu/P/P05/P05-1037.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/P05-1037.pdf}, author = {Liang Zhou and Eduard Hovy} } @article {102, title = {Empirical validation of object-oriented metrics on open source software for fault prediction}, journal = {IEEE Transactions on Software Engineering}, volume = {31}, number = {10}, year = {2005}, note = {"This paper describes how we calculated the object-oriented metrics given by Chidamber and Kemerer to illustrate how fault-proneness detection of the source code of the open source Web and e-mail suite called Mozilla can be carried out. We checked the values obtained against the number of bugs found in its bug database - called Bugzilla - using regression and machine learning methods to validate the usefulness of these metrics for fault-proneness prediction. We also compared the metrics of several versions of Mozilla to see how the predicted fault-proneness of the software system changed during its development cycle." metrics, wmc weighted methods per class, dit depth of inheritance, rfc response for a class, noc number of children, cbo coupling between object classes, cohesion, lines of code, loc, sloc chidamber and kemerer metrics}, pages = {897--910}, abstract = {Open source software systems are becoming increasingly important these days. Many companies are investing in open source projects and lots of them are also using such software in their own work. But, because open source software is often developed with a different management style than the industrial ones, the quality and reliability of the code needs to be studied. Hence, the characteristics of the source code of these projects need to be measured to obtain more information about it. 
This paper describes how we calculated the object-oriented metrics given by Chidamber and Kemerer to illustrate how fault-proneness detection of the source code of the open source Web and e-mail suite called Mozilla can be carried out. We checked the values obtained against the number of bugs found in its bug database - called Bugzilla - using regression and machine learning methods to validate the usefulness of these metrics for fault-proneness prediction. We also compared the metrics of several versions of Mozilla to see how the predicted fault-proneness of the software system changed during its development cycle.}, keywords = {bugs, bugzilla, cbo, defects, dit, fault-prone modules, faults, lcom, lcomn, loc, metrics, mozilla, noc, object-oriented, rfc, source code, wmc}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.115.8372\&rep=rep1\&type=pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/Gyimothy.pdf}, author = {Gyimothy, T. and Ferenc, R. and Siket, I.} } @article {flosswp184, title = {Exploring the Structure of Complex Software Designs: An Empirical Study of Open Source and Proprietary Code (updated)}, year = {2005}, note = {"For each design, we report data on the number of source files, the number of dependencies, the density of the DSM (i.e., the number of dependencies per source file pair) the propagation cost and the clustered cost. We also provide data on the average complexity of source files, in terms of the number of functions and lines of code."}, month = jun, abstract = {This paper reports data from a study that seeks to characterize the differences in design structure between complex software products. In particular, we use Design Structure Matrices (DSMs) to map the dependencies between the elements of a design and define metrics that allow us to compare the structures of different designs. 
We first use these metrics to compare the architectures of two software products - the Linux operating system and the Mozilla web browser - that were developed via contrasting modes of organization: specifically, open source versus proprietary development. We then track the evolution of Mozilla, paying particular attention to a purposeful "re-design" effort that was undertaken with the intention of making the product more "modular." We find significant differences in structure between Linux and the first version of Mozilla, suggesting that Linux had a more modular architecture. We also find that the redesign of Mozilla resulted in an architecture that was significantly more modular than that of its predecessor, and indeed, than that of Linux. Our results, while exploratory, are consistent with a view that different modes of organization are associated with designs that possess different structures. However, we also illustrate that purposeful managerial actions can have a large impact on structure. This latter result is important given recent moves to release proprietary software into the public domain. These moves are likely to fail unless the product possesses an architecture that facilitates participation. 
Our paper provides evidence that a tightly-coupled design can be adapted to meet this objective.}, keywords = {complexity, cost, dependencies, functions, lines of code, linux, loc, mozilla, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/maccormackrusnakbaldwin2.pdf}, author = {Alan MacCormack and John Rusnak and Carliss Baldwin} } @article {vanWendeldeJoode2005109, title = {Handling variety: the tension between adaptability and interoperability of open source software}, journal = {Computer Standards \& Interfaces}, volume = {28}, number = {1}, year = {2005}, note = {unable to find concise description of the data used [ms]}, pages = {109--121}, abstract = {Open source software (OSS) offers unprecedented opportunities to create variety. This could lead to incompatibility and fragmentation. To prevent this some form of coordination is needed. This paper explores which mechanisms of coordination are responsible for limiting divergence in OSS. Two cases are examined: Java and Linux. A systematic difference seems to exist between the mechanisms identified in the two communities. With respect to Java, divergence is where possible avoided ex ante, whereas for Linux divergence is foremost reduced ex post. The conclusion discusses this difference and the implications of both types of coordination in respect to interoperability.}, keywords = {coordination, divergence, java, linux}, issn = {0920-5489}, doi = {10.1016/j.csi.2004.12.004}, url = {http://www.sciencedirect.com/science/article/B6TYV-4F6K72H-1/2/c74c64ce51e6f46abf9f39ae945c9e15}, author = {van Wendel de Joode, Ruben and Tineke M. 
Egyedi} } @conference {781, title = {Idealism and Commercialism {\textendash} Developing Free/Libre and Open Source Software in Private Businesses}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {301--302}, abstract = {This paper presents a PhD research project undertaken as part of a larger project aimed at paying sociological attention to different forms of distribution of knowledge, including program code. We want to investigate empirically how the commons knows as free/open source software is actually made. In my PhD project I study the use and development of FLOSS in private businesses, focusing on professional developers working in private businesses and at the same time participating in the FLOSS community. The theoretical starting point is theories of power, dominance and legitimacy by Max Weber and Pierre Bourdieu.}, keywords = {dominance, FLOSS, FLOSS community, free/libre, legitimacy, linux, open source, Private Businesses, social organisation, theories of power}, url = {http://pascal.case.unibz.it/handle/2038/970}, author = {Lundestad, Christian V.} } @conference {798, title = {An International Master Programme in Free Software in the European Higher Education Space}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {349--352}, abstract = {The Universitat Oberta de Catalunya (Open University of Catalonia, UOC) offers an International Master programme in Free Software. The first edition of this master programme began on November 2003 and there are about 240 students currently enrolled at the different specialities offered by the program. In this paper, the design, the methodology and the first few conclusions drawn from this higher education experience are discussed and summarized. 
After this master programme was changed to accomplish with European Higher Education Space (EHES).}, keywords = {education, free software, FS community, GNU/Linux, learning, master programme, software development, university}, url = {http://pascal.case.unibz.it/handle/2038/713}, author = {Meg{\'\i}as, David and Serra, Jordi and Macau, Rafael} } @conference {789, title = {Legal issues of Open Source Software}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {320--321}, abstract = {From the legal standpoint, Open Source amounts to distributing rights and obligations in software license agreements in such a way, that they would (1) both grant users control over the program and (2) facilitate the sharing of improvements. The Open Source idea aims at reversing the process usually referred to as proprietary licensing and equaling the rights of the users with these of the authors. To some extent, it attempts to destroy monopolies created by copyright laws and to prevent them to arise again. There is much in the discussion on the legal issues of Open Source. The first voices came from the programmers who started the movement.}, keywords = {cooperation, copyright law, freedom, intellectual property, legal issue, license, open source software}, url = {http://pascal.case.unibz.it/handle/2038/974}, author = {Siewicz, Krzysztof} } @conference {Antoniol:2005:LPC:1083142.1083156, title = {Linear predictive coding and cepstrum coefficients for mining time variant information from software repositories}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {74--78}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {This paper presents an approach to recover time variant information from software repositories. It is widely accepted that software evolves due to factors such as defect removal, market opportunity or adding new features. 
Software evolution details are stored in software repositories which often contain the changes history. On the other hand there is a lack of approaches, technologies and methods to efficiently extract and represent time dependent information. Disciplines such as signal and image processing or speech recognition adopt frequency domain representations to mitigate differences of signals evolving in time. Inspired by time-frequency duality, this paper proposes the use of Linear Predictive Coding (LPC) and Cepstrum coefficients to model time varying software artifact histories. LPC or Cepstrum allow obtaining very compact representations with linear complexity. These representations can be used to highlight components and artifacts evolved in the same way or with very similar evolution patterns. To assess the proposed approach we applied LPC and Cepstral analysis to 211 Linux kernel releases (i.e., from 1.0 to 1.3.100), to identify files with very similar size histories. The approach, the preliminary results and the lesson learned are presented in this paper.}, keywords = {change history, data mining, evolution, files, kernel, linear predictive coding, linux, lpc, size, software evolution, source code}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083156}, url = {http://doi.acm.org/10.1145/1082983.1083156}, attachments = {https://flosshub.org/sites/flosshub.org/files/74LinearPredictive.pdf}, author = {Antoniol, Giuliano and Rollo, Vincenzo Fabio and Venturi, Gabriele} } @conference {774, title = {Migrazione di un Sistema Informativo da UNIX-AIX a UNIX-Linux}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {287--288}, abstract = {Il presente documento ha come obiettivo quello di descrivere la politica adottata dall{\textquoteright}Istituto Nazionale di Statistica rispetto all{\textquoteright}uso del software Open Source. 
In particolare vengono descritti i sistemi che attualmente operano su piattaforma Linux, quelli che sono in fase di migrazione e le scelte che si sono fatte nel caso di convivenza tra sistemi {\textquotedblleft}open{\textquotedblright} e sistemi {\textquotedblleft}proprietari{\textquotedblright}. Viene inoltre illustrata l{\textquoteright}architettura hardware scelta nel caso della migrazione di un sistema complesso da piattaforma interamente proprietaria (UNIX AIX) a piattaforma con sistema operativo open Linux Red Hat.}, keywords = {architettura hardware, linux, migrazione, open source, server, sistema informativo, sistema proprietario, unix aix}, url = {http://pascal.case.unibz.it/handle/2038/978}, author = {Colasanti, Cecilia and Patruno, Vincenzo and Vaccari, Carlo} } @conference {Huang:2005:MVH:1083142.1083158, title = {Mining version histories to verify the learning process of Legitimate Peripheral Participants}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {84--88}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Since code revisions reflect the extent of human involvement in the software development process, revision histories reveal the interactions and interfaces between developers and modules. We therefore divide developers and modules into groups according to the revision histories of the open source software repository, for example, sourceforge.net. To describe the interactions in the open source development process, we use a representative model, Legitimate Peripheral Participation (LPP) [6], to divide developers into groups such as core and peripheral teams, based on the evolutionary process of learning behavior. With the conventional module relationship, we divide modules into kernel and non-kernel types (such as UI). 
In the past, groups of developers and modules have been partitioned naturally with informal criteria. In this work, however, we propose a developer-module relationship model to analyze the grouping structures between developers and modules. Our results show some process cases of relative importance on the constructed graph of project development. The graph reveals certain subtle relationships in the interactions between core and non-core team developers, and the interfaces between kernel and non-kernel modules.}, keywords = {awstats, bzflag, cvs, filezilla, gallery, Legitimate Peripheral Participants (LPP), moodle, open boundary, open source software development process, phpmyadmin, social networks, sourceforge}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083158}, url = {http://doi.acm.org/10.1145/1082983.1083158}, attachments = {https://flosshub.org/sites/flosshub.org/files/84MiningVersion.pdf}, author = {Huang, Shih-Kun and Liu, Kang-min} } @conference {793, title = {Open Source and IMS Learning Design: Building the Infrastructure for eLearning}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {329--333}, abstract = {The development of open, flexible eLearning specifications has significant implications for and interactions with the FOSS movement. A short overview of eLearning specifications is provided, focusing on the difference between SCORM and Learning Design (LD). The significance of LD for FOSS is examined, and common values identified. The particular contribution made by FOSS to LD infrastructure is discussed, and the importance of reference applications described. 
An overview is given of the FOSS applications available, divided into design time and run time, with particular reference to LD editors and the CopperCore Learning Design engine.}, keywords = {eLearning, eLearning specification, FOSS, infrastructure, Learning Design}, url = {http://pascal.case.unibz.it/handle/2038/1264}, author = {Griffiths, David and Blat, Josep and Elferink, Ray and Zondergeld, Sara} } @conference {Phadke:2005:PRM:1145319.1145337, title = {Predicting risky modules in open-source software for high-performance computing}, booktitle = {Proceedings of the second international workshop on Software engineering for high performance computing system applications}, series = {SE-HPCS {\textquoteright}05}, year = {2005}, pages = {60--64}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {This paper presents the position that software-quality modeling of open-source software for high-performance computing can identify modules that have a high risk of bugs. Given the source code for a recent release, a model can predict which modules are likely to have bugs, based on data from past releases. If a user knows which software modules correspond to functionality of interest, then risks to operations become apparent. If the risks are too great, the user may prefer not to upgrade to the most recent release. Of course, such predictions are never perfect. After release, bugs are discovered. Some bugs are missed by the model, and some predicted errors do not occur. A successful model will be accurate enough for informed management action at the time of the predictions. As evidence for this position, this paper summarizes a case study of the Portable Extensible Toolkit for Scientific Computation (PETSC), which is a mathematical library for high-performance computing. Data was drawn from source-code and configuration management logs. 
The accuracy of logistic-regression and decision-tree models indicated that the methodology is promising. The case study also illustrated several modeling issues.}, keywords = {C4.5, decision trees, empirical case study, high performance computing, logistic regression, Open-source software, PETSc, software metrics, software quality model, software reliability}, isbn = {1-59593-117-1}, doi = {10.1145/1145319.1145337}, url = {http://doi.acm.org/10.1145/1145319.1145337}, author = {Phadke, Amit A. and Allen, Edward B.} } @conference {Stewart:2005:PAI:1042438.1043100, title = {A Preliminary Analysis of the Influences of Licensing and Organizational Sponsorship on Success in Open Source Projects}, booktitle = {Proceedings of the 38th Annual Hawaii International Conference on System Sciences - Volume 07}, series = {HICSS {\textquoteright}05}, year = {2005}, note = {"Publicly available data on open source projects registered on the Freshmeat website (www.freshmeat.net) was used to test the hypotheses. Data was collected from each project{\textquoteright}s Freshmeat website at the start and end of an eight month period (March - December 2002)." "We first selected three project categories from which to draw our sample. These were utilities, software development, and games." "Within these categories we further differentiated between new projects, which had been registered on the site within the two weeks prior to our first data collection point and older projects that had been registered more than two weeks prior to our initial data collection." }, pages = {1--10}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, abstract = {This paper develops and tests a model of the impact of licensing restrictiveness and organizational sponsorship on the popularity and vitality of open source software (OSS) development projects. 
Using data gathered from Freshmeat.net and OSS project home pages the main conclusions derived from the analysis are that organizational sponsorship has a positive effect on project popularity by easing user concerns about cost and quality and that license restrictiveness may have a negative effect on popularity by reducing the perceived utility of open source software. Theoretical and practical implications are discussed, and the paper outlines several avenues for future research.}, keywords = {contributors, developers, freshmeat, license analysis, licensing, metadata, popularity, restrictive, users}, isbn = {0-7695-2268-8}, doi = {10.1109/HICSS.2005.38}, url = {http://dx.doi.org/10.1109/HICSS.2005.38}, author = {Stewart, Katherine J. and Ammeter, Anthony P. and Maruping, Likoebe M.} } @conference {Twidale:2005:SBF:1062455.1062468, title = {Silver bullet or fool{\textquoteright}s gold: supporting usability in open source software development}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {35}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {At first glance it can look like Open Source Software development violates many, if not all, of the precepts of decades of careful research and teaching in Software Engineering. One could take a classic SE textbook and compare the activities elaborated and advocated in the various chapters with what is actually done in plain sight in the public logs of an OSS project in say SourceForge. For a Professor of Software Engineering this might make for rather depressing reading. Are the principles of SE being rendered obsolete? Has OSS really discovered Brooks{\textquoteright} Silver Bullet? 
Or is it just a flash in the pan or Fool{\textquoteright}s Gold. In this talk I will mainly look at one aspect of Open Source Development, the {\textquoteleft}problem{\textquoteright} of creating usable interfaces, particularly for non-technical end-users. Any approach involves the challenge of how to coordinate distributed collaborative interface analysis and design, given that in conventional software development this is usually done in small teams and almost always face to face. Indeed all the methods in any HCI text just assume same-time same-place work and don{\textquoteright}t map to distributed work, let alone the looser mechanisms of OSS development. Instead what is needed is a form of participatory usability involving the coordination of end users and developers in a constantly evolving redesign process.}, keywords = {course project, education, lifecycle model, pedagogical, software engineering education, software process}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062468}, url = {http://doi.acm.org/10.1145/1062455.1062468}, author = {Twidale, Michael} } @conference {Fielding:2005:SAO:1062455.1062474, title = {Software architecture in an open source world}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {43}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In spite of the hype and hysteria surrounding open source software development, there is very little that can be said of open source in general. Open source projects range in scope from the miniscule, such as the thousands of non-maintained code dumps left behind at the end of class projects, dissertations, and failed commercial ventures, to the truly international, with thousands of developers collaborating, directly or indirectly, on a common platform. 
One characteristic that is shared by the largest and most successful open source projects, however, is a software architecture designed to promote anarchic collaboration through extensions while at the same time preserving centralized control over the interfaces. This talk features a survey of the state-of-the-practice in open source development in regards to software architecture, with particular emphasis on the modular extensibility interfaces within several of the most successful projects, including Apache httpd, Eclipse, Mozilla Firefox, Linux kernel, and the World Wide Web (which few people recognize as an open source project in itself). These projects fall under the general category of collaborative open source software development, which emphasizes community aspects of software engineering in order to compensate for the often-volunteer nature of core developers and take advantage of the scalability obtainable through Internet-based virtual organizations.}, keywords = {apache, collaborative open source development, eclipse, extensibility, Firefox, linux, linux kernel, loose coupling, modularity, mozilla, open source, software architecture}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062474}, url = {http://doi.acm.org/10.1145/1062455.1062474}, author = {Roy T. Fielding} } @conference {899, title = {Stopping spyware at the gate: a user study of privacy, notice and spyware}, booktitle = {2005 Symposium on Usable Privacy and Security}, year = {2005}, month = {07/2005}, pages = {43--52}, publisher = {Association for Computing Machinery}, organization = {Association for Computing Machinery}, address = {Pittsburgh, PA}, keywords = {Design, end user license agreement, EULA, Experimentation, Human Factors, Legal Aspects, notice, privacy, security and usability, spyware, terms of service, ToS}, isbn = {1-59593-178-3}, author = {N. Good and Dhamija, R. and J. Grossklags and D. Thaw and Aronowitz, S. and D. Mulligan and J. 
Konstan} } @conference {782, title = {Towards Supporting Agile Practice Within The Libre Software Paradigm}, booktitle = {OSS2005: Open Source Systems}, year = {2005}, pages = {303--304}, abstract = {Individual agile methods have never been practiced as defined, in the same way that Royce{\textquoteright}s waterfall [1] model never reflected actual practice. Instead, practitioners adapted the core principles of these processes in order to suit their needs. Understanding this is key to appreciating the agile mindset. What does exist is a set of principles which, when followed loosely, form the agile practices. It is an important part of the agile mentality that the individuals within a project are more important that the process they follow. However, the individual methods do have their own identifying features that make them unique; for example testing must be performed before coding within eXtreme Programming (XP) [2]. However, if practitioners were to apply XP, exactly as Beck describes it, then they are probably not {\textquotedblleft}doing agile{\textquotedblright} as they may not be following the process that suits their needs best. One of the interesting features of the XP method is its requirement of a collocated team. Th...}, keywords = {agile methods, agile practice, extreme programming, libre software, open source, XP}, url = {http://pascal.case.unibz.it/handle/2038/1546}, author = {Adams, Paul and Boldyreff, Cornelia} } @conference {Neamtiu:2005:USC:1083142.1083143, title = {Understanding source code evolution using abstract syntax tree matching}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {2--6}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Mining software repositories at the source code level can provide a greater understanding of how software evolves. 
We present a tool for quickly comparing the source code of different versions of a C program. The approach is based on partial abstract syntax tree matching, and can track simple changes to global variables, types and functions. These changes can characterize aspects of software evolution useful for answering higher level questions. In particular, we consider how they could be used to inform the design of a dynamic software updating system. We report results based on measurements of various versions of popular open source programs, including BIND, OpenSSH, Apache, Vsftpd and the Linux kernel.}, keywords = {abstract syntax trees, apache, bind, evolution, linux, openssh, software evolution, source code, source code analysis, vsftpd}, isbn = {1-59593-123-6}, doi = {10.1145/1082983.1083143}, url = {http://doi.acm.org/10.1145/1082983.1083143}, attachments = {https://flosshub.org/sites/flosshub.org/files/2Understanding.pdf}, author = {Neamtiu, Iulian and Foster, Jeffrey S. and Hicks, Michael} } @booklet {Lopez-Fernandez_applyingsocial, title = {Applying Social Network Analysis to the Information in CVS Repositories}, howpublished = {International Workshop on Mining Software Repositories (MSR 2004)}, year = {2004}, pages = {101--105}, abstract = {The huge quantities of data available in the CVS repositories of large, long-lived libre (free, open source) software projects, and the many interrelationships among those data offer opportunities for extracting large amounts of valuable information about their structure, evolution and internal processes. Unfortunately, the sheer volume of that information renders it almost unusable without applying methodologies which highlight the relevant information for a given aspect of the project. In this paper, we propose the use of a well known set of methodologies (social network analysis) for characterizing libre software projects, their evolution over time and their internal structure. 
In addition, we show how we have applied such methodologies to real cases, and extract some preliminary conclusions from that experience.}, keywords = {apache, complex networks, cvs, gnome, kde, libre software engineering, source code, source code repositories, visualization techniques, visualization}, attachments = {https://flosshub.org/sites/flosshub.org/files/101ApplyingSocial.pdf}, author = {L{\'o}pez-Fern{\'a}ndez, L. and Gregorio Robles and Jesus M. Gonzalez-Barahona} } @proceedings {1191, title = {Collaboration, Leadership, Control, and Conflict Negotiation in the Netbeans.org Community}, year = {2004}, pages = {48--52}, abstract = {Large open source software development communities are quickly learning that, to be successful, they must integrate efforts not only among the organizations investing developers within the community and unaffiliated volunteer contributors, but also negotiate relationships with external groups hoping to sway the social and technical direction of the community and its products. Leadership and control sharing across organizations and individuals in and between communities are common sources of conflict. Such conflict often leads to breakdowns in collaboration. 
This paper seeks to explore the negotiation of these conflicts, collaborative efforts, and leadership and control structures in the Netbeans.org community.}, keywords = {conflict, leadership, netbeans}, attachments = {https://flosshub.org/sites/flosshub.org/files/jensen_0.pdf}, author = {Chris Jensen and Walt Scacchi} } @conference {Xinyi04fourinteresting, title = {Four Interesting Ways in Which History Can Teach Us About Software}, booktitle = {Proceedings of the 2004 international workshop on Mining software repositories - MSR {\textquoteright}04}, year = {2004}, month = {05/2004}, pages = {58--62}, abstract = {In this position paper, we outline four kinds of studies that we have undertaken in trying to understand various aspects of a software system{\textquoteright}s evolutionary history. In each instance, the studies have involved detailed examination of real software systems based on {\textquotedblleft}facts{\textquotedblright} extracted from various kinds of source artifact repositories, as well as the development of accompanying tools to aid in the extraction, abstraction, and comprehension processes. We briefly discuss the goals, results, and methodology of each approach.}, keywords = {ant, apache, change analysis, clone, clone detection, cvs, evolution, gcc, growth, kepler, linux, midworld, mycore, postgresql, source code, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/58FourInterestingWays.pdf}, author = {Michael Godfrey and Xinyi Dong and Cory Kapser and Lijie Zou} } @conference {1105, title = {Free \& Open Source Software Creation and {\textquoteleft}the Economy of Regard{\textquoteright}}, booktitle = {Third EPIP Workshop}, year = {2004}, month = {04/2004}, keywords = {linux, linux kernel, scm, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/DalleDavidGhosh\%20Wolak.pdf}, author = {Jean-Michel Dalle and Paul A. David and Rishab Aiyer Ghosh and Frank A. 
Wolak}
}

@proceedings {1196,
  title = {In the network: Distributed control in Gentoo Linux},
  year = {2004},
  abstract = {This position paper reports on the findings of an empirical pilot study of Gentoo Linux. Gentoo Linux is an open source Linux distribution developed by a geographically distributed community of volunteers. The reported findings are based on the analysis of a specific episode using actor network theory. With basis in the analysis, it is argued that control in this specific episode can be interpreted as both distributed and local at the same time. Control here being the power to define a problem and make the decision about the appropriate solution to the problem defined. Control, it is argued, is distributed in that it is the function of reciprocal influence among several human and non-human actors. Furthermore, it is argued that control can be interpreted as not inherent in organizational structures or hierarchies, but locally embedded among actors in the decision making process.},
  keywords = {gentoo, linux},
  attachments = {https://flosshub.org/sites/flosshub.org/files/osterlie77-82.pdf},
  author = {{\O}sterlie, T.}
}

@proceedings {124,
  title = {Membership dynamics and network stability in the open-source community: the {Ising} perspective},
  year = {2004},
  note = {"simulations with the empirical network data that were collected from two actual OSS communities, Linux and Hypermail." "we initially downloaded nearly 100,000 archived (between 1997 and 2003) LINUX Kernel and Hypermail newsgroup messages posted in a UNIX mailbox format" "Specific information was obtained regarding the characteristics of these two OSS communities, including the size, the number of average connections per participant, and the hierarchy of each community."},
  abstract = {In this paper, we address the following two questions: (1) How does a participant{\textquoteright}s membership decision affect the others (neighbors) with whom he has collaborated over an extended period of time in an open source software (OSS) network? (2) To what extent do network characteristics (i.e., size and connectivity) mediate the impact of external factors on the OSS participants{\textquoteright} dynamic membership decisions and hence the stability of the network? From the Ising perspective, we present fresh theoretical insight into the dynamic and reciprocal membership relations between OSS participants. We also performed simulations based on empirical data that were collected from two actual OSS communities. Some of the key findings include that (1) membership herding is highly present when the external force is weak, but decreases significantly when the force increases, (2) the propensity for membership herding is most likely to be seen in a large network with a random connectivity, and (3) for large networks, at low external force a random connectivity will perform better than a scale-free counterpart in terms of the network strength. However, as the temperature (external force) increases, the reverse phenomenon is observed. In addition, the scale-free connectivity appears to be less volatile than with the random connectivity in response to the increase in the temperature.
We conclude with several implications that may be of significance to OSS stakeholders.},
  keywords = {email, email archive, hypermail, linux, mailing list, membership, membership herding, newsgroup, open source, participants, social network analysis, stakeholders, team size},
  attachments = {https://flosshub.org/sites/flosshub.org/files/OhJeon.pdf},
  author = {Oh, Wonseok and Jeon, Sangyong}
}

@conference {Germ04b,
  title = {Mining CVS repositories, the softChange experience},
  booktitle = {Proc. Int{\textquoteright}l Workshop on Mining Software Repositories ({MSR})},
  year = {2004},
  note = {"Mailing lists. Mailing lists are an important source of information about the evolution of the project. We currently correlate MRs [modification requests] to mail messages by using the author and the date attributes of both the MR and the message."},
  pages = {17{\textendash}21},
  abstract = {CVS logs are a rich source of software trails (information left behind by the contributors to the development process, usually in the forms of logs). This paper describes how softChange extracts these trails, and enhances them. This paper also addresses some challenges that CVS fact extraction poses to researchers.},
  keywords = {bugzilla, cvs, email archives, log files, logs, softchange},
  attachments = {https://flosshub.org/sites/flosshub.org/files/17MiningCVS.pdf},
  author = {German, Daniel}
}

@proceedings {1197,
  title = {Open Source and Closed Source Software Development Methodologies},
  year = {2004},
  pages = {105-109},
  abstract = {Open source software development represents a fundamentally new concept in the field of software engineering. Open source development and delivery occurs over the Internet. Developers are not confined to a geographic area. They work voluntarily on a project of their choice. As new requirements emerge, the software is enhanced by the user/developers.
In this paper we show a comparative study of open source and closed source software development approaches and present a software life cycle model for open source software development.},
  keywords = {life cycle, lifecycle},
  attachments = {https://flosshub.org/sites/flosshub.org/files/potdar106-110.pdf},
  author = {Potdar, V. and Chang, E.}
}

@article {Chen:2004:OCL:990374.990391,
  title = {Open-Source Change Logs},
  journal = {Empirical Software Engineering},
  volume = {9},
  number = {3},
  year = {2004},
  note = {"We decided to compare actual differences in the source code with entries in the ChangeLog file. We used lxr, the Linux cross-referencing tool..., to determine the precise differences between two successive software versions. We then compared these differences with the records in the ChangeLog file to check the completeness of the ChangeLog file." },
  month = {September},
  pages = {197{\textendash}210},
  publisher = {Kluwer Academic Publishers},
  address = {Hingham, MA, USA},
  abstract = {A recent editorial in Empirical Software Engineering suggested that open-source software projects offer a great deal of data that can be used for experimentation. These data not only include source code, but also artifacts such as defect reports and update logs. A common type of update log that experimenters may wish to investigate is the ChangeLog, which lists changes and the reasons for which they were made. ChangeLog files are created to support the development of software rather than for the needs of researchers, so questions need to be asked about the limitations of using them to support research. This paper presents evidence that the ChangeLog files provided at three open-source web sites were incomplete. We examined at least three ChangeLog files for each of three different open-source software products, namely, GNUJSP, GCC-g++, and Jikes. We developed a method for counting changes that ensures that, as far as possible, each individual ChangeLog entry is treated as a single change. For each ChangeLog file, we compared the actual changes in the source code to the entries in the ChangeLog file and discovered significant omissions. For example, using our change-counting method, only 35 of the 93 changes in version 1.11 of Jikes appear in the ChangeLog file{\textemdash}that is, over 62\% of the changes were not recorded there. The percentage of omissions we found ranged from 3.7 to 78.6\%. These are significant omissions that should be taken into account when using ChangeLog files for research. Before using ChangeLog files as a basis for research into the development and maintenance of open-source software, experimenters should carefully check for omissions and inaccuracies.},
  keywords = {change log, gcc, GCC-g++, GNUJSP, Jikes, log files, Open-source software, source code},
  issn = {1382-3256},
  doi = {10.1023/B:EMSE.0000027779.70556.d0},
  url = {http://portal.acm.org/citation.cfm?id=990374.990391},
  attachments = {https://flosshub.org/sites/flosshub.org/files/chen.pdf},
  author = {Chen, Kai and Schach, Stephen R. and Yu, Liguo and Offutt, Jeff and Heller, Gillian Z.}
}

@article {1113,
  title = {Release criteria for the Linux kernel},
  journal = {First Monday},
  volume = {9},
  number = {4},
  year = {2004},
  note = {"The first was to examine the change logs [10, 11, 12] that are submitted with the public release of a version of the kernel on www.kernel.org [1]. Observations were made on the release nomenclature used in both the 2.4.x series and the 2.5.x (which later became the 2.6.x series). The number of patches incorporated into each release and the elapsed time between releases was recorded.
The second approach was to review the Linux kernel mailing list [2] for any evidence of criteria for performing a particular release including any metrics used, tests carried out or bug databases referenced."},
  month = {04/2004},
  abstract = {Before software is released to its users, software developers will ensure that the software has met specified functional and technical requirements and that it is as free from bugs as possible. Users should be able to have a high degree of confidence that the software will perform as specified and without fault. With open source development practices such as those employed on the Linux kernel project, there are no detailed specifications and little formal testing processes. The questions, then, are what criteria, if any, are used in determining the suitability for release of a particular version of this software, and do users have any degree of confidence in the quality of that release of software? These questions were examined in this study using information from the Linux Kernel Mailing List (LKML), the primary forum for discussion of development issues of the Linux kernel, and change logs submitted with version releases of the Linux kernel. It was determined that very little planning is employed in determining the release of a particular version of the software and that a version of the software is essentially a collection of source patches released at regular intervals with some stabilisation of the code base before each release. Very little attempt is made to verify that the code is bug free, and consequently, the code released is of a largely unknown level of quality.
End users are left to decide for themselves the suitability and robustness of a particular version of the software.},
  keywords = {bugs, change log, linux, linux kernel, log files, mailing list, patches, quality, release history},
  url = {http://firstmonday.org/htbin/cgiwrap/bin/ojs/index.php/fm/article/view/1136/1056},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Glance.pdf},
  author = {Glance, D.G.}
}

@article {DBLP:journals/electronicmarkets/VemuriB04,
  title = {Will the Open Source Movement Survive a Litigious Society?},
  journal = {Electronic Markets},
  volume = {14},
  number = {2},
  year = {2004},
  pages = {114-123},
  abstract = {Since no one is willing to undertake costly research and development to create innovation, incentives in the form of patents were instituted to motivate R\&D. In software development, contrary to economic intuition, open source software has emerged as a viable alternative source of innovation. The patenting system has performed reasonably well in enhancing many other technologies. Since the mid-1990s patenting of software and business methods is increasingly accepted in the United States. The legitimacy of many of these new patents is subject to controversy and debate. In this paper we examine the trend, rate of litigation and disposition of US patents in the US Federal Courts. We find that litigation rates of software and business method patents is four times that of all other patents and is increasing. A majority of patent litigations are not won by the perpetrator of the lawsuits. The open source software community is not immune to heightened patent litigations. Since software development is incremental, the paths of OSS and commercial development are entwined. The spillover of patent litigation into OSS may have disastrous consequences: It may increase the {\textquoteright}cost{\textquoteright} of OSS, dissuade volunteer developers and make OSS less attractive to users.
},
  keywords = {courts, INNOVATION, lawsuit, litigation, patents, software patents},
  author = {Vijay K. Vemuri and Vince Bertone}
}

@conference {German03automatingthe,
  title = {Automating the measurement of open source projects},
  booktitle = {Proceedings of the 3rd Workshop on Open Source Software Engineering},
  year = {2003},
  note = {"We have chosen to use Evolution to illustrate some of the capabilities of SoftChange. We focus on the data provided by CVS logs and the CVS commit mailing list. Our data includes changes to the CVS repository from April 1998 to January 2003."},
  pages = {63{\textendash}67},
  abstract = {The proliferation of open source projects raises a number of vital economic, social, and software engineering questions that are subject of intense research. Based on experience analyzing numerous open source and commercial projects we propose a set of tools to support extraction and validation of software project data. Such tools would streamline empirical investigation of open source projects and make it possible to test existing and new theories about the nature of open source projects. Our software includes tools to extract and summarize information from mailing lists, CVS logs, ChangeLog files, and defect tracking databases. More importantly, it cross-links records from various data sources and identifies all contributors for a software change.
We illustrate some of the capabilities by analyzing data from Ximian Evolution project.},
  keywords = {bug reports, bug tracking, changelog, cvs, defects, evolution, log files, logs, mailing list, scm, softchange, source code, ximian, ximian evolution},
  attachments = {https://flosshub.org/sites/flosshub.org/files/germanMockus2003.pdf},
  author = {German, Daniel and Audris Mockus}
}

@conference {DBLP:conf/ecis/TsiavosH03,
  title = {Beyond good and evil: why open source development for peer-to-peer networks does not necessarily equal to an open society is as imbalanced as copyright law and definitely is not going to make you a better person},
  booktitle = {European Conference on Information Systems (ECIS 2003)},
  year = {2003},
  note = {"In a first stage we collected data related to the development of the Gnutella protocol. The sources included: web sites that were used for hosting forums and file repositories related to the development of the protocol that could be either archived or still operational; messages posted on discussion groups, forums and newsgroups; the design documents of the Gnutella protocol. In a second stage we gathered material related to the Limewire application. The sources included: operational and archived web sites having been used for the development of the application; applications such as Concurrent Version Systems (CVS) or Bug reporting tools (such as Issuezila), design and implementation documentation and relevant press reports. The data gathered covered a time span from early 2000 to late November 2002." "Other sources informed our research and also acted, at times of uncertainty, as forms of triangulation and verification (Lee 1991). These sources include websites such as Slashdot.org and WiredNews; IRC-mediated communications and private messages exchanged between the various developers."},
  month = {06/2003},
  abstract = {This paper interrogates the claims that open source development is an ideal form of regulatory development.
We begin by presenting the literature that offers a framework of modalities of regulation where code, along with laws, markets, and norms shape and influence individual action. Within this framework, it is argued that for an Open Society we need Open Code. We present the processes through which the Gnutella protocol and the Limewire application are developed by deconstructing the mechanisms of participation and contribution of the individual developers. The techniques of monitoring, modularization and filtering that we identify appear to be inconsistent with open society promises. Instead we suggest a different framing, that of creating nests of interests, whose creators can find refuge from inhabitants of other nests. From that perspective, we suggest that we should stop referring to the war between Copyright and peer-to-peer networks as the battle between good and evil.},
  keywords = {bug reports, cvs, design documents, discussion, discussion forums, documentation, forums, gnutella, issuezilla, limewire, web site},
  attachments = {https://flosshub.org/sites/flosshub.org/files/tsiavosHosein.pdf},
  author = {Prodromos Tsiavos and Ian Hosein}
}

@article {flosswp103,
  title = {Contributing to the common pool resources in Open Source software. A comparison between individuals and firms},
  year = {2003},
  month = {August},
  abstract = {This paper studies the contributions to Open Source projects of software firms. Our goal is to analyse whether they follow the same regularities that characterize the behaviour of individual programmers. An exhaustive empirical analysis is carried out using data on project membership, project coordination and contribution efforts of 146 Italian firms that do business with Open Source software. We follow a meta-analytic approach comparing our findings with the results of the surveys conducted on Free Software programmers. Moreover, the availability of the data gathered by Hertel et al.
(2003) on 141 developers of the Linux kernel will allow direct comparisons between the two sets.},
  keywords = {developers, linux, linux kernel, Survey},
  attachments = {https://flosshub.org/sites/flosshub.org/files/bnaccorsirossidevelopers.pdf},
  author = {Andrea Bonaccorsi and Cristina Rossi}
}

@conference {1099,
  title = {Evidences in the evolution of OS projects through Changelog Analyses},
  booktitle = {Proceedings of the 3rd ICSE Workshop on Open Source},
  year = {2003},
  note = {"In this study we concentrate on a very large sample (406 projects) selected randomly from an OS portal[20]" (freshmeat) "We define three clusters of projects: {\textquoteright}large{\textquoteright} projects as long as they are based on more than 1000KB(40KLOC)..."},
  pages = {19-24},
  abstract = {Most empirical studies about Open Source (OS) projects or products are vertical and usually deal with the flagship, successful projects. There is a substantial lack of horizontal studies to shed light on the whole population of projects, including failures. This paper presents a horizontal study aimed at characterizing OS projects. We analyze a sample of around 400 projects from a popular OS project repository. Each project is characterized by a number of attributes. We analyze these attributes statically and over time. The main results show that few projects are capable of attracting a meaningful community of developers. The majority of projects is made by few (in many cases one) person with a very slow pace of evolution. We then try to observe how many projects count on a substantial number of developers, and analyze those projects more deeply. The goal is to achieve a better insight in the dynamics of open source development.
The initial results of this analysis, especially growth in code size and tendency to stability in modularity, seem to be in line with traditional close source development.},
  keywords = {classification, freshmeat, loc, modularity, repository, size, sloc, source code},
  url = {http://hdl.handle.net/10552/1037},
  attachments = {https://flosshub.org/sites/flosshub.org/files/capiluppi2003.pdf},
  author = {Capiluppi, Andrea}
}

@article {2003,
  title = {From a Firm-Based to a Community-Based Model of Knowledge Creation: The Case of the Linux Kernel Development},
  journal = {Organization Science},
  volume = {14},
  number = {6},
  year = {2003},
  note = {"we study the Linux development community mainly by analyzing the artifacts that the Linux developers have produced. A key output of knowledge creation activities is the artifacts. The most important artifact, of course, is the Linux operating system source code." "Along with the source code, a "Credits" text file and a "MAINTAINERS" text file are distributed to the users." "An equally important artifact is the development activities archived in the Linux-kernel mailing list"..."Using the weekly Linux-kernel email archive for years 1995 to 2000 as a key source of data, we focus on people who have sent at least one email to the Linux-kernel mailing list. " "In addition, we examine the developers{\textquoteright} demographic distributions, working patterns, and motivations by analyzing the raw data from an on-line survey"},
  pages = {633-649},
  publisher = {INFORMS},
  abstract = {We propose a new model of knowledge creation in purposeful, loosely coordinated, distributed systems, as an alternative to a firm-based one.
Specifically, using the case of the Linux kernel development project, we build a model of community-based, evolutionary knowledge creation to study how thousands of talented volunteers, dispersed across organizational and geographical boundaries, collaborate via the Internet to produce a knowledge-intensive, innovative product of high quality. By comparing and contrasting the Linux model with the traditional/commercial model of software development and firm-based knowledge creation efforts, we show how the proposed model of knowledge creation expands beyond the boundary of the firm. Our model suggests that the product development process can be effectively organized as an evolutionary process of learning driven by criticism and error correction. We conclude by offering some theoretical implications of our community-based model of knowledge creation for the literature of organizational learning, community life, and the uses of knowledge in society.},
  keywords = {credits, developers, email, email archives, knowledge creation, linux kernel, mailing list, maintainers, scm, source code, Survey, Volunteers},
  issn = {10477039},
  url = {http://www.jstor.org/stable/4135125},
  author = {Lee, Gwendolyn K. and Cole, Robert E.}
}

@article {116,
  title = {How open source software works: "free" user-to-user assistance},
  journal = {Research Policy},
  volume = {32},
  number = {6},
  year = {2003},
  note = {"The empirical data we collected for study was related to postings to the Apache Usenet help forum, CIWS-U (comp.infosystems.www.servers.unix). " "For data regarding long-term participation in CIWS-U{\textemdash}who participated, long-term trends, etc.{\textemdash}we examined Usenet posting patterns from 1996 to 1999." "The Usenet log data was obtained from a World Wide Web service called Deja.com (since acquired by Google)" survey},
  pages = {923-943},
  abstract = {Research into free and open source software development projects has so far largely focused on how the major tasks of software development are organized and motivated. But a complete project requires the execution of "mundane but necessary" tasks as well. In this paper, we explore how the mundane but necessary task of field support is organized in the case of Apache web server software, and why some project participants are motivated to provide this service gratis to others. We find that the Apache field support system functions effectively. We also find that, when we partition the help system into its component tasks, 98\% of the effort expended by information providers in fact returns direct learning benefits to those providers. This finding considerably reduces the puzzle of why information providers are willing to perform this task "for free." Implications are discussed.},
  keywords = {apache, help, logs, MOTIVATION, participants, Survey, usenet},
  doi = {10.1016/S0048-7333(02)00095-1},
  url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.110.8172\&rep=rep1\&type=pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/lakhani2003.pdf},
  author = {Karim R Lakhani and von Hippel, Eric}
}

@conference {flosswp124,
  title = {Maintainability of the Linux Kernel},
  booktitle = {Proceedings of the 2nd Workshop on Open Source Software Engineering ICSE2002},
  year = {2003},
  note = {"We have examined 365 versions of Linux. For every version, we counted the number of instances of common (global) coupling between each of the 17 kernel modules and all the other modules in that version of Linux."},
  month = {October},
  abstract = {We have examined 365 versions of Linux.
For every version, we counted the number of instances of common (global) coupling between each of the 17 kernel modules and all the other modules in that version of Linux. We found that the number of instances of common coupling grows exponentially with version number. This result is significant at the 99.99\% level, and no additional variables are needed to explain this increase. We conclude that, unless Linux is restructured with a bare minimum of common coupling, the dependencies induced by common coupling will, at some future date, make Linux exceedingly hard to maintain without inducing regression faults.},
  keywords = {coupling, kernel, linux, linux kernel, modules, source code},
  attachments = {https://flosshub.org/sites/flosshub.org/files/linux-maint_0.pdf},
  author = {Schach, Stephen R. and Jin, B. and Wright, D.R.}
}

@article {123,
  title = {Open source software development and Lotka{\textquoteright}s Law: Bibliometric patterns in programming},
  journal = {Journal of the American Society for Information Science and Technology},
  volume = {54},
  number = {2},
  year = {2003},
  note = {"Two leading metadata repositories are the Linux Software Map (LSM) and Sourceforge, both of which were used for this research." "For this article, we examined data listing the number of registered developers for each software project hosted by Sourceforge." "The data we obtained from the LSM collection were taken mainly from the Author: field of LSM records. The Author: field in LSM records gives us the ability to track the author of record for a software package. LSM metadata also include a list of maintainers, primary software distribution sites, date of update and other items." "The data we obtained from Sourceforge consist of a list of developer ID numbers, followed by the number of projects on which the individual is listed as a developer, then the number of projects on which the individual is listed as an administrator.
These data were provided for all 33,892 individuals registered to work on projects hosted by Sourceforge in July 2001."},
  pages = {169-178},
  abstract = {This research applies Lotka{\textquoteright}s Law to metadata on open source software development. Lotka{\textquoteright}s Law predicts the proportion of authors at different levels of productivity. Open source software development harnesses the creativity of thousands of programmers worldwide, is important to the progress of the Internet and many other computing environments, and yet has not been widely researched. We examine metadata from the Linux Software Map (LSM), which documents many open source projects, and Sourceforge, one of the largest resources for open source developers. Authoring patterns found are comparable to prior studies of Lotka{\textquoteright}s Law for scientific and scholarly publishing. Lotka{\textquoteright}s Law was found to be effective in understanding software development productivity patterns, and offer promise in predicting aggregate behavior of open source developers.},
  keywords = {developers, linux, linux software map, lsm, sourceforge, team size},
  doi = {10.1002/asi.10177},
  author = {Newby, G. B. and Greenberg, J. and Jones, P.}
}

@conference {Wynn03organizationalstructure,
  title = {Organizational Structure of Open Source Projects: A Life Cycle Approach},
  booktitle = {Proceedings of 7th Annual Conference of the Southern Association for Information Systems},
  year = {2003},
  note = {"The three graphs in Figure 2 below were taken from smoothed download counts for existing open source projects on Sourceforge.net" "A random sample of 150 open source projects will be taken from data provided by Sourceforge.net. Each project will be evaluated to determine their current life cycle stage (where possible) using download counts.
Next, the project admins, developers, and several identifiable users for each evaluated project will be contacted via email to request completing a brief questionnaire to measure the current focus of the project, formal structure, division of labor, leader role, coordination, level of commitment, user success, and developer success. "},
  abstract = {The structure of open source project communities is discussed in relation to the organizational life cycle. In lieu of sales figures, the download counts for each project are used to identify the life cycle stage of a random sample of open source projects. A research model is proposed that attempts to measure the fit between the life cycle stage and the specific organizational characteristics of these projects (focus, division of labor, role of the leader, level of commitment, and coordination/control) as an indicator of the success of a project as measured by the satisfaction and involvement of both developers and users.},
  keywords = {division of labor, downloads, growth, interview, leadership, life cycle, lifecycle, project success, roles, sourceforge, Survey},
  attachments = {https://flosshub.org/sites/flosshub.org/files/wynn2004.pdf},
  author = {Donald E. Wynn}
}

@conference {1248,
  title = {Supporting Distributed and Decentralized Projects: Drawing Lessons from the Open Source Community},
  booktitle = {1st Workshop on Open Source in an Industrial Context},
  year = {2003},
  note = {"We begin the remainder of the paper with discussion of a survey of open source projects, showing similarities that have arisen in tool usage"},
  month = {10/2003},
  abstract = {Open source projects are typically organized in a distributed and decentralized manner. These factors strongly determine the processes followed and constrain the types of tools that can be utilized.
This paper explores how distribution and decentralization have affected processes and tools in existing open source projects with the goals of summarizing the lessons learned and identifying opportunities for improving both. Issues considered include decision-making, accountability, communication, awareness, rationale, managing source code, testing, and release management.},
  keywords = {abiword, apache, debian, freebsd, kde, linux, mozilla, mysql, perl, PHP, postgresql, python, subversion, tomcat, tools},
  attachments = {https://flosshub.org/sites/flosshub.org/files/erenkrantz2003.pdf},
  author = {Erenkrantz, J. and Taylor, R.N.}
}

@conference {Ye:2003:TUM:776816.776867,
  title = {Toward an understanding of the motivation of Open Source Software developers},
  booktitle = {Proceedings of the 25th International Conference on Software Engineering},
  series = {ICSE {\textquoteright}03},
  year = {2003},
  note = {"Analyzing the emails sent to the mailing list is one way of understanding the structure of the community." "Table 2 displays the number of code contributions made by members to the GIMP system and the defined roles of those contributing members. We counted the number of contributions made by each person by analyzing the change log of the system."},
  pages = {419{\textendash}429},
  publisher = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address = {Washington, DC, USA},
  abstract = {An Open Source Software (OSS) project is unlikely to be successful unless there is an accompanied community that provides the platform for developers and users to collaborate. Members of such communities are volunteers whose motivation to participate and contribute is of essential importance to the success of OSS projects. In this paper, we aim to create an understanding of what motivates people to participate in OSS communities. We theorize that learning is one of the motivational forces.
Our theory is grounded in the learning theory of Legitimate Peripheral Participation, and is supported by analyzing the social structure of OSS communities and the co-evolution between OSS systems and communities. We also discuss practical implications of our theory for creating and maintaining sustainable OSS communities as well as for software engineering research and education.}, keywords = {change log, COMMUNITY, contributions, contributors, developers, email, email archives, evolution, gimp, log files, mailing list, roles, source code}, isbn = {0-7695-1877-X}, url = {http://portal.acm.org/citation.cfm?id=776816.776867}, attachments = {https://flosshub.org/sites/flosshub.org/files/YeKishida.pdf}, author = {Ye, Yunwen and Kishida, Kouichi} } @article {65, title = {Analyzing cloning evolution in the Linux kernel}, journal = {Information and Software Technology}, volume = {44}, number = {13}, year = {2002}, pages = {755-765}, abstract = {Identifying code duplication in large multi-platform software systems is a challenging problem. This is due to a variety of reasons including the presence of high-level programming languages and structures interleaved with hardware-dependent low-level resources and assembler code, the use of GUI-based configuration scripts generating commands to compile the system, and the extremely high number of possible different configurations. This paper studies the extent and the evolution of code duplications in the Linux kernel. Linux is a large, multi-platform software system; it is based on the Open Source concept, and so there are no obstacles in discussing its implementation. In addition, it is decidedly too large to be examined manually: the current Linux kernel release (2.4.18) is about three million LOCs. Nineteen releases, from 2.4.0 to 2.4.18, were processed and analyzed, identifying code duplication among Linux subsystems by means of a metric-based approach. 
The obtained results support the hypothesis that the Linux system does not contain a relevant fraction of code duplication. Furthermore, code duplication tends to remain stable across releases, thus suggesting a fairly stable structure, evolving smoothly without any evidence of degradation. (C) 2002 Elsevier Science B.V. All rights reserved.}, keywords = {cvs, kernel, lines of code, linux, loc, project success, source code}, url = {web.soccerlab.polymtl.ca/~antoniol/publications/.../infsoft2002.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/infsoft2002.pdf}, author = {Antoniol, G. and Villano, U. and Merlo, E. and Di Penta, M.} } @article {Stamelos02codequality, title = {Code quality analysis in open source software development}, journal = {Information Systems Journal}, volume = {12}, year = {2002}, note = {"For our case study, we have used Logiscope{\quotesinglbase} (Telelogic, 2000), a comprehensive set of tools able to perform, automatically, code measurement and comparison with user-defined programming standards" "Using Logiscope, we examined a sample of 100 C programs found in the SUSE Linux 6.0 release." metrics collected: number of statements cyclomatic complexity maximum levels number of paths unconditional jumps comment frequency vocabulary frequency program length average size number of inputs/outputs}, pages = {43{\textendash}60}, abstract = {Proponents of open source style software development claim that better software is produced using this model compared with the traditional closed model. However, there is little empirical evidence in support of these claims. In this paper, we present the results of a pilot case study aiming: (a) to understand the implications of structural quality; and (b) to figure out the benefits of structural quality analysis of the code delivered by open source style development. 
To this end, we have measured quality characteristics of 100 applications written for Linux, using a software measurement tool, and compared the results with the industrial standard that is proposed by the tool. Another target of this case study was to investigate the issue of modularity in open source as this characteristic is being considered crucial by the proponents of open source for this type of software development. We have empirically assessed the relationship between the size of the application components and the delivered quality measured through user satisfaction. We have determined that, up to a certain extent, the average component size of an application is negatively related to the user satisfaction for this application.}, keywords = {C, Code quality characteristics, functions, linux, metrics, open source development, software measurement, structural code analysis, Suse, user satisfaction}, author = {Ioannis Stamelos and Lefteris Angelis and Apostolos Oikonomou and Georgios L. Bleris} } @article {1115, title = {De-Bugging open source software licensing}, journal = {University of Pittsburgh Law Review}, volume = {64}, number = {1}, year = {2002}, month = {2002}, pages = {75-103}, keywords = {licensing}, attachments = {https://flosshub.org/sites/flosshub.org/files/debugopensource.pdf}, author = {Gomulkiewicz, R.W.} } @conference {stewart2002an-explorat, title = {An Exploratory Study of Factors Influencing the Level of Vitality and Popularity of Open Source Projects}, booktitle = {ICIS 2002. Proceedings of International Conference on Information Systems 2002}, year = {2002}, note = {"We are currently tracking publicly available data on 240 open source projects registered on the freshmeat Website." "First, we randomly selected a total of 120 projects from the utilities, software development, and games and entertainment areas. 
We then selected 120 projects from these forums that had been registered on the site during the two weeks prior to the start of our data collection effort."}, month = {2002}, pages = {1-5}, abstract = {In this research, we ask the question: What differentiates successful from unsuccessful open source software projects? Using a sample of 240 open source projects, we examine how organizational sponsorship, target audience (developer versus end user), license choice, and development status interact over time to influence the extent to which open source software projects attract user attention and developer activity.}, keywords = {activity, audience, developers, freshmeat, license analysis, licenses, organizational sponsorship, project success, roles, status, target audience, users}, author = {Stewart, Katherine J. and Ammeter, Tony} } @conference {1095, title = {High Quality and Open Source Software Practices}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, note = {"We examined the publicly visible portions of these projects from November 2001 through March 2002, ...The SLOC counts for the predominate languages are shown"}, month = {2002}, abstract = {Surveys suggest that, according to various metrics, the quality and dependability of today{\textquoteright}s open source software is roughly on par with commercial and government developed software. What are the prospects for advancing to much higher levels of quality in open source software? More specifically, what attributes must be possessed by quality-related interventions for them to be feasibly adoptable in open source practice? In order to identify some of these attributes, we conducted a preliminary survey of the quality practices of a number of successful open source projects. 
We focus, in particular, on attributes related to adoptability by the open source practitioner community.}, keywords = {apache, bug report, bug tracker, bug tracking system, feature requests, gcc, gnome, kde, lines of code, linux, loc, mozilla, netbeans, perl, position paper, python, sloc, source code, Survey, tomcat, xfree86}, attachments = {https://flosshub.org/sites/flosshub.org/files/HalloranScherlis.pdf}, author = {T. Halloran and W. Scherlis} } @conference {1163, title = {On the Nonmaintainability of Open-Source Software}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, note = {"We downloaded 365 versions of Linux. For each version in turn, we examined the 17 kernel modules and counted the number of lines of code in each module. Then we counted the number of instances of common (global) coupling between each of the kernel modules and all the other modules in that version of Linux. We obtained two primary results."}, keywords = {Common coupling, coupling, lines of code, linux, linux kernel, loc, metrics}, attachments = {https://flosshub.org/sites/flosshub.org/files/SchachOffutt.pdf}, author = {Schach, Stephen R. and Offutt, Jeff} } @article {flosswp63, title = {The Scope of Open Source Licensing}, journal = {Journal of Law, Economics and Organization}, volume = {21}, number = {1}, year = {2005}, month = {2005}, pages = {20-56}, abstract = {This paper is an initial exploration of the determinants of open source license choice. It first enumerates the various considerations that should figure into the licensor{\textquoteright}s choice of contractual terms, in particular highlighting how the decision is shaped not just by the preferences of the licensor itself, but also by that of the community of developers. The paper then presents an empirical analysis of the determinants of license choice using the SourceForge database, a compilation of nearly 40,000 open source projects. 
Projects geared toward end-users tend to have restrictive licenses, while those oriented toward developers are less likely to do so. Projects that are designed to run on commercial operating systems and those geared towards the Internet are less likely to have restrictive licenses. Finally, projects that are likely to be attractive to consumers such as games are more likely to have restrictive licenses.}, keywords = {developers, license, licenses, permissive, restrictive, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/lernertirole2.pdf}, author = {Josh Lerner and Jean Tirole} } @conference {1154, title = {Version Control: A Case Study in the Challenges and Opportunities for Open Source Software Development}, booktitle = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, abstract = {The growth of the worldwide open source development effort, driven in part by the recent entrance of large corporations into the open source arena, offers new opportunities to improve the software engineering tools available for that effort. Indeed, the increasing difficulty of managing large open source projects, as well as that of integrating related efforts into new programming environments, represents a challenge that must be met if the rapid growth of open source software is to continue. This position paper addresses these issues in the context of software version control.}, keywords = {cvs, kernel, linux, linux kernel, version control}, attachments = {https://flosshub.org/sites/flosshub.org/files/ChuCarrollShieldsWright.pdf}, author = {Chu-Carroll, M.C. and Shields, D. and Wright, J.} } @article {54, title = {Code, Culture and Cash: The Fading Altruism of Open Source Development}, journal = {First Monday}, volume = {6}, number = {12}, year = {2001}, note = {"I collected information on the country of residence for key contributors to the two projects. 
In the case of Linux, I relied on information located in the CREDITS file of all major kernel releases (from version 1.0 to version 2.4.9) [22]. For Gnome, I gathered developer-contact information from the project{\textquoteright}s web-site. Where information on the home-country of developers was not explicitly available, I performed private research to ascertain said information, or - in the last-case scenario - trusted information in the provided e-mail address of developers to infer home-country from domain ownership [23]. In the case of Linux, to avoid bias that might be introduced over time as developers migrated internationally, developers are continually counted as residents of the countries they were associated with when their names first entered the CREDITS file"}, abstract = {The nexus of open source development appears to have shifted to Europe over the last ten years. This paper explains why this trend undermines cultural arguments about "hacker ethics" and "post-scarcity" gift economies. It suggests that classical economic theory offers a more succinct explanation for the peculiar international distribution of open source development: hacking rises and falls inversely to its opportunity cost. This finding throws doubt on the Schumpeterian assumption that the efficiency of industrial systems can be measured without reference to the social institutions that bind them.}, keywords = {credits, email address, european, geography, gnome, linux}, url = {http://131.193.153.231/www/issues/issue6_12/lancashire/index.html}, attachments = {https://flosshub.org/sites/flosshub.org/files/Lancashire.pdf}, author = {David Lancashire} } @proceedings {140, title = {An exploratory study of ideology and trust in open source development groups}, year = {2001}, note = {"To do this, we scanned information in the public domain to develop a preliminary understanding of the context. 
We then identified 48 OS projects using www.sourceforge.net, an on-line open source meeting place that provides information on open source efforts and hosts code repositories. Projects were selected to represent a variety of types of software, licenses, and group sizes. We contacted administrators or project leaders and asked them to complete open-ended questionnaires regarding their experience and views on open source development."}, abstract = {Open source (OS) software development has been the subject of heightened interest among organizational scholars because of the novel social coordination practices that signal a departure from traditional proprietary software development. We propose that trust among group members in open source development groups (OSDGs) plays a key role in facilitating their success. Trust is important in this context because of the risk of opportunistic behavior by other members who volunteers may not have met and may never expect to meet, as well as a lack of explicit market contracts or common organizational affiliation. The open source community is differentiated by a coherent ideology that emphasizes a distinct set of interrelated norms, beliefs, and values. These serve to create incentives for open source practices that eschew conventional transactional norms in favor of a gift culture and a focus on reputations. In this study, we primarily examine the role of the shared ideology in enabling the development of affective and cognitive trust in OSDGs. We further examine how this trust leads to desired outcomes - group efficacy and effectiveness. The study is based on exploratory interviews, examination of archival records and a preliminary survey to understand the specific conditions of open source efforts on which this work-in-progress report is based. This is being followed-up by empirical testing of our research model through a survey of a broad variety of OSDGs. 
This study would contribute to a clarification of the role of trust in enabling software groups to work effectively and help to understand the bases of trust in ideology-permeated groups.}, keywords = {contributors, groups, ideology, license analysis, licenses, metadata, open source, sourceforge, Survey, team, team size, teams, trust, types}, doi = {10.1.1.104.638}, url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.104.638\&rep=rep1\&type=pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/stewartGosain2001.pdf}, author = {Katherine Stewart and Gosain, S.} } @conference {Godfrey:2001:GES:602461.602482, title = {Growth, evolution, and structural change in open source software}, booktitle = {Proceedings of the 4th International Workshop on Principles of Software Evolution (IWPSE 2001)}, series = {IWPSE {\textquoteright}01}, year = {2001}, note = {"We measured [linux] system size in uncommented LOC" "We also examined the growth of several other open source systems, including the VIM text editor, Eric Raymond{\textquoteright}s fetchmail utility, and the GCC compiler suite. "}, pages = {103{\textendash}106}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Our recent work has addressed how and why software systems evolve over time, with a particular emphasis on software architecture and open source software systems [2, 3, 6]. In this position paper, we present a short summary of two recent projects. First, we have performed a case study on the evolution of the Linux kernel [3], as well as some other open source software (OSS) systems. We have found that several OSS systems appear not to obey some of "Lehman{\textquoteright}s laws" of software evolution [5, 7], and that Linux in particular is continuing to grow at a geometric rate. Currently, we are working on a detailed study of the evolution of one of the subsystems of the Linux kernel: the SCSI drivers subsystem. 
We have found that cloning, which is usually considered to be an indicator of lazy development and poor process, is quite common and is even considered to be a useful practice. Second, we are developing a tool called Beagle to aid software maintainers in understanding how large systems have changed over time. Beagle integrates data from various static analysis and metrics tools and provides a query engine as well as navigable visualizations. Of particular note, Beagle aims to provide help in modelling long term evolution of systems that have undergone architectural and structural change.}, keywords = {agile methods, beagle, cloning, evolution, fetchmail, gcc, growth, kernel, lehman{\textquoteright}s laws, lines of code, linux, linux kernel, loc, open source software, software architecture, software evolution, source code, structural change, supporting environments, vim}, isbn = {1-58113-508-4}, doi = {10.1145/602461.602482}, url = {http://doi.acm.org/10.1145/602461.602482}, attachments = {https://flosshub.org/sites/flosshub.org/files/tu2001.pdf}, author = {Michael Godfrey and Tu, Qiang} } @conference {1142, title = {Software Engineering Research in the Bazaar}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, note = {"To gain a better understanding of a software system, we recover its software architecture from the system{\textquoteright}s source code. We recovered the architecture of many open source systems such as the Linux kernel [3], the Mozilla browser [5], the Apache web server [7], and the VIM editor [14]. 
The recovered architecture is browse-able to permit developers to interact with it, [10] shows an example for the Linux kernel."}, abstract = {During the last five years, our research group has studied the architecture and evolution of several large open source systems {\textemdash} including Linux, GCC, VIM, Mozilla, and Apache {\textemdash} and we have found that open source software systems often exhibit interesting differences when compared to similar commercially-developed systems. Our investigations of these systems have involved the creation of software architecture models, software architecture repair, the creation of a reference architecture for web servers, the study of evolution and growth of open source systems, and the modelling of architectural properties of systems that are apparent only at build time.}, keywords = {apache, architecture, gcc, kernel, linux, linux kernel, mozilla, open source software, software architecture, Software Engineering Research, source code, vim}, attachments = {https://flosshub.org/sites/flosshub.org/files/hassangodfreyholt.pdf}, author = {Hassan, Ahmed E. and Godfrey, Michael W. and Holt, Richard C.} } @article {98, title = {Striking a balance between trust and control in a virtual organization: a content analysis of open source software case studies}, journal = {Information Systems Journal}, volume = {11}, number = {4}, year = {2001}, note = {"I employ secondary analysis of published case studies of OSS projects. I used several search methods to identify such case studies about OSS projects. First, I searched the electronic archives of both ACM and IEEE, using terms such as {\textquoteleft}open source.{\textquoteright} " "Secondly, I searched on Bell \& Howell/Proquest{\textquoteright}s ABI/Inform, a database of academic and trade publications on business and management (including technology management). 
Thirdly, I reviewed the {\textquoteleft}position papers{\textquoteright} of the various authors who attended the 1st Workshop on Open-Source Software Engineering (Feller et al., 2001) for relevant case studies. Fourthly, I searched on Harvard Publishing{\textquoteright}s websites, which contain case studies published" "Finally, as I located case studies or other publications about OSS projects, I followed references from them to identify other case studies. Although I found literally hundreds of publications about OSS development, there was a much smaller number of case studies"}, pages = {277-304}, abstract = {Many organization theorists have predicted the emergence of the networked or virtual firm as a model for the design of future organizations. Researchers have also emphasized the importance of trust as a necessary condition for ensuring the success of virtual organizations. This paper examines the open source software (OSS) {\textquoteright}movement{\textquoteright} as an example of a virtual organization and proposes a model that runs contrary to the belief that trust is critical for virtual organizations. Instead, I argue that various control mechanisms can ensure the effective performance of autonomous agents who participate in virtual organizations. Borrowing from the theory of the {\textquoteright}McDonaldization{\textquoteright} of society, I argue that, given a set of practices to ensure the control, efficiency, predictability and calculability of processes and outcomes in virtual organizations, effective performance may occur in the absence of trust. As support for my argument, I employ content analysis to examine a set of published case studies of OSS projects. My results show that, although that trust is rarely mentioned, ensuring control is an important criterion for effective performance within OSS projects. 
The case studies feature few references to other dimensions of {\textquoteright}McDonaldization{\textquoteright} (efficiency, predictability and calculability), however, and I conclude that the OSS movement relies on many other forms of social control and self-control, which are often unacknowledged in OSS projects. Through these implicit forms of control, OSS projects are able to secure the cooperation of the autonomous agents that participate in project teams. I conclude by extrapolating from these case studies to other virtual organizations.}, keywords = {apache, case studies, Control, fetchmail, jun, linux, linux kernel, McDonaldization, mozilla, networked organization, perl, rationalization, trust, virtual organization}, author = {Gallivan, M. J.} } @conference {1146, title = {Taxonomy of Open Source Software Development}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, keywords = {jun, linux, postgresql, wingnut}, attachments = {https://flosshub.org/sites/flosshub.org/files/nakakojiyamamoto.PDF}, author = {Nakakoji, K. and Yamamoto, Y.} } @conference {Godfrey:2000:EOS:850948.853411, title = {Evolution in Open Source Software: A Case Study}, booktitle = {Proceedings of the International Conference on Software Maintenance (ICSM{\textquoteright}00)}, series = {ICSM {\textquoteright}00}, year = {2000}, note = {"We examined 96 kernel versions..." .c files, .h files only loc, lines of code number of functions number of modules}, pages = {131{\textendash}142}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, abstract = {Most studies of software evolution have been performed on systems developed within a single company using traditional management techniques. 
With the widespread availability of several large software systems that have been developed using an {\textquoteright}open source{\textquoteright} development approach, we now have a chance to examine these systems in detail, and see if their evolutionary narratives are significantly different from commercially developed systems. This paper summarizes our preliminary investigations into the evolution of the best known open source system: the Linux operating system kernel. Because Linux is large (over two million lines of code in the most recent version) and because its development model is not as tightly planned and managed as most industrial software processes, we had expected to find that Linux was growing more slowly as it got bigger and more complex. Instead, we have found that Linux has been growing at a super-linear rate for several years. In this paper, we explore the evolution of the Linux kernel both at the system level and within the major subsystems, and we discuss why we think Linux continues to exhibit such strong growth.}, keywords = {evolution, functions, growth, lines of code, linux, linux kernel, loc, source code}, isbn = {0-7695-0753-0}, url = {http://portal.acm.org/citation.cfm?id=850948.853411}, attachments = {https://flosshub.org/sites/flosshub.org/files/godfrey00.pdf}, author = {Godfrey, Michael W. and Tu, Qiang} }