@proceedings {1911, title = {Machine Learning-Based Detection of Open Source License Exceptions}, year = {2017}, note = {"We address these questions by first performing a large scale mining-based study... [W]e analyzed the source code of 51,754 projects written in six different programming languages (Ruby, Javascript, Python, C, C++, and C$\#$) hosted on GitHub. }, month = {05/2017}, pages = {118-129}, abstract = {From a legal perspective, software licenses govern the redistribution, reuse, and modification of software as both source and binary code. Free and Open Source Software (FOSS) licenses vary in the degree to which they are permissive or restrictive in allowing redistribution or modification under licenses different from the original one(s). In certain cases developers may modify the license by appending to it an exception to specifically allow reuse or modification under a particular condition. These exceptions are an important factor to consider for license compliance analysis since they modify the standard (and widely understood_ terms of the original license. In this work, we first perform a large-scale empirical study on the change history of over 51k FOSS systems aimed at quantitatively investigating the prevalence of known license exceptions and identifying new ones. Subsequently, we performed a study on the detection of license exceptions by relying on machine learning. We evaluated the license exception classification with four different supervised learners and sensitivity analysis. Finally we present a categorization of license exceptions and explain their implications.}, keywords = {classifier, empirical studies, license, machine learning}, doi = {10.1109/ICSE.2017.19}, author = {Vendome, Christopher and Mario Linares-Vasquez and Bavota, Gabriele and Di Penta, Massimiliano and Daniel M. 
German and Poshyvanyk, Denys} } @article {1868, title = {Managing knowledge sharing in distributed innovation from the perspective of developers: empirical study of open source software projects in China}, journal = {Technology Analysis \& Strategic Management}, volume = {2929143029}, year = {2017}, month = {01/2017}, pages = {1 - 22}, abstract = {Knowledge sharing is the key factor that influences the performance of open source software (OSS) projects, which are the representative cases of distributed innovation. This paper aims to explore the mechanism of knowledge sharing in OSS projects from the perspective of developers in China. A quantitative method with the analysis of 403 valid questionnaires is adopted. A series of hypotheses about how distributed innovation (independent variables) influences knowledge sharing (mediating variable) and then affects the performance of OSS projects (dependent variable) are tested and approved. On the one side, we argue that developers will actively affect knowledge sharing in terms of participative motivation, social network and organisational culture. On the other hand, users may also affect the knowledge sharing when considering innovation willingness and capacity. It is interesting to find that social network is the most important factor in Chinese cases. It is strongly recommended to strengthen the collaboration between software companies and OSS communities.}, keywords = {Distributed innovation, knowledge sharing, performance of OSS projects, user innovation}, issn = {1465-3990}, doi = {http://dx.doi.org/10.1080/09537325.2016.1194387}, author = {Chen, Xiaohong and Zhou, Yuan and Probert, David and Su, Jun} } @proceedings {1892, title = {Measuring Perceived Trust in Open Source Software Communities}, volume = {496}, year = {2017}, month = {05/2017}, pages = {49-54}, publisher = {Springer}, abstract = {We investigate the different aspects of measuring trust in Open Source Software (OSS) communities. 
In the theoretical part we review seminal works related to trust in OSS development. This investigation provides background to our empirical part where we measure trust in a community (in terms of kudo). Our efforts provide further avenues to develop trust-based measurement tools. These are helpful for academics and practitioners interesting in quantifiable traits of OSS trust.}, doi = {10.1007/978-3-319-57735-7_5}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_5}, author = {Syeed, M.M. Mahbubul and Juho Lindman and Hammouda, Imed} } @article {Kazuhiro Yamashita2016, title = {Magnet or Sticky? Measuring Project Characteristics from the Perspective of Developer Attraction and Retention}, journal = {Journal of Information Processing}, volume = {24}, number = {2}, year = {2016}, pages = {339-348}, abstract = {Open Source Software (OSS) is vital to both end users and enterprises. As OSS systems are becoming a type of infrastructure, long-term OSS projects are desired. For the survival of OSS projects, the projects need to not only retain existing developers, but also attract new developers to grow. To better understand how projects retain and attract contributors, our preliminary study aimed to measure the personnel attraction and retention of OSS projects using a pair of population migration metrics, called Magnet (personnel attraction) and Sticky (retention) metrics. Because the preliminary study analyzed only 90 projects and the 90 projects are not representative of GitHub, this paper extend the preliminary study to better understand the generalizability of the results by analyzing 16, 552 projects of GitHub. Furthermore, we also add a pilot study to investigate the typical duration between releases to find more appropriate release duration. 
The study results show that (1) approximately 23\% of developers remain in the same projects that the developers contribute to, (2) the larger projects are likely to attract and retain more developers, (3) 53\% of terminal projects eventually decay to a state of fewer than ten developers and (4) 55\% of attractive projects remain in an attractive category. }, keywords = {github, retention}, doi = {10.2197/ipsjjip.24.339}, url = {https://www.jstage.jst.go.jp/article/ipsjjip/24/2/24_339/_article}, author = {Yamashita, Kazuhiro and Kamei, Yasutaka and McIntosh, Shane and Hassan, Ahmed E. and Ubayashi, Naoyasu} } @proceedings {1927, title = {Managing Hidden Dependencies in OO Software: a Study Based on Open Source Projects}, year = {2016}, month = {11/2016}, abstract = {Dependency-based software change impact analysis is the domain concerned with estimating sets of artifacts impacted by a change to a related artifact. Research has shown that analysing the various class dependency types independently will not reveal a complete estimate of impact sets. Therefore, dependency types are combined to improve the precision of estimated when compared to impact sets. Software classes can be linked in different ways; for instance semantically, if their meaning is somewhat related or, structurally, if one class depends on the services of other classes. {\textquoteright}Hidden{\textquoteright} dependencies arise when two classes, linked structurally, do not share the same semantic namespace or when semantically dependent classes do not share a structural link. With the goal of revealing hidden dependencies during change impact analysis, we empirically investigated the interplay between structural and semantic class dependencies in object-oriented software systems. Results show that (i) semantic and structural links are significantly associated, (ii) the strengths of those links does not play a significant role and, (iii) a significant number of dependencies are hidden. 
We propose refactoring techniques to deal with hidden dependencies, based on existing design patterns. Our approach has the potential for reducing refactoring and testing effort.}, author = {Nemitari Ajienka and Capiluppi, Andrea and Counsell, Steve} } @conference {HG15, title = {Matching GitHub developer profiles to job advertisements}, booktitle = {Proceedings of the 12th International Conference on Mining Software Repositories}, year = {2015}, month = {05/2015}, publisher = {IEEE}, organization = {IEEE}, abstract = {GitHub is a social coding platform that enables developers to efficiently work on projects, connect with other developers, collaborate and generally {\textquotedblleft}be seen{\textquotedblright} by the community. This visibility also extends to prospective employers and HR personnel who may use GitHub to learn more about a developer{\textquoteright}s skills and interests. We propose a pipeline that automatizes this process and automatically suggests matching job advertisements to developers, based on signals extracting from their activities on GitHub.}, url = {http://www.gousios.gr/pub/dev-profiles.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/dev-profiles.pdf}, author = {Hauff, Claudia and Gousios, Georgios} } @proceedings {1790, title = {Measuring the Crowd {\textendash} A Preliminary Taxonomy of Crowdsourcing Metrics}, year = {2015}, month = {08/2015}, abstract = {Crowdsourcing initiatives benefit from tapping into diversity. A vast plethora of disparate individuals, organizations, frameworks and skillsets can all play a role in sourcing solutions to a challenge. Nevertheless, while crowdsourcing has become a pervasive phenomenon, there is a paucity of research that addresses how the crowdsourcing process is measured. Whereas research has advanced various taxonomies of crowdsourcing none to date have specifically addressed the issue of measuring either specific stages of the crowdsourcing process or the process as a whole. 
As a first step towards achieving this goal, this research-inprogress paper examines crowdsourcing at the operational level with a view towards (i) identifying the parts of the process (ii) identifying what can be measured and (iii) categorising operational metrics to facilitate deployment in practice. The taxonomy advanced is overarching in nature and can be deployed across disciplines. Furthermore, the preliminary taxonomy presented will offer practitioners a comprehensive list of metrics that will enable them to facilitate comparison across various crowdsourcing initiatives.}, url = {http://www.opensym.org/os2015/proceedings-files/p200-cullina.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/p200-cullina.pdf}, author = {Eoin Cullina and Kieran Conboy and Morgan, Lorraine} } @proceedings {1776, title = {A Method to Detect License Inconsistencies in Large-Scale Open Source Projects}, year = {2015}, month = {05/2015}, publisher = {IEEE}, abstract = {The reuse of free and open source software (FOSS) components is becoming more and more popular. They usually contain one or more software licenses describing the requirements and conditions which should be followed when been reused. Licenses are usually written in the header of source code files as program comments. Removing or modifying the license header by re-distributors will result in the inconsistency of license with its ancestor, and may potentially cause license infringement. But to the best of our knowledge, no research has been devoted to investigate such kind of license infringements nor license inconsistencies. In this paper, we describe and categorize different types of license inconsistencies and propose a feasible method to detect them. Then we apply this method to Debian 7.5 and present the license inconsistencies found in it. 
With a manual analysis, we summarized various reasons behind these license inconsistencies, some of which imply license infringement and require the attention from the developers. This analysis also exposes the difficulty to discover license infringements, highlighting the usefulness of finding and maintaining source code provenance.}, url = {http://sel.ist.osaka-u.ac.jp/lab-db/betuzuri/archive/992/992.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/992.pdf}, author = {Yuhao Wu and Manabe, Yuki and Tetsuya Kanda and Daniel M. German and Inoue, Katsuro} } @proceedings {1748, title = {Mining Component Repositories for Installability Issues}, year = {2015}, month = {05/2015}, abstract = {Component repositories play an increasingly relevant role in software life-cycle management, from software distribution to end-user, to deployment and upgrade management. Software components shipped via such repositories are equipped with rich metadata that describe their relationship (e.g., dependencies and conflicts) with other components. In this practice paper we show how to use a tool, distcheck, that uses component metadata to identify all the components in a repository that cannot be installed (e.g., due to unsatisfiable dependencies), provides detailed information to help developers understanding the cause of the problem, and fix it in the repository. We report about detailed analyses of several repositories: the Debian distribution, the OPAM package collection, and Drupal modules. In each case, distcheck is able to efficiently identify not installable components and provide valuable explanations of the issues. 
Our experience provides solid ground for generalizing the use of distcheck to other component repositories.}, keywords = {drupal, opam}, url = {http://www.dicosmo.org/preprints/msr-2015-distcheck.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr-2015-distcheck.pdf}, author = {Abate, Pietro and Di Cosmo, Roberto and Gesbert, Louis and Fabrice Le Fessant and Ralf Treinen and Zacchiroli, Stefano} } @proceedings {1751, title = {Mining Energy-Aware Commits}, year = {2015}, month = {05/2015}, abstract = {Over the last years, energy consumption has become a first-class citizen in software development practice. While energy-efficient solutions on lower-level layers of the software stack are well-established, there is convincing evidence that even better results can be achieved by encouraging practitioners to participate in the process. For instance, previous work has shown that using a newer version of a concurrent data structure can yield a 2.19x energy savings when compared to the old associative implementation [75]. Nonetheless, little is known about how much software engineers are employing energy-efficient solutions in their applications and what solutions they employ for improving energy-efficiency. In this paper we present a qualitative study of {\textquotedblleft}energy-aware commits{\textquotedblright}. Using Github as our primary data source, we perform a thorough analysis on an initial sample of 2,189 commits and carefully curate a set of 371 energy-aware commits spread over 317 real-world non-trivial applications. Our study reveals that software developers heavily rely on lowlevel energy management approaches, such as frequency scaling and multiple levels of idleness. Also, our findings suggest that ill-chosen energy saving techniques can impact the correctness of an application. 
Yet, we found what we call {\textquotedblleft}energy-aware interfaces{\textquotedblright}, which are means for clients (e.g., developers or endusers) to save energy in their applications just by using a function, abstracting away the low-level implementation details.}, url = {http://gustavopinto.org/lost+found/msr2015.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/energy.pdf}, author = {Irineu Moura and Pinto, Gustavo and Felipe Ebert and Fernando Castor} } @proceedings {1728, title = {Mining Software Repositories for Social Norms}, volume = {2}, year = {2015}, pages = {627-630}, abstract = {Social norms facilitate coordination and cooperation among individuals, thus enable smoother functioning of social groups such as the highly distributed and diverse open source software development (OSSD) communities. In these communities, norms are mostly implicit and hidden in huge records of human-interaction information such as emails, discussions threads, bug reports, commit messages and even source code. This paper aims to introduce a new line of research on extracting social norms from the rich data available in software repositories. Initial results include a study of coding convention violations in JEdit, ArgoUML and Glassfish projects. It also presents a new lifecycle model for norms in OSSD communities and demonstrates how a number of norms extracted from the Python development community follow this life-cycle model.}, author = {Dam, Hoa Khanh and Savarimuthu, Bastin Tony Roy and Daniel Avery and Aditya Ghose} } @proceedings {1725, title = {Mining StackOverflow to Filter out Off-topic IRC Discussion}, year = {2015}, month = {05/2015}, pages = {4 pages}, abstract = {Internet Relay Chat (IRC) is a commonly used tool by OpenSource developers. Developers use IRC channels to discuss programming related problems, but much of the discussion is irrelevant and off-topic. 
Essentially if we treat IRC discussions like email messages, and apply spam filtering, we can try to filter out the spam (the off-topic discussions) from the ham (the programming discussions). Yet we need labelled data that unfortunately takes time to curate. To avoid costly curration in order to filter out off-topic discussions, we need positive and negative data-sources. On- line discussion forums, such as StackOverflow, are very effective for solving programming problems. By engaging in open-data, StackOverflow data becomes a powerful source of labelled text regarding programming. This work shows that we can train classifiers using StackOverflow posts as positive examples of on-topic programming discussion. YouTube video comments, notorious for their lack of quality, serve as training set of off- topic discussion. By exploiting these datasets, accurate classifiers can be built, tested and evaluated that require very little effort for end-users to deploy and exploit.}, keywords = {irc, topics}, url = {http://webdocs.cs.ualberta.ca/~hindle1/2015/shaiful-mining_so.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/shaiful-mining_so.pdf}, author = {Shaiful Alam Chowdhury and Hindle, Abram} } @proceedings {1755, title = {Mining StackOverflow to Filter out Off-topic IRC Discussion}, year = {2015}, month = {05/2015}, abstract = {Internet Relay Chat (IRC) is a commonly used tool by OpenSource developers. Developers use IRC channels to discuss programming related problems, but much of the discussion is irrelevant and off-topic. Essentially if we treat IRC discussions like email messages, and apply spam filtering, we can try to filter out the spam (the off-topic discussions) from the ham (the programming discussions). Yet we need labelled data that unfortunately takes time to curate. To avoid costly curration in order to filter out off-topic discussions, we need positive and negative data-sources. 
Online discussion forums, such as StackOverflow, are very effective for solving programming problems. By engaging in open-data, StackOverflow data becomes a powerful source of labelled text regarding programming. This work shows that we can train classifiers using StackOverflow posts as positive examples of on-topic programming discussion. YouTube video comments, notorious for their lack of quality, serve as training set of offtopic discussion. By exploiting these datasets, accurate classifiers can be built, tested and evaluated that require very little effort for end-users to deploy and exploit.}, keywords = {irc, Stack Overflow, youtube}, attachments = {https://flosshub.org/sites/flosshub.org/files/shaiful-mining_so_0.pdf}, author = {Shaiful Alam Chowdhury and Hindle, Abram} } @conference {barcomb:2015:multiple, title = {A multiple case study of small free software businesses as social entrepreneurships}, booktitle = {Proceedings of the 11th International Symposium on Open Collaboration (OpenSym 2015)}, year = {2015}, abstract = {Free/libre and open source software are frequently described as a single community or movement. The difference between free software and open source ideology may influence founders, resulting in different types of companies being created. Specifically, the relationship between free/libre software ideology and social entrepreneurships is investigated. This paper presents seven case studies of businesses, five of which were founded by people who identify with the free/libre software movement. The result is a theory that small businesses founded by free/libre software advocates have three characteristics of social entrepreneurships. First, social benefit is prioritized over wealth creation. Second, the business{\textquoteright}s social mission is not incidental but is furthered through its for-profit activities, rather than supported by the company{\textquoteright}s profits. 
Third, the company{\textquoteright}s success is defined in part by the success of its social mission Free/libre software entrepreneurs who recognize their activities as social entrepreneurships can benefit from the existing literature on the unique challenges faced by socially-oriented businesses.}, keywords = {free software, open source software, public good, small business, social entrepreneurship, social ventures}, url = {https://opus4.kobv.de/opus4-fau/frontdoor/index/index/docId/6334}, attachments = {https://flosshub.org/sites/flosshub.org/files/p100-barcomb.pdf}, author = {Barcomb, Ann} } @conference {Yamashita:2014:MSO:2597073.2597116, title = {Magnet or Sticky? An OSS Project-by-project Typology}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {344{\textendash}347}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {For Open Source Software (OSS) projects, retaining existing contributors and attracting new ones is a major concern. In this paper, we expand and adapt a pair of population migration metrics to analyze migration trends in a collection of open source projects. Namely, we study: (1) project stickiness, i.e., its tendency to retain existing contributors and (2) project magnetism, i.e., its tendency to attract new contributors. Using quadrant plots, we classify projects as attractive (highly magnetic and sticky), stagnant (highly sticky, weakly magnetic), fluctuating (highly magnetic, weakly sticky), or terminal (weakly magnetic and sticky). Through analysis of the MSR challenge dataset, we find that: (1) quadrant plots can effectively identify at-risk projects, (2) stickiness is often motivated by professional activity and (3) transitions among quadrants as a project ages often coincides with interesting events in the evolution history of a project. 
}, keywords = {Developer migration, Magnet, mining challenge, msr challenge, open source, Sticky}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597116}, url = {http://doi.acm.org/10.1145/2597073.2597116}, attachments = {https://flosshub.org/sites/flosshub.org/files/yamashita.pdf}, author = {Yamashita, Kazuhiro and McIntosh, Shane and Kamei, Yasutaka and Ubayashi, Naoyasu} } @conference {1823, title = {"May the fork be with you": novel metrics to analyze collaboration on GitHub}, booktitle = {Proceedings of the 5th International Workshop on Emerging Trends in Software Metrics - WETSoM 2014}, year = {2014}, note = {"According to FLOSSmole [8] (Free Libre OpenSource Software) statistics, GitHub had 191765 repositories publicly available at May 2012."}, pages = {37 - 43}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Multi{\textendash}repository software projects are becoming more and more popular, thanks to web{\textendash}based facilities such as GitHub. Code and process metrics generally assume a single repository must be analyzed, in order to measure the characteristics of a codebase. Thus they are not apt to measure how much relevant information is hosted in multiple repositories contributing to the same codebase. Nor can they feature the characteristics of such a distributed development process. We present a set of novel metrics, based on an original classification of commits, conceived to capture some interesting aspects of a multi{\textendash}repository development process. We also describe an efficient way to build a data structure that allows to compute these metrics on a set of Git repositories. 
Interesting outcomes, obtained by applying our metrics on a large sample of projects hosted on GitHub, show the usefulness of our contribution.}, keywords = {flossmole, github}, isbn = {9781450328548}, doi = {10.1145/2593868.2593875}, url = {http://marbiaz.github.io/docs/Biazzini14b.pdf}, author = {Marco Biazzini and Benoit Baudry} } @unpublished {1799, title = {Measuring the Health of Open Source Software Ecosystems: Moving Beyond the Scope of Project Health}, year = {2014}, abstract = {Background. The livelihood of an open source ecosystem is important to different ecosystem participants: software developers, end-users, investors, and participants want to know whether their ecosystem is healthy and performing well. Currently, there exists no working operationalization available that can be used to determine the health of open source ecosystems. Health is typically looked at from a project scope, not from an ecosystem scope. Objectives. With such an operationalization, stakeholders can make better decisions on whether to invest in an ecosystem: developers can select the healthiest ecosystem to join, keystone organizers can establish which governance techniques are effective, and end-users can select ecosystems that are robust, will live long, and prosper. Method. Design research is used to create the health operationalization. The evaluation step is done using four ecosystem health projects from literature. Results. The Open Source Ecosystem Health Operationalization is provided, which establishes the health of a complete software ecosystem, using the data from collections of open source projects that belong to the ecosystem. Conclusion. The groundwork is done, by providing a summary of research challenges, for more research in ecosystem health. 
With the operationalization in hand, re- searchers no longer need to start from scratch when researching open source ecosystems{\textquoteright} health.}, keywords = {open source ecosystems, Software ecosystem health, Software repository mining}, url = {https://www.dropbox.com/s/borc730uw32kkzp/SECOhealth.pdf?dl=0}, attachments = {https://flosshub.org/sites/flosshub.org/files/SECOhealth\%20\%281\%29.pdf}, author = {Slinger Jansen} } @conference {Eckhardt:2014:MMO:2642803.2642810, title = {The Merits of a Meritocracy in Open Source Software Ecosystems}, booktitle = {Proceedings of the 2014 European Conference on Software Architecture Workshops}, series = {ECSAW {\textquoteright}14}, year = {2014}, pages = {7:1{\textendash}7:6}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The Eclipse open source ecosystem has grown from a small internal IBM project to one of the biggest Integrated Development Environments in the market. Open source communities and ecosystems do not follow the standard governance strategies typically used in large organizations. A meritocracy is a frequently occurring form of governance on different levels in open ecosystems. In this paper we investigate how this form of governance influences the health of projects within the Eclipse ecosystem in terms of the amount of commits within each month. We analyzed the hierarchy of Eclipse, how merits are conceptualized within the ecosystem and the effect of the appointments of mentors and project leads on the amount of commits. 
From our research, we can conclude that this system is not always as fair as it seems; merits are only a benefit in some cases.}, keywords = {Ecosystem Health, Meritocracy, open source, Software ecosystems}, isbn = {978-1-4503-2778-7}, doi = {10.1145/2642803.2642810}, url = {http://doi.acm.org/10.1145/2642803.2642810}, author = {Eckhardt, Evert and Kaats, Erwin and Slinger Jansen and Alves, Carina} } @inbook {1613, title = {A Methodology for Managing FOSS Migration Projects}, booktitle = {Open Source Software: Mobile Open Source Technologies}, series = {IFIP Advances in Information and Communication Technology}, volume = {427}, year = {2014}, pages = {172-175}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = { Since 2005, the Free Software Center (CESOL) at the University of Information Science (UCI) in Havana, Cuba, has conducted several free and open source software (FOSS) migration projects for various organizations. The experience gained from these projects enabled the creation of a FOSS Migration Methodology which documented how the technical elements of a project of this kind should be executed. Despite the usefulness of this methodology, the projects that have been undertaken experienced difficulties that were, in most cases, directly related to their management. This research aims to improve the methodology and minimize management-related challenges thereby improving the quality of migration projects. The proposed methodology was applied in a project that ran in a higher education organization and the results prove that the methodology enhanced the quality of the migration project. 
}, isbn = {978-3-642-55127-7}, doi = {10.1007/978-3-642-55128-4_24}, url = {http://dx.doi.org/10.1007/978-3-642-55128-4_24}, author = {Go{\~n}i, Angel and Boodraj, Maheshwar and Cabreja, Yordanis}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, AnthonyI.} } @article {1637, title = {Microblogging in Open Source Software Development: The Case of Drupal and Twitter}, journal = {IEEE Software}, volume = {31}, year = {2014}, month = {7/2014}, pages = {72 - 80}, abstract = {Microblogging is a popular form of social media that has quickly permeated both enterprise and open source communities. However, exactly how open source communities can leverage microblogging isn{\textquoteright}t yet well understood. The authors investigate how Drupal{\textquoteright}s open source community uses Twitter, a household name in microblogging. Their analysis of group and individual accounts of Drupal developers reveals that they take on similar but distinct roles. Both serve as communicators of essential links to a vast and growing community knowledge base, such as work artifacts, issues, documentation, and blog posts, but community members often express positive emotions when tweeting about work, which reinforces a sense of community.}, issn = {0740-7459}, doi = {10.1109/MS.2013.98}, author = {Wang, Xiaofang and Kuzmickaja, Ilona and Stol, Klaas-Jan and Abrahamsson, Pekka and Fitzgerald, Brian} } @article {mcdonald2014modeling, title = {MODELING DISTRIBUTED COLLABORATION ON GITHUB}, journal = {Advances in Complex Systems}, year = {2014}, publisher = {World Scientific}, abstract = {In this paper, we apply concepts from Distributed Leadership, a theory suggesting that leadership is shared among members of an organization, to frame models of contribution that we uncover in five relatively successful open source software (OSS) projects hosted on GitHub. 
In this qualitative, comparative case study, we show how these projects make use of GitHub features such as pull requests (PRs). We find that projects in which member PRs are more frequently merged with the codebase experience more sustained participation. We also find that projects with higher success rates among contributors and higher contributor retention tend to have more distributed (non-centralized) practices for reviewing and processing PRs. The relationships between organizational form and GitHub practices are enabled and made visible as a result of GitHub{\textquoteright}s novel interface. Our results demonstrate specific dimensions along which these projects differ and explicate a framework that warrants testing in future studies of OSS, particularly GitHub.}, author = {McDONALD, NORA and Blincoe, Kelly and PETAKOVIC, EVA and Goggins, Sean} } @conference {Williams:2014:MOP:2597073.2597132, title = {Models of OSS Project Meta-information: A Dataset of Three Forges}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, note = {"FLOSSMole [4] is a similar initiative to OSSMETER; it aims to collect and freely redistribute in different formats the data of open source software. Differently from OSSMETER, however, the FLOSSMole project does not provide the instruments to analyse data, that are simply collected and made publicly available."}, pages = {408{\textendash}411}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {The process of selecting open-source software (OSS) for adoption is not straightforward as it involves exploring various sources of information to determine the quality, maturity, activity, and user support of each project. In the context of the OSSMETER project, we have developed a forge-agnostic metamodel that captures the meta-information common to all OSS projects. 
We specialise this metamodel for popular OSS forges in order to capture forge-specific meta-information. In this paper we present a dataset conforming to these metamodels for over 500,000 OSS projects hosted on three popular OSS forges: Eclipse, SourceForge, and GitHub. The dataset enables different kinds of automatic analysis and supports objective comparisons of cross-forge OSS alternatives with respect to a user{\textquoteright}s needs and quality requirements. },
  keywords = {data mining, flossmole cited},
  isbn = {978-1-4503-2863-0},
  doi = {10.1145/2597073.2597132},
  url = {http://doi.acm.org/10.1145/2597073.2597132},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Models_of_OSS_Project_Meta-Information_A_Dataset_of_Three_Forges_draft.pdf},
  author = {Williams, James R. and Di Ruscio, Davide and Matragkas, Nicholas and Di Rocco, Juri and Kolovos, Dimitris S.}
}

% MSR 2014 paper: which problems modern code review fixes in two OSS projects.
@inproceedings{Beller:2014:MCR:2597073.2597082,
  title = {Modern Code Reviews in Open-source Projects: Which Problems Do They Fix?},
  booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series = {MSR 2014},
  year = {2014},
  pages = {202--211},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Code review is the manual assessment of source code by humans, mainly intended to identify defects and quality problems. Modern Code Review (MCR), a lightweight variant of the code inspections investigated since the 1970s, prevails today both in industry and open-source software (OSS) systems. The objective of this paper is to increase our understanding of the practical benefits that the MCR process produces on reviewed source code. To that end, we empirically explore the problems fixed through MCR in OSS systems. We manually classified over 1,400 changes taking place in reviewed code from two OSS projects into a validated categorization scheme. Surprisingly, results show that the types of changes due to the MCR process in OSS are strikingly similar to those in the industry and academic systems from literature, featuring the similar 75:25 ratio of maintainability-related to functional problems. We also reveal that 7{\textendash}35\% of review comments are discarded and that 10{\textendash}22\% of the changes are not triggered by an explicit review comment. Patterns emerged in the review data; we investigated them revealing the technical factors that influence the number of changes due to the MCR process. We found that bug-fixing tasks lead to fewer changes and tasks with more altered files and a higher code churn have more changes. Contrary to intuition, the person of the reviewer had no impact on the number of changes. },
  keywords = {code review, defects, open source software},
  isbn = {978-1-4503-2863-0},
  doi = {10.1145/2597073.2597082},
  url = {http://doi.acm.org/10.1145/2597073.2597082},
  attachments = {https://flosshub.org/sites/flosshub.org/files/beller.pdf},
  author = {Beller, Moritz and Bacchelli, Alberto and Zaidman, Andy and Juergens, Elmar}
}

% Journal article (Empirical Software Engineering): contribution management in ANDROID vs. LINUX.
% NOTE(review): no volume/number exported; pages 1--38 may be online-first numbering -- confirm.
@article{bettenburg2013management,
  title = {Management of community contributions},
  journal = {Empirical Software Engineering},
  year = {2013},
  pages = {1--38},
  publisher = {Springer},
  abstract = {In recent years, many companies have realized that collaboration with a thriving user or developer community is a major factor in creating innovative technology driven by market demand. As a result, businesses have sought ways to stimulate contributions from developers outside their corporate walls, and integrate external developers into their development process. To support software companies in this process, this paper presents an empirical study on the contribution management processes of two major, successful, open source software ecosystems.
We contrast a for-profit (ANDROID) system having a hybrid contribution style, with a not-for-profit (LINUX kernel) system having an open contribution style. To guide our comparisons, we base our analysis on a conceptual model of contribution management that we derived from a total of seven major open-source software systems. A quantitative comparison based on data mined from the ANDROID code review system and the LINUX kernel code review mailing lists shows that both projects have significantly different contribution management styles, suited to their respective market goals, but with individual advantages and disadvantages that are important for practitioners. Contribution management is a real-world problem that has received very little attention from the research community so far. Both studied systems (LINUX and ANDROID) employ different strategies and techniques for managing contributions, and both approaches are valuable examples for practitioners. Each approach has specific advantages and disadvantages that need to be carefully evaluated by practitioners when adopting a contribution management process in practice.},
  keywords = {android, contribution, linux, management},
  url = {http://link.springer.com/article/10.1007/s10664-013-9284-6},
  author = {Bettenburg, Nicolas and Hassan, Ahmed E. and Adams, Bram and German, Daniel M.}
}

% SEKE 2013 paper: the OSS-TPA framework for assessing corrective actions in OSS testing.
@inproceedings{abdouSEKE2013,
  title = {Managing Corrective Actions to Closure in Open Source Software Test Process},
  booktitle = {The 25th International Conference on Software Engineering and Knowledge Engineering (SEKE 2013)},
  year = {2013},
  pages = {306--311},
  address = {Boston, USA},
  abstract = {In assessing test process maturity, one of the goals is to manage disciplinary issues. Managing corrective actions to closure is known to aid software quality assurance, in general, and testing process activities, in particular. In this paper, a framework for software testing assessment, namely OSS-TPA, that aims to evaluate corrective actions in OSS test process, is proposed. The OSS-TPA framework is based on earlier studies and relies on a conceptual model for test process activities in OSS development. Using success factors in OSS development, the relationship between the maturity of managing corrective actions and the adoption of OSS is investigated.},
  keywords = {open source software, software engineering, software quality, Software testing, Test Process Improvement.},
  url = {http://index.ksi.edu/conf/seke/2013/cr/282.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/282.pdf},
  author = {Abdou, Tamer and Grogono, Peter and Kamthan, Pankaj}
}

% Dataset paper: the Maven Dependency Dataset (MDD).
% NOTE(review): typed as a proceedings volume but carries paper-level authors and no booktitle;
% standard styles ignore author on this type. Confirm the venue and consider inproceedings.
@proceedings{1509,
  title = {The {Maven} Repository Dataset of Metrics, Changes, and Dependencies},
  year = {2013},
  month = may,
  abstract = {We present the Maven Dependency Dataset (MDD), containing metrics, changes and dependencies of 148,253 jar files. Metrics and changes have been calculated at the level of individual methods, classes and packages of multiple library versions. A complete call graph is also presented which includes call, inheritance, containment and historical relationships between all units of the entire repository. In this paper, we describe our dataset and the methodology used to obtain it. We present different conceptual views of MDD and we also describe limitations and data quality issues that researchers using this data should be aware of. http://data.3tu.nl/repository/uuid:68a0e837-4fda-407a-949e-a159546e67b6},
  keywords = {maven},
  doi = {10.4121/uuid:68a0e837-4fda-407a-949e-a159546e67b6},
  url = {http://data.3tu.nl/repository/uuid:68a0e837-4fda-407a-949e-a159546e67b6},
  author = {Raemaekers, Steven and van Deursen, Arie and Visser, Joost}
}

% ISDEA paper: visual analysis of developer contribution on github.
% NOTE(review): year is 2013 but the DOI prefix reads ISDEA.2012 -- confirm the publication year.
@inproceedings{1525,
  title = {Mining Developer Contribution in Open Source Software Using Visualization Techniques},
  booktitle = {2013 Third International Conference on Intelligent System Design and Engineering Applications (ISDEA)},
  year = {2013},
  pages = {934--937},
  publisher = {IEEE},
  organization = {IEEE},
  address = {China, Hong Kong},
  abstract = { The research of developers{\textquoteright} contribution is an important part of the software evolution area. It allows project owners to find potential long-term contributors earlier and helps the newcomers to improve their behaviors. In this paper, we examined the contribution characteristics of developers in open source environment based on visual analysis, and presented approaches from three aspects-influencing factors, time characteristics and region characteristics. Our analysis used data from github and revealed some regular patterns. We found that the code which newcomers started to contribute with more people engaged in would lead to less contribution in some degree. We also found that there{\textquoteright}s a relation between developers{\textquoteright} early and later period contribution. In addition, developers from different regions were more likely to have dominant relationship.
Our findings may provide some support for future research in the area of software evolution.},
  keywords = {github},
  isbn = {978-0-7695-4923-1},
  doi = {10.1109/ISDEA.2012.223},
  author = {Ben, Xu and Beijun, Shen and Weicheng, Yang}
}

% Paper: giga-token probabilistic language model of Java source code.
% NOTE(review): typed as a proceedings volume but carries paper-level authors and no booktitle;
% standard styles ignore author on this type. Confirm the venue and consider inproceedings.
@proceedings{1495,
  title = {Mining Source Code Repositories at Massive Scale using Language Modeling},
  year = {2013},
  month = may,
  abstract = {The tens of thousands of high-quality open source software projects on the Internet raise the exciting possibility of studying software development by finding patterns across truly large source code repositories. This could enable new tools for developing code, encouraging reuse, and navigating large projects. In this paper, we build the first giga-token probabilistic language model of source code, based on 352 million lines of Java. This is 100 times the scale of the pioneering work by Hindle et al. The giga-token model is significantly better at the code suggestion task than previous models. More broadly, our approach provides a new {\textquotedblleft}lens{\textquotedblright} for analyzing software projects, enabling new complexity metrics based on statistical analysis of large corpora. We call these metrics data-driven complexity metrics. We propose new metrics that measure the complexity of a code module and the topical centrality of a module to a software project. In particular, it is possible to distinguish reusable utility classes from classes that are part of a program{\textquoteright}s core logic based solely on general information theoretic criteria.},
  url = {http://homepages.inf.ed.ac.uk/csutton/publications/msr2013.pdf},
  author = {Allamanis, Miltiadis and Sutton, Charles}
}

% Chapter in an edited volume (IFIP AICT 404): FOSS adoption in the U.S. energy sector.
% The export had fused the series name and the book title into one series string.
@incollection{1545,
  title = {Misconceptions and Barriers to Adoption of {FOSS} in the {U.S.} Energy Industry},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {232--244},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { In this exploratory study, we map the use of free and open source software (FOSS) in the United States energy sector, especially as it relates to cyber security. Through two surveys and a set of semi-structured interviews{\textemdash}targeting both developers and policy makers{\textemdash}we identified key stakeholders, organizations, and FOSS projects, be they rooted in industry, academia, or public policy space that influence software and security practices in the energy sector. We explored FOSS tools, common attitudes and concerns, and challenges with regard to FOSS adoption. More than a dozen themes were identified from interviews and surveys. Of these, drivers for adoption and risks associated with FOSS were the most prevalent. More specifically, the misperceptions of FOSS, the new security challenges presented by the smart grid, and the extensive influence of vendors in this space play the largest roles in FOSS adoption in the energy sector. },
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_17},
  author = {Kuechler, Victor and Jensen, Carlos and Bryant, Deborah},
  editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto}
}

% Chapter in the same edited volume (IFIP AICT 404): modeling adoption among OSS developers, Umple.
@incollection{1539,
  title = {Modeling Practices in Open Source Software},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {127--139},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { It is widely accepted that modeling in software engineering increases productivity and results in better code quality. Yet, modeling adoption remains low. The open source community, in particular, remains almost entirely code centric. In this paper, we explore the reasons behind such limited adoption of modeling practices among open source developers. We highlight characteristics of modeling tools that would encourage their adoption. We propose Umple as a solution where both modeling and coding elements are treated uniformly. In this approach, models can be manipulated textually and code can be edited visually. We also report on the Umple compiler itself as a case study of an open source project where contributors, using the above approach, have and continue to routinely commit code and model over a number of years. },
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_9},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Modeling-Practices-in-Open-Source-Software.pdf},
  author = {Badreddin, Omar and Lethbridge, Timothy and Elassar, Maged},
  editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto}
}

% Paper: a model of open source developer foundations, applied to nine foundations.
% NOTE(review): typed as a proceedings volume but carries paper-level authors and no booktitle;
% "IFIP AICT" in publisher looks like a series name. Confirm venue/publisher and consider inproceedings.
@proceedings{1440,
  title = {A Model of Open Source Developer Foundations},
  volume = {378},
  year = {2012},
  month = sep,
  pages = {15--28},
  publisher = {IFIP AICT},
  abstract = {Many community open source projects are of high economic relevance. As these projects mature, their leaders face a choice of continuing the project as is, making the project join an existing foundation, or creating their own foundation for the project. This article presents a model of open source developer foundations that project leaders can use to compare existing foundations with their needs or to design their own. The model is based on a three-iteration qualitative study involving interviews and supplementary materials review. To demonstrate its usefulness, we apply the model to nine foundations and present their organizational choices in a comparative table format.},
  url = {http://dirkriehle.com/uploads/2012/05/Riehle-MOSDF-v12-Final-Web.pdf},
  author = {Riehle, Dirk and Berschneider, Sebastian}
}

% WETSoM 2012 paper: function-level metrics for studying software evolution.
@inproceedings{1813,
  title = {Modification and developer metrics at the function level: Metrics for the study of the evolution of a software project},
  booktitle = {2012 3rd International Workshop on Emerging Trends in Software Metrics (WETSoM)},
  year = {2012},
  pages = {49--55},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Zurich, Switzerland},
  abstract = {Software evolution, and particularly its growth, has been mainly studied at the file (also sometimes referred as module) level.
In this paper we propose to move from the physical towards a level that includes semantic information by using functions or methods for measuring the evolution of a software system. We point out that use of functions-based metrics has many advantages over the use of files or lines of code. We demonstrate our approach with an empirical study of two Free/Open Source projects: a community-driven project, Apache, and a company-led project, Novell Evolution. We discovered that most functions never change; when they do their number of modifications is correlated with their size, and that very few authors who modify each; finally we show that the departure of a developer from a software project slows the evolution of the functions that she authored. },
  isbn = {978-1-4673-1763-4},
  doi = {10.1109/WETSoM.2012.6226993},
  author = {Robles, Gregorio and Herraiz, Israel and German, Daniel M. and Izquierdo-Cortazar, Daniel}
}

% Paper: open source software development in game mods and modding practices.
% NOTE(review): typed as a proceedings volume but carries a paper-level author and no booktitle;
% standard styles ignore author on this type. Confirm the venue and consider inproceedings.
@proceedings{1274,
  title = {Modding as an Open Source Approach to Extending Computer Game Systems},
  year = {2011},
  month = oct,
  pages = {62--74},
  publisher = {Springer},
  abstract = {This paper examines what is known so far about the role of open source software development within the world of game mods and modding practices. Game modding has become a leading method for developing games by customizing or creating OSS extensions to game software in general, and to proprietary closed source software games in particular. What, why, and how OSS and CSS come together within an application system is the subject for this study. The research method is observational and qualitative, so as to highlight current practices and issues that can be associated with software engineering and game studies foundations. Numerous examples of different game mods and modding practices are identified throughout.},
  keywords = {games, modding, mods},
  author = {Scacchi, Walt}
}

% MSR 2010 paper: mining security-related changes in the FreeBSD project.
@inproceedings{956,
  title = {Mining security changes in {FreeBSD}},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {90--93},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {Current research on historical project data is rarely touching on the subject of security related information. Learning how security is treated in projects and which parts of a software are historically security relevant or prone to security changes can enhance the security strategy of a software project. We present a mining methodology for security related changes by modifying an existing method of software repository analysis. We use the gathered security changes to find out more about the nature of security in the FreeBSD project and we try to establish a link between the identified security changes and a tracker for security issues (security advisories). We give insights how security is presented in the FreeBSD project and show how the mined data and known security problems are connected.},
  keywords = {freebsd, msr challenge, security},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463289},
  author = {Mauczka, Andreas and Schanes, Christian and Fankhauser, Florian and Bernhart, Mario and Grechenig, Thomas}
}

% MSR 2010 paper: mining subclassing directives from client code of white-box frameworks.
@inproceedings{964,
  title = {Mining subclassing directives to improve framework reuse},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {141--150},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {To help developers in using frameworks, good documentation is crucial.
However, it is a challenge to create high quality documentation especially of hotspots in white-box frameworks. This paper presents an approach to documentation of object-oriented white-box frameworks which mines from client code four different kinds of documentation items, which we call subclassing directives. A case study on the Eclipse JFace user-interface framework shows that the approach can improve the state of API documentation w.r.t. subclassing directives.},
  keywords = {api, documentation, eclipse, frameworks, jface, source code},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463347},
  attachments = {https://flosshub.org/sites/flosshub.org/files/141Mining-Subclassing-Directives-to-Improve-Framework-Reuse.pdf},
  author = {Bruch, Marcel and Mezini, Mira and Monperrus, Martin}
}

% CHI paper on recommending people on social networking sites.
% The publication-status qualifier lives in note rather than in the title.
% NOTE(review): entry was recorded as forthcoming in 2009 -- confirm final title, pages and DOI.
@inproceedings{858,
  title = {Making New Friends, but Keep the Old - Recommending People on Social Networking Sites},
  booktitle = {ACM SIGCHI Conference on Human Factors in Computing Systems},
  year = {2009},
  note = {forthcoming},
  author = {Chen, J. and Geyer, W. and Dugan, C. and Muller, M. and Guy, I.}
}

% MSR 2009 paper: migrating the J-REX extractor to Hadoop/MapReduce.
@inproceedings{923,
  title = {{MapReduce} as a general framework to support research in {Mining Software Repositories} ({MSR})},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {21--30},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {Researchers continue to demonstrate the benefits of Mining Software Repositories (MSR) for supporting software development and research activities. However, as the mining process is time and resource intensive, they often create their own distributed platforms and use various optimizations to speed up and scale up their analysis. These platforms are project-specific, hard to reuse, and offer minimal debugging and deployment support. In this paper, we propose the use of MapReduce, a distributed computing platform, to support research in MSR. As a proof-of-concept, we migrate J-REX, an optimized evolutionary code extractor, to run on Hadoop, an open source implementation of MapReduce. Through a case study on the source control repositories of the Eclipse, BIRT and Datatools projects, we demonstrate that the migration effort to MapReduce is minimal and that the benefits are significant, as running time of the migrated J-REX is only 30\% to 50\% of the original J-REX{\textquoteright}s. This paper documents our experience with the migration, and highlights the benefits and challenges of the MapReduce framework in the MSR community.},
  keywords = {hadoop, mapreduce},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069477},
  attachments = {https://flosshub.org/sites/flosshub.org/files/21MSR2009-MSR-0114-Shang-Weiyi.pdf},
  author = {Shang, Weiyi and Jiang, Zhen Ming and Adams, Bram and Hassan, Ahmed E.}
}

% Article (BP Trends): maturity study of open source workflow and BPM systems.
% NOTE(review): pages holds a page count, not a range; left as exported.
@article{1252,
  title = {On the Maturity of Open Source {BPM} Systems},
  journal = {BP Trends},
  year = {2009},
  note = {"Figure 1 shows a snapshot (from 3rd of July 2008) of the most downloaded workflow and BPM systems distributed through a code repository. The systems in the figure are ordered historically with respect to month and year of their registration. The code repositories searched for this snapshot were SourceForge, RubyForge, ObjectWebForge, Tigris.org, BountySource, BerliOS, JavaForge and GNU Savannah"},
  month = jun,
  pages = {(11 pages)},
  abstract = {Two major trends in the area of information systems development are the growing interest for Workflow and Business Process Management (BPM) systems and the increasing acceptance and spread of open source software. This development raises the following questions: 1. What are the prominent representatives of current open source Workflow and BPM systems? 2. What is the maturity level of these systems?
Seeking answers to these questions we have performed a study. First an overview of the existing open source systems was made and three systems: jBPM, OpenWFE, and Enhydra Shark, selected for further analysis. Then a detailed analysis on the selected systems was performed. The results from this analysis were documented and the developers invited to comment. Based on the feedback from the developers the results were revisited and the study finalized [5]. In this article we summarise the study and reflect on the area of open source BPM systems.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/bpm.pdf},
  author = {Wohed, P. and ter Hofstede, A. H. M. and Russell, N. and Andersson, B. and van der Aalst, W. M. P.}
}

% MetriKon 2009 paper: the QualOSS measurement framework, community-related aspects.
@inproceedings{1812,
  title = {Measuring the performance of open source development communities: The {QualOSS} approach},
  booktitle = {MetriKon 2009 - Praxis der Software-Messung : Tagungsband des DASMA Software Metrik Kongresses},
  year = {2009},
  abstract = {Free and Open Source Software (F/OSS) has an increasing importance for the software industry. Similar to traditional (closed) software acquisition, F/OSS acquisition requires an assessment of whether its quality is sufficient for the intended purpose, and of whether the chances of being maintained and supported in the future, as well as of keeping certain quality standards over time, are sufficiently high. The first one of these aspects is strictly product related, and can be assessed using techniques that are not specific to F/OSS. The last two aspects, however, are related to the community developing the software, and require novel approaches to be evaluated properly. In this paper, we present an approach toward a comprehensive measurement framework for F/OSS projects, developed in the EU project QualOSS. Although this approach takes into account product quality as well as process maturity and sustainability of the underlying F/OSS community, we concentrate here on its community-related aspects. After describing our quality model and assessment techniques in some detail, we close with a description of our ongoing evaluation effort and a discussion of lessons learned.},
  url = {http://publica.fraunhofer.de/documents/N-117870.html},
  author = {Soto, Martin and Izquierdo-Cort{\'a}zar, Daniel and Ciolkowski, Marcus}
}

% MSR 2009 paper: web-scraping framework linking data across software repositories.
@inproceedings{942,
  title = {On mining data across software repositories},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {171--174},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {Software repositories provide abundance of valuable information about open source projects. With the increase in the size of the data maintained by the repositories, automated extraction of such data from individual repositories, as well as of linked information across repositories, has become a necessity. In this paper we describe a framework that uses web scraping to automatically mine repositories and link information across repositories. We discuss two implementations of the framework. In the first implementation, we automatically identify and collect security problem reports from project repositories that deploy the Bugzilla bug tracker using related vulnerability information from the National Vulnerability Database. In the second, we collect security problem reports for projects that deploy the Launchpad bug tracker along with related vulnerability information from the National Vulnerability Database. We have evaluated our tool on various releases of Fedora, Ubuntu, Suse, RedHat, and Firefox projects.
The percentage of security bugs identified using our tool is consistent with that reported by other researchers.},
  keywords = {bug reports, bugzilla, Fedora, Firefox, htmlscraper, integration, launchpad, national vulnerability database, RedHat, Suse, tracker, Ubuntu},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069498},
  attachments = {https://flosshub.org/sites/flosshub.org/files/171MiningAcrossmsr09.pdf},
  author = {Anbalagan, Prasanth and Vouk, Mladen}
}

% Paper: corpus-linguistic methods applied to programming-language source code (Java).
% NOTE(review): typed as a proceedings volume but carries paper-level authors and no booktitle;
% pages holds a page count, not a range. Confirm the venue and consider inproceedings.
@proceedings{1645,
  title = {Mining Programming Language Vocabularies from Source Code},
  year = {2009},
  pages = {12 pp},
  abstract = {We can learn much from the artifacts produced as the by-products of software development and stored in software repositories. Of all such potential data sources, one of the most important from the perspective of program comprehension is the source code itself. While other data sources give insight into what developers intend a program to do, the source code is the most accurate human-accessible description of what it will do. However, the ability of an individual developer to comprehend a particular source file depends directly on his or her familiarity with the specific features of the programming language being used in the file. This is not unlike the difficulties second-language learners may encounter when attempting to read a text written in a new language. We propose that by applying the techniques used by corpus linguists in the study of natural language texts to a corpus of programming language texts (i.e., source code repositories), we can gain new insights into the communication medium that is programming language. In this paper we lay the foundation for applying corpus linguistic methods to programming language by 1) defining the term {\textquotedblleft}word{\textquotedblright} for programming language, 2) developing data collection tools and a data storage schema for the Java programming language, and 3) presenting an initial analysis of an example linguistic corpus based on version 1.5 of the Java Developers Kit.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/21st-delorey.pdf},
  author = {Delorey, Daniel P. and Knutson, Charles D. and Davies, Mark}
}

% MSR 2009 paper: topic modeling of a year-long Koders code search usage log.
@inproceedings{935,
  title = {Mining search topics from a code search engine usage log},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {111--120},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {We present a topic modeling analysis of a year long usage log of Koders, one of the major commercial code search engines. This analysis contributes to the understanding of what users of code search engines are looking for. Observations on the prevalence of these topics among the users, and on how search and download activities vary across topics, leads to the conclusion that users who find code search engines usable are those who already know to a high level of specificity what to look for. This paper presents a general categorization of these topics that provides insights on the different ways code search engine users express their queries.
The findings support the conclusion that existing code search engines provide only a subset of the various information needs of the users when compared to the categories of queries they look at.},
  keywords = {analysis, black duck, koders, log, logfile, search, source code},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069489},
  author = {Bajracharya, Sushil and Lopes, Cristina}
}

% MSR 2009 paper: the Samurai identifier splitter based on mined word frequencies.
@inproceedings{928,
  title = {Mining source code to automatically split identifiers for software analysis},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {71--80},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {Automated software engineering tools (e.g., program search, concern location, code reuse, quality assessment, etc.) increasingly rely on natural language information from comments and identifiers in code. The first step in analyzing words from identifiers requires splitting identifiers into their constituent words. Unlike natural languages, where space and punctuation are used to delineate words, identifiers cannot contain spaces. One common way to split identifiers is to follow programming language naming conventions. For example, Java programmers often use camel case, where words are delineated by uppercase letters or non-alphabetic characters. However, programmers also create identifiers by concatenating sequences of words together with no discernible delineation, which poses challenges to automatic identifier splitting. In this paper, we present an algorithm to automatically split identifiers into sequences of words by mining word frequencies in source code. With these word frequencies, our identifier splitter uses a scoring technique to automatically select the most appropriate partitioning for an identifier. In an evaluation of over 8000 identifiers from open source Java programs, our Samurai approach outperforms the existing state of the art techniques.},
  keywords = {java, samurai, sourceforge},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069482},
  attachments = {https://flosshub.org/sites/flosshub.org/files/71EnslenandHillandPollockandVijayShanker.pdf},
  author = {Enslen, Eric and Hill, Emily and Pollock, Lori and Vijay-Shanker, K.}
}

% MSR 2009 paper: latent Dirichlet allocation applied to GNOME Bugzilla bug reports.
@inproceedings{932,
  title = {Mining the coherence of {GNOME} bug reports with statistical topic models},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {99--102},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {We adapt latent Dirichlet allocation to the problem of mining bug reports in order to define a new information-theoretic measure of coherence. We then apply our technique to a snapshot of the GNOME Bugzilla database consisting of 431,863 bug reports for multiple software projects. In addition to providing an unsupervised means for modeling report content, our results indicate substantial promise in applying statistical text mining algorithms for estimating bug report quality.
Complete results are available from our supplementary materials Web site at http://sourcerer.ics.uci.edu/msr2009/gnome_coherence.html.}, keywords = {bug reports, bugzilla, gnome, msr challenge, quality, sourcerer}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069486}, author = {Linstead, Erik and Baldi, Pierre} } @article {1118, title = {Monetary donations to an open source software platform}, journal = {Research Policy}, volume = {38}, year = {2009}, month = {03/2009}, pages = {404 - 414}, abstract = {Online open source software platforms, such as Sourceforge.net, play a vital role in creating an ecosystem that enables the creation and growth of open source projects. However, there is little research exploring the interactions between open source stakeholders and the platform. We believe that the sustainability of the platform crucially depends on financial incentives. While platforms can obtain these incentives through multiple means, in this paper we focus on one form of financial incentives{\textemdash}voluntary monetary donations by open source community members. We report findings from two empirical studies that examine factors that impact donations. Study 1 investigates the factors that cause some community members to donate and not others. We find that the decision to donate is impacted by relational commitment with open source software platform, donation to projects and accepting donations from others. Study 2 examines what drives the level of donation. 
We find that the length of association with the platform and relational commitment affects donation levels.}, keywords = {Collective action, Donation, Identification, incentives, metadata, MOTIVATION, Open source software platform, projects, Reciprocity, Relational commitment, sourceforge}, issn = {00487333}, doi = {10.1016/j.respol.2008.11.004}, author = {Sandeep Krishnamurthy and Tripathi, Arvind K.} } @article {1351, title = {Motivation of Open Source Developers}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {65 - 81}, abstract = {The motivational drivers of open source software developers have been researched by various investigators since about 2000. This work shows that developers are motivated by different extrinsic and intrinsic drivers, among them community aspirations, reciprocity and fairness, creative impulses, and monetary and career ambitions. There has been some work done in studying whether the profile of developer motivations is constant across open source projects or is sensitive to project organizational design. Among the many factors that could influence the mix of motives of OS developers is the license under which the work is performed. Licenses range in openness between those such as the GNU GPL that severely restrict the freedom of developers to mingle their OS code with proprietary code to those such as BSD licenses which allow programmers much greater latitude in integrating open source code with proprietary code. In addition to formal rules, meritocracies emerge to reward effort and performance, and also to direct, coordinate, and control other participants. The authors discuss these variables and how they may be related to motivations.}, issn = {1942-3934}, doi = {10.4018/jossp.2009100104}, author = {Allyn, Mark R. 
and Misra, Ram B.} } @conference {1263, title = {Multiple Social Networks Analysis of FLOSS Projects using Sargas}, booktitle = {2009 42nd Hawaii International Conference on System Sciences (HICSS 2009)}, year = {2009}, note = {"The interval of analysis for the first three networks was from 2003/02/24 to 2003/11/03. These networks were created using the OSSNetwork tool [18]. The source code of the PMD project was collected starting on 2002/06/24..." "Transflow performs a co-changes analysis of the source code modification history and creates a matrix where software components are connected by taking into account the frequency that they have been changed together (i.e., in the same check-in): if two files have been changed together in the same check-in, an edge is created to link these two files."}, pages = {1 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Waikoloa, Hawaii, USA}, abstract = {Due to their characteristics and claimed advantages, several researchers have been investigating free and open-source projects. Different aspects are being studied: for instance, what motivates developers to join FLOSS projects, the tools, processes and practices used in FLOSS projects, the evolution of FLOSS communities among other things. Researchers have studied collaboration and coordination of open source software developers using an approach known as social network analysis and have gained important insights about these projects. Most researchers, however, have not focused on the integrated study of these networks and, accordingly, in their interrelationships. This paper describes an approach and tool to combine multiple social networks to study the evolution of open-source projects. Our tool, named Sargas, allows comparison and visualization of different social networks at the same time. 
Initial results of our analysis can be used to extend the "onion-model" of open source participation.}, keywords = {case study, multiple social networks, ossnetwork, pmd, social network analysis, transflow}, isbn = {978-0-7695-3450-3}, doi = {10.1109/HICSS.2009.316}, attachments = {https://flosshub.org/sites/flosshub.org/files/07-07-06.pdf}, author = {de Sousa, S.F. and Balieiro, M.A. and dos R. Costa, J.M. and de Souza, C.R.B.} } @mastersthesis {485, title = {Managing Firm-Sponsored Open Source Communities}, year = {2008}, month = {May}, abstract = {The interest and use of open source software and methodology has gained an increasing amount of commercial attention, and we are currently witnessing that established proprietary software firms are taking a step further by opening their own software projects in an attempt to create firm-sponsored open source communities. Siobhan O{\textquoteright}Mahony{\textquoteright}s research finds that these firms have to handle a tension between openness and control in their product development, but little research has been done to detail how this balance is achieved. I have studied the American software company Novell and the openSUSE project, largely guided by an inductive, qualitative approach supplemented by some quantitative methods. In the study I draw upon Niklas Luhmann{\textquoteright}s theory of autopoietic social systems to create a distinction between the sponsor firm and the sponsored community, and I investigate the mechanisms that hold the two systems together despite their differences in interests. I argue that there are several elements that ensure a tight coupling between the two systems, including the boundary objects situated between them, the shared communication channels and the efforts of the marginal people whom have roles in both systems. A primary contribution to the theory of boundary objects is a distinction between what I describe as supportive-objects and target-objects. 
I argue that the latter holds a strong motivating power that should be appended to our understanding of individuals{\textquoteright} and collectives{\textquoteright} motivation to participate in open source software projects. I explore several possible future scenarios for the evolution of firm-sponsored communities, and find that Novell is pursuing a strategy for managing openness in such a community.}, author = {Jan Fredrik Stoveland} } @conference {530, title = {Mapping Linux Security Targets to Existing Test Suites}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275/2008}, year = {2008}, month = {2008///}, pages = {29 - 45}, publisher = {Springer}, organization = {Springer}, chapter = {3}, abstract = {The Common Criteria standard provides an infrastructure for evaluating security functions of IT products and for certifying that security policies claimed by product suppliers are correctly enforced by the security functions themselves. Certifying Open Source software (OSS) can pave the way to OSS adoption in a number of security-conscious application environments. Recent experiences in certifying Linux distributions has pointed out the problem of finding a mapping between descriptions of OSS security functions and existing test suites developed independently, such as the Linux Test Project. In this paper, we describe a mechanism, based on matching techniques, which semiautomatically associates security functions to existing test suite such as the ones developed by Open Source communities. }, issn = {978-0-387-09683-4}, doi = {http://dx.doi.org/10.1007/978-0-387-09684-1_3}, attachments = {https://flosshub.org/sites/flosshub.org/files/Mapping\%20Linux\%20Security\%20Targets.pdf}, author = {Ardagna, C. and Damiani, E. and Ioini, N. and Frati, F. and Giovannini, P. 
and Tchokpon, R.} } @article {499, title = {The Material and Social Dynamics of Motivation: Contributions to Open Source Language Technology Development}, journal = {Science Studies}, number = {2}, year = {2008}, note = {"I conducted a two-phased qualitative analysis on multiple data (thematic phone-inter- views, notes from OpenOffice.org conference 2004 meetings, historical documents, mailing list discussions, homepages, Google, blogs, real-time video and audio presentations from OpenOffice.org conference 2005)."}, abstract = {Volunteer motivation has been a central theme in Free/Libre/Open Source Software (FLOSS) literature. This research has been largely dominated by economists who rely in their surveys on the distinction between intrinsic and extrinsic motivations and the "hacker ethic" for profit juxtaposition. The paper argues that survey-based analytical frameworks and research designs have led to a focus on some motivational attributions at the expense of others. It then presents a case study that explores dynamic, non individualistic and content-sensitive aspects of motivations. The approach is based on socio-cultural psychology and the author{\textquoteright}s observations of a hybrid firm-community FLOSS project, OpenOffice.org. Instead of separating intrinsic motivations from extrinsic ones, it is argued that complex and changing patterns of motivations are tied to changing objects and personal histories prior to and during participation. 
The boundary between work and hobby in an individual{\textquoteright}s participation path is blurred and shifting.}, keywords = {contributions, developers, email, email archives, mailing list, MOTIVATION, openoffice, openoffice.org, secondary data, Volunteers}, attachments = {https://flosshub.org/sites/flosshub.org/files/Freeman.pdf}, author = {Stephanie Freeman} } @conference {545, title = {Migration Discourse Structures: Escaping Microsoft{\textquoteright}s Desktop Path}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275/2008}, year = {2008}, month = {2008///}, pages = {223 - 235}, publisher = {Springer}, organization = {Springer}, chapter = {18}, abstract = {Most studies of FOSS organizational migration projects focus solely on technological and economical aspects, neglecting the importance of organizational discourse structures for migration decisions as well as success. In looking at the case of the municipality of Munich this paper uses structuration theory in combination with discourse analysis to explain why and how in this case actors were able to overcome strong barriers to migration in the field of desktop software. 
}, issn = {978-0-387-09683-4}, doi = {http://dx.doi.org/10.1007/978-0-387-09684-1_18}, attachments = {https://flosshub.org/sites/flosshub.org/files/Migration\%20Discourse\%20Structure.pdf}, author = {Dobusch, Leonhard} } @conference {536, title = {Mining for Practices in Community Collections: Finds From Simple Wikipedia}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275/2008}, year = {2008}, month = {2008///}, pages = {105 - 120}, publisher = {Springer}, organization = {Springer}, chapter = {9}, abstract = {The challenges of commons based peer production are usually associated with the development of complex software projects such as Linux and Apache. But the case of open content production should not be treated as a trivial one. For instance, while the task of maintaining a collection of encyclopedic articles might seem negligible compared to the one of keeping together a software system with its many modules and interdependencies, it still poses quite demanding problems. In this paper, we describe the methods and practices adopted by Simple Wikipedia to keep its articles easy to read. Based on measurements of article readability and similarity, we conclude that while the mechanisms adopted by the community had some effect, in the long run more efforts and new practices might be necessary in order to maintain an acceptable level of readability in the Simple Wikipedia collection. 
}, issn = {978-0-387-09683-4}, doi = {http://dx.doi.org/10.1007/978-0-387-09684-1_9}, attachments = {https://flosshub.org/sites/flosshub.org/files/Mining\%20for\%20Practices.pdf}, author = {den Besten, Matthijs and Rossi, Alessandro and Gaio, Loris and Loubser, Max and Jean-Michel Dalle} } @conference {Schuler:2008:MUE:1370750.1370779, title = {Mining usage expertise from version archives}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = {05/2008}, pages = {121{\textendash}124}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In software development, there is an increasing need to find and connect developers with relevant expertise. Existing expertise recommendation systems are mostly based on variations of the Line 10 Rule: developers who changed a file most often have the most implementation expertise. In this paper, we introduce the concept of usage expertise, which manifests itself whenever developers are using functionality, e.g., by calling API methods. We present preliminary results for the ECLIPSE project that demonstrate that our technique allows to recommend experts for files with no or little history, identify developers with similar expertise, and measure the usage of API methods.}, keywords = {api, computer-supported cooperative work, eclipse, expertise, recommendation, scm, software repository, source code}, isbn = {978-1-60558-024-1}, doi = {http://doi.acm.org/10.1145/1370750.1370779}, url = {http://doi.acm.org/10.1145/1370750.1370779}, attachments = {https://flosshub.org/sites/flosshub.org/files/p121-schuler.pdf}, author = {Schuler, David and Zimmermann, Thomas} } @article {867, title = {Mobilization of Software Developers: The Free Software Movement}, journal = {(revised version in) Information, Technology and People}, volume = {20}, year = {2008}, pages = {4-33}, author = {Elliott, M. 
and Walt Scacchi} } @conference {flosswp402, title = {The motivational arc of massive virtual collaboration}, booktitle = {IFIP WG 9.5, International Working Conference on Virtuality and Society: Massive Virtual Communities}, year = {2008}, month = {February}, pages = {1{\textendash}13}, abstract = {Massive virtual collaborations (MVC) involve large numbers of mostly unpaid contributors collectively creating new content. Wikipedia is the most dramatic example of MVC; smaller-scale examples include contributors to blogs and discussion groups and teams of programmers and users developing free/libre open source software (FLOSS). In this paper, we propose a model of motivations for contribution to MVC that integrates various theoretical perspectives. The model extends prior work by distinguishing three different levels of contribution to projects (initial, sustained and meta) and by capturing the dynamic and recursive effects of contributions on emergent individual and project states.}, attachments = {https://flosshub.org/sites/flosshub.org/files/crowston_fagnot_ifip.pdf}, author = {Kevin Crowston and Isabelle Fagnot} } @inbook {868, title = {Movement Ideology vs. User Pragmatism in the Organizational Adoption of Open Source Software}, booktitle = {Computerization Movements and Technology Diffusion: From Mainframes to Ubiquitous Computing}, year = {2008}, pages = {427-452}, publisher = {Information Today}, organization = {Information Today}, address = {Medford, NJ}, author = {Dedrick, Jason and Joel West}, editor = {Kraemer, Kenneth L. and Elliott, Margaret} } @mastersthesis {482, title = {Managing the Bazaar: Commercialization and peripheral participation in mature, community-led Free/Open source software projects}, year = {2007}, month = {Nov}, abstract = {The thesis investigates two fundamental dynamics of participation and collaboration in mature, community-led Free/Open Source (F/OS) software projects - commercialization and peripheral participation. 
The two primary case studies of the research are the GNOME and KDE communities. The thesis contributes insights into how the gift economy is embedded in the exchange economy and the role of peripheral contributors. The analysis indicates that community-integrated paid developers have a key role in project development, maintaining the infrastructure aspects of the code base. The analysis suggests that programming and non-programming contributors are distinct in their make-up, priorities and rhythms of participation, and that learning plays an important role in controlling access. The results show that volunteers are important drivers of peripheral activities, such as translation and documentation. The term "autonomous peripherality" is used to capture the unique characteristics of these activities. These findings support the argument that centrality and peripherality are associated with the division of labour, which, in turn, is associated with employment relations and frameworks of institutional support. The thesis shows how the tensions produced by commercialization and peripheral participation are interwoven with values of meritocracy, ritual and strategic enactment of the idea of community as well as with tools and techniques developed to address the emergence of a set of problems specific to management and governance. These are characterized as "technologies of communities."}, attachments = {https://flosshub.org/sites/flosshub.org/files/PhD_Berdou.pdf}, author = {Evangelia Berdou} } @article {Wang:2007:MEO:1317471.1317479, title = {Measuring the evolution of open source software systems with their communities}, journal = {SIGSOFT Softw. Eng. Notes}, volume = {32}, year = {2007}, note = {"1. M1: The number of modules in software system at a series of specific moment. 2. CD: The number of developers (code contributors) at a series of specific moment. 3. MC: The correlation of M1 and CD." 
"We get the information of Modules from the Launchpad[1] which is a distributed collaborative infrastructure for Ubuntu and other open source software development. Ubuntu mainly grows though a manner of adding new packages. So, we treat packages as the modules we mention before. In Ubuntu community, some members are in charge of auditing the packages and add them to Ubuntu, here developers are refers them."}, month = {November}, publisher = {ACM}, address = {New York, NY, USA}, abstract = {Open Source Software (OSS) has become the subject of much commercial and academic interest in last few years. As traditional software, OSS also evolves to fulfill the need of stakeholders. Therefore, providing quantitative metrics for OSS evolution has also become an urgent issue. However, most existing studies of software evolution have been performed on systems developed within a single company using traditional management techniques. These metrics models are inadequate to measure OSS evolution. In this paper, we describe a preliminary evolution metrics set for evaluating OSS. The most significant novelty of this model is that it takes specific properties of Open Source Community (OSC) into consideration. In another word, we measure the evolution of OSS and OSC together. We also provide a lightweight case study on Ubuntu project using this metrics set. We find out that the Open Source Community and its members also play essential role in OSS evolution. 
We expect this metrics model can bring better understandings and explanations of phenomena in open source development and evolution.}, keywords = {evolution, launchpad, metrics, open source community, open source software, Ubuntu}, issn = {0163-5948}, doi = {http://doi.acm.org/10.1145/1317471.1317479}, url = {http://doi.acm.org/10.1145/1317471.1317479}, author = {Yi Wang and Defeng Guo and Shi, Huihui} } @article {125, title = {Membership herding and network stability in the open source community: The Ising perspective}, journal = {Management Science}, volume = {53}, number = {7}, year = {2007}, month = {Jul}, pages = {1086-1101}, abstract = {The aim of this paper is twofold: (1) to conceptually understand membership dynamics in the open source software (OSS) community, and (2) to explore how different network characteristics (i.e., network size and connectivity) influence the stability of an OSS network. Through the lens of Ising theory, which is widely accepted in physics, we investigate basic patterns of interaction and present fresh conceptual insight into dynamic and reciprocal relations among OSS community members. We also perform computer simulations based on empirical data collected from two actual OSS communities. Key findings include: (1) membership herding is highly present when external influences (e.g., the availability of other OSS projects) are weak, but decreases significantly when external influences increase, (2) propensity for membership herding is most likely to be seen in a large network with random connectivity, and (3) for large networks, when external influences are weak, random connectivity will result in higher network strength than scale-free connectivity (as external influences increase, however, the reverse phenomenon is observed). In addition, scale-free connectivity appears to be less volatile than random connectivity in response to an increase in the strength of external influences. 
We conclude with several implications that may be of significance to OSS stakeholders in particular, and to a broader range of online communities in general.}, keywords = {BEHAVIOR, DYNAMICS, ECONOMICS, INNOVATION, INVESTMENT, Ising theory, membership herding, MODEL, MOTIVATION, network connectivity, network stability, open source, PHASE-TRANSITION, UNCERTAINTY}, isbn = {0025-1909}, author = {Oh, Wonseok and Jeon, Sangyong} } @article {flosswp331, title = {Micro-dynamics of Free and Open Source Software Development. Lurking, laboring and launching new projects on SourceForge}, year = {2007}, month = {January}, abstract = {Quantitative methods are employed to describe two fundamental processes in the creation of free (libre) and open source software (FLOSS) that are at work in the collaborative development environment of the SourceForge.Net platform: resource mobilization and "entrepreneurial initiatives" which generate new development projects. The micro dynamics of the individuals{\textquoteright} involvements in these processes are analysed by defining "activity states" that correspond to "lurking" (not contributing or contributing to projects without become a member), "laboring" (joining one or more projects as members), and "launching" (founding one or more projects). The transition probability matrices constructed from observations on the activities of 222,835 individuals who registered on SF.net (during a 14-month period, mainly in 2001) characterize first-order Markov chains describing processes that are ergodic. The computation of the limiting "equilibrium" distribution of individual joining and launching activities is not used here to produce long-run predictions, because the time window of the available data is too short. Instead, it is conceived as an instrument to isolate the main forces acting in the underlying entrepreneurial and recruitment dynamics at work on the platform "shaking off" the weaker tendencies. 
It is shown that, although only a small proportion of the considered cohorts of SF.net registrants become even minimally active, the active "core" of project members and project founders is able to attract an increasing number of developers. SourceForge is seen to be more than an attractor of projects that are being "born again" under open source licenses: this virtual collaborative development environment shares the regenerative properties of tangible "industrial districts" that give rise to new, innovative enterprises. Implications for the exploitation and exploitation processes at work in the FLOSS model and about its sustainability are also derived.}, attachments = {https://flosshub.org/sites/flosshub.org/files/davidrullani.pdf}, author = {Paul A. David and Francesco Rullani} } @conference {997, title = {Mining CVS Repositories to Understand Open-Source Project Developer Roles}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {8 - 8}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {This paper presents a model to represent the interactions of distributed open-source software developers and utilizes data mining techniques to derive developer roles. 
The model is then applied on case studies of two open-source projects, ORAC-DR and Mediawiki with encouraging results.}, keywords = {cvs, developer interaction, developers, mediawiki, orac-dr, roles, scm, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.19}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300008.pdf}, author = {Yu, Liguo and Ramaswamy, Srini} } @conference {1015, title = {Mining Eclipse Developer Contributions via Author-Topic Models}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {30 - 30}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {We present the results of applying statistical author-topic models to a subset of the Eclipse 3.0 source code consisting of 2,119 source files and 700,000 lines of code from 59 developers. This technique provides an intuitive and automated framework with which to mine developer contributions and competencies from a given code base while simultaneously extracting software function in the form of topics. 
In addition to serving as a convenient summary for program function and developer activities, our study shows that topic models provide a meaningful, effective, and statistical basis for developer similarity analysis.}, keywords = {contributions, developers, eclipse, expertise, mining challenge, msr challenge, source code, topics}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.20}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300030.pdf}, author = {Linstead, Erik and Rigor, Paul and Bajracharya, Sushil and Lopes, Cristina and Baldi, Pierre} } @conference {999, title = {Mining Software Repositories with iSPARQL and a Software Evolution Ontology}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {10 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {One of the most important decisions researchers face when analyzing the evolution of software systems is the choice of a proper data analysis/exchange format. Most existing formats have to be processed with special programs written specifically for that purpose and are not easily extendible. Most scientists, therefore, use their own database(s) requiring each of them to repeat the work of writing the import/export programs to their format. We present EvoOnt, a software repository data exchange format based on the Web Ontology Language (OWL). EvoOnt includes software, release, and bug-related information. Since OWL describes the semantics of the data, EvoOnt is (1) easily extendible, (2) comes with many existing tools, and (3) allows to derive assertions through its inherent Description Logic reasoning capabilities. The paper also shows iSPARQL -- our SPARQL-based Semantic Web query engine containing similarity joins. 
Together with EvoOnt, iSPARQL can accomplish a sizable number of tasks sought in software repository mining projects, such as an assessment of the amount of change between versions or the detection of bad code smells. To illustrate the usefulness of EvoOnt (and iSPARQL), we perform a series of experiments with a real-world Java project. These show that a number of software analyses can be reduced to simple iSPARQL queries on an EvoOnt dataset.}, keywords = {database, eclipse, evoont, java, owl, semantic, sparql}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.21}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300010.pdf}, author = {Kiefer, Christoph and Bernstein, Abraham and Tappolet, Jonas} } @conference {1000, title = {Mining Workspace Updates in CVS}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {11 - 11}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {The version control archive CVS records not only all changes in a project but also activity data such as when developers create or update their workspaces. Furthermore, CVS records when it has to integrate changes because of parallel development. In this paper, we analyze the CVS activity data of four large open-source projects GCC, JBOSS, JEDIT, and PYTHON to investigate parallel development: What is the degree of parallel development? How frequently do conflicts occur during updates and how are they resolved? 
How do we identify changes that contain integrations?}, keywords = {change management, cvs, developers, gcc, jboss, jedit, python, workspaces}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.22}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300011.pdf}, author = {Zimmermann, Thomas} } @article {flosswp1604, title = {Motivating job design as a factor in open source governance}, journal = {J Manage Governance}, volume = {11}, number = {2}, year = {2007}, month = {May}, pages = {129{\textendash}137}, publisher = {Springer Science+Business Media}, abstract = {Successful open source software (OSS) projects can be considered as examples of how ambitious work results can be achieved in web-based collaboration even when monetary incentives are low or absent. While former explorations of motivational processes in OSS projects primarily focused on person-oriented aspects such as motives, interests, and expected benefits of contributors, job-related factors have been largely neglected. After discussing the limitations of a person-oriented focus, a research perspective is suggested that concentrates on job design and work context in OSS based on frameworks from work psychology. A theoretical analysis is presented discussing job characteristics of successful OSS projects as potential explanations of the high motivation of OSS contributors. Compared to a person-oriented perspective, the results of a job-oriented approach might be better transferable to other projects (both OSS and "closed source" software development) and provide guidelines for the governance of successful web-based collaboration.}, attachments = {https://flosshub.org/sites/flosshub.org/files/fulltext.pdf}, author = {Guido Hertel} } @article {Yu:2006:MKO:1150566.1150571, title = {Maintainability of the kernels of open-source operating systems: A comparison of Linux with FreeBSD, NetBSD, and OpenBSD}, journal = {J. Syst. 
Softw.}, volume = {79}, year = {2006}, note = {"Data regarding the number and total number of lines of code of kernel and nonkernel modules in the four operating systems are provided in Table 1" loc, kloc, number of kernel modules, number of nonkernel modules size c files .h files}, month = {June}, pages = {807{\textendash}815}, publisher = {Elsevier Science Inc.}, address = {New York, NY, USA}, abstract = {We compared and contrasted the maintainability of four open-source operating systems: Linux, FreeBSD, NetBSD, and OpenBSD. We used our categorization of common coupling in kernel-based software to highlight future maintenance problems. An unsafe definition is a definition of a global variable that can affect a kernel module if that definition is changed. For each operating system we determined a number of measures, including the number of global variables, the number of instances of global variables in the kernel and overall, as well as the number of unsafe definitions in the kernel and overall. We also computed the value of each our measures per kernel KLOC and per KLOC overall. For every measure and every ratio, Linux compared unfavorably with FreeBSD, NetBSD, and OpenBSD. Accordingly, we are concerned about the future maintainability of Linux. }, keywords = {abiword, Common coupling, coupling, Definition-use analysis, freebsd, kernel, lines of code, linux, linux kernel, loc, Maintainability, modules, netbsd, Open-source software, openbsd, source code}, issn = {0164-1212}, doi = {http://dx.doi.org/10.1016/j.jss.2005.08.014}, url = {http://dx.doi.org/10.1016/j.jss.2005.08.014}, attachments = {https://flosshub.org/sites/flosshub.org/files/YuSchachChen.pdf}, author = {Yu, Liguo and Schach, Stephen R. and Chen, Kai and Heller, Gillian Z. 
and Offutt, Jeff}
}

@article{flosswp374,
  title = {A man on the inside: Unlocking communities as complementary assets},
  year = {2006},
  month = oct,
  abstract = {Since Teece{\textquoteright}s seminal paper explaining who were the gainers from technological innovation, increased globalization and the information and communication technology revolution have brought new ways for firms to organize and appropriate from innovation. A new more open model of innovation suggests that firms can benefit from sources of innovation that stem from outside the firm. The central theme of this paper is how firms try to unlock communities as complementary assets. These communities exist outside firm boundaries beyond ownership or hierarchical control. Because of practices developed by communities to protect their work, firms need to assign individuals to work in these communities in order to gain access to developments and, to an extent, influence the direction of the community. Using network analysis we show that some software firms sponsor individuals to act strategically within a free and open source software (FOSS) community. Firm sponsored individuals interact with more individuals than interact with them, and also they seek to interact with central individuals in the community. However, we can see differences in how individuals interact, depending on whether their affiliation is with a dedicated FOSS firm or an incumbent in the software industry. Apparently, some firm managers believe they need {\textquotedblleft}a man on the inside{\textquotedblright} to be able to gain access to communities. PLEASE EMAIL IF YOU LIKE A COPY OF THE PAPER!},
  attachments = {https://flosshub.org/sites/flosshub.org/files/dahlanderwallin.pdf},
  author = {Dahlander, Linus and Wallin, Martin W.}
}

@conference{Xie:2006:MMA:1137983.1137997,
  title = {{MAPO}: mining {API} usages from open source repositories},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {54--57},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {To improve software productivity, when constructing new software systems, developers often reuse existing class libraries or frameworks by invoking their APIs. Those APIs, however, are often complex and not well documented, posing barriers for developers to use them in new client code. To get familiar with how those APIs are used, developers may search the Web using a general search engine to find relevant documents or code examples. Developers can also use a source code search engine to search open source repositories for source files that use the same APIs. Nevertheless, the number of returned source files is often large. It is difficult for developers to learn API usages from a large number of returned results. In order to help developers understand API usages and write API client code more effectively, we have developed an API usage mining framework and its supporting tool called MAPO (for Mining API usages from Open source repositories). Given a query that describes a method, class, or package for an API, MAPO leverages the existing source code search engines to gather relevant source files and conducts data mining. The mining leads to a short list of frequent API usages for developers to inspect. MAPO currently consists of five components: a code search engine, a source code analyzer, a sequence preprocessor, a frequent sequence miner, and a frequent sequence post processor. We have examined the effectiveness of MAPO using a set of various queries.
The preliminary results show that the framework is practical for providing informative and succinct API usage patterns.},
  keywords = {api, application programming interfaces, documentation, mining software repositories, pmd, program comprehension, search engine, sequences, source code, source code search engine},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137997},
  url = {http://doi.acm.org/10.1145/1137983.1137997},
  attachments = {https://flosshub.org/sites/flosshub.org/files/54MAPO.pdf},
  author = {Xie, Tao and Pei, Jian}
}

@book{52,
  title = {Math You Can{\textquoteright}t Use: Patents, Copyright, and Software},
  year = {2006},
  publisher = {Brookings Institution Press},
  organization = {Brookings Institution Press},
  author = {Klemens, B.}
}

@conference{Kim:2006:MPE:1137983.1137995,
  title = {Micro pattern evolution},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {40--46},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {When analyzing the evolution history of a software project, we wish to develop results that generalize across projects. One approach is to analyze design patterns, permitting characteristics of the evolution to be associated with patterns, instead of source code. Traditional design patterns are generally not amenable to reliable automatic extraction from source code, yet automation is crucial for scalable evolution analysis. Instead, we analyze {\textquotedblleft}micro pattern{\textquotedblright} evolution; patterns whose abstraction level is closer to source code, and designed to be automatically extractable from Java source code or bytecode. We perform micro-pattern evolution analysis on three open source projects, ArgoUML, Columba, and jEdit to identify micro pattern frequencies, common kinds of pattern evolution, and bug-prone patterns.
In all analyzed projects, we found that the micro patterns of Java classes do not change often. Common bug-prone pattern evolution kinds are {\textquoteleft}Pool {\textrightarrow} Pool{\textquoteright}, {\textquoteleft}Implementor {\textrightarrow} NONE{\textquoteright}, and {\textquoteleft}Sampler {\textrightarrow} Sampler{\textquoteright}. Among all pattern evolution kinds, {\textquoteleft}Box{\textquoteright}, {\textquoteleft}CompoundBox{\textquoteright}, {\textquoteleft}Pool{\textquoteright}, {\textquoteleft}CommonState{\textquoteright}, and {\textquoteleft}Outline{\textquoteright} micro patterns have high bug rates, but they have low frequencies and a small number of changes. The pattern evolution kinds that are bug-prone are somewhat similar across projects. The bug-prone pattern evolution kinds of two different periods of the same project are almost identical.},
  keywords = {argouml, bugs, columba, design patterns, evolution, extraction, java, jedit, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137995},
  url = {http://doi.acm.org/10.1145/1137983.1137995},
  attachments = {https://flosshub.org/sites/flosshub.org/files/40MicroPattern.pdf},
  author = {Kim, Sunghun and Pan, Kai and Whitehead, Jr., E. James}
}

@conference{711,
  title = {The micro-dynamics of open source software development activity},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {203},
  year = {2006},
  pages = {339--340},
  publisher = {Springer},
  organization = {Springer},
  abstract = {This study aims to isolate and identify the properties of FLOSS development insofar as these can be revealed by examining the ecology of SF.net.
It characterizes the contrast between the many {\textquotedblleft}lurkers{\textquotedblright} and a much smaller core of {\textquotedblleft}entrepreneurial{\textquotedblright} developers who are responsible for launching new projects, and gives an interpretation of the function of platforms such as SF.net as sites that people with a propensity to start open source projects can use to recruit {\textquotedblleft}laborers{\textquotedblright}. It describes the process underpinning the mobility of those who are recruited among the projects that are launched and provides insights on the evolution of developers{\textquoteright} level and mode of involvement in FLOSS production.},
  isbn = {978-0-387-34225-2},
  doi = {10.1007/0-387-34226-5_36},
  attachments = {https://flosshub.org/sites/flosshub.org/files/The\%20micro-dynamics\%20of\%20open\%20source.pdf},
  author = {David, Paul and Rullani, Francesco}
}

@conference{Zimmermann:2006:MAM:1137983.1138025,
  title = {Mining additions of method calls in {ArgoUML}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {169--170},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In this paper we refine the classical co-change to the addition of method calls.
We use this concept to find usage patterns and to identify cross-cutting concerns for ArgoUML.},
  keywords = {argouml, change analysis, eclipse, function calls, mining challenge, msr challenge, pattern, source code, xelopes},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138025},
  url = {http://doi.acm.org/10.1145/1137983.1138025},
  attachments = {https://flosshub.org/sites/flosshub.org/files/169MiningAdditions.pdf},
  author = {Zimmermann, Thomas and Breu, Silvia and Lindig, Christian and Livshits, Benjamin}
}

@conference{1228,
  title = {Mining {CVS} Signals},
  booktitle = {1st Workshop on Public Data about Software Development (WoPDaSD 2006)},
  year = {2006},
  pages = {10--19},
  author = {Dalle, Jean-Michel and Daudet, L. and den Besten, Matthijs}
}

@conference{Breu:2006:MEC:1137983.1138006,
  title = {Mining {Eclipse} for cross-cutting concerns},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {94--97},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Software may contain functionality that does not align with its architecture. Such cross-cutting concerns do not exist from the beginning but emerge over time. By analysing where developers add code to a program, our history-based mining identifies cross-cutting concerns in a two-step process. First, we mine CVS archives for sets of methods where a call to a specific single method was added. In a second step, such simple cross-cutting concerns are combined to complex cross-cutting concerns. To compute these efficiently, we apply formal concept analysis{\textemdash}an algebraic theory. History-based mining scales well: we are the first to report aspects mined from an industrial-sized project like Eclipse.
For example, we identified a locking concern that crosscuts 1284 methods.},
  keywords = {aspects, concept analysis, cvs, eclipse, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138006},
  url = {http://doi.acm.org/10.1145/1137983.1138006},
  attachments = {https://flosshub.org/sites/flosshub.org/files/94MiningEclipse.pdf},
  author = {Breu, Silvia and Zimmermann, Thomas and Lindig, Christian}
}

@conference{Bird:2006:MES:1137983.1138016,
  title = {Mining email social networks},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {137--143},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Communication \& Co-ordination activities are central to large software projects, but are difficult to observe and study in traditional (closed-source, commercial) settings because of the prevalence of informal, direct communication modes. OSS projects, on the other hand, use the internet as the communication medium, and typically conduct discussions in an open, public manner. As a result, the email archives of OSS projects provide a useful trace of the communication and co-ordination activities of the participants. However, there are various challenges that must be addressed before this data can be effectively mined. Once this is done, we can construct social networks of email correspondents, and begin to address some interesting questions. These include questions relating to participation in the email; the social status of different types of OSS participants; the relationship of email activity and commit activity (in the CVS repositories) and the relationship of social status with commit activity.
In this paper, we begin with a discussion of our infrastructure (including a novel use of Scientific Workflow software) and then discuss our approach to mining the email archives; and finally we present some preliminary results from our data analysis.},
  keywords = {communication, contributions, developers, email, email archives, mailing lists, open source, social networks},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138016},
  url = {http://doi.acm.org/10.1145/1137983.1138016},
  attachments = {https://flosshub.org/sites/flosshub.org/files/137MiningEmail.pdf},
  author = {Bird, Christian and Gourley, Alex and Devanbu, Prem and Gertz, Michael and Swaminathan, Anand}
}

@conference{Bird:2006:MES:1137983.1138033,
  title = {Mining email social networks in {Postgres}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {185--186},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Open Source Software (OSS) projects provide a unique opportunity to gather and analyze publicly available historical data. The Postgres SQL server, for example, has over seven years of recorded development and communication activity. We mined data from both the source code repository and the mailing list archives to examine the relationship between communication and development in Postgres. Along the way, we had to deal with the difficult challenge of resolving email aliases. We used a number of social network analysis measures and statistical techniques to analyze this data.
We present our findings in this paper.},
  keywords = {developers, email, email archives, open source, postgresql, scm, social network analysis, social networks, source code, status},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138033},
  url = {http://doi.acm.org/10.1145/1137983.1138033},
  attachments = {https://flosshub.org/sites/flosshub.org/files/185MiningEmail.pdf},
  author = {Bird, Christian and Gourley, Alex and Devanbu, Prem and Gertz, Michael and Swaminathan, Anand}
}

@conference{Robles:2006:MLS:1137983.1137986,
  title = {Mining large software compilations over time: another perspective of software evolution},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {3--9},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {With the success of libre (free, open source) software, a new type of software compilation has become increasingly common. Such compilations, often referred to as {\textquoteleft}distributions{\textquoteright}, group hundreds, if not thousands, of software applications and libraries written by independent parties into an integrated system. Software compilations raise a number of questions that have not been targeted so far by software evolution, which usually focuses on the evolution of single applications. Undoubtedly, the challenges that software compilations face differ from those found in single software applications. Nevertheless, it can be assumed that both, the evolution of applications and that of software compilations, have similarities and dependencies. In this sense, we identify a dichotomy, common to that in economics, of software evolution in the small (micro-evolution) and in the large (macro-evolution). The goal of this paper is to study the evolution of a large software compilation, mining the publicly available repository of a well-known Linux distribution, Debian.
We will therefore investigate changes related to hundreds of millions of lines of code over seven years. The aspects that will be covered in this paper are size (in terms of number of packages and of number of lines of code), use of programming languages, maintenance of packages and file sizes.},
  keywords = {debian, distributions, evolution, large software collections, lines of code, loc, metrics, mining software repositories, size, sloc, sloccount, software evolution, software integrators},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137986},
  url = {http://doi.acm.org/10.1145/1137983.1137986},
  attachments = {https://flosshub.org/sites/flosshub.org/files/3miningLarge.pdf},
  author = {Robles, Gregorio and Gonzalez-Barahona, Jesus M. and Michlmayr, Martin and Amor, Juan Jose}
}

@conference{WeiBgerber:2006:MRA:1137983.1138028,
  title = {Mining refactorings in {ARGOUML}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  note = {"To see if refactorings in ARGOUML have an effect on the occurrence of new bugs and on communication between the developers, we relate the refactorings to bug reports in ISSUEZILLA respectively to mails on the developer mailing list."},
  pages = {175--176},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In this paper we combine the results of our refactoring reconstruction technique with bug, mail and release information to perform process and bug analyses of the ARGOUML CVS archive.},
  keywords = {argouml, bug tracking, bugs, cvs, email, evolution, mining challenge, msr challenge, re-engineering, refactoring, release history},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138028},
  url = {http://doi.acm.org/10.1145/1137983.1138028},
  attachments = {https://flosshub.org/sites/flosshub.org/files/175MiningRefactorings.pdf},
  author = {Wei{\ss}gerber, Peter and Diehl, Stephan and G{\"o}rg, Carsten}
}

@conference{Kagdi:2006:MSC:1137983.1137996,
  title = {Mining sequences of changed-files from version histories},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {47--53},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Modern source-control systems, such as Subversion, preserve change-sets of files as atomic commits. However, the specific ordering information in which files were changed is typically not found in these source-code repositories. In this paper, a set of heuristics for grouping change-sets (i.e., log-entries) found in source-code repositories is presented. Given such groups of change-sets, sequences of files that frequently change together are uncovered. This approach not only gives the (unordered) sets of files but supplements them with (partial temporal) ordering information. The technique is demonstrated on a subset of KDE source-code repository.
The results show that the approach is able to find sequences of changed-files.},
  keywords = {change, change history, change management, change sequences, heuristics, kde, mining software repositories, scm, sequences, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137996},
  url = {http://doi.acm.org/10.1145/1137983.1137996},
  attachments = {https://flosshub.org/sites/flosshub.org/files/47MiningSequences.pdf},
  author = {Kagdi, Huzefa and Yusuf, Shehnaaz and Maletic, Jonathan I.}
}

@conference{Voinea:2006:MSR:1137983.1138024,
  title = {Mining software repositories with {CVSgrab}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {167--168},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  keywords = {argouml, cvs, cvsgrab, evolution, mining challenge, msr challenge, postgresql, software visualization, source code, team, visualization},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138024},
  url = {http://doi.acm.org/10.1145/1137983.1138024},
  attachments = {https://flosshub.org/sites/flosshub.org/files/167MiningSoftware.pdf},
  author = {Voinea, Lucian and Telea, Alexandru}
}

@conference{Zimmermann:2006:MVA:1137983.1138001,
  title = {Mining version archives for co-changed lines},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {72--75},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Files, classes, or methods have frequently been investigated in recent research on co-change. In this paper, we present a first study at the level of lines. To identify line changes across several versions, we define the annotation graph which captures how lines evolve over time.
The annotation graph provides more fine-grained software evolution information such as life cycles of each line and related changes: "Whenever a developer changed line 1 of version.txt she also changed line 25 of Library.java."},
  keywords = {change, change analysis, change management, graph, lines of code, source code},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138001},
  url = {http://doi.acm.org/10.1145/1137983.1138001},
  attachments = {https://flosshub.org/sites/flosshub.org/files/72MiningVersionArchives.pdf},
  author = {Zimmermann, Thomas and Kim, Sunghun and Zeller, Andreas and Whitehead, Jr., E. James}
}

@conference{889,
  title = {Motivating Participation by Displaying the Value of Contribution},
  booktitle = {ACM SIGCHI Conference on Human Factors in Computing Systems},
  year = {2006},
  month = apr,
  day = {22},
  pages = {955--958},
  publisher = {ACM},
  organization = {ACM},
  address = {Montreal, Quebec, Canada},
  isbn = {1-59593-372-7},
  author = {Rashid, Al Mamunur and Ling, Kimberly and Tassone, Regina D and Resnick, Paul and Kraut, Robert and Riedl, John}
}

@article{1124,
  title = {Motivation, Governance, and the Viability of Hybrid Forms in Open Source Software Development},
  journal = {Management Science},
  volume = {52},
  year = {2006},
  note = {"Data from three primary sources informed this research (Table 1). All data were collected in 2001{\textendash}2002." Online project documentation: All publicly available project descriptions, charters, bylaws, meeting minutes, etc. Mailing lists: Over 2,000 messages read over a three-month period prior to interviews Interviews "I read all postings to project-specific and general mailing lists for both communities for a three-month period preceding the interviews. Over 2,000 messages were posted during this period.
Reading these messages allowed me to gain familiarity with the technology..."},
  month = jul,
  pages = {1000--1014},
  abstract = {Open source software projects rely on the voluntary efforts of thousands of software developers, yet we know little about why developers choose to participate in this collective development process. This paper inductively derives a framework for understanding participation from the perspective of the individual software developer based on data from two software communities with different governance structures. In both communities, a need for software-related improvements drives initial participation. The majority of participants leave the community once their needs are met, however, a small subset remains involved. For this set of developers, motives evolve over time and participation becomes a hobby. These hobbyists are critical to the long-term viability of the software code: They take on tasks that might otherwise go undone and work to maintain the simplicity and modularity of the code. Governance structures affect this evolution of motives. Implications for firms interested in implementing hybrid strategies designed to combine the advantages of open source software development with proprietary ownership and control are discussed.},
  keywords = {email, email archives, governance, INNOVATION, interview, mailing list, MOTIVATION, open source software development, Volunteers},
  issn = {1526-5501},
  doi = {10.1287/mnsc.1060.0553},
  url = {http://faculty.washington.edu/skshah/Shah\%20-\%20Motivation,\%20Governance,\%20Hybrid\%20Forms.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Shah\%20-\%20Motivation\%2C\%20Governance\%2C\%20Hybrid\%20Forms.pdf},
  author = {Shah, Sonali K.}
}

@article{895,
  title = {Multi-Modal Modeling, Analysis and Validation of Open Source Software Development Processes},
  journal = {Intern. J.
Internet Technology and Web Engineering},
  volume = {1},
  number = {3},
  year = {2006},
  pages = {49--63},
  abstract = {Understanding the context, structure, activities, and content of software development processes found in practice has been and remains a challenging problem. In the world of free/open source software development, discovering and understanding what processes are used in particular projects is important in determining how they are similar to or different from those advocated by the software engineering community. Prior studies have revealed that development processes in F/OSSD projects are different in a number of ways. In this paper, we describe how a variety of modeling perspectives and techniques are used to elicit, analyze, and validate software development processes found in F/OSSD projects, with examples drawn from studies of the software requirements process found in the NetBeans.org project.},
  keywords = {empirical studies of software engineering, open source software development, process modeling, requirements processes, software process},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Scacchi-Jensen-Noll-Elliott-OSSC05.pdf},
  author = {Scacchi, Walt and Jensen, Chris and Noll, J. and Elliott, M.}
}

@conference{730,
  title = {Measuring Success of Open Source Projects Using Web Search Engines},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {93--99},
  abstract = {What makes an open source project successful? In this paper we show that the traditional factors of success of open source projects, such as the number of downloads, deployments or community activity are inconvenient to collect or insufficient. We then correlate success of an open source project with its popularity on the Web.
We show several ideas of how such popularity could be measured using Web search engines and provide experimental results from quantitative analysis of the measures we introduce on representative large samples of open source projects from SourceForge.},
  url = {http://hdl.handle.net/2038/973},
  author = {Weiss, Dawid}
}

@conference{774,
  title = {Migrazione di un Sistema Informativo da {UNIX-AIX} a {UNIX-Linux}},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  pages = {287--288},
  abstract = {Il presente documento ha come obiettivo quello di descrivere la politica adottata dall{\textquoteright}Istituto Nazionale di Statistica rispetto all{\textquoteright}uso del software Open Source. In particolare vengono descritti i sistemi che attualmente operano su piattaforma Linux, quelli che sono in fase di migrazione e le scelte che si sono fatte nel caso di convivenza tra sistemi {\textquotedblleft}open{\textquotedblright} e sistemi {\textquotedblleft}proprietari{\textquotedblright}. Viene inoltre illustrata l{\textquoteright}architettura hardware scelta nel caso della migrazione di un sistema complesso da piattaforma interamente proprietaria (UNIX AIX) a piattaforma con sistema operativo open Linux Red Hat.},
  keywords = {architettura hardware, linux, migrazione, open source, server, sistema informativo, sistema proprietario, unix aix},
  url = {http://pascal.case.unibz.it/handle/2038/978},
  author = {Colasanti, Cecilia and Patruno, Vincenzo and Vaccari, Carlo}
}

@conference{Fischer:2005:MED:1083142.1083145,
  title = {Mining evolution data of a product family},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {12--16},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Diversification of software assets through changing requirements impose a constant challenge on the developers and maintainers of large software systems.
Recent research has addressed the mining for data in software repositories of single products ranging from fine- to coarse grained analyses. But so far, little attention has been paid to mining data about the evolution of product families. In this work, we study the evolution and commonalities of three variants of the BSD (Berkeley Software Distribution), a large open source operating system. The research questions we tackle are concerned with how to generate high level views of the system discovering and indicating evolutionary highlights. To process the large amount of data, we extended our previously developed approach for storing release history information to support the analysis of product families. In a case study we apply our approach on data from three different code repositories representing about 8.5GB of data and 10 years of active development.},
  keywords = {bsd, change analysis, change history, cvs, evolution, freebsd, netbsd, openbsd, release history, source code, text mining},
  isbn = {1-59593-123-6},
  doi = {10.1145/1082983.1083145},
  url = {http://doi.acm.org/10.1145/1082983.1083145},
  attachments = {https://flosshub.org/sites/flosshub.org/files/12MiningEvolution.pdf},
  author = {Fischer, Michael and Oberleitner, Johann and Ratzinger, Jacek and Gall, Harald}
}

@conference{Huang:2005:MVH:1083142.1083158,
  title = {Mining version histories to verify the learning process of Legitimate Peripheral Participants},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {84--88},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Since code revisions reflect the extent of human involvement in the software development process, revision histories reveal the interactions and interfaces between developers and modules. We therefore divide developers and modules into groups according to the revision histories of the open
source software repository, for example, sourceforge.net. To describe the interactions in the open source development process, we use a representative model, Legitimate Peripheral Participation (LPP) [6], to divide developers into groups such as core and peripheral teams, based on the evolutionary process of learning behavior.With the conventional module relationship, we divide modules into kernel and non-kernel types (such as UI). In the past, groups of developers and modules have been partitioned naturally with informal criteria. In this work, however, we propose a developer-module relationship model to analyze the grouping structures between developers and modules. Our results show some process cases of relative importance on the constructed graph of project development. The graph reveals certain subtle relationships in the interactions between core and non-core team developers, and the interfaces between kernel and non-kernel modules.}, keywords = {awstats, bzflag, cvs, filezilla, gallery, Legitimate Peripheral Participants (LPP), moodle, open boundary, open source software development process, phpmyadmin, social networks, sourceforge}, isbn = {1-59593-123-6}, doi = {http://doi.acm.org/10.1145/1082983.1083158}, url = {http://doi.acm.org/10.1145/1082983.1083158}, attachments = {https://flosshub.org/sites/flosshub.org/files/84MiningVersion.pdf}, author = {Huang, Shih-Kun and Liu, Kang-min} } @conference {741, title = {Modular Design and the Development of Complex Artifacts: Lessons from Free/Open Source Software}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {162-171}, abstract = {Organizational and managerial theories of modularity applied to the design and production of complex artifacts are used to interpret the rise and success of development methodologies and practices in Free/Open Source software projects. 
Strengths and risks of the adoption of a modular approach in software project management are introduced and are related to the achievements of various Free/Open Source Software projects (among them: the GNU operating system, the Linux kernel, the HURD kernel). It is suggested that mindful implementation of the principles of modularity may improve the rate of success of many Free/Open Source software projects. Specific case studies here depicted, as well as indirect observation of common programming practices employed by Free/Open Source developers and users, suggest a possible revision towards an improved theory of modularity that may be extended also to settings different from software production.}, url = {http://pascal.case.unibz.it/handle/2038/979}, author = {Alessandro Narduzzo and Rossi, Alessandro} } @conference {715, title = {Multi-Modal Modeling of Open Source Software Requirements Processes}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {1-8}, abstract = {Understanding the context, structure, activities, and content of software development processes found in practice has been and remains a challenging problem. In the world of free/open source software development, discovering and understanding what processes are used in particular projects is important in determining how they are similar to or different from those advocated by the software engineering community. Prior studies however have revealed that the requirements processes in OSSD projects are different in a number of ways, including the general lack of explicit software requirements specifications. 
In this paper, we describe how a variety of modeling perspectives and techniques are used to elicit, analyze, and validate software requirements processes found in OSSD projects, with examples drawn from studies of the NetBeans.org project.}, url = {http://pascal.case.unibz.it/handle/2038/975}, author = {Walt Scacchi and Chris Jensen and Noll, John and Elliott, Margaret} } @conference {1682, title = {The Mysteries of Open Source Software: Black and White and Red All Over?}, booktitle = {Proceedings of the 38th Annual Hawaii International Conference on System Sciences}, year = {2005}, pages = {196a - 196a}, publisher = {IEEE}, organization = {IEEE}, address = {Big Island, HI, USA}, abstract = {Open Source Software (OSS) has attracted enormous media and research attention since the term was coined in February 1998. The concept itself is founded on the paradoxical premise that software source code-the {\textquoteright}crown jewels{\textquoteright} for many proprietary software companies-should be provided freely to anyone who wishes to see it. Given this fundamental initial paradox, it is perhaps hardly surprising that the OSS concept is characterised by contradictions, paradoxes and tensions throughout. In this paper we focus specifically on the following issues in relation to OSS: the cathedral v. bazaar development approach; collectivism v. individualism, the bitter strife within the OSS community itself (OSS v. OSS), and between OSS and the Free Software Foundation (OSS v. FSF); whether OSS represents a paradigm shift in the software industry; whether the software is truly open-the Berkeley Conundrum, as we have termed it here; whether OSS truly is high quality software; and whether OSS is a {\textquoteright}one size fits all,{\textquoteright} representing the future model for all software development. }, doi = {10.1109/HICSS.2005.609}, author = {Fitzgerald, B. 
and Agerfalk, P.J.} } @article {DBLP:journals/electronicmarkets/Joode04, title = {Managing Conflicts in Open Source Communities}, journal = {Electronic Markets}, volume = {14}, number = {2}, year = {2004}, pages = {104-113}, abstract = {An increasing number of companies adopt open source software. These companies will typically pay programmers to participate in the design, development and maintenance of open source software. The programmers, however, are reported to have different interests compared to the voluntary programmers who dominate most open source communities. The diversity of interest will inevitably result in conflicts. To ensure that their interests are achieved, companies should understand how conflicts between their programmers and the voluntary programmers can be managed. The aim of this paper is to identify and discuss mechanisms that are currently present to manage conflicts in open source communities. The mechanisms identified in this paper are based on an explorative literature study and on 48 semi-structured interviews with programmers from a variety of open source communities. Four mechanisms have been identified and their relevance in the management of conflicts are discussed. They are: third-party intervention; modularity; parallel software development lines; and the exit option. The paper ends with an example of Covalent, which deploys parallel software development lines to manage conflicts in the Apache community. }, keywords = {abiword, apache, conflict, covalent, interviews, organizational sponsorship, Volunteers}, author = {Ruben van Wendel de Joode} } @article {flosswp201, title = {Managing Volunteer Activity in Free Software Projects}, year = {2004}, month = {July}, abstract = {During the last few years, thousands of volunteers have created a large body of free software. Even though this accomplishment shows that the free software development model works, there are some drawbacks associated with this model. 
Due to the volunteer nature of most free software projects, it is impossible to fully rely on participants. Volunteers may become busy and neglect their duties. This may lead to a steady decrease of quality as work is not being carried out. The problem of inactive volunteers is intensified by the fact that most free software projects are distributed, which makes it hard to quickly identify volunteers who neglect their duties. This paper shows Debian{\textquoteright}s approach to inactive volunteers. Insights presented here can be applied to other free software projects in order to implement effective quality assurance strategies.}, keywords = {debian, volunteer, volunteer teams}, attachments = {https://flosshub.org/sites/flosshub.org/files/michlmayr-mia.pdf}, author = {Martin Michlmayr} } @article {1253, title = {The many meanings of open source}, journal = {IEEE Software}, volume = {21}, year = {2004}, note = {"We determined a set of characteristics that are almost always present and others that vary among open source projects, and this serves as the core of this work" "Section 3 describes some open source characteristics that can be used in determining whether a project is or not open source"}, month = {01/2004}, pages = {34 - 40}, abstract = {The term Open Source is widely applied to describe some software development methodologies. This paper does not provide a judgment on the open source approach, but exposes the fact that simply stating that a project is open source does not provide a precise description of the approach used to support the project. By taking a multi- disciplinary point of view, we propose a collection of characteristics that are common, as well as some that vary among open source projects. The set of open source characteristics we found can be used as a tick-list both for analysing and for setting up open source projects. 
Our tick-list also provides a starting point for understanding the many meanings of the term open source.}, keywords = {classification, projects, taxonomy}, issn = {0740-7459}, doi = {10.1109/MS.2004.1259206}, attachments = {https://flosshub.org/sites/flosshub.org/files/gacek.pdf}, author = {Gacek, C. and Arief, B.} } @proceedings {124, title = {Membership dynamics and network stability in the open-source community: the ising perspective}, year = {2004}, note = {"simulations with the empirical network data that were collected from two actual OSS communities, Linux and Hypermail." "we initially downloaded nearly 100,000 archived (between 1997 and 2003) LINUX Kernel and Hypermail newsgroup messages posted in a UNIX mailbox format" "Specific information was obtained regarding the characteristics of these two OSS communities, including the size, the number of average connections per participant, and the hierarchy of each community."}, abstract = {In this paper, we address the following two questions: (1)How does a participant{\textquoteright}s membership decision affect the others (neighbors) with whom he has collaborated over an extended period of time in an open source software (OSS) network? (2) To what extent do network characteristics (i.e, size and connectivity) mediate the impact of external factors on the OSS participants{\textquoteright} dynamic membership decisions and hence the stability of the network? From the Ising perspective, we present fresh theoretical insight into the dynamic and reciprocal membership relations between OSS participants. We also performed simulations based on empirical data that were collected from two actual OSS communities. 
Some of the key findings include that (1) membership herding is highly present when the external force is weak, but decreases significantly when the force increases, (2) the propensity for membership herding is most likely to be seen in a large network with a random connectivity, and (3) for large networks, at low external force a random connectivity will perform better than a scale-free counterpart in terms of the network strength. However, as the temperature (external force) increases, the reverse phenomenon is observed. In addition, the scale-free connectivity appears to be less volatile than with the random connectivity in response to the increase in the temperature. We conclude with several implications that may be of significance to OSS stakeholders.}, keywords = {email, email archive, hypermail, linux, mailing list, membership, membership herding, newsgroup, open source, participants, social network analysis, stakeholders, team size}, attachments = {https://flosshub.org/sites/flosshub.org/files/OhJeon.pdf}, author = {Oh, Wonseok and Jeon, Sangyong} } @proceedings {1187, title = {Migrating a Development Project to Open Source Software Development}, year = {2004}, pages = {9-13}, abstract = {The CommSy-system is a web-based community system, which has been in development since 1999 at the University of Hamburg. It has initially been developed by students and researchers in their spare time. Its last organizational setting was a publicly funded research project, which allowed for full-time and part-time developers. As that project has come to an end, we are aiming at an open source project to ensure continuity by providing a frame for people from different organizations. In this paper we discuss the characteristics of this specific project and of other open source projects to identify a strategy for migrating that particular project to open source. 
We outline the actions taken to migrate the existing project to open source software development and raise questions concerning the necessary characteristics of an open source project as well as whether the actions will suffice or not.}, attachments = {https://flosshub.org/sites/flosshub.org/files/bleek10-14.pdf}, author = {Bleek, W-G. and Finck, M.} } @conference {Germ04b, title = {Mining CVS repositories, the softChange experience}, booktitle = {Proc. Int{\textquoteright}l Workshop on Mining Software Repositories ({MSR})}, year = {2004}, note = {"Mailing lists. Mailing lists are an important source of information about the evolution of the project. We currently correlate MRs [modification requests] to mail messages by using the author and the date attributes of both the MR and the message."}, pages = {17{\textendash}21}, abstract = {CVS logs are a rich source of software trails (information left behind by the contributors to the development process, usually in the forms of logs). This paper describes how softChange extracts these trails, and enhances them. This paper also addresses some challenges that CVS fact extraction poses to researchers.}, keywords = {bugzilla, cvs, email archives, log files, logs, softchange}, attachments = {https://flosshub.org/sites/flosshub.org/files/17MiningCVS.pdf}, author = {German, Daniel} } @conference {1050, title = {Mining version control systems for FACs (frequently applied changes)}, booktitle = {International Workshop on Mining Software Repositories (MSR 2004)}, volume = {2004}, year = {2004}, pages = {48 - 52}, publisher = {IEE}, organization = {IEE}, address = {Edinburgh, Scotland, UK}, abstract = {Today, programmers are forced to maintain a software system based on their gut feeling and experience. This paper makes an attempt to turn the software maintenance craft into a more disciplined activity, by mining for frequently applied changes in a version control system. 
Next to some initial results, we show how this technique allows to recover and study successful maintenance strategies, adopted for the redesign of long{\textendash}lived systems.}, keywords = {ccfinder, change analysis, change history, clone, clone detection, cvs, maintenance, tomcat, version control}, doi = {10.1049/ic:20040475}, attachments = {https://flosshub.org/sites/flosshub.org/files/48MiningVersion.pdf}, author = {Van Rysselberghe, F. and Demeyer, S} } @article {flosswp159, title = {Mobilization of Software Developers: The Free Software Movement}, year = {2004}, month = {May}, abstract = {Free/open source software (F/OSS) development projects are growing at a rapid rate. Globally dispersed virtual communities with large groups of software developers contribute time and effort often without pay. One force behind this phenomenon is the Free Software Movement (FSM), a 20 year-old social movement whose purpose is to promote the use of free software instead of proprietary software. We show how the ideology of the FSM influences software development work practices in F/OSS communities and how an occupational community of F/OSS developers has emerged from this movement. We present results from an empirical study of a F/OSS development community, GNUenterprise (GNUe) whose purpose is to build an Enterprise Resource Planning system. We show how the beliefs in freedom and freedom of choice, and the values of cooperative work and community building are manifested in the GNUe norms of informal self-management, immediate acceptance of fellow contributors, and open disclosure.}, attachments = {https://flosshub.org/sites/flosshub.org/files/elliottscacchi2.pdf}, author = {Margret Elliott} } @article {flosswp167, title = {Munich/MIT Suvey: Development of Embedded Linux}, year = {2004}, month = {May}, abstract = {The use of Linux in embedded devices has increased enormously in recent years. 
Most of the publicly available code for embedded Linux is developed and contributed by commercial firms, not by hobbyists. This raises the question if and how the development process differs from that of other OSS. This issue was addressed in a survey of embedded Linux developers yielding 268 valid responses. This paper is a collection of descriptive results from the survey.}, attachments = {https://flosshub.org/sites/flosshub.org/files/henkeltins.pdf}, author = {Joachim Henkel} } @article {flosswp124, title = {Maintainability of the Linux Kernel}, journal = {Proceedings of the 2nd Workshop on Open Source Software Engineering ICSE2002}, year = {2003}, note = {"We have examined 365 versions of Linux. For every version, we counted the number of instances of common (global) coupling between each of the 17 kernel modules and all the other modules in that version of Linux."}, month = {October}, abstract = {We have examined 365 versions of Linux. For every version, we counted the number of instances of common (global) coupling between each of the 17 kernel modules and all the other modules in that version of Linux. We found that the number of instances of common coupling grows exponentially with version number. This result is significant at the 99.99\% level, and no additional variables are needed to explain this increase. We conclude that, unless Linux is restructured with a bare minimum of common coupling, the dependencies induced by common coupling will, at some future date, make Linux exceedingly hard to maintain without inducing regression faults.}, keywords = {coupling, kernel, linux, linux kernel, modules, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/linux-maint_0.pdf}, author = {Schach, Stephen R. and Jin, B. 
and Wright, D.R.} } @article {flosswp122, title = {Managing the Boundary of an {\textquoteright}Open{\textquoteright} Project}, year = {2003}, month = {October}, abstract = {In the past ten years, the boundaries between public and open science and commercial research efforts have become more porous. Scholars have thus more critically examined ways in which these two institutional regimes intersect. Large open source software projects have also attracted commercial collaborators and now struggle to develop code in an open public environment that still protects their communal boundaries. This research applies a dynamic social network approach to understand how one community managed software project, Debian, develops a membership process. We examine the project{\textquoteright}s face-to-face social network during a five-year period (1997-2001) to see how changes in the social structure affect the evolution of membership mechanisms and the determination of gatekeepers. While the amount and importance of a contributor{\textquoteright}s work increases the probability that a contributor will become a gatekeeper, those more central in the social network are more likely to become gatekeepers and influence the membership process. A greater understanding of the mechanisms open projects use to manage their boundaries has critical implications for research and knowledge producing communities operating in pluralistic, open and distributed environments.}, keywords = {debian, membership, social network analysis}, attachments = {https://flosshub.org/sites/flosshub.org/files/omahonyferraro.pdf}, author = {Siobhan O{\textquoteright}Mahony} } @conference {DBLP:conf/pakdd/ChawlaAD03, title = {Mining Open Source Software (OSS) Data Using Association Rules Network}, booktitle = {Lecture Notes in Computer Science }, volume = {2637}, year = {2003}, pages = {461-466}, abstract = {The Open Source Software(OSS) movement has attracted considerable attention in the last few years. 
In this paper we report our results of mining data acquired from SourceForge.net, the largest open source software hosting website. In the process we introduce Association Rules Network(ARN), a (hyper)graphical model to represent a special class of association rules. Using ARNs we discover important relationships between the attributes of successful OSS projects. We verify and validate these relationships using Factor Analysis, a classical statistical technique related to Singular Value Decomposition(SVD).}, keywords = {arn, association rules, factor analysis, project success, sourceforge, svd}, author = {Sanjay Chawla and Bavani Arunasalam and Joseph G. Davis} } @article {flosswp87, title = {Modularity in Action: GNU/Linux and Free/Open Source Software Development Model Unleashed}, year = {2003}, month = {May}, abstract = {Organizational and managerial theories of modularity applied to the design and production of complex artifacts are used to interpret the rise and success of Free/Open Source Software. Strengths and risks of the adoption of a modular approach in software project management are introduced and are related to the achievements of the GNU/Linux project. It is suggested that mindful implementation of the principles of modularity may improve the rate of success of many Free/Open Source software projects. Authors suggest a possible revision towards an improved theory of modularity that may be extended also to settings different from software production.}, attachments = {https://flosshub.org/sites/flosshub.org/files/narduzzorossi.pdf}, author = {Alessandro Narduzzo} } @article {flosswp78, title = {Motivation of Software Developers in Open Source Projects: An Internet-based Survey of Contributors to the Linux Kernel}, journal = {RP Special Issue}, year = {2003}, month = {February}, abstract = {The motives of 141 contributors to a large Open Source Software project (the Linux kernel) was explored with an internet-based questionnaire study. 
Measured factors were both derived from discussions within the Linux community as well as from models from social sciences. Participants? engagement was particularly determined by their identification as a Linux developer, by pragmatic motives to improve own software, and by their tolerance of time investments. Moreover, some of the software development was accomplished by teams. Activities in these teams were particularly determined by participants? evaluation of the team goals as well as by their perceived indispensability and self-efficacy.}, attachments = {https://flosshub.org/sites/flosshub.org/files/rp-hertelniednerherrmann.pdf}, author = {Guido Hertel} } @unpublished {flosswp21, title = {Management and Virtual Decentralized Networks: The Linux Project}, year = {2001}, month = {September}, abstract = {This paper examines whether geographically dispersed knowledge workers can virtually collaborate for a project under no central planning. Co-ordination, management and the role of knowledge arise as the central areas of focus. The Linux Project and its development model are selected as a case of analysis and the critical success factors of this organizational design are identified. The study proceeds to the formulation of a framework that can be applied to all kinds of virtual decentralized work and concludes that value creation is maximized when there is intense interaction and uninhibited sharing of information between the organization and the surrounding community.}, attachments = {https://flosshub.org/sites/flosshub.org/files/dafermoslinux.pdf}, author = {George Dafermos} } @article {flosswp31, title = {Managing Information Quality in Virtual Communities of Practice}, year = {2001}, month = {November}, abstract = {In this paper we review how the new economics of information enable a new paradigm of collaboration. 
Spearheaded by the Open Source community, this evolutionary approach to collaborative content creation is a way to address information quality in virtual communities of practice. Based on experience gained in community projects, a few simple steps toward improving the quality of information in virtual communities are presented and illustrated.}, attachments = {https://flosshub.org/sites/flosshub.org/files/neus.pdf}, author = {Andreas Neus} }