% Bibliography entries for FLOSS repository-mining papers (flosshub export).
% Conventions used below: one field per line, authors in "Last, First" form,
% page ranges with a double hyphen, bare DOIs without any resolver or label
% prefix, and braces around title words whose capitalisation must survive
% sentence-casing bibliography styles. Citation keys are kept exactly as exported.

% Conference paper (MSR 2014): effort estimation from source code repositories.
@inproceedings{Robles:2014:EDE:2597073.2597107,
  author       = {Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Cervig{\'o}n, Carlos and Capiluppi, Andrea and Izquierdo-Cort{\'a}zar, Daniel},
  title        = {Estimating Development Effort in Free/Open Source Software Projects by Mining Software Repositories: A Case Study of {OpenStack}},
  booktitle    = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series       = {MSR 2014},
  year         = {2014},
  pages        = {222--231},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {Because of the distributed and collaborative nature of free / open source software (FOSS) projects, the development effort invested in a project is usually unknown, even after the software has been released. However, this information is becoming of major interest, especially ---but not only--- because of the growth in the number of companies for which FOSS has become relevant for their business strategy. In this paper we present a novel approach to estimate effort by considering data from source code management repositories. We apply our model to the OpenStack project, a FOSS project with more than 1,000 authors, in which several tens of companies cooperate. Based on data from its repositories and together with the input from a survey answered by more than 100 developers, we show that the model offers a simple, but sound way of obtaining software development estimations with bounded margins of error.},
  keywords     = {effort estimation, free software, mining software repositories, open source, openstack},
  isbn         = {978-1-4503-2863-0},
  doi          = {10.1145/2597073.2597107},
  url          = {http://doi.acm.org/10.1145/2597073.2597107},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/robles_0.pdf},
}

% Journal article. The export carried a placeholder page range ("n/a - n/a");
% it is omitted here so that no bogus page numbers are printed. The month is
% given as the standard three-letter macro; the year lives in the year field.
@article{1484,
  author      = {Capiluppi, Andrea},
  title       = {Similarities, challenges and opportunities of {Wikipedia} content and open source projects},
  journal     = {Journal of Software: Evolution and Process},
  year        = {2012},
  month       = aug,
  note        = {The Sourceforge repository was chosen as the largest and most representative OSS repository. In order to extract a sample from it, the flossmole.org database [20] was downloaded, containing the basic information of all the Sourceforge projects. The latest available dump of such data, at the time of the extraction (April 2009) contained 126,142 projects. From this database, three filters were applied: one based on the given status of the projects; one classifying the activity of the project, and imposed by the SourceForge site ({\textquotedblleft}active{\textquotedblright} and {\textquotedblleft}inactive{\textquotedblright}); and one relative to the specific topic (or application domain) that each project is developed for.},
  abstract    = {Several years of research and evidence have demonstrated that open source software portals often contain a large amount of software projects that simply do not evolve, developed by relatively small communities, struggling to attract a sustained number of contributors. These portals have started to increasingly act as a storage for abandoned projects, and researchers and practitioners should try and point out how to take advantage of such content. Similarly, other online content portals (like Wikipedia) could be harvested for valuable content. In this paper we argue that, even with differences in the requested expertise, many projects reliant on content and contributions by users undergo a similar evolution, and follow similar patterns: when a project fails to attract contributors, it appears to be not evolving, or abandoned. Far from a negative finding, even those projects could provide valuable content that should be harvested and identified based on common characteristics: by using the attributes of {\textquoteleft}usefulness{\textquoteright} and {\textquoteleft}modularity{\textquoteright} we isolate valuable content in both Wikipedia pages and open source software projects.},
  keywords    = {flossmole},
  doi         = {10.1002/smr.1570},
  url         = {https://www.researchgate.net/publication/259684110_Similarities_challenges_and_opportunities_of_Wikipedia_content_and_open_source_projects},
  attachments = {https://flosshub.org/sites/flosshub.org/files/jsep2012.pdf},
}

% Journal article. The doi field holds the bare identifier (the exported value
% had a literal "DOI: " label in front of it, which breaks DOI links).
@article{Beecher2009739,
  author   = {Beecher, Karl and Capiluppi, Andrea and Boldyreff, Cornelia},
  title    = {Identifying exogenous drivers and evolutionary stages in {FLOSS} projects},
  journal  = {Journal of Systems and Software},
  volume   = {82},
  number   = {5},
  year     = {2009},
  pages    = {739--750},
  abstract = {The success of a Free/Libre/Open Source Software (FLOSS) project has been evaluated in the past through the number of commits made to its configuration management system, number of developers and number of users. Most studies, based on a popular FLOSS repository (SourceForge), have concluded that the vast majority of projects are failures. This study{\textquoteright}s empirical results confirm and expand conclusions from an earlier and more limited work. Not only do projects from different repositories display different process and product characteristics, but a more general pattern can be observed. Projects may be considered as early inceptors in highly visible repositories, or as established projects within desktop-wide projects, or finally as structured parts of FLOSS distributions. These three possibilities are formalized into a framework of transitions between repositories. The framework developed here provides a wider context in which results from FLOSS repository mining can be more effectively presented. Researchers can draw different conclusions based on the overall characteristics studied about an Open Source software project{\textquoteright}s potential for success, depending on the repository that they mine. These results also provide guidance to OSS developers when choosing where to host their project and how to distribute it to maximize its evolutionary success.},
  keywords = {developers, forge, forges, repositories, repository, scm, software repositories, sourceforge, success, users},
  issn     = {0164-1212},
  doi      = {10.1016/j.jss.2008.10.026},
  url      = {http://www.sciencedirect.com/science/article/B6V0N-4TVTJFS-1/2/e32ecee1bcb54bd4a5dff6d5e3daca8d},
}

% Journal article (workshop proceedings published as a journal volume; the
% workshop name is kept in the note field). Bare DOI, as above.
@article{Capiluppi200989,
  author   = {Capiluppi, Andrea and Boldyreff, Cornelia and Beecher, Karl and Adams, Paul J.},
  title    = {Quality Factors and Coding Standards -- a Comparison Between Open Source Forges},
  journal  = {Electronic Notes in Theoretical Computer Science},
  volume   = {233},
  year     = {2009},
  note     = {Proceedings of the International Workshop on Software Quality and Maintainability (SQM 2008)},
  pages    = {89--103},
  abstract = {Enforcing adherence to standards in software development in order to produce high quality software artefacts has long been recognised as best practice in traditional software engineering. In a distributed heterogeneous development environment such those found within the Open Source paradigm, coding standards are informally shared and adhered to by communities of loosely coupled developers. Following these standards could potentially lead to higher quality software. This paper reports on the empirical analysis of two major forges where OSS projects are hosted. The first one, the KDE forge, provides a set of guidelines and coding standards in the form of a coding style that developers may conform to when producing the code source artefacts. The second studied forge, SourceForge, imposes no formal coding standards on developers. A sample of projects from these two forges has been analysed to detect whether the SourceForge sample, where no coding standards are reinforced, has a lower quality than the sample from KDE. Results from this analysis form a complex picture; visually, all the selected metrics show a clear divide between the two forges, but from the statistical standpoint, clear distinctions cannot be drawn amongst these quality related measures in the two forge samples.},
  keywords = {artefacts, artifacts, coding standards, coding style, complexity, forge, forges, kde, metrics, quality, source code, sourceforge},
  issn     = {1571-0661},
  doi      = {10.1016/j.entcs.2009.02.063},
  url      = {http://www.sciencedirect.com/science/article/B75H1-4VXDKRV-7/2/abcc2be2c4c3998e4bc9b53473ca2d81},
}

% Workshop paper. The venue is a proceedings volume, so it is recorded as an
% inproceedings entry with booktitle (the export had it as an article with the
% proceedings name in the journal field).
@inproceedings{1099,
  author      = {Capiluppi, Andrea},
  title       = {Evidences in the evolution of {OS} projects through Changelog Analyses},
  booktitle   = {Proceedings of the 3rd ICSE Workshop on Open Source},
  year        = {2003},
  note        = {"In this study we concentrate on a very large sample (406 projects) selected randomly from an OS portal[20]" (freshmeat) "We define three clusters of projects: {\textquoteright}large{\textquoteright} projects as long as they are based on more than 1000KB(40KLOC)..."},
  pages       = {19--24},
  abstract    = {Most empirical studies about Open Source (OS) projects or products are vertical and usually deal with the flagship, successful projects. There is a substantial lack of horizontal studies to shed light on the whole population of projects, including failures. This paper presents a horizontal study aimed at characterizing OS projects. We analyze a sample of around 400 projects from a popular OS project repository. Each project is characterized by a number of attributes. We analyze these attributes statically and over time. The main results show that few projects are capable of attracting a meaningful community of developers. The majority of projects is made by few (in many cases one) person with a very slow pace of evolution. We then try to observe how many projects count on a substantial number of developers, and analyze those projects more deeply. The goal is to achieve a better insight in the dynamics of open source development. The initial results of this analysis, especially growth in code size and tendency to stability in modularity, seem to be in line with traditional close source development.},
  keywords    = {classification, freshmeat, loc, modularity, repository, size, sloc, source code},
  url         = {http://hdl.handle.net/10552/1037},
  attachments = {https://flosshub.org/sites/flosshub.org/files/capiluppi2003.pdf},
}

% Workshop paper (no page numbers available in the export).
@inproceedings{1153,
  author      = {Capiluppi, Andrea and Lago, Patricia and Morisio, Maurizio},
  title       = {Characterizing the {OSS} process},
  booktitle   = {Proceedings of the 2nd ICSE Workshop on Open Source},
  year        = {2002},
  note        = {"We have considered two well-known Open Source portals (FreshMeat [1] and SourceForge [2] )." "Using pseudo-random sampling we have selected a sample of 400 projects (mostly from FreshMeat). Each project is described by several variables (programming language, type of license, size of source code, type of documentation available and others). By indirect means (analysis of the Changelog file, or CVS) it is also possible to compute the number of people working on the project, and the number of external contributors. From FreshMeat we get both a vitality index, that considers the number of releases per time period, and a popularity index, which is a first measure of the interest of users to the project (project URL hits, mixed with subscriptions to it)."},
  abstract    = {The Open Source model of software development has gained the attention of both the business, the practitioners{\textquoteright} and the research communities. The Open Source process has been described by the seminal paper by Eric Raymond [4] and [5]. However, sound empirical studies are still very limited [3], [6]. Our goal is to investigate the OS process by empirical means, to analyze, characterize it, and possibly model it with quantitative models. It should be noted that the Open Source process provides open process and product data, and therefore is a rare opportunity for empirical research. Our initial research focus is on the characterization of the process, starting from the evolution of OS projects. In traditional projects, a significant number of releases in a short time is usually considered an instability factor [7] and [8], while in the OSS community, it is an evidence of vitality, shows the commitment of the authors and the power of attraction of other programmers [9]. Is it possible to characterize the vitality of projects? And, can vitality be traced to some other characteristics of a project?},
  keywords    = {bugs, change log, classification, cvs, downloads, freshmeat, metadata, patches, popularity, project success, release history, sourceforge, vitality},
  attachments = {https://flosshub.org/sites/flosshub.org/files/CapiluppiLagoMorisio.pdf},
}