% Bibliography of publications that use or cite the FLOSSMetrics datasets.
% Layout: one field per line; authors in "Last, First" form; month given as a
% bare three-letter macro; page ranges with a double hyphen. Citation keys are
% kept exactly as they were so that existing cite commands continue to resolve.
% The non-standard "attachments" and "keywords" fields are ignored by standard
% styles and are retained as record metadata.

% Entry 1510: an authored paper (previously typed as a whole proceedings volume,
% which makes standard styles drop the author list).
% NOTE(review): the record carries no booktitle, which inproceedings requires;
% presumably this appeared in a 2013 mining-software-repositories venue --
% confirm the venue and add booktitle/pages.
@inproceedings{1510,
  author   = {Goeminne, Mathieu and Claes, Ma{\"e}lick and Mens, Tom},
  title    = {A Historical Dataset for the {Gnome} Ecosystem},
  year     = {2013},
  month    = may,
  keywords = {cvsanaly, flossmetrics, gnome},
  abstract = {We present a dataset of the open source software ecosystem Gnome from a social point of view. We have collected historical data about the contributors to all Gnome projects stored on git.gnome.org, taking into account the problem of identity matching, and associating different activity types to the contributors. This type of information is very useful to complement the traditional, source-code related information one can obtain by mining and analyzing the actual source code. The dataset can be obtained at https://bitbucket.org/mgoeminne/sgl-flossmetric-dbmerge.},
}

% Entry 1218: workshop paper; the note holds verbatim quotations from the paper
% describing its data source.
@inproceedings{1218,
  author      = {Conaldi, Guido and Tonellato, Marco},
  title       = {A Longitudinal Study on Collaboration Networks and Decision to Participate in a {FLOSS} Community},
  booktitle   = {5th Workshop on Public Data about Software Development (WoPDaSD 2010)},
  year        = {2010},
  note        = {``we chose as a case of study Epiphany, which is the default web browser of the GNOME graphical desktop environment'' ``We collected all relevant data by parsing all the bug reports in GNOME Bugzilla repository relative to Epiphany. The data collection and storing was done using Bicho (v. 0.4 rev. 7198), a software part of the FLOSSMetric project [14].''},
  keywords    = {bicho, bug fixing, bug reports, bugzilla, COLLABORATION, developers, epiphany, flossmetrics, gnome, social network analysis},
  attachments = {https://flosshub.org/sites/flosshub.org/files/wopdasd002.pdf},
  abstract    = {In this paper we conjecture that individual decisions of FLOSS (Free/Libre Open Source Software) developers to take on a task are influenced by network relations generated by collaboration among project members. In order to explore our conjecture we collected data on a FLOSS project team consisting of 227 developers committed since 2002 to the development of a web browser. We reconstructed 2-mode co-collaboration networks (software developer by bug) in which a tie represents an action taken by a developer in order to solve a specific bug. Co-collaboration networks were collected at five points in time during a six-month development cycle of the software. We report and discuss results of longitudinal actor-based modeling that we specify to test for the influence of local network structures on developer's decision to take action on a specific bug. The study controls for bug-specific and developer-specific characteristics that may also affect developers' decisions exogenously. We also control for priority and severity levels assigned by the team to bugs in an attempt to manage voluntary contribution.},
}

% Entry 1385: journal article; the note holds a verbatim quotation from the paper.
@article{1385,
  author   = {Samoladas, Ioannis and Angelis, Lefteris and Stamelos, Ioannis},
  title    = {Survival analysis on the duration of open source projects},
  journal  = {Information and Software Technology},
  volume   = {52},
  pages    = {902--922},
  year     = {2010},
  month    = sep,
  issn     = {0950-5849},
  doi      = {10.1016/j.infsof.2010.05.001},
  note     = {``For the purposes of our study we used data coming only from source code repositories.''},
  keywords = {flossmetrics, prediction, source code, survival analysis},
  abstract = {Context Open source (FLOSS) project survivability is an important piece of information for many open source stakeholders. Coordinators of open source projects would like to know the chances for the survival of the projects they coordinate. Companies are also interested in knowing how viable a project is in order to either participate or invest in it, and volunteers want to contribute to vivid projects. Objective The purpose of this article is the application of survival analysis techniques for estimating the future development of a FLOSS project. Method In order to apply such approach, duration data regarding FLOSS projects from the FLOSSMETRICS (This work was partially supported by the European Community's Sixth Framework Program under the Contract FP6-033982) database were collected. Such database contains metadata for thousands of FLOSS projects, derived from various forges. Subsequently, survival analysis methods were employed to predict the survivability of the projects, i.e. their probability of continuation in the future, by examining their duration, combined with other project characteristics such as their application domain and number of committers. Results It was shown how probability of termination or continuation may be calculated and how a prediction model may be built to upraise project future. In addition, the benefit of adding more committers to FLOSS projects was quantified. Conclusion Analysis results demonstrate the usefulness of the proposed framework for assessing the survival probability of a FLOSS project.},
}

% Entry herraiz2009research: workshop paper; the note holds a verbatim quotation
% from the paper (the trailing "(more)" marker is kept from the source record).
@inproceedings{herraiz2009research,
  author       = {Herraiz, I. and Robles, G. and Gonzalez-Barahona, J. M.},
  title        = {Research friendly software repositories},
  booktitle    = {Proceedings of the joint international and annual ERCIM workshops on Principles of software evolution (IWPSE) and software evolution (Evol) workshops},
  pages        = {19--24},
  year         = {2009},
  publisher    = {ACM},
  organization = {ACM},
  note         = {``In spite of these rich availability of software repositories, the heterogeneity of the data makes it difficult to apply studies at a large scale, although some research projects, like FLOSSMole [10] or FLOSSMetrics [8] are addressing these issues and aim to provide datasets about thousands of libre software projects for research purposes.'' (more)},
  keywords     = {flossmetrics, flossmole cited},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/herraiz-Research-Friendly-sw-repos.pdf},
  abstract     = {What is the future of software evolution? In 1974, Meir M. Lehman had a vision of software evolution being driven by empirical studies of software repositories, and of a theory based on those empirical results. However, that scenario is yet to come. Software evolution studies are often based on a few cases, because the needed information is scarce, dispersed and incomplete. Their conclusions are not generalizable, slowing down the progress of this research discipline. Libre (free / open source) software supposes an opportunity to alleviate this situation. In this paper we describe the existing approaches to provide research datasets that are mining libre software repositories, and propose an agenda based on the concept of research friendly software repositories, which provides finer granularity and integrated data.},
}

% Entry 1211: workshop paper.
@inproceedings{1211,
  author      = {Sowe, Sulayman K. and Samoladas, Ioannis and Stamelos, Ioannis and Angelis, Lefteris},
  title       = {Are {FLOSS} developers committing to {CVS/SVN} as much as they are talking in mailing lists? Challenges for Integrating data from Multiple Repositories},
  booktitle   = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)},
  pages       = {49--54},
  year        = {2008},
  month       = sep,
  keywords    = {cvs, cvsanaly, developers, email, email archives, flossmetrics, mailing list, mlstats, source code},
  attachments = {https://flosshub.org/sites/flosshub.org/files/49-542008.pdf},
  abstract    = {This paper puts forward a framework for investigating Free and Open Source Software (F/OSS) developers activities in both source code and mailing lists repositories. We used data dumps of fourteen projects from the FLOSSMetrics (FM) retrieval system. Our intentions are (i) to present a possible methodology, its advantages and disadvantages which can benefit future researchers using some aspects of the FM retrieval system's data dumps, and (ii) discuss our initial research results on the contributions developers make to both coding and lists activities.},
}