% Bibliography records, one entry per cited work.
% Citation keys are the numeric record ids and are kept exactly as exported,
% so existing \cite commands continue to resolve.
% Field names that standard styles do not know (abstract, keywords,
% attachments) are ignored by the style and kept for tooling.

% 1888 -- conference paper published in a Springer proceedings volume.
% Typed as inproceedings so that the author list is actually rendered.
% NOTE(review): booktitle and series were not present in the original record;
% they were filled in from the ISBN / ISSN / volume number -- confirm against
% the publisher page before relying on them.
@inproceedings{1888,
  author    = {Gonz{\'a}lez-Barahona, J. M. and Sherwood, P. and Robles, G. and Izquierdo, D.},
  editor    = {Balaguer, Federico and Di Cosmo, Roberto and Garrido, Alejandra and Kon, Fabio and Robles, Gregorio and Zacchiroli, Stefano},
  title     = {Technical Lag in Software Compilations: Measuring How Outdated a Software Deployment Is},
  booktitle = {Open Source Systems: Towards Robust Practices},
  series    = {{IFIP} Advances in Information and Communication Technology},
  volume    = {496},
  pages     = {182--192},
  publisher = {Springer International Publishing},
  year      = {2017},
  month     = may,
  isbn      = {978-3-319-57735-7},
  issn      = {1868-422X},
  doi       = {10.1007/978-3-319-57735-7_17},
  url       = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_17},
  abstract  = {Large software compilations based on free, open source software (FOSS) packages are the basis for many software systems. When they are deployed in production, specific versions of the packages in the compilation are selected for installation. Over time, those versions become outdated with respect to the upstream software from which they are produced, and from the components available in the compilations as well. The fact that deployed components are outdated is not a problem in itself, but there is a price to pay for not being "as much updated as reasonable". This includes bug fixes and new features that could, at least potentially, be interesting for the deployed system. Therefore, a balance has to be maintained between "being up-to-date" and "keeping the good old working versions". This paper proposes a theoretical model (the "technical lag") for measuring how outdated a system is, with the aim of assisting in the decisions about upgrading in production. The paper explores several ways in which technical lag can be implemented, depending on requirements. As an illustration, it presents as well some specific cases in which the evolution of technical lag is computed.},
}

% 1861 -- journal version of the Debsources dataset paper.
% NOTE(review): the original record carried month "05/2015", which contradicts
% year 2016, and publisher "IEEE", although the DOI prefix 10.1007 is the one
% used by the Springer record above. Both values look copied from the
% conference version (key 1757). The month is omitted and the publisher set
% to Springer -- confirm both against the journal page.
@article{1861,
  author    = {Caneill, Matthieu and Germ{\'a}n, Daniel M. and Zacchiroli, Stefano},
  title     = {The {Debsources} Dataset: Two Decades of Free and Open Source Software},
  journal   = {Empirical Software Engineering},
  publisher = {Springer},
  year      = {2016},
  issn      = {1573-7616},
  doi       = {10.1007/s10664-016-9461-5},
  url       = {https://matthieu.io/dl/papers/debsources-ese-2016.pdf},
  keywords  = {debian, metadata, postgresql},
  abstract  = {We present the Debsources Dataset: distribution metadata and source code metrics spanning two decades of Free and Open Source Software (FOSS) history, seen through the lens of the Debian distribution. Debsources is a software platform used to gather, search, and publish on the Web the full source code of the Debian operating system, as well as measures about it. A notable public instance of Debsources is available at http://sources.debian.net, it includes both current and historical releases of Debian. Plugins to compute popular source code metrics (lines of code, defined symbols, disk usage) and other derived data (e.g., Checksums) have been written, integrated, and run on all the source code available on sources.debian.net. The Debsources Dataset is a PostgreSQL database dump of sources.debian.net metadata, as of February 10th, 2015. The dataset contains both Debian-specific metadata -- e.g., which software packages are available in which release, which source code file belong to which package, release dates, etc. -- and source code information gathered by running Debsources plugins. The Debsources Dataset offer a very long-term historical view of the macro-level evolution and constitution of FOSS through the lens of popular, representative FOSS projects of their times.},
}

% 1757 -- conference version of the Debsources dataset paper.
% NOTE(review): booktitle was not present in the original record; it is
% inferred from the "msr-2015" file name in the url and from the naming used
% by key 952 (MSR 2010 = 7th edition) -- confirm the exact proceedings title.
@inproceedings{1757,
  author      = {Zacchiroli, Stefano},
  title       = {The {Debsources} Dataset: Two Decades of {Debian} Source Code Metadata},
  booktitle   = {2015 {IEEE/ACM} 12th Working Conference on Mining Software Repositories ({MSR} 2015)},
  publisher   = {IEEE},
  year        = {2015},
  month       = may,
  url         = {https://upsilon.cc/~zack/research/publications/debsources-msr-2015.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/debsources-msr-2015.pdf},
  keywords    = {debian},
  abstract    = {We present the Debsources Dataset: distribution metadata and source code metrics spanning two decades of Free and Open Source Software (FOSS) history, seen through the lens of the Debian distribution. Debsources is a software platform used to gather, search, and publish on the Web the full source code of the Debian operating system, as well as measures about it. A notable public instance of Debsources is available at http://sources.debian.net; it includes both current and historical releases of Debian. Plugins to compute popular source code metrics (lines of code, defined symbols, disk usage) and other derived data (e.g., checksums) have been written, integrated, and run on all the source code available on sources.debian.net. The Debsources Dataset is a PostgreSQL database dump of sources.debian.net metadata, as of February 10th, 2015. The dataset contains both Debian-specific metadata{\textemdash}e.g., which software packages are available in which release, which source code file belong to which package, release dates, etc.{\textemdash}and source code information gathered by running Debsources plugins. The Debsources Dataset offer a very long-term historical view of the macro-level evolution and constitution of FOSS through the lens of popular, representative FOSS projects of their times.},
}

% 1748 -- conference paper on the distcheck tool.
% Author names are given in "Last, First" form; in "First Last" form the
% surname "Le Fessant" would be split and sorted under "Fessant".
% NOTE(review): booktitle was not present in the original record; it is
% inferred from the "msr-2015" file name in the url -- confirm the exact
% proceedings title.
@inproceedings{1748,
  author      = {Abate, Pietro and Di Cosmo, Roberto and Gesbert, Louis and Le Fessant, Fabrice and Treinen, Ralf and Zacchiroli, Stefano},
  title       = {Mining Component Repositories for Installability Issues},
  booktitle   = {2015 {IEEE/ACM} 12th Working Conference on Mining Software Repositories ({MSR} 2015)},
  year        = {2015},
  month       = may,
  url         = {http://www.dicosmo.org/preprints/msr-2015-distcheck.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/msr-2015-distcheck.pdf},
  keywords    = {drupal, opam},
  abstract    = {Component repositories play an increasingly relevant role in software life-cycle management, from software distribution to end-user, to deployment and upgrade management. Software components shipped via such repositories are equipped with rich metadata that describe their relationship (e.g., dependencies and conflicts) with other components. In this practice paper we show how to use a tool, distcheck, that uses component metadata to identify all the components in a repository that cannot be installed (e.g., due to unsatisfiable dependencies), provides detailed information to help developers understanding the cause of the problem, and fix it in the repository. We report about detailed analyses of several repositories: the Debian distribution, the OPAM package collection, and Drupal modules. In each case, distcheck is able to efficiently identify not installable components and provide valuable explanations of the issues. Our experience provides solid ground for generalizing the use of distcheck to other component repositories.},
}

% 952 -- conference paper on the Ultimate Debian Database (UDD).
% The original booktitle contained the proceedings title twice, back to back;
% it is stated once here. The address field holds the value from the original
% record unchanged.
@inproceedings{952,
  author       = {Nussbaum, Lucas and Zacchiroli, Stefano},
  title        = {The {Ultimate Debian Database}: Consolidating Bazaar Metadata for Quality Assurance and Data Mining},
  booktitle    = {2010 7th {IEEE} Working Conference on Mining Software Repositories ({MSR} 2010)},
  pages        = {52--61},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  year         = {2010},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463277},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/52msr2010-udd.pdf},
  keywords     = {debian, metadata, udd},
  abstract     = {FLOSS distributions like RedHat and Ubuntu require a lot more complex infrastructures than most other FLOSS projects. In the case of community-driven distributions like Debian, the development of such an infrastructure is often not very organized, leading to new data sources being added in an impromptu manner while hackers set up new services that gain acceptance in the community. Mixing and matching data is then harder than should be, albeit being badly needed for Quality Assurance and data mining. Massive refactoring and integration is not a viable solution either, due to the constraints imposed by the bazaar development model. This paper presents the Ultimate Debian Database (UDD), which is the countermeasure adopted by the Debian project to the above "data hell". UDD gathers data from various data sources into a single, central SQL database, turning Quality Assurance needs that could not be easily implemented before into simple SQL queries. The paper also discusses the customs that have contributed to the data hell, the lessons learnt while designing UDD, and its applications and potentialities for data mining on FLOSS distributions.},
}