@proceedings {1912, title = {Developer Turnover in Global, Industrial Open Source Projects: Insights from Applying Survival Analysis}, year = {2017}, note = {we study five industrial OSS projects of different sizes (in terms of software size, contributors and number of companies involved) and examine whether the duration of developers staying in a project is related to following four factors: (i) the time of first contribution, (ii) the rate of maintaining own files, (iii) the main action type, and (iv) the main job type projects: WikiMedia, OpenStack, GlusterFS from Red Hat, Xen Project from the Linux Foundation and Apache CloudStack}, month = {05/2017}, pages = {66-75}, abstract = {Large open source software projects often have a globally distributed development team. Studies have shown developer turnover has a significant impact on the project success. Frequent developer turnover may lead to loss of productivity due to lacking relevant knowledge and spending extra time learning how projects work. Thus, lots of attention has been paid to which factors are related to developer retention; however, few of them focus on the impact of activities of individual developers. In this paper, we study five open source projects from different organizations and examine whether developer turnover is affected by when they start contributing and what types of contributions they are making. Our study reveals that developers have higher chances to survive in software projects when they 1) start contributing to the project earlier; 2) mainly modify instead of creating files; 3) mainly code instead of dealing with documentations. 
Our results also shed lights on the potential approaches to improving developer retention.}, keywords = {survival analysis}, author = {Bin Lin and Gregorio Robles and Serebrenik, Alexander} } @proceedings {1914, title = {Practices and Perceptions of UML Use in Open Source Projects}, year = {2017}, month = {05/2017}, pages = {203-212}, abstract = {Context: Open Source is getting more and more collaborative with industry. At the same time, modeling is today playing a crucial role in development of, e.g., safety critical software. Goal: However, there is a lack of research about the use of modeling in Open Source. Our goal is to shed some light into the motivation and benefits of the use of modeling and its use within project teams. Method: In this study, we perform a survey among Open Source developers. We focus on projects that use the Unified Modeling Language (UML) as a representative for software modeling. Results: We received 485 answers of contributors of 458 different Open Source projects. Conclusion: Collaboration seems to be the most important motivation for using UML. It benefits new contributors and contributors who do not create models. Teams use UML during communication and planning of joint implementation efforts.}, keywords = {architecture documentation, communication, effectiveness of UML, github, MOTIVATION, UML}, author = {Truong Ho-Quang and Hebig, Regina and Gregorio Robles and Chaudron, Michel R. V. and Miguel Angel Fernandez} } @proceedings {1888, title = {Technical Lag in Software Compilations: Measuring How Outdated a Software Deployment Is}, volume = {496}, year = {2017}, month = {05/2017}, pages = {182 - 192}, publisher = {Springer International Publishing}, abstract = {Large software compilations based on free, open source software (FOSS) packages are the basis for many software systems. When they are deployed in production, specific versions of the packages in the compilation are selected for installation. 
Over time, those versions become outdated with respect to the upstream software from which they are produced, and from the components available in the compilations as well. The fact that deployed components are outdated is not a problem in itself, but there is a price to pay for not being "as much updated as reasonable". This includes bug fixes and new features that could, at least potentially, be interesting for the deployed system. Therefore, a balance has to be maintained between "being up-to-date" and "keeping the good old working versions". This paper proposes a theoretical model (the "technical lag") for measuring how outdated a system is, with the aim of assisting in the decisions about upgrading in production. The paper explores several ways in which technical lag can be implemented, depending on requirements. As an illustration, it presents as well some specific cases in which the evolution of technical lag is computed.}, isbn = {978-3-319-57735-7}, issn = {1868-422X}, doi = {10.1007/978-3-319-57735-7_17}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_17}, author = {Gonz{\'a}lez-Barahona, J.M. and Sherwood, P. and Robles, G. and Izquierdo, D.}, editor = {Balaguer, Federico and Di Cosmo, Roberto and Garrido, Alejandra and Kon, Fabio and Gregorio Robles and Zacchiroli, Stefano} } @inbook {Bibi2016, title = {A Bayesian Belief Network for Modeling Open Source Software Maintenance Productivity}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {32{\textendash}44}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {A Bayesian Belief Network for Modeling Open Source Software Maintenance Productivity}, address = {Cham}, abstract = {Maintenance is one of the most effort consuming activities in the software development lifecycle. 
Efficient maintenance within short release cycles depends highly on the underlying source code structure, in the sense that complex modules are more difficult to maintain. In this paper we attempt to unveil and discuss relationships between maintenance productivity, the structural quality of the source code and process metrics like the type of a release and the number of downloads. To achieve this goal, we developed a Bayesian Belief Network (BBN) involving several maintainability predictors and three managerial indices for maintenance (i.e., duration, production, and productivity) on 20 open source software projects. The results suggest that maintenance duration depends on inheritance, coupling, and process metrics. On the other hand maintenance production and productivity depend mostly on code quality metrics. }, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_3}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_3}, author = {Bibi, Stamatia and Apostolos Ampatzoglou and Ioannis Stamelos}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Rodr{\'\i}guez-P{\'e}rez2016, title = {BugTracking: A Tool to Assist in the Identification of Bug Reports}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {192{\textendash}198}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {BugTracking: A Tool to Assist in the Identification of Bug Reports}, address = {Cham}, abstract = {Issue tracking systems are used, in most software projects, but in particular in almost all free open source software, to record many different kinds of issues: bug reports, feature requests, maintenance tickets and even design discussions. 
Identifying which of those issues are bug reports is not a trivial task. When researchers want to conduct studies on the bug reports, managed by a software development project, first of all they need to perform this identification. The job for researchers here is very different from the bug triaging that researchers do. In the latter case, people with a considerate experience in the project make a decision based on the information available at that time (maybe just a short comment by some user), asking, if needed, for more details. In the former case, researchers usually have not that experience in the project, but they have at their use all the information produced, until the moment the issue was closed. This may include not only all comments and actions on the issue tracking system, but for example, discussions about a fix in the code review system, or the final fixing patch in the source code management system. Having all that information conveyed to the researchers, in an easy, flexible and quick way, accelerates and makes their decision process much more reliable. It simplifies large scale manual analysis of issues (in hundreds or thousands), helping researchers to ensure that they are really working with what they intend to work: bug reports. This paper presents a tool designed to solve exactly the problem of providing the researchers with all the relevant information needed to decide whether an issue corresponds to a bug report or not. The tool uses information extracted automatically from the projects repositories. It offers a web-based interface which allows collaboration, traceability and transparency of the identification of bug reports. All this makes the process easier, faster, and more reliable.}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_16}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_16}, author = {Rodr{\'\i}guez-P{\'e}rez, Gema and Gonzalez-Barahona, Jes{\'u}s M. 
and Gregorio Robles and Dalipaj, Dorealda and Sekitoleko, Nelson}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Kalliamvakou2016, title = {Certification of Open Source Software {\textendash} A Scoping Review}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {111{\textendash}122}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {Certification of Open Source Software {\textendash} A Scoping Review}, address = {Cham}, abstract = {Open source software (OSS) systems are being used for increasingly critical functions in modern societies, e.g., in health care, finance, government, defense, and other safety and security sensitive sectors. There is an increasing interest in software certification as a means to assure quality and dependability of such systems. However, the development processes and organizational structures of OSS projects can be substantially different from traditional closed-source projects. The distributed, {\textquotedblleft}bazaar-style{\textquotedblright} approach to software development in OSS systems is often perceived incompatible with certification. This paper presents the results of a scoping review on certification in OSS systems in order to identify and categorize key issues and provide a comprehensive overview of the current evidence on this topic. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_9}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_9}, author = {Kalliamvakou, Eirini and Weber, Jens and Knauss, Alessia}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Murphy2016, title = {Classifying Organizational Adoption of Open Source Software: A Proposal}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {123{\textendash}133}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {Classifying Organizational Adoption of Open Source Software: A Proposal}, address = {Cham}, abstract = {Staged adoption models are a common feature of information systems (IS) adoption literature, yet these are rarely used in open source software (OSS) adoption studies. In this paper, a staged model for classifying the organizational adoption of OSS is proposed, based upon a critical review of existing staged adoption models and factors identified from OSS adoption literature. Innovations in the proposed model include: defined transition pathways between stages, additional stages and a decomposition of cessation of use into four distinct pathways. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_10}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_10}, author = {Murphy, Stephen and Cox, Sharon}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Harzl2016, title = {Combining FOSS and Kanban: An Action Research}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {71{\textendash}84}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {Combining FOSS and Kanban: An Action Research}, address = {Cham}, abstract = {Even though Free and Open Source Software (FOSS) and Agile Software Development (ASD) have been recognized as important ways to develop software, share some similarities, and have many success stories, there is a lack of research regarding the comprehensive integration of both practices. This study attempts to consolidate these methods and to answer if FOSS and ASD can be combined successfully. Action Reseach (AR) is conducted with one sub-team of a large FOSS project. We performed two action research cycles based on the Kanban method. This paper has two main contributions; first, it describes a real world situation, where Kanban is applied to a FOSS project, and second, it suggests two new Kanban practices. These two methods are targeted specifically at FOSS projects and their characteristics. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_6}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_6}, author = {Harzl, Annemarie}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Crowston2016, title = {Core-Periphery Communication and the Success of Free/Libre Open Source Software Projects}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {45{\textendash}56}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, address = {Cham}, abstract = {We examine the relationship between communications by core and peripheral members and Free/Libre Open Source Software project success. The study uses data from 74 projects in the Apache Software Foundation Incubator. We conceptualize project success in terms of success building a community, as assessed by graduation from the Incubator. We compare successful and unsuccessful projects on volume of communication by core (committer) and peripheral community members and on use of inclusive pronouns as an indication of efforts to create intimacy among team members. An innovation of the paper is that use of inclusive pronouns is measured using natural language processing techniques. We find that core and peripheral members differ in their volume of contribution and in their use of inclusive pronouns, and that volume of communication is related to project success. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_4}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_4}, author = {Kevin Crowston and Shamshurin, Ivan}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Poo-Caama{\~n}o2016, title = {Herding Cats: A Case Study of Release Management in an Open Collaboration Ecosystem}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {147{\textendash}162}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {Herding Cats: A Case Study of Release Management in an Open Collaboration Ecosystem}, address = {Cham}, abstract = {Release management in large-scale software development projects requires significant communication and coordination. It is particularly challenging in Free and Open Source Software (FOSS) ecosystems, in which hundreds of loosely connected developers and their projects need to be coordinated to release software to a schedule. To better understand this process and its challenges, we analyzed over two and half years of communication in the GNOME ecosystem and studied developers{\textquoteright} interactions. We cataloged communication channels, categorized high level communication and coordination activities in one of them, and triangulated our results by interviewing developers. We found that a release schedule, influence instead of direct control, and diversity are factors that impact positively the release process in the GNOME ecosystem. Our results can help organizations build better large-scale teams and show that research focused on individual projects might miss important parts of the picture. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_12}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_12}, author = {Poo-Caama{\~n}o, Germ{\'a}n and Singer, Leif and Knauss, Eric and Daniel M. German}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Hirao2016, title = {The Impact of a Low Level of Agreement Among Reviewers in a Code Review Process}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {97{\textendash}110}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, address = {Cham}, abstract = {Software code review systems are commonly used in software development. In these systems, many patches are submitted to improve the quality. To verify the quality, voting is commonly used by contributors; however, there still exists a major problem, namely, that reviewers do not always simply reach a broad agreement. In our previous study, we found that consensus is not usually reached, implying that an individual reviewer{\textquoteright}s final decision usually differs from that of the majority of the other reviewers. In this study, we further investigate the reasons why such situations often occur, and provide suggestions for better handling of these problems. Our analysis of the Qt and OpenStack project datasets allow us to suggest that a patch owner should select more appropriate reviewers who often agree with others{\textquoteright} decisions. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_8}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_8}, author = {Hirao, Toshiki and Ihara, Akinori and Ueda, Yuki and Phannachitta, Passakorn and Matsumoto, Ken-ichi}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {M{\"a}enp{\"a}{\"a}2016, title = {In-between Open and Closed - Drawing the Fine Line in Hybrid Communities}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {134{\textendash}146}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {In-between Open and Closed - Drawing the Fine Line in Hybrid Communities}, address = {Cham}, abstract = {Today, the community driven development model extends into a variety of new, often web based collaborations. Among these are hybrid open source development set ups in which various online tools are used to facilitate cooperation between virtual teams of commercial and voluntary stakeholders. As yet, how these relationships form and evolve is not understood extensively. This article presents a longitudinal case study of a smartphone startup that founded its early product development strategy on reliance on feedback from its customers through a web based question and answer forum. With this, the company managed to extend values typical for open source communities to support development of its proprietary software. Our main findings include that the challenge in similar settings lies in striking the right balance between the open and the proprietary {\textendash} while overt openness may risk the competitive advantage of a company, leaving too much behind closed boundaries can create unnecessary friction in the relationship. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_11}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_11}, author = {M{\"a}enp{\"a}{\"a}, Hanna and Kilamo, Terhi and M{\"a}nnist{\"o}, Tomi}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Gamalielsson2016, title = {On Involvement in Open Standards: How Do Organisations Contribute to W3C Standards Through Editorship?}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {57{\textendash}70}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {On Involvement in Open Standards: How Do Organisations Contribute to W3C Standards Through Editorship?}, address = {Cham}, abstract = {Over the years, a number of open standards have been developed and implemented in software for addressing a number of challenges, such as lock-in, interoperability and longevity of software systems and associated digital artefacts. Understanding organisational involvement and collaboration in standardisation is important for informing any future policy and organisational decisions concerning involvement in standardisation. The overarching goal of the study is to establish how organisations contribute to open standards development through editorship. Specifically, the focus is on open standards development in W3C. Through an analysis of editorship for all W3C recommendations we contribute novel findings concerning organisational involvement and collaboration, and highlight contributions from different types of organisations and countries for headquarter of each organisation. We make three principal contributions. First, we establish an overall characterisation of organisational involvement in W3C standardisation. 
Second, we report on organisational involvement in W3C standardisation over time. Third, we establish organisational collaboration in W3C standardisation through social network analysis. }, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_5}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_5}, author = {Gamalielsson, Jonas and Lundell, Bj{\"o}rn}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Berger2016, title = {An Open Continuous Deployment Infrastructure for a Self-driving Vehicle Ecosystem}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {177{\textendash}183}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {An Open Continuous Deployment Infrastructure for a Self-driving Vehicle Ecosystem}, address = {Cham}, abstract = {Self-driving vehicles are an ongoing research and engineering topic even though first automotive OEMs started to deploy such features to their premium vehicles. Chalmers University of Technology and University of Gothenburg are operating and maintaining a vehicle laboratory comprising 1/10 scaled cars, a Volvo XC90, and a Volvo FH truck to conduct studies with automated driving. This laboratory is used both from researchers from different disciplines and in education. The experimental software for all these platforms is powered by the same software environment for different hardware architectures. Therefore, maintaining and deploying new features and bugfixes to the users of this laboratory in a fast way needs to be organized in a reproducible yet easily maintainable manner. This paper outlines our open approach to encapsulate our build, test, and deployment process using VirtualBox, Docker, and Jenkins. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_14}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_14}, author = {Berger, Christian}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Abdulwahhab2016, title = {The Role of Local Open Source Communities in the Development of Open Source Projects}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {3{\textendash}15}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {The Role of Local Open Source Communities in the Development of Open Source Projects}, address = {Cham}, abstract = {This paper investigates the position of local open source communities (LOSCs) in the development of open source projects (OSPs). We have conducted an empirical study to examine the role of LOSCs, their way of working, and the benefits/challenges they experience compared to the overall global community. The qualitative investigation consisted of ten semi-structured interviews with members within different LOSCs. The results confirm the importance of LOSCs and the pivotal role they play in the development of OSPs. In many cases, they act as the middleman between individual members and the project{\textquoteright}s global community. However, LOSCs have their own kinds of challenges. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_1}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_1}, author = {Abdulwahhab, Sinan and Alabady, Yazen and Sattar, Yacoub and Hammouda, Imed}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {AbbaspourAsadollah2016, title = {A Study of Concurrency Bugs in an Open Source Software}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {16{\textendash}31}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {A Study of Concurrency Bugs in an Open Source Software}, address = {Cham}, abstract = {Concurrent programming puts demands on software debugging and testing, as concurrent software may exhibit problems not present in sequential software, e.g., deadlocks and race conditions. In aiming to increase efficiency and effectiveness of debugging and bug-fixing for concurrent software, a deep understanding of concurrency bugs, their frequency and fixing-times would be helpful. Similarly, to design effective tools and techniques for testing and debugging concurrent software understanding the differences between non-concurrency and concurrency bugs in real-word software would be useful. This paper presents an empirical study focusing on understanding the differences and similarities between concurrency bugs and other bugs, as well as the differences among various concurrency bug types in terms of their severity and their fixing time. Our basis is a comprehensive analysis of bug reports covering several generations of an open source software system. The analysis involves a total of 4872 bug reports from the last decade, including 221 reports related to concurrency bugs. 
We found that concurrency bugs are different from other bugs in terms of their fixing time and their severity. Our findings shed light on concurrency bugs and could thereby influence future design and development of concurrent software, their debugging and testing, as well as related tools. }, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_2}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_2}, author = {Abbaspour Asadollah, Sara and Sundmark, Daniel and Eldh, Sigrid and Hansson, Hans~ and Enoiu, Eduard Paul}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Coman2016, title = {Towards Open Source/Data in the Context of Higher Education: Pragmatic Case Studies Deployed in Romania}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {184{\textendash}191}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {Towards Open Source/Data in the Context of Higher Education: Pragmatic Case Studies Deployed in Romania}, address = {Cham}, abstract = {The open source ideology is unfortunately not so popular in Romania. This subject represents, to this day, an untackled problem especially in various local educational areas. The paper describes an interesting initiative taken this year by the Faculty of Computer Science, University of Ia{\c s}i, Romania to change the collective opinion by progressively pushing the new generations of students through a binding process with the ideas involved in the open source philosophy. Three ongoing initiatives addressing this problem are detailed, including the results we have obtained so far through them, and also the steps that are planned to be taken soon on the matter. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_15}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_15}, author = {Coman, Alexandru and C{\^\i}tea, Alexandru and Buraga, Sabin C.}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Heppler2016, title = {Who Cares About My Feature Request?}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {85{\textendash}96}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, address = {Cham}, abstract = {Previous studies on issue tracking systems for open source software (OSS) focused mainly on requests for bug fixes. However, requests to add a new feature or an improvement to an OSS project are often also made in an issue tracking system. These inquiries are particularly important because they determine the further development of the software. This study examines if there is any difference between requests of the IBM developer community and other sources in terms of the likelihood of successful implementation. Our study consists of a case study of the issue tracking system BugZilla in the Eclipse integrated development environment (IDE). Our hypothesis, which was that feature requests from outsiders have less chances of being implemented, than feature requests from IBM developers, was confirmed. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_7}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_7}, author = {Heppler, Lukas and Eckert, Remo and Stuermer, Matthias}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @inbook {Robles2016, title = {Women in Free/Libre/Open Source Software: The Situation in the 2010s}, booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings}, year = {2016}, pages = {163{\textendash}173}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, chapter = {Women in Free/Libre/Open Source Software: The Situation in the 2010s}, address = {Cham}, abstract = {Women are underrepresented in the IT sector. But the situation in FLOSS (free, libre, open source software) development is really extreme in this respect: past publications and studies show a female participation of around 2 \% to 5 \% and have shed some light into this problem. In this paper, we give an update the state of knowledge to the current situation of gender in FLOSS, by analyzing the results of surveying more than 2,000 contributors to FLOSS projects in 2013, of which more than 200 were women. Our findings confirm that women enter the FLOSS community later than men, do primarily other tasks than coding, participate less if they have children, and have slightly different reasons to enter (and to stay in) the development communities they join. However, we also find evidence that women are joining FLOSS projects in higher numbers in recent years, and that the share of women devoting few hours per week to FLOSS and full-time dedication is higher than for men. All in all, comparing our results with the ones from the 2000s, the context of participation of women in FLOSS has not changed much. 
}, isbn = {978-3-319-39225-7}, doi = {10.1007/978-3-319-39225-7_13}, url = {http://dx.doi.org/10.1007/978-3-319-39225-7_13}, author = {Gregorio Robles and Reina, Laura Arjona and Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Due{\~n}as Dom{\'\i}nguez, Santiago}, editor = {Kevin Crowston and Hammouda, Imed and Lundell, Bj{\"o}rn and Gregorio Robles and Gamalielsson, Jonas and Juho Lindman} } @proceedings {1830, title = {Women in Free/Libre/Open Source Software: The Situation in the 2010s}, year = {2016}, month = {05/2016}, pages = {163-173}, publisher = {Springer}, abstract = {Women are underrepresented in the IT sector. But the situation in FLOSS (free, libre, open source software) development is really extreme in this respect: past publications and studies show a female participation of around 2\% to 5\% and have shed some light into this problem. In this paper, we give an update the state of knowledge to the current situation of gender in FLOSS, by analyzing the results of surveying more than 2,000 contributors to FLOSS projects in 2013, of which more than 200 were women. Our findings confirm that women enter the FLOSS community later than men, do primarily other tasks than coding, participate less if they have children, and have slightly different reasons to enter (and to stay in) the development communities they join. However, we also find evidence that women are joining FLOSS projects in higher numbers in recent years, and that the share of women devoting few hours per week to FLOSS and full-time dedication is higher than for men. All in all, comparing our results with the ones from the 2000s, the context of participation of women in FLOSS has not changed much. }, attachments = {https://flosshub.org/sites/flosshub.org/files/paper-pre.pdf}, author = {Gregorio Robles and Reina, Laura Arjona and Gonz{\'a}lez-Barahona, Jes{\'u}s M. 
and Due{\~n}as Dominguez, Santiago} } @article {1746, title = {Evaluation of FLOSS by Analyzing Its Software Evolution:}, journal = {Journal of Information Technology Research}, volume = {8}, year = {2015}, month = {01/2015}, pages = {62 - 81}, abstract = {In today{\textquoteright}s world, management often rely on FLOSS (Free/Libre/Open Source Software) systems to run their organizations. However, the nature of FLOSS is different from the software they have been using in the last decades. Its development model is distributed, and its authors are diverse as many volunteers and companies may collaborate in the project. In this paper, we want to shed some light on how to evaluate a FLOSS system by looking at the Moodle platform, which is currently the most used learning management system among educational institutions worldwide. In contrast with other evaluation models that have been proposed so far, the one we present is based on retrieving historical information that can be obtained publicly from the Internet, allowing us to study its evolution. As a result, we will show how by using our methodology management can take informed decisions that lower the risk that organizations face when investing in a FLOSS system. }, keywords = {free software, LMS, moodle, open source, software engineering, software evaluation, software evolution}, issn = {1938-7865}, doi = {10.4018/JITR.2015010105}, attachments = {https://flosshub.org/sites/flosshub.org/files/Evaluation\%20of\%20FLOSS\%20by\%20Analyzing\%20its\%20Software\%20Evolution\%20-\%20An\%20Example\%20Using\%20the\%20Moodle\%20Platform.pdf}, author = {Macho, H{\'e}ctor J. 
and Gregorio Robles and Gonz{\'a}lez-Barahona, Jesus M} } @inbook {1741, title = {First Results About Motivation and Impact of License Changes in Open Source Projects}, booktitle = {Open Source Systems: Adoption and Impact}, series = {IFIP Advances in Information and Communication Technology}, volume = {451}, year = {2015}, pages = {137-145}, publisher = {Springer International Publishing}, organization = {Springer International Publishing}, abstract = {Free and open source software is characterized by the freedoms and criteria that are warranted by specific licenses. These licenses describe the rights and duties of the licensors and licensees. However, a licensing change may be necessary in the life of an open source project to meet legal developments or to allow the implementation of new business models. This paper examines the motivations and impacts of license changes in open source projects. After a state of the art on the subject, a set of case studies where projects changed their license is presented. Then a set of motivations to change licenses, the ways to legally make this change, the problems caused by this change and a set of benefits of the license change are discussed. }, keywords = {Business model, Contributor agreement, intellectual property, license, open source}, isbn = {978-3-319-17836-3}, doi = {10.1007/978-3-319-17837-0_13}, url = {http://dx.doi.org/10.1007/978-3-319-17837-0_13}, author = {Viseur, Robert and Gregorio Robles}, editor = {Damiani, Ernesto and Frati, Fulvio and Dirk Riehle and Wasserman, Anthony I.} } @booklet {1781, title = {Lessons Learned from Applying Social Network Analysis on an Industrial Free/Libre/Open Source Software Ecosystem}, year = {2015}, abstract = {Many software projects are no longer done in-house by a single organization. Instead, we are in a new age where software is developed by a networked community of individuals and organizations, which base their relations to each other on mutual interest. 
Paradoxically, recent research suggests that software development can actually be jointly-developed by rival firms. For instance, it is known that the mobile-device makers Apple and Samsung kept collaborating in open source projects while running expensive patent wars in the court. Taking a case study approach, we explore how rival firms collaborate in the open source arena by employing a multi-method approach that combines qualitative analysis of archival data (QA) with mining software repositories (MSR) and Social Network Analysis (SNA). While exploring collaborative processes within the OpenStack ecosystem, our research contributes to Software Engineering research by exploring the role of groups, sub-communities and business models within a high-networked open source ecosystem. Surprising results point out that competition for the same revenue model (i.e., operating conflicting business models) does not necessarily affect collaboration within the ecosystem. Moreover, while detecting the different sub-communities of the OpenStack community, we found out that the expected social tendency of developers to work with developers from the same firm (i.e., homophily) did not hold within the OpenStack ecosystem. Furthermore, while addressing a novel, complex and unexplored open source case, this research also contributes to the management literature in coopetition strategy and high-tech entrepreneurship with a rich description on how heterogeneous actors within a high-networked ecosystem (involving individuals, startups, established firms and public organizations) joint-develop a complex infrastructure for big-data in the open source arena.}, keywords = {business models, cloud computing, homophily, open source, Open-Coopetition, openstack, social network analysis, Software ecosystems}, url = {http://arxiv.org/abs/1507.04587}, author = {Teixeira, Jose and Gregorio Robles and Jesus M. 
Gonzalez-Barahona} } @book {1591, title = {Code Review Analytics: WebKit as Case Study}, series = {IFIP Advances in Information and Communication TechnologyOpen Source Software: Mobile Open Source Technologies. 10th IFIP WG 2.13 International Conference on Open Source Systems, OSS 2014, San Jos{\'e}, Costa Rica, May 6-9, 2014. Proceedings}, volume = {427}, year = {2014}, pages = {1 - 10}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, address = {Berlin, Heidelberg}, abstract = {During the last years, most of the large free / open source software projects have included code review as an usual, or even mandatory practice for changes to their code. In many cases it is implemented as a process in which a developer proposing some change needs to ask for a review by another developer before it can enter the code base. Code reviews, therefore, become a critical process for the project, which could cause delays in contributions being accepted, and risk to become a bottleneck if not enough reviewers are available. In this paper we present a methodology designed to analyze the code review process, to determine its main characteristics and parameters, and to detect potential problems with it. We also present how we have applied this methodology to the WebKit project, learning about the main characteristics of how code review works in their case.}, isbn = {978-3-642-55128-4}, issn = {1868-422X}, doi = {10.1007/978-3-642-55128-4_1}, author = {Gonz{\'a}lez-Barahona, Jes{\'u}s M. 
and Izquierdo-Cort{\'a}zar, Daniel and Gregorio Robles and Mario Gallegos}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.} } @inbook {1606, title = {Considerations Regarding the Creation of a Post-graduate Master{\textquoteright}s Degree in Free Software}, booktitle = {Open Source Software: Mobile Open Source Technologies}, series = {IFIP Advances in Information and Communication Technology}, volume = {427}, year = {2014}, pages = {123-132}, publisher = {Springer Berlin Heidelberg}, organization = {Springer Berlin Heidelberg}, abstract = { Free software has gained importance over the last few years, and can be found in almost any sphere in which {\textquoteleft}software processes{\textquoteright} are important. However, even when universities and higher education establishments include subjects concerning free programming and technologies in their curriculums, their graduates tend to attain limited technological, organisational and philosophical knowledge that limits them as regards their participation in, management and development of free software projects. This gap in skills and knowledge has recently led to a series of post-graduate studies whose objective is to offer students the possibility of acquiring competencies that will allow them to become experts in free software. This paper presents a study concerning the offers for post-graduate studies in free software that currently exist, with the intention of creating similar post-graduate studies in Ecuador. }, isbn = {978-3-642-55127-7}, doi = {10.1007/978-3-642-55128-4_17}, url = {http://dx.doi.org/10.1007/978-3-642-55128-4_17}, author = {Montes Le{\'o}n, Sergio Ra{\'u}l and Gregorio Robles and Gonz{\'a}lez-Barahona, Jes{\'u}s M. 
and S{\'a}nchez C., Luis E.}, editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.} } @conference {1694, title = {Cream of the crop}, booktitle = {The International SymposiumProceedings of The International Symposium on Open Collaboration - OpenSym {\textquoteright}14}, year = {2014}, pages = {1 - 10}, publisher = {ACM Press}, organization = {ACM Press}, address = {Berlin, GermanyNew York, New York, USA}, abstract = {In open content communities like Wikipedia and StackOverflow and in open source software projects, a small proportion of users produce a majority of the content and take on much of the required community maintenance work. Understanding this class of users is crucial to creating and sustaining healthy communities. We carried out a mixed-method study of core contributors to the Cyclopath geographic wiki and bicycle routing web site. We present our findings and organize our discussion using concepts from activity theory. We found that the Cyclopath core contributors aren{\textquoteright}t the dedicated cyclists and that the characteristics of the community shape the site, the rules, and the tools for contributing. Additionally, we found that numerous aspects about the surrounding ecology of related systems and communities may help to shape how the site functions and views itself. We draw implications for future research and design from these findings. 
}, isbn = {9781450330169}, doi = {10.1145/2641580.2641609}, author = {Katherine Panciera and Mikhil Masli and Loren Terveen} } @conference {Robles:2014:EDE:2597073.2597107, title = {Estimating Development Effort in Free/Open Source Software Projects by Mining Software Repositories: A Case Study of OpenStack}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {222{\textendash}231}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Because of the distributed and collaborative nature of free / open source software (FOSS) projects, the development effort invested in a project is usually unknown, even after the software has been released. However, this information is becoming of major interest, especially ---but not only--- because of the growth in the number of companies for which FOSS has become relevant for their business strategy. In this paper we present a novel approach to estimate effort by considering data from source code management repositories. We apply our model to the OpenStack project, a FOSS project with more than 1,000 authors, in which several tens of companies cooperate. Based on data from its repositories and together with the input from a survey answered by more than 100 developers, we show that the model offers a simple, but sound way of obtaining software development estimations with bounded margins of error. }, keywords = {effort estimation, free software, mining software repositories, open source, openstack}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597107}, url = {http://doi.acm.org/10.1145/2597073.2597107}, attachments = {https://flosshub.org/sites/flosshub.org/files/robles_0.pdf}, author = {Gregorio Robles and Gonz{\'a}lez-Barahona, Jes{\'u}s M. 
and Cervig{\'o}n, Carlos and Capiluppi, Andrea and Izquierdo-Cort{\'a}zar, Daniel} } @proceedings {roblesestimating, title = {Estimating Development Effort in Free/Open Source Software Projects by Mining Software Repositories: A Case Study of OpenStack}, year = {2014}, month = {05/2014}, abstract = {Because of the distributed and collaborative nature of free/open source software (FOSS) projects, the development effort invested in a project is usually unknown, even after the software has been released. However, this information is becoming of major interest, especially - but not only- because of the growth in the number of companies for which FOSS has become relevant for their business strategy. In this paper we present a novel approach to estimate effort by considering data from source code management repositories. We apply our model to the OpenStack project, a FOSS project with more than 1,000 authors, in which several tens of companies cooperate. Based on data from its repositories and together with the input from a survey answered by more than 100 developers, we show that the model offers a simple, but sound way of obtaining software development estimations with bounded margins of error.}, keywords = {effort estimation, openstack}, url = {http://gsyc.urjc.es/~grex/repro/2014-msr-effort/msr14-robles-estimating-effort.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr14-robles-estimating-effort.pdf}, author = {Gregorio Robles and Gonz{\'a}lez-Barahona, Jesus M and Cervig{\'o}n, Carlos and Capiluppi, Andrea} } @conference {Robles:2014:FSD:2597073.2597129, title = {FLOSS 2013: A Survey Dataset About Free Software Contributors: Challenges for Curating, Sharing, and Combining}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {396{\textendash}399}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In this data paper we describe a 
data set obtained by means of performing an on-line survey to over 2,000 Free Libre Open Source Software (FLOSS) contributors. The survey includes questions related to personal characteristics (gender, age, civil status, nationality, etc.), education and level of English, professional status, dedication to FLOSS projects, reasons and motivations, involvement and goals. We describe as well the possibilities and challenges of using private information from the survey when linked with other, publicly available data sources. In this regard, an example of data sharing will be presented and legal, ethical and technical issues will be discussed. }, keywords = {anonymization, data combining, data sharing, ethics, free software, microdata, msr data showcase, open data, open source, privacy, Survey}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597129}, url = {http://doi.acm.org/10.1145/2597073.2597129}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr14gregorio.pdf}, author = {Gregorio Robles and Reina, Laura Arjona and Serebrenik, Alexander and Vasilescu, Bogdan and Gonz{\'a}lez-Barahona, Jes{\'u}s M.} } @conference {1692, title = {Hackers on Forking}, booktitle = {The International SymposiumProceedings of The International Symposium on Open Collaboration - OpenSym {\textquoteright}14}, year = {2014}, pages = {1 - 10}, publisher = {ACM Press}, organization = {ACM Press}, address = {Berlin, GermanyNew York, New York, USA}, abstract = {All open source licenses allow the copying of an existing body of code for use as the basis of a separate development project. This practice is commonly known as forking the code. This paper presents the results of a study in which 11 programmers were interviewed about their opinions on the right to fork and the impact of forking on open source software development. 
The results show that there is a general consensus among programmers{\textquoteright} views regarding both the favourable and unfavourable aspects that stem from the right to fork. Interestingly, while all programmers noted potential downsides to the right to fork, it was seen by all as an integral component of open source software, and a right that must not be infringed regardless of circumstance or outcome.}, isbn = {9781450330169}, doi = {10.1145/2641580.2641590}, attachments = {https://flosshub.org/sites/flosshub.org/files/p06.pdf}, author = {Nyman, Linus} } @conference {1693, title = {Initial Results from the Study of the Open Source Sector in Belgium}, booktitle = {The International SymposiumProceedings of The International Symposium on Open Collaboration - OpenSym {\textquoteright}14}, year = {2014}, pages = {1 - 5}, publisher = {ACM Press}, organization = {ACM Press}, address = {Berlin, GermanyNew York, New York, USA}, abstract = {The economy of FLOSS (Free and open source software) has been the subject of numerous studies and publications, particularly on the issue of business models. However, there are fewer studies on the local networks of FLOSS providers. This research focuses on the ecosystem of Belgian FLOSS providers and, more specifically, their geographical distribution, the activities, technologies and software they support, their business models, their economic performance and the relationships between companies. The research is based on a directory containing nearly 150 companies. This directory led to the creation of a specialized search engine that helped to improve annotation. The research also uses financial data provided by the Belgian Central Balance Sheet Office. The initial results of this study show a concentration in major economic areas. 
The businesses are more active in the services and are heavily involved in activities such as infrastructure software and Web development, activities which were common in the early years of free software development. Services for the support of business software are also common. A first analysis of the graph of relationships between providers{\textquoteright} websites highlights the role that is played by the multinational IT companies, by FLOSS editors, by commercial FLOSS associations and especially by the Walloon centers of competence that offer vast training catalogs that are dedicated to FLOSS. This research opens up many perspectives for improving the automation of the company directory updates, the analysis of the relationship between enterprises, and the automation of the financial analysis of companies. }, isbn = {9781450330169}, doi = {10.1145/2641580.2641591}, author = {Viseur, Robert} } @conference {1690, title = {"The Institutionalization of Digital Openness"}, booktitle = {The International SymposiumProceedings of The International Symposium on Open Collaboration - OpenSym {\textquoteright}14}, year = {2014}, pages = {1 - 2}, publisher = {ACM Press}, organization = {ACM Press}, address = {Berlin, GermanyNew York, New York, USA}, abstract = {Around the world national and municipal governments launch open data initiatives with declared goals like increased efficiency, transparency or economic growth. However, although little of these effects have been proven, more and more administrations open up their datasets to the public. The dissertation project describes this phenomenon as the ongoing institutionalization of digital openness in the field of public sector information. With empirical evidence from three case studies in large European cities the research project intends to theorize how NGOs, hackers and certain civil servants turn open data into an institution, which more and more public bodies feel the need to adapt to. 
}, isbn = {9781450330169}, doi = {10.1145/2641580.2641626}, author = {Maximilian Heimst{\"a}dt} } @conference {1691, title = {Older Adults and Free/Open Source Software}, booktitle = {The International SymposiumProceedings of The International Symposium on Open Collaboration - OpenSym {\textquoteright}14}, year = {2014}, pages = {1 - 10}, publisher = {ACM Press}, organization = {ACM Press}, address = {Berlin, GermanyNew York, New York, USA}, abstract = {The global population is aging rapidly, and older adults are becoming increasingly technically savvy. This paper explores ways to engage these individuals to contribute to free/open source software (FOSS) projects. We conducted a pilot diary study to explore motivations, barriers, and the contribution processes of first-time contributors in a real time, qualitative manner. In addition, we measured their self-efficacy before and after their participation. We found that what drove participants were intrinsic motivations, altruism, and internal values, which differed from previous work with older adults and with the general FOSS population. We also found that self-efficacy did not change significantly, even when participants encountered significant barriers or setbacks. The top 3 barriers were lack of communication, installation issues, and documentation issues. We found that asking for and receiving help, and avoiding difficult development environments were more likely to lead to success. To verify these results, we encourage a future large-scale diary study that involves multiple demographics. Given our pilot study, we recommend that future outreach efforts involving older adults focus on how to effectively communicate and build community amongst older contributors. 
}, isbn = {9781450330169}, doi = {10.1145/2641580.2641589}, author = {Davidson, Jennifer and Umme Ayda Mannan and Rithika Naik and Ishneet Dua and Jensen, Carlos} } @article {1745, title = {Sentiment Analysis of Free/Open Source Developers: Preliminary Findings from a Case Study}, journal = {Revista Eletr{\^o}nica de Sistemas de Informa{\c c}{\~a}o}, volume = {13}, year = {2014}, month = {08/2014}, abstract = {Software development is a human intensive activity. And as such, how developers face their tasks is of major importance. In an environment such as the one that is common in FOSS (free/open source software) projects where professionals (i.e., paid developers) share the development effort with volunteers, the morale of the development and user community is of major importance. In this paper, we present a preliminary analysis using sentiment analysis techniques to a FOSS project. We therefore mine the mailing list of a project and apply these techniques to the most relevant participants. Although the application is at this time limited, we hope that this experience can be of benefit in the future to determine situations that may affect the developers or the project, such as low productivity, developer abandonment, project forking, etc. 
}, keywords = {developer productivity, FLOSS, mailing lists, natural language processing, openSUSE, sentiment analysis, software development, software repository mining}, doi = {10.5329/RESI.2014.1302006}, url = {http://189.16.45.2/ojs/index.php/reinfo/article/view/1677}, attachments = {https://flosshub.org/sites/flosshub.org/files/1677-6732-1-PB.pdf}, author = {Rousinopoulos, Athanasios-Ilias and Gregorio Robles and Gonz{\'a}lez-Barahona, Jes{\'u}s M.} } @article {1631, title = {The evolution of the laws of software evolution}, journal = {ACM Computing Surveys}, volume = {46}, year = {2013}, month = {11/2013}, pages = {1 - 28}, abstract = { After more than 40 years of life, software evolution should be considered as a mature field. However, despite such a long history, many research questions still remain open, and controversial studies about the validity of the laws of software evolution are common. During the first part of these 40 years the laws themselves evolved to adapt to changes in both the research and the software industry environments. This process of adaption to new paradigms, standards, and practices stopped about 15 years ago, when the laws were revised for the last time. However, most controversial studies have been raised during this latter period. Based on a systematic and comprehensive literature review, in this paper we describe how and when the laws, and the software evolution field, evolved. We also address the current state of affairs about the validity of the laws, how they are perceived by the research community, and the developments and challenges that are likely to occur in the coming years.}, issn = {0360-0300}, doi = {10.1145/2543581.2543595}, attachments = {https://flosshub.org/sites/flosshub.org/files/2013HerraizRRG_CSUR.pdf}, author = {Herraiz, Israel and Rodriguez, Daniel and Gregorio Robles and Jesus M. 
Gonzalez-Barahona} }

@inproceedings{1506,
  title = {Intensive Metrics for the Study of the Evolution of Open Source Projects: Case Studies from {Apache Software Foundation} Projects},
  year = {2013},
  month = may,
  abstract = {Based on the empirical evidence that the ratio of email messages in public mailing lists to versioning system commits has remained relatively constant along the history of the Apache Software Foundation (ASF), this paper has as goal to study what can be inferred from such a metric for projects of the ASF. We have found that the metric seems to be an intensive metric as it is independent of the size of the project, its activity, or the number of developers, and remains relatively independent of the technology or functional area of the project. Our analysis provides evidence that the metric is related to the technical effervescence and popularity of project, and as such can be a good candidate to measure its healthy evolution. Other, similar metrics -like the ratio of developer messages to commits and the ratio of issue tracker messages to commits- are studied for several projects as well, in order to see if they have similar characteristics.},
  url = {http://oa.upm.es/14698/1/paper.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/paper_1.pdf},
  author = {Gala, Santiago and Robles, Gregorio and Gonzalez-Barahona, Jesus M. and Herraiz Tabernero, Israel},
}

@incollection{1541,
  title = {A Preliminary Analysis of Localization in Free Software: How Translations Are Performed},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {153--167},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {Software is more than just source code. There is a myriad of elements that compose a software project, among others documentation, translations, multimedia, artwork, marketing.
In this paper, we focus on the translation efforts that free, libre, open source software (FLOSS) projects undergo to provide their software in multiple languages. We have therefore analyzed a large amount of projects for their support and procedures regarding translations, if they exist. Our results show that many, but not all, projects offer some type of support and specify some ways to those wanting to contribute. Usually, projects from a more traditional libre software domain are more prone to ease such tasks. However, there is no general way to contribute, as formats and procedures are often project-specific. We have identified as well a high number of translation-supporting tools, with many projects having their own one. All in all, information about how to contribute is the main factor for having a very internationalized application. Projects accepting and giving credit to contributing translators have high levels of internationalization, even if the process is rudimentary.},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_11},
  author = {Reina, Laura Arjona and Robles, Gregorio and Gonzalez-Barahona, Jesus M.},
  editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto},
}

@article{gonzalez2013studying,
  title = {Studying the laws of software evolution in a long-lived {FLOSS} project},
  journal = {Journal of Software: Evolution and Process},
  year = {2013},
  publisher = {Wiley Online Library},
  abstract = {Some free, open-source software projects have been around for quite a long time, the longest living ones dating from the early 1980s. For some of them, detailed information about their evolution is available in source code management systems tracking all their code changes for periods of more than 15 years. This paper examines in detail the evolution of one of such projects, glibc, with the main aim of understanding how it evolved and how it matched Lehman{\textquoteright}s laws of software evolution.
As a result, we have developed a methodology for studying the evolution of such long-lived projects based on the information in their source code management repository, described in detail several aspects of the history of glibc, including some activity and size metrics, and found how some of the laws of software evolution may not hold in this case.},
  url = {http://oa.upm.es/21355/1/smr1615.pdf},
  author = {Gonzalez-Barahona, Jesus M. and Robles, Gregorio and Herraiz, Israel and Ortega, Felipe},
}

@article{1706,
  title = {Trends in Free, Libre, Open Source Software Communities: From Volunteers to Companies / {Aktuelle Trends in Free-, Libre-, und Open-Source-Software-Gemeinschaften: Von Freiwilligen zu Unternehmen}},
  journal = {it - Information Technology},
  volume = {55},
  year = {2013},
  month = jan,
  abstract = {The first free/libre/open source software (FLOSS) development communities were composed almost exclusively of volunteers. They were individuals who, despite their affiliation, contributed to the project on their own. They decided which project to join, and their contributions were personal in nature, even when in some cases they were employees of companies with some interests in the project. GNU, the first of such communities, and some others that emerged during the late 1980s and 1990s, followed this pattern. During the 1990s corporate interests started to have a role in some FLOSS communities. Companies hired people from those communities to gain influence, or tasked their employees to contribute to them, again with the idea of influencing their decisions. During the 2000s, corporate communities, in which companies are first-class citizens, have emerged, changing the rules and redefining the role of volunteers and non-affiliated individuals. However, the role of developers, with independence of the company for which they work, is still important even in these communities.
This paper addresses this transition from volunteer-based to company-based development communities, and explores the structure and behavior of the latter.},
  issn = {1611-2776},
  doi = {10.1524/itit.2013.1012},
  url = {http://dirkriehle.com/wp-content/uploads/2013/10/itit.2013.1012.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/itit.2013.1012.pdf},
  author = {Gonzalez-Barahona, Jesus M. and Robles, Gregorio},
}

@article{1810,
  title = {Understanding How Companies Interact with Free Software Communities},
  journal = {IEEE Software},
  volume = {30},
  year = {2013},
  month = sep,
  pages = {38--45},
  abstract = {When free, open source software development communities work with companies that use their output, it{\textquoteright}s especially important for both parties to understand how this collaboration is performing. The use of data analytics techniques on software development repositories can improve factual knowledge about performance metrics.},
  issn = {0740-7459},
  doi = {10.1109/MS.2013.95},
  author = {Gonzalez-Barahona, Jesus M. and Izquierdo-Cortazar, Daniel and Maffulli, Stefano and Robles, Gregorio},
}

@article{1554,
  title = {Using software analytics to understand how companies interact in free software communities},
  journal = {IEEE Software},
  year = {2013},
  pages = {1},
  abstract = {Free, open source software development communities may become large and complex. They may also be a focus of interest for competing companies relying on their outcomes, with their employees joining the development and maintenance effort. In those cases it is specially important for both companies and communities to understand how this collaboration is working, and how it matches their policies and expectations. In this paper we show two cases (OpenStack and WebKit) which we have studied using analytics techniques on the data obtained from their software development repositories.
From them, we conclude that analytics on this data can improve the factual knowledge about how development communities are performing in aspects which are of interest to companies, and fundamental to ensure transparency and fairness.},
  keywords = {openstack, webkit},
  issn = {0740-7459},
  doi = {10.1109/MS.2013.95},
  author = {Gonzalez-Barahona, Jesus and Izquierdo, Daniel and Maffulli, Stefano and Robles, Gregorio},
  internal-note = {Shares DOI 10.1109/MS.2013.95 with entry 1810; this looks like the early-access record of the same article. Prefer citing 1810.},
}

@inproceedings{1439,
  title = {A Comprehensive Study of Software Forks: Dates, Reasons and Outcomes},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {378},
  year = {2012},
  pages = {1--14},
  publisher = {Springer},
  abstract = {Summary. In general it is assumed that a software product evolves within the authoring company or group of developers that develop the project. However, in some cases different groups of developers make the software evolve in different directions, a situation which is commonly known as a fork. In the case of free software, although forking is a practice that is considered as a last resort, it is inherent to the four freedoms. This paper tries to shed some light on the practice of forking. Therefore, we have identified significant forks, several hundreds in total, and have studied them in depth. Among the issues that have been analyzed for each fork is the date when the forking occurred, the reason of the fork, and the outcome of the fork, i.e., if the original or the forking project are still developed.
Our investigation shows, among other results, that forks occur in every software domain, that they have become more frequent in recent years, and that very few forks merge with the original project.},
  keywords = {forking, forks, free software, Legal, open source, social, software evolution, sustainability},
  attachments = {https://flosshub.org/sites/flosshub.org/files/paper_0.pdf},
  author = {Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M.},
}

@inproceedings{1458,
  title = {Do More Experienced Developers Introduce Fewer Bugs?},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {378},
  year = {2012},
  note = {"Some Mozilla Foundation projects have been the selected case of study for this paper. The analysis is based on the Mercurial repository which offers a list of repositories that can be easily cloned. As a summary, 19 projects were analyzed, with more than 100,000 commits, more than 2,500 authors and around 4 years of history up to June 2011."},
  month = sep,
  pages = {268--273},
  publisher = {Springer},
  abstract = {Developer experience is a common matter of study in the software maintenance and evolution research literature. However it is still not well understood if less experienced developers are more prone to introduce errors in the source code than their more experienced colleagues. This paper aims to study the relationships between experience and the bug introduction ratio using the Mozilla community as case of study.
As results, statistical differences among developers with different levels of experience has not been observed, when the expected result would have been the opposite.},
  keywords = {mercurial, mozilla, scm, source code analysis},
  author = {Izquierdo-Cort{\'a}zar, Daniel and Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M.},
}

@inproceedings{1813,
  title = {Modification and developer metrics at the function level: Metrics for the study of the evolution of a software project},
  booktitle = {2012 3rd International Workshop on Emerging Trends in Software Metrics (WETSoM)},
  year = {2012},
  pages = {49--55},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Zurich, Switzerland},
  abstract = {Software evolution, and particularly its growth, has been mainly studied at the file (also sometimes referred as module) level. In this paper we propose to move from the physical towards a level that includes semantic information by using functions or methods for measuring the evolution of a software system. We point out that use of functions-based metrics has many advantages over the use of files or lines of code. We demonstrate our approach with an empirical study of two Free/Open Source projects: a community-driven project, Apache, and a company-led project, Novell Evolution. We discovered that most functions never change; when they do their number of modifications is correlated with their size, and that very few authors who modify each; finally we show that the departure of a developer from a software project slows the evolution of the functions that she authored.},
  isbn = {978-1-4673-1763-4},
  doi = {10.1109/WETSoM.2012.6226993},
  author = {Robles, Gregorio and Herraiz, Israel and Daniel M.
German and Izquierdo-Cortazar, Daniel},
}

@inproceedings{948,
  title = {Beyond replication: An example of the potential benefits of replicability in the mining of software repositories community},
  booktitle = {1st Workshop on Replication in Empirical Software Engineering Research},
  year = {2010},
  month = may,
  keywords = {literature review, msr, replication},
  author = {Robles, Gregorio and German, Daniel M.},
}

@inproceedings{947,
  title = {Replicating {MSR}: A study of the potential replicability of papers published in the {Mining Software Repositories} proceedings},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {171--180},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  abstract = {This paper is the result of reviewing all papers published in the proceedings of the former International Workshop on Mining Software Repositories (MSR) (2004-2006) and now Working Conference on MSR (2007-2009). We have analyzed the papers that contained any experimental analysis of software projects for their potentiality of being replicated. In this regard, three main issues have been addressed: i) the public availability of the data used as case study, ii) the public availability of the processed dataset used by researchers and iii) the public availability of the tools and scripts. A total number of 171 papers have been analyzed from the six workshops/working conferences up to date. Results show that MSR authors use in general publicly available data sources, mainly from free software repositories, but that the amount of publicly available processed datasets is very low. Regarding tools and scripts, for a majority of papers we have not been able to find any tool, even for papers where the authors explicitly state that they have built one.
Lessons learned from the experience of reviewing the whole MSR literature and some potential solutions to lower the barriers of replicability are finally presented and discussed.},
  keywords = {data, literature review, msr, replication},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463348},
  url = {http://gsyc.urjc.es/~grex/msr2010},
  attachments = {https://flosshub.org/sites/flosshub.org/files/171MSR_2010_69.final_.pdf},
  author = {Robles, Gregorio},
}

@inproceedings{1809,
  title = {On the Analysis of Contributions from Privileged Users in Virtual Open Communities},
  booktitle = {2009 42nd Hawaii International Conference on System Sciences},
  year = {2009},
  pages = {1--10},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Waikoloa, Hawaii, USA},
  abstract = {Collaborative projects built around virtual communities on the Internet have gained momentum over the last decade. Nevertheless, their rapid growth rate rises some questions: which is the most effective approach to manage and organize their content creation process? Can these communities scale, controlling their projects as their size continues to grow over time? To answer these questions, we undertake a quantitative analysis of privileged users in FLOSS development projects and in Wikipedia. From our results, we conclude that the inequality level of user contributions in both types of initiatives is remarkably distinct, even though both communities present almost identical patterns regarding the number of distinct contributors per file (in FLOSS projects) or per article (in Wikipedia). As a result, totally open projects like Wikipedia can effectively deal with faster growing rates, while FLOSS projects may be affected by bottlenecks on committers who play critical roles.},
  isbn = {978-0-7695-3450-3},
  doi = {10.1109/HICSS.2009.328},
  author = {Ortega, Felipe and Izquierdo-Cort{\'a}zar, Daniel and Gonz{\'a}lez-Barahona, Jes{\'u}s M.
and Gregorio Robles},
}

@inproceedings{606,
  title = {Assessing {FLOSS} Communities: An Experience Report from the {QualOSS} Project},
  booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {299},
  year = {2009},
  pages = {364},
  publisher = {Springer},
  organization = {Springer},
  chapter = {38},
  abstract = {This paper presents work done in the QualOSS (Quality of Open Source Software) research project, which aims at building a methodology and tools to help in the assessment of the quality of FLOSS (free, libre, open source software) endeavors. In particular, we introduce the research done to evaluate the FLOSS endeavor communities. Following the Goal-Question-Metric paradigm, QUALOSS describes goals, the associated questions and then metrics that allow to answer the questions.},
  isbn = {978-3-642-02031-5},
  doi = {10.1007/978-3-642-02032-2_38},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Assessing\%20FLOSS\%20Communities.pdf},
  author = {Izquierdo-Cortazar, Daniel and Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s and Deprez, Jean-Christophe},
}

@inproceedings{941,
  title = {Evolution of the core team of developers in libre software projects},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {167--170},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  abstract = {In many libre (free, open source) software projects, most of the development is performed by a relatively small number of persons, the "core team". The stability and permanence of this group of most active developers is of great importance for the evolution and sustainability of the project.
In this position paper we propose a quantitative methodology to study the evolution of core teams by analyzing information from source code management repositories. The most active developers in different periods are identified, and their activity is calculated over time, looking for core team evolution patterns.},
  keywords = {core, cvs, cvsanaly, developers, evolution, gimp, scm},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069497},
  attachments = {https://flosshub.org/sites/flosshub.org/files/167core-evolution.pdf},
  author = {Robles, Gregorio and Gonzalez-Barahona, Jesus M. and Herraiz, Israel},
}

@article{1236,
  title = {Tools for the Study of the Usual Data Sources found in Libre Software Projects},
  journal = {International Journal of Open Source Software and Processes},
  volume = {1},
  year = {2009},
  pages = {24--45},
  abstract = {Due to the open nature of Free/Libre/Open Source software projects, researchers have gained access to a rich set of development-related information. Although this information is publicly available on the Internet, obtaining and analyzing it in a convenient way is not an easy task and many considerations have to be taken into account. In this paper we present the most important data sources that can be found in libre software projects and that are studied by the research community: source code, source code management systems, mailing lists and bug tracking systems. We will give advice for the problems that can be found when retrieving and preparing the data sources for a posterior analysis, as well as provide information about the tools that support these tasks.},
  keywords = {bug tracking systems, data sources, mailing lists, scm, tools},
  issn = {1942-3934},
  doi = {10.4018/jossp.2009010102},
  attachments = {https://flosshub.org/sites/flosshub.org/files/robles.pdf},
  author = {Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M.
and Izquierdo-Cortazar, Daniel and Herraiz, Israel},
}

@inproceedings{10.1109/HICSS.2009.1014,
  title = {Using Software Archaeology to Measure Knowledge Loss in Software Projects Due to Developer Turnover},
  booktitle = {2009 42nd Hawaii International Conference on System Sciences (HICSS 2009)},
  year = {2009},
  pages = {1--10},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
  abstract = {Developer turnover can result in a major problem when developing software. When senior developers abandon a software project, they leave a knowledge gap that has to be managed. In addition, new (junior) developers require some time in order to achieve the desired level of productivity. In this paper, we present a methodology to measure the effect of knowledge loss due to developer turnover in software projects. For a given software project, we measure the quantity of code that has been authored by developers that do not belong to the current development team, which we define as orphaned code. Besides, we study how orphaned code is managed by the project. Our methodology is based on the concept of software archaeology, a derivation of software evolution. As case studies we have selected four FLOSS (free, libre, open source software) projects, from purely driven by volunteers to company-supported. The application of our methodology to these case studies will give insight into the turnover that these projects suffer and how they have managed it and shows that this methodology is worth being augmented in future research.},
  keywords = {attrition, case study, developers, evince, evolution, gimp, growth, knowledge collaboration, lines of code, nautilus, quality, sloc, turnover},
  isbn = {978-0-7695-3450-3},
  doi = {10.1109/HICSS.2009.1014},
  attachments = {https://flosshub.org/sites/flosshub.org/files/07-07-08.pdf},
  author = {Izquierdo-Cortazar, Daniel and Robles, Gregorio and Ortega, Felipe and Jesus M.
Gonzalez-Barahona},
}

@inproceedings{970,
  title = {Determinism and evolution},
  booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08},
  year = {2008},
  month = may,
  pages = {1--9},
  publisher = {ACM Press},
  organization = {ACM Press},
  address = {New York, New York, USA},
  abstract = {It has been proposed that software evolution follows a Self-Organized Criticality (SOC) dynamics. This fact is supported by the presence of long range correlations in the time series of the number of changes made to the source code over time. Those long range correlations imply that the current state of the project was determined time ago. In other words, the evolution of the software project is governed by a sort of determinism. But this idea seems to contradict intuition. To explore this apparent contradiction, we have performed an empirical study on a sample of 3,821 libre (free, open source) software projects, finding that their evolution projects is short range correlated. This suggests that the dynamics of software evolution may not be SOC, and therefore that the past of a project does not determine its future except for relatively short periods of time, at least for libre software.},
  keywords = {changes, evolution, source code, sourceforge},
  isbn = {9781605580241},
  doi = {10.1145/1370750.1370752},
  attachments = {https://flosshub.org/sites/flosshub.org/files/p1-herraiz.pdf},
  author = {Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Robles, Gregorio and Herraiz, Israel},
}

@article{GonzalezBarahona2008356,
  title = {Geographic origin of libre software developers},
  journal = {Information Economics and Policy},
  volume = {20},
  number = {4},
  year = {2008},
  note = {Empirical Issues in Open Source Software},
  pages = {356--363},
  abstract = {This paper examines the claim that libre (free, open source) software involves global development.
The anecdotal evidence is that developers usually work in teams including individuals residing in many different geographical areas, time zones and even continents and that, as a whole, the libre software community is also diverse in terms of national origin. However, its exact composition is difficult to capture, since there are few records of the geographical location of developers. Past studies have been based on surveying a limited (and sometimes biased) sample and extrapolating that sample to the global distribution of developers. In this paper we present an alternate approach in which databases are analyzed to create traces of information from which the geographical origin of developers can be inferred. Applying this technique to the SourceForge users database and the mailing lists archives from several large projects, we have estimated the geographical origin of more than one million individuals who are closely related to the libre software development process. The paper concludes that the result is a good proxy for the actual distribution of libre software developers working on global projects.},
  keywords = {developers, email, email address, email archives, geography, mailing list, open source software, sourceforge, timezone, users},
  issn = {0167-6245},
  doi = {10.1016/j.infoecopol.2008.07.001},
  url = {http://www.sciencedirect.com/science/article/B6V8J-4T3DCPK-1/2/3981dfbc523eae1d1ce65fb1f0c0edb7},
  author = {Jesus M.
Gonzalez-Barahona and Robles, Gregorio and Andradas-Izquierdo, Roberto and Ghosh, Rishab Aiyer},
}

@inproceedings{Herraiz:2008:TSB:1370750.1370786,
  title = {Towards a simplification of the bug report form in {Eclipse}},
  booktitle = {Proceedings of the 2008 international working conference on Mining software repositories},
  series = {MSR {\textquoteright}08},
  year = {2008},
  pages = {145--148},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {We believe that the bug report form of Eclipse contains too many fields, and that for some fields, there are too many options. In this MSR challenge report, we focus in the case of the severity field. That field contains seven different levels of severity. Some of them seem very similar, and it is hard to distinguish among them. Users assign severity, and developers give priority to the reports depending on their severity. However, if users can not distinguish well among the various severity options, they will probably assign different priorities to bugs that require the same priority. We study the mean time to close bugs reported in Eclipse, and how the severity assigned by users affects this time. The results shows that classifying by time to close, there are less clusters of bugs than levels of severity. We therefore conclude that there is a need to make a simpler bug report form.},
  keywords = {bug fixing, bug report, bug tracking system, classification, eclipse, msr challenge, severity},
  isbn = {978-1-60558-024-1},
  doi = {10.1145/1370750.1370786},
  url = {http://doi.acm.org/10.1145/1370750.1370786},
  author = {Herraiz, Israel and German, Daniel M. and Jesus M.
Gonzalez-Barahona and Robles, Gregorio},
}

@inproceedings{541,
  title = {Using Social Network Analysis Techniques to Study Collaboration between a {FLOSS} Community and a Company},
  booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {275},
  year = {2008},
  pages = {171--186},
  publisher = {Springer},
  organization = {Springer},
  chapter = {14},
  abstract = {Because of the sheer volume of information available in FLOSS repositories, simple analysis have to face the problems of filtering the relevant information. Hence, it is essential to apply methodologies that highlight that information for a given aspect of the project. In this paper, some techniques from the social sciences have been used on data from version control systems to extract information about the development process of FLOSS projects with the aim of highlighting several processes that occur in FLOSS projects and that are difficult to obtain by other means. In particular, the collaboration between the FLOSS community and a company has been studied by selecting two projects as case studies. The results highlight aspects such as efficiency in the development process, release management and leadership turnover.},
  isbn = {978-0-387-09683-4},
  doi = {10.1007/978-0-387-09684-1_14},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Using\%20Social\%20Network.pdf},
  author = {Martinez-Romo, Juan and Robles, Gregorio and Gonzalez-Barahona, Jesus and Ortu{\~n}o-Perez, Miguel},
}

@inproceedings{667,
  title = {2nd International Workshop on Public Data about Software Development ({WoPDaSD} 2007)},
  booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {234},
  year = {2007},
  pages = {381--383},
  publisher = {Springer},
  organization = {Springer},
  chapter = {51},
  abstract = {Exchange of detailed data about software development between research teams, and specifically about data available from public repositories of libre (free, open source) software projects is becoming more and more common. This workshop will explore the benefits and problems of such exchange, and the steps needed to foster it. As a case example of data exchange, the workshop organizers suggest two large datasets to be analyzed by participants.},
  isbn = {978-0-387-72485-0},
  doi = {10.1007/978-0-387-72486-7_51},
  attachments = {https://flosshub.org/sites/flosshub.org/files/2nd\%20Intl\%20Workshop\%20on\%20Public\%20Data.pdf},
  author = {Gonzalez-Barahona, Jesus and Conklin, Megan and Robles, Gregorio},
}

@inproceedings{626,
  title = {Corporate Involvement of Libre Software: Study of Presence in {Debian} Code over Time},
  booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {234},
  year = {2007},
  pages = {121--132},
  publisher = {Springer},
  organization = {Springer},
  chapter = {10},
  abstract = {Although much of the research on the libre (free, open source) phenomenon has been focused on the involvement of volunteers, the role of companies is also important in many projects. In fact, during the last years, the involvement of companies in the libre software world seems to be raising. In this paper we present an study that shows, quantitatively, how important this involvement is in the production of the largest collection of code available for Linux: the Debian GNU/Linux distribution. By studying copyright attributions in source code, we have identified those companies with more attributed code, and the trend of corporate presence in Debian from 1998 to 2004.},
  isbn = {978-0-387-72485-0},
  doi = {10.1007/978-0-387-72486-7_10},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Corporate\%20Involvement\%20of\%20Libro.pdf},
  author = {Robles, Gregorio and Due{\~n}as, Santiago and Gonzalez-Barahona, Jesus},
}

@incollection{1803,
  title = {Volunteers in Large Libre Software Projects},
  booktitle = {Emerging Free and Open Source Software Practices},
  year = {2007},
  pages = {1--24},
  publisher = {IGI Global},
  organization = {IGI Global},
  abstract = {Most libre (free, open source) software projects rely on the work of volunteers. Therefore, attracting people who contribute their time and technical skills is of paramount importance, both in technical and economic terms. This reliance on volunteers leads to some fundamental management challenges: Volunteer contributions are inherently difficult to predict, plan, and manage, especially in the case of large projects. In this chapter we present an analysis of the evolution over time of the human resources in large libre software projects, using the Debian project, one of the largest and most complex libre software projects based mainly in voluntary work, as a case study. We have performed a quantitative investigation of data corresponding to roughly seven years, studying how volunteer involvement has affected the software released by the project, and the developer community itself.},
  doi = {10.4018/978-1-59904-210-7.ch001},
  url = {http://www.igi-global.com/chapter/volunteers-large-libre-software-projects/10080},
  author = {Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M. and Michlmayr, Martin},
  editor = {Sowe, Sulayman K. and Stamelos, Ioannis G.
and Samoladas, Ioannis},
}

@inproceedings{1017,
  title = {Forecasting the Number of Changes in {Eclipse} Using Time Series Analysis},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year = {2007},
  pages = {32},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  abstract = {In order to predict the number of changes in the following months for the project Eclipse, we have applied a statistical (non-explanatory) model based on time series analysis. We have obtained the monthly number of changes in the CVS repository of Eclipse, using the CVSAnalY tool. The input to our model was the filtered series of the number of changes per month, and the output was the number of changes per month for the next three months. Then we aggregated the results of the three months to obtain the total number of changes in the given period in the challenge.},
  keywords = {change management, cvs, cvsanaly, eclipse, prediction},
  isbn = {0-7695-2950-X},
  doi = {10.1109/MSR.2007.10},
  attachments = {https://flosshub.org/sites/flosshub.org/files/28300032.pdf},
  author = {Herraiz, Israel and Gonzalez-Barahona, Jesus M. and Robles, Gregorio},
}

@inproceedings{660,
  title = {How to Gather {FLOSS} Metrics},
  booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {234},
  year = {2007},
  pages = {361--362},
  publisher = {Springer},
  organization = {Springer},
  chapter = {44},
  abstract = {In this half-day tutorial, participants will gain hands-on exposure to key technologies for data collection about open source projects.},
  isbn = {978-0-387-72485-0},
  doi = {10.1007/978-0-387-72486-7_44},
  attachments = {https://flosshub.org/sites/flosshub.org/files/How\%20to\%20gather\%20Floss\%20Metrics.pdf},
  author = {Conklin, Megan and Gonzalez-Barahona, Jesus and Robles, Gregorio},
}

@inproceedings{1013,
  title = {Impact of the Creation of the {Mozilla Foundation} in the Activity of Developers},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year = {2007},
  pages = {28},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  abstract = {During 2003, the Mozilla project transitioned from company-promoted (sponsored by AOL) to community-promoted (sponsored by the Mozilla Foundation). What happened to the group of developers during this transition? There was any significant impact on its activity or composition? To answer these questions, we have performed an analysis of the CVS repository of Mozilla, using the CVSAnalY tool, finding little on activity, but dramatic changes in the composition of the development team.},
  keywords = {cvs, cvsanaly, developers, mining challenge, mozilla, msr challenge, revision history},
  isbn = {0-7695-2950-X},
  doi = {10.1109/MSR.2007.15},
  attachments = {https://flosshub.org/sites/flosshub.org/files/28300028.pdf},
  author = {Gonzalez-Barahona, Jesus M. and Robles, Gregorio and Herraiz, Israel},
}

@techreport{64,
  title = {Study of Available Tools},
  year = {2007},
  institution = {FLOSSmetrics},
  type = {EU Framework deliverable},
  number = {D1.1},
  url = {http://flossmetrics.org/sections/deliverables/docs/deliverables/WP1/D1.1-Study_of_Available_Tools.pdf},
  author = {Antoniades, Ioannis and Samoladas, Ioannis and Sowe, Sulayman K.
and Gregorio Robles and Koch, Stefan and Fraczek, Ksenia and Hadzisalihovic, Anis}
}

@conference{1006,
  title = {Towards a Theoretical Model for Software Growth},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year = {2007},
  pages = {21},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  abstract = {Software growth (and more broadly, software evolution) is usually considered in terms of size or complexity of source code. However in different studies, usually different metrics are used, which make it difficult to compare approaches and results. In addition, not all metrics are equally easy to calculate for a given source code, which leads to the question of which one is the easiest to calculate without losing too much information. To address both issues, in this paper present a comprehensive study, based on the analysis of about 700,000 C source code files, calculating several size and complexity metrics for all of them. For this sample, we have found double Pareto statistical distributions for all metrics considered, and a high correlation between any two of them. This would imply that any model addressing software growth should produce this Pareto distributions, and that analysis based on any of the considered metrics should show a similar pattern, provided the sample of files considered is large enough.},
  keywords = {C, complexity, evolution, freebsd, growth, halstead, lines of code, loc, mccabe, metrics, scm, size, sloc, sloccount, source code},
  isbn = {0-7695-2950-X},
  doi = {10.1109/MSR.2007.31},
  attachments = {https://flosshub.org/sites/flosshub.org/files/28300021.pdf},
  author = {Herraiz, Israel and Jesus M.
Gonzalez-Barahona and Gregorio Robles}
}

% Journal article (volume 1, issue 3); no page range is recorded for it.
@article{120,
  title = {Applying Social Network Analysis Techniques to Community-Driven Libre Software Projects},
  journal = {International Journal of Information Technology and Web Engineering},
  volume = {1},
  number = {3},
  year = {2006},
  abstract = {Source code management repositories of large, long-lived libre (free, open source) software projects can be a source of valuable data about the organizational structure, evolution, and knowledge exchange in the corresponding development communities. Unfortunately, the sheer volume of the available information renders it almost unusable without applying methodologies which highlight the relevant information for a given aspect of the project. Such methodology is proposed in this article, based on well known concepts from the social networks analysis field, which can be used to study the relationships among developers and how they collaborate in different parts of a project. It is also applied to data mined from some well known projects (Apache, GNOME, and KDE), focusing on the characterization of their collaboration network architecture. These cases help to understand the potentials of the methodology and how it is applied, but also shows some relevant results which open new paths in the understanding of the informal organization of libre software development communities.},
  keywords = {apache, conway{\textquoteright}s law, cvs, gnome, kde, scm, social network analysis, source code},
  attachments = {https://flosshub.org/sites/flosshub.org/files/06_Lopez_ijitwe_sna.pdf},
  author = {L{\'o}pez-Fern{\'a}ndez, L. and Gregorio Robles and Jesus M.
Gonzalez-Barahona and Herraiz, I.}
}

@conference{Herraiz:2006:CSN:1116163.1116405,
  title = {Comparison Between {SLOCs} and Number of Files As Size Metrics for Software Evolution Analysis},
  booktitle = {Proceedings of the Conference on Software Maintenance and Reengineering},
  series = {CSMR {\textquoteright}06},
  year = {2006},
  pages = {206--213},
  publisher = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address = {Washington, DC, USA},
  keywords = {empirical studies, libre software, metrics, software evolution},
  isbn = {0-7695-2536-9},
  url = {http://dl.acm.org/citation.cfm?id=1116163.1116405},
  author = {Herraiz, Israel and Gregorio Robles and Gonzalez-Barahona, Jesus M.}
}

@conference{686,
  title = {Contributor Turnover in Libre Software Projects},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  year = {2006},
  pages = {273--286},
  publisher = {Springer},
  organization = {Springer},
  abstract = {A common problem that management faces in software companies is the high instability of their staff. In libre (free, open source) software projects, the permanence of developers is also an open issue, with the potential of causing problems amplified by the self-organizing nature that most of them exhibit. Hence, human resources in libre software projects are even more difficult to manage: developers are in most cases not bound by a contract and, in addition, there is not a real management structure concerned about this problem. This raises some interesting questions with respect to the composition of development teams in libre software projects, and how they evolve over time. There are projects lead by their original founders (some sort of {\textquotedblleft}code gods{\textquotedblright}), while others are driven by several different developer groups over time (i.e. the project {\textquotedblleft}regenerates{\textquotedblright} itself).
In this paper, we propose a quantitative methodology, based on the analysis of the activity in the source code management repositories, to study how these processes (developers leaving, developers joining) affect libre software projects. The basis of it is the analysis of the composition of the core group, the group of developers most active in a project, for several time lapses. We will apply this methodology to several large, well-known libre software projects, and show how it can be used to characterize them. In addition, we will discuss the lessons that can be learned, and the validity of our proposal.},
  keywords = {apache, committers, core, cvs, cvsanaly, developers, evolution, freebsd, gimp, gnome, kde, mono, mozilla},
  doi = {10.1007/0-387-34226-5_28},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Contributor\%20Turnover\%20in\%20Libre\%20Software\%20Projects.pdf},
  author = {Gregorio Robles and Gonzalez-Barahona, Jesus}
}

@conference{Amor:2006:EEC:1139113.1139116,
  title = {Effort Estimation by Characterizing Developer Activity},
  booktitle = {Proceedings of the 2006 International Workshop on Economics Driven Software Engineering Research},
  series = {EDSER {\textquoteright}06},
  year = {2006},
  pages = {3--6},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {During the latest years libre (free, open source) software has gained a lot of attention from the industry. Following this interest, the research community is also studying it. For instance, many teams are performing quantitative analysis on the large quantity of data which is publicly available from the development repositories maintained by libre software projects.
However, not much of this research is focused on cost or effort estimations, despite its importance (for instance, for companies developing libre software or collaborating with libre software projects), and the availability of some data which could be useful for this purpose. Our position is that classical effort estimation models can be improved from the study of these data, at least when applied to libre software. In this paper, we focus on the characterization of developer activity, which we argue can improve effort estimation. This activity can be traced with a lot of detail, and the resulting data can also be used for validation of any effort estimation model.},
  keywords = {developer characterization, effort estimation, mining software repositories, open source software, software economics},
  isbn = {1-59593-396-4},
  doi = {10.1145/1139113.1139116},
  url = {http://doi.acm.org/10.1145/1139113.1139116},
  author = {Amor, Juan Jose and Gregorio Robles and Jesus M. Gonzalez-Barahona}
}

@conference{Robles:2006:ESE:1172962.1173018,
  title = {Empirical Software Engineering Research on Free/Libre/Open Source Software},
  booktitle = {Proceedings of the 22nd IEEE International Conference on Software Maintenance},
  series = {ICSM {\textquoteright}06},
  year = {2006},
  pages = {347--350},
  publisher = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address = {Washington, DC, USA},
  abstract = {Public available data sources are an important knowledge generator from which researchers can obtain, mostly in a non-intrusive way, data and facts from software projects. We present a methodological approach to the data sources commonly found in libre (free, open source) software projects over the Internet, explain how to extract these data and enhance them and offer some ways of analyzing it from various perspectives.
The whole process has been implemented with tools that automatize the process so that an ample amount of analysis from various angles (that range from software maintenance and software evolution to the social structure of the underlying organization in charge of the development) of a huge amount of software projects has been used as case studies. This work demonstrates that it is possible to build research methodologies that can be applied to a large quantity of software projects and that empirical software engineering studies have not to refer to a limited number of software projects. Although specifically targeted to libre software development, many of the techniques and lessons learned can be generally applied to other types of software environments.},
  isbn = {0-7695-2354-4},
  doi = {10.1109/ICSM.2006.25},
  url = {http://dx.doi.org/10.1109/ICSM.2006.25},
  author = {Gregorio Robles}
}

@conference{Robles:2006:GLD:1137983.1138017,
  title = {Geographic location of developers at {SourceForge}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {144--150},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {The development of libre (free/open source) software is usually performed by geographically distributed teams. Participation in most cases is voluntary, sometimes sporadic, and often not framed by a pre-defined management structure. This means that anybody can contribute, and in principle no national origin has advantages over others, except for the differences in availability and quality of Internet connections and language. However, differences in participation across regions do exist, although there are little studies about them. In this paper we present some data which can be the basis for some of those studies.
We have taken the database of users registered at SourceForge, the largest libre software development web-based platform, and have inferred their geographical locations. For this, we have applied several techniques and heuristics on the available data (mainly e-mail addresses and time zones), which are presented and discussed in detail. The results show a snapshot of the regional distribution of SourceForge users, which may be a good proxy of the actual distribution of libre software developers. In addition, the methodology may be of interest for similar studies in other domains, when the available data is similar (as is the case of mailing lists related to software projects).},
  keywords = {distributed, email, email address, free software, geographical location, geography, libre software, mining software repositories, open source software, sourceforge, timezone},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138017},
  url = {http://doi.acm.org/10.1145/1137983.1138017},
  attachments = {https://flosshub.org/sites/flosshub.org/files/144GeographicLocation.pdf},
  author = {Gregorio Robles and Jesus M. Gonzalez-Barahona}
}

@conference{Robles:2006:MLS:1137983.1137986,
  title = {Mining large software compilations over time: another perspective of software evolution},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {3--9},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {With the success of libre (free, open source) software, a new type of software compilation has become increasingly common. Such compilations, often referred to as {\textquoteleft}distributions{\textquoteright}, group hundreds, if not thousands, of software applications and libraries written by independent parties into an integrated system.
Software compilations raise a number of questions that have not been targeted so far by software evolution, which usually focuses on the evolution of single applications. Undoubtedly, the challenges that software compilations face differ from those found in single software applications. Nevertheless, it can be assumed that both, the evolution of applications and that of software compilations, have similarities and dependencies. In this sense, we identify a dichotomy, common to that in economics, of software evolution in the small (micro-evolution) and in the large (macro-evolution). The goal of this paper is to study the evolution of a large software compilation, mining the publicly available repository of a well-known Linux distribution, Debian. We will therefore investigate changes related to hundreds of millions of lines of code over seven years. The aspects that will be covered in this paper are size (in terms of number of packages and of number of lines of code), use of programming languages, maintenance of packages and file sizes.},
  keywords = {debian, distributions, evolution, large software collections, lines of code, loc, metrics, mining software repositories, size, sloc, sloccount, software evolution, software integrators},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137986},
  url = {http://doi.acm.org/10.1145/1137983.1137986},
  attachments = {https://flosshub.org/sites/flosshub.org/files/3miningLarge.pdf},
  author = {Gregorio Robles and Jesus M.
Gonzalez-Barahona and Martin Michlmayr and Amor, Juan Jose}
}

@conference{Robles:2005:DIM:1083142.1083162,
  title = {Developer identification methods for integrated data from various sources},
  booktitle = {Proceedings of the 2005 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}05},
  year = {2005},
  pages = {106--110},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Studying a software project by mining data from a single repository has been a very active research field in software engineering during the last years. However, few efforts have been devoted to perform studies by integrating data from various repositories, with different kinds of information, which would, for instance, track the different activities of developers. One of the main problems of these multi-repository studies is the different identities that developers use when they interact with different tools in different contexts. This makes them appear as different entities when data is mined from different repositories (and in some cases, even from a single one). In this paper we propose an approach, based on the application of heuristics, to identify the many identities of developers in such cases, and a data structure for allowing both the anonymized distribution of information, and the tracking of identities for verification purposes. The methodology will be presented in general, and applied to the GNOME project as a case example. Privacy issues and partial merging with new data sources will also be considered and discussed.},
  keywords = {anonymization, bug tracker, developers, email, email address, gnome, identity, mailing list, privacy, source code, version control},
  isbn = {1-59593-123-6},
  doi = {10.1145/1082983.1083162},
  url = {http://doi.acm.org/10.1145/1082983.1083162},
  attachments = {https://flosshub.org/sites/flosshub.org/files/106DeveloperIdentification.pdf},
  author = {Gregorio Robles and Jesus M.
Gonzalez-Barahona}
}

@conference{Robles05anempirical,
  title = {An empirical approach to Software Archaeology},
  booktitle = {Poster Proceedings of the International Conference on Software Maintenance},
  year = {2005},
  pages = {47--50},
  abstract = {The term {\textquotedblleft}software archaeology{\textquotedblright} provides a useful metaphor of the tasks that a software developer has to face when performing maintenance on large software projects. The source code of a program at any point in time is the result of many different changes performed in the past, usually by several people, which can be tracked when a version control system is used. We have designed a methodology for analyzing with detail the age of the source code in such cases, and have applied it to several large software projects. As a part of the methodology, we define a set of indexes which can help to characterize the history of a software system, and discuss how those could be used to estimate its past and future maintenance. We also show how our approach to software archaeology is simple both conceptually and computationally, but still very powerful at uncovering useful information.},
  author = {Gregorio Robles and Jesus M. Gonzalez-Barahona and Herraiz, Israel}
}

% Workshop paper: the venue is carried in booktitle, rewritten from the inverted form found in the export.
@conference{130,
  title = {Evolution and growth in large libre software projects},
  booktitle = {Eighth International Workshop on Principles of Software Evolution},
  year = {2005},
  pages = {165--174},
  author = {Gregorio Robles and Amor, J. J. and Jesus M. Gonzalez-Barahona and Herraiz, I.}
}

@conference{731,
  title = {Evolution of Volunteer Participation in Libre Software Projects: Evidence from {Debian}},
  booktitle = {OSS2005: Open Source Systems},
  year = {2005},
  note = {"For each release we have retrieved the corresponding sources.gz file (see below) from the Debian archive. From it we have extracted information about the packages and their maintainers...."
},
  pages = {100--107},
  abstract = {Most libre software projects rely on the work of volunteers. Therefore, attracting people who contribute their time and technical skills is of paramount importance, both in technical and economic terms. This reliance on volunteers leads to some fundamental management challenges: volunteer contributions are inherently difficult to predict, plan and manage, especially in the case of large projects. In this paper we analyze the evolution in time of the human resources of one of the largest and most complex libre software projects composed primarily of volunteers, the Debian project. Debian currently has around 1300 volunteers working on several tasks: much activity is focused on packaging software applications and libraries, but there is also major work related to the maintenance of the infrastructure needed to sustain the development. We have performed a quantitative investigation of data from almost seven years, studying how volunteer involvement has affected the software...},
  keywords = {contributors, debian, maintainers, PopCon, popularity, Volunteers},
  url = {http://pascal.case.unibz.it/handle/2038/857},
  attachments = {https://flosshub.org/sites/flosshub.org/files/robles_barahona_michlmayr-evolution_participation.pdf},
  author = {Gregorio Robles and Jesus M. Gonzalez-Barahona and Martin Michlmayr}
}

% Workshop paper with a page range; the venue is carried in booktitle.
@conference{Lopez-Fernandez_applyingsocial,
  title = {Applying Social Network Analysis to the Information in {CVS} Repositories},
  booktitle = {International Workshop on Mining Software Repositories (MSR 2004)},
  year = {2004},
  pages = {101--105},
  abstract = {The huge quantities of data available in the CVS repositories of large, long-lived libre (free, open source) software projects, and the many interrelationships among those data offer opportunities for extracting large amounts of valuable information about their structure, evolution and internal processes.
Unfortunately, the sheer volume of that information renders it almost unusable without applying methodologies which highlight the relevant information for a given aspect of the project. In this paper, we propose the use of a well known set of methodologies (social network analysis) for characterizing libre software projects, their evolution over time and their internal structure. In addition, we show how we have applied such methodologies to real cases, and extract some preliminary conclusions from that experience.},
  keywords = {apache, complex networks, cvs, gnome, kde, libre software engineering, source code, source code repositories, visualization techniques, vizualization},
  attachments = {https://flosshub.org/sites/flosshub.org/files/101ApplyingSocial.pdf},
  author = {L{\'o}pez-Fern{\'a}ndez, L. and Gregorio Robles and Jesus M. Gonzalez-Barahona}
}

% NOTE(review): this entry has authors and pages but no venue, and standard styles do not print author for the proceedings type; confirm the venue and retype as a conference paper.
@proceedings{1190,
  title = {Community structure of modules in the {Apache} project},
  year = {2004},
  pages = {44--48},
  abstract = {The relationships among modules in a software project of a certain size can give us much information about its internal organization and a way to control and monitor development activities and evolution of large libre software projects. In this paper, we show how information available in CVS repositories can be used to study the structure of the modules in a project when they are related by the people working in them, and how techniques taken from the social networks fields can be used to highlight the characteristics of that structure. As a case example, we also show some results of applying this methodology to the Apache project in several points in time. Among other facts, it is shown how the project evolves and is self-structuring, with developer communities of modules corresponding to semantically related families of modules.},
  keywords = {apache, cvs, source code},
  attachments = {https://flosshub.org/sites/flosshub.org/files/gonzalezBarahona44-48.pdf},
  author = {Jesus M.
Gonzalez-Barahona and Luis Lopez and Gregorio Robles}
}

% No journal is recorded for this item, so it is typed as misc rather than article.
@misc{flosswp176,
  title = {Executable source code and non-executable source code},
  year = {2004},
  month = jun,
  abstract = {The concept of source code, understood as the source components used to obtain a binary, ready to execute version of a program, comprises currently more than source code written in a programming language. Specially when we move apart from systems-programming and enter the realm of end-user applications, we find source files with documentation, interface specifications, internationalization and localization modules, multimedia files, etc. All of them are source code in the sense that the developer works directly with them, and the application is built automatically using them as input. This paper discusses the relationship between {\textquoteleft}classical{\textquoteright} source code (usually written in a programming language) and these other files by analyzing a publicly-available software versioning repository. Aspects that have been studied include the nature of the software repository, the different mixtures of source code found in several software projects stored in it, the specialization of developers to the different tasks, etc.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/robles-barahona_source_code.pdf},
  author = {Gregorio Robles}
}

% Workshop paper: the proceedings title is carried in booktitle and the short venue name in series.
@conference{flosswp178,
  title = {{GlueTheos}: Automating the Retrieval and Analysis of Data from Publicly Available Software Repositories},
  booktitle = {Proceedings of the 2004 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}04},
  year = {2004},
  month = may,
  abstract = {For efficient, large scale data mining of publicly available information about libre (free, open source) software projects, automating the retrieval and analysis processes is a must.
A system implementing such automation must have into account the many kinds of repositories with interesting information (each with its own structure and access methods), and the many kinds of analysis which can be applied to the retrieved data. In addition, such a system should be capable of interfacing and reusing as much existing software for both retrieving and analyzing data as possible. As a proof of concept of how that system could be, we started sometime ago to implement the GlueTheos system, featuring a modular, flexible architecture which has been already used in several of our studies of libre software projects. In this paper we show its structure, how it can be used, and how it can be extended.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/robles-barahona-ghosh_gluetheos.pdf},
  author = {Gregorio Robles and Jesus M. Gonzalez-Barahona and Rishab Aiyer Ghosh}
}

% NOTE(review): this entry has authors but no venue, and standard styles do not print author for the proceedings type; confirm the venue and retype as a conference paper.
@proceedings{131,
  title = {Remote analysis and measurement of libre software systems by means of the {CVSanaly} tool},
  year = {2004},
  address = {Edinburgh, Scotland},
  abstract = {Libre (free, open source) software is one of the paradigmatic cases where heavy use of telematic tools and user-driven software development are key points. This paper proposes a methodology for measuring and analyzing remotely big libre software projects using publicly-available data from their version control repositories. By means of a tool called CVSAnalY that has been implemented following this methodology, measurements and analyses can be made in an automatic and non-intrusive way, providing real-time and historical data about the project and its contributors.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/robles-koch-barahona_cvsanaly.pdf},
  author = {Gregorio Robles and Koch, Stefan and Jesus M.
Gonzalez-Barahona}
}

% NOTE(review): this entry has authors and pages but no venue, and standard styles do not print author for the proceedings type; confirm the venue and retype as a conference paper.
% No month is recorded: the exported month value only repeated the year.
@proceedings{1178,
  title = {Studying the evolution of libre software projects using publicly available data},
  year = {2003},
  pages = {111--115},
  attachments = {https://flosshub.org/sites/flosshub.org/files/111-115.pdf},
  author = {Gregorio Robles and Jesus M. Gonzalez-Barahona and Jose Centeno-Gonz{\'a}lez and Vicente Matellan-Olivera and Luis Rodero-Merino}
}

% No journal is recorded for this item, so it is typed as misc rather than article.
@misc{46,
  title = {Free/Libre and Open Source Software: Survey and Study},
  year = {2002},
  author = {Rishab Aiyer Ghosh and R{\"u}diger Glott and Krieger, B. and Gregorio Robles}
}