@inproceedings{1902,
  title = {Assessing Code Authorship: The Case of the {Linux} Kernel},
  volume = {496},
  year = {2017},
  month = may,
  pages = {151--163},
  publisher = {Springer},
  abstract = {Code authorship is a key information in large-scale open-source systems. Among others, it allows maintainers to assess division of work and identify key collaborators. Interestingly, open-source communities lack guidelines on how to manage authorship. This could be mitigated by setting to build an empirical body of knowledge on how authorship-related measures evolve in successful open-source communities. Towards that direction, we perform a case study on the Linux kernel. Our results show that: (a) only a small portion of developers (26\%) makes significant contributions to the code base; (b) the distribution of the number of files per author is highly skewed{\textemdash}a small group of top-authors (3\%) is responsible for hundreds of files, while most authors (75\%) are responsible for at most 11 files; (c) most authors (62\%) have a specialist profile; (d) authors with a high number of co-authorship connections tend to collaborate with others with less connections.},
  keywords = {code authorship, developer network, linux kernel},
  doi = {10.1007/978-3-319-57735-7_15},
  url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_15},
  author = {Avelino, Guilherme and Passos, Leonardo and Hora, Andre and Valente, Marco Tulio}
}

@inproceedings{1915,
  title = {Classifying code comments in {Java} open-source software systems},
  year = {2017},
  note = {"we conducted an in-depth analysis of the comments in the source code files of six major OSS systems in Java"},
  month = may,
  pages = {227--237},
  abstract = {Code comments are a key software component containing information about the underlying implementation. Several studies have shown that code comments enhance the readability of the code. Nevertheless, not all the comments have the same goal and target audience. 
In this paper, we investigate how six diverse Java OSS projects use code comments, with the aim of understanding their purpose. Through our analysis, we produce a taxonomy of source code comments; subsequently, we investigate how often each category occur by manually classifying more than 2,000 code comments from the aforementioned projects. In addition, we conduct an initial evaluation on how to automatically classify code comments at line level into our taxonomy using machine learning; initial results are promising and suggest that an accurate classification is within reach.},
  keywords = {java, Survey},
  author = {Pascarella, Luca and Bacchelli, Alberto}
}

@inproceedings{1909,
  title = {How Does Contributors{\textquoteright} Involvement Influence the Build Status of an Open-Source Software Project?},
  year = {2017},
  note = {We used the data released by TravisTorrent on 06-Dec-2016 as our main data source.},
  month = may,
  pages = {475--478},
  abstract = {The recent introduction of the pull-based development model promoted agile development practices such as Code Reviews and Continuous Integration (CI). CI, in particular, is currently a standard development practice in open-source software (OSS) projects. Although it is well-known that OSS contributors have different involvements (e.g., while some developers drive the project, there is a long tail of peripheral developers), little is known about how the contributor{\textquoteright}s degree of participation can influence the build status of an OSS project. Through TravisTorrent{\textquoteright}s dataset, we compare the success rates of builds made by casual and non-casual contributors and what factors on their contributions may influence the build result. Our results suggest that there is no representative difference between their build success (they are similar in 85\% of the analyzed projects), meaning that being a casual contributor is not a strong indicator for creating failing builds. 
Also, factors like the size of their contributions and the number of project configurations (jobs) have the potential of impacting the build success.},
  keywords = {travis torrent},
  author = {Reboucas, Marcel and Santos, Renato O. and Pinto, Gustavo and Castor, Fernando}
}

@inproceedings{1925,
  title = {Investigating developers{\textquoteright} email discussions during decision-making in {Python} language evolution},
  booktitle = {Proceedings of the 21st International Conference on Evaluation and Assessment in Software Engineering - EASE{\textquoteright}17},
  year = {2017},
  pages = {286--291},
  publisher = {ACM Press},
  organization = {ACM Press},
  address = {New York, New York, USA},
  abstract = {Context: Open Source Software (OSS) developers use mailing lists as their main forum for discussing the evolution of a project. However, the use of mailing lists by developers for decision-making has not received much research attention. Objective: We have explored this issue by studying developers{\textquoteright} email discussions around Python Enhancement Proposals (PEPs). Method: Our dataset comprised 42,672 emails from six different mailing lists pertaining to PEP development. We performed multiple forms of analysis on these emails, involving both quantitative measures (e.g., frequency) and deeper analysis of specific PEP discussions (i.e., outlier analysis). Results: Out of three PEP types (Informational, Process and Standard Track), Standard Track PEPs attract a large amount of discussion (both in volume and average number of messages per proposal). Our study also identified specific PEP states and topics that generated a disproportionate amount of discussion. Conclusion: Our outcomes point to several opportunities for improving the management of an OSS team based on the knowledge generated from discussions. 
We have also identified several interesting avenues for future work such as identifying individuals or groups that present persuasive arguments during decision-making.},
  isbn = {9781450348041},
  doi = {10.1145/3084226.3084271},
  url = {https://www.researchgate.net/publication/317486549_Investigating_developers\%27_email_discussions_during_decision-making_in_Python_language_evolution},
  author = {Stanger, Nigel and Rainer, Austen and Licorish, Sherlock A. and Sharma, Pankajeshwara and Savarimuthu, Bastin Tony Roy}
}

@inproceedings{1919,
  title = {A Large-Scale Study on the Usage of Testing Patterns that Address Maintainability Attributes},
  year = {2017},
  note = {"we conducted a large-scale empirical study to measure the application of software testing in the open source community" Our novel approach includes a data set of 82,447 open source projects written in 48 languages The data used in this study include 82,447 open source projects, 251 unit testing frameworks, and 4 unit testing patterns data url: https://goo.gl/Mc7tHk},
  month = may,
  pages = {391--401},
  abstract = {Test case maintainability is an important concern, especially in open source and distributed development environments where projects typically have high contributor turnover with varying backgrounds and experience, and where code ownership changes often. Similar to design patterns, patterns for unit testing promote maintainability quality attributes such as ease of diagnoses, modifiability, and comprehension. In this paper, we report the results of a large-scale study on the usage of four xUnit testing patterns which can be used to satisfy these maintainability attributes. This is a first-of-its-kind study which developed automated techniques to investigate these issues across 82,447 open source projects, and the findings provide more insight into testing practices in open source projects. 
Our results indicate that only 17\% of projects had test cases, and from the 251 testing frameworks we studied, 93 of them were being used. We found 24\% of projects with test files implemented patterns that could help with maintainability, while the remaining did not use these patterns. Multiple qualitative analyses indicate that usage of patterns was an ad-hoc decision by individual developers, rather than motivated by the characteristics of the project, and that developers sometimes used alternative techniques to address maintainability concerns.},
  keywords = {maintenance, mining software repositories, msr, Unit Test Frameworks, Unit Test Patterns, Unit Testing},
  author = {Gonzalez, Danielle and Santos, Joanna C. S. and Popovich, Andrew and Mirakhorli, Mehdi and Nagappan, Mei}
}

@inproceedings{1911,
  title = {Machine Learning-Based Detection of Open Source License Exceptions},
  year = {2017},
  note = {"We address these questions by first performing a large scale mining-based study... [W]e analyzed the source code of 51,754 projects written in six different programming languages (Ruby, Javascript, Python, C, C++, and C$\#$) hosted on GitHub."},
  month = may,
  pages = {118--129},
  abstract = {From a legal perspective, software licenses govern the redistribution, reuse, and modification of software as both source and binary code. Free and Open Source Software (FOSS) licenses vary in the degree to which they are permissive or restrictive in allowing redistribution or modification under licenses different from the original one(s). In certain cases developers may modify the license by appending to it an exception to specifically allow reuse or modification under a particular condition. These exceptions are an important factor to consider for license compliance analysis since they modify the standard (and widely understood) terms of the original license. 
In this work, we first perform a large-scale empirical study on the change history of over 51k FOSS systems aimed at quantitatively investigating the prevalence of known license exceptions and identifying new ones. Subsequently, we performed a study on the detection of license exceptions by relying on machine learning. We evaluated the license exception classification with four different supervised learners and sensitivity analysis. Finally we present a categorization of license exceptions and explain their implications.},
  keywords = {classifier, empirical studies, license, machine learning},
  doi = {10.1109/ICSE.2017.19},
  author = {Vendome, Christopher and Linares-Vasquez, Mario and Bavota, Gabriele and Di Penta, Massimiliano and German, Daniel M. and Poshyvanyk, Denys}
}

@article{1868,
  title = {Managing knowledge sharing in distributed innovation from the perspective of developers: empirical study of open source software projects in {China}},
  journal = {Technology Analysis \& Strategic Management},
  volume = {29},
  year = {2017},
  month = jan,
  pages = {1--22},
  abstract = {Knowledge sharing is the key factor that influences the performance of open source software (OSS) projects, which are the representative cases of distributed innovation. This paper aims to explore the mechanism of knowledge sharing in OSS projects from the perspective of developers in China. A quantitative method with the analysis of 403 valid questionnaires is adopted. A series of hypotheses about how distributed innovation (independent variables) influences knowledge sharing (mediating variable) and then affects the performance of OSS projects (dependent variable) are tested and approved. On the one side, we argue that developers will actively affect knowledge sharing in terms of participative motivation, social network and organisational culture. On the other hand, users may also affect the knowledge sharing when considering innovation willingness and capacity. 
It is interesting to find that social network is the most important factor in Chinese cases. It is strongly recommended to strengthen the collaboration between software companies and OSS communities.},
  keywords = {Distributed innovation, knowledge sharing, performance of OSS projects, user innovation},
  issn = {1465-3990},
  doi = {10.1080/09537325.2016.1194387},
  author = {Chen, Xiaohong and Zhou, Yuan and Probert, David and Su, Jun}
}

@inproceedings{1921,
  title = {Who Added that Permission to My App? An Analysis of Developer Permission Changes in Open Source {Android} Apps},
  year = {2017},
  note = {"Our first step was to collect open source Android repositories from F-Droid ... We collected the git repositories for each app, ... we recorded all permissions, including those which were custom. At the time of our analysis, F-Droid contained information for 2,372 open source Android apps. ... This process identified 1,402 apps that had a AndroidManifest.xml file with a history of commits... we created a tool known as Open Source Android Repository Analyzer (oSARA)...we extracted version control commit information ... extracts all committed AndroidManifest.xml files from the version control history ...the committed version of the AndroidManifest.xml file was also extracted from the repositories, and all metadata was stored in a SQLite database"},
  month = may,
  pages = {165--169},
  abstract = {Android applications rely on a permission-based model to carry out core functionality. Appropriate permission usage is imperative for ensuring device security and protecting the user{\textquoteright}s desired privacy levels. But who is making the important decisions of which permissions the app should request? Are they experienced developers with the appropriate project knowledge to make such important decisions, or are these crucial choices being made by those with relatively minor amounts of contributions to the project? 
When are these permission-related decisions being made in the app{\textquoteright}s development life cycle? We examined 1,402 Android version control repositories containing over 331,318 commits including 18,751 AndroidManifest.xml versions to better understand when, why, and who is adding permissions to apps. We found that (I) developers with more experience are more likely to make permission-based changes (II) permissions are typically added earlier in apps{\textquoteright} commit lifetime, but their removal is more sustained throughout the commit lifetime (III) developers reverting permission-based changes are typically more experienced than developers who initially made the change being reverted.},
  keywords = {android, mobile},
  author = {Krutz, Daniel E. and Munaiah, Nuthan and Peruma, Anthony and Mkaouer, Mohamed Wiem}
}

@incollection{Poo-Caamano2016,
  title = {Herding Cats: A Case Study of Release Management in an Open Collaboration Ecosystem},
  booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings},
  year = {2016},
  pages = {147--162},
  publisher = {Springer International Publishing},
  organization = {Springer International Publishing},
  address = {Cham},
  abstract = {Release management in large-scale software development projects requires significant communication and coordination. It is particularly challenging in Free and Open Source Software (FOSS) ecosystems, in which hundreds of loosely connected developers and their projects need to be coordinated to release software to a schedule. To better understand this process and its challenges, we analyzed over two and half years of communication in the GNOME ecosystem and studied developers{\textquoteright} interactions. 
We cataloged communication channels, categorized high level communication and coordination activities in one of them, and triangulated our results by interviewing developers. We found that a release schedule, influence instead of direct control, and diversity are factors that impact positively the release process in the GNOME ecosystem. Our results can help organizations build better large-scale teams and show that research focused on individual projects might miss important parts of the picture.},
  isbn = {978-3-319-39225-7},
  doi = {10.1007/978-3-319-39225-7_12},
  url = {http://dx.doi.org/10.1007/978-3-319-39225-7_12},
  author = {Poo-Caama{\~n}o, Germ{\'a}n and Singer, Leif and Knauss, Eric and German, Daniel M.},
  editor = {Crowston, Kevin and Hammouda, Imed and Lundell, Bj{\"o}rn and Robles, Gregorio and Gamalielsson, Jonas and Lindman, Juho}
}

@inproceedings{1826,
  title = {How Software Developers Use Work Breakdown Relationships in Issue Repositories},
  year = {2016},
  month = may,
  pages = {281--285},
  abstract = {Software developers use issues as a means to describe a range of activities to be undertaken on a software system, including features to be added and defects that require fixing. When creating issues, software developers expend manual effort to specify relationships between issues, such as one issue blocking another or one issue being a sub-task of another. In particular, developers use a variety of relationships to express how work is to be broken down on a project. To better understand how software developers use work breakdown relationships between issues, we manually coded a sample of work breakdown relationships from three open source systems. We report on our findings and describe how the recognition of work breakdown relationships opens up new ways to improve software development techniques.},
  doi = {10.1145/2901739.2901779},
  author = {Thompson, C. Albert and Murphy, Gail C. 
and Palyart, Marc and Ga{\v s}paric, Marko}
}

@incollection{Hirao2016,
  title = {The Impact of a Low Level of Agreement Among Reviewers in a Code Review Process},
  booktitle = {Open Source Systems: Integrating Communities: 12th IFIP WG 2.13 International Conference, OSS 2016, Gothenburg, Sweden, May 30 - June 2, 2016, Proceedings},
  year = {2016},
  pages = {97--110},
  publisher = {Springer International Publishing},
  organization = {Springer International Publishing},
  address = {Cham},
  abstract = {Software code review systems are commonly used in software development. In these systems, many patches are submitted to improve the quality. To verify the quality, voting is commonly used by contributors; however, there still exists a major problem, namely, that reviewers do not always simply reach a broad agreement. In our previous study, we found that consensus is not usually reached, implying that an individual reviewer{\textquoteright}s final decision usually differs from that of the majority of the other reviewers. In this study, we further investigate the reasons why such situations often occur, and provide suggestions for better handling of these problems. Our analysis of the Qt and OpenStack project datasets allow us to suggest that a patch owner should select more appropriate reviewers who often agree with others{\textquoteright} decisions. 
},
  isbn = {978-3-319-39225-7},
  doi = {10.1007/978-3-319-39225-7_8},
  url = {http://dx.doi.org/10.1007/978-3-319-39225-7_8},
  author = {Hirao, Toshiki and Ihara, Akinori and Ueda, Yuki and Phannachitta, Passakorn and Matsumoto, Ken-ichi},
  editor = {Crowston, Kevin and Hammouda, Imed and Lundell, Bj{\"o}rn and Robles, Gregorio and Gamalielsson, Jonas and Lindman, Juho}
}

@article{1847,
  title = {License usage and changes: a large-scale study on {GitHub}},
  journal = {Empirical Software Engineering},
  year = {2016},
  abstract = {Open source software licenses determine, from a legal point of view, under which conditions software can be integrated and redistributed. The reason why developers of a project adopt (or change) a license may depend on various factors, e.g., the need for ensuring compatibility with certain third-party components, the perspective towards redistribution or commercialization of the software, or the need for protecting against somebody else{\textquoteright}s commercial usage of the software. This paper reports a large empirical study aimed at quantitatively and qualitatively investigating when and why developers adopt or change software licenses. Specifically, we first identify license changes in 1,731,828 commits, representing the entire history of 16,221 Java projects hosted on GitHub. Then, to understand the rationale of license changes, we perform a qualitative analysis on 1,160 projects written in seven different programming languages, namely C, C++, C$\#$, Java, Javascript, Python, and Ruby{\textemdash}following an open coding approach inspired by grounded theory{\textemdash}on commit messages and issue tracker discussions concerning licensing topics, and whenever possible, try to build traceability links between discussions and changes. On one hand, our results highlight how, in different contexts, license adoption or changes can be triggered by various reasons. 
On the other hand, the results also highlight a lack of traceability of when and why licensing changes are made. This can be a major concern, because a change in the license of a system can negatively impact those that reuse it. In conclusion, results of the study trigger the need for better tool support in guiding developers in choosing/changing licenses and in keeping track of the rationale of license changes.},
  issn = {1573-7616},
  doi = {10.1007/s10664-016-9438-4},
  author = {Vendome, Christopher and Bavota, Gabriele and Di Penta, Massimiliano and Linares-V{\'a}squez, Mario and German, Daniel and Poshyvanyk, Denys}
}

@inproceedings{1800,
  title = {Big data analytics on large-scale socio-technical software engineering archives},
  booktitle = {2015 3rd International Conference on Information and Communication Technology (ICoICT)},
  year = {2015},
  pages = {65--69},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Nusa Dua, Bali, Indonesia},
  abstract = {Given the fast growing nature of software engineering data in online software repositories and open source communities, it would be helpful to analyse these assets to discover valuable information about the software engineering development process and other related data. Big Data Analytics (BDA) techniques and frameworks can be applied on these data resources to achieve a high-performance and relevant data collection and analysis. Software engineering is a socio-technical process which needs development team collaboration and technical knowledge to develop a high-quality application. GitHub, as an online social coding foundation, contains valuable information about the software engineers{\textquoteright} communications and project life cycles. In this paper, unsupervised data mining techniques are applied on the data collected by general Big Data approaches to analyse GitHub projects, source codes and interactions. 
Source codes and projects are clustered using features and metrics derived from historical data in repositories, object oriented programming metrics and the influences of developers on source codes.},
  doi = {10.1109/ICoICT.2015.7231398},
  author = {Bayati, Shahabedin and Parsons, David and Susnjak, Teo and Heidary, Marzieh}
}

@inproceedings{1713,
  title = {Gender and Tenure Diversity in {GitHub} Teams},
  year = {2015},
  abstract = {Software development is usually a collaborative venture. Open Source Software (OSS) projects are no exception; indeed, by design, the OSS approach can accommodate teams that are more open, geographically distributed, and dynamic than commercial teams. This, we find, leads to OSS teams that are quite diverse. Team diversity, predominantly in offline groups, is known to correlate with team output, mostly with positive effects. How about in OSS? Using GITHUB, the largest publicly available collection of OSS projects, we studied how gender and tenure diversity relate to team productivity and turnover. Using regression modeling of GITHUB data and the results of a survey, we show that both gender and tenure diversity are positive and significant predictors of productivity, together explaining a sizable fraction of the data variability. These results can inform decision making on all levels, leading to better outcomes in recruiting and performance.},
  keywords = {gender, github, team},
  url = {http://bvasiles.github.io/papers/chi15.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/chi15.pdf},
  author = {Vasilescu, Bogdan and Posnett, Daryl and Ray, Baishakhi and van den Brand, Mark G. J. and Serebrenik, Alexander and Devanbu, Premkumar and Filkov, Vladimir}
}

@inproceedings{1751,
  title = {Mining Energy-Aware Commits},
  year = {2015},
  month = may,
  abstract = {Over the last years, energy consumption has become a first-class citizen in software development practice. 
While energy-efficient solutions on lower-level layers of the software stack are well-established, there is convincing evidence that even better results can be achieved by encouraging practitioners to participate in the process. For instance, previous work has shown that using a newer version of a concurrent data structure can yield a 2.19x energy savings when compared to the old associative implementation [75]. Nonetheless, little is known about how much software engineers are employing energy-efficient solutions in their applications and what solutions they employ for improving energy-efficiency. In this paper we present a qualitative study of {\textquotedblleft}energy-aware commits{\textquotedblright}. Using Github as our primary data source, we perform a thorough analysis on an initial sample of 2,189 commits and carefully curate a set of 371 energy-aware commits spread over 317 real-world non-trivial applications. Our study reveals that software developers heavily rely on low-level energy management approaches, such as frequency scaling and multiple levels of idleness. Also, our findings suggest that ill-chosen energy saving techniques can impact the correctness of an application. 
Yet, we found what we call {\textquotedblleft}energy-aware interfaces{\textquotedblright}, which are means for clients (e.g., developers or end-users) to save energy in their applications just by using a function, abstracting away the low-level implementation details.},
  url = {http://gustavopinto.org/lost+found/msr2015.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/energy.pdf},
  author = {Moura, Irineu and Pinto, Gustavo and Ebert, Felipe and Castor, Fernando}
}

@incollection{1743,
  title = {The Right to a Contribution: An Exploratory Survey on How Organizations Address It},
  booktitle = {Open Source Systems: Adoption and Impact},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {451},
  year = {2015},
  pages = {157--167},
  publisher = {Springer International Publishing},
  organization = {Springer International Publishing},
  abstract = {Free and Open Source Software (FOSS) projects are characterized by the opportunity to attract external contributors, where contributions can be in any form of copyrightable material, such as code or documentation. In most of them it is understood that contributions would be licensed in similar or compatible terms than the project{\textquoteright}s license. Some projects require a copyright transfer from the contributor to an organization for the work contributed to a project, such documents are known as copyright assignment agreements. In a way, it is similar to the copyright transfer than some researchers grant to a publisher. In this work we present an exploratory survey of the multiple visions of copyright assignments, and aggregate them in a work that researchers and practitioners could use to get informed of the alternatives available in the literature. We expect that our findings help inform practitioners on legal concerns when receiving external contributions. 
},
  isbn = {978-3-319-17836-3},
  doi = {10.1007/978-3-319-17837-0_15},
  url = {http://dx.doi.org/10.1007/978-3-319-17837-0_15},
  author = {Poo-Caama{\~n}o, Germ{\'a}n and German, Daniel M.},
  editor = {Damiani, Ernesto and Frati, Fulvio and Riehle, Dirk and Wasserman, Anthony I.}
}

@article{1794,
  title = {On the role of {FOSS} business models and participation architectures in supporting open innovation},
  year = {2015},
  month = aug,
  abstract = {Most research regarding innovation in free and open source software (FOSS) pertains to identifying supporting conditions for promoting code contribution. This raises concerns about the ability of FOSS communities to remain innovative based only on the perspectives of developer-users. Preliminary research suggests different open source business models may provide motivation to support greater involvement of non-developer users. This research focuses on understanding the relationship between business model and supporting participation architectures, beyond users{\textquoteright} code contributions, to enable user participation in design of the software.},
  url = {http://www.opensym.org/os2015/proceedings-files/c102-purcell.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/c102-purcell.pdf},
  author = {Purcell, Michelle W.}
}

@inproceedings{1788,
  title = {Software Patents: A Replication Study},
  year = {2015},
  month = aug,
  abstract = {Previous research has documented the legal and economic aspects of software patents. To study the evolution in the granting of software patents we reproduced and extended part of the empirical study on software patents conducted by Bessen and Hunt. The original study established a criteria to identify software patents, and provided a look at the evolution of patents granted until 2002. We present a simple approach to retrieve patents from the full text database provided by the United States Patent and Trademark Office (USPTO), which is freely accessible. 
We also present the evolution of software patents since the original study, and which we also present separated by major technological firms. Our research shows a continuous increase in the number of software patents granted higher, both in number of patents granted (in absolute numbers) and in proportion of overall patents (in relative terms). The relevance of studying the evolution of software patents relies in the challenges to find prior-art, either for practitioners looking for patenting as well as for examiners evaluating granting a new patent.},
  url = {http://www.opensym.org/os2015/proceedings-files/p104-poo-caamano.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/p104-poo-caamano.pdf},
  author = {Poo-Caama{\~n}o, Germ{\'a}n and German, Daniel M.}
}

@inproceedings{1792,
  title = {Toward understanding new feature request systems as participation architectures for supporting open innovation},
  year = {2015},
  month = aug,
  abstract = {Most research regarding innovation in open source software communities pertains to identifying supporting conditions for promoting code contribution as a way to innovate the software. Instead, this paper seeks to identify social and technological affordances of new feature request systems and their potential to support open innovation through integration of peripheral community members{\textquoteright} ideas for advancing the software. Initial findings from the first of a planned study of multiple open source software communities are presented to identify attributes of effective participation architectures.},
  url = {http://www.opensym.org/os2015/proceedings-files/p202-purcell.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/p202-purcell.pdf},
  author = {Michelle W. 
Purcell}
}

@incollection{1742,
  title = {On the Variability of the {BSD} and {MIT} Licenses},
  booktitle = {Open Source Systems: Adoption and Impact},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {451},
  year = {2015},
  pages = {146--156},
  publisher = {Springer International Publishing},
  organization = {Springer International Publishing},
  abstract = {The MIT/X11 and the BSD are two of the most important family of Free and Open Source (FOSS) licenses. Because these licenses are to be inserted into the files that use it, and because they are expected to be changed by those who use them, their text has suffered alterations over time. Some of this variability is the result of licenses containing template fields which allow the license to be customized to include information such as the copyright holder name. Other variability can be attributed to changes in spelling, punctuation, and adding or removing conditions. This study empirically evaluated the extent that the BSD and MIT/X11 family of licenses are varied, and the manner and frequency in which license texts vary from the original definition. The study found that the BSD family has little variability, with a significant proportion fitting the common standard. The MIT/X11 family of licenses exhibited significantly more variation, with a higher propensity to customize the license text. In addition, the MIT/X11 license has spawned several specialized variants which likely constitute different legal meanings. Based on these findings, recommendations are proposed on what variability needs to be accommodated by the Software Package Data Exchange (SPDX) which is in the process of standardizing the allowed variability of both licenses.},
  isbn = {978-3-319-17836-3},
  doi = {10.1007/978-3-319-17837-0_14},
  url = {http://dx.doi.org/10.1007/978-3-319-17837-0_14},
  author = {Maryka, Trevor and German, Daniel M. 
and Poo-Caama{\~n}o, Germ{\'a}n},
  editor = {Damiani, Ernesto and Frati, Fulvio and Dirk Riehle and Wasserman, Anthony I.}
}

@proceedings{1775,
  title = {What is the Gist? Understanding the Use of Public Gists on GitHub},
  year = {2015},
  month = {05/2015},
  publisher = {IEEE},
  keywords = {gist, github},
  author = {Weiliang Wang and Poo-Caama{\~n}o, Germ{\'a}n and Evan Wilde and German, Daniel}
}

@conference{Matragkas:2014:ABO:2597073.2597119,
  title = {Analysing the {\textquoteleft}Biodiversity{\textquoteright} of Open Source Ecosystems: The GitHub Case},
  booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series = {MSR 2014},
  year = {2014},
  pages = {356--359},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {In nature the diversity of species and genes in ecological communities affects the functioning of these communities. Biologists have found out that more diverse communities appear to be more productive than less diverse communities. Moreover such communities appear to be more stable in the face of perturbations. In this paper, we draw the analogy between ecological communities and Open Source Software (OSS) ecosystems, and we investigate the diversity and structure of OSS communities. To address this question we use the MSR 2014 challenge dataset, which includes data from the top-10 software projects for the top programming languages on GitHub. Our findings show that OSS communities on GitHub consist of 3 types of users (core developers, active users, passive users). Moreover, we show that the percentage of core developers and active users does not change as the project grows and that the majority of members of large projects are passive users.
},
  keywords = {Data and knowledge visualization, data mining, mining challenge, msr challenge},
  isbn = {978-1-4503-2863-0},
  doi = {10.1145/2597073.2597119},
  url = {http://doi.acm.org/10.1145/2597073.2597119},
  author = {Matragkas, Nicholas and Williams, James R. and Kolovos, Dimitris S. and Paige, Richard F.}
}

@inbook{1619,
  title = {The Census of the Brazilian Open-Source Community},
  booktitle = {Open Source Software: Mobile Open Source Technologies},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {427},
  year = {2014},
  pages = {202--211},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract = {During a long time, software engineering research has been trying to better understand open-source communities and uncover two fundamental questions: (i) who are the contributors and (ii) why they contribute. Most of these researches focus on well-known OSS projects, but little is known about the OSS movement in emerging countries. In this paper, we attempt to fill this gap by presenting a picture of the Brazilian open-source contributor. To achieve this goal, we examined activities from more than 12,400 programmers on Github, during the period of a year. Subsequently, we correlate our findings with a survey that was answered by more than 1,000 active contributors. Our results show that exists an OSS trend in Brazil: most part of the contributors are active, performing around 30 contributions per year, and they contribute to OSS basically by altruism.
},
  keywords = {Brazilian OSS Community, github, OSS},
  isbn = {978-3-642-55127-7},
  doi = {10.1007/978-3-642-55128-4_30},
  url = {http://dx.doi.org/10.1007/978-3-642-55128-4_30},
  author = {Pinto, Gustavo and Kamei, Fernando},
  editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.}
}

@conference{1694,
  title = {Cream of the crop},
  booktitle = {Proceedings of The International Symposium on Open Collaboration - OpenSym {\textquoteright}14},
  year = {2014},
  pages = {1--10},
  publisher = {ACM Press},
  organization = {ACM Press},
  address = {New York, New York, USA},
  venue = {Berlin, Germany},
  abstract = {In open content communities like Wikipedia and StackOverflow and in open source software projects, a small proportion of users produce a majority of the content and take on much of the required community maintenance work. Understanding this class of users is crucial to creating and sustaining healthy communities. We carried out a mixed-method study of core contributors to the Cyclopath geographic wiki and bicycle routing web site. We present our findings and organize our discussion using concepts from activity theory. We found that the Cyclopath core contributors aren{\textquoteright}t the dedicated cyclists and that the characteristics of the community shape the site, the rules, and the tools for contributing. Additionally, we found that numerous aspects about the surrounding ecology of related systems and communities may help to shape how the site functions and views itself. We draw implications for future research and design from these findings.
},
  isbn = {9781450330169},
  doi = {10.1145/2641580.2641609},
  author = {Katherine Panciera and Mikhil Masli and Loren Terveen}
}

@conference{Passos:2014:DFA:2597073.2597124,
  title = {A Dataset of Feature Additions and Feature Removals from the Linux Kernel},
  booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series = {MSR 2014},
  year = {2014},
  pages = {376--379},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {This paper describes a dataset of feature additions and removals in the Linux kernel evolution history, spanning over seven years of kernel development. Features, in this context, denote configurable system options that users select when creating customized kernel images. The provided dataset is the largest corpus we are aware of capturing feature additions and removals, allowing researchers to assess the kernel evolution from a feature-oriented point-of-view. Furthermore, the dataset can be used to better understand how features evolve over time, and how different artifacts change as a result. One particular use of the dataset is to provide a real-world case to assess existing support for feature traceability and evolution. In this paper, we detail the dataset extraction process, the underlying database schema, and example queries.
The dataset is directly available at our Bitbucket repository: https://bitbucket.org/lpassos/kconfigdb},
  keywords = {evolution, linux, msr data showcase, Traceability, Version Control History},
  isbn = {978-1-4503-2863-0},
  doi = {10.1145/2597073.2597124},
  url = {http://doi.acm.org/10.1145/2597073.2597124},
  attachments = {https://flosshub.org/sites/flosshub.org/files/kernel.pdf},
  author = {Passos, Leonardo and Czarnecki, Krzysztof}
}

@inbook{1617,
  title = {Flow Research SXP Agile Methodology for FOSS Projects},
  booktitle = {Open Source Software: Mobile Open Source Technologies},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {427},
  year = {2014},
  pages = {195--198},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract = {This paper aims to explain a procedure that takes into account the different research processes carried out in developing an open-source, allowing control and management. This study is the SXP methodology applied in this type of project was carried out, allowing the validity of the basis of this research.
},
  keywords = {methodology SXP, open-source, production, research, Software},
  isbn = {978-3-642-55127-7},
  doi = {10.1007/978-3-642-55128-4_28},
  url = {http://dx.doi.org/10.1007/978-3-642-55128-4_28},
  author = {Pe{\~n}alver Romero, Gladys Marsi and Leyva Samada, Lisandra Isabel and Abad, Abel Meneses},
  editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.}
}

@inbook{1598,
  title = {FOSS Service Management and Incidences},
  booktitle = {Open Source Software: Mobile Open Source Technologies},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {427},
  year = {2014},
  pages = {76--79},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract = {The Free Open Source Software (FOSS) solutions have been reaching a high demand, usage and global recognition, not only in the development of applications for companies and institutions also in the management of services and incidents. With the upswing of Information Technology (IT), the development of tools that enable the reporting of problems and incidents on any organization or company is necessary. Every day you need more applications, software generally, that make easier the user{\textquoteright}s actions. This paper describes the need to use these tools and recount the development of a web application that allows the management of reports and incidents from users of Nova, the GNU/Linux Cuban distribution.
},
  keywords = {FOSS, service management and incidences},
  isbn = {978-3-642-55127-7},
  doi = {10.1007/978-3-642-55128-4_9},
  url = {http://dx.doi.org/10.1007/978-3-642-55128-4_9},
  author = {Ortiz, Susana S{\'a}nchez and P{\'e}rez Benitez, Alfredo},
  editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.}
}

@proceedings{1636,
  title = {Free/Open Source Software projects as early MOOCs},
  year = {2014},
  month = {04/2014},
  pages = {874--883},
  abstract = {This paper presents Free/Libre/Open Source Software (FLOSS) Projects as early Massive Online Open Courses (MOOCs). Being software development a process where learning and collaboration is of major importance, FLOSS projects have in common many characteristics with MOOCs. This is because many FLOSS projects (such as Linux, Apache, GNOME or KDE, among others) are massive, they are open to anyone to participate, and are driven mainly by telematic means. We therefore present the research literature that has studied FLOSS projects from points of view that are close to learning and discuss how the FLOSS community has approached many of the issues related to acquiring knowledge and skills over the Internet and compare them to how currently MOOCs, both xMOOCs and cMOOCs, address these situations.},
  keywords = {education, FLOSS, learning, mooc},
  doi = {10.1109/EDUCON.2014.6826200},
  author = {Robles, G. and Plaza, H. and Gonzalez-Barahona, J. M.}
}

@proceedings{1572,
  title = {Governance of Open Source Electronic Health Record Projects: A Successful Case of a Hybrid Model},
  year = {2014},
  month = {01/2014},
  pages = {2798--2807},
  publisher = {IEEE Computer Society},
  abstract = {Electronic Health Records (EHRs) are at the heart of most health system reforms. As an increasing number of hospitals are adopting open source EHRs and as many questions are still unanswered for potential adopters of open source EHRs, analyzing the governance of such systems has become important.
Due to the increased diversity of sponsors and stakeholders of open source software (OSS) over the past few years, numerous governance models of OSS projects have emerged. In contrast to earlier {\textquotedblleft}community-managed{\textquotedblright} governance models, these emergent models are characterized by the sponsorship of a corporation or a not-for-profit entity or by other hybrid forms. This paper reports on the investigation of such a hybrid model of open source EHR project governance adopted with success by a large Canadian hospital. The case study provides rich insights for other hospitals wishing to adopt an EHR of the open source type.},
  doi = {10.1109/HICSS.2014.350},
  author = {Placide Poba-Nzaou and Josianne Marsan and Guy Pare and Louis Raymond}
}

@article{mcdonald2014modeling,
  title = {Modeling Distributed Collaboration on GitHub},
  journal = {Advances in Complex Systems},
  year = {2014},
  publisher = {World Scientific},
  abstract = {In this paper, we apply concepts from Distributed Leadership, a theory suggesting that leadership is shared among members of an organization, to frame models of contribution that we uncover in five relatively successful open source software (OSS) projects hosted on GitHub. In this qualitative, comparative case study, we show how these projects make use of GitHub features such as pull requests (PRs). We find that projects in which member PRs are more frequently merged with the codebase experience more sustained participation. We also find that projects with higher success rates among contributors and higher contributor retention tend to have more distributed (non-centralized) practices for reviewing and processing PRs. The relationships between organizational form and GitHub practices are enabled and made visible as a result of GitHub{\textquoteright}s novel interface.
Our results demonstrate specific dimensions along which these projects differ and explicate a framework that warrants testing in future studies of OSS, particularly GitHub.},
  author = {McDonald, Nora and Blincoe, Kelly and Petakovic, Eva and Goggins, Sean}
}

@inbook{1623,
  title = {PROINFODATA: Monitoring a Large Park of Computational Laboratories},
  booktitle = {Open Source Software: Mobile Open Source Technologies},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {427},
  year = {2014},
  pages = {226--229},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  abstract = {This paper briefly presents a model for monitoring a large, heterogeneous and geographically scattered computer park. The data collection is performed by a software agent. The collected data are sent to the central server over the Internet, and stored by the storage system. An on-line portal makes up the visualization system, featuring charts, reports, and other tools for assessing the state of the park. This system is currently monitoring circa 150,000 machines.},
  isbn = {978-3-642-55127-7},
  doi = {10.1007/978-3-642-55128-4_34},
  url = {http://dx.doi.org/10.1007/978-3-642-55128-4_34},
  author = {Possamai, Cleide L. B. and Pasqualin, Diego and Weingaertner, Daniel and Todt, Eduardo and Castilho, Marcos A. and Bona, Luis C. E.
and Almeida, Eduardo Cunha},
  editor = {Corral, Luis and Sillitti, Alberto and Succi, Giancarlo and Vlasenko, Jelena and Wasserman, Anthony I.}
}

@conference{Pletea:2014:SES:2597073.2597117,
  title = {Security and Emotion: Sentiment Analysis of Security Discussions on GitHub},
  booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series = {MSR 2014},
  year = {2014},
  pages = {348--351},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Application security is becoming increasingly prevalent during software and especially web application development. Consequently, countermeasures are continuously being discussed and built into applications, with the goal of reducing the risk that unauthorized code will be able to access, steal, modify, or delete sensitive data. In this paper we gauged the presence and atmosphere surrounding security-related discussions on GitHub, as mined from discussions around commits and pull requests. First, we found that security related discussions account for approximately 10\% of all discussions on GitHub. Second, we found that more negative emotions are expressed in security-related discussions than in other discussions. These findings confirm the importance of properly training developers to address security concerns in their applications as well as the need to test applications thoroughly for security vulnerabilities in order to reduce frustration and improve overall project atmosphere.
},
  keywords = {github, mining challenge, msr challenge, security, sentiment analysis},
  isbn = {978-1-4503-2863-0},
  doi = {10.1145/2597073.2597117},
  url = {http://doi.acm.org/10.1145/2597073.2597117},
  attachments = {https://flosshub.org/sites/flosshub.org/files/pletea.pdf},
  author = {Pletea, Daniel and Vasilescu, Bogdan and Serebrenik, Alexander}
}

@booklet{1588,
  title = {The Small World Network Effect in Software Project Teams},
  year = {2014},
  note = {"This data was collected using the freely available FLOSSmole[14] datasets of free and open source projects. Two datasets were analyzed {\textendash} data from from the popular open source project site Freecode, 1 and another from the open source software repository SourceForge. 2 The Freecode dataset contained data up to September 2013, while the SourceForge dataset was slightly older with data gathered up to June 2009. "},
  abstract = {Team cohesion and the dynamics of team formation are important parts of any project, with software projects being no exception. An interesting aspect of team building is the relationships formed between the team members. Because of these relationships, representing software team members as a graph may be a natural way to explore team dynamics. As team members move between projects, these graphs become more and more connected as team members collaborate and form new relationships. We show that this connectivity, known as the {\textquotedblleft}small world effect,{\textquotedblright} has a positive impact on team performance when the connectivity levels are moderate. Performance degrades, however, at both very high and very low levels of connectivity.
This aligns with similar research findings of non-software teams.},
  keywords = {flossmole},
  url = {https://www.researchgate.net/publication/259217372_The_Small_World_Network_Effect_in_Software_Project_Teams},
  attachments = {https://flosshub.org/sites/flosshub.org/files/kpeterson-small-world-software.pdf},
  author = {Kevin Peterson}
}

@conference{Padhye:2014:SEC:2597073.2597113,
  title = {A Study of External Community Contribution to Open-source Projects on GitHub},
  booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories},
  series = {MSR 2014},
  year = {2014},
  pages = {332--335},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Open-source software projects are primarily driven by community contribution. However, commit access to such projects{\textquoteright} software repositories is often strictly controlled. These projects prefer to solicit external participation in the form of patches or pull requests. In this paper, we analyze a set of 89 top-starred GitHub projects and their forks in order to explore the nature and distribution of such community contribution. We first classify commits (and developers) into three categories: core, external and mutant, and study the relative sizes of each of these classes through a ring-based visualization. We observe that projects written in mainstream scripting languages such as JavaScript and Python tend to include more external participation than projects written in upcoming languages such as Scala. We also visualize the geographic spread of these communities via geocoding. Finally, we classify the types of pull requests submitted based on their labels and observe that bug fixes are more likely to be merged into the main projects as compared to feature enhancements.
},
  keywords = {community participation, core committers, external contribution, mining challenge, mining software repositories, msr challenge, Open-source software, pull requests},
  isbn = {978-1-4503-2863-0},
  doi = {10.1145/2597073.2597113},
  url = {http://doi.acm.org/10.1145/2597073.2597113},
  author = {Padhye, Rohan and Mani, Senthil and Sinha, Vibha Singhal}
}

@conference{Paschalidou:2013:ADE:2490257.2490264,
  title = {An application of data envelopment analysis to software quality assessment},
  booktitle = {Proceedings of the 6th Balkan Conference in Informatics},
  series = {BCI {\textquoteright}13},
  year = {2013},
  pages = {228--235},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {Data Envelopment Analysis (DEA) is a non-parametric technique which involves the use of linear programming methods to measure the efficiency of a homogenous set of units. These units are known as Decision Making Units (DMUs) and defined by multiple input and output data. Efficiencies are measured relative to a piece-wise surface (efficient frontier) which envelops the data, thus justifying the name of the technique. Although DEA has been mostly used in production economics, its application in the context of software quality evaluation seems to be a promising approach. This study provides an application of DEA to assess the evolution of two open-source software projects in terms of selected metric values for successive versions of each project. What is really interesting in DEA is that a single efficiency score is calculated for each version despite the often convoluted overall picture of the metric values. According to a simplified view of DEA, there are two categories of units, the efficient (onto the efficient frontier) and the inefficient ones. Each inefficient unit is characterized by a reference set of peers which involves all the efficient units "operating" closer to that unit.
Through the consideration of the reference set of the inefficient versions of each project, the metrics that require improvement, as well as the extent of improvement, could be estimated. These results could assist software developers in identifying design issues that require further improvement. Notwithstanding the fact that there are a number of issues to be further investigated, the applicability of DEA and other operations research tools in the context of software quality might yield interesting results.},
  keywords = {dea, design metrics, software evolution, software quality},
  isbn = {978-1-4503-1851-8},
  doi = {10.1145/2490257.2490264},
  url = {http://doi.acm.org/10.1145/2490257.2490264},
  author = {Paschalidou, Georgia and Stiakakis, Emmanouil and Chatzigeorgiou, Alexander}
}

@inbook{1542,
  title = {Authoritative Linked Data Descriptions of Debian Source Packages Using ADMS.SW},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {168--181},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {The Debian Package Tracking System is a Web dashboard for Debian contributors and advanced users. This central tool publishes the status of subsequent releases of source packages in the Debian distribution. It has been improved to generate RDF meta-data documenting the source packages, their releases and links to other packaging artifacts, using the ADMS.SW 1.0 model. This constitutes an authoritative source of machine-readable Debian {\textquotedblleft}facts{\textquotedblright} and proposes a reference URI naming scheme for Linked Data resources about Debian packages. This should enable the interlinking of these Debian package descriptions with other ADMS.SW or DOAP descriptions of FLOSS projects available on the Semantic Web also using Linked Data principles.
This will be particularly interesting for traceability with upstream projects whose releases are packaged in Debian, derivative distributions reusing Debian source packages, or with other FLOSS distributions.},
  keywords = {debian},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_12},
  author = {Olivier Berger and Christian Bac},
  editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto}
}

@proceedings{1491,
  title = {Communication in Open Source Software Development Mailing Lists},
  year = {2013},
  note = {"The entire dataset used in the experiment, including the cards, the resolved aliases, and detailed statistical results, can be downloaded from ..." http://www.st.ewi.tudelft.nl/\textasciitilde{}guzzi/oss-communication/},
  month = {05/2013},
  pages = {277--286},
  abstract = {Open source software (OSS) development teams use electronic means, such as emails, instant messaging, or forums, to conduct open and public discussions. Researchers investigated mailing lists considering them as a hub for project communication. Prior work focused on specific aspects of emails, for example the handling of patches, traceability concerns, or social networks. This led to insights pertaining to the investigated aspects, but not to a comprehensive view of what developers communicate about. Our objective is to increase the understanding of development mailing lists communication. We quantitatively and qualitatively analyzed a sample of 506 email threads from the development mailing list of a major OSS project, Lucene. Our investigation reveals that implementation details are discussed only in about 35\% of the threads, and that a range of other topics is discussed. Moreover, core developers participate in less than 75\% of the threads.
We observed that the development mailing list is not the main player in OSS project communication, as it also includes other channels such as the issue repository.},
  keywords = {email, lucene, mailling list},
  url = {http://www.st.ewi.tudelft.nl/~guzzi/downloads/Guzzi2013msr.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Guzzi2013msr.pdf},
  author = {Guzzi, Anja and Bacchelli, Alberto and Lanza, Michele and Pinzger, Martin and van Deursen, Arie}
}

@inbook{1536,
  title = {Community Dynamics in Open Source Software Projects: Aging and Social Reshaping},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {80--96},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {An undeniable factor for an open source software (OSS) project success is a vital community built around it. An OSS community not only needs to be established, but also to be persisted. This is not guaranteed considering the voluntary nature of participation in OSS. The dynamic analysis of the OSS community evolution can be used to extract indicators to rate the current stability of a community and to predict its future development. Despite the great amount of studies on mining project communication and development repositories, the evolution of OSS communities is rarely addressed. This paper presents an approach to analyze the OSS community history. We combine adapted demography measures to study community aging and social analysis to investigate the dynamics of community structures. The approach is applied to the communication and development history of three bioinformatics OSS communities over eleven years. First, in all three projects a survival rate pattern is identified. This finding allows us to define the minimal number of newcomers required for the further positive community growth.
Second, dynamic social analysis shows that the node betweenness in combination with the network diameter can be used as an indicator for significant changes in the community core and the quality of community recovery after these modifications.},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_6},
  author = {Hannemann, Anna and Klamma, Ralf},
  editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto}
}

@proceedings{1502,
  title = {A Dataset from Change History to Support Evaluation of Software Maintenance Tasks},
  year = {2013},
  month = {05/2013},
  pages = {131--134},
  abstract = {Approaches that support software maintenance need to be evaluated and compared against existing ones, in order to demonstrate their usefulness in practice. However, oftentimes the lack of well-established sets of benchmarks leads to situations where these approaches are evaluated using different datasets, which results in biased comparisons. In this data paper we describe and make publicly available a set of benchmarks from six Java applications, which can be used in the evaluation of various software engineering (SE) tasks, such as feature location and impact analysis. These datasets consist of textual description of change requests, the locations in the source code where they were implemented, and execution traces. Four of the benchmarks were already used in several SE research papers, and two of them are new. In addition, we describe in detail the methodology used for generating these benchmarks and provide a suite of tools in order to encourage other researchers to validate our datasets and generate new benchmarks for other subject software systems.
Our online appendix: http://www.cs.wm.edu/semeru/data/msr13/},
  url = {http://www.cs.wm.edu/~bdit/publications/MSR13DataPaper_Dit_CRC.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/MSR13DataPaper_Dit_CRC.pdf},
  author = {Bogdan Dit and Andrew Holtzhauer and Poshyvanyk, Denys and Kagdi, Huzefa}
}

@inbook{1546,
  title = {A Dual Model of Open Source License Growth},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  note = {uses ohloh snapshot, march 2008},
  pages = {245--256},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {Every open source project needs to decide on an open source license. This decision is of high economic relevance: Just which license is the best one to help the project grow and attract a community? The most common question is: Should the project choose a restrictive (reciprocal) license or a more permissive one? As an important step towards answering this question, this paper analyses actual license choice and correlated project growth from ten years of open source projects. It provides closed analytical models and finds that around 2001 a reversal in license choice occurred from restrictive towards permissive licenses.},
  keywords = {ohloh},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_18},
  attachments = {https://flosshub.org/sites/flosshub.org/files/oss2013.hofmann.pdf},
  author = {Hofmann, Gottfried and Dirk Riehle and Carsten Kolassa and Mauerer, Wolfgang},
  editor = {Petrinja, Etiel and Succi, Giancarlo and Ioini, Nabil and Sillitti, Alberto}
}

@proceedings{1508,
  title = {The Eclipse and Mozilla Defect Tracking Dataset: A Genuine Dataset for Mining Bug Information},
  year = {2013},
  note = {Intended to be an "open bug database" or baseline for multiple studies in the community.
14 bug attributes [id, product, summary, status, etc], some change over time and some do not. Data set: github.com/ansymo/msr2013-bug\_dataset},
  month = {05/2013},
  abstract = {The analysis of bug reports is an important subfield within the mining software repositories community. It explores the rich data available in defect tracking systems to uncover interesting and actionable information about the bug triaging process. While bug data is readily accessible from systems like Bugzilla and JIRA, a common database schema and a curated dataset could significantly enhance future research because it allows for easier replication. Consequently, in this paper we propose the Eclipse and Mozilla Defect Tracking Dataset, a representative database of bug data, filtered to contain only genuine defects (i.e., no feature requests) and designed to cover the whole bug-triage life cycle (i.e., store all intermediate actions). We have used this dataset ourselves for predicting bug severity, for studying bug-fixing time and for identifying erroneously assigned components. github.com/ansymo/msr2013-bug\_dataset},
  url = {http://github.com/ansymo/msr2013-bug_dataset},
  author = {Lamkanfi, Ahmed and Javier P{\'e}rez and Demeyer, Serge}
}

@inbook{1544,
  title = {Effect on Business Growth by Utilization and Contribution of Open Source Software in Japanese IT Companies},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {222--231},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {The expanded use of Open Source Software (OSS), and the expansion of the market caused by this adoption has led to a corresponding increase in the number of businesses acting as stakeholders in the field. Some of these are pure users of OSS technology but a great number are developers of such technology, and can be understood to have a substantial investment in this paradigm.
It is reasonable to assume that such businesses are rational actors, and that their investment or contribution to the field implies a positive economic benefit either currently obtained or assumed as a return in the future. This paper analyzes how OSS affects Japanese IT companies{\textquoteright} business growth both through simple use and by deeper engagement as a stakeholder in OSS community. This is the first time that such a link between the utilization of OSS and economic growth has been explored in the context of Japan, and it can hopefully lay a foundation for further study regarding the real economic value of this approach to software. },
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_16},
  author = {Noda, Tetsuo and Tansho, Terutaka and Coughlan, Shane},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1535 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1535,
  title = {The Emergence of Quality Assurance Practices in Free/Libre Open Source Software: A Case Study},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {271--276},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { As the user base of Free/Libre Open Source Software (FLOSS) diversifies, the need for higher quality is becoming more evident. This implies a more complex development model that includes various steps which were previously associated exclusively with proprietary development such as a formal quality assurance step (QA). However, little research has been done on how implementing formal quality assurance impacts the structure of FLOSS communities. This study aims to start filling this gap by analyzing interactions within such a community. Plone is just one among many FLOSS projects that acknowledged the importance of verification by implementing a quality assurance step.
},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_21},
  author = {Barham, Adina},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1537 and 1532 are chapters (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1537,
  title = {Exploring Collaboration Networks in Open-Source Projects},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {97--108},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { Analysis of developer collaboration networks presents an opportunity for understanding and thus improving the software development process. Discovery of these networks, however, presents a challenge since the collaboration relationships are initially not known. In this work we apply an approach for discovering collaboration networks of open source developers from Version Control Systems (VCS). It computes similarities among developers based on common file changes, constructs the network of collaborating developers and applies filtering techniques to improve the readability of the visualized network. We use the approach in case studies of three different projects from open source (phpMyAdmin, Eclipse Data Tools Platform and Gnu Compiler Collection) to learn their organizational structure and patterns. Our results indicate that with little effort the approach is capable of revealing aspects of these projects that were previously not known or would require a lot of effort to discover manually via other means, such as reading project documentation and forums. },
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_7},
  author = {Jermakovics, Andrejs and Sillitti, Alberto and Succi, Giancarlo},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@incollection{1532,
  title = {How Healthy Is My Project? {Open} Source Project Attributes as Indicators of Success},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  note = {"The dataset used has thus been obtained through a third source which has made the data publicly available [18]. FlossMole.org contains data collected for the period 2006 to December 2009 from which a dataset was compiled of 125,700 projects. "},
  pages = {30--44},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {Determining what factors can influence the successful outcome of a software project has been labeled by many scholars and software engineers as a difficult problem. In this paper we use machine learning to create a model that can determine the stage a software project has obtained with some accuracy. Our model uses 8 Open Source project metrics to determine the stage a project is in. We validate our model using two performance measures; the exact success rate of classifying an Open Source Software project and the success rate over an interval of one stage of its actual performance using different scales of our dependent variable. In all cases we obtain an accuracy of above 70\% with one away classification (a classification which is away by one) and about 40\% accuracy with an exact classification. We also determine the factors (according to one classifier) that uses only eight variables among all the variables available in SourceForge, that determine the health of an OSS project.
},
  keywords = {flossmole, sourceforge},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_3},
  url = {http://link.springer.com/chapter/10.1007/978-3-642-38928-3_3},
  attachments = {https://flosshub.org/sites/flosshub.org/files/OSSHealth_1.0.pdf},
  author = {Piggot, James and Amrit, Chintan},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1533 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the combined series string split into series and booktitle.}
@incollection{1533,
  title = {Identifying Success Factors for the {Mozilla} Project},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {45--60},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { The publication of the Netscape source code under free software license and the launch of the Mozilla project constitute a pioneering initiative in the field of free and open source software. However, five years after the publication came years of decline. The market shares rose again after 2004 with the lighter Firefox browser. We propose a case study covering the period from 1998 to 2012. We identify the factors that explain the evolution of the Mozilla project. Our study deepens different success factors identified in the literature. It is based on authors{\textquoteright} experience as well as the abundant literature dedicated to the Netscape company and the Mozilla project.
It particularly highlights the importance of the source code complexity, its modularity, the responsibility assignment and the existence of an organisational sponsorship.},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_4},
  author = {Viseur, Robert},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1529: the series name and volume title had been prepended to the chapter title; they now live in series and booktitle, and the chapter is typed as incollection.}
@incollection{1529,
  title = {Technological Innovation and Resource Bricolage in Firms: The Role of Open Source Software},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {1--17},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = {Technological resources have been an important source of innovation in companies. They play a key role in the development of new products and services. However, the upfront investment in technical resources to enable the development of these new products and services can be quite high. Therefore, managers ask for well elaborated business cases before making investments. These business cases rely on market research to back up their assumptions rather than in-market testing. We argue that the advent of open source software (OSS) creates an opportunity space for companies to build low-cost prototypes and test their ideas in the market. Little is known about how OSS plays a role in allowing these firms to enhance product development.
Using qualitative case studies, the paper sheds light on how OSS acts as a bricolage mechanism for technological innovation in the ICT services industry.},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_1},
  url = {http://link.springer.com/chapter/10.1007/978-3-642-38928-3_1},
  author = {Mahajan, Aarti and Clarysse, Bart},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1530: the series name and volume title had been prepended to the chapter title; they now live in series and booktitle. 1530 and 1531 are chapters, typed as incollection.}
@incollection{1530,
  title = {Empirical Study of the Relation between Open Source Software Use and Productivity of {Japan}{\textquoteright}s Information Service Industries},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {18--29},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { This paper analyzes the relation between OSS (Open Source Software) use and the performance of Japanese information service industry. We first estimate the market value of OSS, an issue which only a few studies have specifically addressed. The results are then used to analyze the economic effect of OSS. Although our study has some methodological limitations regarding the calculation of the market value of OSS, we demonstrate that the economic effect of OSS is generally positive.},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_2},
  author = {Noda, Tetsuo and Tanihana, Keisuke},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@incollection{1531,
  title = {Is It All Lost? {A} Study of Inactive Open Source Projects},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {61--79},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { Open Source Software (OSS) proponents suggest that when developers lose interest in their project, their last duty is to {\textquotedblleft}hand it off to a competent successor.{\textquotedblright} However, the mechanisms of such a hand-off are not clear, or widely known among OSS developers. As a result, many OSS projects, after a certain long period of evolution, stop evolving, in fact becoming {\textquotedblleft}inactive{\textquotedblright} or {\textquotedblleft}abandoned{\textquotedblright} projects. This paper presents an analysis of the population of projects contained within one of the largest OSS repositories available (SourceForge.net), in order to describe how projects abandoned by their developers can be identified, and to discuss the attributes and characteristics of these inactive projects. In particular, the paper attempts to differentiate projects that experienced maintainability issues from those that are inactive for other reasons, in order to be able to correlate common characteristics to the {\textquotedblleft}failure{\textquotedblright} of these projects.
},
  keywords = {sourceforge},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_5},
  url = {http://staff.lero.ie/stol/files/2013/03/2013-Is-It-All-Lost-A-Study-of-Inactive-Open-Source-Projects.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/2013-Is-It-All-Lost-A-Study-of-Inactive-Open-Source-Projects.pdf},
  author = {Khondu, Jymit and Capiluppi, Andrea and Stol, Klaas},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1545 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1545,
  title = {Misconceptions and Barriers to Adoption of {FOSS} in the {U.S.} Energy Industry},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {232--244},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { In this exploratory study, we map the use of free and open source software (FOSS) in the United States energy sector, especially as it relates to cyber security. Through two surveys and a set of semi-structured interviews{\textemdash}targeting both developers and policy makers{\textemdash}we identified key stakeholders, organizations, and FOSS projects, be they rooted in industry, academia, or public policy space that influence software and security practices in the energy sector. We explored FOSS tools, common attitudes and concerns, and challenges with regard to FOSS adoption. More than a dozen themes were identified from interviews and surveys. Of these, drivers for adoption and risks associated with FOSS were the most prevalent. More specifically, the misperceptions of FOSS, the new security challenges presented by the smart grid, and the extensive influence of vendors in this space play the largest roles in FOSS adoption in the energy sector.
},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_17},
  author = {Kuechler, Victor and Jensen, Carlos and Bryant, Deborah},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1539 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1539,
  title = {Modeling Practices in Open Source Software},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {127--139},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { It is widely accepted that modeling in software engineering increases productivity and results in better code quality. Yet, modeling adoption remains low. The open source community, in particular, remains almost entirely code centric. In this paper, we explore the reasons behind such limited adoption of modeling practices among open source developers. We highlight characteristics of modeling tools that would encourage their adoption. We propose Umple as a solution where both modeling and coding elements are treated uniformly. In this approach, models can be manipulated textually and code can be edited visually. We also report on the Umple compiler itself as a case study of an open source project where contributors, using the above approach, have and continue to routinely commit code and model over a number of years.
},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_9},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Modeling-Practices-in-Open-Source-Software.pdf},
  author = {Badreddin, Omar and Lethbridge, Timothy and Elassar, Maged},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1543 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1543,
  title = {An Open Source Monitoring Framework for Enterprise {SOA}},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {182--193},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { Web services monitoring is currently emerging as an effective way to trace faults in services at runtime. The lack of testing information provided by web services specifications was an indication that other methods need to be used to assess the quality of web services. This is mainly due to the fact that it is difficult to simulate the client infrastructure during testing of web services. Monitoring consists of inspecting services at runtime and taking adequate actions when unacceptable events occur. Monitoring could be performed by different stakeholders and could target different properties of services. Predominantly, monitoring is performed by service providers to manage their internal resources and balance their requests load. In our effort to improve the monitoring infrastructures, we propose a monitoring framework in which all the participants (services providers, services requestors) can contribute to monitoring and at the same time have direct access to the monitoring data. This paper describes a monitoring framework developed as part of NEXOF-RA project. The framework offers a set of capabilities for a collaborative monitoring of web services. The paper presents motivations, system design, implementation and usage of the framework.
},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_13},
  author = {El Ioini, Nabil and Garibbo, Alessandro and Sillitti, Alberto and Succi, Giancarlo},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1541 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1541,
  title = {A Preliminary Analysis of Localization in Free Software: How Translations Are Performed},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {153--167},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { Software is more than just source code. There is a myriad of elements that compose a software project, among others documentation, translations, multimedia, artwork, marketing. In this paper, we focus on the translation efforts that free, libre, open source software (FLOSS) projects undergo to provide their software in multiple languages. We have therefore analyzed a large amount of projects for their support and procedures regarding translations, if they exist. Our results show that many, but not all, projects offer some type of support and specify some ways to those wanting to contribute. Usually, projects from a more traditional libre software domain are more prone to ease such tasks. However, there is no general way to contribute, as formats and procedures are often project-specific. We have identified as well a high number of translation-supporting tools, with many projects having their own one. All in all, information about how to contribute is the main factor for having a very internationalized application. Projects accepting and giving credit to contributing translators have high levels of internationalization, even if the process is rudimentary.
},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_11},
  author = {Reina, Laura Arjona and Robles, Gregorio and Gonzalez-Barahona, Jesus M.},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1540 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1540,
  title = {The Role of Microblogging in {OSS} Knowledge Management},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {140--152},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { Given that microblogging has been shown to play a valuable role in knowledge management within companies, it is useful to understand how it is being used in relation to OSS. This project studies tweets related to 12 open source projects and keywords, ranging from web content management systems (CMSes) to general office applications. It found considerable differences in the content and exchange of tweets, especially between specialist products such as CMSes and office suites such as OpenOffice. Tweets concerning the more specialist projects tended to provide information rather than updates on the user{\textquoteright}s current status. We found a high proportion of event-driven traffic for some CMS projects, and a lower proportion for the office products and groups of projects.
},
  keywords = {twitter},
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_10},
  author = {Lewis, Jonathan},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1538 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1538,
  title = {Socio-technical Congruence in {OSS} Projects: Exploring {Conway}{\textquoteright}s Law in {FreeBSD}},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {109--126},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { Software development requires effective communication, coordination and collaboration among developers working on interdependent modules of the same project. The need for coordination is even more evident in open source projects where development is often more dispersed and distributed. In this paper, we study the match between the coordination needs established by the technical domain (i.e. source code) and the actual coordination activities carried out by the development team, such hypothetical match is also known as socio-technical congruence. We carry out our study by empirically examining Conway{\textquoteright}s law in FreeBSD project. Our study shows that the congruence measure is significantly high in FreeBSD and that the congruence value remains stable as the project matured. },
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_8},
  author = {Syeed, M. M.
Mahbubul and Hammouda, Imed},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@comment{1534 is a chapter (chapter-level DOI, page range, volume editors): typed as incollection, with the fused series string split into series and booktitle.}
@incollection{1534,
  title = {Towards a Reference Model on How to Utilise Open Standards in Open Source Projects: Experiences Based on {Drupal}},
  booktitle = {Open Source Software: Quality Verification},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {404},
  year = {2013},
  pages = {257--263},
  publisher = {Springer Berlin Heidelberg},
  organization = {Springer Berlin Heidelberg},
  address = {Berlin, Heidelberg},
  abstract = { It is known that standards implemented in Open Source software (OSS) can promote a competitive market, reduce the risk for lock-in and improve interoperability, whilst there is limited knowledge concerning the relationship between standards and their implementations in OSS. In this paper we report from an ongoing case study conducted in the context of the ORIOS (Open Source software Reference Implementations of Open Standards) project in which influences between OSS communities and software standard communities are investigated. The study focuses on the Drupal project and three of its implemented standards (RDFa, CMIS, and OpenID). },
  isbn = {978-3-642-38928-3},
  issn = {1868-422X},
  doi = {10.1007/978-3-642-38928-3_19},
  author = {Andersson, Stefan and Feist, Jonas and Gustavsson, Tomas and Strindberg, Henrik and Gamalielsson, Jonas and Lundell, Bj{\"o}rn and Grahn, Alexander},
  editor = {Petrinja, Etiel and Succi, Giancarlo and El Ioini, Nabil and Sillitti, Alberto}
}

@proceedings{1504,
  title = {Understanding the Evolution of Type-3 Clones: An Exploratory Study},
  year = {2013},
  month = {05/2013},
  abstract = {Understanding the evolution of clones is important for both understanding the maintenance implications of clones and for building a robust clone management system. To this end, researchers have already conducted a number of studies to analyze the evolution of clones, mostly focusing on Type-1 and Type-2 clones.
However, although there are a significant number of Type-3 clones in software systems, we know a little how they actually evolve. In this paper, we perform an exploratory study on the evolution of Type-1, Type-2, and Type-3 clones in six open source software systems written in two different programming languages and compare the result with a previous study to better understand the evolution of Type-3 clones. Our results show that although Type-3 clones are more likely to change inconsistently, the absolute number of consistently changed Type-3 clone classes is greater than that of Type-1 and Type-2. Type-3 clone classes also have a lifespan similar to that of Type-1 and Type-2 clones. In addition, a considerable number of Type-1 and Type-2 clones convert into Type-3 clones during evolution. Therefore, it is important to manage type-3 clones properly to limit their negative impact. However, various automated clone management techniques such as notifying developers about clone changes or linked editing should be chosen carefully due to the inconsistent nature of Type-3 clones.},
  author = {Saha, Ripon K. and Roy, Chanchal K. and Schneider, Kevin A. and Perry, Dewayne E.}
}

@comment{1446: page range written with the BibTeX en-dash form; all other field values unchanged.}
@proceedings{1446,
  title = {Exploring the Barriers and Enablers to the Use of Open Educational Resources by University Academics in Africa},
  volume = {378},
  year = {2012},
  month = {09/2012},
  pages = {112--128},
  publisher = {IFIP AICT},
  abstract = {Considerable effort has gone into Open Educational Resource (OER) initiatives in the past decade. These initiatives have created free, high quality educational resources for everyone and anyone to use. However, these open and free resources appear to remain largely unused by university academics on the educationally resource-poor African continent. The objectives of the research study are to explore the inhibitors and enablers are experienced by academics that use OER, and what barriers prevent academics from using OER.
The sample consists of academics from East, West and Southern Africa. Information was gathered by means of a survey questionnaire. A modified version of the Unified Theory of Acceptance and Use of Technology model was used to identify the influence of certain factors on a user{\textquoteright}s intention to adopt OER. Some of the key findings indicate that Performance Expectancy and Effort Expectancy have a positive effect on a user{\textquoteright}s Behavioural Intention to use OER, and the latter has a strong influence on the Actual Use of OER. Facilitating Conditions do not have a statistically significant impact. Additionally, significant differences were found in the barriers which users and potential users of OER have identified as either limiting their current use of OER, or negatively affecting their intention to use OER. These barriers include discovery, relevance, context and individual resources. Addressing these factors could lead to a more widespread adoption of Open Educational Resources in Africa and, consequently, more pervasive and higher quality educational opportunities.},
  author = {Percy, Tanya and Van Belle, Jean-Paul}
}

@comment{1476: the conference name was stored in address and the series abbreviation in publisher. The paper is typed as inproceedings with the conference name in booktitle, the series spelled out in series, and Springer alone as publisher.}
@inproceedings{1476,
  title = {How Can Open Standards Be Effectively Implemented in Open Source? {Challenges} and the {ORIOS} Project},
  booktitle = {Eighth International Conference on Open Source Systems},
  series = {IFIP Advances in Information and Communication Technology},
  volume = {378},
  year = {2012},
  month = {09/2012},
  pages = {383--388},
  publisher = {Springer},
  abstract = {Many organisations are currently restricted in their choice of software because of restrictions imposed by existing systems. Challenges include a lack of interoperability and a risk of technological lock-in, which many small companies seek to address by utilising Open Standards and Open Source implementations of such standards when developing and deploying systems.
This paper presents an overview of how the industrial research project ORIOS (Open Source software Reference Implementations of Open Standards) seeks to address identified challenges. An overarching goal of the project is to improve understanding within organisations of Open Standards, Open Source Reference Implementations, and the ecosystems around them. This will be done by developing a reference model of necessary and desirable features of an Open Standard, and how Open Standards and their implementations can be utilised by small companies in different usage contexts. An action case study approach will be used as a core strategy for evolving a reference model together with Swedish companies.},
  author = {Lundell, Bj{\"o}rn and Abdurahmanovic, Admir and Andersson, Stefan and Bergstr{\"o}m, Erik and Feist, Jonas and Gamalielsson, Jonas and Gustavsson, Tomas and Kahlbom, Roger and Papaxanthis, Konstantin}
}

@article{1397,
  title = {How do open source communities blog?},
  journal = {Empirical Software Engineering},
  year = {2012},
  abstract = {We report on an exploratory study, which aims at understanding how software communities use blogs compared to conventional development infrastructures. We analyzed the behavior of 1,100 bloggers in four large open source communities, distinguishing between committing bloggers and other community members. We observed that these communities intensively use blogs with one new entry every 8 h. A blog entry includes 14 times more words than a commit message. When analyzing the content of the blogs, we found that committers and others bloggers write about similar topics. Most popular topics in committers{\textquoteright} blogs represent high-level concepts such as features and domain concepts, while source code related topics are discussed in 15\% of their posts. Other community members frequently write about community events and conferences as well as configuration and deployment topics.
We found that the blogging peak period is usually after the software is released. Moreover, committers are more likely to blog after corrective engineering than after forward engineering and re-engineering activities. Our findings call for a hypothesis-driven research to (a) further understand the role of social media in dissolving the collaboration boundaries between developers and other stakeholders and (b) integrate social media into development processes and tools. },
  keywords = {blog, blogs},
  issn = {1573-7616},
  doi = {10.1007/s10664-012-9211-2},
  author = {Pagano, Dennis and Maalej, Walid}
}

@comment{1318: the conference alias is replaced by the canonical inproceedings type (same required fields), and the page range uses the en-dash form.}
@inproceedings{1318,
  title = {Network-Based Analysis of the Structure and Evolution of an Open Source Software Product},
  booktitle = {45th Hawai{\textquoteright}i International Conference on System Sciences},
  year = {2012},
  note = {"raw data about the product structure is extracted from the source code"},
  month = {01/2012},
  pages = {3436--3445},
  abstract = {In this paper, an analysis of product structures in open source software (OSS) at both product level and module level is presented. At the product level, the product structures are modeled as complex networks, and the evolutionary characteristics of product structures are analyzed by using network analysis metrics. At the module level, linking mechanisms, which describe how a module is attached with other modules, are proposed. The linking mechanisms are modeled as probability functions dependent on the degrees of linking modules. A case study from an open source software project, Drupal, is presented. The evolutionary trends of Drupal product structures are analyzed and discussed. Finally, a model is presented to illustrate the effects of linking mechanisms at the module level on the product structures at the system level.
The results indicate that the model built using the proposed linking mechanisms generates networks whose evolutionary characteristics are close to that of the original network.},
  keywords = {drupal, source code},
  author = {Le, Qize and Panchal, Jitesh H.}
}

@comment{1523: the conference alias is replaced by the canonical inproceedings type, and GitHub is brace-protected so sentence-casing styles keep its capitalisation.}
@inproceedings{1523,
  title = {Social coding in {GitHub}},
  booktitle = {Proceedings of the ACM 2012 conference on Computer Supported Cooperative Work - CSCW {\textquoteright}12},
  year = {2012},
  pages = {1277},
  publisher = {ACM Press},
  organization = {ACM Press},
  address = {Seattle, Washington, USA},
  abstract = { Social applications on the web let users track and follow the activities of a large number of others regardless of location or affiliation. There is a potential for this transparency to radically improve collaboration and learning in complex knowledge-based activities. Based on a series of in-depth interviews with central and peripheral GitHub users, we examined the value of transparency for large-scale distributed collaborations and communities of practice. We find that people make a surprisingly rich set of social inferences from the networked activity information in GitHub, such as inferring someone else{\textquoteright}s technical goals and vision when they edit code, or guessing which of several similar projects has the best chance of thriving in the long term. Users combine these inferences into effective strategies for coordinating work, advancing technical skills and managing their reputation.
}, keywords = {github}, isbn = {9781450310864}, doi = {10.1145/2145204.2145396}, url = {http://www.cs.cmu.edu/afs/cs/Web/People/xia/resources/Documents/cscw2012_Github-paper-FinalVersion-1.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/cscw2012_Github-paper-FinalVersion-1.pdf}, author = {Laura Dabbish and Colleen Stuart and Jason Tsay and Herbsleb, Jim} } @proceedings {1452, title = {Two Evolution Indicators for FOSS Projects}, volume = {378}, year = {2012}, month = {09/2012}, pages = {216-232}, publisher = {IFIP AICT}, abstract = {In this paper we introduce two project evolution indicators. One is showing an increase of downloads of the project and therefore a growing interest of users in the results of the project. The second indicator is predicting the future evolution of the project with the submission of new revisions to the concurrent versioning system. Both indicators can provide evidence of the sustainability of a software project. We used the General Linear Model method to statistically formulate the two linear equations that can be used to predict the two indicators. The predicting equations were build by using two stratified data samples one of 760 projects and the second of 880 projects extracted from the SourceForge repository. The six metrics included into the final version of the two models were extracted from a set of thirty project and product metrics as: the number of downloads, the number of developers, etc. We have validated the discriminant and the concurrent validity of the two models by using different statistical tests as the goodness-of-fit and we have used the two models to predict the indicators on two hold-out validation samples. 
The model predicting the increment of downloads was correct in 75 percent of the cases, the model predicting the submission of new revisions was correct in 93 percent of the cases.}, keywords = {metrics, sourceforge}, author = {Petrinja, Etiel and Succi, Giancarlo} } @proceedings {1285, title = {Adoption of OSS Development Practices by the Software Industry: A Survey}, year = {2011}, month = {10/2011}, pages = {233-243}, publisher = {Springer}, abstract = {The paper presents a survey of aspects related to the adoption of Open Source Software by the software industry. The aim of this study was to collect data related to practices and elements in the development process of companies that influence the trust in the quality of the product by potential adopters. The work is part of the research done inside the QualiPSo project and was carried out using a qualitative study based on a structured questionnaire focused on perceptions of experts and development practices used by companies involved in the Open Source Software industry. The results of the survey confirm intuitive concerns related to the adoption of Open Source Software as: the selection of the license, the quality issues addressed, and the development process tasks inside Open Source Software projects. 
The study uncovered specific aspects related to trust and trustworthiness of the Open Source Software development process that we did not find in previous studies as: the standards implemented by the OSS project, the project{\textquoteright}s roadmap is respected, and the communication channels that are available.}, keywords = {qualipso, Survey}, author = {Petrinja, Etiel and Sillitti, Alberto and Succi, Giancarlo} } @proceedings {1289, title = {Cliff Walls: An Analysis of Monolithic Commits Using Latent Dirichlet Allocation}, year = {2011}, note = {"Our data set consists of the version control logs of almost 10,000 projects from SourceForge, acquired in late 2006"}, month = {10/2011}, pages = {282-298}, publisher = {Springer}, abstract = {Artifact-based research provides a mechanism whereby researchers may study the creation of software yet avoid many of the difficulties of direct observation and experimentation. However, there are still many challenges that can affect the quality of artifact-based studies, especially those studies examining software evolution. Large commits, which we refer to as {\textquotedblleft}Cliff Walls,{\textquotedblright} are one significant threat to studies of software evolution because they do not appear to represent incremental development. We used Latent Dirichlet Allocation to extract topics from over 2 million commit log messages, taken from 10,000 SourceForge projects. The topics generated through this method were then analyzed to determine the causes of over 9,000 of the largest commits. We found that branch merges, code imports, and auto-generated documentation were significant causes of large commits. We also found that corrective maintenance tasks, such as bug fixes, did not play a significant role in the creation of large commits.}, keywords = {artifacts, commit, cvs, LDA, lines of code, log files, scm, sloc, sourceforge, version control}, author = {Pratt, Landon J. and MacLean, Alexander C. and Knutson, Charles D. 
and Ringger, Eric K.} } @conference {1216, title = {Experiences Mining Open Source Release Histories}, booktitle = {International Conference on Software and Systems Process (ICSSP 2011) }, year = {2011}, note = {"First, we selected the projects to initially target, using several criteria to get a broad picture of the open source landscape. Second, we collected the actual data, using a framework of parsers and some manual inspection. Third, we standardized and inserted the data into a database for later use." "but we plan to eventually cross reference our list of projects with existing open source project information (such as FLOSSmole) to take advantage of the work already done by other researchers." "For each release, we collected the following data: the project it belonged to, the date the release was published, the type of release, the release label (version number) and the source of the data" discussion of their difficulties "We conclude that programmatically creating a release history database from existing open source data is not trivial," "We have currently collected 1579 distinct releases from 22 different open source projects"}, month = {05/2011}, abstract = {Software releases form a critical part of the life cycle of a software project. Typically, each project produces releases in its own way, using various methods of versioning, archiving, announcing and publishing the release. Understanding the release history of a software project can shed light on the project history, as well as the release process used by that project, and how those processes change. However, many factors make automating the retrieval of release history information difficult, such as the many sources of data, a lack of relevant standards and a disparity of tools used to create releases. In spite of the large amount of raw data available, no attempt has been made to create a release history database of a large number of projects in the open source ecosystem. 
This paper presents our experiences, including the tools, techniques and pitfalls, in our early work to create a software release history database which will be of use to future researchers who want to study and model the release engineering process in greater depth.}, keywords = {doap, flossmole cited, life cycle, release engineering, release history, release management, releases}, attachments = {https://flosshub.org/sites/flosshub.org/files/icssp11short-p034-tsay.pdf}, author = {Jason Tsay and Wright, Hyrum and Perry, Dewayne} } @conference {1307, title = {How do developers blog?}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {publishing frequency, post structure, word usage, publication patterns, content}, month = {05/2011}, pages = {123-132}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {We report on an exploratory study, which aims at understanding how software developers use social media compared to conventional development infrastructures. We analyzed the blogging and the committing behavior of 1,100 developers in four large open source communities. We observed that these communities intensively use blogs with one new entry about every 8 hours. A blog entry includes 14 times more words than a commit message. When analyzing the content of the blogs, we found that most popular topics represent high-level concepts such as functional requirements and domain concepts. Source code related topics are covered in less than 15\% of the posts. Our results also show that developers are more likely to blog after corrective engineering and management activities than after forward engineering and re-engineering activities. 
Our findings call for a hypothesis-driven research to further understand the role of social media in software engineering and integrate it into development processes and tools.}, keywords = {blog, communication, developer, eclipse, gnome, LDA, postgres, python}, isbn = {9781450305747}, doi = {10.1145/1985441.1985461}, author = {Maalej, Walid and Pagano, Dennis} } @conference {1306, title = {Java generics adoption}, booktitle = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11}, year = {2011}, note = {"we automatically analyzed 20 open source software projects. We analyzed the top {\textquotedblleft}most used{\textquotedblright} projects according to ohloh.net, including only projects with significant amounts of Java code" "The 20 selected projects were Ant, Azureus, CheckStyle, Commons Collections, FreeMind, FindBugs, Jetty, JEdit, JDT, JUnit, Eclipse-cs, Hibernate, Log4j, Lucene, Maven, the Spring Framework, Squirrel-SQL, Subclipse, Weka, and Xerces." "In mining the full version histories of these 20 projects, we analyzed the full content of each version of each Java source file, a total of 548,982,841 lines."}, month = {05/2011}, pages = {3-12}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Support for generic programming was added to the Java language in 2004, representing perhaps the most significant change to one of the most widely used programming languages today. Researchers and language designers anticipated this addition would relieve many long-standing problems plaguing developers, but surprisingly, no one has yet measured whether generics actually provide such relief. In this paper, we report on the first empirical investigation into how Java generics have been integrated into open source software by automatically mining the history of 20 popular open source Java programs, traversing more than 500 million lines of code in the process. 
We evaluate five hypotheses, each based on assertions made by prior researchers, about how Java developers use generics. For example, our results suggest that generics do not significantly reduce the number of type casts and that generics are usually adopted by a single champion in a project, rather than all committers.}, keywords = {commits, generics, java, source code, version history}, isbn = {9781450305747}, doi = {10.1145/1985441.1985446}, author = {Christian Bird and Murphy-Hill, Emerson and Parnin, Chris} } @proceedings {1277, title = {Knowledge Homogeneity and Specialization in the Apache HTTP Server Project}, year = {2011}, note = {"Our data set consists of the commit history and email archives for the Apache HTTP Server Project, spanning sixteen years (2/27/1995 - 1/31/2011)" "we 1) mapped the committers to email records, 2) cleaned the email records to remove extraneous information, 3) identified topics of discussion in the resulting messages, and 4) constructed a social network model from committers and topics." "If specialization exists within the httpd community, we should see distinct communities develop around topics. In addition, unique groups of developers should congregate around specialized subtopics. We examined the data from both angles: topical affinity and topic communities." }, month = {10/2011}, pages = {106-122}, publisher = {Springer}, abstract = {We present an analysis of developer communication in the Apache HTTP Server project. Using topic modeling techniques we expose latent conceptual sub-communities arising from developer specialization within the greater developer population. However, we found that among the major contributors to the project, very little specialization exists. 
We present theories to explain this phenomenon, and suggest further research.}, keywords = {apache, commits, developer, email, email archive, LDA, mailing list, revision control, revision history, scm, social network analysis, specialization, subversion, svn}, url = {http://sequoia.cs.byu.edu/lab/files/pubs/MacLean2011a.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/MacLean2011a.pdf}, author = {MacLean, Alexander C. and Pratt, Landon J. and Knutson, Charles D. and Ringger, Eric K.} } @conference {1406, title = {Process Mining Software Repositories}, booktitle = {15th European Conference on Software Maintenance and Reengineering (CSMR 2011) }, year = {2011}, pages = {5 - 14}, publisher = {IEEE}, organization = {IEEE}, address = {Oldenburg, Germany}, abstract = {Software developers{\textquoteright} activities are in general recorded in software repositories such as version control systems, bug trackers and mail archives. While abundant information is usually present in such repositories, successful information extraction is often challenged by the necessity to simultaneously analyze different repositories and to combine the information obtained. We propose to apply process mining techniques, originally developed for business process analysis, to address this challenge. However, in order for process mining to become applicable, different software repositories should be combined, and {\textquotedblleft}related{\textquotedblright} software development events should be matched: e.g., mails sent about a file, modifications of the file and bug reports that can be traced back to it. The combination and matching of events has been implemented in FRASR (FRamework for Analyzing Software Repositories), augmenting the process mining framework ProM. 
FRASR has been successfully applied in a series of case studies addressing such aspects of the development process as roles of different developers and the way bug reports are handled.}, keywords = {amsn, email, email archives, gcc, mailing list, Process mining, software repositories}, isbn = {978-1-61284-259-2}, doi = {10.1109/CSMR.2011.5}, attachments = {https://flosshub.org/sites/flosshub.org/files/2011-03_CSMR.pdf}, author = {Poncin, Wouter and Serebrenik, Alexander and van den Brand, Mark} } @article {1398, title = {The search for a research method for studying OSS process innovation}, journal = {Empirical Software Engineering}, volume = {16}, year = {2011}, month = {08/2011}, pages = {514 - 537}, abstract = {Medium-sized, open-participation Open Source Software (OSS) projects do not usually perform explicit software process improvement on any routine basis. It would be useful to understand how to get such a project to accept a process improvement proposal and hence to perform process innovation. We want to determine an effective and feasible qualitative research method for studying the above question. We present (narratively) a case study of how we worked towards and eventually found such a research method. The case involves four attempts at collecting suitable data about innovation episodes (direct participation (twice), polling developers for episodes, manually finding episodes in mailing list archives) and the adaptation of the Grounded Theory data analysis methodology. Direct participation allows gathering rather rich data, but does not allow for observing a sufficiently large number of innovation episodes. Polling developers for episodes did not prove to be useful. Using mailing list archives to find data to be analyzed is both feasible and effective. We also describe how the data thus found can be analyzed based on the Grounded Theory Method with suitable adjustments. 
By-and-large, our findings ought to apply to studying various phenomena in OSS development processes that are similarly heavyweight and infrequent. However, specific details may block this possibility and we cannot predict which details that might be. The amount of effort involved in direct participation approaches to qualitative research can easily be underestimated. Also, survey approaches are not well-suited for many process issues in OSS, because too few developers are sufficiently process-conscious. An approach based on passive observation is a viable alternative in the OSS context due to the availability of large amounts of fairly complete archival data. }, keywords = {argouml, Bochs, bugzilla, Flyspray, FreeDOS, gEDA, grounded theory, Grub, Innovation introduction, KVM, mailing list, Methodology, MonetDB, open source, Request Tracker, Rox, U-Boot, Xfce}, issn = {1573-7616}, doi = {10.1007/s10664-011-9160-1}, author = {Prechelt, Lutz and Oezbek, Christopher} } @conference {961, title = {Do stack traces help developers fix bugs?}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {118 - 121}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {A widely shared belief in the software engineering community is that stack traces are much sought after by developers to support them in debugging. But limited empirical evidence is available to confirm the value of stack traces to developers. In this paper, we seek to provide such evidence by conducting an empirical study on the usage of stack traces by developers from the ECLIPSE project. Our results provide strong evidence to this effect and also throws light on some of the patterns in bug fixing using stack traces. 
We expect the findings of our study to further emphasize the importance of adding stack traces to bug reports and that in the future, software vendors will provide more support in their products to help general users make such information available when filing bug reports.}, keywords = {bug fixing, bug report, debugging, eclipse, stack trace}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463280}, attachments = {https://flosshub.org/sites/flosshub.org/files/118-10-msr.pdf}, author = {Schr{\"o}ter, Adrian and Bettenburg, Nicolas and Premraj, Rahul} } @conference {837, title = {Lurking? Cyclopaths? A Quantitative Lifecycle Analysis of User Behavior in a Geowiki}, booktitle = {ACM Conference on Computer-Human Interaction (CHI)}, year = {2010}, month = {04/2010}, publisher = {Association for Computing Machinery}, organization = {Association for Computing Machinery}, address = {Atlanta, GA}, keywords = {geographic volunteer work, geowiki, lurking, open content, volunteered geographic information, Wiki}, author = {Panciera, K. and Priedhorsky, R. and Erickson, T. and Terveen, L.} } @conference {Oezbek:2010:OCS:1833272.1833274, title = {The onion has cancer: some social network analysis visualizations of open source project communication}, booktitle = {Proceedings of the 3rd International Workshop on Emerging Trends in Free/Libre/Open Source Software Research and Development (FLOSS {\textquoteright}10)}, series = {FLOSS {\textquoteright}10}, year = {2010}, note = {paper d/l from www.inf.fu-berlin.de/~oezbek/pub/OezThiPre10-SNA.pdf "We study the introduction of process innovations in Open Source projects [33] by manually extracting innovation episodes from archives of mailing-lists and analyzing these episodes qualitatively by the Grounded Theory Method [38]." 
"we took all messages from the mailing-list archives in 2007 of the projects we were studying, turned each participant into a node (unifying multiple e-mail addresses where needed [6]), and computed relationship strength between A and B as the number of e-mails that are a reply of B to a message from A or vice versa, according to the in-reply-to header of the e-mail." Our data set covers 11 of the 13 projects (from 7 different domains, selected from mailing-list archive Gmane to build a diverse set of projects) for which we analyzed innovation episodes. They include three workflow applications (Bugzilla, Flyspray, Request Tracker), two desktop environments (Rox, Xfce), two design tools (ArgoUML, a UML CASE tool; gEDA, a set of electronic design automation tools), one bootloader (Grub), one hardware emulator (Bochs), one operating system (FreeDOS), and one database management system (MonetDB).}, pages = {5{\textendash}10}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Background: People contribute to OSS projects in wildly different degrees, from reporting a single defect once and never coming back to spending many hours each workday on the project over several years - or anything in between. It is a common conception that these degrees of participation sort the participants into a number of similar groups which are layered like the peels of an onion: The onion model. Objective: We check whether this model of gradually different degrees of participation is valid with respect to the participation in OSS project mailing-list traffic. Methods: We perform social network analysis based on replies to mailing-list messages and use visualization to check the nature of three different groups of participants. 
Results: There appears to be a discontinuity with respect to core members: The degree to which very active core members (as opposed to less active co-developers) react to e-mails of senders from the project{\textquoteright}s periphery is significantly higher than would be expected from their level of activity in general. Limitations: The effect might be an artifact of the assumption that each mailing-list message can be treated the same. Conclusions: We conclude that core member status may be qualitatively (rather than just quantitatively) different and the transition of individual mailing-list participants towards ever higher participation is qualitatively discontinuous.}, keywords = {argouml, Bochs, bugzilla, communication structure, Flyspray, gEDA, Grub, MonetDB, open source process, request tracker, Rox, social network analysis, Xfce}, isbn = {978-1-60558-978-7}, doi = {10.1145/1833272.1833274}, url = {http://doi.acm.org/10.1145/1833272.1833274}, attachments = {https://flosshub.org/sites/flosshub.org/files/OezThiPre10-SNA.pdf}, author = {Oezbek, Christopher and Prechelt, Lutz and Thiel, Florian} } @conference {966, title = {Replaying IDE interactions to evaluate and improve change prediction approaches}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {161 - 170}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Change prediction helps developers by recommending program entities that will have to be changed alongside the entities currently being changed. To evaluate their accuracy, current change prediction approaches use data from versioning systems such as CVS or SVN. These data sources provide a coarse-grained view of the development history that flattens the sequence of changes in a single commit. 
They are thus not a valid basis for evaluation in the case of development-style prediction, where the order of the predictions has to match the order of the changes a developer makes. We propose a benchmark for the evaluation of change prediction approaches based on fine-grained change data recorded from IDE usage. Moreover, the change prediction approaches themselves can use the more accurate data to fine-tune their prediction. We present an evaluation procedure and use it on several change prediction approaches, both novel and from the literature, and report on the results.}, keywords = {cbse, change based software evolution, change prediction, changes, commit, cvs, development history, eclipseeye, ide, mylyn, spyware, svn}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463278}, attachments = {https://flosshub.org/sites/flosshub.org/files/161Robbes2010changePrediction.pdf}, author = {Robbes, Romain and Pollet, Damien and Lanza, Michele} } @conference {1223, title = {Success and Abandonment in Open Source Commons: Selected Findings from an Empirical Study of Sourceforge.net Projects}, booktitle = {Second International Workshop on Building Sustainable Open Source Communities (OSCOMM 2010)}, year = {2010}, note = {"we utilize data collected from August through October 2006 on 107,747 OSS projects hosted on the open source hosting site Sourceforge.net (SF henceforth). We combined SF project data gathered by the FLOSSmole project (Howison et al., 2006) with other SF data we {\textquotedblleft}crawled{\textquotedblright} ourselves"}, month = {05/2010}, abstract = {Some open source software collaborations are sustained over long periods of time and across several versions of a software product, while others become abandoned even before the first version of the product has been developed. In this study, we identify factors that might be responsible for one or the other of these collaborative trajectories. 
We examine 107,747 open source software projects hosted on Sourceforge.net in August 2006 using data available through the FLOSSmole Project. We employ Classification and Regression Tree modeling and Random Forests statistical approaches to begin to establish an understanding of how various project attributes, especially physical and community ones, contribute to project success or abandonment. We find that factors associated with success and abandonment differ for projects in the early stage of development (pre-first release) compared to projects that have had a first release, and that product utility, project vision, leadership, and group-size are associated with success in open source collaborations. We also find that successful open source projects exist across all types of software and not simply in areas associated with the open source {\textquotedblleft}movement.{\textquotedblright} Other evidence suggests that Sourceforge.net may play an important role in {\textquotedblleft}intellectual match-making.{\textquotedblright}}, keywords = {abandonment, flossmole, metadata, project failure, project success, sourceforge, time}, attachments = {https://flosshub.org/sites/flosshub.org/files/osscomm003.pdf}, author = {Schweik, C. M. and English, R. and Paienjton, Q. and Haire, S.} } @conference {1217, title = {Trends That Affect Temporal Analysis Using SourceForge Data}, booktitle = {5th Workshop on Public Data about Software Development (WoPDaSD 2010)}, year = {2010}, note = {"In this paper we examine some of the limitations of artifact data by specifically addressing the applicability of SourceForge data to the study of project evolution." "For our analysis we examine 9,997 Production/Stable or Maintenance phase projects stored in CVS on SourceForge and extracted in October of 2006 [5]"}, abstract = {SourceForge is a valuable source of software artifact data for researchers who study project evolution and developer behavior. 
However, the data exhibit patterns that may bias temporal analyses. Most notable are cliff walls in project source code repository timelines, which indicate large commits that are out of character for the given project. These cliff walls often hide significant periods of development and developer collaboration{\textemdash}a threat to studies that rely on SourceForge repository data. We demonstrate how to identify these cliff walls, discuss reasons for their appearance, and propose preliminary measures for mitigating their effects in evolution-oriented studies.}, keywords = {cliff walls, committers, cvs, evolution, growth, source code, sourceforge, time, time series}, attachments = {https://flosshub.org/sites/flosshub.org/files/wopdasd001.pdf}, author = {MacLean, Alexander C. and Pratt, Landon J. and Krein, Jonathan L. and Knutson, Charles D.} } @article {10.1109/TSE.2010.63, title = {What Makes a Good Bug Report?}, journal = {IEEE Transactions on Software Engineering}, volume = {36}, year = {2010}, note = {"To find out which matter most, we asked 872 developers from the APACHE, ECLIPSE, and MOZILLA projects to: 1. Complete a survey on important information in bug reports and the problems they faced with them. We received a total of 156 responses to our survey (Section 2 and 3). 2. Rate the quality of bug reports from very poor to very good on a five-point Likert scale [22]. We received a total of 1,186 votes for 289 randomly selected bug reports (Section 4). In addition, we asked 1,354 reporters from the same projects to complete a similar survey, out of which 310 responded. "}, pages = {618-643}, publisher = {IEEE Computer Society}, address = {Los Alamitos, CA, USA}, abstract = {In software development, bug reports provide crucial information to developers. However, these reports widely differ in their quality. We conducted a survey among developers and users of APACHE, ECLIPSE, and MOZILLA to find out what makes a good bug report. 
The analysis of the 466 responses revealed an information mismatch between what developers need and what users supply. Most developers consider steps to reproduce, stack traces, and test cases as helpful, which are at the same time most difficult to provide for users. Such insight is helpful to design new bug tracking tools that guide users at collecting and providing more helpful information. Our CUEZILLA prototype is such a tool and measures the quality of new bug reports; it also recommends which elements should be added to improve the quality. We trained CUEZILLA on a sample of 289 bug reports, rated by developers as part of the survey. In our experiments, CUEZILLA was able to predict the quality of 31{\textendash}48\% of bug reports accurately.}, keywords = {bug report, Survey}, issn = {0098-5589}, doi = {10.1109/TSE.2010.63}, url = {http://dl.acm.org/citation.cfm?id=1453146}, attachments = {https://flosshub.org/sites/flosshub.org/files/bettenburg-fse-2008.pdf}, author = {Zimmermann, Thomas and Premraj, Rahul and Bettenburg, Nicolas and Sascha Just and Schr{\"o}ter, Adrian and Weiss, Cathrin} } @article {1340, title = {Innovation, Imitation and Open Source}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {28 - 42}, abstract = {An extensive empirical literature indicates that, even without formal intellectual property rights, innovators enjoy a variety of first-mover advantages and that {\textquoteleft}imitation{\textquoteright} is itself a costly activity. There is also accumulating evidence that an {\textquoteleft}open{\textquoteright} approach to knowledge production can deliver substantial efficiency advantages. This article introduces a formal framework incorporating all of these factors. 
We examine the relative performance of an {\textquoteleft}open{\textquoteright} versus a {\textquoteleft}closed{\textquoteright} (proprietary) regime, and explicitly characterize the circumstances in which an open approach, despite its effect on facilitating imitation, results in a higher level of innovation.}, issn = {1942-3934}, doi = {10.4018/jossp.2009040103}, author = {Pollock, Rufus} } @conference {944, title = {Learning from defect removals}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {179 - 182}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Recent research has tried to identify changes in source code repositories that fix bugs by linking these changes to reports in issue tracking systems. These changes have been traced back to the point in time when they were previously modified as a way of identifying bug introducing changes. But we observe that not all changes linked to bug tracking systems are fixing bugs; some are enhancing the code. Furthermore, not all fixes are applied at the point in the code where the bug was originally introduced. 
We flesh out these observations with a manual review of several software projects, and use this opportunity to see how many defects are in the scope of static analysis tools.}, keywords = {bug fixing, bugzilla, change management, cherry, cvs, eclipse, groovy, launching, source code, svn, text editor}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069500}, attachments = {https://flosshub.org/sites/flosshub.org/files/179LearnFromDefects-MSR09.pdf}, author = {Ayewah, Nathaniel and Pugh, William} } @conference {928, title = {Mining source code to automatically split identifiers for software analysis}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {71--80}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Automated software engineering tools (e.g., program search, concern location, code reuse, quality assessment, etc.) increasingly rely on natural language information from comments and identifiers in code. The first step in analyzing words from identifiers requires splitting identifiers into their constituent words. Unlike natural languages, where space and punctuation are used to delineate words, identifiers cannot contain spaces. One common way to split identifiers is to follow programming language naming conventions. For example, Java programmers often use camel case, where words are delineated by uppercase letters or non-alphabetic characters. However, programmers also create identifiers by concatenating sequences of words together with no discernible delineation, which poses challenges to automatic identifier splitting. In this paper, we present an algorithm to automatically split identifiers into sequences of words by mining word frequencies in source code. 
With these word frequencies, our identifier splitter uses a scoring technique to automatically select the most appropriate partitioning for an identifier. In an evaluation of over 8000 identifiers from open source Java programs, our Samurai approach outperforms the existing state of the art techniques.}, keywords = {java, samurai, sourceforge}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069482}, attachments = {https://flosshub.org/sites/flosshub.org/files/71EnslenandHillandPollockandVijayShanker.pdf}, author = {Enslen, Eric and Hill, Emily and Pollock, Lori and Vijay-Shanker, K.} } @conference {569, title = {Open Source Is Changing the Way Work Gets Done}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {1}, publisher = {Springer}, organization = {Springer}, chapter = {1}, abstract = {Open source software is changing not only the way the software industry works, but also the way work gets done. In the open source software model, individuals and companies collaborate together to produce software. They learn new ways of getting work done that are based on meritocracy and little management. In addition, they learn ways of communication that work well across large groups and virtual environments. These new ways of communicating and getting work done are changing the nature of work across all industries, not just the software industry, industries like mobile technology providers and medical equipment. Come learn how the open source software model is changing the way individuals and companies work and collaborate. 
}, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_1}, attachments = {https://flosshub.org/sites/flosshub.org/files/Open\%20Source\%20is\%20Changing.pdf}, author = {Peters, Stormy} } @conference {611, title = {Panel: Governance in Open Source Projects and Communities}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {370}, publisher = {Springer}, organization = {Springer}, chapter = {43}, abstract = {{\textquotedblleft}Although considerable research has been devoted to the growth and expansion of open source communities and the comparison between the efficiency of corporate structures and community structures in the field of software development, rather less attention has been paid to their governance structures (control, monitoring, supervision){\textquotedblright} (Lattemann and Stieglitz 2005). }, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_43}, attachments = {https://flosshub.org/sites/flosshub.org/files/Panel\%20Governnance.pdf}, author = {Bolici, Francesco and de Laat, Paul and Ljungberg, Jan and Pontiggia, Andrea and Rossi Lamastra, Cristina} } @conference {607, title = {Release Mismanagement in Open Source}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {365}, publisher = {Springer}, organization = {Springer}, chapter = {39}, abstract = {To a user, unreleased software is nonexistent software, even in open source projects which make the source code readily accessible. Every project requires regular software releases to encourage adoption and attract developers. 
Different projects approach the task of releasing software in various ways [2], and many experience breakdowns in their release process at some point during their evolution [3]. This poster presents examples of these instances, and how projects are learning from and improving upon them. }, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_39}, attachments = {https://flosshub.org/sites/flosshub.org/files/Release\%20Mismanagement.pdf}, author = {Wright, Hyrum and Perry, Dewayne} } @conference {591, title = {A Survey of Usability Practices in Free/Libre/Open Source Software}, booktitle = {OSS2009: Open Source Ecosystems: Diverse Communities Interacting (IFIP 2.13)}, series = {IFIP Advances in Information and Communication Technology}, volume = {299}, year = {2009}, pages = {264--273}, publisher = {Springer}, organization = {Springer}, chapter = {23}, abstract = {A review of case studies about usability in eight Free/Libre/Open Source Software (FLOSS) projects showed that an important issue regarding a usability initiative in the project was the lack of user research. User research is a key component in the user-centered design (UCD) process and a necessary step for creating usable products. Reasons why FLOSS projects suffered from a lack of user research included poor or unclear project leadership, cultural differences between developer and designers, and a lack of usability engineers. By identifying these critical issues, the FLOSS usability community can begin addressing problems in the efficacy of usability activities and work towards creating more usable FLOSS products. 
}, isbn = {978-3-642-02031-5}, doi = {10.1007/978-3-642-02032-2_23}, attachments = {https://flosshub.org/sites/flosshub.org/files/Survey\%20of\%20Usability\%20Practices.pdf}, author = {Paul, Celeste} } @conference {938, title = {Using association rules to study the co-evolution of production \& test code}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {151--154}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Unit tests are generally acknowledged as an important aid to produce high quality code, as they provide quick feedback to developers on the correctness of their code. In order to achieve high quality, well-maintained tests are needed. Ideally, tests co-evolve with the production code to test changes as soon as possible. In this paper, we explore an approach based on association rule mining to determine whether production and test code co-evolve synchronously. 
Through two case studies, one with an open source and another one with an industrial software system, we show that our association rule mining approach allows one to assess the co-evolution of product and test code in a software project and, moreover, to uncover the distribution of programmer effort over pure coding, pure testing, or a more test-driven-like practice.}, keywords = {association rules, checkstyle, source code, unit test}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069493}, attachments = {https://flosshub.org/sites/flosshub.org/files/151UsingAssociation.pdf}, author = {Lubsen, Zeeger and Zaidman, Andy and Pinzger, Martin} } @conference {940, title = {Using Latent Dirichlet Allocation for automatic categorization of software}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {163--166}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {In this paper, we propose a technique called LACT for automatically categorizing software systems in open-source repositories. LACT is based on latent Dirichlet Allocation, an information retrieval method which is used to index and analyze source code documents as mixtures of probabilistic topics. For an initial evaluation, we performed two studies. In the first study, LACT was compared against an existing tool, MUDABlue, for classifying 41 software systems written in C into problem domain categories. The results indicate that LACT can automatically produce meaningful category names and yield classification results comparable to MUDABlue. In the second study, we applied LACT to 43 software systems written in different programming languages such as C/C++, Java, C$\#$, PHP, and Perl. 
The results indicate that LACT can be used effectively for the automatic categorization of software systems regardless of the underlying programming language or paradigm. Moreover, both studies indicate that LACT can identify several new categories that are based on libraries, architectures, or programming languages, which is a promising improvement as compared to manual categorization and existing techniques.}, keywords = {categorization, category mining, lact, mudablue, multiple languages, repository}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069496}, attachments = {https://flosshub.org/sites/flosshub.org/files/163MSR2009_TianPos.pdf}, author = {Tian, Kai and Revelle, Meghan and Poshyvanyk, Denys} } @conference {851, title = {Wikipedians are born, not made: a study of power editors on Wikipedia}, booktitle = {ACM 2009 International Conference on Group Work}, year = {2009}, month = may, day = {10}, pages = {51--60}, publisher = {Association for Computing Machinery}, organization = {Association for Computing Machinery}, address = {Sanibel Island, FL}, keywords = {computer-supported, cooperative, interaction, web-based, work}, author = {Panciera, K. and Halfaker, A. and Terveen, L.} } @conference {Hill:2008:AAM:1370750.1370771, title = {AMAP: automatically mining abbreviation expansions in programs to enhance software maintenance tools}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {79--88}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {When writing software, developers often employ abbreviations in identifier names. In fact, some abbreviations may never occur with the expanded word, or occur more often in the code. 
However, most existing program comprehension and search tools do little to address the problem of abbreviations, and therefore may miss meaningful pieces of code or relationships between software artifacts. In this paper, we present an automated approach to mining abbreviation expansions from source code to enhance software maintenance tools that utilize natural language information. Our scoped approach uses contextual information at the method, program, and general software level to automatically select the most appropriate expansion for a given abbreviation. We evaluated our approach on a set of 250 potential abbreviations and found that our scoped approach provides a 57\% improvement in accuracy over the current state of the art.}, keywords = {automatic abbreviation expansion, azureus, itext.net, liferay, maintenance, natural language, openoffice.org, program comprehension, source code, tiger envelopes, tools}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370771}, url = {http://doi.acm.org/10.1145/1370750.1370771}, attachments = {https://flosshub.org/sites/flosshub.org/files/p79-hill.pdf}, author = {Hill, Emily and Fry, Zachary P. and Boyd, Haley and Sridhara, Giriprasad and Novikova, Yana and Pollock, Lori and Vijay-Shanker, K.} } @conference {972, title = {Extracting structural information from bug reports}, booktitle = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08}, year = {2008}, month = may, pages = {27--30}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {In software engineering experiments, the description of bug reports is typically treated as natural language text, although it often contains stack traces, source code, and patches. Neglecting such structural elements is a loss of valuable information; structure usually leads to a better performance of machine learning approaches. 
In this paper, we present a tool called infoZilla that detects structural elements from bug reports with near perfect accuracy and allows us to extract them. We anticipate that infoZilla can be used to leverage data from bug reports at a different granularity level that can facilitate interesting research in the future.}, keywords = {bug reports, eclipse, enumerations, infozilla, natural language, patches, source code, stack trace}, isbn = {9781605580241}, doi = {10.1145/1370750.1370757}, attachments = {https://flosshub.org/sites/flosshub.org/files/p27-bettenburg.pdf}, author = {Premraj, Rahul and Zimmermann, Thomas and Kim, Sunghun and Bettenburg, Nicolas} } @conference {Parnin:2008:ICD:1370750.1370765, title = {Improving change descriptions with change contexts}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {51--60}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software archives are one of the best sources available to researchers for understanding the software development process. However, much detective work is still necessary in order to unravel the software development story. During this process, researchers must isolate changes and follow their trails over time. In support of this analysis, several research tools have provided different representations for connecting the many changes extracted from software archives. Most of these tools are based on textual analysis of source code and use line-based differencing between software versions. This approach limits the ability to process changes structurally resulting in less concise and comparable items. Adoption of structure-based approaches have been hampered by complex implementations and overly verbose change descriptions. 
We present a technique for expressing changes that is fine-grained but preserves some structural aspects. The structural information itself may not have changed, but instead provides a context for interpreting the change. This in turn, enables more relevant and concise descriptions in terms of software types and programming activities. We apply our technique to common challenges that researchers face, and then we discuss and compare our results with other techniques.}, keywords = {bytecode analysis, cecil, change management, change pairs, semantic diff, zedgraph}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370765}, url = {http://doi.acm.org/10.1145/1370750.1370765}, attachments = {https://flosshub.org/sites/flosshub.org/files/p51-parnin.pdf}, author = {Parnin, Chris and G{\"o}rg, Carsten} } @article {68, title = {JADE: A software framework for developing multi-agent applications. Lessons learned}, journal = {Information and Software Technology}, volume = {50}, year = {2008}, note = {Times Cited: 0}, pages = {10--21}, abstract = {Since a number of years agent technology is considered one of the most innovative technologies for the development of distributed software systems. While not yet a mainstream approach in software engineering at large, a lot of work on agent technology has been done, many research results and applications have been presented, and some software products exists which have moved from the research community to the industrial community. One of these is JADE, a software framework that facilitates development of interoperable intelligent multi-agent systems and that is distributed under an Open Source License. JADE is a very mature product, used by a heterogeneous community of users both in research activities and in industrial applications. 
This paper presents JADE and its technological components together with a discussion of the possible reasons for its success and lessons learned from the somewhat detached perspective possible nine years after its inception. (c) 2007 Elsevier B.V. All rights reserved.}, author = {Bellifemine, F. and Caire, G. and Poggi, A. and Rimassa, G.} } @conference {bird2008lss, title = {Latent Social Structure in Open Source Projects}, booktitle = {SIGSOFT {\textquoteright}08/FSE-16: Proceedings of the 16th ACM SIGSOFT Symposium on Foundations of Software Engineering}, year = {2008}, note = {We first identified the projects of interest and mined the developer mailing list archives and source code repositories of each of the projects. Next, we filtered the mailing list messages and created a social network of the participants over 3-month intervals. We then calculated the community structure of each social network. Following that, the relevance of the divisions of participants was evaluated quantitatively using mined source code development data and qualitatively by manual methods.}, pages = {24--35}, publisher = {ACM}, organization = {ACM}, abstract = {Commercial software project managers design project organizational structure carefully, mindful of available skills, division of labour, geographical boundaries, etc. These organizational {\textquotedblleft}cathedrals{\textquotedblright} are to be contrasted with the {\textquotedblleft}bazaar-like{\textquotedblright} nature of Open Source Software (OSS) Projects, which have no pre-designed organizational structure. Any structure that exists is dynamic, self-organizing, latent, and usually not explicitly stated. Still, in large, complex, successful, OSS projects, we do expect that subcommunities will form spontaneously within the developer teams. Studying these subcommunities, and their behavior can shed light on how successful OSS projects self-organize. 
This phenomenon could well hold important lessons for how commercial software teams might be organized. Building on known well-established techniques for detecting community structure in complex networks, we extract and study latent subcommunities from the email social network of several projects: Apache HTTPD, Python, PostgresSQL, Perl, and Apache ANT. We then validate them with software development activity history. Our results show that subcommunities do indeed spontaneously arise within these projects as the projects evolve. These subcommunities manifest most strongly in technical discussions, and are significantly connected with collaboration behaviour.}, attachments = {https://flosshub.org/sites/flosshub.org/files/bird2008lss.pdf}, author = {Bird, Christian and Pattison, David and D{\textquoteright}Souza, Raissa and Filkov, Vladimir and Devanbu, Premkumar} } @conference {553, title = {The Networked Forge: New Environments for Libre Software Development}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {299--306}, publisher = {Springer}, organization = {Springer}, chapter = {26}, abstract = {Libre (free, open source) software forges (sites hosting the development infrastructure for a collection of projects) have been stable in architecture, services and concept since they become popular during the late 1990s. During this time several problems that cannot be solved without dramatic design changes have become evident. To overcome them, we propose a new concept, the {\textquotedblleft}networked forge{\textquotedblright}, focused on addressing the core characteristics of libre software development and the needs of developers. 
The key of this proposal is to re-engineer forges as a net of distributed components which can be composed and configured according to the needs of users, using a combination of web 2.0, semantic web and mashup technologies. This approach is flexible enough to accommodate different development processes, while at the same time interoperates with current facilities. }, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_26}, attachments = {https://flosshub.org/sites/flosshub.org/files/Networked\%20Forge.pdf}, author = {Gonzalez-Barahona, Jesus and Mart{\'\i}nez, Andr{\'e}s and Polo, Alvaro and Hierro, Juan and Reyes, Marcos and Soriano, Javier and Fern{\'a}ndez, Rafael} } @conference {568, title = {Open Source Environments for Collaborative Experiments in e-Science}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {415--416}, publisher = {Springer}, organization = {Springer}, chapter = {41}, abstract = {Open Source Software (OSS) for e-Science should make reference to the paradigm of a distributed surrounding over a multi system mix of Web Services and Grid technologies, allowing data exchanging through services, according to standards in the area of the Grid and of Service Oriented Computing (SOC). In fact, biologists, medical doctors, and scientists are often involved in time consuming experiments and are aware of the degree of difficulty in validating or rejecting a given hypothesis by lab experiments. The benefits of OSS for e-Science consider that as many operating nodes as possible can work cooperatively sharing data, resources, and software, thus avoiding the bottleneck of licenses for distributed use of tools needed to perform cooperative scientific experiments. 
In particular, this chapter presents an architecture based on nodes equipped with a Grid and with Web Services in order to access OSS, showing how scientific experiments can be enacted through the use of a cooperation among OSS sites. Such a choice, besides reducing the cost of the experiments, would support distributed introduction of OSS among other actors of the dynamical networks, thus supporting the awareness about OSS and their diffusion. An OSS environment for cooperative scientific experiments (e-experiments) can effectively support the distributed execution of different classes of experiments, from visualization to model identification through clustering and rules generation, in various application fields, such as bioinformatics, neuro-informatics, tele-monitoring, or drug discovery. By applying Web Services and Grid computing, an experiment or a simulation can be executed in a cooperative way on various computation nodes of a network equipped with OSS, allowing data exchange among researchers. Our environment formalizes experiments as cooperative services on various computational nodes of a grid network. Basic elements are models, languages, and support tools creating a virtual network of organizational responsibility of the global experiments, according to rules under which each node can execute local services to be accessed by other nodes in order to achieve the whole experiment{\textquoteright}s results. 
}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_41}, attachments = {https://flosshub.org/sites/flosshub.org/files/Open\%20Source\%20Environments.pdf}, author = {Bosin, Andrea and Dess{\'\i}, Nicoletta and Fugini, Maria and Liberati, Diego and Pes, Barbara} } @conference {565, title = {Open Source Reference Systems for Biometric Verification of Identity}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {397--404}, publisher = {Springer}, organization = {Springer}, chapter = {38}, abstract = {This paper focuses on the common evaluation framework which was developed by the BioSecure Network of Excellence during the European FP6 project BioSecure (Biometrics for Secure authentication). This framework, which is composed of open-source reference systems, publicly available databases, assessment protocols and benchmarking results, introduces a new experimental methodology for conducting, reporting and comparing experiments for biometric systems, participating to standardisation efforts. Its use will permit to make a link between different published works. It provides also the necessary tools to assure the reproducibility of the benchmarking biometric experiments. This framework can be considered as a reliable and innovative process to evaluate the progress of research in the field of biometrics. 
}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_38}, attachments = {https://flosshub.org/sites/flosshub.org/files/Open\%20Source\%20Reference\%20Systems.pdf}, author = {Mayoue, Aur{\'e}lien and Petrovska-Delacr{\'e}taz, Dijana} } @conference {531, title = {Overview on Trust in Large FLOSS Communities}, booktitle = {OSS2008: Open Source Development, Communities and Quality (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {275}, year = {2008}, pages = {47--56}, publisher = {Springer}, organization = {Springer}, chapter = {4}, abstract = {The paper presents a survey of mature Free/Libre Open Source Software communities. The main focus of the survey is the collection of data related to the practices of these communities related to trust elements in their products. The survey is carried out using a structured questionnaire about thoughts and practices followed by Free/Libre Open Source Software communities. The survey focuses on the analysis of the development processes adopted by such communities. The results of the survey confirms basic ideas related to Free/Libre Open Source Software and explains in more detail specific issues related to trust and trustworthiness of the Free/Libre Open Source Software development process. 
}, isbn = {978-0-387-09683-4}, doi = {10.1007/978-0-387-09684-1_4}, attachments = {https://flosshub.org/sites/flosshub.org/files/Overview\%20on\%20Trust.pdf}, author = {Petrinja, Etiel and Sillitti, Alberto and Succi, Giancarlo} } @conference {Pattison:2008:TWP:1370750.1370776, title = {Talk and work: a preliminary report}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, month = may, pages = {113--116}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Developers in Open Source Software (OSS) projects communicate using mailing lists. By convention, the mailing lists used only for task-related discussions, so they are primarily concerned with the software under development, and software process issues (releases, etc.). We focus on the discussions concerning the software, and study the frequency with which software entities (functions, methods, classes, etc) are mentioned in the mail. We find a strong, striking, cumulative relationship between this mention count in the email, and the number of times these entities are included in changes to the software. When we study the same phenomena over a series of time-intervals, the relationship is much less strong. This suggests some interesting avenues for future research.}, keywords = {ant, apache, email, mailing lists, postgresql, python, scm, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370776}, url = {http://doi.acm.org/10.1145/1370750.1370776}, attachments = {https://flosshub.org/sites/flosshub.org/files/p113-pattison.pdf}, author = {Pattison, David S. and Bird, Christian A. and Premkumar T. 
Devanbu} } @conference {Pohl:2008:DNM:1370114.1370135, title = {What dynamic network metrics can tell us about developer roles}, booktitle = {Proceedings of the 2008 international workshop on Cooperative and human aspects of software engineering (CHASE {\textquoteright}08)}, series = {CHASE {\textquoteright}08}, year = {2008}, note = {paper d/l from http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.217.4765 this particular paper focuses on building the SNA using co-authorship of files in Tomcat.}, pages = {81--84}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Software development is heavily dependent on the participants of the process and their roles within the process. Each developer has his specific skills and interests and hence contributes to the project in a different way. While some programmers work on separate modules, others developers integrate these modules towards the final product. To identify such different groups of people one approach is to work with methods taken from social network analysis. To this end, a social network has to be defined in a suitable way, and appropriate analysis strategies have to be chosen. 
This paper shows how a network of software developers could be defined based on information in a software repository, and what it can possibly tell about roles of developers (and what not) in the process of the application server Tomcat.}, keywords = {identifying roles, social network analysis}, isbn = {978-1-60558-039-5}, doi = {10.1145/1370114.1370135}, url = {http://doi.acm.org/10.1145/1370114.1370135}, attachments = {https://flosshub.org/sites/flosshub.org/files/10.1.1.217.4765.pdf}, author = {Pohl, Mathias and Diehl, Stephan} } @conference {673, title = {Can Knowledge Management be Open Source?}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, volume = {234}, year = {2007}, pages = {59--70}, publisher = {Springer}, organization = {Springer}, chapter = {5}, abstract = {As we move further into a knowledge economy where collaboration and innovation are increasingly central to organisational effectiveness, enterprises need to pay more attention to the informal networks that exist within the organisation. Wikis may provide a more appropriate knowledge management capability and environment to capture tacit knowledge. Where traditional organisational cultures see that knowledge management must be tightly protected, Wikis opt for an open source approach where knowledge is shared and distributed for innovation to continue. This paper aims to explicate more participatory organisational processes of creation, accumulation and maintenance of knowledge. It uses Activity Theory as a framework to describe the components of an activity system where a Wiki is a tool mediating employee-based knowledge management activities and thereby democratising organisational knowledge. 
}, issn = {978-0-387-72485-0}, doi = {http://dx.doi.org/10.1007/978-0-387-72486-7_5}, attachments = {https://flosshub.org/sites/flosshub.org/files/Can\%20Knowledge\%20Management.pdf}, author = {Pfaff, Charmaine and Hasan, Helen} } @conference {648, title = {Context-Dependent Evaluation Methodology for Open Source Software}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {234/2007}, year = {2007}, month = {2007///}, pages = {301 - 306}, publisher = {Springer}, organization = {Springer}, chapter = {32}, abstract = {Many evaluation methodologies have been proposed to mitigate the risks of choosing Open Source Software as an effective solution to an enterprise{\textquoteright}s problem. This work extracts the shared traits from the most important and widely known evaluation models, and re-applies them to create a new methodology. This methodology has been designed both to be used for the creation of a common knowledge base, and to be specialized for application in the context of the particular breed of small and medium-size enterprises found on the Italian ground. }, issn = {978-0-387-72485-0}, doi = {http://dx.doi.org/10.1007/978-0-387-72486-7_32}, attachments = {https://flosshub.org/sites/flosshub.org/files/Context-Dependent\%20Evaluation.pdf}, author = {Cabano, Michele and Monti, Cesare and Piancastelli, Giulio} } @conference {882, title = {Creating, Destroying, and Restoring Value in Wikipedia}, booktitle = {Conference on Supporting Group Work}, year = {2007}, author = {Reid Priedhorsky and Jilin Chen and Shyong K. 
Lam and Katherine Panciera and Loren Terveen and John Riedl} } @conference {633, title = {EDOS Distribution System: a P2P architecture for open-source content dissemination}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {234/2007}, year = {2007}, month = {2007///}, pages = {209 - 215}, publisher = {Springer}, organization = {Springer}, chapter = {17}, abstract = {The open-source software communities currently face an increasing complexity of managing the software content among theirs developers and contributors. This is mainly due to the continuously growing size of the software, the high frequency of the updates, and the heterogeneity of the participants. We propose a distribution system that tackles two main issues in the software content management: efficient content dissemination through a P2P system architecture, and advanced information system capabilities, using a distributed index for resource location. }, issn = {978-0-387-72485-0}, doi = {http://dx.doi.org/10.1007/978-0-387-72486-7_17}, attachments = {https://flosshub.org/sites/flosshub.org/files/EDOS\%20Distribution\%20System.pdf}, author = {Abiteboul, Serge and Dar, Itay and Pop, Radu and Vasile, Gabriel and Vodislav, Dan} } @article {76, title = {From planning to mature: On the success of open source projects}, journal = {Research Policy}, volume = {36}, year = {2007}, note = {Times Cited: 0}, pages = {1575-1586}, abstract = {Open source is an example of user-centric innovation initiated by an individual or group of users to satisfy their specific needs; the more a software evolves towards a stable release able to address the requirements of its developers, the more successful the project. 
In this paper we use a large data-set obtained from SourceForge.net to estimate the relationship between a series of different characteristics of OS projects and the probability of evolution of the source code from a preliminary release to a mature version. We show that while projects distributed under highly restrictive licensing terms (GPL) have a significantly smaller probability of reaching a stable release, applications towards sophisticated users have a larger probability of evolving in the development status. Interestingly, we find that the size of the "community of developers" increases the chances of progress but this effect decreases as the community gets larger, a signal of possible coordination problems. (C) 2007 Elsevier B.V. All rights reserved.}, author = {Stefano Comino and Fabio M. Manenti and Parisi, M. L.} } @conference {991, title = {How Long Will It Take to Fix This Bug?}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {1}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Predicting the time and effort for a software problem has long been a difficult task. We present an approach that automatically predicts the fixing effort, i.e., the person-hours spent on fixing an issue. Our technique leverages existing issue tracking systems: given a new issue report, we use the Lucene framework to search for similar, earlier reports and use their average time as a prediction. Our approach thus allows for early effort estimation, helping in assigning issues and scheduling stable releases. We evaluated our approach using effort data from the JBoss project. 
Given a sufficient number of issues reports, our automatic predictions are close to the actual effort; for issues that are bugs, we are off by only one hour, beating naive predictions by a factor of four.}, keywords = {bug fixing, bug reports, effort estimation, jboss, lucene, prediction, time}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.13}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300001.pdf}, author = {Weiss, Cathrin and Premraj, Rahul and Zimmermann, Thomas and Zeller, Andreas} } @conference {646, title = {Learning Through Practical Involvement in the OSS Ecosystem: Experiences from a Masters Assignment}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {234/2007}, year = {2007}, month = {2007///}, pages = {289 - 294}, publisher = {Springer}, organization = {Springer}, chapter = {30}, abstract = {Increased awareness of and interest in Open Source has led to a number of university teaching initiatives, at both national and European level. In this paper we present experiences from a practical assignment designed to give students on an Open Source Masters course an insight into real involvement in Open Source projects. It discusses the motivations for the assignment, and how it was set up and executed. It reports on post facto student feedback, and reflects on a parallel, reduced exercise offered at undergraduate level. We find that the learning experience was both positive and valuable in that it gave real insight into Open Source participation, and also encouraged further participation in Open Source projects by students after the course had completed. 
}, issn = {978-0-387-72485-0}, doi = {http://dx.doi.org/10.1007/978-0-387-72486-7_30}, attachments = {https://flosshub.org/sites/flosshub.org/files/Learning\%20through\%20practical\%20involvement.pdf}, author = {Lundell, Bj{\"o}rn and Persson, Anna and Lings, Brian} } @conference {657, title = {Legal issues for free and open source software in government}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {234/2007}, year = {2007}, month = {2007///}, pages = {353 - 354}, publisher = {Springer}, organization = {Springer}, chapter = {41}, abstract = {As more governments begin to adopt and release free and open source software, it is important to be able to readily recognise and identify the associated legal risks and potential liabilities. This panel will examine and discuss the most common of these associated risks and liabilities. These issues include, but are not limited to: indemnities against claims of intellectual property infringement from third parties requirements of consumer protection and antitrust legislation obligations to redistribute source, and when they arise enforceability of free software licences layering and combining of licences dual-licensing licence incompatibility software patent liability contemporary developments in software licensing }, issn = {978-0-387-72485-0}, doi = {http://dx.doi.org/10.1007/978-0-387-72486-7_41}, attachments = {https://flosshub.org/sites/flosshub.org/files/Legal\%20Issues\%20for\%20Free\%20and\%20OSS.pdf}, author = {Nic Suzor and Fitzgerald, Brian and Perry, Mark} } @article {flosswp328, title = {Open Source and the software industry. How firms do business out of an open innovation paradigm}, year = {2007}, month = {January}, abstract = {Open Source Software (OSS) represents an {\textquotedblleft}open innovation{\textquotedblright} paradigm based on knowledge produced and shared by developers and users. 
The paper inquires how OSS challenges the three Teece{\textquoteright}s building blocks. New findings from a large survey of European software companies, show that within the OSS paradigm: (i) OSS can be a sustainable business model even in the absence of any appropriability; (ii) complementary assets are distributed collectively and made widely available without the need for dedicated contractual arrangements; (iii) a de facto dominant design may stem from a community of users/producers even independently of the presence of powerful large companies.}, attachments = {https://flosshub.org/sites/flosshub.org/files/paper_euram_2007.pdf}, author = {Andrea Bonaccorsi and Monica Merito and Cristina Rossi and Lucia Piscitello} } @conference {1014, title = {Predicting Eclipse Bug Lifetimes}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {29 - 29}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {In non-trivial software development projects planning and allocation of resources is an important and difficult task. Estimation of work time to fix a bug is commonly used to support this process. This research explores the viability of using data mining tools to predict the time to fix a bug given only the basic information known at the beginning of a bug{\textquoteright}s lifetime. To address this question, a historical portion of the Eclipse Bugzilla database is used for modeling and predicting bug lifetimes. A bug history transformation process is described and several data mining models are built and tested. Interesting behaviours derived from the models are documented. 
The models can correctly predict up to 34.9\% of the bugs into a discretized log scaled lifetime class.}, keywords = {bug fixing, bugzilla, classification, eclipse, effort estimation, mining challenge, msr challenge, prediction, weka}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.25}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300029.pdf}, author = {Panjer, Lucas D.} } @conference {647, title = {Release Management in Free Software Projects: Practices and Problems}, booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {234/2007}, year = {2007}, month = {2007///}, pages = {295 - 300}, publisher = {Springer}, organization = {Springer}, chapter = {31}, abstract = {Release management plays an important role in every software project since it is concerned with the delivery of a high quality product to end-users. This paper explores release practices employed by volunteer free software projects and shows problems that occur. A challenge that has been identified is the difficulty of coordinating a distributed team of volunteers in order to align their work for a release. 
}, issn = {978-0-387-72485-0}, doi = {http://dx.doi.org/10.1007/978-0-387-72486-7_31}, attachments = {https://flosshub.org/sites/flosshub.org/files/Release\%20Management\%20in\%20Free\%20Software.pdf}, author = {Martin Michlmayr and Hunt, Francis and Probert, David} } @conference {1724, title = {On Understanding How to Introduce an Innovation to an Open Source Project}, booktitle = {First International Workshop on Emerging Trends in FLOSS Research and Development (FLOSS{\textquoteright}07: ICSE Workshops 2007)}, year = {2007}, pages = {12 - 12}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {We propose to research the introduction of Software Engineering inventions into Open Source projects (1) to help researchers with creating opportunities for evaluating their tools, methods and process designs in real-life settings, and (2) to help Open Source projects with improving their processes based on state-of-the-art knowledge. Such research will go beyond diffusion and dissemination of inventions to active introduction, and thus increase the chances of adoption. We will discuss the research approach, our preliminary insights, limitations of the approach, and why researchers interested in evaluating their own inventions should be interested in this research. 
}, isbn = {0-7695-2961-5}, doi = {10.1109/FLOSS.2007.11}, author = {Oezbek, Christopher and Prechelt, Lutz} } @conference {998, title = {Visual Data Mining in Software Archives to Detect How Developers Work Together}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {9 - 9}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {Analyzing the check-in information of open source software projects which use a version control system such as CVS or SUBVERSION can yield interesting and important insights into the programming behavior of developers. As in every major project tasks are assigned to many developers, the development must be coordinated between these programmers. This paper describes three visualization techniques that help to examine how programmers work together, e.g. if they work as a team or if they develop their part of the software separate from each other. Furthermore, phases of stagnation in the lifetime of a project can be uncovered and thus, possible problems are revealed. To demonstrate the usefulness of these visualization techniques we performed case studies on two open source projects. In these studies interesting patterns of developers{\textquoteright} behavior, e.g. the specialization on a certain module can be observed. 
Moreover, modules that have been changed by many developers can be identified as well as such ones that have been altered by only one programmer.}, keywords = {change, coordination, cvs, developers, junit, modules, scm, source code, svn, teams, tomcat, visualization}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.34}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300009.pdf}, author = {Weissgerber, Peter and Pohl, Mathias and Burch, Michael} } @conference {684, title = {Comparing macro development for personal productivity tools: an experience in validating accessibility of Talking Books}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing}, year = {2006}, pages = {247 - 252}, publisher = {Springer}, organization = {Springer}, abstract = {We describe an experience in developing macros for both Power Point and Impress, to be used in accessibility validation for educational multimedia (Talking Books) designed for visually impaired people. Minor disadvantages in the use of Impress are outlined, which however do not constitute a serious obstacle to adoption of Open Source tools for our purposes. }, doi = {http://dx.doi.org/10.1007/0-387-34226-5_24}, attachments = {https://flosshub.org/sites/flosshub.org/files/Comparing\%20macro\%20development.pdf}, author = {Dodero, Gabriella and Lupi, Katia and Piffero, Erika} } @conference {Sager:2006:DSJ:1137983.1138000, title = {Detecting similar Java classes using tree algorithms}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {65{\textendash}71}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Similarity analysis of source code is helpful during development to provide, for instance, better support for code reuse. 
Consider a development environment that analyzes code while typing and that suggests similar code examples or existing implementations from a source code repository. Mining software repositories by means of similarity measures enables and enforces reusing existing code and reduces the developing effort needed by creating a shared knowledge base of code fragments. In information retrieval similarity measures are often used to find documents similar to a given query document. This paper extends this idea to source code repositories. It introduces our approach to detect similar Java classes in software projects using tree similarity algorithms. We show how our approach allows to find similar Java classes based on an evaluation of three tree-based similarity measures in the context of five user-defined test cases as well as a preliminary software evolution analysis of a medium-sized Java project. Initial results of our technique indicate that it (1) is indeed useful to identify similar Java classes, (2)successfully identifies the ex ante and ex post versions of refactored classes, and (3) provides some interesting insights into within-version and between-version dependencies of classes within a Java project.}, keywords = {change analysis, clones, coogle, eclipse, famix, java, similarity, software evolution, software repositories, source code, tree similarity measures}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1138000}, url = {http://doi.acm.org/10.1145/1137983.1138000}, attachments = {https://flosshub.org/sites/flosshub.org/files/65Detecting.pdf}, author = {Sager, Tobias and Bernstein, Abraham and Pinzger, Martin and Kiefer, Christoph} } @conference {692, title = {How is it possible to profit from innovation in the absence of any appropriability?}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {203/2006}, year = {2006}, month = {2006///}, pages = {333 - 
334}, publisher = {Springer}, organization = {Springer}, abstract = {Open Source Software (OSS) represents an {\textquotedblleft}open innovation{\textquotedblright} paradigm based on knowledge produced and shared by developers and users. New findings from a large survey of European software companies show that: (i) the OSS business model is currently involving almost one third of the industry, although with different intensity; (ii) compared with pure proprietary software producers, OSS firms have a broader product portfolio and are more diversified; moreover, (iii) OSS firms provide more complementary services to their customers; (iv) over time OSS firms increase the share of OS turnover out of the total turnover, becoming more and more OSS oriented; (v) both NOSS and OSS firms do not consider appropriability as a crucial requirement for innovation and do not consider the lack of appropriability as an obstacle to profitability. }, issn = {978-0-387-34225-2}, doi = {http://dx.doi.org/10.1007/0-387-34226-5_33}, attachments = {https://flosshub.org/sites/flosshub.org/files/How\%20is\%20it\%20possible\%20to\%20profit.pdf}, author = {Andrea Bonaccorsi and Lucia Piscitello and Monica Merito and Cristina Rossi} } @conference {Xie:2006:MMA:1137983.1137997, title = {MAPO: mining API usages from open source repositories}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {54{\textendash}57}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {To improve software productivity, when constructing new software systems, developers often reuse existing class libraries or frameworks by invoking their APIs. Those APIs, however, are often complex and not well documented, posing barriers for developers to use them in new client code. 
To get familiar with how those APIs are used, developers may search the Web using a general search engine to find relevant documents or code examples. Developers can also use a source code search engine to search open source repositories for source files that use the same APIs. Nevertheless, the number of returned source files is often large. It is difficult for developers to learn API usages from a large number of returned results. In order to help developers understand API usages and write API client code more effectively, we have developed an API usage mining framework and its supporting tool called MAPO (for Mining API usages from Open source repositories). Given a query that describes a method, class, or package for an API, MAPO leverages the existing source code search engines to gather relevant source files and conducts data mining. The mining leads to a short list of frequent API usages for developers to inspect. MAPO currently consists of five components: a code search engine, a source code analyzer, a sequence preprocessor, a frequent sequence miner, and a frequent sequence post processor. We have examined the effectiveness of MAPO using a set of various queries. 
The preliminary results show that the framework is practical for providing informative and succinct API usage patterns.}, keywords = {api, application programming interfaces, documentation, mining software repositories, pmd, program comprehension, search engine, sequences, source code, source code search engine}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1137997}, url = {http://doi.acm.org/10.1145/1137983.1137997}, attachments = {https://flosshub.org/sites/flosshub.org/files/54MAPO.pdf}, author = {Xie, Tao and Pei, Jian} } @conference {Kim:2006:MPE:1137983.1137995, title = {Micro pattern evolution}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {40{\textendash}46}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {When analyzing the evolution history of a software project, we wish to develop results that generalize across projects. One approach is to analyze design patterns, permitting characteristics of the evolution to be associated with patterns, instead of source code. Traditional design patterns are generally not amenable to reliable automatic extraction from source code, yet automation is crucial for scalable evolution analysis. Instead, we analyze {\textquotedblleft}micro pattern{\textquotedblright} evolution; patterns whose abstraction level is closer to source code, and designed to be automatically extractable from Java source code or bytecode. We perform micro-pattern evolution analysis on three open source projects, ArgoUML, Columba, and jEdit to identify micro pattern frequencies, common kinds of pattern evolution, and bug-prone patterns. In all analyzed projects, we found that the micro patterns of Java classes do not change often. 
Common bug-prone pattern evolution kinds are {\textquoteleft}Pool {\textrightarrow} Pool{\textquoteright}, {\textquoteleft}Implementor {\textrightarrow} NONE{\textquoteright}, and {\textquoteleft}Sampler {\textrightarrow} Sampler{\textquoteright}. Among all pattern evolution kinds, {\textquoteleft}Box{\textquoteright}, {\textquoteleft}CompoundBox{\textquoteright}, {\textquoteleft}Pool{\textquoteright}, {\textquoteleft}CommonState{\textquoteright}, and {\textquoteleft}Outline{\textquoteright} micro patterns have high bug rates, but they have low frequencies and a small number of changes. The pattern evolution kinds that are bug-prone are somewhat similar across projects. The bug-prone pattern evolution kinds of two different periods of the same project are almost identical.}, keywords = {argouml, bugs, columba, design patterns, evolution, extraction, java, jedit, source code}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1137995}, url = {http://doi.acm.org/10.1145/1137983.1137995}, attachments = {https://flosshub.org/sites/flosshub.org/files/40MicroPattern.pdf}, author = {Kim, Sunghun and Pan, Kai and Whitehead, Jr., E. James} } @conference {701, title = {Open Source Software Development (OSSD) Based On Software Engineering}, booktitle = {OSS2006: Open Source Systems (IFIP 2.13)}, series = {IFIP International Federation for Information Processing }, volume = {203/2006}, year = {2006}, month = {2006///}, pages = {345 - 346}, publisher = {Springer}, organization = {Springer}, abstract = {With the advent of Open Source Software (OSS) at the end of last century, many proponents believe that OSS is a new software development process and some even advocate OSS as a revolution for software engineering. The Cathedral and the Bazaar is a typical metaphor of the software development methodologies for the Closed Source Software (CSS) and the OSS. 
By comparing the phased (namely, requirement analysis, document design and system design, coding, testing and maintenance) software development methodology proposed by Software Engineering (SE), and by studying the management tools provided by SourceForge.net, we believe OSS development method not only follows the phased software development process, but also in return enriches the theory of SE. }, issn = {978-0-387-34225-2}, doi = {http://dx.doi.org/10.1007/0-387-34226-5_39}, attachments = {https://flosshub.org/sites/flosshub.org/files/Open\%20Source\%20Software\%20Development\%20\%28OSSD\%29.pdf}, author = {Zhu, Dengya and Potdar, Vidyasagar and Chang, Elizabeth} } @conference {Knab:2006:PDD:1137983.1138012, title = {Predicting defect densities in source code files with decision tree learners}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {119{\textendash}125}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {With the advent of open source software repositories the data available for defect prediction in source files increased tremendously. Although traditional statistics turned out to derive reasonable results the sheer amount of data and the problem context of defect prediction demand sophisticated analysis such as provided by current data mining and machine learning techniques.In this work we focus on defect density prediction and present an approach that applies a decision tree learner on evolution data extracted from the Mozilla open source web browser project. The evolution data includes different source code, modification, and defect measures computed from seven recent Mozilla releases. Among the modification measures we also take into account the change coupling, a measure for the number of change-dependencies between source files. 
The main reason for choosing decision tree learners, instead of for example neural nets, was the goal of finding underlying rules which can be easily interpreted by humans. To find these rules, we set up a number of experiments to test common hypotheses regarding defects in software entities. Our experiments showed, that a simple tree learner can produce good results with various sets of input data.}, keywords = {change analysis, data mining, decision tree learner, defect density, defect prediction, mozilla, prediction, release history, scm, source code, version control}, isbn = {1-59593-397-2}, doi = {http://doi.acm.org/10.1145/1137983.1138012}, url = {http://doi.acm.org/10.1145/1137983.1138012}, attachments = {https://flosshub.org/sites/flosshub.org/files/119Predicting.pdf}, author = {Knab, Patrick and Pinzger, Martin and Bernstein, Abraham} } @conference {1229, title = {Regurgitate: Using GIT For F/LOSS Data Collection}, booktitle = {1st Workshop on Public Data about Software Development (WoPDaSD 2006)}, year = {2006}, abstract = {We have created a new tool, regurgitate, for importing CVS repositories into the GIT source code management system. Important features of GIT include great expressiveness in capturing relationships between revisions and across files as well as extremely high-speed processing. These features make GIT an ideal platform for gathering detailed longitudinal metrics for open source projects. The availability of regurgitate facilitates using GIT as an analysis tool for that majority of open source projects that keep their repositories in CVS. In particular, GIT is fast enough that it is practical to replay the entire development history of a project commit-at-a-time, collecting metrics at each step. 
We demonstrate this process for a simple metric and a collection of benchmark F/LOSS repositories.}, keywords = {cvs, cvsanaly, git, history, promise, regurgitate, scm}, attachments = {https://flosshub.org/sites/flosshub.org/files/massey.pdf}, author = {Bart Massey and Keith Packard} } @article {flosswp351, title = {Skills, Division of Labor and Performance in Collective Inventions. Evidence from the Open Source Software}, year = {2006}, month = {July}, abstract = {This paper investigates the role of skills and the division of labor among participants in collective inventions. Our analysis draws on a large sample of projects registered at Sourceforge.net, the world{\textquoteright}s largest incubator of open source software activity. We test the hypothesis that the level of skills of participants and their skill variety are important for project performance. Skill heterogeneity across participants is in line with two fundamental organizational features of the open source development model: team work and modular design. We also explore the hypothesis whether the level of modularization of project activities is an important predictor of performance. Our econometric estimations show that both skill level and skill heterogeneity positively affect projects{\textquoteright} survival and performances. However, the impact of skill diversity is non linear. Design modularity is also positively associated with the performance of the project.}, attachments = {https://flosshub.org/sites/flosshub.org/files/giuriplonerrullanitorrisi.pdf}, author = {Paola Giuri and Matteo Ploner and Francesco Rullani and Salvatore Torrisi} } @inbook {896, title = {Understanding Free/Open Source Software Evolution}, booktitle = {Software Evolution and Feedback: Theory and Practice}, year = {2006}, pages = {181-206}, publisher = {John Wiley and Sons Inc.}, organization = {John Wiley and Sons Inc.}, address = {New York}, author = {Walt Scacchi}, editor = {Madhavji, N.H. and Lehman, M.M. 
and Ramil, J.F. and Perry, D.} } @article {893, title = {User Choices and Regret: Understanding Users{\textquoteright} Decision Process about Consensually Acquired Spyware}, journal = {I/S: A Journal of Law and Policy for the Information Society}, volume = {2}, number = {2}, year = {2006}, month = {01/2006}, author = {N. Good and J. Grossklags and Konstan, J.A. and D. Mulligan and A. Perzanowski and D. Thaw} } @conference {729, title = {Communication, coordination and control in distributed development: an OSS case study}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {88-92}, abstract = {It has been claimed that distributed development practices in OSS development may be a model for enterprise development practices of the future. With this in mind, we have conducted a study of one OSS project, namely ArgoUML, with a view to understanding development practice within the project, and specifically to considering possible differences from traditional (non-OSS) distributed development conducted in a commercial project. We do this by explicitly considering issues of communication, coordination and control. Our findings suggest that primary differences lie in control and resulting project structures, motivated through differing goals. We comment on the open question of how the advantages of one development context can be realised in the other.}, url = {http://pascal.case.unibz.it/handle/2038/769}, author = {Persson, Anna and Lings, Brian and Lundell, Bj{\"o}rn and Mattsson, Anders and {\"A}rlig, Ulf} } @conference {725, title = {EDOS: Environment for the Development and Distribution of Open Source Software}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {66-70}, abstract = {The open-source software community is now comprised of a very large and growing number of contributors and users. 
The GNU/Linux operating system for instance has an estimated 18 million users worldwide and its contributing developers can be counted by thousands. The critical mass of contributors taking part in various open-source projects has helped to ensure high quality for open source software. However, despite the achievements of the open-source software industry, there are issues in the production of large scale open-source software (OSS) such as the GNU/Linux operating system that have to be addressed as the numbers of users, of contributors, and of available applications grow. EDOS is a European project supported by IST started October 2004 and ending in 2007, whose objective is to provide a new generation of methodologies, theoretical models, technical tools and quality models specifically tailored to OSS engineering and to software distribution over the Interne...}, url = {http://pascal.case.unibz.it/handle/2038/737}, author = {Abiteboul, Serge and Leroy, Xavier and Vrdoljak, Boris and Di Cosmo, Roberto and Fermigier, St{\'e}fane and Lauri{\`e}re, St{\'e}phane and Lepied, Fr{\'e}d{\'e}ric and Pop, Radu and Villard, Florent and Smets, Jean-Paul and Bryce, Ciar{\'a}n and Dittrich, Klaus R. and Milo, Tova and Sagi, Assaf and Shtossel, Yotam and Panto, Eleonora} } @conference {768, title = {Free software and research}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {270-271}, abstract = {Free software licenses are a natural choice in a research environment. 
In the following, we will try to back this simple statement with some considerations and examples, in an effort to analyse the significant interactions between free software and research.}, url = {http://pascal.case.unibz.it/handle/2038/864}, author = {Potort{\`\i}, Francesco} } @conference {774, title = {Migrazione di un Sistema Informativo da UNIX-AIX a UNIX-Linux}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {287-288}, abstract = {Il presente documento ha come obiettivo quello di descrivere la politica adottata dall{\textquoteright}Istituto Nazionale di Statistica rispetto all{\textquoteright}uso del software Open Source. In particolare vengono descritti i sistemi che attualmente operano su piattaforma Linux, quelli che sono in fase di migrazione e le scelte che si sono fatte nel caso di convivenza tra sistemi {\textquotedblleft}open{\textquotedblright} e sistemi {\textquotedblleft}proprietari{\textquotedblright}. Viene inoltre illustrata l{\textquoteright}architettura hardware scelta nel caso della migrazione di un sistema complesso da piattaforma interamente proprietaria (UNIX AIX) a piattaforma con sistema operativo open Linux Red Hat.}, keywords = {architettura hardware, linux, migrazione, open source, server, sistema informativo, sistema proprietario, unix aix}, url = {http://pascal.case.unibz.it/handle/2038/978}, author = {Colasanti, Cecilia and Patruno, Vincenzo and Vaccari, Carlo} } @conference {Phadke:2005:PRM:1145319.1145337, title = {Predicting risky modules in open-source software for high-performance computing}, booktitle = {Proceedings of the second international workshop on Software engineering for high performance computing system applications}, series = {SE-HPCS {\textquoteright}05}, year = {2005}, pages = {60{\textendash}64}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {This paper presents the position that software-quality modeling of open-source software for high-performance 
computing can identify modules that have a high risk of bugs. Given the source code for a recent release, a model can predict which modules are likely to have bugs, based on data from past releases. If a user knows which software modules correspond to functionality of interest, then risks to operations become apparent. If the risks are too great, the user may prefer not to upgrade to the most recent release. Of course, such predictions are never perfect. After release, bugs are discovered. Some bugs are missed by the model, and some predicted errors do not occur. A successful model will be accurate enough for informed management action at the time of the predictions. As evidence for this position, this paper summarizes a case study of the Portable Extensible Toolkit for Scientific Computation (PETSC), which is a mathematical library for high-performance computing. Data was drawn from source-code and configuration management logs. The accuracy of logistic-regression and decision-tree models indicated that the methodology is promising. The case study also illustrated several modeling issues.}, keywords = {C4.5, decision trees, empirical case study, high performance computing, logistic regression, Open-source software, PETSc, software metrics, software quality model, software reliability}, isbn = {1-59593-117-1}, doi = {10.1145/1145319.1145337}, url = {http://doi.acm.org/10.1145/1145319.1145337}, author = {Phadke, Amit A. 
and Allen, Edward B.} } @conference {Hawthorne:2005:SEE:1062455.1062581, title = {Software engineering education in the era of outsourcing, distributed development, and open source software: challenges and opportunities}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {643{\textendash}644}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {As software development becomes increasingly globally distributed, and more software functions are delegated to common open source software (OSS) and commercial off-the-shelf (COTS) components, practicing software engineers face significant challenges for which current software engineering curricula may leave them inadequately prepared. A new multi-faceted distributed development model is emerging that effectively commoditizes many development activities once considered integral to software engineering, while simultaneously requiring practitioners to apply engineering principles in new and often unfamiliar contexts. We discuss the challenges that software engineers face as a direct result of outsourcing and other distributed development approaches that are increasingly being utilized by industry, and some of the key ways we need to evolve software engineering curricula to address these challenges.}, keywords = {computer science education, contextual learning, education, informatics, software engineering education}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062581}, url = {http://doi.acm.org/10.1145/1062455.1062581}, author = {Hawthorne, Matthew J. 
and Perry, Dewayne E.} } @conference {720, title = {Towards an Open Source Development Process - Evaluating the Migration to an Open Source Project by Means of the Capability Maturity Model}, booktitle = {OSS2005: Open Source Systems }, year = {2005}, pages = {37-43}, abstract = {In this paper we review the ongoing development of a Web-based community system that has been migrated from a closed software development to an open source project. We identify three different phases in the migration process where the development process changed significantly. We analyse these phases by means of the Capability Maturity Model (CMM). The insights gained show the implications of such a migration process towards open source concerning the process quality of a development process. They also show underlying assumptions of the CMM that do not totally match with developments in this specific case study. As a helpful outcome, our reflection about the ongoing software development process helped identify two crucial factors: reflection about the process is possible even at lower levels and how to handle people{\textquoteright}s fluctuation to sustain a development project.}, url = {http://pascal.case.unibz.it/handle/2038/1543}, author = {Bleek, Wolf-Gideon and Finck, Matthias and Pape, Bernd} } @article {127, title = {An empirical study of open-source and closed-source software products}, journal = {IEEE Transactions on Software Engineering}, volume = {30}, number = {4}, year = {2004}, note = {Times Cited: 8}, pages = {246-256}, abstract = {This paper describes an empirical study of open-source and closed-source software projects. The motivation for this research is to quantitatively investigate common perceptions about open-source projects, and to validate these perceptions through an empirical study. This paper investigates the hypothesis that open-source software grows more quickly, but does not find evidence to support this. 
The project growth is similar for all the projects in the analysis, indicating that other factors may limit growth. The hypothesis that creativity is more prevalent in open-source software is also examined, and evidence to support this hypothesis is found using the metric of functions added over time. The concept of open-source projects succeeding because of their simplicity is not supported by the analysis, nor is the hypothesis of open-source projects being more modular. However, the belief that defects are found and fixed more rapidly in open-source projects is supported by an analysis of the functions modified. The paper finds support for two of the five common beliefs and concludes that, when implementing or switching to the open-source development model, practitioners should ensure that an appropriate metrics collection strategy is in place to verify the perceived benefits.}, author = {Paulson, J. W. and Succi, G. and Eberlein, A.} } @conference {1482, title = {Group awareness in distributed software development}, booktitle = {2004 ACM conference on Computer supported cooperative work - CSCW {\textquoteright}04}, year = {2004}, pages = {72-81}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, abstract = {Open-source software development projects are almost always collaborative and distributed. Despite the difficulties imposed by distance, these projects have managed to produce large, complex, and successful systems. However, there is still little known about how open-source teams manage their collaboration. In this paper we look at one aspect of this issue: how distributed developers maintain group awareness. We interviewed developers, read project communication, and looked at project artifacts from three successful open source projects. 
We found that distributed developers do need to maintain awareness of one another, and that they maintain both a general awareness of the entire team and more detailed knowledge of people that they plan to work with. Although there are several sources of information, this awareness is maintained primarily through text-based communication (mailing lists and chat systems). These textual channels have several characteristics that help to support the maintenance of awareness, as long as developers are committed to reading the lists and to making their project communication public.}, keywords = {email, email archive, mailing list}, isbn = {1581138105}, doi = {10.1145/1031607.1031621}, author = {Schneider, Kevin and Gutwin, Carl and Penner, Reagan} } @unpublished {flosswp162, title = {Linux Adoption by Firms}, year = {2004}, month = {May}, abstract = {The objective of this study is to examine the evolution of the market for Linux based products for the 1993-2003 period. Using data on 317 Linux suppliers available online, the differences in firms{\textquoteright} size and in their first products were explored across the adoption stages of the Linux life cycle. Then two temporal patterns of the Linux-market were identified: changes in the entry rate of new Linux suppliers and changes in product diversity. Finally, the attributes of the partnerships formed by four major Linux distributors were examined. The study determined whether the number of partnerships formed by Linux distributors was related to the number of new entrants, whether the motives for partnerships formed by Linux distributors varied over adoption stages, and whether the type of partner selected by Linux distributors was a function of partnership motive. This study builds on the literature on open source software and traditional theories of technology adoption to make three important contributions. First, it develops a method to identify the stages of the life of a new technology. 
Secondly, it provides a way to measure the temporal patterns of the evolution of a new market. Finally, it validates the density-dependence model using data on open source.}, attachments = {https://flosshub.org/sites/flosshub.org/files/peng.pdf}, author = {Zheshi Peng} } @article {flosswp145, title = {Network Formation via Contests: The Production Process of Open Source Software}, year = {2004}, month = {January}, abstract = {Why do both software developers and firms contribute to the production process of Open Source Software (OSS) despite not receiving direct monetary rewards for it? This paper extends results of the economic literature by modelling the OSS production process as an application contest to a "qualified network". The winners receive reputation and high investments. Investors searching for highly talented applicants profit from the selection mechanism of the OSS production process and finance it to receive inside information. We describe incentives for developers and firms and compare the mechanism with alternatives for its efficiency.}, attachments = {https://flosshub.org/sites/flosshub.org/files/prufer.pdf}, author = {Jens Prufer} } @proceedings {1197, title = {Open Source and Closed Source Software Development Methodologies}, year = {2004}, pages = {105-109}, abstract = {Open source software development represents a fundamentally new concept in the field of software engineering. Open source development and delivery occurs over the Internet. Developers are not confined to a geographic area. They work voluntarily on a project of their choice. As new requirements emerge, the software is enhanced by the user/developers. In this paper we show a comparative study of open source and closed source software development approaches and present a software life cycle model for open source software development.}, keywords = {life cycle, lifecycle}, attachments = {https://flosshub.org/sites/flosshub.org/files/potdar106-110.pdf}, author = {Potdar, V. 
and Chang, E.} } @proceedings {1192, title = {WebDAV based Open Source Collaborative Development Environment}, year = {2004}, pages = {53-57}, abstract = {Open source projects are characterized by their distributed developers, openness, and use of a community-based development process. Successful open source projects such as Apache, Linux, and PHP have a web-based Collaborative Development Environment (CDE) that provides source code repository access, knowledge management, discussion lists, bug tracking, and user support. Distributed software development is also increasingly common for commercial software projects, and benefits from using web-based CDEs. GForge, derived from the SourceForge package, is an open web-based CDE that is seeing widespread open source and commercial use. To support remote authoring of web pages and file contents, as well as remote source code access, GForge uses several network protocols, including SSH/SFTP, CVS pserver, and FTP. These protocols inevitably introduce security risks and add administrative work. We modified GForge to eliminate use of FTP, SSH/SFTP, and CVS protocols, using instead a WebDAV-based integration. The new CDE, called Davenport, uses only the HTTP/WebDAV protocol, and HTTP/WebDAV replaces the other protocols without sacrificing functionality. In this paper we describe the architecture and features of Davenport, and show how WebDAV replaces existing network protocols. We discuss the future development directions of CDEs including integration of client side IDEs and using a CDE as a software test bed.}, attachments = {https://flosshub.org/sites/flosshub.org/files/Kim54-58.pdf}, author = {Kim, S. and Pan, K. and Whitehead, J.} } @article {flosswp42, title = {Open Source and the Network Society}, year = {2002}, month = {June}, abstract = {This paper discusses how the open source movement can be regarded as a civil society. 
Through an analysis and discussion of the recent development in society the context and to a certain extent the prerequisites surrounding the open source movement will be presented. The analysis and discussion of the open source movement will both derive advantage from this basis as well as throw new light back on the recent development in society.}, attachments = {https://flosshub.org/sites/flosshub.org/files/pedersen.pdf}, author = {Soren Thing Pedersen} } @article {hansen2002open, title = {The Open Source approach{\textemdash}opportunities and limitations with respect to security and privacy}, journal = {Computers \& Security}, volume = {21}, number = {5}, year = {2002}, pages = {461{\textendash}471}, publisher = {Elsevier}, abstract = {Today{\textquoteright}s software often does not even fulfil basic security or privacy requirements. Some people regard the open source paradigm as the solution to this problem. First, we carefully explain the security and privacy aspects of open source, which in particular offer the possibility for a dramatic increase in trustworthiness for and autonomy of the user. We show which expectations for an improvement of the software trustworthiness dilemma are realistic. 
Finally, we describe measures necessary for developing secure and trustworthy open source systems.}, keywords = {security}, url = {https://dud.inf.tu-dresden.de/literatur/HaKP_02OpenSource_0214.doc}, attachments = {https://flosshub.org/sites/flosshub.org/files/HaKP_02OpenSource_0214.doc}, author = {Hansen, Marit and K{\"o}hntopp, Kristian and Pfitzmann, Andreas} } @article {payne2002security, title = {On the security of open source software}, journal = {Information systems journal}, volume = {12}, number = {1}, year = {2002}, pages = {61{\textendash}78}, publisher = {Wiley Online Library}, abstract = {With the rising popularity of so-called {\textquoteleft}open source{\textquoteright} software there has been increasing interest in both its various benefits and disadvantages. In particular, despite its prominent use in providing many aspects of the Internet{\textquoteright}s basic infrastructure, many still question the suitability of such software for the commerce-oriented Internet of the future. This paper evaluates the suitability of open source software with respect to one of the key attributes that tomorrow{\textquoteright}s Internet will require, namely security. It seeks to present a variety of arguments that have been made, both for and against open source security and analyses in relation to empirical evidence of system security from a previous study. 
The results represent preliminary quantitative evidence concerning the security issues surrounding the use and development of open source software, in particular relative to traditional proprietary software.}, keywords = {security}, attachments = {https://flosshub.org/sites/flosshub.org/files/Payne2002_ISJ12_SecurityOSS.pdf}, author = {Payne, Christian} } @article {Hissam, title = {Trust and vulnerability in open source software}, journal = {Software, {IEE} Proceedings -}, volume = {149}, number = {1}, year = {2002}, month = {02/2002}, pages = {47{\textendash}51}, chapter = {47}, abstract = {Software plays an ever increasing role in the critical infrastructures that run our cities, manage our economies, and defend our nations. In 1999, the President{\textquoteright}s Information Technology Advisory Committee (PITAC) reported to the United States President the need for software components that are reliable, tested, modelled and secure supporting the development of predictably reliable and secure systems that underscore our critical infrastructures. Open source software (OSS) constitutes a viable source for software components. Some believe that OSS is more reliable and more secure than closed source software (CSS){\textemdash}due to a phenomenon dubbed {\textquoteleft}many eyeballs{\textquoteright}{\textemdash}but is this truly the case? Or does OSS give the cyber criminal an edge that he would likewise not have? We explore OSS from the perspective of the cyber criminal and discuss what the community of software developers and users alike can do to increase their trust in both open source software and closed source software.}, keywords = {closed source software, community of software developers, critical infrastructures, cyber criminal, open source software, PITAC, predictably reliable systems, predictably secure systems, software components, trust, users, vulnerability}, issn = {1462-5970}, doi = {10.1049/ip-sen:20020208}, author = {Hissam, S. A. and Plakosh, D. 
and Weinstock, C.} } @conference {1148, title = {Introducing a {\textquotedblleft}Street Fair{\textquotedblright} Open source Practice Within Project Based Software Engineering Courses}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, abstract = {Columbia University and USC, as well as many other universities, have software engineering courses where students work in small teams to develop a product of moderate complexity. Typically such efforts are {\textquotedblleft}closed source{\textquotedblright} where teams do not (or are forbidden) from sharing information and providing assistance to other teams. There are certainly many justifiable reasons for this such as grading uniformity. Yet strong drivers such as grade competition, generally thought to promote quality results (i.e. successful projects), often do not. The authors ran an experiment introducing a style of open source practice called {\textquotedblleft}street fair{\textquotedblright} with surprisingly positive results.}, keywords = {education, educational resources}, attachments = {https://flosshub.org/sites/flosshub.org/files/portkaiser.pdf}, author = {Port, D. and Kaiser, G.} } @conference {1150, title = {Leveraging Open-Source Communities To Improve the Quality \& Performance of Open-Source Software}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, abstract = {Open-source processes have emerged as an effective approach to reduce cycle-time and decrease development and quality assurance costs for certain types of software. They are not without challenges, however, such as decreasing long-term maintenance and evolution costs, improving quality assurance, sustaining end-user confidence and good will, and ensuring the coherency of system-wide software and usability properties. 
Although aspects of these issues are unique to open-source development, well-organized open-source projects make it easier to address certain of these challenges compared with traditional closed-source approaches to building software. We have begun a long-term research effort, called Skoll, whose goal is to leverage common open-source project assets, such as their technologically sophisticated worldwide user communities, to devise techniques that address key challenges of open-source software development. In particular, we are conducting a long-term case study of two widely used open-source projects, ACE and TAO, to design, deploy, and evaluate techniques for improving quality through continuous distributed testing and profiling. This position paper presents our view of the pros and cons of open-source processes and outlines the work we are doing to improve the quality and performance of open-source software.}, attachments = {https://flosshub.org/sites/flosshub.org/files/schmidt.pdf}, author = {Schmidt, D.C. and Porter, A.} } @unpublished {flosswp17, title = {The Use of Computer Conferencing as an Organizational Knowledge Transfer}, year = {2001}, month = {June}, abstract = {Electronic bulletin boards were used as an organization-wide problem solving process that transferred private knowledge. Key organizational implications were found.}, attachments = {https://flosshub.org/sites/flosshub.org/files/willpope.pdf}, author = {William Pope} } @article {913, title = {Dimensions of information systems success}, journal = {Communications of the Association for Information Systems}, volume = {20}, number = {2}, year = {1999}, pages = {61}, author = {Seddon, PB and Staples, S and Patnayakuni, R and Bowtell, M} }