% Bibliography of papers on software forges and forge-level data.
% Conventions used below: one field per line, names in "Last, First" form,
% month given as an unquoted three-letter macro, page ranges written with a
% double hyphen, DOIs stored bare, and case-sensitive words in titles protected
% with braces. The keywords and attachments fields are non-standard and are
% ignored by the standard styles; they are kept as exported.

% NOTE(review): the export typed this paper as a whole proceedings volume and
% carried no booktitle. The booktitle below was added by hand; confirm it
% against the publisher record.
@inproceedings{1522,
  author    = {Dyer, Robert and Nguyen, Hoan Anh and Rajan, Hridesh and Nguyen, Tien N.},
  title     = {{Boa}: A Language and Infrastructure for Analyzing Ultra-Large-Scale Software Repositories},
  booktitle = {35th International Conference on Software Engineering (ICSE 2013)},
  year      = {2013},
  month     = may,
  pages     = {422--431},
  abstract  = {In today{\textquoteright}s software-centric world, ultra-large-scale software repositories, e.g. SourceForge (350,000+ projects), GitHub (250,000+ projects), and Google Code (250,000+ projects) are the new library of Alexandria. They contain an enormous corpus of software and information about software. Scientists and engineers alike are interested in analyzing this wealth of information both for curiosity as well as for testing important hypotheses. However, systematic extraction of relevant data from these repositories and analysis of such data for testing hypotheses is hard, and best left for mining software repository (MSR) experts! The goal of Boa, a domain-specific language and infrastructure described here, is to ease testing MSR-related hypotheses. We have implemented Boa and provide a web-based interface to Boa{\textquoteright}s infrastructure. Our evaluation demonstrates that Boa substantially reduces programming efforts, thus lowering the barrier to entry. We also see drastic improvements in scalability. Last but not least, reproducing an experiment conducted using Boa is just a matter of re-running small Boa programs provided by previous researchers.},
  keywords  = {ease of use, forge, github, google code, lower barrier to entry, mining, repository, reproducible, scalable, Software, sourceforge},
}

@inproceedings{1317,
  author      = {Squire, Megan and Williams, David},
  title       = {Describing the Software Forge Ecosystem},
  booktitle   = {45th Hawai{\textquoteright}i International Conference on System Sciences},
  year        = {2012},
  month       = jan,
  pages       = {3416--3425},
  note        = {http://flossmole.org/content/everything-you-ever-wanted-know-about-software-forges-code-forges-june-2011},
  abstract    = {Code forges are online software systems that are designed to support teams doing software development work. There have been few if any attempts in the research literature to describe the web of people, projects, and tools that make up the free, libre, and open source (FLOSS) forge ecosystem. The main contributions of this paper are (1) to introduce a classification of FLOSS-oriented forges according to their characteristics; (2) to describe the forge-level and project-level data and artifacts currently available at each FLOSS forge; (3) to show various patterns already discovered in the FLOSS forge ecosystem, such as timelines of creation or arrangements by size or feature; (4) to make some recommendations to forge providers and data collectors about how to expose the structure and information in the forges; and (5) to describe the effort needed to extend our publicly-available information about the FLOSS forge ecosystem into the future.},
  keywords    = {features, FLOSS, forge, hosting, metrics},
  attachments = {https://flosshub.org/sites/flosshub.org/files/SquireWilliamsHICSS2012.pdf},
}

% NOTE(review): the exported month value was 32/2010, which is not a month.
% The value apr is inferred from the date-like DOI suffix (20100401); confirm
% against the journal issue.
@article{1239,
  author      = {Berger, Olivier and Vlasceanu, Valentin and Bac, Christian and Dang, Quang Vu and Lauriere, St{\'e}phane},
  title       = {Weaving a {Semantic Web} Across {OSS} Repositories},
  journal     = {International Journal of Open Source Software and Processes},
  volume      = {2},
  year        = {2010},
  month       = apr,
  pages       = {29--40},
  issn        = {1942-3934},
  doi         = {10.4018/jossp.2010040103},
  abstract    = {Several public repositories and archives of {\textquotedblleft}facts{\textquotedblright} about libre software projects, maintained either by open source communities or by research communities, have been flourishing over the Web in recent years. These have enabled new analysis and support for new quality assurance tasks. This paper presents some complementary existing tools, projects and models proposed both by OSS actors or research initiatives that are likely to lead to useful future developments in terms of study of the FLOSS phenomenon, and also to the very practitioners in the FLOSS development projects. A goal of the research conducted within the HELIOS project is to address bugs traceability issues. In this regard, the authors investigate the potential of using Semantic Web technologies in navigating between many different bugtracker systems scattered all over the open source ecosystem. By using Semantic Web techniques, it is possible to interconnect the databases containing data about open-source software projects development, which enables OSS partakers to identify resources, annotate them, and further interlink those using dedicated properties and collectively designing a distributed semantic graph.},
  keywords    = {archive, bug, bugtracker, database, debian, forge, interoperability, ontology, OSLC-CM, RDF, repository of repositories, semantic, semantic Web},
  attachments = {https://flosshub.org/sites/flosshub.org/files/wopdasd2009-olivier-berger.pdf},
}

@article{1241,
  author      = {Theu{\ss}l, Stefan and Zeileis, Achim},
  title       = {Collaborative Software Development Using {R-Forge}},
  journal     = {The R Journal},
  volume      = {1},
  number      = {1},
  year        = {2009},
  month       = may,
  pages       = {9--14},
  abstract    = {Open source software (OSS) is typically created in a decentralized self-organizing process by a community of developers having the same or similar interests (see the famous essay by Raymond, 1999). A key factor for the success of OSS over the last two decades is the Internet: Developers who rarely meet face-to-face can employ new means of communication, both for rapidly writing and deploying software (in the spirit of Linus Torvald{\textquoteright}s {\textquotedblleft}release early, release often paradigm{\textquotedblright}). Therefore, many tools emerged that assist a collaborative software development process, including in particular tools for source code management (SCM) and version control. In the R world, SCM is not a new idea; in fact, the R Development Core Team has always been using SCM tools for the R sources, first by means of Concurrent Versions System (CVS, see Cederqvist et al., 2006), and then via Subversion (SVN, see Pilato et al., 2004). A central repository is hosted by ETH Z{\"u}rich mainly for managing the development of the base R system. Mailing lists like R-help, R-devel and many others are currently the main communication channels in the R community. First, we present the core features that R-Forge offers to the R community. Second, we give a hands-on tutorial on how users and developers can get started with R-Forge. In particular, we illustrate how people can register, set up new projects, use R-Forge{\textquoteright}s SCM facilities, provide their packages on R-Forge, host a project-specific website, and how package maintainers submit a package to the Comprehensive R Archive Network (CRAN, http://CRAN.R-project.org/). Finally, we summarize recent developments and give a brief outlook to future work.},
  keywords    = {forge, R, scm, source code repositories, statistics},
  attachments = {https://flosshub.org/sites/flosshub.org/files/rjournal.pdf},
}

@article{Beecher2009739,
  author   = {Beecher, Karl and Capiluppi, Andrea and Boldyreff, Cornelia},
  title    = {Identifying exogenous drivers and evolutionary stages in {FLOSS} projects},
  journal  = {Journal of Systems and Software},
  volume   = {82},
  number   = {5},
  year     = {2009},
  pages    = {739--750},
  issn     = {0164-1212},
  doi      = {10.1016/j.jss.2008.10.026},
  url      = {http://www.sciencedirect.com/science/article/B6V0N-4TVTJFS-1/2/e32ecee1bcb54bd4a5dff6d5e3daca8d},
  abstract = {The success of a Free/Libre/Open Source Software (FLOSS) project has been evaluated in the past through the number of commits made to its configuration management system, number of developers and number of users. Most studies, based on a popular FLOSS repository (SourceForge), have concluded that the vast majority of projects are failures. This study{\textquoteright}s empirical results confirm and expand conclusions from an earlier and more limited work. Not only do projects from different repositories display different process and product characteristics, but a more general pattern can be observed. Projects may be considered as early inceptors in highly visible repositories, or as established projects within desktop-wide projects, or finally as structured parts of FLOSS distributions. These three possibilities are formalized into a framework of transitions between repositories. The framework developed here provides a wider context in which results from FLOSS repository mining can be more effectively presented. Researchers can draw different conclusions based on the overall characteristics studied about an Open Source software project{\textquoteright}s potential for success, depending on the repository that they mine. These results also provide guidance to OSS developers when choosing where to host their project and how to distribute it to maximize its evolutionary success.},
  keywords = {developers, forge, forges, repositories, repository, scm, software repositories, sourceforge, success, users},
}

@article{Capiluppi200989,
  author   = {Capiluppi, Andrea and Boldyreff, Cornelia and Beecher, Karl and Adams, Paul J.},
  title    = {Quality Factors and Coding Standards - a Comparison Between Open Source Forges},
  journal  = {Electronic Notes in Theoretical Computer Science},
  volume   = {233},
  year     = {2009},
  pages    = {89--103},
  note     = {Proceedings of the International Workshop on Software Quality and Maintainability (SQM 2008)},
  issn     = {1571-0661},
  doi      = {10.1016/j.entcs.2009.02.063},
  url      = {http://www.sciencedirect.com/science/article/B75H1-4VXDKRV-7/2/abcc2be2c4c3998e4bc9b53473ca2d81},
  abstract = {Enforcing adherence to standards in software development in order to produce high quality software artefacts has long been recognised as best practice in traditional software engineering. In a distributed heterogeneous development environment such those found within the Open Source paradigm, coding standards are informally shared and adhered to by communities of loosely coupled developers. Following these standards could potentially lead to higher quality software. This paper reports on the empirical analysis of two major forges where OSS projects are hosted. The first one, the KDE forge, provides a set of guidelines and coding standards in the form of a coding style that developers may conform to when producing the code source artefacts. The second studied forge, SourceForge, imposes no formal coding standards on developers. A sample of projects from these two forges has been analysed to detect whether the SourceForge sample, where no coding standards are reinforced, has a lower quality than the sample from KDE. Results from this analysis form a complex picture; visually, all the selected metrics show a clear divide between the two forges, but from the statistical standpoint, clear distinctions cannot be drawn amongst these quality related measures in the two forge samples.},
  keywords = {artefacts, artifacts, coding standards, coding style, complexity, forge, forges, kde, metrics, quality, source code, sourceforge},
}

% NOTE(review): the exported month value was 2009, which is a year rather than
% a month and disagrees with the year field (2008). It has been dropped;
% confirm the publication date.
@inproceedings{1208,
  author      = {Van Antwerp, Matthew and Madey, Greg},
  title       = {Advances in the {Sourceforge} Research Data Archive},
  booktitle   = {3rd Workshop on Public Data about Software Development (WoPDaSD 2008)},
  year        = {2008},
  pages       = {25--29},
  abstract    = {The SourceForge Research Data Archive (SRDA), located at http://zerlot.cse.nd.edu, is a collection of Open Source Software (OSS) data and resources [6]. Over 100 researchers worldwide use the archive for research in many fields. In this paper, we describe the recent changes, the work in progress, and future plans for making the archive easier to use and for allowing more advanced research to be done with the data available.},
  keywords    = {forge, forges, repositories, repository, sourceforge, srda},
  attachments = {https://flosshub.org/sites/flosshub.org/files/srda2008.pdf},
}

% The exported month value was 2007, a repeat of the year rather than a month;
% it has been dropped.
@inproceedings{1212,
  author      = {Delorey, Daniel P. and Knutson, Charles D. and MacLean, Alexander C.},
  title       = {Studying Production Phase {SourceForge} Projects: An Exploratory Analysis Using cvs2mysql and {SFRA}},
  booktitle   = {2nd Workshop on Public Data about Software Development (WoPDaSD 2007)},
  year        = {2007},
  abstract    = {A wealth of data can be extracted from the natural by-products of software development processes and used in empirical studies of software engineering. However, the size and accuracy of such studies depend in large part on the availability of tools that facilitate the collection of data from individual projects and the combination of data from multiple projects. To demonstrate this point, we present our experience gathering and analyzing data from nearly 10,000 open source projects hosted on SourceForge. We describe the tools we developed to collect the data and the ways in which these tools and data may be used by other researchers. We also provide examples of statistics that we have calculated from these data to describe interesting author- and project-level behaviors of the SourceForge community.},
  keywords    = {Data Collection, forge, repositories, sourceforge},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Delorey2007c.pdf},
}