% Entry 1287: a single paper published in a conference proceedings volume.
% It is typed as inproceedings (not proceedings) because it has authors and
% a page range; standard styles ignore the author field on whole-volume
% proceedings entries.
% NOTE(review): booktitle was missing from the exported record; the value
% below is the reviewer's identification of the volume -- confirm it against
% the publisher record before relying on it.
% The note field holds verbatim excerpts from the paper describing how its
% data was gathered.
@inproceedings{1287,
  author    = {Nyman, Linus and Mikkonen, Tommi},
  title     = {To Fork or Not to Fork: Fork Motivations in {SourceForge} Projects},
  booktitle = {Open Source Systems: Grounding Research},
  year      = {2011},
  month     = oct,
  pages     = {259--268},
  publisher = {Springer},
  keywords  = {fork rate, sourceforge},
  note      = {``Using this search function, we compiled a list of all of the programs with the word {\textquotedblleft}fork{\textquotedblright}...'' ``We then analyzed the motivations stated in the descriptions of the forked programs....'' ``Based on the descriptions entered by the developer, we were able to identify motivations for 381 of the forks.''},
  abstract  = {A project fork occurs when software developers take a copy of source code from one software package and use it to begin an independent development work that is maintained separately from its origin. Although forking in open source software does not require the permission of the original authors, the new version, nevertheless, competes for the attention of the same developers that have worked on the original version. The motivations developers have for performing forks are many, but in general they have received little attention. In this paper, we present the results of a study of forks performed in SourceForge (http://sourceforge.net/) and list the developers{\textquoteright} motivations for their actions. The main motivation, seen in close to half of the cases of forking, was content modification; either adding content to the original program or focusing the content to the needs of a specific segment of users. In a quarter of the cases the motivation was technical modification; either porting the program to new hardware or software, or improving the original.},
}

% Entry 1206: workshop paper (WoPDaSD 2008).
% NOTE(review): the exported record carried a month field whose value was
% the number 2009. That is a year, not a month, and it disagrees with the
% year field (2008), so the month field is omitted here until the real
% publication month is confirmed.
% The attachments field is a non-standard export field and is kept as-is;
% the same link is also given in the standard url field so styles can use it.
% The note field holds verbatim excerpts from the paper describing how its
% data was gathered.
@inproceedings{1206,
  author      = {Fagerholm, Fabian and Taina, Juha},
  title       = {Collecting data from distributed {FOSS} projects},
  booktitle   = {3rd Workshop on Public Data about Software Development ({WoPDaSD} 2008)},
  year        = {2008},
  pages       = {8--13},
  url         = {https://flosshub.org/sites/flosshub.org/files/fagerholm.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/fagerholm.pdf},
  keywords    = {bitkeeper, bug tracking system, cvs, distributed, email archive, fork rate, git, life cycle, linux, linux kernel, mailing list, merge rate, subversion, svn, version control},
  note        = {``We selected three projects from the initial set of projects: Linux 2.6, an operating system kernel, gimp, a graphics program, and Blender, a 3d content creation suite.'' ``To acquire data from each data source, we wrote special programs based on the earlier prototypes....The first program extracts information from mailing list archives....The second program obtains bug reports from bug tracking systems....The third program obtains source code from network-accessible version control systems and runs metric calculations on it.''},
  abstract    = {A key trait of Free and Open Source Software (foss) development is its distributed nature. Nevertheless, two project-level operations, the fork and the merge of program code, are among the least well understood events in the lifespan of a foss project. Some projects have explicitly adopted these operations as the primary means of concurrent development. In this study, we examine the effect of highly distributed software development, as found in the Linux kernel project, on collection and modelling of software development data. We find that distributed development calls for sophisticated temporal modelling techniques where several versions of the source code tree can exist at once. Attention must be turned towards the methods of quality assurance and peer review that projects employ to manage these parallel source trees. Our analysis indicates that two new metrics, fork rate and merge rate, could be useful for determining the role of distributed version control systems in foss projects. The study presents a preliminary data set consisting of version control and mailing list data.},
}