% Bibliography entries on contributor and developer roles in open source
% projects (every entry below carries the keyword "roles"), newest first.
% Layout: one field per line; authors in "Last, First" form; month given as
% the standard three-letter macro; page ranges with "--"; doi fields hold the
% bare DOI. The attachments field is not a standard field and is ignored by
% bibliography styles.

% NOTE(review): no booktitle is recorded for this paper; add it once the
% venue is confirmed.
@inproceedings{1489,
  title = {Project Roles in the {Apache Software Foundation}: A Dataset},
  year = {2013},
  month = may,
  abstract = {This paper outlines the steps in the creation and maintenance of a new dataset listing leaders of the various projects of the Apache Software Foundation (ASF). Included in this dataset are different levels of committers to the various ASF project code bases, as well as regular and emeritus members of the ASF, and directors and officers of the ASF. The dataset has been donated to the FLOSSmole project under an open source license, and is available for download (https://code.google.com/p/flossmole/downloads/detail?name=apachePeople2013-Jan.zip), or for direct querying via a database client.},
  keywords = {apache, dataset, roles},
  attachments = {https://flosshub.org/sites/flosshub.org/files/apacheRolesPREPRINT.pdf , https://flosshub.org/sites/flosshub.org/files/MSR\%20presentation_0.pdf},
  author = {Squire, Megan},
}

% NOTE(review): no booktitle is recorded for this paper; add it once the
% venue is confirmed.
@inproceedings{1498,
  title = {Who Does What during a Code Review? Datasets of {OSS} Peer Review Repositories},
  year = {2013},
  month = may,
  abstract = {We present four datasets that are focused on the general roles of OSS peer review members. With data mined from both an integrated peer review system and code source repositories, our rich datasets comprise of peer review data that was automatically recorded. Using the Android project as a case study, we describe our extraction methodology, the datasets and their application used for three separate studies. Our datasets are available online at http://sdlab.naist.jp/reviewmining/},
  keywords = {android, case study, code review, data set, peer review, roles, source code},
  author = {Hamasaki, Kazuki and Kula, Raula Gaikovina and Yoshida, Norihiro and Camargo Cruz, A. E. and Fujiwara, Kenji and Iida, Hajimu},
}

@article{1082,
  title = {Returns from social capital in open source software networks},
  journal = {Journal of Evolutionary Economics},
  volume = {19},
  year = {2009},
  note = {"The data we use in this analysis come from the SourceForge.net Research Data (Department of Computer Science and Engineering, University of Notre Dame)." "Particularly, our dataset is a sub-sample of 2,962 valid observations over twelve months of projects aimed at developing games{\textquoteright} software. To get this sample, we look at the monthly dumps of data and select all projects that belong to the category of GNU Public License (GPL) [6]. " dependent variable: performance (number of downloads); independent variable: social network ("Here we measure ties among projects through the individuals{\textquoteright} member and contributor roles at projects on the network over time.")},
  month = apr,
  pages = {277--295},
  abstract = {Open Source Software projects base their operation on a collaborative structure for knowledge exchange in the form of provision or reception of information, expertise, and feedback on the creation of source code. Here, we address the direction of these knowledge flows among projects throughout social networks and their impact on project success. We identify the roles of membership or contribution that individuals play within projects. We found that connections through contributors who bring their knowledge to the project, improve project success, and that connection through members, who transfer their knowledge towards other projects, enhance project success. Finally, we found that ties through shared membership and contributions hamper project success. The analysis of knowledge flows and their impact on project success imply a translation of returns from investment in social capital, where investment takes the shape of knowledge flows and the returns mean the projects{\textquoteright} diffusion over the network.},
  keywords = {contributors, developers, games, gpl, project success, roles, social capital, social network analysis, social networks, sourceforge, srda, teams},
  issn = {1432-1386},
  doi = {10.1007/s00191-008-0125-5},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Mendez-DuronGarcia.pdf},
  author = {M{\'e}ndez-Dur{\'o}n, Rebeca and Garc{\'\i}a, Clara E.},
}

@article{1084,
  title = {Dynamics of innovation in an ``open source'' collaboration environment: lurking, laboring, and launching {FLOSS} projects on {SourceForge}},
  journal = {Industrial and Corporate Change},
  volume = {17},
  year = {2008},
  note = {"For the purposes of this study, we are able to draw upon micro-level data pertaining to the activities undertaken during the period between the beginning of September 2000 and December 2002 by the entire cohort of 222,835 individuals who had registered on SF.net during the 14 months from September 1, 2000 through October 26, 2001." "The statistical analysis in this article is based upon an edited dataset (referred to here as the SFnetDataset) covering the SourceForge cohort of 222,835 individuals who registered on SF.net during the 14 months from September 1, 2000 through October 26, 2001." They appear to be using their own dataset which is from 2000-2001 even though this paper is from 2008},
  month = jul,
  pages = {647--710},
  abstract = {A systems analysis perspective is adopted to examine the critical properties of the Free/Libre/Open Source Software (FLOSS) mode of innovation, as reflected on the SourceForge platform (SF.net). This approach re-scales March{\textquoteright}s (1991) framework and applies it to characterize the {\textquotedblleft}innovation system{\textquotedblright} of a {\textquotedblleft}distributed organization{\textquotedblright} of interacting agents in a virtual collaboration environment, rather than to innovation within a firm. March (1991) views the process of innovation at the organizational level as the coupling of sub-processes of exploration and exploitation. Correspondingly, the innovation system of the virtual collaboration environment represented by SF.net is an emergent property of two {\textquotedblleft}coupled{\textquotedblright} processes: one involves the interactions among agents searching the locale for information and knowledge resources to use in designing novel software products (i.e., exploration), and the other involves the mobilization of individuals{\textquoteright} capabilities for application in the software development projects that become established on the platform (i.e., exploitation). The micro-dynamics of this system are studied empirically by constructing transition probability matrices representing the movements of 222,835 SF.net users among seven different activity states, which range from {\textquotedblleft}lurking{\textquotedblright} (not contributing or contributing to projects without becoming a member) to {\textquotedblleft}laboring{\textquotedblright} (joining one or more projects as members), and to {\textquotedblleft}launching{\textquotedblright} (founding one or more projects) within each successive 6-month interval. The estimated probabilities are found to form first-order Markov chains describing ergodic processes. This makes it possible the computation of the equilibrium distribution of agents among the states, thereby suppressing transient effects and revealing persisting patterns of project joining and project launching. The latter show the FLOSS innovation process on SF.net to be highly dissipative: a very large proportion of the registered {\textquotedblleft}developers{\textquotedblright} fail to become even minimally active on the platform. There is nevertheless an active core of mobile project joiners, and a (still smaller) core of project founders who persist in creating new projects. The structure of these groups{\textquoteright} interactions (as displayed within the 3-year period examined) is investigated in detail, and it is shown that it would be sufficient to sustain both the exploration and exploitation phases of the platform{\textquoteright}s global dynamics.},
  keywords = {contributors, core, developers, roles, SFnetDataset, sourceforge, users, virtual communities, virtual organization, virtual organizations},
  issn = {1464-3650},
  doi = {10.1093/icc/dtn026},
  author = {David, P. A. and Rullani, F.},
}

@article{Giuri2008305,
  title = {Explaining leadership in virtual teams: The case of open source software},
  journal = {Information Economics and Policy},
  volume = {20},
  number = {4},
  year = {2008},
  note = {Empirical Issues in Open Source Software},
  pages = {305--315},
  abstract = {This paper contributes to the open source software (OSS) literature by investigating the likelihood that a participant becomes a project leader. Project leaders are key actors in a virtual community and are crucial to the success of the OSS model. Knowledge of the forces that lead to the emergence of project managers among the multitude of participants is still limited. We aim to fill this gap in the literature by analyzing the association between the roles played by an individual who is registered with a project, and a set of individual-level and project-level characteristics. In line with the theory of occupational choice elaborated by (Lazear, E.P., 2002. Entrepreneurship. NBER Working Paper No. 9109, Cambridge, Mass; Lazear, E.P., 2004. Balanced skills and entrepreneurship, American Economic Review 94, pp. 208-211), we find that OSS project leaders possess diversified skill sets which are needed to select the inputs provided by various participants, motivate contributors, and coordinate their efforts. Specialists, like pure developers, are endowed with more focused skill sets. Moreover, we find that the degree of modularity of the development process is positively associated with the presence of project leaders. That result is consistent with the modern theory of modular production (Baldwin, C.Y., Clark, K.B., 1997. Managing in an age of modularity. Harvard Business Review September-October. pp. 84-93; Mateos-Garcia, J., Steinmueller, W.E., 2003. The Open Source Way of Working: A New Paradigm for the Division of Labour in Software Development? SPRU - Science and Technology Policy Studies. Open Source Movement Research INK Working Paper, No. 1; Aoki, M., 2004. An organizational architecture of T-form: Silicon Valley clustering and its institutional coherence. Industrial and Corporate Change 13, pp. 967-981).},
  keywords = {contributors, Human capital, leadership, roles, sourceforge, team},
  issn = {0167-6245},
  doi = {10.1016/j.infoecopol.2008.06.002},
  url = {http://www.sciencedirect.com/science/article/B6V8J-4SRW10C-1/2/5ce36096ba3947338962268b54a5a7a9},
  author = {Giuri, Paola and Rullani, Francesco and Torrisi, Salvatore},
}

@inproceedings{997,
  title = {Mining {CVS} Repositories to Understand Open-Source Project Developer Roles},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year = {2007},
  pages = {8},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  abstract = {This paper presents a model to represent the interactions of distributed open-source software developers and utilizes data mining techniques to derive developer roles. The model is then applied on case studies of two open-source projects, ORAC-DR and Mediawiki with encouraging results.},
  keywords = {cvs, developer interaction, developers, mediawiki, orac-dr, roles, scm, source code},
  isbn = {0-7695-2950-X},
  doi = {10.1109/MSR.2007.19},
  attachments = {https://flosshub.org/sites/flosshub.org/files/28300008.pdf},
  author = {Yu, Liguo and Ramaswamy, Srini},
}

@inproceedings{German:2006:SCP:1137983.1138022,
  title = {A study of the contributors of {PostgreSQL}},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR {\textquoteright}06},
  year = {2006},
  pages = {163--164},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  abstract = {This report describes some characteristics of the development team of PostgreSQL that were uncovered by analyzing the history of its software artifacts as recorded by the project{\textquoteright}s CVS repository.},
  keywords = {contributions, contributors, cvs, developers, mining challenge, mining software repositories, msr challenge, patches, postgresql, revision history, roles, software evolution, source code, team},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138022},
  url = {http://doi.acm.org/10.1145/1137983.1138022},
  attachments = {https://flosshub.org/sites/flosshub.org/files/163AStudyOf.pdf},
  author = {German, Daniel M.},
}

% The year is 2005 so that it agrees with this entry's DOI (HICSS.2005.57).
% NOTE(review): the spelling "Souce" in the title is kept as exported;
% confirm against the published title before changing it.
@inproceedings{1090,
  title = {A Topological Analysis of the Open Souce Software Development Community},
  booktitle = {Proceedings of the 38th Annual Hawaii International Conference on System Sciences},
  year = {2005},
  note = {"We extracted data from a 2003 data dump obtained from SourceForge. " roles},
  pages = {1--10},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Big Island, HI, USA},
  abstract = {The fast growth of OSS has increased the interest in studying the composition of the OSS community and its collaboration mechanisms. Moreover, the success of a project may be related to the underlying social structure of the OSS development community. In this paper, we perform a quantitative analysis of Open Source Software developers by studying the entire development community at SourceForge [26]. Statistics and social network properties are explored to find collaborations and the effects of different members in the OSS development community. Small world phenomenon and scale free behaviors are found in the SourceForge development network. These topological properties may potentially explain the success and efficiency of OSS development practices. We also infer from our analysis that weakly associated but contributing co-developers and active users may be an important factor in OSS development.},
  keywords = {contributors, developers, roles, social network analysis, social networks, sourceforge, srda, users},
  doi = {10.1109/HICSS.2005.57},
  url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.132.6830\&rep=rep1\&type=pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/xuGao.pdf},
  author = {Xu, Jin and Gao, Yongqin and Christley, S. and Madey, G.},
}

% NOTE(review): the abstract breaks off after "joining script" and runs
% straight into what reads like the paper's acknowledgements; restore the
% abstract from the publisher's record.
@article{146,
  title = {Community, joining, and specialization in open source software innovation: a case study},
  journal = {Research Policy},
  volume = {32},
  number = {7},
  year = {2003},
  note = {first, telephone interviews "Secondly, we collected the project{\textquoteright}s public email conversations stored in the projects{\textquoteright} mailing lists which is archived on Freenet{\textquoteright}s website" "The third source of data included the history of changes to the software code available via the project{\textquoteright}s software repository within the CVS ({\textquoteleft}Concurrent Versioning System{\textquoteright}) source code management tool" "Fourthly, in order obtain contextual understanding of the project we collected publicly available documents related to open source in general and to the project in particular. Among the most important sources were the Freenet project web pages (e.g. the Frequently Asked Questions (FAQ)7), Ian Clarke{\textquoteright}s master thesis (1999), newspaper interviews with the core developers, and a technical paper (Clarke, Sandberg, Wiley, \& Hong, 2000) describing the Freenet project written by some of the developers."},
  pages = {1217--1241},
  abstract = {This paper develops an inductive theory of the open source software innovation process by focussing on the creation of Freenet, a project aimed at developing a decentralized and anonymous peer-to-peer electronic file sharing network. We are particularly interested in the strategies and processes by which new people join the existing community of software developers, and how they initially contribute code. Analyzing data from multiple sources on the Freenet software development process, we generate the constructs of "joining script", We are grateful to helpful comments from two anonymous reviewers. We also thank Chris Argyris, John Seely Brown, Eric von Hippel, Stefan Haefliger, Petra Kugler, Heike Bruch, Simon G{\"a}chter, Simon Peck, and Hari Tsoukas for helpful comments and suggestions. Ben Ho and Craig Lebowitz provided technical assistance with data importation and parsing. We would like to thank Ian Clarke and the Freenet developers for their willingness to participate in our study and providing key insights into the open source development process. Karim R. Lakhani would like to acknowledge the generous support of The Boston Consulting Group and Canada{\textquoteright}s Social Science and Humanities Research Council doctoral fellowship. Georg von Krogh and Sebastian Spaeth acknowledge the generous support from the Research Foundation at the University of St. Gallen.},
  keywords = {cvs, email, email archives, freenet, INNOVATION, mailing lists, roles, source code},
  doi = {10.1016/S0048-7333(03)00050-7},
  attachments = {https://flosshub.org/sites/flosshub.org/files/krogh03.pdf},
  author = {von Krogh, Georg and Spaeth, S. and Lakhani, Karim R.},
}

% NOTE(review): year is 2003 but the attachment is named wynn2004.pdf;
% confirm the publication year.
@inproceedings{Wynn03organizationalstructure,
  title = {Organizational Structure of Open Source Projects: A Life Cycle Approach},
  booktitle = {Proceedings of 7th Annual Conference of the Southern Association for Information Systems},
  year = {2003},
  note = {"The three graphs in Figure 2 below were taken from smoothed download counts for existing open source projects on Sourceforge.net" "A random sample of 150 open source projects will be taken from data provided by Sourceforge.net. Each project will be evaluated to determine their current life cycle stage (where possible) using download counts. Next, the project admins, developers, and several identifiable users for each evaluated project will be contacted via email to request completing a brief questionnaire to measure the current focus of the project, formal structure, division of labor, leader role, coordination, level of commitment, user success, and developer success. "},
  abstract = {The structure of open source project communities is discussed in relation to the organizational life cycle. In lieu of sales figures, the download counts for each project are used to identify the life cycle stage of a random sample of open source projects. A research model is proposed that attempts to measure the fit between the life cycle stage and the specific organizational characteristics of these projects (focus, division of labor, role of the leader, level of commitment, and coordination/control) as an indicator of the success of a project as measured by the satisfaction and involvement of both developers and users.},
  keywords = {division of labor, downloads, growth, interview, leadership, life cycle, lifecycle, project success, roles, sourceforge, Survey},
  attachments = {https://flosshub.org/sites/flosshub.org/files/wynn2004.pdf},
  author = {Wynn, Donald E.},
}

@inproceedings{Ye:2003:TUM:776816.776867,
  title = {Toward an understanding of the motivation of Open Source Software developers},
  booktitle = {Proceedings of the 25th International Conference on Software Engineering},
  series = {ICSE {\textquoteright}03},
  year = {2003},
  note = {"Analyzing the emails sent to the mailing list is one way of understanding the structure of the community." "Table 2 displays the number of code contributions made by members to the GIMP system and the defined roles of those contributing members. We counted the number of contributions made by each person by analyzing the change log of the system."},
  pages = {419--429},
  publisher = {IEEE Computer Society},
  organization = {IEEE Computer Society},
  address = {Washington, DC, USA},
  abstract = {An Open Source Software (OSS) project is unlikely to be successful unless there is an accompanied community that provides the platform for developers and users to collaborate. Members of such communities are volunteers whose motivation to participate and contribute is of essential importance to the success of OSS projects. In this paper, we aim to create an understanding of what motivates people to participate in OSS communities. We theorize that learning is one of the motivational forces. Our theory is grounded in the learning theory of Legitimate Peripheral Participation, and is supported by analyzing the social structure of OSS communities and the co-evolution between OSS systems and communities. We also discuss practical implications of our theory for creating and maintaining sustainable OSS communities as well as for software engineering research and education.},
  keywords = {change log, COMMUNITY, contributions, contributors, developers, email, email archives, evolution, gimp, log files, mailing list, roles, source code},
  isbn = {0-7695-1877-X},
  url = {http://portal.acm.org/citation.cfm?id=776816.776867},
  attachments = {https://flosshub.org/sites/flosshub.org/files/YeKishida.pdf},
  author = {Ye, Yunwen and Kishida, Kouichi},
}

% The exported month value only repeated the year (2002), so no month is
% recorded for this entry.
@inproceedings{stewart2002an-explorat,
  title = {An Exploratory Study of Factors Influencing the Level of Vitality and Popularity of Open Source Projects},
  booktitle = {ICIS 2002. Proceedings of International Conference on Information Systems 2002},
  year = {2002},
  note = {"We are currently tracking publicly available data on 240 open source projects registered on the freshmeat Website." "First, we randomly selected a total of 120 projects from the utilities, software development, and games and entertainment areas. We then selected 120 projects from these forums that had been registered on the site during the two weeks prior to the start of our data collection effort."},
  pages = {1--5},
  abstract = {In this research, we ask the question: What differentiates successful from unsuccessful open source software projects? Using a sample of 240 open source projects, we examine how organizational sponsorship, target audience (developer versus end user), license choice, and development status interact over time to influence the extent to which open source software projects attract user attention and developer activity.},
  keywords = {activity, audience, developers, freshmeat, license analysis, licenses, organizational sponsorship, project success, roles, status, target audience, users},
  author = {Stewart, Katherine J. and Ammeter, Tony},
}

% Conference paper: the proceedings title is held in booktitle rather than
% in journal.
@inproceedings{flosswp26,
  title = {A Case Study of Open Source Software Development: The {Apache} Server},
  booktitle = {Proceedings of the International Conference on Software Engineering ({ICSE} 2000)},
  year = {2000},
  note = {We used the following archival sources of data: Developer email list (EMAIL). Concurrent Version Control archive (CVS). Problem reporting database (BUGDB).},
  month = jun,
  abstract = {According to its proponents, open source style software development has the capacity to compete successfully, and perhaps in many cases displace, traditional commercial development methods. We examine the development process of a major open source application, the Apache web server. By using email archives of source code change history and problem reports we quantify aspects of developer participation, core team size, code ownership, productivity, defect density, and problem resolution interval for this OSS project. This analysis reveals a unique process, which performs well on important measures.},
  keywords = {apache, bug fix revisions, bugs, core, cvs, defect density, developers, email archives, participation, productivity, revision control, revision history, roles, scm, source code, team size},
  attachments = {https://flosshub.org/sites/flosshub.org/files/mockusapache.pdf},
  author = {Mockus, Audris and Fielding, Roy and Herbsleb, James},
}