@proceedings {1896, title = {Addressing Lock-in, Interoperability, and Long-Term Maintenance Challenges Through Open Source: How Can Companies Strategically Use Open Source?}, volume = {496}, year = {2017}, month = {05/2017}, pages = {80--88}, publisher = {Springer}, abstract = {This industry paper reports on how strategic use of open source in company contexts can provide effective support for addressing the fundamental challenges of lock-in, interoperability, and longevity of software and associated digital assets. The fundamental challenges and an overview of an ongoing collaborative research project are presented. Through a conceptual model for open source usage in company contexts we characterise how companies engage with open source and elaborate on how the fundamental challenges can be effectively addressed through open source usage in company contexts. }, doi = {10.1007/978-3-319-57735-7_9}, url = {https://link.springer.com/chapter/10.1007/978-3-319-57735-7_9}, author = {Lundell, Bj{\"o}rn and Gamalielsson, Jonas and Stefan Tengblad and Bahram Hooshyar Yousefi and Thomas Fischer and Gert Johansson and Bengt Rodung and Mattsson, Anders and Johan Oppmark and Gustavsson, Tomas and Feist, Jonas and Stefan Landemoo and Erik L{\"o}nroth} } @article {KazuhiroYamashita2016, title = {Magnet or Sticky? Measuring Project Characteristics from the Perspective of Developer Attraction and Retention}, journal = {Journal of Information Processing}, volume = {24}, number = {2}, year = {2016}, pages = {339--348}, abstract = {Open Source Software (OSS) is vital to both end users and enterprises. As OSS systems are becoming a type of infrastructure, long-term OSS projects are desired. For the survival of OSS projects, the projects need to not only retain existing developers, but also attract new developers to grow. 
To better understand how projects retain and attract contributors, our preliminary study aimed to measure the personnel attraction and retention of OSS projects using a pair of population migration metrics, called Magnet (personnel attraction) and Sticky (retention) metrics. Because the preliminary study analyzed only 90 projects and the 90 projects are not representative of GitHub, this paper extend the preliminary study to better understand the generalizability of the results by analyzing 16, 552 projects of GitHub. Furthermore, we also add a pilot study to investigate the typical duration between releases to find more appropriate release duration. The study results show that (1) approximately 23\% of developers remain in the same projects that the developers contribute to, (2) the larger projects are likely to attract and retain more developers, (3) 53\% of terminal projects eventually decay to a state of fewer than ten developers and (4) 55\% of attractive projects remain in an attractive category. }, keywords = {github, retention}, doi = {10.2197/ipsjjip.24.339}, url = {https://www.jstage.jst.go.jp/article/ipsjjip/24/2/24_339/_article}, author = {Yamashita, Kazuhiro and Kamei, Yasutaka and McIntosh, Shane and Hassan, Ahmed E. and Ubayashi, Naoyasu} } @proceedings {1759, title = {An Architectural Evolution Dataset}, year = {2015}, month = {05/2015}, publisher = {IEEE}, abstract = {A good evolution process and a good architecture can greatly support the maintainability of long-lived, large software systems. We present AREVOL, a dataset for the empirical study of architectural evolution. The dataset comprises two popular systems from the same domain and using the same component model, to make comparative studies possible. 
Besides the original component metadata, AREVOL includes scripts to obtain simplified models that nevertheless support rich studies of architectural evolution, as the authors{\textquoteright} previous work has shown.}, url = {http://oro.open.ac.uk/42318/1/wermelinger15msr.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/wermelinger15msr.pdf}, author = {Wermelinger, Michel and Yu, Yijun} } @proceedings {1763, title = {A Dataset of High Impact Bugs: Manually-Classified Issue Reports}, year = {2015}, month = {05/2015}, publisher = {IEEE}, abstract = {The importance of supporting test and maintenance activities in software development has been increasing, since recent software systems have become large and complex. Although in the field of Mining Software Repositories (MSR) there are many promising approaches to predicting, localizing, and triaging bugs, most of them do not consider impacts of each bug on users and developers but rather treat all bugs with equal weighting, excepting a few studies on high impact bugs including security, performance, blocking, and so forth. To make MSR techniques more actionable and effective in practice, we need deeper understandings of high impact bugs. 
In this paper we introduced our dataset of high impact bugs which was created by manually reviewing four thousand issue reports in four open source projects (Ambari, Camel, Derby and Wicket).}, keywords = {ambari, camel, derby, wicket}, url = {http://oss.sys.wakayama-u.ac.jp/publications/pman3.cgi?DOWNLOAD=141}, attachments = {https://flosshub.org/sites/flosshub.org/files/5594a518.pdf}, author = {Ohira, Masao and Yutaro Kashiwa and Yosuke Yamatani and Hayato Yoshiyuki and Yoshiya Maeda and Nachai Limsettho and Keisuke Fujino and Hata, Hideaki and Ihara, Akinori and Kenichi Matsumoto} } @conference {1859, title = {OSSEAN: Mining Crowd Wisdom in Open Source Communities}, booktitle = {2015 IEEE Symposium on Service-Oriented System Engineering (SOSE)}, year = {2015}, pages = {367 - 371}, publisher = {IEEE}, organization = {IEEE}, address = {San Francisco Bay, CA, USA}, abstract = {Nowadays open source software represents a successful crowd-based software production model and is becoming an ecosystem combining huge amounts of software producers (such as software developers) and consumers (such as software users and customers). Lots of research work has been conducted on analyzing software artifacts created by producers, but few of them reveal the power of feedback from consumers which we believe is very important for the evaluation and evolution of open source software. This paper introduces OSSEAN, a platform for Open Source Software Evaluating, Analyzing and Networking. OSSEAN divides the open source communities into two groups: software production communities and software consumption communities. The former contain structured software artifacts such as projects, source code and issues, while the latter are full of textual documents with rich semantics of user feedback. 
We show the power of OSSEAN with some interesting demos by analyzing more than 200 thousands of open source projects and 10 million documents.}, keywords = {flossmole}, doi = {10.1109/SOSE.2015.51}, author = {Yin, Gang and Wang, Tao and Wang, Huaimin and Fan, Qiang and Zhang, Yang and Yu, Yue and Yang, Cheng} } @proceedings {1766, title = {Wait For It: Determinants of Pull Request Evaluation Latency on GitHub}, year = {2015}, month = {05/2015}, publisher = {IEEE}, abstract = {The pull-based development model, enabled by git and popularised by collaborative coding platforms like BitBucket, Gitorius, and GitHub, is widely used in distributed software teams. While this model lowers the barrier to entry for potential contributors (since anyone can submit pull requests to any repository), it also increases the burden on integrators (i.e., members of a project{\textquoteright}s core team, responsible for evaluating the proposed changes and integrating them into the main development line), who struggle to keep up with the volume of incoming pull requests. In this paper we report on a quantitative study that tries to resolve which factors affect pull request evaluation latency in GitHub. 
Using regression modeling on data extracted from a sample of GitHub projects using the Travis-CI continuous integration service, we find that latency is a complex issue, requiring many independent variables to explain adequately.}, url = {https://bvasiles.github.io/papers/msr15.pdf}, attachments = {https://flosshub.org/sites/flosshub.org/files/msr15.pdf}, author = {Yu, Yue and Wang, Huaimin and Filkov, Vladimir and Devanbu, Premkumar and Vasilescu, Bogdan} } @conference {Zhang:2014:ISM:2666539.2666572, title = {Investigating Social Media in GitHub{\textquoteright}s Pull-requests: A Case Study on Ruby on Rails}, booktitle = {Proceedings of the 1st International Workshop on Crowd-based Software Development Methods and Technologies}, series = {CrowdSoft 2014}, year = {2014}, pages = {37{\textendash}41}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In GitHub, pull-request mechanism is an outstanding social development method by integrating with many social media. Many studies have explored that social media has an important effect on software development. @-mention as a typical social media, is a useful tool in social platform. In this paper, we made a quantitative analysis of @-mention in pull-requests of the project Ruby on Rails. First, we make a convictive statistics of the popularity of pull-request mechanism in GitHub. Then we investigate the current situation of @-mention in the Ruby on Rails. Our empirical analysis results find some insights of @-mention. }, keywords = {@-mention, github, pull-request, social media}, isbn = {978-1-4503-3224-8}, doi = {10.1145/2666539.2666572}, url = {http://doi.acm.org/10.1145/2666539.2666572}, author = {Zhang, Yang and Yin, Gang and Yu, Yue and Wang, Huaimin} } @conference {Yamashita:2014:MSO:2597073.2597116, title = {Magnet or Sticky? 
An OSS Project-by-project Typology}, booktitle = {Proceedings of the 11th Working Conference on Mining Software Repositories}, series = {MSR 2014}, year = {2014}, pages = {344{\textendash}347}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {For Open Source Software (OSS) projects, retaining existing contributors and attracting new ones is a major concern. In this paper, we expand and adapt a pair of population migration metrics to analyze migration trends in a collection of open source projects. Namely, we study: (1) project stickiness, i.e., its tendency to retain existing contributors and (2) project magnetism, i.e., its tendency to attract new contributors. Using quadrant plots, we classify projects as attractive (highly magnetic and sticky), stagnant (highly sticky, weakly magnetic), fluctuating (highly magnetic, weakly sticky), or terminal (weakly magnetic and sticky). Through analysis of the MSR challenge dataset, we find that: (1) quadrant plots can effectively identify at-risk projects, (2) stickiness is often motivated by professional activity and (3) transitions among quadrants as a project ages often coincides with interesting events in the evolution history of a project. 
}, keywords = {Developer migration, Magnet, mining challenge, msr challenge, open source, Sticky}, isbn = {978-1-4503-2863-0}, doi = {10.1145/2597073.2597116}, url = {http://doi.acm.org/10.1145/2597073.2597116}, attachments = {https://flosshub.org/sites/flosshub.org/files/yamashita.pdf}, author = {Yamashita, Kazuhiro and McIntosh, Shane and Kamei, Yasutaka and Ubayashi, Naoyasu} } @conference {1687, title = {Recommending relevant projects via user behaviour: an exploratory study on github}, booktitle = {Proceedings of the 1st International Workshop on Crowd-based Software Development Methods and Technologies - CrowdSoft 2014}, year = {2014}, pages = {25--30}, publisher = {ACM Press}, organization = {ACM Press}, address = {New York, New York, USA}, venue = {Hong Kong, China}, abstract = {Social coding sites (e.g., Github) provide various features like Forking and Sending Pull-requests to support crowd-based software engineering. When using these features, a large amount of user behavior data is recorded. User behavior data can reflect developers preferences and interests in software development activities. Online service providers in many fields have been using user behavior data to discover user preferences and interests to achieve various purposes. In the field of software engineering however, there has been few studies in mining large amount of user behavior data. Our goal is to design an approach based on user behavior data, to recommend relevant open source projects to developers, which can be helpful in activities like searching for the right open source solutions to quickly build prototypes. In this paper, we explore the possibilities of such a method by conducting a set of experiments on selected data sets from Github. We find it a promising direction in mining projects{\textquoteright} relevance from user behavior data. 
Our study also obtain some important issues that is worth considering in this method.}, isbn = {9781450332248}, doi = {10.1145/2666539.2666570}, url = {http://dl.acm.org/citation.cfm?id=2666570}, author = {Zhang, Lingxiao and Yanzhen Zou and Bing Xie and Zixiao Zhu} } @proceedings {1514, title = {INVocD: Identifier Name Vocabulary Dataset}, year = {2013}, month = {05/2013}, abstract = {INVocD is a database of the identifier name declarations and vocabulary found in 60 FLOSS Java projects where the source code structure is recorded and the identifier name vocabulary is made directly available, offering advantages for identifier name research over conventional source code models. The database has been used to support a range of research projects from identifier name analysis to concept location, and provides many opportunities to researchers. INVocD may be downloaded from http://oro.open.ac.uk/36992}, author = {Simon Butler and Wermelinger, Michel and Yu, Yijun and Helen Sharp} } @article {1556, title = {Research on mining the online community: a case of Open Source Software community}, journal = {WSEAS Transactions on Computers}, volume = {12}, year = {2013}, month = {06/2013}, pages = {233--242}, abstract = {The development of Open Source Software (OSS) projects is a process of collective innovation in the environment of online community. The paper addresses the challenge of efficiently mining data from OSS web repositories and building models to study OSS community features. Data collection for OSS community study is nontrivial since most OSS projects are developed by distributed developers using web tools. We design a mining process which combines web mining and database mining together to identify, extract, filter and analyze data. We address and analyze the difficulty of mining OSS community data. 
Our work provides a general solution for researchers to implement advanced techniques, such as web mining, data mining, statistics, and algorithms to collect and analyze online community data. }, attachments = {https://flosshub.org/sites/flosshub.org/files/5705-185.pdf}, author = {Yuo, Lan} } @proceedings {1498, title = {Who Does What during a Code Review? Datasets of OSS Peer Review Repositories }, year = {2013}, month = {05/2013}, abstract = {We present four datasets that are focused on the general roles of OSS peer review members. With data mined from both an integrated peer review system and code source repositories, our rich datasets comprise of peer review data that was automatically recorded. Using the Android project as a case study, we describe our extraction methodology, the datasets and their application used for three separate studies. Our datasets are available online at http://sdlab.naist.jp/reviewmining/}, keywords = {android, case study, code review, data set, peer review, roles, source code}, author = {Kazuki Hamasaki and Raula Gaikovina Kula and Norihiro Yoshida and A. E. Camargo Cruz and Kenji Fujiwara and Hajimu Iida} } @conference {1680, title = {Developing an h-index for OSS developers}, booktitle = {2012 9th IEEE Working Conference on Mining Software Repositories (MSR)}, year = {2012}, pages = {251 - 254}, publisher = {IEEE}, organization = {IEEE}, address = {Zurich}, abstract = {The public data available in Open Source Software (OSS) repositories has been used for many practical reasons: detecting community structures; identifying key roles among developers; understanding software quality; predicting the arousal of bugs in large OSS systems, and so on; but also to formulate and validate new metrics and proof-of-concepts on general, non-OSS specific, software engineering aspects. 
One of the results that has not emerged yet from the analysis of OSS repositories is how to help the {\textquotedblleft}career advancement{\textquotedblright} of developers: given the available data on products and processes used in OSS development, it should be possible to produce measurements to identify and describe a developer, that could be used externally as a measure of recognition and experience. This paper builds on top of the h-index, used in academic contexts, and which is used to determine the recognition of a researcher among her peers. By creating similar indices for OSS (or any) developers, this work could help defining a baseline for measuring and comparing the contributions of OSS developers in an objective, open and reproducible way.}, isbn = {978-1-4673-1760-3}, doi = {10.1109/MSR.2012.6224288}, attachments = {https://flosshub.org/sites/flosshub.org/files/MSR2012.pdf}, author = {A. Capiluppi and Serebrenik, A. and Youssef, A.} } @conference {1316, title = {An Empirical Study of Volunteer Members{\textquoteright} Perceived Turnover in Open Source Software Projects}, booktitle = {45th Hawai{\textquoteright}i International Conference on System Sciences}, year = {2012}, note = {"After designing the questionnaire, we conducted a web-based survey by inviting developers working in sourceforge.net and launchpad.net."}, month = {01/2012}, pages = {3396--3405}, abstract = {Turnover of volunteer members and the ensuing instability bring about severe problems to open source software (OSS) projects. To better understand it, we based our study on Herzberg{\textquoteright}s two-factor theory to investigate the influence of hygiene factors on volunteer members{\textquoteright} dissatisfaction and perceived turnover. After empirically testing the research model, we found shortcomings in project regulation and administration are the key reason for volunteer members{\textquoteright} dissatisfaction, followed by future rewards and personal needs for software functionalities. 
By contrast, a possible lack of supportive working relationship among OSS developers was not found to be a trigger for developer dissatisfaction. Dissatisfaction was confirmed to be a significant predictor of perceived turnover. The results demonstrates generalized hygiene factors cannot unreflectively be transferred into the OSS context because volunteer members{\textquoteright} personal expectation has a weaker influence on perceived turnover than objective attributes of OSS project. Our study further makes suggestions for project administrators.}, keywords = {developers, launchpad, sourceforge, Survey}, author = {Yu, Yiqing and Benlian, Alexander and Hess, Thomas} } @proceedings {1442, title = {Step-by-Step Strategies and Case Studies for Embedded Software Companies to Adapt to the FOSS Ecosystem}, volume = {378}, year = {2012}, month = {09/2012}, pages = {48--60}, abstract = {Due to the continuous expansion of the FOSS ecosystem and the introduction of high-quality FOSS, FOSS is increasingly used in consumer electronics (CE) such as smartphones, televisions, and cameras. As a result, manufacturers of CE products have developed a close relationship with the FOSS ecosystem. For CE product manufacturers, efficient adaptation to the FOSS ecosystem has become an essential component in their business operations. This paper will divide the activities of CE product manufacturers within the FOSS ecosystem into the following four levels: identification, adoption, compliance, and contribution. It will examine the main activities and obstacles that arise in each level. 
The paper will also present instances where companies have succeeded in overcoming these obstacles.}, keywords = {embedded software, FOSS ecosystem}, author = {Kim, Suhyun and Yoo, Jaehyun and Lee, Myunghwa} } @proceedings {1300, title = {The Third Generation of OSS: A Three-Stage Evolution from Gift to Commerce-Economy}, year = {2011}, month = {10/2011}, pages = {368--378}, publisher = {Springer}, abstract = {Linux is penetrating into mobile software as the basis for a mobile middleware platform. It is accelerating the increasing visibility of open source software (OSS) components in mobile middleware platforms. Considering the 10-million lines of code of OSS-based industrial platforms such as a mobile middleware platform, engagement in foundations is inevitable for large-scale packages of OSS for industrial solutions. The author discusses the driving factors toward a foundation-based OSS and the transition of the underlying economy types to analyze the transitions to the third-generation OSS.}, author = {Yamakami, Toshihiko} } @conference {959, title = {Finding file clones in FreeBSD Ports Collection}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {102--105}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {In Open Source System (OSS) development, software components are often imported and reused; for this reason we might expect that files are copied in multiple projects (file clones). In this paper, we propose a file clone detection tool called FCFinder and show the analysis performed with it on the FreeBSD Ports Collection, a large OSS project collection. 
We found many file clones among similar or related projects, which are systematically introduced from base projects.}, keywords = {clone, freebsd, msr challenge, source code}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463293}, attachments = {https://flosshub.org/sites/flosshub.org/files/102FreeBSDClones.pdf}, author = {Sasaki, Yusuke and Yamamoto, Tetsuo and Hayase, Yasuhiro and Inoue, Katsuro} } @article {1336, title = {Open Source Software Systems}, journal = {International Journal of Open Source Software and Processes}, volume = {2}, year = {2010}, pages = {28 - 47}, abstract = {Complex software systems and the huge amounts of data they produce are becoming an integral part of our organizations. We are also becoming increasingly dependent on high quality software products in our everyday lives. These systems {\textquoteleft}evolve{\textquoteright} as we identify and correct existing defects, provide new functionalities, or increase their nonfunctional qualities - such as security, maintainability, performance, etc. Simultaneously, more software development projects are distributed over multiple locations (often globally) and are often several millions of dollars in development costs. Consequently, as the Internet continually eliminates geographic boundaries, the concept of doing business within a single country has given way to companies focusing on competing in an international marketplace. The digitalization of work and the reorganization of work processes across many organizations have resulted in routine and/or commodity components being outsourced.}, issn = {1942-3934}, doi = {10.4018/IJOSSP.2010100103}, author = {Lenin, R. B. and Ramaswamy, S. and Yu, Liguo and Govindan, R. 
B.} } @article {1348, title = {An Agent Simulation Study on Conflict, Community Climate and Innovation in Open Source Communities}, journal = {International Journal of Open Source Software and Processes}, volume = {1}, year = {2009}, pages = {1 - 25}, abstract = {More than ever the complexity of innovation requires group efforts, as teams of scientists and engineers from diverse backgrounds work together to solve problems. One of the significant problems in understanding emergence of innovation involves how virtual innovation organizations and communities govern and coordinate to maximize innovation output. An agent simulation study is conducted to examine the impact of culture and conflict management styles on collective creativity in open source innovation systems. Findings suggest that decentralized coordination schemes such as emergent selection such as found in utility communities and moderate degrees of assertiveness and cooperation for conflict management result in higher incidence of innovation.}, issn = {1942-3934}, doi = {10.4018/jossp.2009100101}, author = {Yilmaz, Levent} } @conference {Yatani:2009:UWO:1518701.1518853, title = {Understanding how and why open source contributors use diagrams in the development of Ubuntu}, booktitle = {Proceedings of the 27th international conference on Human factors in computing systems}, series = {CHI {\textquoteright}09}, year = {2009}, note = {"To examine how and why diagrams are used in any and all aspects of the software development process of an OSS project, we performed a series of semi-structured interviews with contributors to one particular effort{\textemdash}Ubuntu." "Our study was divided into two phases. First, we asked participants to complete a questionnaire and provide us with information and materials for discussion. The questionnaire featured questions about participants{\textquoteright} OSS experience, project participation, their roles in each project, and basic demographics. 
We also asked participants to share diagrams they had created, modified or used as part of their work on Ubuntu. In the second phase of the study, we conducted semi-structured interviews with participants."}, pages = {995{\textendash}1004}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Some of the most interesting differences between Open Source Software (OSS) development and commercial co-located software development lie in the communication and collaboration practices of these two groups of developers. One interesting practice is that of diagramming. Though well studied and important in many aspects of co-located software development (including communication and collaboration among developers), its role in OSS development has not been thoroughly studied. In this paper, we report our investigation on how and why Ubuntu contributors use diagrams in their work. Our study shows that diagrams are not actively used in many scenarios where they commonly would in co-located software development efforts. 
We describe differences in the use and practices of diagramming, their possible reasons, and present design considerations for potential systems aimed at better supporting diagram use in OSS development.}, keywords = {developers, diagramming, interviews, open source software (oss), software development, Ubuntu, visual representation}, isbn = {978-1-60558-246-7}, doi = {10.1145/1518701.1518853}, url = {http://doi.acm.org/10.1145/1518701.1518853}, author = {Yatani, Koji and Chung, Eunyoung and Jensen, Carlos and Truong, Khai N.} } @conference {Wermelinger:2008:AEE:1370750.1370783, title = {Analyzing the evolution of eclipse plugins}, booktitle = {Proceedings of the 2008 international working conference on Mining software repositories}, series = {MSR {\textquoteright}08}, year = {2008}, pages = {133{\textendash}136}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Eclipse is a good example of a modern component-based complex system that is designed for long-term evolution, due to its architecture of reusable and extensible components. This paper presents our preliminary results about the evolution of Eclipse{\textquoteright}s architecture, based on a lightweight and scalable analysis of the metadata in Eclipse{\textquoteright}s sources. We find that the development of Eclipse follows a systematic process: most architectural changes take place in milestones, and maintenance releases only make exceptional changes to component dependencies. 
We also found a stable architectural core that remains since the first release.}, keywords = {architectural evolution, cvs, eclipse, metadata, msr challenge, releases, source code}, isbn = {978-1-60558-024-1}, doi = {10.1145/1370750.1370783}, url = {http://doi.acm.org/10.1145/1370750.1370783}, author = {Wermelinger, Michel and Yu, Yijun} } @article {147, title = {The business of open source}, journal = {Communications of the ACM}, volume = {51}, number = {4}, year = {2008}, note = {Times Cited: 0}, pages = {41--46}, author = {Watson, R. T. and Boudreau, M. C. and York, P. T. and Greiner, M. E. and Wynn, D.} } @article {1392, title = {Self-organization process in open-source software: An empirical study}, journal = {Information and Software Technology}, volume = {50}, year = {2008}, month = {4/2008}, pages = {361--374}, abstract = {Software systems must continually evolve to adapt to new functional requirements or quality requirements to remain competitive in the marketplace. However, different software systems follow different strategies to evolve, affecting both the release plan and the quality of these systems. In this paper, software evolution is considered as a self-organization process and the difference between closed-source software and open-source software is discussed in terms of self-organization. In particular, an empirical study of the evolution of Linux from version 2.4.0 to version 2.6.13 is reported. 
The study shows how open-source software systems self-organize to adapt to functional requirements and quality requirements.}, keywords = {Empirical study, evolution, linux, requirements, Self-organization, software evolution}, issn = {09505849}, doi = {10.1016/j.infsof.2007.02.018}, url = {http://www.sciencedirect.com/science/article/pii/S0950584907000225}, author = {Yu, Liguo} } @conference {997, title = {Mining CVS Repositories to Understand Open-Source Project Developer Roles}, booktitle = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {8}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {This paper presents a model to represent the interactions of distributed open-source software developers and utilizes data mining techniques to derive developer roles. The model is then applied on case studies of two open-source projects, ORAC-DR and Mediawiki with encouraging results.}, keywords = {cvs, developer interaction, developers, mediawiki, orac-dr, roles, scm, source code}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.19}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300008.pdf}, author = {Yu, Liguo and Ramaswamy, Srini} } @conference {884, title = {Recommenders Everywhere: The WikiLens Community-Maintained Recommender System}, booktitle = {Wikisym 2007}, year = {2007}, address = {Montreal, Quebec, Canada}, isbn = {978-1-59593-861-9}, author = {Dan Frankowski and Shyong K. Lam and Shilad Sen and Harper, F. Maxwell and Scott Yilek and Michael Cassano and John Riedl} } @article {flosswp322, title = {Developer Learning Dynamics in Open Source Software Projects: A Hidden Markov Model Analysis}, year = {2006}, month = {December}, abstract = {This work proposes a dynamic model of developer learning in open source software (OSS) projects. 
A Hidden Markov Model (HMM) is proposed to explain how the code contribution behaviors of OSS developers change as their levels of knowledge on their projects increase. In this model, discrete hidden states represent the unobserved knowledge levels of developers, and their observed code contribution behaviors are modeled as state dependent. Developers{\textquoteright} knowledge levels evolve as they learn about the projects over time. Two modes of learning are considered: learning-by-doing (code development) and learning through interactions with peers. The model is calibrated using data spanning six years for 25 OSS projects and 251 developers hosted at Sourceforge. The proposed model identifies three knowledge states (high, medium, and low) and estimates the impact of the two modes of learning on the transition of developers between the three knowledge states. The model results suggest that in the low knowledge state developers exhibit the greatest inertia, followed by those in the medium and high states. Both modes of learning are found to have varying impact across the three knowledge states. Interactions with peers appear to be an important source of learning for developers in all states. A developer in the low state learns only through participation in threads started by others. Prior code contribution and starting discussion by initiating threads do not impact the knowledge level of a developer in the low state. Initiating threads, participating in threads started by others, and prior code contributions have positive impacts on the knowledge level of a developer in the medium or high state and, hence, influence his long term code contribution behavior. Explanations for these varying impacts of learning activities on the transitions of developers between the three states are provided. We also find a lack of persistence of knowledge in all states. 
The HMM better describes the data than a latent class model which would suggests that the learning activities have a long term, dynamic impact, rather than an immediate, static impact on the code contribution behavior of a developer.}, attachments = {https://flosshub.org/sites/flosshub.org/files/singh-youn-tan.pdf}, author = {Param Vir Singh and Nara Youn and Yong Tan} } @article {Yu:2006:MKO:1150566.1150571, title = {Maintainability of the kernels of open-source operating systems: A comparison of Linux with FreeBSD, NetBSD, and OpenBSD}, journal = {J. Syst. Softw.}, volume = {79}, year = {2006}, note = {"Data regarding the number and total number of lines of code of kernel and nonkernel modules in the four operating systems are provided in Table 1" loc, kloc, number of kernel modules, number of nonkernel modules size c files .h files}, month = {June}, pages = {807{\textendash}815}, publisher = {Elsevier Science Inc.}, address = {New York, NY, USA}, abstract = {We compared and contrasted the maintainability of four open-source operating systems: Linux, FreeBSD, NetBSD, and OpenBSD. We used our categorization of common coupling in kernel-based software to highlight future maintenance problems. An unsafe definition is a definition of a global variable that can affect a kernel module if that definition is changed. For each operating system we determined a number of measures, including the number of global variables, the number of instances of global variables in the kernel and overall, as well as the number of unsafe definitions in the kernel and overall. We also computed the value of each our measures per kernel KLOC and per KLOC overall. For every measure and every ratio, Linux compared unfavorably with FreeBSD, NetBSD, and OpenBSD. Accordingly, we are concerned about the future maintainability of Linux. 
}, keywords = {abiword, Common coupling, coupling, Definition-use analysis, freebsd, kernel, lines of code, linux, linux kernel, loc, Maintainability, modules, netbsd, Open-source software, openbsd, source code}, issn = {0164-1212}, doi = {10.1016/j.jss.2005.08.014}, url = {http://dx.doi.org/10.1016/j.jss.2005.08.014}, attachments = {https://flosshub.org/sites/flosshub.org/files/YuSchachChen.pdf}, author = {Yu, Liguo and Schach, Stephen R. and Chen, Kai and Heller, Gillian Z. and Offutt, Jeff} } @conference {Kagdi:2006:MSC:1137983.1137996, title = {Mining sequences of changed-files from version histories}, booktitle = {Proceedings of the 2006 international workshop on Mining software repositories}, series = {MSR {\textquoteright}06}, year = {2006}, pages = {47{\textendash}53}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Modern source-control systems, such as Subversion, preserve change-sets of files as atomic commits. However, the specific ordering information in which files were changed is typically not found in these source-code repositories. In this paper, a set of heuristics for grouping change-sets (i.e., log-entries) found in source-code repositories is presented. Given such groups of change-sets, sequences of files that frequently change together are uncovered. This approach not only gives the (unordered) sets of files but supplements them with (partial temporal) ordering information. The technique is demonstrated on a subset of KDE source-code repository. 
The results show that the approach is able to find sequences of changed-files.}, keywords = {change, change history, change management, change sequences, heuristics, kde, mining software repositories, scm, sequences, source code}, isbn = {1-59593-397-2}, doi = {10.1145/1137983.1137996}, url = {http://doi.acm.org/10.1145/1137983.1137996}, attachments = {https://flosshub.org/sites/flosshub.org/files/47MiningSequences.pdf}, author = {Kagdi, Huzefa and Yusuf, Shehnaaz and Maletic, Jonathan I.} } @article {1079, title = {Are All Open Source Projects Created Equal? Understanding the Sustainability of Open Source Software Development Model}, journal = {AMCIS 2005 Proceedings}, number = {435}, year = {2005}, note = {uses first 300 most active projects on Sourceforge: "Our data sample consists of 300 open source software development projects hosted in the Sourceforge.Net. They are the first 300 active projects ranked by Sourceforge.Net....Our dependent variable in the model to measure the success of the projects is the number of downloads. It is an essential variable to show how successful the project is. Generally, more number of downloads means a more successful project. Independent variables include: development status, project lifespan, number of developers, number of messages in the forums, number of mailing list, number of bug report, number of patch report, number of CVS report, number of file releases and also number of news release."}, abstract = {A very intriguing question in Open Source software (OSS) development is: why there are only a few open source projects succeed, while the majority of projects never do. In this research, we examine the factors that may influence the performance of OSS projects. We particularly focus on the OSS{\textquoteright}s core developers{\textquoteright} role in the project{\textquoteright}s success. 
Extant research has yet to distinguish core developers and non-core developers from the community at large. The different roles of the core developers and non-core developers in OSS projects{\textquoteright} success still remain unclear. Our research contributes to the literature by separating the core developers from the development forces in general and empirically examining the core developers{\textquoteright} importance. Drawing the evidences from our extensive dataset of 300 open source projects, we demonstrated that core developers{\textquoteright} leadership and project advocation are crucial in determining the fate of the OSS projects. Our research could provide better understanding of OSS sustainability. It could also give practical advice to the OSS community on how to make the project successful.}, keywords = {contributors, core, developers, downloads, metadata, project success, sourceforge}, attachments = {https://flosshub.org/sites/flosshub.org/files/LongYuan.pdf}, author = {Long, J. and Yuan, M.J.} } @article {Chen:2004:OCL:990374.990391, title = {Open-Source Change Logs}, journal = {Empirical Softw. Engg.}, volume = {9}, year = {2004}, note = {"We decided to compare actual differences in the source code with entries in the ChangeLog file. We used lxr, the Linux cross-referencing tool..., to determine the precise differences between two successive software versions. We then compared these differences with the records in the ChangeLog file to check the completeness of the ChangeLog file." }, month = {September}, pages = {197{\textendash}210}, publisher = {Kluwer Academic Publishers}, address = {Hingham, MA, USA}, abstract = {A recent editorial in Empirical Software Engineering suggested that open-source software projects offer a great deal of data that can be used for experimentation. These data not only include source code, but also artifacts such as defect reports and update logs. 
A common type of update log that experimenters may wish to investigate is the ChangeLog, which lists changes and the reasons for which they were made. ChangeLog files are created to support the development of software rather than for the needs of researchers, so questions need to be asked about the limitations of using them to support research. This paper presents evidence that the ChangeLog files provided at three open-source web sites were incomplete. We examined at least three ChangeLog files for each of three different open-source software products, namely, GNUJSP, GCC-g++, and Jikes. We developed a method for counting changes that ensures that, as far as possible, each individual ChangeLog entry is treated as a single change. For each ChangeLog file, we compared the actual changes in the source code to the entries in the ChangeLog file and discovered significant omissions. For example, using our change-counting method, only 35 of the 93 changes in version 1.11 of Jikes appear in the ChangeLog file{\textemdash}that is, over 62\% of the changes were not recorded there. The percentage of omissions we found ranged from 3.7 to 78.6\%. These are significant omissions that should be taken into account when using ChangeLog files for research. Before using ChangeLog files as a basis for research into the development and maintenance of open-source software, experimenters should carefully check for omissions and inaccuracies.}, keywords = {change log, gcc, GCC-g, GNUJSP, Jikes, log files, Open-source software, source code}, issn = {1382-3256}, doi = {10.1023/B:EMSE.0000027779.70556.d0}, url = {http://portal.acm.org/citation.cfm?id=990374.990391}, attachments = {https://flosshub.org/sites/flosshub.org/files/chen.pdf}, author = {Chen, Kai and Schach, Stephen R. 
and Yu, Liguo and Offutt, Jeff and Heller, Gillian Z.} } @proceedings {1167, title = {Tools for light-weight knowledge sharing in open-source software development}, year = {2003}, pages = {25-29}, attachments = {https://flosshub.org/sites/flosshub.org/files/25-29.pdf}, author = {Cubranic, Davor and Holmes, Reid and Ying, Annie and Murphy, Gail C.} } @conference {Ye:2003:TUM:776816.776867, title = {Toward an understanding of the motivation of Open Source Software developers}, booktitle = {Proceedings of the 25th International Conference on Software Engineering}, series = {ICSE {\textquoteright}03}, year = {2003}, note = {"Analyzing the emails sent to the mailing list is one way of understanding the structure of the community." "Table 2 displays the number of code contributions made by members to the GIMP system and the defined roles of those contributing members. We counted the number of contributions made by each person by analyzing the change log of the system."}, pages = {419{\textendash}429}, publisher = {IEEE Computer Society}, organization = {IEEE Computer Society}, address = {Washington, DC, USA}, abstract = {An Open Source Software (OSS) project is unlikely to be successful unless there is an accompanied community that provides the platform for developers and users to collaborate. Members of such communities are volunteers whose motivation to participate and contribute is of essential importance to the success of OSS projects. In this paper, we aim to create an understanding of what motivates people to participate in OSS communities. We theorize that learning is one of the motivational forces. Our theory is grounded in the learning theory of Legitimate Peripheral Participation, and is supported by analyzing the social structure of OSS communities and the co-evolution between OSS systems and communities. 
We also discuss practical implications of our theory for creating and maintaining sustainable OSS communities as well as for software engineering research and education.}, keywords = {change log, COMMUNITY, contributions, contributors, developers, email, email archives, evolution, gimp, log files, mailing list, roles, source code}, isbn = {0-7695-1877-X}, url = {http://portal.acm.org/citation.cfm?id=776816.776867}, attachments = {https://flosshub.org/sites/flosshub.org/files/YeKishida.pdf}, author = {Ye, Yunwen and Kishida, Kouichi} } @conference {Nakakoji:2002:EPO:512035.512055, title = {Evolution patterns of open-source software systems and communities}, booktitle = {Proceedings of the International Workshop on Principles of Software Evolution}, series = {IWPSE {\textquoteright}02}, year = {2002}, pages = {76{\textendash}85}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Open-Source Software (OSS) development is regarded as a successful model of encouraging "natural product evolution". To understand how this "natural product evolution" happens, we have conducted a case study of four typical OSS projects. Unlike most previous studies on software evolution that focus on the evolution of the system per se, our study takes a broader perspective: It examines not only the evolution of OSS systems, but also the evolution of the associated OSS communities, as well as the relationship between the two types of evolution. Through the case study, we have found that while collaborative development within a community is the essential characteristic of OSS, different collaboration models exist, and that the difference in collaboration model results in different evolution patterns of OSS systems and communities. To treat such differences systematically, we propose to classify OSS into three types: Exploration-Oriented, Utility-Oriented, and Service-Oriented. 
Such a classification can provide guidance on the creation and maintenance of sustainable OSS development and communities.}, keywords = {case study, open-source software (OSS), open-source software community, software evolution}, isbn = {1-58113-545-9}, doi = {10.1145/512035.512055}, url = {http://doi.acm.org/10.1145/512035.512055}, author = {Nakakoji, Kumiyo and Yamamoto, Yasuhiro and Nishinaka, Yoshiyuki and Kishida, Kouichi and Ye, Yunwen} } @conference {1146, title = {Taxonomy of Open Source Software Development}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, keywords = {jun, linux, postgresql, wingnut}, attachments = {https://flosshub.org/sites/flosshub.org/files/nakakojiyamamoto.PDF}, author = {Nakakoji, K. and Yamamoto, Y.} } @article {flosswp47, title = {Usability and open-source software development}, journal = {Proceedings of the Symposium on Computer Human Interaction}, year = {2001}, month = {June}, pages = {49{\textendash}54}, publisher = {ACM SIGCHI New Zealand}, abstract = {This paper reports a usability study of the open-source Greenstone Digital Library collection-building software. The problems highlighted by the study are analysed to identify their likely source within the social context of Greenstone{\textquoteright}s development environment. 
We discuss how characteristics of open-source software development influence the usability of resulting software products.}, keywords = {Greenstone, user study}, attachments = {https://flosshub.org/sites/flosshub.org/files/nichols.pdf}, author = {David M Nichols and Kirsten Thomson and Stuart A Yeates} } @conference {Yamauchi:2000:CLM:358916.359004, title = {Collaboration with Lean Media: how open-source software succeeds}, booktitle = {Proceedings of the 2000 ACM conference on Computer supported cooperative work (CSCW)}, series = {CSCW {\textquoteright}00}, year = {2000}, pages = {329{\textendash}338}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {Open-source software, usually created by volunteer programmers dispersed worldwide, now competes with that developed by software firms. This achievement is particularly impressive as open-source programmers rarely meet. They rely heavily on electronic media, which preclude the benefits of face-to-face contact that programmers enjoy within firms. In this paper, we describe findings that address this paradox based on observation, interviews and quantitative analyses of two open-source projects. The findings suggest that spontaneous work coordinated afterward is effective, rational organizational culture helps achieve agreement among members and communications media moderately support spontaneous work. 
These findings can imply a new model of dispersed collaboration.}, keywords = {cooperative work, cvs, distributed work, electronic media, INNOVATION, open-source, software engineering}, isbn = {1-58113-222-0}, doi = {10.1145/358916.359004}, url = {http://doi.acm.org/10.1145/358916.359004}, author = {Yamauchi, Yutaka and Yokozawa, Makoto and Shinohara, Takeshi and Ishida, Toru} } @article {flosswp383, title = {Knee-jerk Anti-LOOPism and other E-mail Phenomena: Oral, Written, and Electronic Patterns in Computer-Mediated Communication}, number = {WP \#3578-93}, year = {1993}, month = {June}, institution = {Massachusetts Institute of Technology}, type = {Technical Report}, address = {Cambridge, MA}, abstract = {This paper reports on an empirical investigation into the on-going electronic interaction of a natural distributed group. Prior organizational research into use of electronic media has focused primarily on usage patterns and only occasionally on a few linguistic features, while linguistics researchers have looked more closely at certain technical aspects of language use in electronic communication. Interested in a broader range of linguistic and textual features that might be exhibited in the electronic mail medium, we conducted an exploratory study of the electronic communication of a task-oriented group over a 27-month period. Using qualitative and quantitative techniques, we found that the electronic mail messages displayed features normally associated with both speech and written discourse, as well as features that seem new to the electronic medium. The use of all three patterns was influenced by characteristics of the medium, the group, and its task.}, author = {JoAnne Yates and Wanda Orlikowski} }