% Bibliography entries for empirical studies whose keywords include "mozilla".
% Conventions used below: one field per line, author first, "Last, First"
% names, page ranges written with "--", bare DOIs (no resolver prefix),
% and month given as an unquoted three-letter macro.

% NOTE(review): the source data for 1512 has no booktitle; add it to silence
% the missing-field warning for this conference paper.
@inproceedings{1512,
  author = {Nistor, Adrian and Jiang, Tian and Tan, Lin},
  title = {Discovering, Reporting, and Fixing Performance Bugs},
  year = {2013},
  month = may,
  keywords = {bugs, eclipse, mozilla, performance bugs},
  abstract = {Software performance is critical for how users perceive the quality of software products. Performance bugs---programming errors that cause significant performance degradation---lead to poor user experience and low system throughput. Designing effective techniques to address performance bugs requires a deep understanding of how performance bugs are discovered, reported, and fixed. In this paper, we study how performance bugs are discovered, reported to developers, and fixed by developers, and compare the results with those for non-performance bugs. We study performance and non-performance bugs from three popular code bases: Eclipse JDT, Eclipse SWT, and Mozilla. First, we find little evidence that fixing performance bugs has a higher chance to introduce new functional bugs than fixing non-performance bugs, which implies that developers may not need to be overconcerned about fixing performance bugs. Second, although fixing performance bugs is about as error-prone as fixing non-performance bugs, fixing performance bugs is more difficult than fixing non-performance bugs, indicating that developers need better tool support for fixing performance bugs and testing performance bug patches. Third, unlike many non-performance bugs, a large percentage of performance bugs are discovered through code reasoning, not through users observing the negative effects of the bugs (e.g., performance degradation) or through profiling. The result suggests that techniques to help developers reason about performance, better test oracles, and better profiling techniques are needed for discovering performance bugs.}
}

% NOTE(review): the source data for 1497 has neither an author nor a
% booktitle field; both must be supplied before this entry can be cited.
@inproceedings{1497,
  title = {Why So Complicated? Simple Term Filtering and Weighting for Location-Based Bug Report Assignment Recommendation},
  year = {2013},
  month = may,
  pages = {2--11},
  url = {http://www.cwu.edu/~janvik/papers/msr2013.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/msr2013.pdf},
  keywords = {bug, bug report assignment, eclipse, File Activity Histories, Mining Software Artifacts, mozilla, Named Entity Recognition, POS Filtering},
  abstract = {Large software development projects receive many bug reports and each of these reports needs to be triaged. An important step in the triage process is the assignment of the report to a developer. Most previous efforts towards improving bug report assignment have focused on using an activity-based approach. We address some of the limitations of activity-based approaches by proposing a two-phased location-based approach where bug report assignment recommendations are based on the predicted location of the bug. The proposed approach utilizes a noun extraction process on several information sources to determine bug location information and a simple term weighting scheme to provide a bug report assignment recommendation. We found that by using a location-based approach, we achieved an accuracy of 89.41\% and 59.76\% when recommending five developers for the Eclipse and Mozilla projects, respectively.}
}

% NOTE(review): booktitle missing in the source data. The original publisher
% value "IFIP AICT, Springer" has been split into series + publisher.
@inproceedings{1458,
  author = {Izquierdo-Cort{\'a}zar, Daniel and Robles, Gregorio and Gonz{\'a}lez-Barahona, Jes{\'u}s M.},
  title = {Do More Experienced Developers Introduce Fewer Bugs?},
  series = {IFIP AICT},
  volume = {378},
  year = {2012},
  month = sep,
  pages = {268--273},
  publisher = {Springer},
  keywords = {mercurial, mozilla, scm, source code analysis},
  note = {"Some Mozilla Foundation projects have been the selected case of study for this paper. The analysis is based on the Mercurial repository which offers a list of repositories that can be easily cloned. As a summary, 19 projects were analyzed, with more than 100,000 commits, more than 2,500 authors and around 4 years of history up to June 2011."},
  abstract = {Developer experience is a common matter of study in the software maintenance and evolution research literature. However it is still not well understood if less experienced developers are more prone to introduce errors in the source code than their more experienced colleagues. This paper aims to study the relationships between experience and the bug introduction ratio using the Mozilla community as case of study. As results, statistical differences among developers with different levels of experience has not been observed, when the expected result would have been the opposite.}
}

% NOTE(review): booktitle missing in the source data. The original publisher
% value "IFIP AICT, Springer" has been split into series + publisher.
@inproceedings{1457,
  author = {Barham, Adina},
  title = {The Impact of Formal {QA} Practices on {FLOSS} Communities -- The Case of {Mozilla}},
  series = {IFIP AICT},
  volume = {378},
  year = {2012},
  month = sep,
  pages = {262--267},
  publisher = {Springer},
  keywords = {email, information flow, mailing lists, mozilla, quality assurance, social network analysis, test},
  abstract = {The number of FLOSS projects that include a QA step in the development model is increasing which suggests that a new layer may be emerging in the classic ``onion model''. This change might affect the information flow within projects and implicitly their sustainability. Communities, the essential resource of FLOSS projects, have been extensively studied but questions concerning QA remain. This paper takes a step towards answering such questions by analyzing QA mailing lists and issue tracker data for the Mozilla group of projects. Because the Bugzilla data set contains over half a million bugs, data processing and analysis is a considerable challenge for this research. The provisional conclusions are that QA activity may not be increasing steadily over time but is dependent on other factors and that the QA team and other groups of contributors form a highly connected network that doesn't contain isolates.}
}

@inproceedings{949,
  author = {Lamkanfi, Ahmed and Demeyer, Serge and Giger, Emanuel and Goethals, Bart},
  title = {Predicting the severity of a reported bug},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {1--10},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463284},
  attachments = {https://flosshub.org/sites/flosshub.org/files/1lamkanfiDemeyer1.pdf},
  keywords = {bug reports, eclipse, gnome, mozilla, severity, text mining},
  abstract = {The severity of a reported bug is a critical factor in deciding how soon it needs to be fixed. Unfortunately, while clear guidelines exist on how to assign the severity of a bug, it remains an inherent manual process left to the person reporting the bug. In this paper we investigate whether we can accurately predict the severity of a reported bug by analyzing its textual description using text mining algorithms. Based on three cases drawn from the open-source community (Mozilla, Eclipse and GNOME), we conclude that given a training set of sufficient size (approximately 500 reports per severity), it is possible to predict the severity with a reasonable accuracy (both precision and recall vary between 0.65-0.75 with Mozilla and Eclipse; 0.70-0.85 in the case of GNOME).}
}

@inproceedings{Rastkar:2010:SSA:1806799.1806872,
  author = {Rastkar, Sarah and Murphy, Gail C. and Murray, Gabriel},
  title = {Summarizing software artifacts: a case study of bug reports},
  booktitle = {Proceedings of the 32nd ACM/IEEE International Conference on Software Engineering - Volume 1},
  series = {ICSE '10},
  year = {2010},
  pages = {505--514},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  isbn = {978-1-60558-719-6},
  doi = {10.1145/1806799.1806872},
  url = {http://doi.acm.org/10.1145/1806799.1806872},
  keywords = {bug reports, eclipse, gnome, human-centric software engineering, kde, machine learning, mozilla},
  abstract = {Many software artifacts are created, maintained and evolved as part of a software development project. As software developers work on a project, they interact with existing project artifacts, performing such activities as reading previously filed bug reports in search of duplicate reports. These activities often require a developer to peruse a substantial amount of text. In this paper, we investigate whether it is possible to summarize software artifacts automatically and effectively so that developers could consult smaller summaries instead of entire artifacts. To provide focus to our investigation, we consider the generation of summaries for bug reports. We found that existing conversation-based generators can produce better results than random generators and that a generator trained specifically on bug reports can perform statistically better than existing conversation-based generators. We demonstrate that humans also find these generated summaries reasonable indicating that summaries might be used effectively for many tasks.}
}

@inproceedings{953,
  author = {Bachmann, Adrian and Bernstein, Abraham},
  title = {When process data quality affects the number of bugs: Correlations in software engineering datasets},
  booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year = {2010},
  pages = {62--71},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Cape Town, South Africa},
  isbn = {978-1-4244-6802-7},
  doi = {10.1109/MSR.2010.5463286},
  attachments = {https://flosshub.org/sites/flosshub.org/files/62bachmann-msr10.pdf},
  keywords = {apache, bug reports, eclipse, gnome, log files, mozilla, netbeans, openoffice.org, version control},
  abstract = {Software engineering process information extracted from version control systems and bug tracking databases are widely used in empirical software engineering. In prior work, we showed that these data are plagued by quality deficiencies, which vary in its characteristics across projects. In addition, we showed that those deficiencies in the form of bias do impact the results of studies in empirical software engineering. While these findings affect software engineering researchers the impact on practitioners has not yet been substantiated. In this paper we, therefore, explore (i) if the process data quality and characteristics have an influence on the bug fixing process and (ii) if the process quality as measured by the process data has an influence on the product (i.e., software) quality. Specifically, we analyze six Open Source as well as two Closed Source projects and show that process data quality and characteristics have an impact on the bug fixing process: the high rate of empty commit messages in Eclipse, for example, correlates with the bug report quality. We also show that the product quality - measured by number of bugs reported - is affected by process data quality measures. These findings have the potential to prompt practitioners to increase the quality of their software process and its associated data quality.}
}

@inproceedings{926,
  author = {Ekanayake, Jayalath and Tappolet, Jonas and Gall, Harald C. and Bernstein, Abraham},
  title = {Tracking concept drift of software projects using defect prediction quality},
  booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)},
  year = {2009},
  pages = {51--60},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Vancouver, BC, Canada},
  isbn = {978-1-4244-3493-0},
  doi = {10.1109/MSR.2009.5069480},
  attachments = {https://flosshub.org/sites/flosshub.org/files/51MSR2009_0111_Ekanayake_Jayalath.pdf},
  keywords = {bugzilla, cvs, defect prediction, eclipse, mozilla, netbeans, openoffice},
  abstract = {Defect prediction is an important task in the mining of software repositories, but the quality of predictions varies strongly within and across software projects. In this paper we investigate the reasons why the prediction quality is so fluctuating due to the altering nature of the bug (or defect) fixing process. Therefore, we adopt the notion of a concept drift, which denotes that the defect prediction model has become unsuitable as set of influencing features has changed - usually due to a change in the underlying bug generation process (i.e., the concept). We explore four open source projects (Eclipse, OpenOffice, Netbeans and Mozilla) and construct file-level and project-level features for each of them from their respective CVS and Bugzilla repositories. We then use this data to build defect prediction models and visualize the prediction quality along the time axis. These visualizations allow us to identify concept drifts and - as a consequence - phases of stability and instability expressed in the level of defect prediction quality. Further, we identify those project features, which are influencing the defect prediction quality using both a tree induction-algorithm and a linear regression model. Our experiments uncover that software systems are subject to considerable concept drifts in their evolution history. Specifically, we observe that the change in number of authors editing a file and the number of defects fixed by them contribute to a project's concept drift and therefore influence the defect prediction quality. Our findings suggest that project managers using defect prediction models for decision making should be aware of the actual phase of stability or instability due to a potential concept drift.}
}

@article{denBesten2008316,
  author = {den Besten, Matthijs and Dalle, Jean-Michel and Galia, Fabrice},
  title = {The allocation of collaborative efforts in open-source software},
  journal = {Information Economics and Policy},
  volume = {20},
  number = {4},
  year = {2008},
  pages = {316--322},
  issn = {0167-6245},
  doi = {10.1016/j.infoecopol.2008.06.003},
  url = {http://www.sciencedirect.com/science/article/B6V8J-4SSG4PN-1/2/88b3824c30a31c18929d8a5ca6d64f62},
  keywords = {age, apache, complexity, cvs, division of labor, functions, gaim, gcc, ghostscript, lines of code, loc, log files, mozilla, netbsd, openssh, postgresql, python, revision control, scm, size, source code, Stigmergy, version control},
  note = {"we have selected a set of 10 large open-source projects" apache, cvs, gaim, gcc, ghostscript, mozilla, netbsd, openssh, postgresql, python "Our data were extracted from logs of development activity generated by software version control systems. For each project in the selection, we extracted CVS development logs" "We notably computed for each file in the sample, and for each month in its history, the number of distinct maintainers that had committed a change during that month, and the number of commits, the blocks of code addition, each file had received during that month." "other variables used in the regressions are proxies for the size, age, and granularity of files; the size of a file is represented as its number of lines of code (LOCs), its age by its creation date (Youth), and its granularity by the number of functions it contains."},
  abstract = {The article investigates the allocation of collaborative efforts among core developers (maintainers) of open-source software by analyzing on-line development traces (logs) for a set of 10 large projects. Specifically, we investigate whether the division of labor within open-source projects is influenced by characteristics of software code. We suggest that the collaboration among maintainers tends to be influenced by different measures of code complexity. We interpret these findings by providing preliminary evidence that the organization of open-source software development would self-adapt to characteristics of the code base, in a 'stigmergic' manner.}
}

@inproceedings{1013,
  author = {Gonzalez-Barahona, Jesus M. and Robles, Gregorio and Herraiz, Israel},
  title = {Impact of the Creation of the {Mozilla Foundation} in the Activity of Developers},
  booktitle = {Fourth International Workshop on Mining Software Repositories (MSR'07:ICSE Workshops 2007)},
  year = {2007},
  pages = {28},
  publisher = {IEEE},
  organization = {IEEE},
  address = {Minneapolis, MN, USA},
  isbn = {0-7695-2950-X},
  doi = {10.1109/MSR.2007.15},
  attachments = {https://flosshub.org/sites/flosshub.org/files/28300028.pdf},
  keywords = {cvs, cvsanaly, developers, mining challenge, mozilla, msr challenge, revision history},
  abstract = {During 2003, the Mozilla project transitioned from company-promoted (sponsored by AOL) to community-promoted (sponsored by the Mozilla Foundation). What happened to the group of developers during this transition? There was any significant impact on its activity or composition? To answer these questions, we have performed an analysis of the CVS repository of Mozilla, using the CVSAnalY tool, finding little on activity, but dramatic changes in the composition of the development team.}
}

% Entry 625: the identifier originally stored under issn is an ISBN-13 and now
% lives in isbn; the month value "2007///" carried no month and was dropped.
@inproceedings{625,
  author = {Noll, John},
  title = {Innovation in Open Source Software Development: A Tale of Two Features},
  booktitle = {OSS2007: Open Source Development, Adoption and Innovation (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  volume = {234},
  year = {2007},
  pages = {109--120},
  publisher = {Springer},
  organization = {Springer},
  chapter = {9},
  isbn = {978-0-387-72485-0},
  doi = {10.1007/978-0-387-72486-7_9},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Innovation\%20in\%20OSS.pdf},
  keywords = {email, email archives, features, Firefox, gnome, mailing list, metacity, mozilla},
  note = {"archives of project discussion forums were then examined to determine when the feature was first proposed, how it was debated, and when it was ultimately adopted"},
  abstract = {Open Source Software Development appears to depart radically from conventional notions of software engineering. In particular, requirements for Open Source projects seem to be asserted rather than elicited. This paper examines two features of selected open source products: ``tabbed browsing'' as realized in the Firefox web browser, and ``edge magnetism'' found in the Gnome desktop environment's Metacity window manager. Using archives of mailing lists and issue tracking databases, these features were traced from first mention to release, in attempt to discover the process by which requirements are proposed, adopted, and implemented in their respective Open Source projects. The results confirm the importance of user participation in Open Source projects.}
}

@inproceedings{682,
  author = {den Besten, Matthijs and Dalle, Jean-Michel and Galia, Fabrice},
  title = {Collaborative Maintenance in Large Open-Source Projects},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  year = {2006},
  pages = {233--244},
  publisher = {Springer},
  organization = {Springer},
  doi = {10.1007/0-387-34226-5_23},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Collaborative\%20Maintenance.pdf},
  keywords = {apache, COLLABORATION, complexity, cvs, gaim, gcc, ghostscript, halstead, lines of code, loc, mccabe, mozilla, netbsd, openssh, postgresql, python, sloc},
  abstract = {The paper investigates collaborative work among maintainers of open source software by analyzing the logs of a set of 10 large projects. We inquire whether teamwork can be influenced by several characteristics of code. Preliminary results suggest that collaboration among maintainers in most large open-source projects seems to be positively influenced by file vintage and by Halstead volume of files, and negatively by McCabe complexity and size measured in SLOCs. These results could be consistent with an increased attractivity of files created early in the history of a project, and with maintainers being less attracted by more verbose code and by more complex code, although in this last case it might also reflect the fact that more complex files would be de facto more exclusive in terms of maintenance.}
}

@inproceedings{686,
  author = {Robles, Gregorio and Gonzalez-Barahona, Jesus},
  title = {Contributor Turnover in Libre Software Projects},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  year = {2006},
  pages = {273--286},
  publisher = {Springer},
  organization = {Springer},
  doi = {10.1007/0-387-34226-5_28},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Contributor\%20Turnover\%20in\%20Libre\%20Software\%20Projects.pdf},
  keywords = {apache, committers, core, cvs, cvsanaly, developers, evolution, freebsd, gimp, gnome, kde, mono, mozilla},
  abstract = {A common problem that management faces in software companies is the high instability of their staff. In libre (free, open source) software projects, the permanence of developers is also an open issue, with the potential of causing problems amplified by the self-organizing nature that most of them exhibit. Hence, human resources in libre software projects are even more difficult to manage: developers are in most cases not bound by a contract and, in addition, there is not a real management structure concerned about this problem. This raises some interesting questions with respect to the composition of development teams in libre software projects, and how they evolve over time. There are projects lead by their original founders (some sort of ``code gods''), while others are driven by several different developer groups over time (i.e. the project ``regenerates'' itself). In this paper, we propose a quantitative methodology, based on the analysis of the activity in the source code management repositories, to study how these processes (developers leaving, developers joining) affect libre software projects. The basis of it is the analysis of the composition of the core group, the group of developers most active in a project, for several time lapses. We will apply this methodology to several large, well-known libre software projects, and show how it can be used to characterize them. In addition, we will discuss the lessons that can be learned, and the validity of our proposal.}
}

% The exported key for the next entry embedded a LaTeX quote macro in braces,
% which ends BibTeX's key scan at the first closing brace and discards the
% entry. The key is now plain ASCII; update any citation that used the old one.
@inproceedings{DAmbros:2006:ERV:1137983.1137992,
  author = {D'Ambros, Marco and Lanza, Michele and Lungu, Mircea},
  title = {The evolution radar: visualizing integrated logical coupling information},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR '06},
  year = {2006},
  pages = {26--32},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1137992},
  url = {http://doi.acm.org/10.1145/1137983.1137992},
  attachments = {https://flosshub.org/sites/flosshub.org/files/26TheEvolutionRadar.pdf},
  keywords = {change management, cvs, evolution, logical coupling, mozilla, scm, source code, thunderbird, tinderbox, visualization},
  abstract = {In software evolution research logical coupling has extensively been used to recover the hidden dependencies between source code artifacts. They would otherwise go lost because of the file-based nature of current versioning systems. Previous research has dealt with low-level couplings between files, leading to an explosion of data to be analyzed, or has abstracted the logical couplings to module level, leading to a loss of detailed information. In this paper we propose a visualization-based approach which integrates both file-level and module-level logical coupling information. This not only facilitates an in-depth analysis of the logical couplings at all granularity levels, it also leads to a precise characterization of the system modules in terms of their logical coupling dependencies.}
}

@inproceedings{697,
  author = {Koponen, Timo},
  title = {Life cycle of Defects in Open Source Software Projects},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  year = {2006},
  pages = {195--200},
  publisher = {Springer},
  organization = {Springer},
  doi = {10.1007/0-387-34226-5_19},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Life\%20cycle\%20of\%20Defects\%20in\%20OSS\%20Projects.pdf},
  keywords = {apache, defects, Firefox, mozilla},
  note = {"we collected and analyzed defects of the apache http server and mozilla firefox" "all the defects reported between X and X were analyzed"},
  abstract = {We studied the maintenance process from the viewpoint of defect management and the defect life cycle. First, we outline a model for the defect life cycle based on ISO/IEC standards, the Framework for Open Source maintenance process, and the Bugzilla defect management system. Thereafter, we analyze defects from two Open Source software projects. The aim of the study was support the maintenance reliability. However, we found that most of the defects did not follow the life-cycle model. Defects were usually directly resolved from initial state without being assigned.}
}

@inproceedings{Knab:2006:PDD:1137983.1138012,
  author = {Knab, Patrick and Pinzger, Martin and Bernstein, Abraham},
  title = {Predicting defect densities in source code files with decision tree learners},
  booktitle = {Proceedings of the 2006 international workshop on Mining software repositories},
  series = {MSR '06},
  year = {2006},
  pages = {119--125},
  publisher = {ACM},
  organization = {ACM},
  address = {New York, NY, USA},
  isbn = {1-59593-397-2},
  doi = {10.1145/1137983.1138012},
  url = {http://doi.acm.org/10.1145/1137983.1138012},
  attachments = {https://flosshub.org/sites/flosshub.org/files/119Predicting.pdf},
  keywords = {change analysis, data mining, decision tree learner, defect density, defect prediction, mozilla, prediction, release history, scm, source code, version control},
  abstract = {With the advent of open source software repositories the data available for defect prediction in source files increased tremendously. Although traditional statistics turned out to derive reasonable results the sheer amount of data and the problem context of defect prediction demand sophisticated analysis such as provided by current data mining and machine learning techniques. In this work we focus on defect density prediction and present an approach that applies a decision tree learner on evolution data extracted from the Mozilla open source web browser project. The evolution data includes different source code, modification, and defect measures computed from seven recent Mozilla releases. Among the modification measures we also take into account the change coupling, a measure for the number of change-dependencies between source files. The main reason for choosing decision tree learners, instead of for example neural nets, was the goal of finding underlying rules which can be easily interpreted by humans. To find these rules, we set up a number of experiments to test common hypotheses regarding defects in software entities. Our experiments showed, that a simple tree learner can produce good results with various sets of input data.}
}

@inproceedings{706,
  author = {Tuunanen, Timo and Koskinen, Jussi and K{\"a}rkk{\"a}inen, Tommi},
  title = {Retrieving Open Source Software Licenses},
  booktitle = {OSS2006: Open Source Systems (IFIP 2.13)},
  series = {IFIP International Federation for Information Processing},
  year = {2006},
  pages = {35--46},
  publisher = {Springer},
  organization = {Springer},
  doi = {10.1007/0-387-34226-5_4},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Retrieving\%20Open\%20Source\%20Software\%20Licenses.pdf},
  keywords = {gaim, license, license analysis, maintenance, mozilla, reuse},
  abstract = {Open Source Software maintenance and reuse require identifying and comprehending the applied software licenses. This paper first characterizes software maintenance, and open source software (OSS) reuse which are particularly relevant in this context. The information needs of maintainers and reusers can be supported by reverse engineering tools at different information retrieval levels. The paper presents an automated license retrieval approach called ASLA. User needs, system architecture, tool features, and tool evaluation are presented. The implemented tool features support identifying source file dependencies and licenses in source files, and adding new license templates for identifying licenses. The tool is evaluated against another tool for license information extraction. ASLA requires the source code as available input but is otherwise not limited to OSS. It supports the same programming languages as GCC. License identification coverage is good and the tool is extendable.}
}

% NOTE(review): the first author was exported with the given name inside the
% surname part; it is rewritten below on the assumption that the surname is
% "Koru" -- confirm against the published paper.
@article{10.1109/TSE.2005.89,
  author = {Koru, A. G{\"u}nes and Tian, Jeff (Jianhui)},
  title = {Comparing High-Change Modules and Modules with the Highest Measurement Values in Two Large-Scale Open-Source Products},
  journal = {IEEE Transactions on Software Engineering},
  volume = {31},
  year = {2005},
  pages = {625--642},
  publisher = {IEEE Computer Society},
  address = {Los Alamitos, CA, USA},
  issn = {0098-5589},
  doi = {10.1109/TSE.2005.89},
  keywords = {mozilla, openoffice},
  abstract = {Identifying change-prone modules can enable software developers to take focused preventive actions that can reduce maintenance costs and improve quality. Some researchers observed a correlation between change proneness and structural measures, such as size, coupling, cohesion, and inheritance measures. However, the modules with the highest measurement values were not found to be the most troublesome modules by some of our colleagues in industry, which was confirmed by our previous study of six large-scale industrial products. To obtain additional evidence, we identified and compared high-change modules and modules with the highest measurement values in two large-scale open-source products, Mozilla and OpenOffice, and we characterized the relationship between them. Contrary to common intuition, we found through formal hypothesis testing that the top modules in change-count rankings and the modules with the highest measurement values were different. In addition, we observed that high-change modules had fairly high places in measurement rankings, but not the highest places. The accumulated findings from these two open-source products, together with our previous similar findings for six closed-source products, should provide practitioners with additional guidance in identifying the change-prone modules.}
}

@article{102,
  author = {Gyimothy, T. and Ferenc, R. and Siket, I.},
  title = {Empirical validation of object-oriented metrics on open source software for fault prediction},
  journal = {IEEE Transactions on Software Engineering},
  volume = {31},
  number = {10},
  year = {2005},
  pages = {897--910},
  url = {http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.115.8372\&rep=rep1\&type=pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/Gyimothy.pdf},
  keywords = {bugs, bugzilla, cbo, defects, dit, fault-prone modules, faults, lcom, lcomn, loc, metrics, mozilla, noc, object-oriented, rfc, source code, wmc},
  note = {"This paper describes how we calculated the object-oriented metrics given by Chidamber and Kemerer to illustrate how fault-proneness detection of the source code of the open source Web and e-mail suite called Mozilla can be carried out. We checked the values obtained against the number of bugs found in its bug database - called Bugzilla - using regression and machine learning methods to validate the usefulness of these metrics for fault-proneness prediction. We also compared the metrics of several versions of Mozilla to see how the predicted fault-proneness of the software system changed during its development cycle." metrics, wmc weighted methods per class, dit depth of inheritance, rfc response for a class, noc number of children, cbo coupling between object classes, cohesion, lines of code, loc, sloc chidamber and kemerer metrics},
  abstract = {Open source software systems are becoming increasingly important these days. Many companies are investing in open source projects and lots of them are also using such software in their own work. But, because open source software is often developed with a different management style than the industrial ones, the quality and reliability of the code needs to be studied. Hence, the characteristics of the source code of these projects need to be measured to obtain more information about it. This paper describes how we calculated the object-oriented metrics given by Chidamber and Kemerer to illustrate how fault-proneness detection of the source code of the open source Web and e-mail suite called Mozilla can be carried out. We checked the values obtained against the number of bugs found in its bug database - called Bugzilla - using regression and machine learning methods to validate the usefulness of these metrics for fault-proneness prediction. We also compared the metrics of several versions of Mozilla to see how the predicted fault-proneness of the software system changed during its development cycle.}
}

@article {flosswp184, title = {Exploring the Structure of Complex Software Designs: An Empirical Study of Open Source and Proprietary Code (updated)}, year = {2005}, note = {"For each design, we report data on the number of source files, the number of dependencies, the density of the DSM (i.e., the number of dependencies per source file pair) the propagation cost and the clustered cost. We also provide data on the average complexity of source files, in terms of the number of functions and lines of code."}, month = {June}, abstract = {This paper reports data from a study that seeks to characterize the differences in design structure between complex software products. In particular, we use Design Structure Matrices (DSMs) to map the dependencies between the elements of a design and define metrics that allow us to compare the structures of different designs. 
We first use these metrics to compare the architectures of two software products - the Linux operating system and the Mozilla web browser - that were developed via contrasting modes of organization: specifically, open source versus proprietary development. We then track the evolution of Mozilla, paying particular attention to a purposeful "re-design" effort that was undertaken with the intention of making the product more "modular." We find significant differences in structure between Linux and the first version of Mozilla, suggesting that Linux had a more modular architecture. We also find that the redesign of Mozilla resulted in an architecture that was significantly more modular than that of its predecessor, and indeed, than that of Linux. Our results, while exploratory, are consistent with a view that different modes of organization are associated with designs that possess different structures. However, we also illustrate that purposeful managerial actions can have a large impact on structure. This latter result is important given recent moves to release proprietary software into the public domain. These moves are likely to fail unless the product possesses an architecture that facilitates participation. 
Our paper provides evidence that a tightly-coupled design can be adapted to meet this objective.}, keywords = {complexity, cost, dependencies, functions, lines of code, linux, loc, mozilla, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/maccormackrusnakbaldwin2.pdf}, author = {Alan MacCormack and John Rusnak and Carliss Baldwin} } @conference {Fielding:2005:SAO:1062455.1062474, title = {Software architecture in an open source world}, booktitle = {Proceedings of the 27th international conference on Software engineering}, series = {ICSE {\textquoteright}05}, year = {2005}, pages = {43{\textendash}43}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {In spite of the hype and hysteria surrounding open source software development, there is very little that can be said of open source in general. Open source projects range in scope from the miniscule, such as the thousands of non-maintained code dumps left behind at the end of class projects, dissertations, and failed commercial ventures, to the truly international, with thousands of developers collaborating, directly or indirectly, on a common platform. One characteristic that is shared by the largest and most successful open source projects, however, is a software architecture designed to promote anarchic collaboration through extensions while at the same time preserving centralized control over the interfaces. This talk features a survey of the state-of-the-practice in open source development in regards to software architecture, with particular emphasis on the modular extensibility interfaces within several of the most successful projects, including Apache httpd, Eclipse, Mozilla Firefox, Linux kernel, and the World Wide Web (which few people recognize as an open source project in itself). 
These projects fall under the general category of collaborative open source software development, which emphasizes community aspects of software engineering in order to compensate for the often-volunteer nature of core developers and take advantage of the scalability obtainable through Internet-based virtual organizations.}, keywords = {apache, collaborative open source development, eclipse, extensibility, Firefox, linux, linux kernel, loose coupling, modularity, mozilla, open source, software architecture}, isbn = {1-58113-963-2}, doi = {10.1145/1062455.1062474}, url = {http://doi.acm.org/10.1145/1062455.1062474}, author = {Roy T. Fielding} } @conference {Sliwerski:2005:CIF:1083142.1083147, title = {When do changes induce fixes?}, booktitle = {Proceedings of the 2005 international workshop on Mining software repositories}, series = {MSR {\textquoteright}05}, year = {2005}, pages = {24-28}, publisher = {ACM}, organization = {ACM}, address = {New York, NY, USA}, abstract = {As a software system evolves, programmers make changes that sometimes cause problems. We analyze CVS archives for fix-inducing changes{\textemdash}changes that lead to problems, indicated by fixes. We show how to automatically locate fix-inducing changes by linking a version archive (such as CVS) to a bug database (such as BUGZILLA). 
In a first investigation of the MOZILLA and ECLIPSE history, it turns out that fix-inducing changes show distinct patterns with respect to their size and the day of week they were applied.}, keywords = {bugzilla, change analysis, cvs, eclipse, evolution, mozilla}, isbn = {1-59593-123-6}, doi = {http://doi.acm.org/10.1145/1082983.1083147}, url = {http://doi.acm.org/10.1145/1082983.1083147}, attachments = {https://flosshub.org/sites/flosshub.org/files/24WhenDoChanges.pdf}, author = {Sliwerski, J and Zimmermann, Thomas and Zeller, Andreas} } @conference {1109, title = {Distributed Collective Practices and F/OSS Problem Management: Perspective and Methods}, booktitle = {Conference on Cooperation, Innovation \& Technology (CITE 2003)}, year = {2003}, note = {"We have begun studying in detail the general issues raised above using a large collection of research data from the Mozilla project" "Our Bugzilla snapshot contains over 128,000 problem reports, of which about 88,000 have been resolved"}, abstract = {This paper presents the state of our research on Distributed Collective Practices (DCPs) in Free/Open-Source Software (F/OSS) projects, focusing on sensemaking and resolution of software problems. We are exploring the hypothesis that variations in the content and in the articulation of these socio-technical processes have an impact on the outcome of the activity of F/OSS collectives, and more specifically on problem resolution. Our preliminary techniques for combining qualitative data analysis with automated process extraction result in a scalable analysis method called Computational Amplification (CA). We are applying CA to 128,000 problem reports from the Mozilla F/OSS project. 
The paper illustrates how CA is used to create multidimensional process models and shows types of conclusions we can reach.}, keywords = {Automated process extraction, bug fixing, bug reports, bugzilla, Collective knowledge management, Information extraction from natural language texts, mozilla, Software problem management}, author = {Gasser, Les and Gabriel Ripoche} } @article {1116, title = {Organizational learning in open-source software projects: an analysis of debugging data}, journal = {IEEE Transactions on Engineering Management}, volume = {50}, year = {2003}, month = {11/2003}, pages = {485 - 493}, abstract = {This paper studies organizational learning effects in open-source programming projects. Working with data from the Apache and Mozilla projects, the study focuses on three aspects of open-source development. The first is the use of the open-source approach as a hedge against system complexity. The second is the adaptive learning mechanisms realized by the debugging process. The last is the learning curve effects of project-specific experience on bug cycle times. The results indicate that while open-source development is subject to positive learning effects, these effects are not universal, with some projects deriving more benefit than others.}, keywords = {apache, bugs, complexity, mozilla}, issn = {0018-9391}, doi = {10.1109/TEM.2003.820136}, author = {Huntley, C.L.} } @conference {1248, title = {Supporting Distributed and Decentralized Projects: Drawing Lessons from the Open Source Community}, booktitle = {1st Workshop on Open Source in an Industrial Context}, year = {2003}, note = {"We begin the remainder of the paper with discussion of a survey of open source projects, showing similarities that have arisen in tool usage"}, month = {10/2003}, abstract = {Open source projects are typically organized in a distributed and decentralized manner. These factors strongly determine the processes followed and constrain the types of tools that can be utilized. 
This paper explores how distribution and decentralization have affected processes and tools in existing open source projects with the goals of summarizing the lessons learned and identifying opportunities for improving both. Issues considered include decision-making, accountability, communication, awareness, rationale, managing source code, testing, and release management.}, keywords = {abiword, apache, debian, freebsd, kde, linux, mozilla, mysql, perl, PHP, postgresql, python, subversion, tomcat, tools}, attachments = {https://flosshub.org/sites/flosshub.org/files/erenkrantz2003.pdf}, author = {Erenkrantz, J. and Taylor, R.N.} } @article {1095, title = {High Quality and Open Source Software Practices}, journal = {Proceedings of the 2nd ICSE Workshop on Open Source}, year = {2002}, note = {"We examined the publicly visible portions of these projects from November 2001 through March 2002, ...The SLOC counts for the predominate languages are shown}, month = {2002}, abstract = {Surveys suggest that, according to various metrics, the quality and dependability of today{\textquoteright}s open source software is roughly on par with commercial and government developed software. What are the prospects for advancing to much higher levels of quality in open source software? More specifically, what attributes must be possessed by quality-related interventions for them to be feasibly adoptable in open source practice? In order to identify some of these attributes, we conducted a preliminary survey of the quality practices of a number of successful open source projects. 
We focus, in particular, on attributes related to adoptability by the open source practitioner community.}, keywords = {apache, bug report, bug tracker, bug tracking system, feature requests, gcc, gnome, kde, lines of code, linux, loc, mozilla, netbeans, perl, position paper, python, sloc, source code, Survey, tomcat, xfree86}, attachments = {https://flosshub.org/sites/flosshub.org/files/HalloranScherlis.pdf}, author = {T. Halloran and W. Scherlis} } @article {121, title = {Two case studies of open source software development: Apache and Mozilla}, journal = {ACM Transactions on Software Engineering and Methodology}, volume = {11}, number = {3}, year = {2002}, note = {apache data sources: email, cvs, bug database regarding email: "We wrote Perl scripts to extract date, sender identity, message subject, and the message body that was further processed to obtain details on code changes and problem reports (see below). Manual inspection was used to resolve such things as multiple email addresses in cases where all automated techniques failed." (but the rest of the paper does not address this data source at all) mozilla data sources bugzilla, cvs }, pages = {309-346}, abstract = {According to its proponents, open source style software development has the capacity to compete successfully, and perhaps in many cases displace, traditional commercial development methods. In order to begin investigating such claims, we examine data from two major open source projects, the Apache web server and the Mozilla browser. By using email archives of source code change history and problem reports we quantify aspects of developer participation, core team size, code ownership, productivity, defect density, and problem resolution intervals for these OSS projects. We develop several hypotheses by comparing the Apache project with several commercial projects. We then test and refine several of these hypotheses, based on an analysis of Mozilla data. 
We conclude with thoughts about the prospects for high- performance commercial/ open source process hybrids.}, keywords = {apache, bug fixing, bug reports, bugzilla, change history, core, defect density, email, email archives, mailing list, mozilla, ownership, participation, productivity, scm, source code}, attachments = {https://flosshub.org/sites/flosshub.org/files/mockusFieldingHerbsleb2002.pdf}, author = {Audris Mockus and Roy Fielding and Herbsleb, J. D.} } @conference {1149, title = {Software Development Practices in Open Software Development Communities: A Comparative Case Study}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, abstract = {This study presents an initial set of findings from an empirical study of social processes, technical system configurations, organizational contexts, and interrelationships that give rise to open software. "Open software", or more narrowly, open source software, represents an approach for communities of like-minded participants to develop software system representations that are intended to be shared freely, rather than offered as closed commercial products. While there is a growing popular literature attesting to open software [DiBona, Ockman, Stone 1999, Fogel 1999], there are very few systematic studies [e.g., Feller and Fitzgerald 2000, Mockus, Fielding, Herbsleb 2000] that informs how these communities produce software. Similarly, little is known about how people in these communities coordinate software development across different settings, or about what software processes, work practices, and organizational contexts are necessary to their success. To the extent that academic research communities and commercial enterprises seek the supposed efficacy of open software [Smarr and Graham 2000], they will need grounded models of the processes and practices of open software development to allow effective investment of their resources. 
This study investigates four communities engaged in open software development. Case study methods are used to compare practices across communities.}, keywords = {apache, argouml, astronomy, chandra, games, infrastructure, internet news, mozilla, systems design}, attachments = {https://flosshub.org/sites/flosshub.org/files/scacchi.pdf}, author = {Walt Scacchi} } @conference {1142, title = {Software Engineering Research in the Bazaar}, booktitle = {1st Workshop on Open Source Software Engineering at ICSE 2001}, year = {2001}, note = {"To gain a better understanding of a software system, we recover its software architecture from the system{\textquoteright}s source code. We recovered the architecture of many open source systems such as the Linux kernel [3], the Mozilla browser [5], the Apache web server [7], and the VIM editor [14]. The recovered architecture is browse-able to permit developers to interact with it, [10] shows an example for the Linux kernel."}, abstract = {During the last five years, our research group has studied the architecture and evolution of several large open source systems {\textemdash} including Linux, GCC, VIM, Mozilla, and Apache {\textemdash} and we have found that open source software systems often exhibit interesting differences when compared to similar commercially-developed systems. Our investigations of these systems have involved the creation of software architecture models, software architecture repair, the creation of a reference architecture for web servers, the study of evolution and growth of open source systems, and the modelling of architectural properties of systems that are apparent only at build time.}, keywords = {apache, architecture, gcc, kernel, linux, linux kernel, mozilla, open source software, software architecture, Software Engineering Research, source code, vim}, attachments = {https://flosshub.org/sites/flosshub.org/files/hassangodfreyholt.pdf}, author = {Hassan, Ahmed E. and Godfrey, Michael W. 
and Holt, Richard C.} } @article {98, title = {Striking a balance between trust anti control in a virtual organization: a content analysis of open source software case studies}, journal = {Information Systems Journal}, volume = {11}, number = {4}, year = {2001}, note = {"I employ secondary analysis of published case studies of OSS projects. I used several search methods to identify such case studies about OSS pro- jects. First, I searched the electronic archives of both ACM and IEEE, using terms such as {\textquoteleft}open source.{\textquoteright} " "Secondly, I searched on Bell \& Howell/Proquest{\textquoteright}s ABI/Inform, a database of acade- mic and trade publications on business and management (including technology management). Thirdly, I reviewed the {\textquoteleft}position papers{\textquoteright} of the various authors who attended the 1st Work- shop on Open-Source Software Engineering (Feller et al., 2001) for relevant case studies. Fourthly, I searched on Harvard Publishing{\textquoteright}s websites, which contain case studies published" "Finally, as I located case studies or other publi- cations about OSS projects, I followed references from them to identify other case studies. Although I found literally hundreds of publications about OSS development, there was a much smaller number of case studies"}, pages = {277-304}, abstract = {Many organization theorists have predicted the emergence of the networked or virtual firm as a model for the design of future organizations. Researchers have also emphasized the importance of trust as a necessary condition for ensuring the success of virtual organizations. This paper examines the open source software (OSS) {\textquoteright}movement{\textquoteright} as an example of a virtual organization and proposes a model that runs contrary to the belief that trust is critical for virtual organizations. 
Instead, I argue that various control mechanisms can ensure the effective performance of autonomous agents who participate in virtual organizations. Borrowing from the theory of the {\textquoteright}McDonaldization{\textquoteright} of society, I argue that, given a set of practices to ensure the control, efficiency, predictability and calculability of processes and outcomes in virtual organizations, effective performance may occur in the absence of trust. As support for my argument, I employ content analysis to examine a set of published case studies of OSS projects. My results show that, although that trust is rarely mentioned, ensuring control is an important criterion for effective performance within OSS projects. The case studies feature few references to other dimensions of {\textquoteright}McDonaldization{\textquoteright} (efficiency, predictability and calculability), however, and I conclude that the OSS movement relies on many other forms of social control and self-control, which are often unacknowledged in OSS projects. Through these implicit forms of control, OSS projects are able to secure the cooperation of the autonomous agents that participate in project teams. I conclude by extrapolating from these case studies to other virtual organizations.}, keywords = {apache, case studies, Control, fetchmail, jun, linux, linux kernel, McDonaldization, mozilla, networked organization, perl, rationalization, trust, virtual organization}, author = {Gallivan, M. J.} }