@inproceedings{1313,
  title        = {Do comments explain codes adequately?},
  booktitle    = {Proceedings of the 8th working conference on Mining software repositories - MSR {\textquoteright}11},
  year         = {2011},
  month        = may,
  pages        = {242--245},
  publisher    = {ACM Press},
  organization = {ACM Press},
  address      = {New York, New York, USA},
  abstract     = {Comment lines in the software source code include descriptions of codes, usage of codes, copyrights, unused codes, comments, and so on. It is required for comments to explain the content of written code adequately, since the wrong description in the comment may causes further bug and confusion in maintenance. In this paper, we try to clarify a research question: "In which projects do comments describe the code adequately?" To answer this question, we selected the group 1 of mining challenge and used data obtained from Eclipse and Netbeans. Since it is difficult to answer the above question directly, we define the distance between codes and comments. By utilizing the fault-prone module prediction technique, we can answer the alternative question from the data of two projects. The result shows that Eclipse project has relatively adequate comments.},
  keywords     = {comments, eclipse, msr challenge, netbeans, prediction},
  isbn         = {9781450305747},
  doi          = {10.1145/1985441.1985482},
  author       = {Mizuno, Osamu and Hirata, Yukinao}
}
@inproceedings{960,
  title        = {A comparative exploration of FreeBSD bug lifetimes},
  booktitle    = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year         = {2010},
  pages        = {106--109},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  abstract     = {In this paper, we explore the viability of mining the basic data provided in bug repositories to predict bug lifetimes. We follow the method of Lucas D. Panjer as described in his paper, Predicting Eclipse Bug Lifetimes. However, in place of Eclipse data, the FreeBSD bug repository is used. We compare the predictive accuracy of five different classification algorithms applied to the two data sets. In addition, we propose future work on whether there is a more informative way of classifying bugs than is considered by current bug tracking systems.},
  keywords     = {bug reports, bug tracking, classification, eclipse, msr challenge, prediction},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463291},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/106ChallengeGargi.pdf},
  author       = {Bougie, Gargi and Treude, Christoph and German, Daniel M. and Storey, Margaret-Anne}
}
@inproceedings{950,
  title        = {An extensive comparison of bug prediction approaches},
  booktitle    = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year         = {2010},
  pages        = {31--41},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  abstract     = {Reliably predicting software defects is one of software engineering{\textquoteright}s holy grails. Researchers have devised and implemented a plethora of bug prediction approaches varying in terms of accuracy, complexity and the input data they require. However, the absence of an established benchmark makes it hard, if not impossible, to compare approaches. We present a benchmark for defect prediction, in the form of a publicly available data set consisting of several software systems, and provide an extensive comparison of the explanative and predictive power of well-known bug prediction approaches, together with novel approaches we devised. Based on the results, we discuss the performance and stability of the approaches with respect to our benchmark and deduce a number of insights on bug prediction models.},
  keywords     = {apache, bug reports, eclipse, famix, lucene, mylyn, prediction, scm},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463279},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/31dambrosLanzaRobbes31.pdf},
  author       = {D{\textquoteright}Ambros, Marco and Lanza, Michele and Robbes, Romain}
}
@article{1385,
  title    = {Survival analysis on the duration of open source projects},
  journal  = {Information and Software Technology},
  volume   = {52},
  year     = {2010},
  note     = {"For the purposes of our study we used data coming only from source code repositories."},
  month    = sep,
  pages    = {902--922},
  abstract = {Context Open source (FLOSS) project survivability is an important piece of information for many open source stakeholders. Coordinators of open source projects would like to know the chances for the survival of the projects they coordinate. Companies are also interested in knowing how viable a project is in order to either participate or invest in it, and volunteers want to contribute to vivid projects. Objective The purpose of this article is the application of survival analysis techniques for estimating the future development of a FLOSS project. Method In order to apply such approach, duration data regarding FLOSS projects from the FLOSSMETRICS (This work was partially supported by the European Community{\textquoteright}s Sixth Framework Program under the Contract FP6-033982) database were collected. Such database contains metadata for thousands of FLOSS projects, derived from various forges. Subsequently, survival analysis methods were employed to predict the survivability of the projects, i.e. their probability of continuation in the future, by examining their duration, combined with other project characteristics such as their application domain and number of committers. Results It was shown how probability of termination or continuation may be calculated and how a prediction model may be built to upraise project future. In addition, the benefit of adding more committers to FLOSS projects was quantified. Conclusion Analysis results demonstrate the usefulness of the proposed framework for assessing the survival probability of a FLOSS project.},
  keywords = {flossmetrics, prediction, source code, survival analysis},
  issn     = {0950-5849},
  doi      = {10.1016/j.infsof.2010.05.001},
  author   = {Samoladas, Ioannis and Angelis, Lefteris and Stamelos, Ioannis}
}
@inproceedings{973,
  title        = {On the relation of refactorings and software defect prediction},
  booktitle    = {Proceedings of the 2008 international workshop on Mining software repositories - MSR {\textquoteright}08},
  year         = {2008},
  month        = may,
  pages        = {35--38},
  publisher    = {ACM Press},
  organization = {ACM Press},
  address      = {New York, New York, USA},
  abstract     = {This paper analyzes the influence of evolution activities such as refactoring on software defects. In a case study of five open source projects we used attributes of software evolution to predict defects in time periods of six months. We use versioning and issue tracking systems to extract 110 data mining features, which are separated into refactoring and non-refactoring related features. These features are used as input into classification algorithms that create prediction models for software defects. We found out that refactoring related features as well as non-refactoring related features lead to high quality prediction models. Additionally, we discovered that refactorings and defects have an inverse correlation: The number of software defects decreases, if the number of refactorings increased in the preceding time period. As a result, refactoring should be a significant part of both bug fixes and other evolutionary changes to reduce software defects.},
  keywords     = {argouml, bug fixing, bug reports, defects, evolution, jboss, liferay, prediction, refactoring, spring, weka, xdoclet},
  isbn         = {9781605580241},
  doi          = {10.1145/1370750.1370759},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/p35-ratzinger.pdf},
  author       = {Sigmund, Thomas and Gall, Harald C. and Ratzinger, Jacek}
}
@inproceedings{1017,
  title        = {Forecasting the Number of Changes in Eclipse Using Time Series Analysis},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year         = {2007},
  pages        = {32},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  abstract     = {In order to predict the number of changes in the following months for the project Eclipse, we have applied a statistical (non-explanatory) model based on time series analysis. We have obtained the monthly number of changes in the CVS repository of Eclipse, using the CVSAnalY tool. The input to our model was the filtered series of the number of changes per month, and the output was the number of changes per month for the next three months. Then we aggregated the results of the three months to obtain the total number of changes in the given period in the challenge.},
  keywords     = {change management, cvs, cvsanaly, eclipse, prediction},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.10},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300032.pdf},
  author       = {Herraiz, Israel and Gonzalez-Barahona, Jesus M. and Robles, Gregorio}
}
@inproceedings{991,
  title        = {How Long Will It Take to Fix This Bug?},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year         = {2007},
  pages        = {1},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  abstract     = {Predicting the time and effort for a software problem has long been a difficult task. We present an approach that automatically predicts the fixing effort, i.e., the person-hours spent on fixing an issue. Our technique leverages existing issue tracking systems: given a new issue report, we use the Lucene framework to search for similar, earlier reports and use their average time as a prediction. Our approach thus allows for early effort estimation, helping in assigning issues and scheduling stable releases. We evaluated our approach using effort data from the JBoss project. Given a sufficient number of issues reports, our automatic predictions are close to the actual effort; for issues that are bugs, we are off by only one hour, beating naive predictions by a factor of four.},
  keywords     = {bug fixing, bug reports, effort estimation, jboss, lucene, prediction, time},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.13},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300001.pdf},
  author       = {Weiss, Cathrin and Premraj, Rahul and Zimmermann, Thomas and Zeller, Andreas}
}
@inproceedings{1018,
  title        = {Local and Global Recency Weighting Approach to Bug Prediction},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year         = {2007},
  pages        = {33},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  abstract     = {Finding and fixing software bugs is a challenging maintenance task, and a significant amount of effort is invested by software development companies on this issue. In this paper, we use the Eclipse project{\textquoteright}s recorded software bug history to predict occurrence of future bugs. The history contains information on when bugs have been reported and subsequently fixed.},
  keywords     = {bug fixing, bug reports, eclipse, maintenance, prediction},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.17},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300033.pdf},
  author       = {Joshi, Hemant and Zhang, Chuanlei and Ramaswamy, S. and Bayrak, Coskun}
}
@inproceedings{1016,
  title        = {Predicting Defects and Changes with Import Relations},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year         = {2007},
  pages        = {31},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  abstract     = {Lowering the number of defects and estimating the development time of a software project are two important goals of software engineering. To predict the number of defects and changes we train models with import relations. This enables us to decrease the number of defects by more efficient testing and to assess the effort needed in respect to the number of changes.},
  keywords     = {defects, eclipse, effort estimation, mining challenge, msr challenge, prediction},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.24},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300031.pdf},
  author       = {Schroter, Adrian}
}
@inproceedings{1014,
  title        = {Predicting Eclipse Bug Lifetimes},
  booktitle    = {Fourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)},
  year         = {2007},
  pages        = {29},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Minneapolis, MN, USA},
  abstract     = {In non-trivial software development projects planning and allocation of resources is an important and difficult task. Estimation of work time to fix a bug is commonly used to support this process. This research explores the viability of using data mining tools to predict the time to fix a bug given only the basic information known at the beginning of a bug{\textquoteright}s lifetime. To address this question, a historical portion of the Eclipse Bugzilla database is used for modeling and predicting bug lifetimes. A bug history transformation process is described and several data mining models are built and tested. Interesting behaviours derived from the models are documented. The models can correctly predict up to 34.9\% of the bugs into a discretized log scaled lifetime class.},
  keywords     = {bug fixing, bugzilla, classification, eclipse, effort estimation, mining challenge, msr challenge, prediction, weka},
  isbn         = {0-7695-2950-X},
  doi          = {10.1109/MSR.2007.25},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/28300029.pdf},
  author       = {Panjer, Lucas D.}
}
@inproceedings{Askari:2006:ITE:1137983.1138013,
  title        = {Information theoretic evaluation of change prediction models for large-scale software},
  booktitle    = {Proceedings of the 2006 international workshop on Mining software repositories},
  series       = {MSR {\textquoteright}06},
  year         = {2006},
  pages        = {126--132},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {In this paper, we analyze the data extracted from several open source software repositories. We observe that the change data follows a Zipf distribution. Based on the extracted data, we then develop three probabilistic models to predict which files will have changes or bugs. The first model is Maximum Likelihood Estimation (MLE), which simply counts the number of events, i.e., changes or bugs, that happen to each file and normalizes the counts to compute a probability distribution. The second model is Reflexive Exponential Decay (RED) in which we postulate that the predictive rate of modification in a file is incremented by any modification to that file and decays exponentially. The third model is called RED-Co-Change. With each modification to a given file, the RED-Co-Change model not only increments its predictive rate, but also increments the rate for other files that are related to the given file through previous co-changes. We then present an information-theoretic approach to evaluate the performance of different prediction models. In this approach, the closeness of model distribution to the actual unknown probability distribution of the system is measured using cross entropy. We evaluate our prediction models empirically using the proposed information-theoretic approach for six large open source systems. Based on this evaluation, we observe that of our three prediction models, the RED-Co-Change model predicts the distribution that is closest to the actual distribution for all the studied systems.},
  keywords     = {bugs, change analysis, cvs, evaluation approach, file, freebsd, information theory, kde, koffice, log files, netbsd, openbsd, postgresql, prediction, prediction models, scm, source code},
  isbn         = {1-59593-397-2},
  doi          = {10.1145/1137983.1138013},
  url          = {http://doi.acm.org/10.1145/1137983.1138013},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/126InformationTheoretic.pdf},
  author       = {Askari, Mina and Holt, Ric}
}
@inproceedings{Knab:2006:PDD:1137983.1138012,
  title        = {Predicting defect densities in source code files with decision tree learners},
  booktitle    = {Proceedings of the 2006 international workshop on Mining software repositories},
  series       = {MSR {\textquoteright}06},
  year         = {2006},
  pages        = {119--125},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  abstract     = {With the advent of open source software repositories the data available for defect prediction in source files increased tremendously. Although traditional statistics turned out to derive reasonable results the sheer amount of data and the problem context of defect prediction demand sophisticated analysis such as provided by current data mining and machine learning techniques.In this work we focus on defect density prediction and present an approach that applies a decision tree learner on evolution data extracted from the Mozilla open source web browser project. The evolution data includes different source code, modification, and defect measures computed from seven recent Mozilla releases. Among the modification measures we also take into account the change coupling, a measure for the number of change-dependencies between source files. The main reason for choosing decision tree learners, instead of for example neural nets, was the goal of finding underlying rules which can be easily interpreted by humans. To find these rules, we set up a number of experiments to test common hypotheses regarding defects in software entities. Our experiments showed, that a simple tree learner can produce good results with various sets of input data.},
  keywords     = {change analysis, data mining, decision tree learner, defect density, defect prediction, mozilla, prediction, release history, scm, source code, version control},
  isbn         = {1-59593-397-2},
  doi          = {10.1145/1137983.1138012},
  url          = {http://doi.acm.org/10.1145/1137983.1138012},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/119Predicting.pdf},
  author       = {Knab, Patrick and Pinzger, Martin and Bernstein, Abraham}
}