@conference {926, title = {Tracking concept drift of software projects using defect prediction quality}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)2009 6th IEEE International Working Conference on Mining Software Repositories}, year = {2009}, pages = {51 - 60}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {Defect prediction is an important task in the mining of software repositories, but the quality of predictions varies strongly within and across software projects. In this paper we investigate the reasons why the prediction quality is so fluctuating due to the altering nature of the bug (or defect) fixing process. Therefore, we adopt the notion of a concept drift, which denotes that the defect prediction model has become unsuitable as set of influencing features has changed - usually due to a change in the underlying bug generation process (i.e., the concept). We explore four open source projects (Eclipse, OpenOffice, Netbeans and Mozilla) and construct file-level and project-level features for each of them from their respective CVS and Bugzilla repositories. We then use this data to build defect prediction models and visualize the prediction quality along the time axis. These visualizations allow us to identify concept drifts and - as a consequence - phases of stability and instability expressed in the level of defect prediction quality. Further, we identify those project features, which are influencing the defect prediction quality using both a tree induction-algorithm and a linear regression model. Our experiments uncover that software systems are subject to considerable concept drifts in their evolution history. Specifically, we observe that the change in number of authors editing a file and the number of defects fixed by them contribute to a project{\textquoteright}s concept drift and therefore influence the defect prediction quality. Our findings suggest that project managers using defect prediction models for decision making should be aware of the actual phase of stability or instability due to a potential concept drift.}, keywords = {bugzilla, cvs, defect prediction, eclipse, mozilla, netbeans, openoffice}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069480}, attachments = {https://flosshub.org/sites/flosshub.org/files/51MSR2009_0111_Ekanayake_Jayalath.pdf}, author = {Ekanayake, Jayalath and Tappolet, Jonas and Gall, Harald C. and Bernstein, Abraham} } @conference {999, title = {Mining Software Repositories with iSPAROL and a Software Evolution Ontology}, booktitle = {Fourth International Workshop on Mining Software RepositoriesFourth International Workshop on Mining Software Repositories (MSR{\textquoteright}07:ICSE Workshops 2007)}, year = {2007}, pages = {10 - 10}, publisher = {IEEE}, organization = {IEEE}, address = {Minneapolis, MN, USA}, abstract = {One of the most important decisions researchers face when analyzing the evolution of software systems is the choice of a proper data analysis/exchange format. Most existing formats have to be processed with special programs written specifically for that purpose and are not easily extendible. Most scientists, therefore, use their own database(s) requiring each of them to repeat the work of writing the import/export programs to their format. We present EvoOnt, a software repository data exchange format based on the Web Ontology Language (OWL). EvoOnt includes software, release, and bug-related information. Since OWL describes the semantics of the data, EvoOnt is (1) easily extendible, (2) comes with many existing tools, and (3) allows to derive assertions through its inherent Description Logic reasoning capabilities. The paper also shows iSPARQL -- our SPARQL-based Semantic Web query engine containing similarity joins. Together with EvoOnt, iSPARQL can accomplish a sizable number of tasks sought in software repository mining projects, such as an assessment of the amount of change between versions or the detection of bad code smells. To illustrate the usefulness of EvoOnt (and iSPARQL), we perform a series of experiments with a real-world Java project. These show that a number of software analyses can be reduced to simple iSPARQL queries on an EvoOnt dataset.}, keywords = {database, eclipse, evoont, java, owl, semantic, sparql}, isbn = {0-7695-2950-X}, doi = {10.1109/MSR.2007.21}, attachments = {https://flosshub.org/sites/flosshub.org/files/28300010.pdf}, author = {Kiefer, Christoph and Bernstein, Abraham and Tappolet, Jonas} }