% Conference paper by Herzig, Just and Zeller (2013), pages 392--401.
% Typed as an in-proceedings paper because the record carries authors and a
% page range, which standard styles do not print for a whole-proceedings entry.
% TODO(review): the exported record has no booktitle; add the proceedings
% title (and publisher/doi if known) to silence the missing-field warning.
@inproceedings{1521,
  author   = {Herzig, Kim and Just, Sascha and Zeller, Andreas},
  title    = {It's Not a Bug, It's a Feature: How Misclassification Impacts Bug Prediction},
  year     = {2013},
  month    = may,
  pages    = {392--401},
  abstract = {In a manual examination of more than 7,000 issue reports from the bug databases of five open-source projects, we found 33.8\% of all bug reports to be misclassified{\textemdash}that is, rather than referring to a code fix, they resulted in a new feature, an update to documentation, or an internal refactoring. This misclassification introduces bias in bug prediction models, confusing bugs and features: On average, 39\% of files marked as defective actually never had a bug. We discuss the impact of this misclassification on earlier studies and recommend manual data validation for future studies.},
  keywords = {bias, bug reports, data quality, mining software repositories, noise},
}