@comment{
  Entry 1774: conference paper by Choetkiertikul, Dam, Tran and Ghose (2015).
  Typed as inproceedings because it is a single authored paper, not a whole
  proceedings volume. The booktitle was inferred from the "msr-2015" preprint
  URL and should be confirmed against the publisher record. Pages and DOI are
  not recorded here yet.
}
@inproceedings{1774,
  author      = {Choetkiertikul, Morakot and Dam, Hoa Khanh and Tran, Truyen and Ghose, Aditya},
  title       = {Characterization and Prediction of Issue-Related Risks in Software Projects},
  booktitle   = {Proceedings of the 12th Working Conference on Mining Software Repositories ({MSR} 2015)},
  publisher   = {IEEE},
  year        = {2015},
  month       = may,
  abstract    = {Identifying risks relevant to a software project and planning measures to deal with them are critical to the success of the project. Current practices in risk assessment mostly rely on high-level, generic guidance or the subjective judgements of experts. In this paper, we propose a novel approach to risk assessment using historical data associated with a software project. Specifically, our approach identifies patterns of past events that caused project delays, and uses this knowledge to identify risks in the current state of the project. A set of risk factors characterizing {\textquotedblleft}risky{\textquotedblright} software tasks (in the form of issues) were extracted from five open source projects: Apache, Duraspace, JBoss, Moodle, and Spring. In addition, we performed feature selection using a sparse logistic regression model to select risk factors with good discriminative power. Based on these risk factors, we built predictive models to predict if an issue will cause a project delay. Our predictive models are able to predict both the risk impact (i.e. the extent of the delay) and the likelihood of a risk occurring. The evaluation results demonstrate the effectiveness of our predictive models, achieving on average 48\%{\textendash}81\% precision, 23\%{\textendash}90\% recall, 29\%{\textendash}71\% F-measure, and 70\%{\textendash}92\% Area Under the ROC Curve. Our predictive models also have low error rates: 0.39{\textendash}0.75 for Macro-averaged Mean Cost-Error and 0.7{\textendash}1.2 for Macro-averaged Mean Absolute Error.},
  url         = {http://www.uow.edu.au/~hoa/papers/msr-2015-preprint.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/msr-2015-preprint.pdf},
  internal-note = {booktitle inferred from preprint URL (msr-2015); verify, then add pages and doi},
}