@conference {1357, title = {Content classification of developer emails}, booktitle = {Proceedings of the 34th IEEE/ACM International Conference On Software Engineering (ICSE 2012)}, year = {2012}, note = {We created a web application to manually classify email content in the chosen categories. We classified a statistically significant set of emails from four java open source software (OSS) systems, used to evaluate the accuracy of our approach. The contributions of this paper are: 1) a novel approach that fuses parsing and ML techniques for classification of email lines; 2) a web application to manually classify email content; 3) the manual classification of a statistically significant sample set of emails (for a total of 67,792 lines) from mailing lists of four different software systems{\textendash}in the form of a freely available benchmark; and 4) the empirical evaluation of our approach against the benchmark}, month = {06/2012}, abstract = {Emails related to the development of a software system contain information about design choices and issues encountered during the development process. Exploiting the knowledge embedded in emails with automatic tools is challenging, due to the unstructured, noisy and mixed language nature of this communication medium. Natural language text is often not well-formed and is interleaved with languages with other syntaxes, such as code or stack traces. We present an approach to classify email content at line level. Our technique classifies email lines in five categories (i.e., text, junk, code, patch, and stack trace) to allow one to subsequently apply ad hoc analysis techniques for each category. We evaluated our approach on a statistically significant set of emails gathered from mailing lists of four unrelated open source systems.}, keywords = {email, Emails, Empirical software engineering, mailing list, natural language, Unstructured Data Mining}, url = {http://www.inf.usi.ch/phd/bacchelli/publications.php}, attachments = {https://flosshub.org/sites/flosshub.org/files/icse2012.pdf}, author = {Bacchelli, Alberto and Dal Sasso, Tommaso and D{\textquoteright}Ambros, Marco and Lanza, Michele} } @conference {1358, title = {Extracting source code from e-mails}, booktitle = {Proceedings of ICPC 2010 (18th IEEE International Conference on Program Comprehension)}, year = {2010}, note = {"We want to extract source code fragments from e-mail messages. To do this, we first need to select e-mails that contain source code fragments, and then we extract such fragments from the content in which they are enclosed." "we manually build a statistically significant benchmark taking sample e- mails from five unrelated open source Java software systems." }, pages = {24-33}, abstract = {E-mails, used by developers and system users to communicate over a broad range of topics, offer a valuable source of information. If archived, e-mails can be mined to support program comprehension activities and to provide views of a software system that are alternative and complementary to those offered by the source code. However, e-mails are written in natural language, and therefore contain noise that makes it difficult to retrieve the important data. Thus, before conducting an effective system analysis and extracting data for program comprehension, it is necessary to select the relevant messages, and to expose only the meaningful information. In this work we focus both on classifying e-mails that hold fragments of the source code of a system, and on extracting the source code pieces inside the e-mail. We devised and analyzed a number of lightweight techniques to accomplish these tasks. To assess the validity of our techniques, we manually inspected and annotated a statistically significant number of e-mails from five unrelated open source software systems written in Java. With such a benchmark in place, we measured the effectiveness of each technique in terms of precision and recall.}, keywords = {argouml, email, freenet, jmeter, mailing lists, mina, natural language, openjpa, source code}, url = {http://www.inf.usi.ch/phd/bacchelli/publications.php}, attachments = {https://flosshub.org/sites/flosshub.org/files/icpc2010.pdf}, author = {Bacchelli, Alberto and D{\textquoteright}Ambros, Marco and Lanza, Michele} }