% Entry 1754 -- conference paper (Coelho, Almeida, Gousios, van Deursen; 2015).
% Typed as inproceedings rather than proceedings: the record carries authors,
% which standard styles ignore on a whole-proceedings entry.
% NOTE(review): booktitle was supplied during review and was not part of the
% exported record -- confirm against the publisher's listing.
@inproceedings{1754,
  author      = {Coelho, Roberta and Almeida, Lucas and Gousios, Georgios and van Deursen, Arie},
  title       = {Unveiling Exception Handling Bug Hazards in {Android} based on {GitHub} and {Google Code} Issues},
  booktitle   = {Proceedings of the 12th Working Conference on Mining Software Repositories ({MSR} 2015)},
  year        = {2015},
  month       = may,
  abstract    = {This paper reports on a study mining the exception stack traces included in 159,048 issues reported on Android projects hosted in GitHub (482 projects) and Google Code (157 projects). The goal of this study is to investigate whether stack trace information can reveal bug hazards related to exception handling code that may lead to a decrease in application robustness. Overall 6,005 exception stack traces were extracted, and subjected to source code and bytecode analysis. The outcomes of this study include the identification of the following bug hazards: (i) unexpected cross-type exception wrappings (for instance, trying to handle an instance of OutOfMemoryError {\textquotedblleft}hidden{\textquotedblright} in a checked exception) which can make the exception-related code more complex and negatively impact the application robustness; (ii) undocumented runtime exceptions thrown by both the Android platform and third party libraries; and (iii) undocumented checked exceptions thrown by the Android Platform. Such undocumented exceptions make difficult, and most of the times infeasible for the client code to protect against {\textquotedblleft}unforeseen{\textquotedblright} situations that may happen while calling third-party code.
This study provides further insights on such bug hazards and the robustness threats they impose to Android apps as well as to other systems based on the Java exception model.},
  keywords    = {github, google code},
  url         = {http://www.gousios.gr/pub/android-stacks.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/android-stacks.pdf},
}

% Entry 1522 -- conference paper (Dyer, Nguyen, Rajan, Nguyen; 2013).
% Typed as inproceedings for the same reason as above: it has authors and a
% page range, so it is a single paper rather than a whole proceedings volume.
% NOTE(review): booktitle was supplied during review and was not part of the
% exported record -- confirm against the publisher's listing.
@inproceedings{1522,
  author    = {Dyer, Robert and Nguyen, Hoan Anh and Rajan, Hridesh and Nguyen, Tien N.},
  title     = {{Boa}: A Language and Infrastructure for Analyzing Ultra-Large-Scale Software Repositories},
  booktitle = {Proceedings of the 35th International Conference on Software Engineering ({ICSE} 2013)},
  year      = {2013},
  month     = may,
  pages     = {422--431},
  abstract  = {In today{\textquoteright}s software-centric world, ultra-large-scale software repositories, e.g. SourceForge (350,000+ projects), GitHub (250,000+ projects), and Google Code (250,000+ projects) are the new library of Alexandria. They contain an enormous corpus of software and information about software. Scientists and engineers alike are interested in analyzing this wealth of information both for curiosity as well as for testing important hypotheses. However, systematic extraction of relevant data from these repositories and analysis of such data for testing hypotheses is hard, and best left for mining software repository (MSR) experts! The goal of Boa, a domain-specific language and infrastructure described here, is to ease testing MSR-related hypotheses. We have implemented Boa and provide a web-based interface to Boa{\textquoteright}s infrastructure. Our evaluation demonstrates that Boa substantially reduces programming efforts, thus lowering the barrier to entry. We also see drastic improvements in scalability.
Last but not least, reproducing an experiment conducted using Boa is just a matter of re-running small Boa programs provided by previous researchers.},
  keywords  = {ease of use, forge, github, google code, lower barrier to entry, mining, repository, reproducible, scalable, Software, sourceforge},
}

% Entry 1589 -- Iqbal, Cyganiak, Hausenblas (2012).
% The former month value held the year (2012) rather than a month and was
% dropped; the year field already records it.
% NOTE(review): no journal is recorded, which the article type requires. The
% attachment filename (DERI-TR-...) suggests this may be a technical report --
% confirm the institution and retype if so.
@article{1589,
  author      = {Iqbal, Aftab and Cyganiak, Richard and Hausenblas, Michael},
  title       = {Integrating {FLOSS} repositories on the {Web}},
  year        = {2012},
  note        = {``To provide the researchers easy access to the project{\textquoteright}s data, two research projects were initiated (with slightly different objective) by the FLOSS research community which are FLOSSmole and FLOSSMetrics, also known as {\textquotedblleft}repository of repositories (RoR){\textquotedblright}. These RoRs were created to consolidate metadata and analysis of projects from a variety of code forges into a centralized place for use by the researchers in academia and industry.'' ``In this paper, we take into consideration only project{\textquoteright}s metadata from the code forges which are made available to download by the FLOSSmole community. Further, we only study Googlecode and Sourceforge data for this paper, although our methods extend to other code forges as well. Names are not available for the developers in the database dump of Googlecode provided by FLOSSmole, so we consider matching only developer ID(s) in the case of Googlecode.''},
  abstract    = {This paper provides a novel approach to the problem of integrating data from multiple code forges of FLOSS. We review the current problems in integrating the data from multiple forges and argue that Semantic Web technologies are suitable for representing knowledge contained in code forges. Further, we show the advantage of linking the metadata of projects to other data sources on the Web which will enable querying extra information from the Web.
The paper briefly describes how the modeling is achieved and what benefits can be obtained by enabling linking to other relevant data sources already available on the Web.},
  keywords    = {flossmole, google code, sourceforge},
  url         = {https://www.researchgate.net/publication/259757473_Integrating_FLOSS_repositories_on_the_Web},
  attachments = {https://flosshub.org/sites/flosshub.org/files/DERI-TR-AFTAB-2012-12-10_0.pdf},
}

% Entry 965 -- conference paper (Di Penta, German, Antoniol; MSR 2010).
% The exported booktitle contained the same string twice back to back; it is
% reduced to a single copy here.
@inproceedings{965,
  author       = {Di Penta, Massimiliano and German, Daniel M. and Antoniol, Giuliano},
  title        = {Identifying licensing of jar archives using a code-search approach},
  booktitle    = {2010 7th {IEEE} Working Conference on Mining Software Repositories ({MSR} 2010)},
  year         = {2010},
  pages        = {151--160},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  abstract     = {Free and open source software strongly promotes the reuse of source code. Some open source Java components/libraries are distributed as jar archives only containing the bytecode and some additional information. For whoever wanting to integrate this jar in her own project, it is important to determine the license(s) of the code from which the jar archive was produced, as this affects the way that such component can be used. This paper proposes an automatic approach to determine the license of jar archives, combining the use of a code-search engine with the automatic classification of licenses contained in textual files enclosed in the jar.
Results of an empirical study performed on 37 jars - from 17 different systems - indicate that this approach is able to successfully infer the jar licenses in over 95\% of the cases, but that in many cases the license in textual files may differ from the one of the classes contained in the jar.},
  keywords     = {apache, bytecode, classification, eclipse, google code, jar, java, licenses, source code},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463282},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/151msr2010.pdf},
}