@conference {963, title = {Automated dependency resolution for open source software}, booktitle = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)}, year = {2010}, pages = {130 - 140}, publisher = {IEEE}, organization = {IEEE}, address = {Cape Town, South Africa}, abstract = {Opportunities for software reuse are plentiful, thanks in large part to the widespread adoption of open source processes and the availability of search engines for locating relevant artifacts. One challenge presented by open source software reuse is simply getting a newly downloaded artifact to build/run in the first place. The artifact itself likely reuses other artifacts, and so depends on their being located to function properly. While merely tedious in the individual case, this can cause serious difficulties for those seeking to study open source software. It is simply not feasible to manually resolve dependencies for thousands of projects, and many forms of analysis require declarative completeness. In this paper we present a method for automatically resolving dependencies for open source software. It works by cross-referencing a project{\textquoteright}s missing type information with a repository of candidate artifacts. We have implemented this method on top of the Sourcerer, an infrastructure for the large-scale indexing and analysis of open source code. The performance of our resolution algorithm was evaluated in two parts. First, for a small number of popular open source projects, we manually examined the artifacts suggested by our system to determine if they were appropriate. Second, we applied the algorithm to the 13,241 projects in the Sourcerer managed repository to evaluate the rate of resolution success. The results demonstrate the feasibility of this approach, as the algorithm located all of the required artifacts needed by 3,904 additional projects, increasing the percentage of declaratively complete projects in Sourcerer from 39\% to 69\%.}, keywords = {dependencies, java, source code, sourcerer}, isbn = {978-1-4244-6802-7}, doi = {10.1109/MSR.2010.5463346}, author = {Ossher, Joel and Bajracharya, Sushil and Lopes, Cristina} } @conference {945, title = {SourcererDB: An aggregated repository of statically analyzed and cross-linked open source Java projects}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)2009 6th IEEE International Working Conference on Mining Software Repositories}, year = {2009}, pages = {183 - 186}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {The open source movement has made vast quantities of source code available online for free, providing an extremely large dataset for empirical study and potential resuse. A major difficulty in exploiting this potential fully is that the data are currently scattered between competing source code repositories, none of which are structured for empirical analysis and cross-project comparison. As a result, software researchers and developers are left to compile their own datasets, resulting in duplicated effort and limited results. To address this challenge, we built SourcererDB, an aggregated repository of statically analyzed and cross-linked open source Java projects. SourcererDB contains local snapshots of 2,852 Java projects taken from Sourceforge, Apache and Java.net. These projects are statically analyzed to extract rich structural information, which is then stored in a relational database. References to entities in the 16,058 external jars are resolved and grouped, allowing for cross-project usage information to be accessed easily. This paper describes: (a) the mechanism for resolving and grouping these cross-project references, (b) the structure of and the metamodel for the SourcererDB repository, and (d) end-user dataset access mechanisms. Our goal in building SourcererDB is to provide a rich dataset of source code to facilitate the sharing of extracted data and to encourage reuse and repeatability of experiments.}, keywords = {apache, integration, java, java.net, project, repository, sourceforge, SourcererDB}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069501}, author = {Ossher, Joel and Bajracharya, Sushil and Linstead, Erik and Baldi, Pierre and Lopes, Cristina} } @conference {1066, title = {SourcererDB: An aggregated repository of statically analyzed and cross-linked open source Java projects}, booktitle = {2009 6th IEEE International Working Conference on Mining Software Repositories (MSR)}, year = {2009}, pages = {183 - 186}, publisher = {IEEE}, organization = {IEEE}, address = {Vancouver, BC, Canada}, abstract = {The open source movement has made vast quantities of source code available online for free, providing an extremely large dataset for empirical study and potential re-use. A major difficulty in exploiting this potential fully is that the data are currently scattered between competing source code repositories, none of which are structured for empirical analysis and cross-project comparison. As a result, software researchers and developers are left to compile their own datasets, resulting in duplicated effort and limited results. To address this challenge, we built SourcererDB, an aggregated repository of statically analyzed and cross-linked open source Java projects. SourcererDB contains local snapshots of 2,852 Java projects taken from Sourceforge, Apache and Java.net. These projects are statically analyzed to extract rich structural information, which is then stored in a relational database. References to entities in the 16,058 external jars are resolved and grouped, allowing for cross-project usage information to be accessed easily. This paper describes: (a) the mechanism for resolving and grouping these cross-project references, (b) the structure of and the metamodel for the SourcererDB repository, and (d) end-user dataset access mechanisms. Our goal in building SourcererDB is to provide a rich dataset of source code to facilitate the sharing of extracted data and to encourage reuse and repeatability of experiments.}, keywords = {apache, java, java.net, source code, sourceforge, sourcerer}, isbn = {978-1-4244-3493-0}, doi = {10.1109/MSR.2009.5069501}, author = {Ossher, Joel and Bajracharya, Sushil and Linstead, Erik and Baldi, Pierre and Lopes, Cristina} }