% Book chapter describing the Sourcerer code search infrastructure, published
% in the edited volume named in booktitle. Typed as incollection because the
% record carries a chapter author, volume editors, and a chapter page range.
% The keyword "flossmole cited" and the note record where FLOSSmole is mentioned.
@incollection{1579,
  author       = {Bajracharya, Sushil Krishna},
  editor       = {Sim, Susan Elliott and Gallardo-Valencia, Rosalva E.},
  title        = {Infrastructure for Building Code Search Applications for Developers},
  booktitle    = {Finding Source Code on the {Web} for Remix and Reuse},
  year         = {2013},
  pages        = {135--164},
  publisher    = {Springer New York},
  organization = {Springer New York},
  address      = {New York, NY},
  isbn         = {978-1-4614-6596-6},
  doi          = {10.1007/978-1-4614-6596-6_8},
  url          = {http://www.drsusansim.org/papers/FindingCodeontheWeb-20120822.pdf},
  keywords     = {code search, flossmole cited},
  note         = {In "further reading": "Although not a code search infrastructure, FLOSSmole [13] is another major undertaking in building large collection of metadata about open source projects on the Web. Currently, FLOSSmole reports a massive data collection of more than 500,000 open source projects in its web site [32]. For code search infrastructure builders, now it is possible to leverage FLOSSmole{\textquoteright}s project metadata to build code repositories instead of spending an effort in implementing custom spiders and crawlers for code."},
  abstract     = {The large availability of open source code on the Web provides great opportunities to build useful code search applications for developers. Building such applications requires addressing several challenges inherent in collecting and analyzing code from open source repositories to make them available for search. An infrastructure that supports collection, analysis, and search services for open source code available on the Web can greatly facilitate building effective code search applications. This chapter presents such an infrastructure called Sourcerer that facilitates collection, analysis, and search of source code available in code repositories on the Web. This chapter provides useful information to researchers and implementors of code search applications interested in harnessing the large availability of source code in the repositories on the Web. In particular, this chapter highlights key aspects of Sourcerer that supports combining Software Engineering and Information Retrieval techniques to build effective code search applications.},
}

% Journal article analysing a year-long usage log of the Koders code search
% engine. The month is given as the standard three-letter macro so that the
% bibliography style controls how it is rendered.
@article{1400,
  author   = {Bajracharya, Sushil Krishna and Lopes, Cristina Videira},
  title    = {Analyzing and mining a code search engine usage log},
  journal  = {Empirical Software Engineering},
  volume   = {17},
  year     = {2012},
  month    = aug,
  pages    = {424--466},
  issn     = {1573-7616},
  doi      = {10.1007/s10664-010-9144-6},
  keywords = {code search, koders, search, search engine, topics},
  abstract = {This paper presents an analysis of a year long usage log of Koders, the first commercially available Internet-Scale code search engine (http://www.koders.com). The usage log comprises about ten million activities from more than three million users. Analysis of the usage data shows that despite of attracting a large number of visitors, Koders has a very sparse usage and that it lacks regular usage from many of its users. When compared to Web search, search behavior in Koders showed many similar patterns. A topic modeling analysis of the usage data shows what topics users of Koders are looking for. Observations on the prevalence of these topics among the users, and observations on how search and download activities vary across topics, lead to the conclusion that users who find code search engines usable are those who already know to a high level of specificity what to look for. This paper also presents a general categorization of these topics that provides insights on the different ways code search engine users express their queries. It identifies various forms of queries in Koders{\textquoteright}s log and the kinds of results addressed by the queries. It also provides several suggestions for improvements in code search engines based on the analysis of usage, topics, and query forms. The work presented in this paper is the first of its kind that reveals several insights on the usage of an Internet-Scale code search engine.},
}