% Entry 1687: CrowdSoft 2014 workshop paper on recommending projects from
% GitHub user-behaviour data.
% The exported address field had two locations fused together without a
% separator ("Hong Kong, ChinaNew York, New York, USA"). They are split below:
% address holds the publisher city, venue holds the conference location.
% Abstract text is kept verbatim as quoted material; only whitespace changed.
@inproceedings{1687,
  author       = {Zhang, Lingxiao and Zou, Yanzhen and Xie, Bing and Zhu, Zixiao},
  title        = {Recommending relevant projects via user behaviour: an exploratory study on {Github}},
  booktitle    = {Proceedings of the 1st International Workshop on Crowd-based Software Development Methods and Technologies - CrowdSoft 2014},
  year         = {2014},
  pages        = {25--30},
  publisher    = {ACM Press},
  organization = {ACM Press},
  address      = {New York, New York, USA},
  venue        = {Hong Kong, China},
  isbn         = {9781450332248},
  doi          = {10.1145/2666539.2666570},
  url          = {http://dl.acm.org/citation.cfm?id=2666570},
  abstract     = {Social coding sites (e.g., Github) provide various features like Forking and Sending Pull-requests to support crowd-based software engineering. When using these features, a large amount of user behavior data is recorded. User behavior data can reflect developers preferences and interests in software development activities. Online service providers in many fields have been using user behavior data to discover user preferences and interests to achieve various purposes. In the field of software engineering however, there has been few studies in mining large amount of user behavior data. Our goal is to design an approach based on user behavior data, to recommend relevant open source projects to developers, which can be helpful in activities like searching for the right open source solutions to quickly build prototypes. In this paper, we explore the possibilities of such a method by conducting a set of experiments on selected data sets from Github. We find it a promising direction in mining projects{\textquoteright} relevance from user behavior data. Our study also obtain some important issues that is worth considering in this method.},
}

% Entry 1576: Internetware 2013 paper describing a crawler framework for
% FLOSS data. The note field records how the paper cites FLOSSmole and is
% kept verbatim, including its literal double-quote characters (legal inside
% a brace-delimited value).
% NOTE(review): the export spelled the second author "Bing Zie"; corrected to
% "Xie" to match the same co-author in entry 1687 -- confirm against the DOI.
% NOTE(review): address looks like the conference venue rather than the
% publisher city; left unchanged because the publisher city is not given
% in this entry -- confirm and move to a venue field if appropriate.
@inproceedings{1576,
  author       = {Zou, Yanzhen and Xie, Bing and Zhang, Lingxiao},
  title        = {A scalable crawler framework for {FLOSS} data},
  booktitle    = {Proceedings of the 5th Asia-Pacific Symposium on Internetware - Internetware {\textquoteright}13},
  year         = {2013},
  pages        = {1--7},
  publisher    = {ACM Press},
  organization = {ACM Press},
  address      = {Changsha, China},
  isbn         = {9781450323697},
  doi          = {10.1145/2532443.2532454},
  keywords     = {flossmole cited},
  note         = {FLOSSmole [8] and FLOSSmetrics [9] retrieved FLOSS data of various types from famous software forges like SourceForge and Google code, interface to data sharing and analyzing is also provided. " Typically, Howison et al. [8] proposed a system called FLOSSmole. FLOSSmole is a large collection of datasets extracted from famous software forges such as SourceForge, GitHub, and Google Code. Datasets in FLOSSmole are mainly metadata describing various facts about the development of FLOSS projects. FLOSSmole manages its datasets in an open and collaborative manner. Most of the data is collected by the FLOSSmole research team, yet they also accept data donation from other research groups or similar projects. The scripts and programs that collects the datasets from the Internet is also open for download and donation. " "Using FLOSSmole [8] and FLOSSmetrics [9] as case studies, similar systems as such are called {\textquotedblleft}repository of repositories (RoR){\textquotedblright} and basic requirements of these systems are proposed. "},
  abstract     = {Free / Libre / Open Source Software (FLOSS) data, such as bug reports, mailing lists and related webpages, contains valuable information for reusing open source software projects. Before conducting further experiment on FLOSS data, researchers often need to download these data into a local storage system. We refer to this pre-process as FLOSS data retrieval, which in many cases can be a challenging task. In this paper, we proposed a crawler framework to ease the process of FLOSS data retrieval. To cope with various types of FLOSS data scattered on the Internet, we designed the framework in a scalable manner where a crawler program can be easily plugged into the system to extend its functionality. Researchers can perform the retrieval process on datasets of various types and sources simply by adding new configurations to the system. We have implemented the framework and provided basic functions via web-based interfaces. We presented the usage of the system by a detailed case study where we retrieved various types of datasets related to Apache Lucene project using our framework.},
}