% G13: Gousios, "The GHTorrent dataset and tool suite", MSR 2013 paper.
% The entry type is inproceedings (the canonical name for the legacy "conference" alias).
% "attachments" is a non-standard field that standard styles ignore; it is kept
% because it records where a PDF copy of the paper is hosted.
% NOTE(review): pages, publisher and doi are absent from this record; add them
% from the publisher's listing if a style needs them.
@inproceedings{G13,
  author      = {Gousios, Georgios},
  title       = {The {GHTorrent} dataset and tool suite},
  booktitle   = {MSR {\textquoteright}13: Proceedings of the 10th Working Conference on Mining Software Repositories},
  year        = {2013},
  month       = may,
  note        = {Example of how to use the data/tool: http://www.gousios.gr/blog/on-github-pull-requests/},
  url         = {http://www.gousios.gr/bibliography/G13.html},
  abstract    = {A common requirement of many empirical software engineering studies is the acquisition and curation of data from software repositories. During the last few years, GitHub has emerged as a popular project hosting, mirroring and collaboration platform. GitHub provides an extensive REST API, which enables researchers to retrieve both the commits to the projects{\textquoteright} repositories and events generated through user actions on project resources. GHTorrent aims to create a scalable off line mirror of GitHub{\textquoteright}s event streams and persistent data, and offer it to the research community as a service. In this paper, we present the project{\textquoteright}s design and initial implementation and demonstrate how the provided datasets can be queried and processed.},
  attachments = {https://flosshub.org/sites/flosshub.org/files/ghtorrent-dataset-toolsuite.pdf},
}