@comment{Entry 1758 is a single conference paper, so it is typed as inproceedings (author fields are ignored on whole-proceedings entries by classic styles). Review note: the booktitle is not present in the original record; it is inferred from the msr-data URL, the IEEE publisher and the May 2015 date -- confirm against the published proceedings and add pages/doi when verified.}
@inproceedings{1758,
  author      = {German, Daniel M. and Adams, Bram and Hassan, Ahmed E.},
  title       = {A Dataset of the Activity of the {git} Super-repository of {Linux} in 2012},
  booktitle   = {Proceedings of the 12th Working Conference on Mining Software Repositories ({MSR} 2015)},
  publisher   = {IEEE},
  year        = {2015},
  month       = may,
  url         = {http://turingmachine.org/2015/linuxGit/msr-data-git-linux.pdf},
  attachments = {https://flosshub.org/sites/flosshub.org/files/msr-data-git-linux.pdf},
  abstract    = {This dataset documents the activity in the public portion of the git Super-repository of the Linux kernel during 2012. In a distributed version control system, such as git, the Super-repository is the collection of all the repositories (repos) used for development. In such a Super-repository, some repos will be accessible only by their owners (they are private, and are located in places that are unreachable to other users) while others are available to other members of the team. The latter public repositories are used as avenues through which commits flow from one developer to another. During the last six weeks of 2011, we proceeded to automatically discover the public portion of the Super-repository of Linux. Then, in 2012, every 3 hrs, each of these public repositories was queried to see what new commits it had and what commits had disappeared from it using a process we call continuous mining. This resulted in the identification of 533,513 different commits across 451 different public repositories and how they propagated through the Linux Super-repository, including the repository of Linus Torvalds (i.e., the main repository of the Linux kernel). This information could help us understand how kernel contributors use git, how they collaborate and how commits are integrated into the Linux kernel and into the repositories of organizations that distribute the kernel. This dataset is at http://turingmachine.org/2015/linuxGit},
}