% ICSE 2016 paper on developer multitasking across GitHub projects.
@inproceedings{Vasilescu:2016:SLM:2884781.2884875,
  author       = {Vasilescu, Bogdan and Blincoe, Kelly and Xuan, Qi and Casalnuovo, Casey and Damian, Daniela and Devanbu, Premkumar and Filkov, Vladimir},
  title        = {The Sky is Not the Limit: Multitasking Across {GitHub} Projects},
  booktitle    = {Proceedings of the 38th International Conference on Software Engineering (ICSE 2016)},
  series       = {ICSE '16},
  year         = {2016},
  pages        = {994--1005},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  isbn         = {978-1-4503-3900-1},
  doi          = {10.1145/2884781.2884875},
  url          = {http://doi.acm.org/10.1145/2884781.2884875},
  keywords     = {github, multitasking, productivity},
  abstract     = {Software development has always inherently required multitasking: developers switch between coding, reviewing, testing, designing, and meeting with colleagues. The advent of software ecosystems like GitHub has enabled something new: the ability to easily switch between projects. Developers also have social incentives to contribute to many projects; prolific contributors gain social recognition and (eventually) economic rewards. Multitasking, however, comes at a cognitive cost: frequent context-switches can lead to distraction, sub-standard work, and even greater stress. In this paper, we gather ecosystem-level data on a group of programmers working on a large collection of projects. We develop models and methods for measuring the rate and breadth of a developers{\textquoteright} context-switching behavior, and we study how context-switching affects their productivity. We also survey developers to understand the reasons for and perceptions of multitasking. We find that the most common reason for multitasking is interrelationships and dependencies between projects. Notably, we find that the rate of switching and breadth (number of projects) of a developer{\textquoteright}s work matter. Developers who work on many projects have higher productivity if they focus on few projects per day. Developers that switch projects too much during the course of a day have lower productivity as they work on more projects overall. Despite these findings, developers perceptions of the benefits of multitasking are varied.},
}

% MSR 2010 paper on the validity of network analyses of open source project data.
@inproceedings{969,
  author       = {Nia, Roozbeh and Bird, Christian and Devanbu, Premkumar and Filkov, Vladimir},
  title        = {Validity of network analyses in Open Source Projects},
  booktitle    = {2010 7th IEEE Working Conference on Mining Software Repositories (MSR 2010)},
  year         = {2010},
  pages        = {201--209},
  publisher    = {IEEE},
  organization = {IEEE},
  address      = {Cape Town, South Africa},
  isbn         = {978-1-4244-6802-7},
  doi          = {10.1109/MSR.2010.5463342},
  keywords     = {apache, email archives, mailing lists, missing data, mysql, perl, social networks},
  abstract     = {Social network methods are frequently used to analyze networks derived from Open Source Project communication and collaboration data. Such studies typically discover patterns in the information flow between contributors or contributions in these projects. Social network metrics have also been used to predict defect occurrence. However, such studies often ignore or side-step the issue of whether (and in what way) the metrics and networks of study are influenced by inadequate or missing data. In previous studies email archives of OSS projects have provided a useful trace of the communication and co-ordination activities of the participants. These traces have been used to construct social networks that are then subject to various types of analysis. However, during the construction of these networks, some assumptions are made, that may not always hold; this leads to incomplete, and sometimes incorrect networks. The question then becomes, do these errors affect the validity of the ensuing analysis? In this paper we specifically examine the stability of network metrics in the presence of inadequate and missing data. The issues that we study are: 1) the effect of paths with broken information flow (i.e. consecutive edges which are out of temporal order) on measures of centrality of nodes in the network, and 2) the effect of missing links on such measures. We demonstrate on three different OSS projects that while these issues do change network topology, the metrics used in the analysis are stable with respect to such changes.},
  note         = {"We have mined archival records of developer mailing lists to generate reply-to social networks for the three OSS projects: Apache, MySQL, and Perl." "For each of these projects, we construct an information flow network based on messages that are sent as replies to previous messages....We use this methodology on all mined data to create a network of mailing list participants. "},
  attachments  = {https://flosshub.org/sites/flosshub.org/files/201NetworkAnalysis.pdf},
}