% Siy, Chundi, Subramaniam: MSR 2008 challenge report on segmenting developer
% CVS change histories (Eclipse). Citation key kept unchanged for existing citations.
@inproceedings{Siy:2008:SDW:1370750.1370784,
  author       = {Siy, Harvey and Chundi, Parvathi and Subramaniam, Mahadevan},
  title        = {Summarizing developer work history using time series segmentation: challenge report},
  booktitle    = {Proceedings of the 2008 international working conference on Mining software repositories},
  series       = {MSR '08},
  year         = {2008},
  month        = may,
  pages        = {137--140},
  publisher    = {ACM},
  organization = {ACM},
  address      = {New York, NY, USA},
  isbn         = {978-1-60558-024-1},
  doi          = {10.1145/1370750.1370784},
  url          = {http://doi.acm.org/10.1145/1370750.1370784},
  keywords     = {contributions, cvs, developers, eclipse, msr challenge, temporal segmentation, time series, work history},
  abstract     = {Temporal segmentation partitions time series data with the intent of producing more homogeneous segments. It is a technique used to preprocess data so that subsequent time series analysis on individual segments can detect trends that may not be evident when performing time series analysis on the entire dataset. This technique allows data miners to partition a large dataset without making any assumption of periodicity or any other a priori knowledge of the dataset's features. We investigate the insights that can be gained from the application of time series segmentation to software version repositories. Software version repositories from large projects contain on the order of hundreds of thousands of timestamped entries or more. It is a continuing challenge to aggregate such data so that noise is reduced and important characteristics are brought out. In this paper, we present a way to summarize developer work history in terms of the files they have modified over time by segmenting the CVS change data of individual Eclipse developers. We show that the files they modify tends to change significantly over time though most of them tend to work within the same directories.},
}