% Conference paper: Bayati, Parsons, Susnjak and Heidary, ICoICT 2015.
% The numeric citation key is kept unchanged so existing citations still resolve.
% Cleaned up from a one-line export: the proceedings title was duplicated in
% booktitle, the page range used a spaced single hyphen, and organization
% repeated the publisher verbatim.
% NOTE(review): address presumably records the conference venue rather than the
% publisher's city; left as exported -- confirm against the target style.
@inproceedings{1800,
  author    = {Bayati, Shahabedin and Parsons, David and Susnjak, Teo and Heidary, Marzieh},
  title     = {Big data analytics on large-scale socio-technical software engineering archives},
  booktitle = {2015 3rd International Conference on Information and Communication Technology ({ICoICT})},
  year      = {2015},
  pages     = {65--69},
  publisher = {IEEE},
  address   = {Nusa Dua, Bali, Indonesia},
  doi       = {10.1109/ICoICT.2015.7231398},
  abstract  = {Given the fast growing nature of software engineering data in online software repositories and open source communities, it would be helpful to analyse these assets to discover valuable information about the software engineering development process and other related data. Big Data Analytics (BDA) techniques and frameworks can be applied on these data resources to achieve a high-performance and relevant data collection and analysis. Software engineering is a socio-technical process which needs development team collaboration and technical knowledge to develop a high-quality application. GitHub, as an online social coding foundation, contains valuable information about the software engineers{\textquoteright} communications and project life cycles. In this paper, unsupervised data mining techniques are applied on the data collected by general Big Data approaches to analyse GitHub projects, source codes and interactions. Source codes and projects are clustered using features and metrics derived from historical data in repositories, object oriented programming metrics and the influences of developers on source codes.},
}