\documentclass[11pt,twoside]{article}\makeatletter
% --- Color support: prefer xcolor, fall back to the older color package. ---
\IfFileExists{xcolor.sty}%
{\RequirePackage{xcolor}}%
{\RequirePackage{color}}
\usepackage{colortbl}
\usepackage{wrapfig}
\usepackage{ifxetex}
\ifxetex
% XeTeX branch: Unicode-aware fonts via fontspec.
\usepackage{fontspec}
\usepackage{xunicode}
% Make the special Unicode escape characters emitted by the TEI/XSL conversion
% active, mapping them back to a literal backslash and braces.
\catcode`⃥=\active \def⃥{\textbackslash}
\catcode`❴=\active \def❴{\{}
\catcode`❵=\active \def❵{\}}
\def\textJapanese{\fontspec{Noto Sans CJK JP}}
\def\textChinese{\fontspec{Noto Sans CJK SC}}
\def\textKorean{\fontspec{Noto Sans CJK KR}}
\setmonofont{DejaVu Sans Mono}
\else
% pdfTeX branch: UTF-8 input, using the extended utf8x/ucs support if present.
\IfFileExists{utf8x.def}%
{\usepackage[utf8x]{inputenc}
\PrerenderUnicode{–}
}%
{\usepackage[utf8]{inputenc}}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{float}
\usepackage[]{ucs}
% Substitute glyphs for code points pdfTeX cannot render directly.
\uc@dclc{8421}{default}{\textbackslash }
\uc@dclc{10100}{default}{\{}
\uc@dclc{10101}{default}{\}}
\uc@dclc{8491}{default}{\AA{}}
\uc@dclc{8239}{default}{\,}
\uc@dclc{20154}{default}{ }
\uc@dclc{10148}{default}{>}
\def\textschwa{\rotatebox{-90}{e}}
\def\textJapanese{}
\def\textChinese{}
\IfFileExists{tipa.sty}{\usepackage{tipa}}{}
\fi
% Monospaced small font for rendered examples; text pi symbol from OML.
\def\exampleFont{\ttfamily\small}
\DeclareTextSymbol{\textpi}{OML}{25}
\usepackage{relsize}
\RequirePackage{array}
% Patch LaTeX's array/tabular preamble scanner so that "(" and ")" become two
% extra column-preamble token classes (8 and 9).  ")" plays the role of the
% standard ">" specifier; it is used by \Panel and the L/C/R/P/B/M column
% types defined later.  Apart from the added classes this is the stock
% \@testpach logic from the array package.
\def\@testpach{\@chclass
\ifnum \@lastchclass=6 \@ne \@chnum \@ne \else
\ifnum \@lastchclass=7 5 \else
\ifnum \@lastchclass=8 \tw@ \else
\ifnum \@lastchclass=9 \thr@@
\else \z@
\ifnum \@lastchclass = 10 \else
\edef\@nextchar{\expandafter\string\@nextchar}%
\@chnum
\if \@nextchar c\z@ \else
\if \@nextchar l\@ne \else
\if \@nextchar r\tw@ \else
\z@ \@chclass
\if\@nextchar |\@ne \else
\if \@nextchar !6 \else
\if \@nextchar @7 \else
\if \@nextchar (8 \else
\if \@nextchar )9 \else
10
\@chnum
\if \@nextchar m\thr@@\else
\if \@nextchar p4 \else
\if \@nextchar b5 \else
\z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi
\fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi}
\gdef\arraybackslash{\let\\=\@arraycr}
% Subscript built from a scaled \mbox so it also works outside math mode.
\def\@textsubscript#1{{\m@th\ensuremath{_{\mbox{\fontsize\sf@size\z@#1}}}}}
% \Panel{text}{color}{cols}{spec}: colored \multicolumn cell.  The leading ")"
% is the ">"-replacement declared by the patched \@testpach above.
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% TEI editorial elements are intentionally mapped to no-ops.  NOTE(review):
% this also empties the standard \ref command; cross-references in this
% document are made with \hyperref instead.
\def\abbr{}
\def\corr{}
\def\expan{}
\def\gap{}
\def\orig{}
\def\reg{}
\def\ref{}
\def\sic{}
\def\persName{}\def\name{}
\def\placeName{}
\def\orgName{}
% Simple text-styling helpers emitted by the converter (the fontspec-based
% ones are only usable in the XeTeX branch).
\def\textcal#1{{\fontspec{Lucida Calligraphy}#1}}
\def\textgothic#1{{\fontspec{Lucida Blackletter}#1}}
\def\textlarge#1{{\large #1}}
\def\textoverbar#1{\ensuremath{\overline{#1}}}
\def\textquoted#1{‘#1’}
\def\textsmall#1{{\small #1}}
\def\textsubscript#1{\@textsubscript{\selectfont#1}}
\def\textxi{\ensuremath{\xi}}
\def\titlem{\itshape}
% Environments for TEI front-matter constructs (bylines, titles, imprints,
% manuscript descriptions).  Most only adjust spacing/fonts and end the
% paragraph on close.
\newenvironment{biblfree}{}{\ifvmode\par\fi }
\newenvironment{bibl}{}{}
\newenvironment{byline}{\vskip6pt\itshape\fontsize{16pt}{18pt}\selectfont}{\par }
\newenvironment{citbibl}{}{\ifvmode\par\fi }
\newenvironment{docAuthor}{\ifvmode\vskip4pt\fontsize{16pt}{18pt}\selectfont\fi\itshape}{\ifvmode\par\fi }
\newenvironment{docDate}{}{\ifvmode\par\fi }
\newenvironment{docImprint}{\vskip 6pt}{\ifvmode\par\fi }
\newenvironment{docTitle}{\vskip6pt\bfseries\fontsize{22pt}{25pt}\selectfont}{\par }
\newenvironment{msHead}{\vskip 6pt}{\par}
\newenvironment{msItem}{\vskip 6pt}{\par}
\newenvironment{rubric}{}{}
\newenvironment{titlePart}{}{\par }
% Paragraph-mode column types.  The leading ")" acts as the ">" specifier via
% the patched \@testpach above.
\newcolumntype{L}[1]{){\raggedright\arraybackslash}p{#1}}
\newcolumntype{C}[1]{){\centering\arraybackslash}p{#1}}
\newcolumntype{R}[1]{){\raggedleft\arraybackslash}p{#1}}
\newcolumntype{P}[1]{){\arraybackslash}p{#1}}
\newcolumntype{B}[1]{){\arraybackslash}b{#1}}
\newcolumntype{M}[1]{){\arraybackslash}m{#1}}
\definecolor{label}{gray}{0.75}
% Render unused attributes struck through in grey.
\def\unusedattribute#1{\sout{\textcolor{label}{#1}}}
% \xref{url}{text}: hyperlinked cross-reference via hyperref's URL normaliser.
\DeclareRobustCommand*{\xref}{\hyper@normalise\xref@}
\def\xref@#1#2{\hyper@linkurl{#2}{#1}}
% Make "_" usable in running text: the active definition typesets a subscript
% through \ensuremath; mathcode "8000 makes math mode use the same active
% definition; finally "_" is given catcode 12 (ordinary character) in text.
\begingroup
\catcode`\_=\active
\gdef_#1{\ensuremath{\sb{\mathrm{#1}}}}
\endgroup
\mathcode`\_=\string"8000
\catcode`\_=12\relax
% Page geometry and general-purpose packages (framing, long tables,
% strike-out, verbatim, running headers, graphics, margin notes).
\usepackage[a4paper,twoside,lmargin=1in,rmargin=1in,tmargin=1in,bmargin=1in,marginparwidth=0.75in]{geometry}
\usepackage{framed}
\definecolor{shadecolor}{gray}{0.95}
\usepackage{longtable}
\usepackage[normalem]{ulem}
\usepackage{fancyvrb}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage{marginnote}
% Citations print bare (no brackets); margin notes in small italics.
\renewcommand{\@cite}[1]{#1}
\renewcommand*{\marginfont}{\itshape\footnotesize}
% Extension search order for extensionless \includegraphics calls.
\def\Gin@extensions{.pdf,.png,.jpg,.mps,.tif}
\pagestyle{fancy}
\usepackage[pdftitle={Towards Optimized K means Clustering using Nature-Inspired Algorithms for Software Bug Prediction},
pdfauthor={}]{hyperref}
\hyperbaseurl{}
% A4 dimensions (redundant with geometry's a4paper; kept from the generator).
\paperwidth210mm
\paperheight297mm
% TOC page-number widths and dot separation; show down to subsubsections.
\def\@pnumwidth{1.55em}
\def\@tocrmarg {2.55em}
\def\@dotsep{4.5}
\setcounter{tocdepth}{3}
% Line- and page-breaking penalties tuned toward tolerant justification and
% strict widow/club suppression.
\clubpenalty=8000
\emergencystretch 3em
\hbadness=4000
\hyphenpenalty=400
\pretolerance=750
\tolerance=2000
\vbadness=4000
\widowpenalty=10000
% Sectioning commands with compact vertical spacing.
% \@startsection arguments: {name}{level}{indent}{beforeskip}{afterskip}
% {style}; a negative beforeskip suppresses indentation of the paragraph that
% follows the heading; a negative afterskip (subparagraph) gives a run-in
% heading.
\renewcommand\section{\@startsection {section}{1}{\z@}%
{-1.75ex \@plus -0.5ex \@minus -.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\Large\bfseries}}
\renewcommand\subsection{\@startsection{subsection}{2}{\z@}%
{-1.75ex\@plus -0.5ex \@minus -.2ex}% glue normalized (was "\@minus- .2ex")
{0.5ex \@plus .2ex}%
{\reset@font\Large}}
\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}%
{-1.5ex\@plus -0.35ex \@minus -.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\large}}
\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}%
{-1ex \@plus-0.35ex \@minus -0.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\normalsize}}
\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}%
{1.5ex \@plus1ex \@minus .2ex}%
{-1em}%
{\reset@font\normalsize\bfseries}}
% TOC entry layout: bold, un-dotted section entries; dotted lines for deeper
% levels with increasing indents.
\def\l@section#1#2{\addpenalty{\@secpenalty} \addvspace{1.0em plus 1pt}
\@tempdima 1.5em \begingroup
\parindent \z@ \rightskip \@pnumwidth
\parfillskip -\@pnumwidth
\bfseries \leavevmode #1\hfil \hbox to\@pnumwidth{\hss #2}\par
\endgroup}
\def\l@subsection{\@dottedtocline{2}{1.5em}{2.3em}}
\def\l@subsubsection{\@dottedtocline{3}{3.8em}{3.2em}}
\def\l@paragraph{\@dottedtocline{4}{7.0em}{4.1em}}
\def\l@subparagraph{\@dottedtocline{5}{10em}{5em}}
% Ensure the counters exist (the article class has no chapter counter).
\@ifundefined{c@section}{\newcounter{section}}{}
\@ifundefined{c@chapter}{\newcounter{chapter}}{}
\newif\if@mainmatter
\@mainmattertrue
\def\chaptername{Chapter}
% Front matter: roman page numbers and roman chapter/section numbers.
\def\frontmatter{%
\pagenumbering{roman}
\def\thechapter{\@roman\c@chapter}
\def\theHchapter{\roman{chapter}}
\def\thesection{\@roman\c@section}
\def\theHsection{\roman{section}}
\def\@chapapp{}%
}
% Main matter: arabic numbering, counters reset, deep section numbering.
\def\mainmatter{%
\cleardoublepage
\def\thechapter{\@arabic\c@chapter}
\setcounter{chapter}{0}
\setcounter{section}{0}
\pagenumbering{arabic}
\setcounter{secnumdepth}{6}
\def\@chapapp{\chaptername}%
\def\theHchapter{\arabic{chapter}}
\def\thesection{\@arabic\c@section}
\def\theHsection{\arabic{section}}
}
% Back matter: appendix-style alphabetic chapter numbering.
\def\backmatter{%
\cleardoublepage
\setcounter{chapter}{0}
\setcounter{section}{0}
\setcounter{secnumdepth}{2}
\def\@chapapp{\appendixname}%
\def\thechapter{\@Alph\c@chapter}
\def\theHchapter{\Alph{chapter}}
\appendix
}
% Numbered bibliography list; the argument is the widest expected label
% (used to size the hanging indent).  Mirrors the standard thebibliography
% environment, including the empty-list warning.
\newenvironment{bibitemlist}[1]{%
\list{\@biblabel{\@arabic\c@enumiv}}%
{\settowidth\labelwidth{\@biblabel{#1}}%
\leftmargin\labelwidth
\advance\leftmargin\labelsep
\@openbib@code
\usecounter{enumiv}%
\let\p@enumiv\@empty
\renewcommand\theenumiv{\@arabic\c@enumiv}%
}%
\sloppy
\clubpenalty4000
\@clubpenalty \clubpenalty
\widowpenalty4000%
\sfcode`\.\@m}%
{\def\@noitemerr
{\@latex@warning{Empty `bibitemlist' environment}}%
\endlist}
% TOC printed as an unnumbered section.
\def\tableofcontents{\section*{\contentsname}\@starttoc{toc}}
% Paragraph layout: no inter-paragraph skip, 1em first-line indent.
\parskip0pt
\parindent1em
% NOTE(review): byte-identical duplicate of the \Panel definition given
% earlier in this preamble; the redefinition is redundant but harmless.
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% Ragged-right, tightly spaced list for references; labels in italics.
\newenvironment{reflist}{%
\begin{raggedright}\begin{list}{}
{%
\setlength{\topsep}{0pt}%
\setlength{\rightmargin}{0.25in}%
\setlength{\itemsep}{0pt}%
\setlength{\itemindent}{0pt}%
\setlength{\parskip}{0pt}%
\setlength{\parsep}{2pt}%
\def\makelabel##1{\itshape ##1}}%
}
{\end{list}\end{raggedright}}
% Same list, but with upright labels.
\newenvironment{sansreflist}{%
\begin{raggedright}\begin{list}{}
{%
\setlength{\topsep}{0pt}%
\setlength{\rightmargin}{0.25in}%
\setlength{\itemindent}{0pt}%
\setlength{\parskip}{0pt}%
\setlength{\itemsep}{0pt}%
\setlength{\parsep}{2pt}%
\def\makelabel##1{\upshape ##1}}%
}
{\end{list}\end{raggedright}}
% specHead{label}{title}: ruled heading with anchor, running head and
% PDF bookmark.
\newenvironment{specHead}[2]%
{\vspace{20pt}\hrule\vspace{10pt}%
\phantomsection\label{#1}\markright{#2}%
\pdfbookmark[2]{#2}{#1}%
\hspace{-0.75in}{\bfseries\fontsize{16pt}{18pt}\selectfont#2}%
}{}
% Document metadata placeholders filled in by the converter.
\def\TheFullDate{1970-01-01 (revised: 01 January 1970)}
\def\TheID{\makeatother }
\def\TheDate{1970-01-01}
\title{Towards Optimized K means Clustering using Nature-Inspired Algorithms for Software Bug Prediction}
\author{}\makeatletter
\makeatletter
% Flush output to the next left-hand (verso) page in twoside mode.
\newcommand*{\cleartoleftpage}{%
\clearpage
\if@twoside
\ifodd\c@page
\hbox{}\newpage
\if@twocolumn
\hbox{}\newpage
\fi
\fi
\fi
}
\makeatother
\makeatletter
\thispagestyle{empty}
\markright{\@title}\markboth{\@title}{\@author}
% 9pt/11pt \small with tightened display skips and first-level list spacing.
\renewcommand\small{\@setfontsize\small{9pt}{11pt}\abovedisplayskip 8.5\p@ plus3\p@ minus4\p@
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip \z@ plus2\p@
\belowdisplayshortskip 4\p@ plus2\p@ minus2\p@
\def\@listi{\leftmargin\leftmargini
\topsep 2\p@ plus1\p@ minus1\p@
\parsep 2\p@ plus\p@ minus\p@
\itemsep 1pt}
}
\makeatother
% Framed verbatim defaults; mirrored twoside running headers and footers.
\fvset{frame=single,numberblanklines=false,xleftmargin=5mm,xrightmargin=5mm}
\fancyhf{}
\setlength{\headheight}{14pt}
\fancyhead[LE]{\bfseries\leftmark}
\fancyhead[RO]{\bfseries\rightmark}
\fancyfoot[RO]{}
\fancyfoot[CO]{\thepage}
\fancyfoot[LO]{\TheID}
\fancyfoot[LE]{}
\fancyfoot[CE]{\thepage}
\fancyfoot[RE]{\TheID}
% Grey link borders; numbered PDF bookmarks.
\hypersetup{citebordercolor=0.75 0.75 0.75,linkbordercolor=0.75 0.75 0.75,urlbordercolor=0.75 0.75 0.75,bookmarksnumbered=true}
\fancypagestyle{plain}{\fancyhead{}\renewcommand{\headrulewidth}{0pt}}
\date{}
\usepackage{authblk}
% Keyword list printed after the abstract.
\providecommand{\keywords}[1]
{
\footnotesize
\textbf{\textit{Index terms---}} #1
}
\usepackage{graphicx,xcolor}
% Journal house colors.
\definecolor{GJBlue}{HTML}{273B81}
\definecolor{GJLightBlue}{HTML}{0A9DD9}
\definecolor{GJMediumGrey}{HTML}{6D6E70}
\definecolor{GJLightGrey}{HTML}{929497}
% Abstract set ragged-right between two grey rules.
\renewenvironment{abstract}{%
\setlength{\parindent}{0pt}\raggedright
\textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
\vskip16pt
\textcolor{GJBlue}{\large\bfseries\abstractname\space}
}{%
\vskip8pt
\textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
\vskip16pt
}
% Absolutely positioned text blocks (banners) and review line numbers.
\usepackage[absolute,overlay]{textpos}
\makeatother
\usepackage{lineno}
\linenumbers
\begin{document}
% Author list; the bracketed numbers link to the \affil entries below.
\author[1]{Tameswar Kajal}
\author[2]{Geerish Suddul}
\author[3]{Kumar Dookhitram}
\affil[1]{University of Technology}
\renewcommand\Authands{ and }
\date{\small \em Received: 1 January 1970 Accepted: 1 January 1970 Published: 1 January 1970}
\maketitle
\begin{abstract}
In today's software development environment, the necessity for providing quality software products has undoubtedly remained the largest difficulty. As a result, early software bug prediction in the development phase is critical for lowering maintenance costs and improving overall software performance. Clustering is a well-known unsupervised method for data classification and finding related patterns hidden in datasets.
\end{abstract}
\keywords{data clustering, K-means algorithm, Nature-inspired algorithms, software bug detection, coral reefs.}
% Absolutely positioned journal banner, beta-conversion notice, and DOI line.
\begin{textblock*}{18cm}(1cm,1cm) % {block width} (coords)
\textcolor{GJBlue}{\LARGE Global Journals \LaTeX\ JournalKaleidoscope\texttrademark}
\end{textblock*}
\begin{textblock*}{18cm}(1.4cm,1.5cm) % {block width} (coords)
\textcolor{GJBlue}{\footnotesize \\ Artificial Intelligence formulated this projection for compatibility purposes from the original article published at Global Journals. However, this technology is currently in beta. \emph{Therefore, kindly ignore odd layouts, missed formulae, text, tables, or figures.}}
\end{textblock*}
\begin{textblock*}{10cm}(1.05cm,3cm)
{{\textit{CrossRef DOI of original article:}} \underline{}}
\end{textblock*}\let\tabcellsep&
\section[{Introduction}]{Introduction}\par
In an era of technological disruption, the demand for software adoption has accelerated. They are a part of our society and play an important role in shaping it. Our modern society is becoming increasingly reliant on complex software systems. Thus, it is critical to build reliable and trustworthy systems in a cost-effective and timely manner. The presence of defective modules in software drives up development and maintenance expenses, leading to customer dissatisfaction. The need for quality assurance has inevitably remained the biggest challenge in today's software development environment. Hence, software bug prediction is an important task to help developers locate bugs more efficiently.\par
Software bug prediction is an imperative task in the Software Development Life Cycle (SDLC) as it pertains to the overall success of software. One method in this direction is to use machine learning (ML) methods to predict defects in software. In addition, implementing this method earlier in the SDLC process enhances the quality of the product and lowers the cost of software maintenance. Many researchers have applied different theories and methodologies in the field of software bug prediction. Two things are clear from the literature when it comes to defect prediction. Initially, no single prediction approach dominates {\ref (Lessmann et al.,2008)}, and next, the employment of various sets of data, data pre-processing, validation systems, and performance statistics makes it challenging to make sense of the multiple prediction outcomes \hyperref[b5]{(Myrtveit et al.,2005)}. There are two common ML models used for prediction based on dataset availability. The first is the supervised approach, in which a software defect prediction model is built from a training set of data and then tested on a testing dataset. The second is the unsupervised approach, in which the defect prediction model for software is built from scratch using the present testing dataset without training the dataset.\par
Clustering algorithms have been commonly used to evade the lack of training datasets available being a constraint. Cluster analysis groups things into clusters based on their similarity to create a visual representation of data {\ref (Jain and Dubes, 1998)}. As pointed out by \hyperref[b17]{Kaur, 2010}, one of the better instances of unsupervised learning is K-means clustering. Clustering is beneficial because it makes it easier to obtain or locate relevant information at a faster rate. Among the different clustering approaches that already exist, the Kmeans methodology is obviously fairly popular. \hyperref[b7]{(Gayathri et al., 2015)}. The preliminary values of the initial centroids, which are generated randomly each time the algorithm is run, have a significant impact on the performance of k-means. K-means frequently fall into local optima that produce poor clustering results. Obtaining a globally optimal clustering result involves a time-consuming, exhaustive approach that tests all partitioning choices. A heuristic approach to the problem is to use an optimization algorithm to search for global optima in each computer iteration.\par
Our unsupervised approach uses the k-means approach to divide the unlabeled dataset into defective and non-defective non-overlapped clusters for bug prediction. The goal of this research is to verify the hybrids' efficacy as well as to quantify the quality of results produced by each clustering hybrid model. In this study, we have applied the k-means clustering algorithm, an unsupervised algorithm with different NIAs including Genetic algorithm (GA), Bat algorithm (BA), Particle Swarm Optimization (PSO), Coral Reefs Optimization (CRO), Cuckoo Search optimization (CSO) algorithm, Ant colony optimization (ACO), Firefly algorithm (FA) and Grey Wolf Optimizer (GWO) for software bug prediction. The rest of this paper is organized as follows. Section 2 presents a discussion of the related work in software bug prediction. An overview of the methodology, consisting of the algorithms used are presented in Section 3. Section 4 describes the proposed method. Section 5 describes the Dataset and Data Processing method. The evaluation methodology is discussed in section 6. The results and discussion part is discussed in Section 7. Section 8 discusses the practical implications followed by conclusions and future works in section 9.
\section[{II.}]{II.}
\section[{Related Works}]{Related Works}\par
K-means clustering is a well-known partitioned clustering algorithm that has been used in a variety of applications. In the literature, several variations of Kmeans have been proposed to improve its performance for the broad clustering problem. {\ref Fong et al. (2012)} studied the integration of bio-inspired optimization methods into K-means clustering for software bug prediction in order to assess clustering performance. The main optimization algorithms tested include the Firefly algorithm, Cuckoo search algorithm, Bat algorithm, Wolf and Ant Colony Optimization (ACO) algorithms. Results show that the combination of these algorithms acquired improved performance accuracy compared with ordinary k-means, at the same time accelerating the search process and avoiding local optima. Zhong et al., 2004 compared the k-means algorithm to the neural-gas algorithm. The neural-gas algorithm outperformed the k-means algorithm in terms of mean square error values. However, this method necessitates the use of a software expert to determine whether the software is appropriate.\par
Annisa et al., 2020, came up with an improved version of k-means algorithm for software bug prediction, that locate the initial centroid of the k-means algorithm and determine the number of clusters present. Because it produces better accuracy than the simple K-Means method, this proposed method could be useful for clustering other data types. Seliya and Khoshgoftaar, 2007 proposed K-means for software failure prediction. Their method iteratively labels clusters as fault-prone or not using expert domain knowledge as a restriction.\par
The k-means algorithm based on quad tree was proposed by Bishnu and Bhattacherjee, 2012 and it was compared to some clustering algorithms. Their proposed algorithm has error rates that are comparable to k-means, Linear Discriminant Analysis and Naive Bayes. \hyperref[b10]{Catal et al. 2009} used the x-means clustering algorithm to create faulty and non-faulty clusters based on software metrics. Lines of code, cyclomatic complexity, operand and operator are the metrics. If the metric values are complex than the threshold, the software entity is predicted to be defective, and vice versa. Almayyan, 2021 used dataset from the NASA repository and used three clustering algorithms, Farthest First, X-means and Self-organizing map. This article presents a comparison of software defect prediction algorithms based on Bat, Cuckoo, Grey Wolf Optimizer (GWO), and Particle Swarm Optimization (PSO) in order to evaluate different feature selection algorithms. The Farthest First clustering algorithm was found to be effective in predicting software faultiness, and Bat and Cuckoo were found to be useful in comparison to all other metaheuristic algorithms.\par
Though several academics have sought to merge K-means clustering with nature-inspired algorithms (NIAs), their efforts have been restricted to almost identical group movements, such as the Firefly, Artificial Bee Colony (ACO), and Particle Swarm Optimization (PSO) algorithms {\ref (Jensi and Jiji, 2015)}. In addition, only a few bio-inspired optimization methods that are integrated with K-means are provided in the previous studies. Only 7 of the 28 NIAs hybridized with K-means (Genetic Algorithm, Particle Swarm Optimization, Bat Algorithm, Artificial Bee Colony, Differential Evolution, Harmony Search, and Symbiotic Organism Search) dedicated their hybridization to solving automatic clustering problems, accounting for 20.6 percent of the total (Ikotun et al., 2021). In general, it can be seen that the rate of publishing on K-means hybridization with specific NIAAs is minimal. More research is needed in this area to see if there are any other ways to improve the performance of the existing hybridization algorithm. This suggests that combining Kmeans with these other NIAs to solve automatic clustering problems should be investigated.\par
The purpose of this research is to look into the mechanics of incorporating certain NIAs into the Kmeans clustering algorithm. The optimization function adds to the existing best solution by progressively improving it with a new solution from an unknown fragment of the search space. When a new solution is identified to be better than the present one, the searching agents replace the solutions and continue searching until some stopping criteria are fulfilled.
\section[{III.}]{III.}
\section[{Methodology a) K means Clustering Algorithm}]{Methodology a) K means Clustering Algorithm}\par
The K-means clustering algorithm is a partitioned clustering technique that divides a dataset into k number of clusters using a certain fitness measure. Due to the large amount of data objects in real-world datasets, distributing data items into appropriate clusters to obtain an ideal cluster outcome is computationally expensive and time-consuming (Ikotun et al.2021).\par
Given a dataset \(X=\{x_i\}\), where \(i=1,2,\dots,n\), of \(d\)-dimensional data points of size \(n\), \(X\) is partitioned into \(k\) clusters such that \(J(c_k)=\sum_{x_i\in c_k}\|x_i-\mu_k\|^{2}\)\textbf{(1)}\par
With the objective function: minimize the sum of the square error over all the k clusters. That is, minimize \(J(C)=\sum_{k=1}^{K}\sum_{x_i\in c_k}\|x_i-\mu_k\|^{2}\)\textbf{(2)}\par
When assigning N objects to k clusters, the purpose of the clustering algorithm is to limit the number of potential possibilities. This can be expressed numerically as: \(S(N,K)=\frac{1}{K!}\sum_{i=0}^{K}(-1)^{K-i}\,\frac{K!}{i!\,(K-i)!}\,i^{N}\)\textbf{(3)}\par
b) Nature-inspired algorithms (NIAs) Nature-inspired computation has gained popularity in the previous two decades and has been used in practically every field of research and engineering {\ref (Yang et al.2013)}. NIAs are global optimization strategies for solving difficult real-world issues \hyperref[b19]{(Okwu et al. 2020}). NIAs have successfully provided suboptimal solutions to automatic clustering problems in a reasonable amount of time \hyperref[b20]{(Hruschka et al. 2009}). The population is used for the exploration of search space in the nature-inspired metaheuristic, ensuring a higher possibility of finding optimal cluster partitions (Nanda and Panda, 2014). It has been discovered that combining K-means with NIAs for automatic clustering improves the performance of algorithms when dealing with cluster analysis. In most circumstances, the automatic cluster number determination aids in the selection of near-optimal starting cluster centroids for the clustering process rather than the normal random selection \hyperref[b22]{(Zhou et al. 2017}).
\section[{c) Combination of k-means with Nature-Inspired Algorithms (NIAs)}]{c) Combination of k-means with Nature-Inspired Algorithms (NIAs)}\par
Clustering using NIAs is now as simple as assigning combinations of centroids to the searching agents, allowing them to heuristically find the best answer. Though the specifics of conducting a heuristic search vary depending on which nature-inspired optimization algorithm technique is used, the initialization stage and the finishing step, where the quality of the discovered solution is evaluated as a stopping condition, are both comparable.\par
S is defined as the solution space that contains a finite number of x i , where i is the solution's index, in the initialization construct. The search agents represent the solutions x, each of which holds a set of centroids, regardless of the types of bio-inspired optimization methods used. Typically, a large population of searching agents, N, is utilized to collaboratively search for the best feasible cluster configurations (as expressed by the locations of the optimal centroids). K is the number of clusters that must be formed, which is generally a userdefined figure. D is the dimension of the search space, which is the number of attributes a data point possesses.\par
To find the optimal configuration of centroids we let \(cen_{j,v}\) be the centroid at the \(j\)th cluster and the \(v\)th attribute. To obtain the centroid location, the following formula is used: \(cen_{j,v}=\sum_{i=1}^{S} w_{i,j}\,x_{i,v}\big/\sum_{i=1}^{S} w_{i,j}\), where \(j=1,\dots,K\) and \(v=1,\dots,K\times D\) (4)\par
In our concept, the matrix \(cen_{j,v}\) contains all of the cluster centers and is a two-dimensional matrix with \(K\times D\) characteristics. \(F(cen)=\sum_{j=1}^{K}\sum_{i=1}^{S} w_{i,j}\sum_{v=1}^{K\times D}\,(x_{i,v}-cen_{j,v})^{2}\) (5)\par
The calculation method loops K * D times to analyze the values of all the attributes of x in each cluster v to calculate the distance between each x and the centroid.\par
Cluster centers can be designated by data points. For example, in a two-cluster clustering task, the objective function requires three variables. As a result, there are three dimensions.\par
Three variables, and hence three-dimensional spaces, are required, and the \(i\)th data point may be written as \(x_i=(i,[x_{i,1},x_{i,2},x_{i,3},x_{i,4},x_{i,5},x_{i,6}])\). The clustering strategy can be formulated as follows: \(clmat_{i,j}=\min_{k\in K}\{\|x_i-cen_k\|\}\)\textbf{(6)}\par
Sets of functional parameters must be defined in order to execute the bio-inspired optimization algorithms. Despite the fact that some of their parameters are shared, each set of parameters for the hybrid bio-inspired clustering algorithms is designed independently. The six models investigated are K means with Genetic Algorithm, K means with Bat algorithm, K means with Ant colony algorithm, K means with Cuckoo Search Algorithm, K means with Firefly Algorithm and K means with Coral reefs algorithm. The most significant variations are in how the global optimal exploration is carried out for all these algorithms. The evaluation stage comes right after the exploration construct, and it compares if the new solution is better than the current best one.
\section[{d) Genetic Algorithm}]{d) Genetic Algorithm}\par
Genetic Algorithms (GA) are randomized heuristic search algorithms that are based on natural selection and genetic principles \hyperref[b25]{(Goldberg, 1989)}. The genetic operators used in the combination of K-means and GA are selection, distance-based mutation, and the K-means operator. The parameters have been set according to the study of \hyperref[b24]{Bouhmala et al. 2015}. P(0) is chosen at random as the starting population. Each allele in the population can be given a cluster number from the uniform distribution over the set \(\{1,\dots,K\}\) at random.\par
According to the distribution given below, the selection operator selects a chromosome from the preceding population at random as follows: \(P(s_i)=F(s_i)\big/\sum_{j=1}^{N}F(s_j)\)\textbf{(7)}\par
The possibility of solutions surviving in the future population is ranked in the current population. Each solution in the population must be assigned a figure of merit or a fitness value: \(F(s_w)=g(s_w)\) if \(g(s_w)\ge 0\), and \(F(s_w)=0\) otherwise.\textbf{(8)}\par
e) Bat Algorithm (BA) Bat echolocation is used in the bat algorithm (BA), which is a heuristic optimization tool \hyperref[b25]{(Yang, 2010)}. The four basic parameters of a BA are pulse frequency, pulse rate, velocity, and a constant. The parameters have been set according to the study {\ref (Huang and Ma, 2020)}.\par
The frequency, velocity, and position for each bat are initialized. The virtual bats' movement is described by updating their velocity and position using the equations below for each time step \(t\), where \(T\) is the iteration limit: \(f_i=f_{min}+(f_{max}-f_{min})\beta\) (9), \(v_i^{t+1}=v_i^{t}+(x_i^{t}-x_{*})f_i\) (10), \(x_i^{t+1}=x_i^{t}+v_i^{t}\)\textbf{(11)}\par
A random number is generated when the bat positions are updated; if the random number is greater than the pulse emission rate, a new location is formed around the current best solutions, as shown in the equation below.\par
\(x_{new}=x_{old}+\epsilon A^{t}\) (12)
\section[{f) Ant Colony Optimization (ACO)}]{f) Ant Colony Optimization (ACO)}\par
The ACO heuristic was inspired by investigations of ant foraging behavior in real colonies, which indicated that ants can often figure out the shortest path between food source and nest \hyperref[b27]{(Zheng et al. 2003}). The parameters have been set according to the study {\ref (Tang et al. 2012)}.\par
When the ant moves from \(i\) to \(j\), the path node at the start can be set as \(A\), \(A=\{0,1,\dots,n-1\}\). This reflects the role of pheromones accumulated by ants during movement and reveals the relative relevance of the trajectory. The larger the pheromone value is, the higher the probability that subsequent ants choose this path.\par
The probability of the ant moving from \(i\) to \(j\) is computed using the following formula: \(p_{ij}^{k}(t)=\tau_{ij}^{\alpha}(t)\,\eta_{ij}^{\beta}(t)\big/\sum\tau_{ij}^{\alpha}(t)\,\eta_{ij}^{\beta}(t)\)\textbf{(13)}
\section[{g) Firefly Algorithm (FA)}]{g) Firefly Algorithm (FA)}\par
Firefly algorithm is a very strong technique for solving restricted optimization and NP-hard problems (Apostolopoulos and Vlachos, 2011). The parameters have been set according to the study {\ref (Tang et al.2012)}.\par
The attractiveness of a firefly \(i\) on a firefly \(j\) is determined by the degree of the firefly \(i\)'s brightness and the distance \(r_{ij}\) between the firefly \(i\) and the firefly \(j\), as shown below: \(I(r)=I_s/r^{2}\)\textbf{(14)}\par
Consider the case when there are \(n\) fireflies and the solution for firefly \(i\) is \(x_i\). The brightness of the firefly \(i\) is linked to the objective function \(f(x_i)\): \(I=f(x_i)\)\textbf{(15)}\par
Each firefly has an attraction value, and the less dazzling (attractive) one is drawn to the brighter one and transferred there. The attractiveness value \(\beta\) is relative, based on the distance between fireflies. The pheromone weight is a constant; the time of iteration is \(N_c\). The expected heuristic factor \(\eta\) demonstrates the relevance of visibility relative to other factors; it also represents the significance of the heuristic component in the entire path of the ant's movement. \(\beta(r)=\beta_0 e^{-\gamma r^{2}}\)\textbf{(16)}\par
Where \(v_i^{t}\) and \(x_i^{t}\) are the velocity and position at time \(t\), \(v_i^{t+1}\) and \(x_i^{t+1}\) are the velocity and position at time \(t+1\), and \(\beta\) is a random number between 0 and 1.\par
Where ? 0 is the firefly attraction value at r = 0 and ? is the media light absorption coefficient.\par
Where \(\epsilon\) is a random number and \(A^{t}\) represents the average loudness of all bats at time \(t\).\par
An initial population of \(n\) nests is randomly generated at the positions \(X=\{x_1^{0},x_2^{0},\dots,x_n^{0}\}\), to evaluate the objective values to find the current global best \(g_0^{t}\).\par
The new position is updated accordingly by performing a Lévy flight: \(x_i^{(t+1)}=x_i^{(t)}+\alpha\oplus\mbox{Lévy}(\lambda)\),\textbf{(17)}
\section[{i) Coral Reefs Optimization Algorithm (CRO)}]{i) Coral Reefs Optimization Algorithm (CRO)}\par
CRO is another nature-inspired algorithm, based on an artificial simulation of the process of coral reef formation and reproduction (Sanz et al.2014). The CRO algorithm has never been utilized in the realm of software bug detection to our knowledge. Corals reproduce at each iteration step in the CRO algorithm, producing new individuals. The parameters have been set according to the study \hyperref[b31]{(Medeiros et al., 2015)}.\par
By allocating a coral to each square (i j), the CRO algorithm generates a N x M square grid in which each square (i,j) may represent an alternate solution to a problem (or colony of corals). The formation of coral is the second phase. After three phases, the entire collection of existing corals in the reef is graded according to their level of healthiness (broadcast spawning, brooding, and larvae setting).
\section[{j) Particle Swarm Optimization (PSO)}]{j) Particle Swarm Optimization (PSO)}\par
The behavior of particles in a swarm is the central concept of the PSO. Each particle has its own location in a multidimensional space and communicates with the others. To move about in space, the particles employ social and cognitive information. When the algorithm comes to a halt, the best solution has been discovered \hyperref[b33]{(Koohi and Groza, 2014}). The parameters have been set according to the study \hyperref[b40]{(Rana et al., 2010)}.\par
The inertia weight balances the algorithm's local and global search abilities. The proportional contribution of the prior velocity to the current velocity is defined by the inertia weight: \(V_i^{k+1} = w v_i^k + c_1\,\mathrm{rand}\,(p_{\mathrm{best},i} - x_i^k) + c_2\,\mathrm{rand}\,(g_{\mathrm{best}} - x_i^k)\) (18), \(X_i^{k+1} = X_i^k + v_i^{k+1}\) (\textbf{19})
\section[{k) Grey Wolf Optimizer (GWO)}]{k) Grey Wolf Optimizer (GWO)}\par
The Grey Wolf Optimizer (GWO) is a simple, population-based, flexible, and derivative-free metaheuristic optimization method that intelligently avoids stagnation in local optima spots of the search space. It simulates the social behaviors of grey wolves in the aspects of their hierarchical leadership and hunting movement \hyperref[b37]{(Mirjalili et al., 2014)}. Grey wolves' leadership and hunting mechanism help to design a new metaheuristic algorithm with three steps: searching prey, encircling prey, and attacking prey.\par
During the GWO operation, the position of the wolves is continuously updated, with appropriate mathematical formulas \hyperref[b38]{(Hou et al., 2022)}. The parameters have been set according to the study \hyperref[b39]{(Wang et al., 2019)}.\par
IV.
\section[{Proposed Method}]{Proposed Method}\par
The purpose of clustering is to discover a proper set of centroids using the metaheuristic of the nature-inspired method as a guide. The metaheuristic will always insist on centroids being moved in a progressive manner in each phase, with the goal of finding the best grouping. The ideal group's ultimate result should be that the data points inside each cluster are closest to their centroid. During the search, the centroids move around in the search space, following the swarming pattern of the nature-inspired optimization method, until no further progress is seen. It comes to a halt when there is no other possible relocation that will yield a better result. Along with the success of employing nature-inspired metaheuristic algorithms to solve automatic clustering problems, it has been discovered that combining two or more metaheuristics for the same objective improves clustering performance. The performance of hybrid algorithms, according to Nanda and Panda 2014, is superior to that of separate algorithms in terms of robustness, effectiveness, and accuracy.\par
V.
\section[{Dataset and Data Processing}]{Dataset and Data Processing}\par
The dataset was collected from the online PROMISE repository. AR1, AR3, AR4, AR5, AR6, KC1, KC2, JM1, CM1, PC1 and PC5 were used respectively. With reference to the paper, by \hyperref[b14]{Shepperd et al. 2013}, data cleaning is mandatory before using any datasets available. Indeed, we noted a huge class imbalance issue with the available datasets (faulty, non-faulty), and all data inconsistencies, missing and null values were removed. Each dataset selected represents a NASA software system that includes various metrics. Each dataset is made up of a number of software modules and attributes. Modules with defects are classified as prone to faults, whereas those without defects are classified as non-fault prone. To address the curbs of the K-means clustering approach in generating globally optimum clusters, the suggested method uses the k-means algorithm together with a range of NIAs for software bug prediction. By adding an exploration function to the k-means algorithm, the combination of these strategies may improve the model. The exploration function improves the existing solution by examining regions outside of its immediate vicinity, and if a new, better solution than the current best one is discovered, the search agents will move toward it. The exploring procedure will continue until certain stopping criteria are met. Nature-inspired algorithms are metaheuristic algorithms, which means they have the ability to explore the combinatorial search space heuristically rather than exhaustively. The integration methods are based on representing the search agents as a combination of centroid locations, then the search agents explore the search space for the best solution.\par
Where \(\alpha > 0\) denotes the step size, which should be connected to the problem's scales. In most circumstances, we can use \(\alpha = 1\). For the training purpose, the entire dataset is used except for the last column (output column); only columns consisting of numerical values were considered. VI.
\section[{Evaluation a) Experimental Setup}]{Evaluation a) Experimental Setup}\par
The main goal of this research is to demonstrate the utility of the k-means algorithm with different NIAs, which we accomplished using Tensorflow to train the model. TensorFlow is an open-source machine learning platform to build and deploy prediction models. Google Colab was also used to run the results, which allowed the code to run with no configuration and free GPU access. Each dataset is performed 10 times in the trials to find the average CPU time and objective function values/best fitness value.\par
The clustering results of the new hybrid clustering algorithms are compared to the K-means, which serve as a benchmarking reference. The full dataset is used for training, and cluster formation is referred to until perfection is attained using the entire set of data. The ultimate clustering result's quality is determined by each cluster's integrity, which is represented by the objective function's final fitness value.\par
The hardware configuration used for all experiments in this study is as follows: Corei7-6500U CPU @2.50 GHz 2.60 GHz, Windows 10, 64-bit operating system, x64 based processor, RAM: 8 GB DDR4, and Hard Disk: SSD.
\section[{b) Performance Evaluation Measures}]{b) Performance Evaluation Measures}\par
In order to assess the effectiveness of combining the k-means algorithm and optimization algorithms in the prediction of software bugs, the evaluation metrics, accuracy and F-measure, have been calculated accordingly as shown in the following equation: \(\mathrm{Accuracy} = (TP + TN)\,/\,(TP + TN + FP + FN)\),\textbf{(20)}\par
Where TP = true positive, TN = true negative, FN = false negative and FP = false positive.\par
On the other hand, the external metric used to determine the accuracy of the clustering findings, known as the F-measure, is also computed.\par
The F-measure, which is the harmonic mean of precision and sensitivity performance, is calculated as follows: \(F = (2 \times P \times \mathrm{Sensitivity})\,/\,(P + \mathrm{Sensitivity})\), \textbf{(21)} Where \(P\) refers to precision and sensitivity is calculated by finding the non-defective modules that were accurately categorized. From the table above, K-means clustering is optimized using the various NIAs. We can see that all of the proposed algorithms perform better than the traditional standalone k-means algorithm. K-means appears to take the shortest computation time in any of the tests, maybe because it stops early in local optima (Table~3). This is evident from the accuracy obtained from the table above. NIAs speed up the process of clustering centroids and illustrate that all partitioning clustering methods can be linked with the natural search process to prevent local optima. Secondly, simple K-means was applied to the robust nature of GA, which shows adequate prediction accuracy for all datasets. Even though GA may converge to the global optimum due to mutation, GA faces the issue in terms of computational challenges. The application of k-means with the Bat algorithm apparently yields the same accuracy. This hybrid algorithm improves the convergence speed of BA and helps the k-means algorithm become independent of the initial centers. Next, K-means is combined with PSO. The PSO method is used to start the process because of its fast convergence, and then the K-Means algorithm is used to refine the PSO algorithm's outcome to near-optimal solutions. The hybridization of these two methods yields effective results in terms of efficiency and precision. The PSO algorithm can be used to generate good initial cluster centroids for the K-Means.
\section[{VII.}]{VII.}
\section[{Results and Discussions}]{Results and Discussions}
\section[{Practical Implications}]{Practical Implications}\par
Metaheuristics algorithms have shown to be effective optimizers. This research found that each of the hybrid K means based-nature-inspired optimization algorithm models outperformed the standalone K means algorithm in terms of accuracy and F1 score. Following the intrinsic limitations of K-means design and the virtues of Nature-inspired optimization techniques, it seems feasible to integrate them, allowing them to complement and function together. The algorithms' successful integration gives reason to believe that more advanced optimization mining techniques can be developed. This study can be used as a roadmap for researchers who want to incorporate other new emerging NIAs into improved clustering methods in the field of software bug detection.
\section[{IX.}]{IX.}
\section[{Conclusion and Future Works}]{Conclusion and Future Works}\par
Prediction of defect-prone software modules is an important goal in software engineering. The traditional clustering algorithm usually gets trapped in the problem of local optima. As a result, the nature-inspired method provides an alternative technique for solving clustering problems using its searching capabilities. This study's main contribution is combining the clustering algorithm with the different NIAs for software bug detection. To the authors' knowledge, only PSO, Cuckoo, Bat, and GWO (Grey Wolf Optimizer) algorithms were applied with clustering algorithms for software bug detection \hyperref[b13]{(Almayyan, 2021)}. The results are improved significantly when clustering algorithms are combined with bio-inspired optimization methods, apparently for the hybrid model of k-means clustering with the Coral reefs algorithm, achieving an accuracy of 96\%. For future work, this work can be replicated with other related datasets for the analysis of bug prediction in software. \begin{figure}[htbp]
\noindent\textbf{1} \par
\begin{longtable}{P{0.1954022988505747\textwidth}P{0.20028735632183906\textwidth}P{0.20517241379310344\textwidth}P{0.24913793103448273\textwidth}}
Dataset\tabcellsep Modules\tabcellsep Defective modules\tabcellsep Software metrics (Attributes)\\
AR1\tabcellsep 121\tabcellsep 9\tabcellsep 29\\
AR3\tabcellsep 63\tabcellsep 8\tabcellsep 29\\
AR4\tabcellsep 107\tabcellsep 20\tabcellsep 29\\
AR5\tabcellsep 36\tabcellsep 8\tabcellsep 29\\
AR6\tabcellsep 101\tabcellsep 15\tabcellsep 29\\
KC1\tabcellsep 2109\tabcellsep 1783\tabcellsep 22\\
KC2\tabcellsep 522\tabcellsep 107\tabcellsep 21\\
JM1\tabcellsep 7782\tabcellsep 1672\tabcellsep 21\\
CM1\tabcellsep 327\tabcellsep 42\tabcellsep 37\\
PC1\tabcellsep 705\tabcellsep 61\tabcellsep 37\\
PC5\tabcellsep 1711\tabcellsep 471\tabcellsep 38\end{longtable} \par
\caption{\label{tab_0}Table 1 :}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{2} \par
\begin{longtable}{P{0.1648091603053435\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}P{0.06229007633587786\textwidth}}
Datasets\tabcellsep AR1\tabcellsep AR3\tabcellsep AR4\tabcellsep AR5\tabcellsep AR6\tabcellsep KC1\tabcellsep KC2\tabcellsep JM1\tabcellsep CM1\tabcellsep PC1\tabcellsep PC5\\
k-Means\tabcellsep 88.90\tabcellsep 88.00\tabcellsep 89.01\tabcellsep 88.85\tabcellsep 88.43\tabcellsep 89.10\tabcellsep 89.00\tabcellsep 88.80\tabcellsep 89.00\tabcellsep 89.19\tabcellsep 89.99\\
K-Means +GA\tabcellsep 90.50\tabcellsep 90.58\tabcellsep 91.28\tabcellsep 91.55\tabcellsep 90.11\tabcellsep 90.00\tabcellsep 90.54\tabcellsep 90.53\tabcellsep 91.25\tabcellsep 90.00\tabcellsep 90.05\\
K-Means +BAT\tabcellsep 90.00\tabcellsep 91.59\tabcellsep 91.00\tabcellsep 92.34\tabcellsep 92.00\tabcellsep 92.98\tabcellsep 91.34\tabcellsep 90.00\tabcellsep 91.25\tabcellsep 92.56\tabcellsep 92.00\\
K-Means +PSO\tabcellsep 92.50\tabcellsep 92.65\tabcellsep 92.87\tabcellsep 93.01\tabcellsep 93.00\tabcellsep 92.99\tabcellsep 94.10\tabcellsep 92.67\tabcellsep 92.89\tabcellsep 93.10\tabcellsep 93.58\\
K-Means +Coral Reefs\tabcellsep 94.00\tabcellsep 94.54\tabcellsep 94.56\tabcellsep 94.87\tabcellsep 94.00\tabcellsep 95.96\tabcellsep 95.66\tabcellsep 96.88\tabcellsep 95.01\tabcellsep 95.04\tabcellsep 95.54\\
K-Means +Cuckoo\tabcellsep 94.50\tabcellsep 94.58\tabcellsep 94.58\tabcellsep 94.00\tabcellsep 94.56\tabcellsep 95.45\tabcellsep 95.88\tabcellsep 95.67\tabcellsep 95.44\tabcellsep 94.56\tabcellsep 94.78\\
K-Means + ACO\tabcellsep 94.00\tabcellsep 93.56\tabcellsep 93.50\tabcellsep 94.10\tabcellsep 93.78\tabcellsep 93.03\tabcellsep 93.56\tabcellsep 93.44\tabcellsep 93.89\tabcellsep 94.01\tabcellsep 94.52\\
K-Means +Firefly\tabcellsep 92.56\tabcellsep 92.67\tabcellsep 93.00\tabcellsep 93.44\tabcellsep 93.02\tabcellsep 93.56\tabcellsep 94.78\tabcellsep 93.67\tabcellsep 94.88\tabcellsep 94.34\tabcellsep 94.54\\
K-Means + GWO\tabcellsep 90.09\tabcellsep 92.47\tabcellsep 94.65\tabcellsep 93.22\tabcellsep 92.00\tabcellsep 92.60\tabcellsep 93.00\tabcellsep 92.50\tabcellsep 94.50\tabcellsep 94.12\tabcellsep 94.13\end{longtable} \par
\caption{\label{tab_1}Table 2 :}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{4} \par
\begin{longtable}{P{0.19650537634408602\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}P{0.05940860215053763\textwidth}}
Datasets\tabcellsep AR1\tabcellsep AR3\tabcellsep AR4\tabcellsep AR5\tabcellsep AR6\tabcellsep KC1\tabcellsep KC2\tabcellsep JM1\tabcellsep CM1\tabcellsep PC1\tabcellsep PC5\\
k-Means\tabcellsep 0.66\tabcellsep 0.79\tabcellsep 0.82\tabcellsep 0.80\tabcellsep 0.75\tabcellsep 0.81\tabcellsep 0.80\tabcellsep 0.81\tabcellsep 0.82\tabcellsep 0.82\tabcellsep 0.80\\
K-Means+GA\tabcellsep 0.84\tabcellsep 0.83\tabcellsep 0.83\tabcellsep 0.80\tabcellsep 0.83\tabcellsep 0.84\tabcellsep 0.84\tabcellsep 0.85\tabcellsep 0.82\tabcellsep 0.81\tabcellsep 0.85\\
K-Means +BAT\tabcellsep 0.83\tabcellsep 0.81\tabcellsep 0.83\tabcellsep 0.86\tabcellsep 0.86\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.85\tabcellsep 0.85\tabcellsep 0.87\tabcellsep 0.85\\
K-Means +PSO\tabcellsep 0.85\tabcellsep 0.85\tabcellsep 0.87\tabcellsep 0.87\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.87\tabcellsep 0.85\tabcellsep 0.87\tabcellsep 0.87\tabcellsep 0.87\\
K-Means +Coral Reefs\tabcellsep 0.86\tabcellsep 0.86\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.86\tabcellsep 0.86\tabcellsep 0.87\tabcellsep 0.88\tabcellsep 0.86\tabcellsep 0.87\tabcellsep 0.88\\
K-Means +Cuckoo\tabcellsep 0.89\tabcellsep 0.85\tabcellsep 0.88\tabcellsep 0.89\tabcellsep 0.86\tabcellsep 0.89\tabcellsep 0.86\tabcellsep 0.89\tabcellsep 0.89\tabcellsep 0.87\tabcellsep 0.88\\
K-Means+ ACO\tabcellsep 0.84\tabcellsep 0.83\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.84\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.85\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.86\\
K-Means +Firefly\tabcellsep 0.86\tabcellsep 0.85\tabcellsep 0.83\tabcellsep 0.87\tabcellsep 0.87\tabcellsep 0.85\tabcellsep 0.83\tabcellsep 0.85\tabcellsep 0.86\tabcellsep 0.88\tabcellsep 0.85\\
K-Means+ GWO\tabcellsep 0.82\tabcellsep 0.82\tabcellsep 0.81\tabcellsep 0.86\tabcellsep 0.84\tabcellsep 0.83\tabcellsep 0.79\tabcellsep 0.85\tabcellsep 0.84\tabcellsep 0.84\tabcellsep 0.85\\
VIII.\tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \tabcellsep \end{longtable} \par
\caption{\label{tab_2}Table 4 :}\end{figure}
\footnote{\label{foot_0} Towards Optimized K means Clustering using Nature-Inspired Algorithms for Software Bug Prediction © 2023 Global Journals} \backmatter
\subsection[{Acknowledgments}]{Acknowledgments}\par
This study received no formal support from public, private, or not-for-profit funding organizations.
\subsection[{( )}]{( )}\par
Year 2023 C Furthermore, K-means and the Coral reefs algorithm are combined. The results for this combined method are quite promising since they show that using the CRO method for a clustering application can produce better results compared to using hybrid genetic algorithms, which is the most often used clustering optimization technique. To the best of our knowledge, CRO has not been used with clustering for software bug detection. The hybrid model of k-means with the Cuckoo Search algorithm shows significant accuracy, likewise the CRO algorithm. Cuckoo search is used to provide a robust initialization, whereas K-means is utilized to construct solutions faster. K-means is also combined with the Ant Colony Optimization algorithm. The suggested method's learning mechanism is based on the use of a defined parameter termed pheromone, which eliminates undesirable K-means algorithm solutions. The suggested method improves the K-means algorithm by making it less reliant on starting parameters such as randomly picked beginning cluster centers, resulting in a more stable algorithm. K-means with firefly also produces near accuracy with the CRO and Cuckoo search algorithms. This is because fireflies with high similarity are dispersed, resulting in a more diverse distribution of the entire swarm in search space. K-means with GWO has also shown rapid convergence. This improvement is caused by the fact that K-means significantly affects the GWO population and separates it into two clusters. Because GWO often operates as three clusters and has three wolves in the search space, K-means is advantageous for GWO. As a result, it can be concluded that K-means combined with GWO increased GWO's effectiveness to some extent.\par
High clustering accuracy and efficiency were obtained from the hybrid clustering of the Coral reefs and Cuckoo Search algorithms. Hybrid clustering with the Coral reefs algorithm has never been applied in the field of software bug detection and has indeed shown promising results. Hybrid clustering with the Coral reefs algorithm locates cluster centroids without causing premature convergence. The findings of the evaluation results add evidence that NIAs can indeed speed up the process and avoid local optima. Because fewer iterations are required to achieve the best cluster outcome, selecting the number of clusters enhances the hybridized clustering method's convergence speed. The computational time for each algorithm is computed as shown in Table~3. Less computational time was noted when K-means was integrated with the Coral reefs and Cuckoo Search algorithms respectively. For statistical performance, the F1 score has been calculated for all the algorithms as shown in Table~4. Again, the F1 Score shows that K-means with Coral reefs resulted in dependable and significant performance that can be used to predict software defects. When a good validity measure is applied, most metaheuristic algorithms can automatically divide datasets into an appropriate number of clusters, according to \hyperref[b35]{Agbaje et al. 2019}. \begin{bibitemlist}{1}
\bibitem[Engineering ()]{b4}\label{b4} \textit{}, Engineering . 2008. 34 p. .
\bibitem[Yang ()]{b36}\label{b36} \textit{}, X S Yang . \textit{Swarm Intelligence and Bio-Inspired Computation: Theory and Applications} 2013. V. Amsterdam, The Netherlands: Elsevier Science Publishers B.
\bibitem[Rana et al. ()]{b40}\label{b40} ‘A hybrid sequential approach for data clustering using K-Means and particle swarm optimization algorithm’. Sandeep Rana , Sanjay Jasola , Rajesh Kumar . \textit{International Journal of Engineering, Science and Technology} 2010. 2 (6) p. .
\bibitem[Kao and Kao ()]{b12}\label{b12} ‘A hybridized approach to data clustering’. Yi-Tung Kao , Erwiezahara , I-Wei Kao . \textit{Expert Systems with Applications} 2008. 34 (3) p. .
\bibitem[Yunlongzhu et al. ()]{b11}\label{b11} ‘A new approach for data clustering using hybrid artificial bee colony algorithm’. Xiaohui Yunlongzhu , Wenping Yan , Liang Zou , Wang . \textit{Neuro computing} 2012. 97 p. .
\bibitem[Goldberg and Yang ()]{b25}\label{b25} ‘A new metaheuristic bat-inspired algorithm’. D E Goldberg , ; X.-S Yang . \textit{Genetic Algorithms in Search, Optimization, and Machine Learning}, (New York) 1989. 2010. Addison-Wesley. 284 p. .
\bibitem[Gayathri et al. (2015)]{b7}\label{b7} ‘A Novel Approach for Clustering Based On Bayesian Network’. R Gayathri , A Cauveri , R Kanagapriya , V Nivetha , P Tamizhselvi , K P Kumar . \textit{Proceedings of the 2015 International Conference on Advanced Research in Computer Science Engineering \& Technology}, (the 2015 International Conference on Advanced Research in Computer Science Engineering \& Technology) 2015. March. 2015. ACM. p. 60.
\bibitem[Tóth et al. ()]{b15}\label{b15} ‘A Public Bug Database of GitHub Projects and Its Application in Bug Prediction’. Z Tóth , P Gyimesi , R Ferenc . \textit{Computational Science and Its Applications --ICCSA 2016}, (Cham) 2016. Springer International Publishing. p. .
\bibitem[Hruschka et al. ()]{b20}\label{b20} ‘A Survey of Evolutionary Algorithms for Clustering’. E Hruschka , R J G B Campello , A A Freitas , A De Carvalho . \textit{IEEE Trans. Syst. Man Cybern. Part C Appl. Rev} 2009. 39 p. .
\bibitem[Nanda and Panda ()]{b21}\label{b21} ‘A survey on nature inspired metaheuristic algorithms for partitional clustering’. S J Nanda , G Panda . \textit{Swarm Evol. Comput} 2014. 16 p. .
\bibitem[Jain and Dubes ()]{b6}\label{b6} \textit{Algorithms for clustering data}, A K Jain , R C Dubes . 1988. Prentice-Hall, Inc.
\bibitem[Zhou et al. ()]{b22}\label{b22} ‘An Automatic K-Means Clustering Algorithm of GPS Data Combining a Novel Niche Genetic Algorithm with Noise and Density’. X Zhou , J Gu , S Shen , H Ma , F Miao , H Zhang , H Gong . \textit{ISPRS Int. J. Geo-Inf} 2017. 6 p. 392.
\bibitem[Wang and Li ()]{b39}\label{b39} ‘An Improved Grey Wolf Optimizer Based on Differential Evolution and Elimination Mechanism’. Jie-Sheng Wang , Shu-Xia Li . \xref{http://dx.doi.org/10.1038/s41598-019-43546-3}{10.1038/s41598-019-43546-3}. \url{https://doi.org/10.1038/s41598-019-43546-3} \textit{SciRep} 2019. 9 p. 7181.
\bibitem[Apostolopoulos and Vlachos ()]{b29}\label{b29} ‘Application of the Firefly Algorithm for Solving the Economic Emissions Load Dispatch Problem’. T Apostolopoulos , A Vlachos . \xref{http://dx.doi.org/10.1155/2011/523806}{10.1155/2011/523806}. \textit{International journal of Combinatorics} 2011.
\bibitem[Inacio et al. (2015)]{b31}\label{b31} ‘Applying the Coral Reefs Optimization Algorithm to Clustering Problems’. G Inacio , Joao C Medeiros , Anne M P Xavier-Junior , Canuto . \xref{http://dx.doi.org/10.1109/IJCNN.2015.7280845}{10.1109/IJCNN.2015.7280845}. Conference Paper July 2015.
\bibitem[Agbaje et al. ()]{b35}\label{b35} ‘Automatic Data Clustering Using Hybrid Firefly Particle Swarm Optimization Algorithm’. M B Agbaje , A E Ezugwu , R Els . \textit{IEEE Access} 2019. 7 p. .
\bibitem[Huang and Ma]{b26}\label{b26} \textit{Bat Algorithm Based on an Integration Strategy and Gaussian Distribution}, Jianqiang Huang , Yan Ma . \xref{http://dx.doi.org/10.1155/2020/9495281}{10.1155/2020/9495281}. \url{https://doi.org/10.1155/2020/9495281} 2020.
\bibitem[Lessmann et al.]{b3}\label{b3} ‘Benchmarking classification models for software defect prediction: A proposed framework and novel findings’. S Lessmann , B Baesens , C Mues , S Pietsch . \textit{IEEE Transactions on Software}
\bibitem[Yang and Deb (2009)]{b30}\label{b30} ‘Cuckoo search via Lévy flights’. X.-S Yang , S Deb . \textit{Proceedings of the World Congress on Nature \& Biologically Inspired Computing (NABIC '09)}, (the World Congress on Nature \& Biologically Inspired Computing (NABIC '09)Coimbatore, india) December 2009. p. .
\bibitem[Bouhmala et al. (2015)]{b24}\label{b24} ‘Enhanced Genetic Algorithm with K-Means for the Clustering Problem’. N Bouhmala , A Viken , J B Lønnum . \textit{International Journal of Modeling and Optimization} April 2015. 5 (2) .
\bibitem[Kaur and Kaur ()]{b17}\label{b17} ‘Fault Prediction using K-Canberra Means Clustering’. Deepinder Kaur , Arashdeep Kaur . \textit{CNC} 2010. (in Press)
\bibitem[Mirjalili et al. ()]{b37}\label{b37} ‘Grey wolf optimizer’. S Mirjalili , S M Mirjalili , A Lewis . \xref{http://dx.doi.org/10.1016/j.advengsoft.2013.12.007}{10.1016/j.advengsoft.2013.12.007}. \textit{Adv. Eng. Softw} 2014. 69 p. .
\bibitem[Jensi and Wiselinjiji (2015)]{b34}\label{b34} ‘HYBRID DATA CLUSTERING APPROACH USING K-MEANS AND FLOWER POLLINATION ALGORITHM, Advanced Computational Intelligence’. R Jensi , G Wiselinjiji . \textit{An International Journal (ACII)} April 2015. 2 (2) .
\bibitem[Hou et al.]{b38}\label{b38} ‘Improved Grey Wolf Optimization Algorithm and Application’. Y Hou , H Gao , Z Wang , C Du . \xref{http://dx.doi.org/10.3390/s22103810}{10.3390/s22103810}. \url{https://doi.org/10.3390/s22103810} \textit{Sensors} 2022 p. 3810.
\bibitem[Riskiannisa and Riana (2020)]{b2}\label{b2} ‘Improved point center algorithm for k-means clustering to increase software defect prediction’. Didirosiyadi Riskiannisa , Dwiza Riana . \textit{International Journal of Advances in Intelligent Informatics} November 2020. 6 (3) p. .
\bibitem[Tang et al. ()]{b28}\label{b28} ‘Integrating nature-inspired optimization algorithms to K-means clustering’. R Tang , S Fong , X Yang , S Deb . \xref{http://dx.doi.org/10.1109/ICDIM.2012.6360145}{10.1109/ICDIM.2012.6360145}. \textit{Seventh International Conference on Digital Information Management (ICDIM 2012}, 2012. p. .
\bibitem[Abiodun et al.]{b18}\label{b18} ‘K-Means-Based Nature-Inspired Metaheuristic Algorithms for Automatic Data Clustering Problems: Recent Advances and Future Directions’. M Abiodun , Mubarak S Ikotun , Absalom E Almutari , Ezugwu . \xref{http://dx.doi.org/10.3390/app112311246}{10.3390/app112311246}. \url{https://doi.org/10.3390/app112311246} \textit{Appl. Sci} 2021 p. 11246.
\bibitem[Mousa et al. ()]{b23}\label{b23} ‘Local Search Based Hybrid Particle Swarm Optimization for Multiobjective Optimization’. A A Mousa , M A El-Shorbagy , Abd El-Wahed , WF . \textit{Swarm and Evolutionary Computation} 2012. 3 p. .
\bibitem[Okwu and Tartibu ()]{b19}\label{b19} ‘Metaheuristic Optimization: Nature-Inspired Algorithms Swarm and Computational Intelligence’. M O Okwu , L K Tartibu . \textit{Theory and Applications} 2020. Germany: Springer Nature: Berlin/Heidelberg. 927.
\bibitem[Koohi and Groza ()]{b33}\label{b33} ‘Optimizing Particle Swarm Optimization algorithm’. I Koohi , V Z Groza . \xref{http://dx.doi.org/10.1109/CCECE.2014.6901057}{10.1109/CCECE.2014.6901057}. \textit{2014 IEEE 27th Canadian Conference on Electrical and Computer Engineering (CCECE)}, 2014. p. .
\bibitem[Myrtveit et al. ()]{b5}\label{b5} ‘Reliability and validity in comparative studies of software prediction models’. I Myrtveit , E Stensrud , M Shepperd . \textit{IEEE Transactions on Software Engineering} 2005. 31 (5) p. .
\bibitem[Shepperd et al. ()]{b14}\label{b14} M Shepperd , Q Song , Z Sun , C Mair . \textit{Data quality: Some Eng}, 2013. 39 p. .
\bibitem[Catal et al. ()]{b10}\label{b10} \textit{Software fault prediction of unlabeled program modules}, C Catal , U Sevim , B Diri . 2009. 2009. p. .
\bibitem[Bishnu and Bhattacherjee ()]{b9}\label{b9} ‘Software fault prediction using quad tree-based kk-means clustering algorithm’. P S Bishnu , V Bhattacherjee . \xref{http://dx.doi.org/10.1109/TKDE.2011.163}{10.1109/TKDE.2011.163}. \url{https://doi.org/10.1109/TKDE.2011.163} \textit{IEEE Trans Knowl Data Eng} 2012. 24 (6) p. .
\bibitem[Macqueen ()]{b0}\label{b0} ‘Some methods for classification and Analysis of Multivariate Observations’. J B Macqueen . \textit{Proceedings of 5 th Berkeley Symposium on Mathematical Statistics and Probability}, (5 th Berkeley Symposium on Mathematical Statistics and Probability) 1967. University of California Press. p. .
\bibitem[Zheng et al. ()]{b27}\label{b27} ‘The application of ant colony system to image texture classification’. H Zheng , Z Zheng , Y Xiang . \textit{Proceedings of the 2nd International Conference on Machine Learning and Cybernetics}, (the 2nd International Conference on Machine Learning and CyberneticsXi'an, China) 2003. 3 p. . (textute read texture)
\bibitem[Salcedo-Sanz et al. ()]{b32}\label{b32} ‘The Coral Reefs Optimization Algorithm: A Novel Metaheuristic for Efficiently Solving Optimization Problems’. S Salcedo-Sanz , J Del Ser , S Gil-Lpez , I Landa-Torres , J A Portilla-Figueras . \textit{The Scientific World Journal} 2014. Hindawi Publishing Corporation. 2014.
\bibitem[Fong et al. ()]{b16}\label{b16} ‘Towards Enhancement of Performance of K-Means Clustering Using Nature-Inspired Optimization Algorithms’. Simon Fong , Suash Deb , Xin-She Yang , Yan Zhuang . \xref{http://dx.doi.org/10.1155/2014/564829}{10.1155/2014/564829}. \url{https://doi.org/10.1155/2014/564829} \textit{Computational Intelligence and Metaheuristic Algorithms with Applications} 2014.
\bibitem[Almayyan (2021)]{b13}\label{b13} ‘Towards Predicting software defects with clustering techniques’. Waheeda Almayyan . \textit{International Journal of Artificial Intelligence and Application (IJAIA)} January 2021. 12 (1) .
\bibitem[Zhong et al. ()]{b8}\label{b8} ‘Unsupervised learning for expert-based software quality estimation’. S Zhong , T M Khoshgoftaar , N Seliya . \xref{http://dx.doi.org/10.1109/HASE.2004.1281739}{10.1109/HASE.2004.1281739}. \url{https://doi.org/10.1109/HASE.2004.1281739} \textit{Proceedings of the eighth IEEE international conference on high assurance systems engineering HASE}, (the eighth IEEE international conference on high assurance systems engineering HASE) 2004. 2004. p. .
\bibitem[Tang and Fong (2012)]{b1}\label{b1} \textit{Xin-she Yang and Suash Deb, Integrating Nature inspired Optimization algorithms to k-means clustering}, Rui Tang , Simon Fong . Aug 2012. University of Macau
\end{bibitemlist}
\end{document}