\documentclass[11pt,twoside]{article}\makeatletter
\IfFileExists{xcolor.sty}%
{\RequirePackage{xcolor}}%
{\RequirePackage{color}}
\usepackage{colortbl}
\usepackage{wrapfig}
\usepackage{ifxetex}
\ifxetex
\usepackage{fontspec}
\usepackage{xunicode}
\catcode`⃥=\active \def⃥{\textbackslash}
\catcode`❴=\active \def❴{\{}
\catcode`❵=\active \def❵{\}}
\def\textJapanese{\fontspec{Noto Sans CJK JP}}
\def\textChinese{\fontspec{Noto Sans CJK SC}}
\def\textKorean{\fontspec{Noto Sans CJK KR}}
\setmonofont{DejaVu Sans Mono}
\else
\IfFileExists{utf8x.def}%
{\usepackage[utf8x]{inputenc}
\PrerenderUnicode{–}
}%
{\usepackage[utf8]{inputenc}}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{float}
\usepackage[]{ucs}
\uc@dclc{8421}{default}{\textbackslash }
\uc@dclc{10100}{default}{\{}
\uc@dclc{10101}{default}{\}}
\uc@dclc{8491}{default}{\AA{}}
\uc@dclc{8239}{default}{\,}
\uc@dclc{20154}{default}{ }
\uc@dclc{10148}{default}{>}
\def\textschwa{\rotatebox{-90}{e}}
\def\textJapanese{}
\def\textChinese{}
\IfFileExists{tipa.sty}{\usepackage{tipa}}{}
\fi
\def\exampleFont{\ttfamily\small}
\DeclareTextSymbol{\textpi}{OML}{25}
\usepackage{relsize}
\RequirePackage{array}
% Redefinition of the array package's \@testpach: classifies the next token of
% a tabular/array preamble into \@chclass (token class) and \@chnum (column
% variant). NOTE(review): class/variant numbers follow the array package's
% internal conventions (|, !, @, (, ), c/l/r, m/p/b) -- confirm against array.sty.
\def\@testpach{\@chclass
\ifnum \@lastchclass=6 \@ne \@chnum \@ne \else
\ifnum \@lastchclass=7 5 \else
\ifnum \@lastchclass=8 \tw@ \else
\ifnum \@lastchclass=9 \thr@@
\else \z@
\ifnum \@lastchclass = 10 \else
% Stringify the token so the single-character \if tests below are reliable.
\edef\@nextchar{\expandafter\string\@nextchar}%
\@chnum
\if \@nextchar c\z@ \else
\if \@nextchar l\@ne \else
\if \@nextchar r\tw@ \else
\z@ \@chclass
\if\@nextchar |\@ne \else
\if \@nextchar !6 \else
\if \@nextchar @7 \else
\if \@nextchar (8 \else
\if \@nextchar )9 \else
10
\@chnum
\if \@nextchar m\thr@@\else
\if \@nextchar p4 \else
\if \@nextchar b5 \else
\z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi
\fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi}
% Restore \\ as the tabular row terminator after \raggedright/\centering etc.
\gdef\arraybackslash{\let\\=\@arraycr}
% Text subscript built from a scaled box (for kernels without \textsubscript).
\def\@textsubscript#1{{\m@th\ensuremath{_{\mbox{\fontsize\sf@size\z@#1}}}}}
% \Panel{content}{color}{ncols}{coltype}: colored \multicolumn cell (colortbl).
% NOTE(review): an identical definition appears again later in the preamble;
% the duplicate is harmless but redundant.
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
\def\abbr{}
\def\corr{}
\def\expan{}
\def\gap{}
\def\orig{}
\def\reg{}
\def\ref{}
\def\sic{}
\def\persName{}\def\name{}
\def\placeName{}
\def\orgName{}
\def\textcal#1{{\fontspec{Lucida Calligraphy}#1}}
\def\textgothic#1{{\fontspec{Lucida Blackletter}#1}}
\def\textlarge#1{{\large #1}}
\def\textoverbar#1{\ensuremath{\overline{#1}}}
\def\textquoted#1{‘#1’}
\def\textsmall#1{{\small #1}}
\def\textsubscript#1{\@textsubscript{\selectfont#1}}
\def\textxi{\ensuremath{\xi}}
\def\titlem{\itshape}
\newenvironment{biblfree}{}{\ifvmode\par\fi }
\newenvironment{bibl}{}{}
\newenvironment{byline}{\vskip6pt\itshape\fontsize{16pt}{18pt}\selectfont}{\par }
\newenvironment{citbibl}{}{\ifvmode\par\fi }
\newenvironment{docAuthor}{\ifvmode\vskip4pt\fontsize{16pt}{18pt}\selectfont\fi\itshape}{\ifvmode\par\fi }
\newenvironment{docDate}{}{\ifvmode\par\fi }
\newenvironment{docImprint}{\vskip 6pt}{\ifvmode\par\fi }
\newenvironment{docTitle}{\vskip6pt\bfseries\fontsize{22pt}{25pt}\selectfont}{\par }
\newenvironment{msHead}{\vskip 6pt}{\par}
\newenvironment{msItem}{\vskip 6pt}{\par}
\newenvironment{rubric}{}{}
\newenvironment{titlePart}{}{\par }
\newcolumntype{L}[1]{){\raggedright\arraybackslash}p{#1}}
\newcolumntype{C}[1]{){\centering\arraybackslash}p{#1}}
\newcolumntype{R}[1]{){\raggedleft\arraybackslash}p{#1}}
\newcolumntype{P}[1]{){\arraybackslash}p{#1}}
\newcolumntype{B}[1]{){\arraybackslash}b{#1}}
\newcolumntype{M}[1]{){\arraybackslash}m{#1}}
\definecolor{label}{gray}{0.75}
\def\unusedattribute#1{\sout{\textcolor{label}{#1}}}
\DeclareRobustCommand*{\xref}{\hyper@normalise\xref@}
\def\xref@#1#2{\hyper@linkurl{#2}{#1}}
\begingroup
\catcode`\_=\active
\gdef_#1{\ensuremath{\sb{\mathrm{#1}}}}
\endgroup
\mathcode`\_=\string"8000
\catcode`\_=12\relax
\usepackage[a4paper,twoside,lmargin=1in,rmargin=1in,tmargin=1in,bmargin=1in,marginparwidth=0.75in]{geometry}
\usepackage{framed}
\definecolor{shadecolor}{gray}{0.95}
\usepackage{longtable}
\usepackage[normalem]{ulem}
\usepackage{fancyvrb}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage{marginnote}
\renewcommand{\@cite}[1]{#1}
\renewcommand*{\marginfont}{\itshape\footnotesize}
\def\Gin@extensions{.pdf,.png,.jpg,.mps,.tif}
\pagestyle{fancy}
\usepackage[pdftitle={Machine Learning Model Optimization with Hyper Parameter Tuning Approach},
pdfauthor={}]{hyperref}
\hyperbaseurl{}
\paperwidth210mm
\paperheight297mm
\def\@pnumwidth{1.55em}
\def\@tocrmarg {2.55em}
\def\@dotsep{4.5}
\setcounter{tocdepth}{3}
\clubpenalty=8000
\emergencystretch 3em
\hbadness=4000
\hyphenpenalty=400
\pretolerance=750
\tolerance=2000
\vbadness=4000
\widowpenalty=10000
% Sectioning commands restyled for this journal: tighter vertical spacing than
% the article-class defaults. Arguments to \@startsection are
% {name}{level}{indent}{beforeskip}{afterskip}{style}.
\renewcommand\section{\@startsection {section}{1}{\z@}%
{-1.75ex \@plus -0.5ex \@minus -.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\Large\bfseries}}
\renewcommand\subsection{\@startsection{subsection}{2}{\z@}%
{-1.75ex\@plus -0.5ex \@minus- .2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\Large}}
\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}%
{-1.5ex\@plus -0.35ex \@minus -.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\large}}
\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}%
{-1ex \@plus-0.35ex \@minus -0.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\normalsize}}
% Negative afterskip makes \subparagraph a run-in heading.
\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}%
{1.5ex \@plus1ex \@minus .2ex}%
{-1em}%
{\reset@font\normalsize\bfseries}}
% TOC entry formatting: top-level section entries are set by hand (bold, page
% number right-aligned in a \@pnumwidth box); deeper levels reuse the kernel's
% \@dottedtocline{level}{indent}{numwidth}.
\def\l@section#1#2{\addpenalty{\@secpenalty} \addvspace{1.0em plus 1pt}
\@tempdima 1.5em \begingroup
\parindent \z@ \rightskip \@pnumwidth
\parfillskip -\@pnumwidth
\bfseries \leavevmode #1\hfil \hbox to\@pnumwidth{\hss #2}\par
\endgroup}
\def\l@subsection{\@dottedtocline{2}{1.5em}{2.3em}}
\def\l@subsubsection{\@dottedtocline{3}{3.8em}{3.2em}}
\def\l@paragraph{\@dottedtocline{4}{7.0em}{4.1em}}
\def\l@subparagraph{\@dottedtocline{5}{10em}{5em}}
% Book-class-style matter switches grafted onto the article class. The counters
% are created only if the class has not already defined them.
\@ifundefined{c@section}{\newcounter{section}}{}
\@ifundefined{c@chapter}{\newcounter{chapter}}{}
\newif\if@mainmatter
\@mainmattertrue
\def\chaptername{Chapter}
% Front matter: roman page numbers, roman chapter/section numbering
% (\theH... variants keep hyperref anchors unique).
\def\frontmatter{%
\pagenumbering{roman}
\def\thechapter{\@roman\c@chapter}
\def\theHchapter{\roman{chapter}}
\def\thesection{\@roman\c@section}
\def\theHsection{\roman{section}}
\def\@chapapp{}%
}
% Main matter: restart counters, arabic page numbers, number headings to depth 6.
\def\mainmatter{%
\cleardoublepage
\def\thechapter{\@arabic\c@chapter}
\setcounter{chapter}{0}
\setcounter{section}{0}
\pagenumbering{arabic}
\setcounter{secnumdepth}{6}
\def\@chapapp{\chaptername}%
\def\theHchapter{\arabic{chapter}}
\def\thesection{\@arabic\c@section}
\def\theHsection{\arabic{section}}
}
% Back matter: appendix-style lettered chapters, shallower numbering.
\def\backmatter{%
\cleardoublepage
\setcounter{chapter}{0}
\setcounter{section}{0}
\setcounter{secnumdepth}{2}
\def\@chapapp{\appendixname}%
\def\thechapter{\@Alph\c@chapter}
\def\theHchapter{\Alph{chapter}}
\appendix
}
% bibitemlist: numbered bibliography list. #1 is the widest-label template used
% to size the hanging indent; labels render as \@biblabel{n} via counter enumiv.
\newenvironment{bibitemlist}[1]{%
\list{\@biblabel{\@arabic\c@enumiv}}%
{\settowidth\labelwidth{\@biblabel{#1}}%
\leftmargin\labelwidth
\advance\leftmargin\labelsep
\@openbib@code
\usecounter{enumiv}%
\let\p@enumiv\@empty
\renewcommand\theenumiv{\@arabic\c@enumiv}%
}%
% Looser line breaking inside entries; \sfcode`\.=\@m stops periods from being
% treated as sentence endings (bibliographies are abbreviation-heavy).
\sloppy
\clubpenalty4000
\@clubpenalty \clubpenalty
\widowpenalty4000%
\sfcode`\.\@m}%
{\def\@noitemerr
{\@latex@warning{Empty `bibitemlist' environment}}%
\endlist}
\def\tableofcontents{\section*{\contentsname}\@starttoc{toc}}
\parskip0pt
\parindent1em
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% reflist: compact ragged-right reference list with italic run-in item labels.
\newenvironment{reflist}{%
\begin{raggedright}\begin{list}{}
{%
\setlength{\topsep}{0pt}%
\setlength{\rightmargin}{0.25in}%
\setlength{\itemsep}{0pt}%
\setlength{\itemindent}{0pt}%
\setlength{\parskip}{0pt}%
\setlength{\parsep}{2pt}%
\def\makelabel##1{\itshape ##1}}%
}
{\end{list}\end{raggedright}}
% sansreflist: same layout as reflist but with upright (non-italic) labels.
\newenvironment{sansreflist}{%
\begin{raggedright}\begin{list}{}
{%
\setlength{\topsep}{0pt}%
\setlength{\rightmargin}{0.25in}%
\setlength{\itemindent}{0pt}%
\setlength{\parskip}{0pt}%
\setlength{\itemsep}{0pt}%
\setlength{\parsep}{2pt}%
\def\makelabel##1{\upshape ##1}}%
}
{\end{list}\end{raggedright}}
% specHead{anchor}{title}: rule + spacing, hyperlink anchor \label{#1}, running
% head and a level-2 PDF bookmark for #2; the title hangs into the left margin.
\newenvironment{specHead}[2]%
{\vspace{20pt}\hrule\vspace{10pt}%
\phantomsection\label{#1}\markright{#2}%
\pdfbookmark[2]{#2}{#1}%
\hspace{-0.75in}{\bfseries\fontsize{16pt}{18pt}\selectfont#2}%
}{}
\def\TheFullDate{2021 2021-07-15 (revised: Year 2021 15 July 2021)}
\def\TheID{\makeatother }
\def\TheDate{2021 2021-07-15}
\title{Machine Learning Model Optimization with Hyper Parameter Tuning Approach}
\author{}\makeatletter
\makeatletter
% Flush pending floats and advance to the next left-hand (verso, even-numbered)
% page in twoside layouts; emits a blank page when the page after \clearpage
% would be a recto (odd \c@page).
% NOTE(review): the inner \if@twocolumn branch adds a second \hbox{}\newpage,
% presumably to finish the column pair -- confirm in two-column use.
\newcommand*{\cleartoleftpage}{%
\clearpage
\if@twoside
\ifodd\c@page
\hbox{}\newpage
\if@twocolumn
\hbox{}\newpage
\fi
\fi
\fi
}
\makeatother
\makeatletter
\thispagestyle{empty}
\markright{\@title}\markboth{\@title}{\@author}
% Journal-specific \small: 9pt on an 11pt baseline, with matching display-math
% skips and compact first-level (\@listi) list spacing.
\renewcommand\small{\@setfontsize\small{9pt}{11pt}\abovedisplayskip 8.5\p@ plus3\p@ minus4\p@
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip \z@ plus2\p@
\belowdisplayshortskip 4\p@ plus2\p@ minus2\p@
\def\@listi{\leftmargin\leftmargini
\topsep 2\p@ plus1\p@ minus1\p@
\parsep 2\p@ plus\p@ minus\p@
\itemsep 1pt}
}
\makeatother
\fvset{frame=single,numberblanklines=false,xleftmargin=5mm,xrightmargin=5mm}
\fancyhf{}
\setlength{\headheight}{14pt}
\fancyhead[LE]{\bfseries\leftmark}
\fancyhead[RO]{\bfseries\rightmark}
\fancyfoot[RO]{}
\fancyfoot[CO]{\thepage}
\fancyfoot[LO]{\TheID}
\fancyfoot[LE]{}
\fancyfoot[CE]{\thepage}
\fancyfoot[RE]{\TheID}
\hypersetup{citebordercolor=0.75 0.75 0.75,linkbordercolor=0.75 0.75 0.75,urlbordercolor=0.75 0.75 0.75,bookmarksnumbered=true}
\fancypagestyle{plain}{\fancyhead{}\renewcommand{\headrulewidth}{0pt}}
\date{}
\usepackage{authblk}
% \keywords{list}: print the "Index terms" line beneath the abstract.
% \providecommand keeps any class-supplied definition if one already exists.
\providecommand{\keywords}[1]
{
\footnotesize
\textbf{\textit{Index terms---}} #1
}
\usepackage{graphicx,xcolor}
\definecolor{GJBlue}{HTML}{273B81}
\definecolor{GJLightBlue}{HTML}{0A9DD9}
\definecolor{GJMediumGrey}{HTML}{6D6E70}
\definecolor{GJLightGrey}{HTML}{929497}
% Abstract restyled as a full-width ragged-right block framed by two grey rules,
% with the heading set in the journal's blue.
\renewenvironment{abstract}{%
\setlength{\parindent}{0pt}\raggedright
\textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
\vskip16pt
\textcolor{GJBlue}{\large\bfseries\abstractname\space}
}{%
\vskip8pt
\textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
\vskip16pt
}
\usepackage[absolute,overlay]{textpos}
\makeatother
\usepackage{lineno}
\linenumbers
\begin{document}
\author[1]{Md Riyad Hossain}
\author[2]{Dr. Douglas Timmer}
\affil[1]{ University of Texas Rio Grande Valley}
\renewcommand\Authands{ and }
\date{\small \em Received: 5 June 2021 Accepted: 4 July 2021 Published: 15 July 2021}
\maketitle
\begin{abstract}
Hyper-parameters tuning is a key step to find the optimal machine learning parameters. Determining the best hyper-parameters takes a good deal of time, especially when the objective functions are costly to determine, or a large number of parameters are required to be tuned. In contrast to the conventional machine learning algorithms, Neural Network requires tuning hyperparameters more because it has to process a lot of parameters together, and depending on the fine tuning, the accuracy of the model can be varied in between 25\%--90\%. A few of the most effective techniques for tuning hyper-parameters in the Deep learning methods are: Grid search, Random search, Bayesian optimization, etc. Every method has some advantages and disadvantages over others. For example: Grid search has proven to be an effective technique to tune hyper-parameters, along with drawbacks like trying too many combinations, and performing poorly when it is required to tune many parameters at a time. In our work, we will determine, show and analyze the efficiencies of a real-world synthetic polymer dataset for different parameters and tuning methods.
\end{abstract}
\keywords{machine learning, hyper parameter optimization, grid search, random search, BO-GP.}
\begin{textblock*}{18cm}(1cm,1cm) % {block width} (coords)
\textcolor{GJBlue}{\LARGE Global Journals \LaTeX\ JournalKaleidoscope\texttrademark}
\end{textblock*}
\begin{textblock*}{18cm}(1.4cm,1.5cm) % {block width} (coords)
\textcolor{GJBlue}{\footnotesize \\ Artificial Intelligence formulated this projection for compatibility purposes from the original article published at Global Journals. However, this technology is currently in beta. \emph{Therefore, kindly ignore odd layouts, missed formulae, text, tables, or figures.}}
\end{textblock*}
\let\tabcellsep&
\section[{Introduction}]{Introduction}\par
In the era of Machine learning, performance (based on accuracy and computing time) is very important. The growing number of tuning parameters associated with Machine learning models is tedious and time-consuming to set by standard optimization techniques. Researchers working with ML models often spend long hours to find the perfect combination of hyperparameters \hyperref[b0]{[1]}. If we think of w, x, y, z as the parameters of the model, and if all of these parameters are numbers ranging from 0.0001 to, say, 5.00, then hyperparameter tuning is finding the best combination of values to make the objective function optimal.\par
One of the major difficulties in working with the Machine learning problem is tuning hyperparameters. These are the design parameters that could directly affect the training outcome. The conversion from a nontuned Machine learning model to a tuned ML model is like learning to predict everything accurately from predicting nothing correctly \hyperref[b1]{[2]}. There are two types of parameters in ML models: Hyperparameters, and Model parameters. Hyperparameters are arbitrarily set by the user even before starting to train the model, whereas, the model parameters are learned during the training.\par
The quality of a predictive model mostly depends on the configuration of its hyperparameters, but it is often difficult to know how these hyperparameters interact with each other to affect the final results of the model \hyperref[b12]{[14]}. To determine accuracy and make a comparison between two models it is always better to make comparisons between two models with both of the models' parameters tuned. It would be unfair to compare a Decision Tree model with the best parameter against an ANN model whose hyperparameters haven't been optimized yet.
\section[{II.}]{II.}
\section[{Literature Review}]{Literature Review}\par
The hyperparameter tuning, due to its importance, has changed to a new interesting topic in the ML community. The hyperparameter tuning algorithms are either model-free or model-based.\par
Model-free algorithms are free of using knowledge about the solution space extracted during the optimization; a few of this category include manual search \hyperref[b2]{[4]}, random search \hyperref[b1]{[2]}, \hyperref[b4]{[6]}, \hyperref[b5]{[7]}, and grid search \hyperref[b3]{[5]}. In the Manual search category, we assume the values of the parameters based on our previous experience. In this technique, the user allows some sets of hyperparameters based on judgments or previous experience, trains the algorithm by them, observes the performance, keeps repeating to train the model until achieving a satisfactory accuracy, and then selects the best set of hyperparameters that gives the maximum accuracy. However, this technique is heavily dependent on judgment and previous expertise, and its reliability is dependent on the correctness of the previous knowledge \hyperref[b26]{[3]}. A few of the main parameters used by Random forest classifiers are: criterion, max\textunderscore depth, n\textunderscore estimators, min\textunderscore samples\textunderscore split, etc.\par
In the Random search, we train and test our model based on some random combinations of the hyperparameters. This method is better used to identify new combinations of the parameters or to discover new hyperparameters. Although it may take more time to process, it often leads to better performance. \hyperref[b32]{Bergstra et al. (2012)} in their work mentioned that, over the same domain, random search is able to find models that are as good as or even better in a reduced computation time. After granting the same computational budget for the random search, it was evident that random search can find better models by effectively searching for larger and less promising configuration spaces \hyperref[b14]{[16]}. Random Search, which is developed based on grid research, sets up a grid of hyper-parameter values and selects random combinations to train the algorithm; \hyperref[b1]{[2]}.\par
In the grid search, the user sets a grid of hyperparameters and trains the model based on each possible combination. \hyperref[b26]{Amirabadi et al. (2020)} proposes two novel suboptimal grid search techniques on the four separate dataset to show the efficiency of their hyperparameter tuning model and later compare it with some of the other recently published work. The main drawback of the grid search method is its high complexity. It is commonly used when there are a few numbers of hyperparameters to be tuned. In other words, grid search works well when the best combinations are already determined. Some of the similar works of grid search applications have been reported by \hyperref[b15]{Zhang et al. (2014)} \hyperref[b15]{[17]}, Ghawi et al. (2019) \hyperref[b16]{[18]}, and \hyperref[b17]{Beyramysoltan et al. (2013)} \hyperref[b17]{[19]}.\par
Zhang et al. (2019) \hyperref[b18]{[20]} in their work reported a few of the drawbacks of the existing hyperparameter tuning methods. In their work, they mentioned grid search as an ad-hoc process, as it traverses all the possible combinations, and the entire procedure requires a lot of time. Andradóttir (2014) \hyperref[b11]{[13]} shows that Random Search (RS) eradicates some of the limitations of the grid search technique to an extent. RS can reduce the overall time consumption, but the main disadvantage is that it cannot converge to the global optimal value.\par
The combination of randomly selected hyperparameters can never guarantee a steady and widely acceptable result. That's why, apart from the manual tuning methods, automated tuning methods are becoming more and more popular in recent times; Snoek et al. (2015) \hyperref[b8]{[10]}. Bayesian Optimization is one of the most widely used automated hyperparameter tuning methods to find the global optimum in fewer steps. However, Bayesian optimization's results are sensitive to the parameters of the surrogate model, and the accuracy greatly depends on the quality of the learning model; Amirabadi et al. (2020) \hyperref[b26]{[3]}.\par
To minimize the error function of hyperparameter values, Bayesian optimization adopts probabilistic surrogate models like Gaussian processes. Through precise exploration and development, an alternative model of hyperparameter space is established; \hyperref[b6]{Eggensperger et al. (2013)} \hyperref[b6]{[8]}. However, probabilistic surrogates need accurate estimations of sufficient statistics of error function distribution. So, a sizable number of hyperparameters is required to evaluate the estimations and this method doesn't work well when there is to process myriad hyperparameters altogether.
\section[{III.}]{III.}
\section[{Methodology a) Dataset description}]{Methodology a) Dataset description}\par
Denier: Denier is a weight measurement that usually refers to the thickness of the threads. It is the weight (grams) of a single optical fiber for 9 kilometers. If we have a 9 km fiber that weighs 1 gram, this fiber has a denier of 1, or 1D. A fiber weighing less than 1 gram is called a microfiber \hyperref[b20]{[22]}. Microfibers have become a new development trend in the synthetic polymer industry. The higher the denier is, the thicker and stronger the fiber is. Conversely, less denier means that the fiber/fabric will be softer and more transparent. Fine denier fibers are becoming a new standard and are very useful for the development of new textiles with excellent performance \hyperref[b19]{[21]}.\par
Breaking Elongation (\%): Elongation at break is one of the few main quality parameters of any synthetic fiber \hyperref[b27]{[24]}. It is the percentage of elongation at break. Fiber elongation partly reflects the extent of stretching a filament under a certain loading condition. Fibers with high elongation at break are determined to be easily stretched under a predetermined load. Fibers showing these characteristics are known to be flexible. The elongation behavior of any single fiber can be complex because of the multiplicity of structural factors affecting it. Moreover, a cotton fiber comes up with a natural crimp, which is important for fibers to stick together while undergoing other production processes \hyperref[b21]{[23]}. If \(L_0\) is the initial length of the fiber and \(\Delta L\) is the increase in length at break, then the equation for the percentage of breaking elongation would be: \(\mbox{Breaking Elongation} = \frac{\Delta L}{L_0} \times 100\%\)\par
Breaking elongation for the cotton fiber might be varied from 5\% to 10\%, which is significantly lower than that of wool fibers (25\%-45\%), and much lower than polyester fibers (typically over 50\%).\par
Breaking force (cN) and Tenacity (cN/tex): Breaking tenacity is the maximum load that a single fiber can withstand before breaking. For the Polypropylene and PET staple fibers, 10 mm length sample filaments are drawn until failure. Breaking tenacity is measured in grams/denier. Very small forces are encountered when evaluating fiber properties, so an instrument with gram-level accuracy is required \hyperref[b22]{[25]}. The tenacity of virgin PP fibers is about 5-8 g/den, and the elongation at break is about 100\%. At the same time, the tenacity of recycled PET is about 3.5-5.7 g/den; the elongation at break usually exceeds 100\%. Draw Ratio: The drawing ratio is the ratio of the diameter of the initial blank form to the diameter of the drawn part. The limiting drawing ratio (Capstan speed/Nip reel speed) for the extruder section is between 1.6 and 2.2 \hyperref[b23]{[26]}, whereas, for the stretching section, it is in between 3 and 4.
\section[{b) Hyper-parameter Optimization (HPO)}]{b) Hyper-parameter Optimization (HPO)}\par
The purpose of hyperparameter optimization is to find the global optimum \(x^*\) of the objective function \(f(x)\), which can be evaluated for any arbitrary \(x \in X\): \(x^* = \arg\min_{x \in X} f(x)\), where \(X\) is a hyperparameter space that can contain categorical, discrete, and continuous variables \hyperref[b24]{[27]}. In order to construct the design of different machine learning models, the application of effective hyperparameter optimization techniques can simplify the process of identifying the best hyperparameters for the models. HPO contains four major components: First, an estimator that could be a regressor or any classifier with one or more objective functions; second, a search space; third, an optimization method to find the best combinations; and fourth, a function to make a comparison between the effectiveness of various hyperparameter configurations \hyperref[b25]{[28]}. Some of the common hyperparameter techniques are discussed below: Grid Search: Grid search is a process that exhaustively searches a manually specified subset of the hyperparameter space of the target algorithm \hyperref[b30]{[30]}. A traditional approach to finding the optimum is to do a grid search, that is, to run experiments or processes on a number of conditions; for example, if there are three factors, a \(15 \times 15 \times 15\) grid would mean performing 3375 experiments under different conditions \hyperref[b32]{[32]}. Grid search is more practical when \hyperref[b31]{[31]}: (1) the total number of parameters in the model is small, say \(M < 10\). The grid is \(M\)-dimensional, so the number of test solutions is proportional to \(L^M\), where \(L\) is the number of test solutions along each dimension of the grid. \hyperref[b1]{(2)} The solution is known to be within a specific range of values, which can be used to define the limits of the grid. \hyperref[b26]{(3)} The direct problem \(d = g(m)\) can be computed quickly enough that the time required to compute \(L^M\) of them is not prohibitive.
(4) The error function \(E(m)\) is smooth on the scale of the grid spacing, \(\Delta m\), so that the minimum is not lost because the grid spacing is too coarse.\par
There are many problems with the grid search method. The first is that the number of experiments can be prohibitive if there are several factors. The second is that there can be significant experimental error, which means that if the experiments are repeated under identical conditions, different responses can be obtained; therefore, choosing the best point on the grid can be misleading, especially if the optimum is fairly flat. The third is that the initial grid may be too small for the number of experiments to be feasible, and it could lose characteristics close to the optimum or find a false (local) optimum \hyperref[b32]{[32]}.\par
Random Search: Random search \hyperref[b33]{[33]} is a basic improvement on grid search. It indicates a randomized search over hyper-parameters from certain distributions over possible parameter values. The searching process continues till the predetermined budget is exhausted, or until the desired accuracy is reached. This methods are the simplest stochastic optimization and are very useful for certain problems, such as small search space and fast-running simulation. RS finds a value for each hyperparameter, prior to the probability distribution function. Both the GS and RS estimate the cost measure based on the produced hyperparameter sets. Although RS is simple, it has proven to be more effective than Grid search in many of the cases \hyperref[b33]{[33]}.\par
Random search has been shown to provide better results due to several benefits: first, the budget can be set independently according to the distribution of the search space, therefore, random search can work better especially when multiple hyper-parameters are not uniformly distributed \hyperref[b34]{[34]}. Second: Because each evaluation is independent, it is easy to parallelize and allocate resources. Unlike GS, RS samples a number of parameter combinations from a defined distribution, which maximizes system efficiency by reducing the likelihood of wasting a lot of time in a small, underperforming area. In addition, this method can detect global optimum values or close to global if given a sufficient budget. Third, although getting optimal results using random search is not promising, more time consumption will lead to a greater likelihood of finding the best hyperparameter set, whereas longer search BO is more efficient than GS and RS because it can detect optimal combinations of hyperparameters by analyzing previously tested values, and running the surrogate model is usually much cheaper than running the objective function as a whole. However, because Bayesian optimization models are run based on previously tested values, it is difficult to belong to them with parallel sequential methods; but they are generally able to detect optimal close hyperparameter combinations in a few iterations \hyperref[b36]{[36]}. Common substitution models for BO include the Gaussian process (GP) \hyperref[b37]{[37]}, random forest (RF) \hyperref[b39]{[38]}, and Parzen estimator (TPE) {\ref [39]}. Therefore, there are three main BO algorithms based on their substitution models: BO-GP, BO-RF, BO-TPE. GP is an attractive reduced order model of BO that can be used to quantify forecast uncertainty. This is not a parametric model and the number of its parameters depends only on the input points. With the right kernel function, your GP can take advantage of the data structure. 
However, the GP also has disadvantages. For example, it is conceptually difficult to understand with BO theory. In addition, its low scalability with large dimensions or a large number of data points is another important issue \hyperref[b36]{[36]}. Applying HPO in ML Models\par
In order to put the theory into practice, several experiments have been performed on an industrialbased synthetic polymer model. This section describes experiments with four different HPO techniques on three general and representative ML algorithms. In the first part of the section, we discussed the experimental setup and the main HPO process. In the second part, we compare and analyze the results of the application of different HPO methods. The use of random search is recommended in the early stages of HPO to narrow the search space quickly, before using guided algorithms to get better results. The main drawback \hyperref[b25]{[28]} of RS and GS is that each evaluation in its iteration does not depend on previous evaluations; thus, they waste time evaluating underperforming areas of the search space.\par
Table 2: Performance evaluation of applying HPO methods to the regressor on the synthetic polymer dataset
\section[{Discussion \& Conclusion}]{Discussion \& Conclusion}\par
Machine learning has become the primary strategy for dealing with data problems and is widely used in various applications. To apply ML models to practical problems, hyperparameters must be tuned to handle specific datasets. However, as the size of the generated data increases greatly in real life, and manual tuning of hyperparameters is extremely computationally expensive, it has become essential to optimize the hyperparameters by an automatic process. In this work, we used hyperparameter techniques in the ML model to find the best set of hyperparameters. Our data set was small, and in this small dataset we can see that the randomly selected subsets are very representative of the given data set, as they can effectively optimize all types of hyperparameters. Our future work would be to test our model on a much larger data set and see the feedback. \begin{figure}[htbp]
\noindent\textbf{1}\includegraphics[]{image-2.png}
\caption{\label{fig_0}Figure 1 :}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-3.png}
\caption{\label{fig_1}}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{2}\includegraphics[]{image-4.png}
\caption{\label{fig_2}Figure 2 :}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{1} \par
\begin{longtable}{P{0.3095108695652174\textwidth}P{0.5404891304347826\textwidth}}
\multicolumn{2}{l}{Machine Learning Model Optimization with Hyper Parameter Tuning Approach}\\
Year 2021\tabcellsep \\
10\tabcellsep \\
( ) D\tabcellsep \\
ML Model\tabcellsep Hyper-parameter\\
RF Regressor\tabcellsep n\textunderscore estimators, max\textunderscore depth, min\textunderscore samples\textunderscore split, min\textunderscore samples\textunderscore leaf, criterion, max\textunderscore features\\
SVM Regressor\tabcellsep C, kernel, epsilon\\
KNN Regressor\tabcellsep n neighbors\end{longtable} \par
{\small\itshape [Note: © 2021 Global JournalsGlobal Journal of Computer Science and TechnologyVolume XXI Issue II Version I]}
\caption{\label{tab_0}Table 1 :}\end{figure}
\backmatter
\subsection[{Conflict of Interest:}]{Conflict of Interest:}\par
The authors whose names are listed in this work certify that they have no affiliations with or involvement in any organization or entity with any financial interest, or non-financial interest in the subject matter or materials discussed in this manuscript. \begin{bibitemlist}{1}
\bibitem[ Neurocomputing]{b28}\label{b28} \textit{}, \xref{http://dx.doi.org/10.1016/j.neucom.2020.07.061}{10.1016/j.neucom.2020.07.061}. \textit{Neurocomputing} 415 p. .
\bibitem[Springer]{b38}\label{b38} \textit{}, Springer . \xref{http://dx.doi.org/10.1007/978-3-642-25566-3_40}{10.1007/978-3-642-25566-3\textunderscore 40}. \url{https://doi.org/10.1007/978-3-642-25566-3\textunderscore 40} Berlin, Heidelberg.
\bibitem[Li et al. ()]{b12}\label{b12} ‘A Novel Bandit-Based Approach to Hyperparameter Optimization’. L Li , K Jamieson , G Desalvo , A Rostamizadeh , A Talwalker , Hyperband . \textit{Journal of Machine Learning Research} 2018. 18 p. .
\bibitem[Andrad_Ottir ()]{b11}\label{b11} ‘A review of random search methods’. S Andrad\textunderscore Ottir . \textit{Handbook of Simulation Optimization}, 2015. Springer. p. .
\bibitem[Tyagi ()]{b27}\label{b27} \textit{Advances in Yarn Spinning Technology || Yarn structure and properties from different spinning techniques}, G K Tyagi . doi:10. 1533/9780857090218.1.119. 2010. p. .
\bibitem[Bergstra et al. ()]{b39}\label{b39} ‘Algorithms for hyper-parameter optimization’. J Bergstra , R Bardenet , Y Bengio , B Kégl . \textit{Adv Neural Inf Process Syst (NIPS)} 2011. 24 p. .
\bibitem[Bergstra et al. ()]{b1}\label{b1} ‘Algorithms for hyperparameter optimization’. J S Bergstra , R Bardenet , Y Bengio , B Kégl . \textit{Advances in Neural Information Processing Systems}, 2011. p. .
\bibitem[Larochelle et al. ()]{b7}\label{b7} ‘An empirical evaluation of deep architectures on problems with many factors of variation’. H Larochelle , D Erhan , A Courville , J Bergstra , Y Bengio . \textit{Proceedings of the 24th International Conference on Machine Learning}, (the 24th International Conference on Machine Learning) 2007. ACM. p. .
\bibitem[Hutter et al. ()]{b35}\label{b35} \textit{Automated Machine Learning: Methods, Systems, Challenges}, F Hutter , L Kotthoff , J Vanschoren . 2019. Springer International Publishing.
\bibitem[Cho et al. ()]{b0}\label{b0} ‘Basic Enhancement Strategies When Using Bayesian Optimization for Hyperparameter Tuning of Deep Neural Networks’. H Cho , Y Kim , E Lee , D Choi , Y Lee , W Rhee . \xref{http://dx.doi.org/10.1109/access.2020.2981072}{10.1109/access.2020.2981072}. \textit{IEEE Access} 2020. 8 p. .
\bibitem[Cho et al. ()]{b24}\label{b24} ‘Basic Enhancement Strategies When Using Bayesian Optimization for Hyperparameter Tuning of Deep Neural Networks’. H Cho , Y Kim , E Lee , D Choi , Y Lee , W Rhee . \xref{http://dx.doi.org/10.1109/access.2020.2981072}{10.1109/access.2020.2981072}. \textit{IEEE Access} 2020. 8 p. .
\bibitem[Hutter et al. ()]{b2}\label{b2} ‘Beyond manual tuning of hyperparameters’. F Hutter , J Lücke , L Schmidt-Thieme . \textit{DISKI} 2015. 29 (4) p. .
\bibitem[Chan and Treleaven ()]{b29}\label{b29} \textit{Continuous Model Selection for Large-Scale Recommender Systems. Handbook of Statistics Big Data Analytics}, S Chan , P Treleaven . \xref{http://dx.doi.org/10.1016/b978-0-444-63492-4.00005-8}{10.1016/b978-0-444-63492-4.00005-8}. 2015. p. .
\bibitem[Zhang et al. ()]{b19}\label{b19} ‘Crystalline behaviors and phase transition during the manufacture of fine denier PA6 fibers’. C Zhang , Y Liu , S Liu . \xref{http://dx.doi.org/10.1007/s11426-009-0242-5}{10.1007/s11426-009-0242-5}. \url{https://doi.org/10.1007/s11426-009-0242-5} \textit{Sci. China Ser. B-Chem} 2009. 52 p. 1835.
\bibitem[Zhang et al. ()]{b18}\label{b18} ‘Deep Neural Network Hyperparameter Optimization with Orthogonal Array Tuning’. X Zhang , X Chen , L Yao , C Ge , M Dong . \xref{http://dx.doi.org/10.1007/978-3-030-36808-1_31}{10.1007/978-3-030-36808-1\textunderscore 31}. \textit{Computer and Information Science Neural Information Processing}, 2019. p. .
\bibitem[Jones et al. ()]{b9}\label{b9} ‘Efficient global optimization of expensive black-box functions’. D R Jones , M Schonlau , W J Welch . \textit{J. Glob. Optim} 1998. 13 p. .
\bibitem[Ghawi and Pfeffer ()]{b16}\label{b16} ‘Efficient Hyperparameter Tuning with Grid Search for Text Categorization using kNN Approach with BM25 Similarity’. R Ghawi , J Pfeffer . \xref{http://dx.doi.org/10.1515/comp-2019-0011}{10.1515/comp-2019-0011}. \textit{Open Computer Science} 2019. 9 (1) p. .
\bibitem[Friedrichs and Igel ()]{b3}\label{b3} ‘Evolutionary tuning of multiple SVM parameters’. F Friedrichs , C Igel . \textit{Neurocomputing} 2005. 64 p. .
\bibitem[Swift and Booker ()]{b23}\label{b23} \textit{Forming Processes. Manufacturing Process Selection Handbook}, K Swift , J Booker . \xref{http://dx.doi.org/10.1016/b978-0-08-099360-7.00004-5}{10.1016/b978-0-08-099360-7.00004-5}. 2013. p. .
\bibitem[Seeger ()]{b36}\label{b36} ‘Gaussian Processes For Machine Learning’. M Seeger . \xref{http://dx.doi.org/10.1142/s0129065704001899}{10.1142/s0129065704001899}. \textit{International Journal of Neural Systems} 2004. 14 (02) p. .
\bibitem[Elmogahzy ()]{b21}\label{b21} \textit{Handbook of Properties of Textile and Technical Fibres || Tensile properties of cotton fibers}, Yehia Elmogahzy . \xref{http://dx.doi.org/10.1016/B978-0-08-101272-7.00007-9}{10.1016/B978-0-08-101272-7.00007-9}. 2018. p. .
\bibitem[Hazan et al. ()]{b34}\label{b34} E Hazan , A Klivans , Y Yuan . arXiv:1706.00764. \url{https://arxiv.org/abs/1706.00764} \textit{Hyperparameter optimization: a spectral approach}, 2017. (arXiv preprint)
\bibitem[Yu and Zhu ()]{b33}\label{b33} \textit{Hyper-Parameter Optimization: A Review of Algorithms and Applications}, T Yu , H Zhu . \url{https://arxiv.org/abs/2003.05689} 2020.
\bibitem[Beyramysoltan et al. ()]{b17}\label{b17} ‘Investigation of the equality constraint effect on the reduction of the rotational ambiguity in threecomponent system using a novel grid search method’. S Beyramysoltan , R Rajkó , H Abdollahi . \xref{http://dx.doi.org/10.1016/j.aca.2013.06.043}{10.1016/j.aca.2013.06.043}. \textit{Analytica Chimica Acta} 2013. 791 p. .
\bibitem[Klein et al. ()]{b13}\label{b13} ‘Learning curve prediction with Bayesian neural networks’. A Klein , S Falkner , J T Springenberg , F Hutter . \textit{International Conference On Learning Representation (ICLR}, 2017.
\bibitem[Calandra et al. (2016)]{b10}\label{b10} ‘Manifold Gaussian processes for regression’. R Calandra , J Peters , C E Rasmussen , M P Deisenroth . \textit{Proceedings of the 2016 International Joint Conference on Neural Networks}, (the 2016 International Joint Conference on Neural NetworksVancouver, BC, Canada) July 2016. p. .
\bibitem[Mantovani et al. ()]{b4}\label{b4} R G Mantovani , A L Rossi , J Vanschoren , B Bischl , A C De Carvalho . \textit{2015 International Joint Conference on Neural Networks (IJCNN)}, 2015. p. . (Effectiveness of random search in SVM hyper-parameter tuning)
\bibitem[Blair ()]{b22}\label{b22} ‘Materials and design for sports apparel’. K Blair . \xref{http://dx.doi.org/10.1533/9781845693664.1.60}{10.1533/9781845693664.1.60}. \textit{Materials in Sports Equipment} 2007. p. .
\bibitem[Menke ()]{b30}\label{b30} W Menke . \xref{http://dx.doi.org/10.1016/b978-0-12-397160-9.00009-6}{10.1016/b978-0-12-397160-9.00009-6}. \textit{Nonlinear Inverse Problems. Geophysical Data Analysis: Discrete Inverse Theory}, 2012. p. .
\bibitem[Amirabadi et al. ()]{b26}\label{b26} \textit{Novel suboptimal approaches for hyperparameter tuning of deep neural network}, M Amirabadi , M Kahaei , S Nezamalhosseini . \xref{http://dx.doi.org/10.1016/j.phycom.2020.101057}{10.1016/j.phycom.2020.101057}. 2020. Physical Communication. 41 p. 101057. (under the shelf of optical communication)
\bibitem[Yang and Shami ()]{b25}\label{b25} \textit{On hyperparameter optimization of machine learning algorithms: Theory}, L Yang , A Shami . 2020.
\bibitem[Li and Talwalkar ()]{b5}\label{b5} \textit{Random search and reproducibility for neural architecture search}, L Li , A Talwalkar . arXiv:1902.07638. 2019. (arXiv preprint)
\bibitem[Bergstra and Bengio ()]{b14}\label{b14} ‘Random Search for Hyper-Parameter Optimization’. J S Bergstra , Y Bengio . \textit{Journal of Machine Learning Research} 2012. 13 p. .
\bibitem[Bergstra and Bengio ()]{b32}\label{b32} ‘Random search for hyper-parameter optimization’. J Bergstra , Y Bengio . \url{http://dl.acm.org/citation.cfm?id=2188385.2188395} \textit{J. Mach. Learn. Res} 1532-4435. 2012. 13 p. .
\bibitem[Hutter et al. ()]{b37}\label{b37} ‘Sequential Model-Based Optimization for General Algorithm Configuration’. F Hutter , H H Hoos , K Leyton-Brown . Lecture Notes in Computer Science Coello C.A.C. (ed.) 2011. 2011. 6683. ((eds) Learning and Intelligent Optimization)
\bibitem[Snoek et al. ()]{b8}\label{b8} J Snoek , O Rippel , K Swersky , R Kiros , N Satish , N Sundaram . \textit{Scalable bayesian optimization using deep neural networks, in: International conference on machine learning}, 2015. p. .
\bibitem[Brereton ()]{b31}\label{b31} ‘Steepest Ascent, Steepest Descent, and Gradient Methods’. R Brereton . \xref{http://dx.doi.org/10.1016/b978-044452701-1.00037-5}{10.1016/b978-044452701-1.00037-5}. \textit{Comprehensive Chemometrics} 2009. p. .
\bibitem[Zhang et al. ()]{b15}\label{b15} ‘Support Vector Regression Based on Grid-Search Method for Short-Term Wind Power Forecasting’. H Zhang , L Chen , Y Qu , G Zhao , Z Guo . \xref{http://dx.doi.org/10.1155/2014/835791}{10.1155/2014/835791}. \textit{Journal of Applied Mathematics} 2014. 2014. p. .
\bibitem[Eggensperger et al. ()]{b6}\label{b6} ‘Towards an empirical foundation for assessing bayesian optimization of hyperparameters’. K Eggensperger , M Feurer , F Hutter , J Bergstra , J Snoek , H Hoos , K Leyton-Brown . \textit{NIPS workshop on Bayesian Optimization in Theory and Practice}, 2013. 10 p. 3.
\bibitem[Joe (2020)]{b20}\label{b20} ‘What Is Denier Rating? Why Does It Matter To You?’. Joe . \url{https://www.digitravelist.com/what-is-denier-rating/} \textit{Digi Travelist} 2020. May 5.
\end{bibitemlist}
\end{document}