\documentclass[11pt,twoside]{article}\makeatletter
% Prefer xcolor when installed; fall back to the older color package.
\IfFileExists{xcolor.sty}%
{\RequirePackage{xcolor}}%
{\RequirePackage{color}}
\usepackage{colortbl}
\usepackage{wrapfig}
% Engine branch: XeTeX gets fontspec/Unicode fonts, pdfTeX gets inputenc/ucs.
\usepackage{ifxetex}
\ifxetex
\usepackage{fontspec}
\usepackage{xunicode}
% Make three Unicode glyphs active so the generated body can print
% backslash and curly braces literally via these code points.
\catcode`⃥=\active \def⃥{\textbackslash}
\catcode`❴=\active \def❴{\{}
\catcode`❵=\active \def❵{\}}
\def\textJapanese{\fontspec{Noto Sans CJK JP}}
\def\textChinese{\fontspec{Noto Sans CJK SC}}
\def\textKorean{\fontspec{Noto Sans CJK KR}}
\setmonofont{DejaVu Sans Mono}
\else
% pdfTeX path: use utf8x/ucs when available (wider coverage), else plain utf8.
\IfFileExists{utf8x.def}%
{\usepackage[utf8x]{inputenc}
\PrerenderUnicode{–}
}%
{\usepackage[utf8]{inputenc}}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{float}
\usepackage[]{ucs}
% Explicit fallback glyphs for code points ucs cannot otherwise map.
\uc@dclc{8421}{default}{\textbackslash }
\uc@dclc{10100}{default}{\{}
\uc@dclc{10101}{default}{\}}
\uc@dclc{8491}{default}{\AA{}}
\uc@dclc{8239}{default}{\,}
\uc@dclc{20154}{default}{ }
\uc@dclc{10148}{default}{>}
\def\textschwa{\rotatebox{-90}{e}}
\def\textJapanese{}
\def\textChinese{}
\IfFileExists{tipa.sty}{\usepackage{tipa}}{}
\fi
\def\exampleFont{\ttfamily\small}
\DeclareTextSymbol{\textpi}{OML}{25}
\usepackage{relsize}
\RequirePackage{array}
% Patch of array.sty's tabular-preamble scanner \@testpach: the column
% declarators "(" and ")" take over the roles normally played by "<" and
% ">" (classes 8 and 9).  The column types below are therefore written as
% "){...}p{#1}" instead of ">{...}p{#1}".  All other classes match
% array.sty: c=0 l=1 r=2 (\@chnum); | ! @ m p b as usual; 10 marks an
% unknown token to be resolved against user-defined \newcolumntype's.
\def\@testpach{\@chclass
\ifnum \@lastchclass=6 \@ne \@chnum \@ne \else
\ifnum \@lastchclass=7 5 \else
\ifnum \@lastchclass=8 \tw@ \else
\ifnum \@lastchclass=9 \thr@@
\else \z@
\ifnum \@lastchclass = 10 \else
\edef\@nextchar{\expandafter\string\@nextchar}%
\@chnum
\if \@nextchar c\z@ \else
\if \@nextchar l\@ne \else
\if \@nextchar r\tw@ \else
\z@ \@chclass
\if\@nextchar |\@ne \else
\if \@nextchar !6 \else
\if \@nextchar @7 \else
\if \@nextchar (8 \else
\if \@nextchar )9 \else
10
\@chnum
\if \@nextchar m\thr@@\else
\if \@nextchar p4 \else
\if \@nextchar b5 \else
\z@ \@chclass \z@ \@preamerr \z@ \fi \fi \fi \fi
\fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi \fi}
% Restore \\ as the row terminator inside \raggedright/\centering cells.
\gdef\arraybackslash{\let\\=\@arraycr}
% Helper: typeset #1 as a text-mode subscript at script size.
\def\@textsubscript#1{{\m@th\ensuremath{_{\mbox{\fontsize\sf@size\z@#1}}}}}
% \Panel{content}{color}{ncols}{colspec}: colored multicolumn cell; the
% leading ")" acts as array.sty's ">" under the patched \@testpach above.
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% No-op wrappers for TEI editorial and name elements; this output style
% renders their content unchanged.
\def\abbr{}
\def\corr{}
\def\expan{}
\def\gap{}
\def\orig{}
\def\reg{}
% NOTE(review): this clobbers LaTeX's \ref; hyperref (loaded further
% down) redefines \ref again, so \label/\ref still work in the body.
\def\ref{}
\def\sic{}
\def\persName{}\def\name{}
\def\placeName{}
\def\orgName{}
% Decorative text macros used by the generated body.  The \fontspec-based
% ones are only usable under XeTeX (fontspec is loaded in the \ifxetex
% branch above).
\def\textcal#1{{\fontspec{Lucida Calligraphy}#1}}
\def\textgothic#1{{\fontspec{Lucida Blackletter}#1}}
\def\textlarge#1{{\large #1}}
% Overline rendered via math mode.
\def\textoverbar#1{\ensuremath{\overline{#1}}}
% Wrap #1 in typographic single quotes.
\def\textquoted#1{‘#1’}
\def\textsmall#1{{\small #1}}
% Text subscript built on the \@textsubscript helper defined above.
\def\textsubscript#1{\@textsubscript{\selectfont#1}}
\def\textxi{\ensuremath{\xi}}
% Font switch for titles of cited works.
\def\titlem{\itshape}
% Environments for TEI front-matter and bibliographic constructs; they
% mostly set a font/size and some vertical space around their content.
\newenvironment{biblfree}{}{\ifvmode\par\fi }
\newenvironment{bibl}{}{}
\newenvironment{byline}{\vskip6pt\itshape\fontsize{16pt}{18pt}\selectfont}{\par }
\newenvironment{citbibl}{}{\ifvmode\par\fi }
\newenvironment{docAuthor}{\ifvmode\vskip4pt\fontsize{16pt}{18pt}\selectfont\fi\itshape}{\ifvmode\par\fi }
\newenvironment{docDate}{}{\ifvmode\par\fi }
\newenvironment{docImprint}{\vskip 6pt}{\ifvmode\par\fi }
\newenvironment{docTitle}{\vskip6pt\bfseries\fontsize{22pt}{25pt}\selectfont}{\par }
\newenvironment{msHead}{\vskip 6pt}{\par}
\newenvironment{msItem}{\vskip 6pt}{\par}
\newenvironment{rubric}{}{}
\newenvironment{titlePart}{}{\par }
% Fixed-width paragraph columns with alignment presets.  The leading ")"
% is this preamble's substitute for array.sty's ">{...}" declarator (see
% the patched \@testpach above); \arraybackslash restores \\ inside the
% ragged/centred settings.
\newcolumntype{L}[1]{){\raggedright\arraybackslash}p{#1}}
\newcolumntype{C}[1]{){\centering\arraybackslash}p{#1}}
\newcolumntype{R}[1]{){\raggedleft\arraybackslash}p{#1}}
\newcolumntype{P}[1]{){\arraybackslash}p{#1}}
\newcolumntype{B}[1]{){\arraybackslash}b{#1}}
\newcolumntype{M}[1]{){\arraybackslash}m{#1}}
% Grey used to render unused attributes.
\definecolor{label}{gray}{0.75}
% Grey out and strike through an attribute that is rendered but unused.
\def\unusedattribute#1{\sout{\textcolor{label}{#1}}}
% \xref{url}{text}: external hyperlink.  The \hyper@... internals are
% supplied by hyperref, which is loaded later; \xref is only used after
% that point.
\DeclareRobustCommand*{\xref}{\hyper@normalise\xref@}
\def\xref@#1#2{\hyper@linkurl{#2}{#1}}
% Make "_" usable in running text: the active "_" typesets an upright
% subscript, and mathcode "8000 routes math-mode "_" through the same
% active definition.  Finally "_" is given catcode 12 (other).
\begingroup
\catcode`\_=\active
\gdef_#1{\ensuremath{\sb{\mathrm{#1}}}}
\endgroup
\mathcode`\_=\string"8000
\catcode`\_=12\relax
\usepackage[a4paper,twoside,lmargin=1in,rmargin=1in,tmargin=1in,bmargin=1in,marginparwidth=0.75in]{geometry}
\usepackage{framed}
% Background colour for shaded/framed blocks.
\definecolor{shadecolor}{gray}{0.95}
\usepackage{longtable}
\usepackage[normalem]{ulem}
\usepackage{fancyvrb}
\usepackage{fancyhdr}
\usepackage{graphicx}
\usepackage{marginnote}
% Citations print bare, without brackets.
\renewcommand{\@cite}[1]{#1}
\renewcommand*{\marginfont}{\itshape\footnotesize}
% Extension search order for \includegraphics when none is given.
\def\Gin@extensions{.pdf,.png,.jpg,.mps,.tif}
\pagestyle{fancy}
\usepackage[pdftitle={Content Based Data Retrieval on KNN-Classification and Cluster Analysis for Data Mining},
pdfauthor={}]{hyperref}
\hyperbaseurl{}
% A4 dimensions restated for the output driver (geometry already set them).
\paperwidth210mm
\paperheight297mm
% TOC layout parameters: page-number width, right margin, leader dot gap.
\def\@pnumwidth{1.55em}
\def\@tocrmarg {2.55em}
\def\@dotsep{4.5}
\setcounter{tocdepth}{3}
% Line- and page-breaking tuning: discourage club/widow lines, allow
% some emergency stretch before overfull boxes.
\clubpenalty=8000
\emergencystretch 3em
\hbadness=4000
\hyphenpenalty=400
\pretolerance=750
\tolerance=2000
\vbadness=4000
\widowpenalty=10000
% Sectioning commands re-tuned for tighter vertical spacing than
% article's defaults.  \@startsection arguments are {name}{level}{indent}
% {beforeskip}{afterskip}{style}; negative beforeskip suppresses the
% indentation of the following paragraph.
\renewcommand\section{\@startsection {section}{1}{\z@}%
{-1.75ex \@plus -0.5ex \@minus -.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\Large\bfseries}}
\renewcommand\subsection{\@startsection{subsection}{2}{\z@}%
{-1.75ex\@plus -0.5ex \@minus- .2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\Large}}
\renewcommand\subsubsection{\@startsection{subsubsection}{3}{\z@}%
{-1.5ex\@plus -0.35ex \@minus -.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\large}}
\renewcommand\paragraph{\@startsection{paragraph}{4}{\z@}%
{-1ex \@plus-0.35ex \@minus -0.2ex}%
{0.5ex \@plus .2ex}%
{\reset@font\normalsize}}
% Run-in subparagraph heading (negative afterskip).
\renewcommand\subparagraph{\@startsection{subparagraph}{5}{\parindent}%
{1.5ex \@plus1ex \@minus .2ex}%
{-1em}%
{\reset@font\normalsize\bfseries}}
% TOC entry for sections: bold, no dotted leader.
\def\l@section#1#2{\addpenalty{\@secpenalty} \addvspace{1.0em plus 1pt}
\@tempdima 1.5em \begingroup
\parindent \z@ \rightskip \@pnumwidth
\parfillskip -\@pnumwidth
\bfseries \leavevmode #1\hfil \hbox to\@pnumwidth{\hss #2}\par
\endgroup}
% Dotted TOC entries for deeper levels: \@dottedtocline{level}{indent}{numwidth}.
\def\l@subsection{\@dottedtocline{2}{1.5em}{2.3em}}
\def\l@subsubsection{\@dottedtocline{3}{3.8em}{3.2em}}
\def\l@paragraph{\@dottedtocline{4}{7.0em}{4.1em}}
\def\l@subparagraph{\@dottedtocline{5}{10em}{5em}}
% Ensure the counters exist (article has no \chapter of its own).
\@ifundefined{c@section}{\newcounter{section}}{}
\@ifundefined{c@chapter}{\newcounter{chapter}}{}
\newif\if@mainmatter
\@mainmattertrue
\def\chaptername{Chapter}
% Book-style matter switches grafted onto article: roman numbering in the
% front matter, arabic in the main matter, appendix lettering at the back.
\def\frontmatter{%
\pagenumbering{roman}
\def\thechapter{\@roman\c@chapter}
\def\theHchapter{\roman{chapter}}
\def\thesection{\@roman\c@section}
\def\theHsection{\roman{section}}
\def\@chapapp{}%
}
\def\mainmatter{%
\cleardoublepage
\def\thechapter{\@arabic\c@chapter}
\setcounter{chapter}{0}
\setcounter{section}{0}
\pagenumbering{arabic}
\setcounter{secnumdepth}{6}
\def\@chapapp{\chaptername}%
% \theH... variants keep hyperref anchor names unique across matters.
\def\theHchapter{\arabic{chapter}}
\def\thesection{\@arabic\c@section}
\def\theHsection{\arabic{section}}
}
\def\backmatter{%
\cleardoublepage
\setcounter{chapter}{0}
\setcounter{section}{0}
\setcounter{secnumdepth}{2}
\def\@chapapp{\appendixname}%
\def\thechapter{\@Alph\c@chapter}
\def\theHchapter{\Alph{chapter}}
\appendix
}
% Numbered reference-list environment ([1], [2], ...) modelled on
% standard thebibliography; #1 is the widest label, used to size the
% hanging indent.  \sfcode`\. \@m disables extra sentence space after
% periods inside entries.
\newenvironment{bibitemlist}[1]{%
\list{\@biblabel{\@arabic\c@enumiv}}%
{\settowidth\labelwidth{\@biblabel{#1}}%
\leftmargin\labelwidth
\advance\leftmargin\labelsep
\@openbib@code
\usecounter{enumiv}%
\let\p@enumiv\@empty
\renewcommand\theenumiv{\@arabic\c@enumiv}%
}%
\sloppy
\clubpenalty4000
\@clubpenalty \clubpenalty
\widowpenalty4000%
\sfcode`\.\@m}%
{\def\@noitemerr
{\@latex@warning{Empty `bibitemlist' environment}}%
\endlist}
% Plain, unnumbered TOC heading.
\def\tableofcontents{\section*{\contentsname}\@starttoc{toc}}
\parskip0pt
\parindent1em
% NOTE(review): identical duplicate of the \Panel definition given
% earlier in this preamble; harmless, but one of the two could go.
\def\Panel#1#2#3#4{\multicolumn{#3}{){\columncolor{#2}}#4}{#1}}
% reflist: ragged-right list for reference entries with italic labels
% and tight vertical spacing (no extra space above, below, or between
% items; 2pt between paragraphs of one item).
\newenvironment{reflist}
{\begin{raggedright}%
\begin{list}{}{%
\topsep\z@
\rightmargin0.25in
\itemsep\z@
\itemindent\z@
\parskip\z@
\parsep2pt
\def\makelabel##1{\itshape ##1}}%
}
{\end{list}\end{raggedright}}
% sansreflist: same tight ragged-right list as reflist, but the labels
% stay upright instead of italic.
\newenvironment{sansreflist}
{\begin{raggedright}%
\begin{list}{}{%
\topsep\z@
\rightmargin0.25in
\itemindent\z@
\parskip\z@
\itemsep\z@
\parsep2pt
\def\makelabel##1{\upshape ##1}}%
}
{\end{list}\end{raggedright}}
% specHead{label}{title}: rule + hanging bold heading for specification
% entries; also sets the running head, a \label anchor, and a PDF
% bookmark for the title.
\newenvironment{specHead}[2]%
{\vspace{20pt}\hrule\vspace{10pt}%
\phantomsection\label{#1}\markright{#2}%
\pdfbookmark[2]{#2}{#1}%
\hspace{-0.75in}{\bfseries\fontsize{16pt}{18pt}\selectfont#2}%
}{}
% Article metadata emitted by the converter.
\def\TheFullDate{2012-05-15 (revised: 15 May 2012)}
% NOTE(review): \TheID expands to \makeatother and is used in the page
% footers below, where it prints nothing visible — presumably a
% converter artifact; confirm before relying on it.
\def\TheID{\makeatother }
\def\TheDate{2012-05-15}
\title{Content Based Data Retrieval on KNN-Classification and Cluster Analysis for Data Mining}
\author{}\makeatletter
\makeatletter
% Flush pending floats and advance to the next *left-hand* (even) page,
% mirroring \cleardoublepage (which advances to a right-hand page).
% No-op padding pages are emitted only in twoside mode.
\newcommand*{\cleartoleftpage}{%
\clearpage
\if@twoside
\ifodd\c@page
\hbox{}\newpage
% In two-column mode one \newpage only ends the column; emit a second
% one to really start a fresh page.
\if@twocolumn
\hbox{}\newpage
\fi
\fi
\fi
}
\makeatother
\makeatletter
% First page carries no header/footer; running marks use title/author.
\thispagestyle{empty}
\markright{\@title}\markboth{\@title}{\@author}
% \small re-tuned: 9pt on 11pt leading, with matching display skips and
% tighter first-level list spacing.
\renewcommand\small{\@setfontsize\small{9pt}{11pt}\abovedisplayskip 8.5\p@ plus3\p@ minus4\p@
\belowdisplayskip \abovedisplayskip
\abovedisplayshortskip \z@ plus2\p@
\belowdisplayshortskip 4\p@ plus2\p@ minus2\p@
\def\@listi{\leftmargin\leftmargini
\topsep 2\p@ plus1\p@ minus1\p@
\parsep 2\p@ plus\p@ minus\p@
\itemsep 1pt}
}
\makeatother
% Verbatim blocks: framed, 5mm side margins, blank lines unnumbered.
\fvset{frame=single,numberblanklines=false,xleftmargin=5mm,xrightmargin=5mm}
% Running headers/footers: bold marks in the header (by page parity),
% page number centred in the footer, \TheID in the remaining outer slot.
\fancyhf{}
\setlength{\headheight}{14pt}
\fancyhead[LE]{\bfseries\leftmark}
\fancyhead[RO]{\bfseries\rightmark}
\fancyfoot[RO]{}
\fancyfoot[CO]{\thepage}
\fancyfoot[LO]{\TheID}
\fancyfoot[LE]{}
\fancyfoot[CE]{\thepage}
\fancyfoot[RE]{\TheID}
% Light grey link borders; numbered PDF bookmarks.
\hypersetup{citebordercolor=0.75 0.75 0.75,linkbordercolor=0.75 0.75 0.75,urlbordercolor=0.75 0.75 0.75,bookmarksnumbered=true}
% "plain" pages (e.g. chapter openers) get no header and no rule.
\fancypagestyle{plain}{\fancyhead{}\renewcommand{\headrulewidth}{0pt}}
\usepackage{authblk}
% \keywords{...}: print the article's index-terms line.
% Fix: the original body applied \footnotesize without a surrounding
% group, so the reduced font size leaked into all text typeset after the
% keyword list.  The switch is now scoped (with \par inside the group so
% the footnote-size baseline is used), and line ends are guarded with
% "%" so the macro adds no spurious spaces.
\providecommand{\keywords}[1]{%
{\footnotesize
\textbf{\textit{Index terms---}} #1\par}%
}
\usepackage{graphicx,xcolor}
% Journal brand palette (Global Journals).
\definecolor{GJBlue}{HTML}{273B81}
\definecolor{GJLightBlue}{HTML}{0A9DD9}
\definecolor{GJMediumGrey}{HTML}{6D6E70}
\definecolor{GJLightGrey}{HTML}{929497}
% Abstract restyled as a full-width ragged-right block between two grey
% rules, with the heading in the journal blue.
\renewenvironment{abstract}{%
\setlength{\parindent}{0pt}\raggedright
\textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
\vskip16pt
\textcolor{GJBlue}{\large\bfseries\abstractname\space}
}{%
\vskip8pt
\textcolor{GJMediumGrey}{\rule{\textwidth}{2pt}}
\vskip16pt
}
% Absolutely positioned text blocks (used for the banner on page 1).
\usepackage[absolute,overlay]{textpos}
\makeatother
% Number every line of the body.
\usepackage{lineno}
\linenumbers
\begin{document}
\author[1]{H K Sawant}
\author[2]{H K Sawant}
\affil[1]{ BVU}
\renewcommand\Authands{ and }
\date{\small \em Received: 7 April 2012 Accepted: 3 May 2012 Published: 15 May 2012}
\maketitle
\begin{abstract}
Data mining is sorting through data to identify patterns and establish relationships. Data mining parameters include: Regression --- In statistics, regression analysis includes any techniques for modeling and analyzing several variables, when the focus is on the relationship between a dependent variable and one or more independent variables. Sequence or path analysis --- looking for patterns where one event leads to another later event. Classification --- looking for new patterns. Clustering --- finding and visually documenting groups. Decision Trees --- Decision trees are commonly used in operations research, specifically in decision analysis, to help identify a strategy most likely to reach a goal.
\end{abstract}
\keywords{}
\begin{textblock*}{18cm}(1cm,1cm) % {block width} (coords)
\textcolor{GJBlue}{\LARGE Global Journals \LaTeX\ JournalKaleidoscope\texttrademark}
\end{textblock*}
\begin{textblock*}{18cm}(1.4cm,1.5cm) % {block width} (coords)
\textcolor{GJBlue}{\footnotesize \\ Artificial Intelligence formulated this projection for compatibility purposes from the original article published at Global Journals. However, this technology is currently in beta. \emph{Therefore, kindly ignore odd layouts, missed formulae, text, tables, or figures.}}
\end{textblock*}
\let\tabcellsep& \par
Data mining is an iterative process that typically involves the following phases: a) Problem definition : A data mining project starts with the understanding of the business problem. Data mining experts, business experts, and domain experts work closely together to define the project objectives and the requirements from a business perspective. The project objective is then translated into a data mining problem definition. In the problem definition phase, data mining tools are not yet required. b) Data exploration : Domain experts understand the meaning of the metadata. They collect, describe, and explore the data. They also identify quality problems of the data. A frequent exchange with the data mining experts and the business experts from the problem definition phase is vital.\par
In the data exploration phase, traditional data analysis tools, for example, statistics, are used to explore the data. c) Data preparation : Domain experts build the data model for the modeling process. They collect, cleanse, and format the data because some of the mining functions accept data only in a certain format. They also create new derived attributes, for example, an average value. In the data preparation phase, data is tweaked multiple times in no prescribed order. Preparing the data for the modeling tool by selecting tables, records, and attributes, are typical tasks in this phase. The meaning of the data is not changed. Raw Data: Raw data is a term for data collected on source which has not been subjected to processing or any other manipulation; it is also known as primary data. It is a relative term (see data). Raw data can be input to a computer program or used in manual analysis procedures such as gathering statistics from a survey. It can refer to the binary data on electronic storage devices such as hard disk drives (also referred to as low-level data). Suppose that the data for a feature v are in a range between 150 and 250. Then, the previous method of normalization will give all normalized data between .15 and .25; but it will accumulate the values on a small subinterval of the entire range. To obtain better distribution of values on a whole, normalized interval, e.g., [0, 1], we can use the min-max formula \(v' = (v - \min(v))/(\max(v) - \min(v))\). d) Standard Deviation Normalization\par
Normalization by standard deviation often works well with distance measures, but transforms the data into a form unrecognizable from the original data: \(v' = (v - \mathrm{mean}(v))/\mathrm{std}(v)\).
\section[{Types of Data}]{Types of Data}\par
Categorical Data: Categorical data (or variable) consists of names representing categories. For example, the gender (categories of male \& female) of the people where you work or go to school; or the make of cars in the parking lot (categories of Ford, GM, Toyota, Mazda, KIA, etc) is categorical data.\par
Numerical Data: Numerical data (or variable) consists of numbers that represent counts or measurements. For example, the number of males \& females where you work or go to school; or the number of the make of cars Ford, GM, Toyota, Mazda, KIA, etc is numerical data.\par
Dummy Variable: A dummy variable is a numerical variable used in regression analysis to represent subgroups of the sample in your study.\par
Discrete Variable: Discrete Variable are also called Qualitative Variable. It is nominal or ordinal.\par
Continuous Variable: Continuous variable are measured using interval scale or ratio scale.\par
Data reduction means reducing the number of cases or variables in a data matrix. The basic operations in a data-reduction process are: delete a column, delete a row, and reduce the number of values in a column. These operations attempt to preserve the character of the original data by deleting data that are nonessential. There are other operations that reduce dimensions, but the new data are unrecognizable when compared to the original data set, and these operations are mentioned here just briefly because they are highly application-dependent.
\section[{a) Entropy}]{a) Entropy}\par
A method for unsupervised feature selection or ranking based on entropy measure is a relatively simple technique; but with a large number of features its complexity increases significantly.\par
The similarity measure between two samples can be defined as \(S_{ij} = e^{-\alpha D_{ij}}\), where \(\alpha\) is a parameter and \(D\) is the average distance among samples in the data set. Hence, \(\alpha\) is determined by the data. But, in a successfully implemented practical application, a constant value of \(\alpha = 0.5\) was used. Normalized Euclidean distance measure is used to calculate the distance \(D_{ij}\) between two samples \(x_i\) and \(x_j\):\par
where n is the number of dimensions and max(k) and min(k) are maximum and minimum values used for normalization of the k-th dimension. All features are not numeric. The similarity for nominal variables is measured directly using Hamming distance.\par
where The total number of variables is equal to n. For mixed data, we can discretize numeric values (Binning) and transform numeric features into nominal features before we apply this similarity measure.\par
If the two measures are close, then the reduced set of features will satisfactorily approximate the original set. For a data set of \(N\) samples, the entropy measure is \(E = -\sum_{i=1}^{N-1}\sum_{j=i+1}^{N}\bigl(S_{ij}\log S_{ij} + (1 - S_{ij})\log(1 - S_{ij})\bigr)\), where \(S_{ij}\) is the similarity between samples \(x_i\) and \(x_j\). This measure is computed in each of the iterations as a basis for deciding the ranking of features. We rank features by gradually removing the least important feature in maintaining the order in the
\section[{March}]{March}\par
Where \(D_{ij}\) is the distance between the two samples \(x_i\) and \(x_j\), and \(\alpha\) is a parameter mathematically expressed in terms of the average distance among the configurations of data. The steps of the algorithm are based on sequential backward ranking, and they have been successfully tested on several real-world applications.
\section[{b) Linear Regression}]{b) Linear Regression}\par
In statistics, linear regression refers to any approach to modeling the relationship between one or more variables denoted y and one or more variables denoted X, such that the model depends linearly on the unknown parameters to be estimated from the data.\par
Linear regression has many practical uses. Most applications of linear regression fall into one of the following two broad categories:\par
If the goal is prediction, or forecasting, linear regression can be used to fit a predictive model to an observed data set of y and X values. After developing such a model, if an additional value of X is then given without its accompanying value of y, the fitted model can be used to make a prediction of the value of y. Given a variable y and a number of variables X 1 , ..., X p that may be related to y, then linear regression analysis can be applied to quantify the strength of the relationship between y and the X j , to assess which X j may have no relationship with y at all, and to identify which subsets of the X j contain redundant information about y, thus once one of them is known, the others are no longer informative.\par
The core task of Data Mining Model is the application of the appropriate mining function to your data to build mining models that answer your business questions. Administrative tasks such as retrieving progress information or interpreting error messages support this task. Data Mining Process The Missing value technique used in these type of project is to take the mean of that feature but the data set which I have choose for the project have no missing values.
\section[{d) Outlier Analysis}]{d) Outlier Analysis}\par
The technique used by data set to remove the outlier values is the Deviation based technique in which the human can easily distinguish unusual samples from a set of other similar samples.\par
After examining each and every data cluster, we obtain data set which contains no outlier.
\section[{e) Data Reduction}]{e) Data Reduction}\par
The term data reduction in the context of data mining is usually applied to projects where the goal is to aggregate the information contained in large data sets into manageable (smaller) information nuggets. Data reduction methods can include simple tabulation, aggregation (computing descriptive statistics) or more sophisticated techniques like principal component analysis.\par
Since the data which I have used in the project is not so huge therefore there is no need of applying the data reduction because it could lead to the loss of information from the data.
\section[{f) Model Estimation}]{f) Model Estimation}\par
A model can be defined as a number of examples or a mathematical relationship. Data mining experts select and apply various mining functions because we can use different mining functions for the same type of data mining problem. Some of the mining functions require specific data types.
\section[{g) Linear Regression}]{g) Linear Regression}\par
Regression: The purpose of this model function is to map a data item to a real-valued prediction variable.\par
The goal of regression is to build a concise model of the distribution of the dependent attribute in terms of the predictor attributes. The resulting model is used to assign values to a database of testing records, where the values of the predictor attributes are known but the dependent attribute is to be determined.\par
The value \(r^2\) is a fraction between 0.0 and 1.0, and has no units. An \(r^2\) value of 0.0 means that knowing X does not help you predict Y. There is no linear relationship between X and Y, and the best-fit line is a horizontal line going through the mean of all Y values. Since the error is very small, the result which we get after applying the model is very close to the final result. The graph between observed and fitted values is shown in the figure.\par
The normal probability plot is a special case of the probability plot. We cover the normal probability plot separately due to its importance in many applications. The normal probability plot is formed by: vertical axis --- ordered response values; horizontal axis --- normal order statistic medians. The normal probability plot is shown in the figure. h) Cluster Analysis: Cluster analysis or clustering is the assignment of a set of observations into subsets (called clusters) so that observations in the same cluster are similar in some sense. Clustering is a method of unsupervised learning, and a common technique for statistical data analysis used in many fields, including machine learning and data mining. Divisive clustering is a ``top down'' approach: all observations start in one cluster, and splits are performed recursively as one moves down the hierarchy. The K-means partitional-clustering algorithm is the simplest and most commonly used algorithm employing a square-error criterion.\par
It starts with a random, initial partition and keeps reassigning the samples to clusters, based on the similarity between samples and clusters, until a convergence criterion is met. The model in which every decision is based on the comparison of two numbers within constant time is called simply a decision tree model. It was introduced to establish computational complexity of sorting and searching, advantages of applying is Easy to understand, Map nicely to a set of business rules, Applied to real problems, Make no prior assumptions about the data, Able to process both numerical and categorical data.\par
Data mining techniques are used in a many research areas, including mathematics, cybernetics, genetics and marketing. Web mining, a type of data mining used in customer relationship management (CRM), takes advantage of the huge amount of information gathered by a Web site to look for patterns in user behavior.\begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-2.png}
\caption{\label{fig_0}}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-3.png}
\caption{\label{fig_1}}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-4.png}
\caption{\label{fig_2}}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-5.png}
\caption{\label{fig_3}}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{2}\includegraphics[]{image-6.png}
\caption{\label{fig_4}When r 2}\end{figure}
\begin{figure}[htbp]
\noindent\textbf{}\includegraphics[]{image-7.png}
\caption{\label{fig_5}}\end{figure}
\footnote{© 2012 Global Journals Inc. (US)} \footnote{© 2012 Global Journals Inc. (US) Global Journal of Computer Science and Technology Volume XII Issue V Version I} \footnote{© 2012 Global Journals Inc. (US)} \backmatter
\subsection[{This page is intentionally left blank}]{This page is intentionally left blank}\par
Global Journal of Computer Science and Technology Volume XII Issue V Version I \begin{bibitemlist}{1}
\bibitem[Yang and Wanli]{b12}\label{b12} \textit{A Clustering Algorithm Using Dynamic Nearest Neighbors Selection Model}, Jin Yang , Zuo Wanli .
\bibitem[Yang and Pedersen ()]{b10}\label{b10} \textit{A comparative study on feature selection in text categorization}, Y Yang , J Pedersen . 1997. p. .
\bibitem[Yi et al. ()]{b4}\label{b4} ‘A Fast KNN Algorithm Applied to Web Text Categorization’. Wang Yi , Bai Shi , Wang Zhang , ' Ou . \textit{Journal of The China Society for Scientific and Technical Information}, 2007. 26 p. .
\bibitem[Yu and Zhengguo (2007)]{b9}\label{b9} ‘A fast knn algorithm for text categorization’. Wang Yu , Wang Zhengguo . \textit{Proceedings of the Sixth International Conference on Machine Learning and Cybernetics}, (the Sixth International Conference on Machine Learning and CyberneticsHong Kong) 19-22 August 2007. p. .
\bibitem[Jianhui et al. ()]{b2}\label{b2} ‘A Simple and Efficient Algorithm to Classify a Large Scale of Texts’. Wang Jianhui , Wang Hongwei , Shen Zhan , Hu Yunfa . \textit{Journal of Computer Research and Development} 2005. 42 (1) p. .
\bibitem[Jinshu et al. ()]{b0}\label{b0} ‘Advances in Machine Learning Based Text Categorization’. Zhang Jinshu , Xu Bofeng , Xin . \textit{Journal of Software} 2006. 17 (9) p. .
\bibitem[Yuchang et al. ()]{b6}\label{b6} ‘Analysis and construction of word weighing function in vsm’. Lu Yuchang , Lu Mingyu , Li Fan . \textit{Journal of Computer Research and Development} 2002. 39 (10) p. .
\bibitem[Wang et al.]{b11}\label{b11} ‘Improving Chinese Text Categorization by Outlier Learning’. Xinhao Wang , Dingsheng Luo , Xihong Wu , Huisheng Chi . \textit{Proceeding ofNLP-KE'05 pp}, (eeding ofNLP-KE'05 pp) p. .
\bibitem[Sebastiani ()]{b5}\label{b5} ‘Machine learning in automated text categorization’. Fabrizio Sebastiani . \textit{ACM Computer Survey} 2002. 34 (1) p. .
\bibitem[Belur and Dasarathy ()]{b7}\label{b7} ‘Nearest Neighbor (NN) Norms NN Pattern Classification Techniques’. V Belur , Dasarathy . Mc Graw-Hill Computer Science Series 1991. IEEE Computer Society Press. p. .
\bibitem[Jinna ()]{b1}\label{b1} ‘Study on Categorization Algorithm of Chinese Text’. Ma Jinna . \textit{Dissertation of Master's Degree}, 2006. University of Shanghai for Science and Technology
\bibitem[Lihua et al. ()]{b8}\label{b8} \textit{Study on KNN Text Categorization Algorithm}, Yang Lihua , Dai Qi , Guo Yanjun . 2006. Micro Computer Information. p. .
\bibitem[Ying et al. ()]{b3}\label{b3} ‘Vector-Combination-Applied KNN Method for Chinese Text Categorization’. Li Ying , Zhang Xiaohui , Wang Huayong , Chang Guiran . \textit{Mini-Micro Systems}, 2004. 25 p. .
\end{bibitemlist}
\end{document}