Text Practice Mode
LaTeX practice
created Nov 6th 2019, 08:41 by Anders Bredesen Hatlelid
\documentclass{article}
\usepackage[utf8]{inputenc}
\usepackage{amsmath}
\usepackage{bm}
\usepackage{mathtools}
\begin{document}
\section*{5.09}
=============================================\\
In the case of linear discriminant analysis with K=2 and equal a priori probability for the two groups, show that $d_{1}(x) > d_{2}(x)$ takes the form
\begin{equation*}
(\mu_{1} - \mu_{2})^\top \Sigma^{-1}(x - \mu) > 0
\end{equation*}
where $\mu = \frac{1}{2}(\mu_{1} + \mu_{2})$.
\\
=============================================\\
\\
This is LDA with $K=2$ and equal a priori probabilities.
Formula for $d_{k}(x)$: p. 155.
\\
Extra: \\
\begin{enumerate}
\item
What is $x$?
\item Same $x$?
\item
What is $d$?\\
The discriminant function.
\item
What is $\Sigma$?
\item
Why is $\Sigma$ inverted?
\item
What is $\mu$?
\item
Why take the difference?
\item
What does it mean to have equal a priori probabilities?
\end{enumerate}
\newpage
*******************\\
$K$: the number of classes. Since $K = 2$, the problem is binary. \\
$d_{k}$: the discriminant function for class $k$.\\
$d_{k}(x_{0}) = \log \pi_{k} + \log p_{k}(x_0)$\\
$d_{k}(x) = \log \pi_{k} - \frac{1}{2}\mu_{k}^\top\Sigma^{-1}\mu_{k} + x^\top\Sigma^{-1}\mu_{k}$\\
A priori probability: $\pi_{k}$. The priors are said to be equal, so $\pi_{1} = \pi_{2}$.\\
\\
We then have:\\
$d_{1}(x) = \log \pi_{1} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} + x^\top\Sigma^{-1}\mu_{1}$ and:\\
$d_{2}(x) = \log \pi_{2} - \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} + x^\top\Sigma^{-1}\mu_{2}$\\
*******************\\
\begin{equation*}
d_{1}(x) > d_{2}(x)
\end{equation*}
\\
Substitute for $d_{1}$ and $d_{2}$
\begin{equation*}
\log \pi_{1} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} + x^\top\Sigma^{-1}\mu_{1} > \log \pi_{2} - \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} + x^\top\Sigma^{-1}\mu_{2}
\end{equation*}
\\
Since $\pi_{1} = \pi_{2}$, the $\log \pi$ terms cancel:
\begin{equation*}
- \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} + x^\top\Sigma^{-1}\mu_{1} > - \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} + x^\top\Sigma^{-1}\mu_{2}
\end{equation*}
\\
Rearranging terms
\begin{equation*}
\frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} - x^\top\Sigma^{-1}\mu_{2} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} + x^\top\Sigma^{-1}\mu_{1} > 0
\end{equation*}
\begin{equation*}
x^\top\Sigma^{-1}\mu_{1} - x^\top\Sigma^{-1}\mu_{2} + \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} > 0
\end{equation*}
\\
Matrix distributive law: $A(B + C) = AB + AC$
\begin{equation*}
x^\top(\Sigma^{-1}\mu_{1} - \Sigma^{-1}\mu_{2}) + \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} > 0
\end{equation*}
\\
Matrix distributive law again:
\begin{equation*}
x^\top(\Sigma^{-1}(\mu_{1} - \mu_{2})) + \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} > 0
\end{equation*}
\\
Associative law: $A(BC) = (AB)C$
\begin{equation*}
x^\top\Sigma^{-1}(\mu_{1} - \mu_{2}) + \frac{1}{2}\mu_{2}^\top\Sigma^{-1}\mu_{2} - \frac{1}{2}\mu_{1}^\top\Sigma^{-1}\mu_{1} > 0
\end{equation*}
\\
Factoring out $\frac{1}{2}$:
\begin{equation*}
x^\top\Sigma^{-1}(\mu_{1} - \mu_{2}) + \frac{1}{2}(\mu_{2}^\top\Sigma^{-1}\mu_{2} - \mu_{1}^\top\Sigma^{-1}\mu_{1}) > 0
\end{equation*}
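To reach the form asked for, use that $\Sigma^{-1}$ is symmetric, so $\mu_{1}^\top\Sigma^{-1}\mu_{2} = \mu_{2}^\top\Sigma^{-1}\mu_{1}$ and hence $\mu_{1}^\top\Sigma^{-1}\mu_{1} - \mu_{2}^\top\Sigma^{-1}\mu_{2} = (\mu_{1} - \mu_{2})^\top\Sigma^{-1}(\mu_{1} + \mu_{2})$; also $x^\top\Sigma^{-1}(\mu_{1} - \mu_{2}) = (\mu_{1} - \mu_{2})^\top\Sigma^{-1}x$, since a scalar equals its own transpose. The inequality then becomes
\begin{equation*}
(\mu_{1} - \mu_{2})^\top\Sigma^{-1}x - \frac{1}{2}(\mu_{1} - \mu_{2})^\top\Sigma^{-1}(\mu_{1} + \mu_{2}) > 0
\end{equation*}
\begin{equation*}
(\mu_{1} - \mu_{2})^\top\Sigma^{-1}\Big(x - \frac{1}{2}(\mu_{1} + \mu_{2})\Big) > 0
\end{equation*}
which is $(\mu_{1} - \mu_{2})^\top \Sigma^{-1}(x - \mu) > 0$ with $\mu = \frac{1}{2}(\mu_{1} + \mu_{2})$, as required.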
************************************************\\
\\
\\
\\
Limbo.\\
Linear discriminant analysis when $p > 1$. We have $K$ classes. Each observation consists of a single response variable and some explanatory variables; there are $p$ explanatory variables. When $p = 1$, we use (4.13). For each observation we know which class it belongs to, since the response is one of the classes, i.e. the response is qualitative (categorical). We want to estimate coefficients for the explanatory variables, so that when we obtain an observation of $p$ variables, we can put it in the class for which $\delta_{k}$ is largest. \\
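Written compactly (using $\hat{G}(x)$ as my own shorthand for the predicted class; that symbol is not from the text), the rule in the paragraph above is:
\begin{equation*}
\hat{G}(x) = \operatorname*{arg\,max}_{k \in \{1, \dots, K\}} \delta_{k}(x)
\end{equation*}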
But when $p > 1$, we have a vector $x = (x_{1}, \dots , x_{p})$. This has $E(X) = \mu = (\mu_{1}, \dots, \mu_{p})$. The predictors in $x$ may be correlated, so the variance of $x$ is a covariance matrix $\Sigma$ of dimension $p \times p$. We then have $X \sim N(\mu, \Sigma)$, meaning that $X$ has a multivariate normal distribution.
\\
The density of $x$, $f(x)$, is given by:
\begin{equation*}
f(x) = \frac{1}{(2\pi)^{p/2}|\bm{\Sigma}|^{1/2}}\exp\Big(-\frac{1}{2}(x-\mu)^{\top}\bm{\Sigma}^{-1}(x - \mu)\Big)
\end{equation*}
\\
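A sketch of where the discriminant below comes from: take $\log\big(\pi_{k} f_{k}(x)\big)$, with $f_{k}$ the density above with class mean $\mu_{k}$, and expand the quadratic form (the constant collects the normalizing factor):
\begin{align*}
\log\big(\pi_{k} f_{k}(x)\big) &= \log \pi_{k} - \frac{1}{2}(x - \mu_{k})^{\top}\bm{\Sigma}^{-1}(x - \mu_{k}) + \text{const}\\
&= \log \pi_{k} + x^{\top}\bm{\Sigma}^{-1}\mu_{k} - \frac{1}{2}\mu_{k}^{\top}\bm{\Sigma}^{-1}\mu_{k} - \frac{1}{2}x^{\top}\bm{\Sigma}^{-1}x + \text{const}
\end{align*}
The last two terms are the same for every class $k$, so they can be dropped when comparing classes, which leaves: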
\begin{equation*}
\delta_{k}(x) = x^{\top}\bm{\Sigma}^{-1} \mu_{k} - \frac{1}{2} \mu^{\top}_{k}\bm{\Sigma}^{-1}\mu_{k} + \log \pi_{k}
\end{equation*}
\\
\\
$$\delta_{k}(x) = \delta_{k} \left( \begin{bmatrix} x_{11} \\ x_{12} \\ x_{13} \end{bmatrix} \right) $$
\begin{equation*}
\begin{array}{l}
\delta_{1}(x) = \\
\begin{bmatrix} x_{11} & x_{12} & x_{13} \end{bmatrix}
\begin{bmatrix}
\text{Cov}(x_{11},x_{11}) & \text{Cov}(x_{11},x_{12}) & \text{Cov}(x_{11},x_{13}) \\
\text{Cov}(x_{12},x_{11}) & \text{Cov}(x_{12},x_{12}) & \text{Cov}(x_{12},x_{13}) \\
\text{Cov}(x_{13},x_{11}) & \text{Cov}(x_{13},x_{12}) & \text{Cov}(x_{13},x_{13}) \\
\end{bmatrix}^{-1}
\begin{bmatrix} E_{1}(x_{11}) \\ E_{1}(x_{12}) \\ E_{1}(x_{13}) \end{bmatrix} \\
- \frac{1}{2}\begin{bmatrix}E_{1}(x_{11}) & E_{1}(x_{12}) & E_{1}(x_{13})\end{bmatrix}
\begin{bmatrix}
\text{Cov}(x_{11},x_{11}) & \text{Cov}(x_{11},x_{12}) & \text{Cov}(x_{11},x_{13}) \\
\text{Cov}(x_{12},x_{11}) & \text{Cov}(x_{12},x_{12}) & \text{Cov}(x_{12},x_{13}) \\
\text{Cov}(x_{13},x_{11}) & \text{Cov}(x_{13},x_{12}) & \text{Cov}(x_{13},x_{13}) \\
\end{bmatrix}^{-1}
\begin{bmatrix} E_{1}(x_{11}) \\ E_{1}(x_{12}) \\ E_{1}(x_{13}) \end{bmatrix} \\
+ \log \pi_{1}
\end{array}
\end{equation*}
\\
\begin{equation*}
\begin{array}{l}
\delta_{2}(x) = \\
\begin{bmatrix} x_{11} & x_{12} & x_{13} \end{bmatrix}
\begin{bmatrix}
\text{Cov}(x_{11},x_{11}) & \text{Cov}(x_{11},x_{12}) & \text{Cov}(x_{11},x_{13}) \\
\text{Cov}(x_{12},x_{11}) & \text{Cov}(x_{12},x_{12}) & \text{Cov}(x_{12},x_{13}) \\
\text{Cov}(x_{13},x_{11}) & \text{Cov}(x_{13},x_{12}) & \text{Cov}(x_{13},x_{13}) \\
\end{bmatrix}^{-1}
\begin{bmatrix} E_{2}(x_{11}) \\ E_{2}(x_{12}) \\ E_{2}(x_{13}) \end{bmatrix} \\
- \frac{1}{2}\begin{bmatrix}E_{2}(x_{11}) & E_{2}(x_{12}) & E_{2}(x_{13})\end{bmatrix}
\begin{bmatrix}
\text{Cov}(x_{11},x_{11}) & \text{Cov}(x_{11},x_{12}) & \text{Cov}(x_{11},x_{13}) \\
\text{Cov}(x_{12},x_{11}) & \text{Cov}(x_{12},x_{12}) & \text{Cov}(x_{12},x_{13}) \\
\text{Cov}(x_{13},x_{11}) & \text{Cov}(x_{13},x_{12}) & \text{Cov}(x_{13},x_{13}) \\
\end{bmatrix}^{-1}
\begin{bmatrix} E_{2}(x_{11}) \\ E_{2}(x_{12}) \\ E_{2}(x_{13}) \end{bmatrix} \\
+ \log \pi_{2}
\end{array}
\end{equation*}
\end{document}