HW3-3 Probability - Naguib

by bigdata2016

A naive Bayes model for binary classification using a single feature.

import matplotlib.pyplot as plt
import numpy as np
from sage.symbolic.integration.integral import definite_integral

samples = 500

def GaussPDF(x, mu, sigma):
    # Gaussian probability density function
    return (1/sqrt(2*pi*sigma**2)) * exp(-(x - mu)**2/(2*sigma**2))

def Intersect2Gaussians(mu1, sigma1, mu2, sigma2):
    # Points where two Gaussian pdfs are equal: setting the log-densities equal
    # yields a quadratic a*x^2 + b*x + c = 0 (assumes sigma1 != sigma2)
    gamma = 2*(sigma1**2)*(sigma2**2)*ln((1/sqrt(2*pi*sigma2**2))/(1/sqrt(2*pi*sigma1**2)))
    a = (sigma1**2) - (sigma2**2)
    b = 2*((sigma2**2)*mu1 - (sigma1**2)*mu2)
    c = (sigma1**2)*(mu2**2) - (sigma2**2)*(mu1**2) - gamma
    x1 = (-b + sqrt((b**2) - 4*a*c))/(2*a)
    x2 = (-b - sqrt((b**2) - 4*a*c))/(2*a)
    return [x1, x2]

def errorFunction(x):
    # Abramowitz & Stegun rational approximation of erf(x)
    t = 1/(1 + 0.3275911*abs(1.0*x))
    out = 1 - exp(-x*x)*(0.254829592*t - 0.284496736*t**2 + 1.421413741*t**3
                         - 1.453152027*t**4 + 1.061405429*t**5)
    if x <= 0:
        out = -out   # erf is odd
    return out

def IntegrateGaussian(mu, sigma, from_, to_):
    # Integral of the Gaussian pdf over [from_, to_] via the error function
    I1 = 0.5*(1 + errorFunction((from_ - mu)/(sqrt(2.0)*sigma)))
    I2 = 0.5*(1 + errorFunction((to_   - mu)/(sqrt(2.0)*sigma)))
    return I2 - I1

A = matrix(RDF, samples, 1)
B = matrix(RDF, samples, 1)
TA = RealDistribution('gaussian', 2)    # class A: sigma = 2, shifted to mean +5
TB = RealDistribution('gaussian', 1.4)  # class B: sigma = 1.4, shifted to mean -5
pretty_print("Let's say we measured the feature of class A and class B", " ", samples, " times.")
for i in range(samples):
    A[i, 0] = TA.get_random_element() + 5
    B[i, 0] = TB.get_random_element() - 5
pA = plt.hist(A.transpose(), bins=50, range=(-15., 15.), color='r', alpha=0.3)
pB = plt.hist(B.transpose(), bins=50, range=(-15., 15.), color='b', alpha=0.3)
plt.legend(('A', 'B'))
plt.title('Histogram of samples')
plt.savefig('Histogram.png')
plt.close()
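As a quick sanity check (a sketch, assuming the cell above has run), the hand-rolled errorFunction can be compared against Python's math.erf, and IntegrateGaussian over a very wide interval should return roughly 1:

# Sanity check for the helpers above (hypothetical check cell, not part of the model)
import math
for xv in [-2.0, -0.5, 0.5, 2.0]:
    print(xv, float(errorFunction(xv)), math.erf(xv))  # should agree to ~1e-7
print(float(IntegrateGaussian(0, 1, -99999, 99999)))   # total mass, should be ~1.0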
       
# Build histograms of the samples by hand, as (feature value, bin count) pairs
minA = min(A.list()); ranA = max(A.list()) - minA
minB = min(B.list()); ranB = max(B.list()) - minB
nbins = int(samples/10)
hist_A = matrix(RDF, nbins + 1, 1)
hist_B = matrix(RDF, nbins + 1, 1)
for i in range(nbins + 1):
    hist_A[i, 0] = 0
    hist_B[i, 0] = 0
for i in range(samples):
    idxA = int((A[i, 0] - minA)*nbins/ranA)
    idxB = int((B[i, 0] - minB)*nbins/ranB)
    hist_A[idxA, 0] = hist_A[idxA, 0] + 1
    hist_B[idxB, 0] = hist_B[idxB, 0] + 1

# Pair each sample with the count of its histogram bin, giving points to fit
A_2D = matrix(RDF, samples, 2)
B_2D = matrix(RDF, samples, 2)
for i in range(samples):
    idxA = int((A[i, 0] - minA)*nbins/ranA)
    idxB = int((B[i, 0] - minB)*nbins/ranB)
    A_2D[i, 0] = A[i, 0]; A_2D[i, 1] = hist_A[idxA, 0]
    B_2D[i, 0] = B[i, 0]; B_2D[i, 1] = hist_B[idxB, 0]

var('sigma mu alpha x')
model(x) = alpha * (1/sqrt(2*pi*sigma**2))*exp(-(x - mu)**2/(2*sigma**2))
show("Let's model their likelihood distributions with Gaussian pdfs")
A_model = find_fit(A_2D, model, initial_guess=[1, 0, 1], solution_dict=True)
pretty_print("Estimated Gaussian pdf of A:", " ", mu, " =", " ", A_model[mu], ",", " ", sigma, " =", " ", A_model[sigma])
B_model = find_fit(B_2D, model, initial_guess=[1, 0, 1], solution_dict=True)
pretty_print("Estimated Gaussian pdf of B:", " ", mu, " =", " ", B_model[mu], ",", " ", sigma, " =", " ", B_model[sigma])

gaussx = np.arange(-15, 15, 0.1)
# The factor of 2 is an empirical rescaling to roughly match the displayed histogram
gaussAy = A_model[alpha]*2 * GaussPDF(gaussx, A_model[mu], A_model[sigma])
gaussBy = B_model[alpha]*2 * GaussPDF(gaussx, B_model[mu], B_model[sigma])
pA = plt.hist(A.transpose(), bins=50, range=(-15., 15.), color='r', alpha=0.1)
pB = plt.hist(B.transpose(), bins=50, range=(-15., 15.), color='b', alpha=0.1)
plt.plot(gaussx, gaussAy, color='r')
plt.plot(gaussx, gaussBy, color='b')
plt.legend(('A', 'B'))
plt.title('Likelihood distribution by Gaussian PDF fitting')
plt.savefig('fitting.png')
plt.close()
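As a cross-check on the fit (a sketch; np, A, and B come from the cells above), the maximum-likelihood Gaussian parameters are simply the sample mean and standard deviation, which should land close to the find_fit estimates and to the generating parameters (+5, 2) and (-5, 1.4):

# Closed-form ML estimates as a cross-check on find_fit (hypothetical check cell)
sampleA = np.array(A.list(), dtype=float)
sampleB = np.array(B.list(), dtype=float)
print("A: mean =", sampleA.mean(), " std =", sampleA.std(ddof=1))  # expect ~ +5 and ~2
print("B: mean =", sampleB.mean(), " std =", sampleB.std(ddof=1))  # expect ~ -5 and ~1.4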
       


pretty_print("Now, let's assume we have a feature measurement", " ", x, " and we want to know which class (A or B) it came from") function('P') #var('A B') show("From Bayes Theory, we can derive:") show(LatexExpr(r"P(A|x)=\frac{P(x|A)P(A)}{P(x)}")) show(LatexExpr(r"=\frac{P(x|A)P(A)}{P(x|A)P(A)+P(x|B)P(B)}")) show(LatexExpr(r"=\frac{1}{1+\frac{P(x|B)P(B)}{P(x|A)P(A)}}")) pretty_print("Let's assume we don't have prior information of which class is more likely to appear, i.e. they are equiprobable", " ", LatexExpr(r"P(A)=P(B)")) show(LatexExpr(r"P(A|x)=\frac{1}{1+\frac{P(x|B)}{P(x|A)}}")) pretty_print("and similarly:") show(LatexExpr(r"P(B|x)=\frac{1}{1+\frac{P(x|A)}{P(x|B)}}")) pretty_print("this will generate the following decision characteristics") classAy=1/(1+ GaussPDF(gaussx,B_model[mu],B_model[sigma])/GaussPDF(gaussx,A_model[mu],A_model[sigma])) classBy=1/(1+ GaussPDF(gaussx,A_model[mu],A_model[sigma])/GaussPDF(gaussx,B_model[mu],B_model[sigma])) pA=plt.hist(A.transpose(), bins=50, color='r', alpha=0.1, normed=1, range=(-15.,15.)) pB=plt.hist(B.transpose(), bins=50, color='b', alpha=0.1, normed=1, range=(-15.,15.)) plt.plot(gaussx,classAy, color='r') plt.plot(gaussx,classBy, color='b') plt.legend(('A','B')) plt.title('Classification characteristics') plt.savefig('classification.png') plt.close() 
       
pretty_print("we can compute the classification error as follows") pretty_print("Classification boundary (C)") show(LatexExpr(r"P(A|C)=P(B|C)")) show(LatexExpr(r"\frac{1}{1+\frac{P(C|A)}{P(C|B)}}=\frac{1}{1+\frac{P(C|B)}{P(C|A)}}")) show(LatexExpr(r"\frac{P(C|A)}{P(C|B)}=\frac{P(C|B)}{P(C|A)}")) show(LatexExpr(r"P(C|A)=P(C|A)")) show(LatexExpr(r"\frac{1}{\sqrt{2\pi\sigma_A^2}}e^{-\frac{(C-\mu_A)^2}{2\sigma_A^2}} = \frac{1}{\sqrt{2\pi\sigma_B^2}}e^{-\frac{(C-\mu_B)^2}{2\sigma_B^2}}")) C=Intersect2Gassians(A_model[mu],A_model[sigma],B_model[mu],B_model[sigma]) if abs(C[1])<abs(C[0]): C[0]=C[1] C=C[0] pretty_print("C =", " ", C) show(" ") ErrA = IntegrateGaussian(B_model[mu],B_model[sigma],C,99999) ErrB = IntegrateGaussian(A_model[mu],A_model[sigma],-99999,C) show(LatexExpr(r"P(A|B)=\int_{C}^{\infty} P(x|B) dx")) show(LatexExpr(r"P(A|B)=\int_{C}^{\infty} \frac{1}{\sqrt{2\pi\sigma_B^2}}e^{-\frac{(x-\mu_B)^2}{2\sigma_B^2}} dx")) pretty_print(LatexExpr(r"P(A|B)"), " =", " ", ErrA*100, "%") show(" ") pretty_print(LatexExpr(r"similarly, P(B|A)"), " =", " ", ErrB*100, "%") show(" ") 
       
pretty_print("we can notice that there's a small problem with classification of features with magnitude > 10.") pretty_print("it will be classified either as A or B depending on its side, in spite of not actually having any") pretty_print("samples in the training to support such decision. to fix this, we can add laplace smoothing, that is,") pretty_print("to assume that every possible outcome of an experiment happened at least once, before we actually start training") show(LatexExpr(r"P'(x|A)=P(x|A)+\epsilon")) show(LatexExpr(r"P'(x|B)=P(x|B)+\epsilon")) pretty_print("So, classification characteristics changes to be:") show(LatexExpr(r"P(A|x)=\frac{1}{1+\frac{P(x|B)+\epsilon}{P(x|A)+\epsilon}}")) show(LatexExpr(r"P(B|x)=\frac{1}{1+\frac{P(x|A)+\epsilon}{P(x|B)+\epsilon}}")) LclassAy=1/(1+ (0.001+GaussPDF(gaussx,B_model[mu],B_model[sigma]))/(0.001+GaussPDF(gaussx,A_model[mu],A_model[sigma]))) LclassBy=1/(1+ (0.001+GaussPDF(gaussx,A_model[mu],A_model[sigma]))/(0.001+GaussPDF(gaussx,B_model[mu],B_model[sigma]))) pA=plt.hist(A.transpose(), bins=50, color='r', alpha=0.1, normed=1, range=(-15.,15.)) pB=plt.hist(B.transpose(), bins=50, color='b', alpha=0.1, normed=1, range=(-15.,15.)) plt.plot(gaussx,LclassAy, color='r') plt.plot(gaussx,LclassBy, color='b') plt.legend(('A','B')) plt.title('Classification characteristics with e = 0.001') plt.savefig('laplace classification.png') plt.close()