You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
271 lines
8.9 KiB
271 lines
8.9 KiB
2 years ago
|
"""
Programmer : EOF
E-mail : jasonleaster@163.com
Date : 2015.11.22
File : weakClassifier.py

License : MIT License

"""
|
||
|
|
||
|
from matplotlib import pyplot
|
||
|
import numpy
|
||
|
|
||
|
from config import LABEL_POSITIVE
|
||
|
from config import LABEL_NEGATIVE
|
||
|
|
||
|
|
||
|
class WeakClassifier:
    """Single-feature threshold classifier trained on weighted samples.

    For every feature (row of the training matrix) a candidate threshold is
    placed halfway between the weighted mean response of the positive samples
    and that of the negative samples.  Both comparison directions are tried
    and the (feature, threshold, direction) triple with the lowest weighted
    error is kept in the ``opt_*`` attributes.
    """

    def __init__(self, Mat=None, Tag=None, W=None, train=True):
        """Create a classifier, either ready for training or as an empty shell.

        Parameter:
            @Mat : A matrix (two dimension numpy.ndarray) whose size is
                   (row    = number of features,
                    column = number of total samples).

            @Tag : A vector (one dimension numpy.ndarray) whose size is the
                   same as the number of total samples.

            @W   : Weight of each sample in the training set, a
                   numpy.ndarray whose size is the number of total samples.
                   When None, every positive sample gets 1 / (2 * numPos)
                   and every negative sample gets 1 / (2 * numNeg).

            @train : A bool value. If it is False, no training data is
                     attached; the caller is expected to fill the model in
                     with constructor() (e.g. from cached data).
        """
        if train:
            # The implementation depends heavily on numpy.ndarray semantics
            # (element-wise `*`, boolean indexing), so subclasses such as
            # numpy.matrix are rejected on purpose.
            assert Mat.__class__ == numpy.ndarray, "Mat must be a numpy.ndarray"
            assert Tag.__class__ == numpy.ndarray, "Tag must be a numpy.ndarray"
            # W is optional: None selects the default weights below.
            assert W is None or W.__class__ == numpy.ndarray, \
                "W must be a numpy.ndarray or None"

            # The matrix is stored by reference rather than copied with
            # numpy.array(Mat) to save memory.  This class must therefore
            # never modify the matrix it was given.
            self._mat = Mat
            self._label = Tag

            # sampleDim == the number of features
            self.sampleDim, self.sampleNum = self._mat.shape

            if W is None:
                is_pos = self._label == LABEL_POSITIVE
                is_neg = self._label == LABEL_NEGATIVE
                self.numPos = numpy.count_nonzero(is_pos)
                self.numNeg = numpy.count_nonzero(is_neg)

                # Assign the weights by label so that they stay aligned with
                # the samples whatever order the samples are stored in.
                weight = numpy.zeros(self._label.shape, dtype=float)
                if self.numPos:
                    weight[is_pos] = 1.0 / (2 * self.numPos)
                if self.numNeg:
                    weight[is_neg] = 1.0 / (2 * self.numNeg)
                self.weight = weight
            else:
                self.weight = W

            # Scratch buffer for per-sample predictions during training.
            # Builtin `int` replaces the `numpy.int` alias, which no longer
            # exists in current NumPy releases.
            self.output = numpy.zeros(self.sampleNum, dtype=int)

        self.opt_errorRate = 1.
        self.opt_dimension = 0
        self.opt_threshold = None
        self.opt_direction = 0

    def _midpoint_threshold(self, d):
        """Return the midpoint of the weighted class means of feature @d.

        Samples labelled LABEL_POSITIVE form the positive class; every other
        sample is treated as negative.
        """
        pos = (self._label == LABEL_POSITIVE).astype(float)
        neg = 1.0 - pos

        row = self._mat[d]
        pos_w = self.weight * pos
        neg_w = self.weight * neg

        miuPos = pos_w.dot(row) / pos_w.sum()
        miuNeg = neg_w.dot(row) / neg_w.sum()

        return (miuPos + miuNeg) / 2

    def optimal(self, d):
        """Evaluate feature @d and return its best threshold split.

        Return:
            (minErrRate, threshold, bestDirection) where @threshold is the
            midpoint of the weighted class means, @bestDirection is -1 or 1
            (None when neither direction yields an error below infinity)
            and @minErrRate is the summed weight of the misclassified
            training samples for that direction.
        """
        threshold = self._midpoint_threshold(d)
        row = self._mat[d]

        minErrRate = numpy.inf
        bestDirection = None
        for direction in (-1, 1):
            # A sample is predicted positive when its response lies on the
            # "below" side of the threshold after applying the direction.
            self.output[row * direction < threshold * direction] \
                = LABEL_POSITIVE
            self.output[row * direction >= threshold * direction] \
                = LABEL_NEGATIVE

            errorRate = self.weight[self.output != self._label].sum()

            self.output *= 0  # reset the scratch buffer

            if errorRate < minErrRate:
                minErrRate = errorRate
                bestDirection = direction

        return minErrRate, threshold, bestDirection

    def train(self):
        """Search all features and keep the split with the lowest error.

        Updates the opt_* attributes whenever a feature beats the currently
        stored error rate and returns the final error rate.  Asserts that
        the result is better than 0.5.
        """
        for dim in range(self.sampleDim):
            err, threshold, direction = self.optimal(dim)
            if err < self.opt_errorRate:
                self.opt_errorRate = err
                self.opt_dimension = dim
                self.opt_threshold = threshold
                self.opt_direction = direction

        assert self.opt_errorRate < 0.5

        return self.opt_errorRate

    def prediction(self, Mat):
        """Classify every column of @Mat with the stored optimal split.

        Parameter:
            @Mat : feature matrix laid out like the training matrix
                   (row = feature, column = sample).

        Return:
            An integer vector with one entry per column of @Mat, holding
            LABEL_POSITIVE or LABEL_NEGATIVE.
        """
        sampleNum = Mat.shape[1]

        row = Mat[self.opt_dimension]
        threshold = self.opt_threshold
        direction = self.opt_direction

        # Builtin `int` replaces the removed `numpy.int` alias.
        output = numpy.zeros(sampleNum, dtype=int)

        output[row * direction < direction * threshold] = LABEL_POSITIVE
        output[row * direction >= direction * threshold] = LABEL_NEGATIVE

        return output

    @staticmethod
    def _to_frequency(counts):
        """Scale histogram counts to fractions; an empty class stays all zero."""
        total = counts.sum()
        if total == 0:
            return numpy.zeros(len(counts))
        return counts / (1. * total)

    def show(self, dim=None):
        """Plot the response distribution of both classes for feature @dim.

        Parameter:
            @dim : index of the feature to plot; defaults to the optimal
                   feature found by training.

        The responses are binned into N equal-width bins spanning the
        observed minimum and maximum, normalised per class, and drawn
        together with the midpoint threshold of that feature.
        """
        if dim is None:
            dim = self.opt_dimension

        N = 10  # the number of bins
        row = self._mat[dim]
        is_pos = self._label == LABEL_POSITIVE

        # numpy.histogram makes the bins cover [min, max] completely, so
        # every sample (including the extremes) is counted exactly once.
        bounds = (numpy.min(row), numpy.max(row))
        pos_counts, edges = numpy.histogram(row[is_pos], bins=N, range=bounds)
        neg_counts, _ = numpy.histogram(row[~is_pos], bins=N, range=bounds)
        centers = (edges[:-1] + edges[1:]) / 2.

        posVal = self._to_frequency(pos_counts)
        negVal = self._to_frequency(neg_counts)

        pyplot.title("A simple weak classifier")
        pyplot.plot(centers, posVal, "r-o", label="Face class")
        pyplot.plot(centers, negVal, "b-o", label="Non-Face class")
        pyplot.xlabel("feature response")
        pyplot.ylabel("frequency")

        # plot threshold line
        threshold = self._midpoint_threshold(dim)
        heights = numpy.arange(0.0, 0.5, 0.05)
        pyplot.plot(numpy.full(len(heights), threshold), heights,
                    label="threshold")
        pyplot.legend()
        pyplot.show()

    def __str__(self):
        """Return a multi-line summary of the optimal split and the weights.

        Instances created with train=False carry no weights; None is
        printed for them.
        """
        fields = (
            ("opt_errorRate:", self.opt_errorRate),
            ("opt_threshold:", self.opt_threshold),
            ("opt_dimension:", self.opt_dimension),
            ("opt_direction:", self.opt_direction),
            ("weights :", getattr(self, "weight", None)),
        )
        return "".join(name + str(value) + "\n" for name, value in fields)

    def constructor(self, dimension, direction, threshold):
        """Load a previously found split into this instance and return it."""
        self.opt_dimension = dimension
        self.opt_threshold = threshold
        self.opt_direction = direction

        return self
|