You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

271 lines
8.9 KiB

2 years ago
"""
Programmer : EOF
E-mail : jasonleaster@163.com
Date : 2015.11.22
File : weakClassifier.py
License : MIT License
"""
from matplotlib import pyplot
import numpy
from config import LABEL_POSITIVE
from config import LABEL_NEGATIVE
class WeakClassifier:
    """Decision stump that labels samples by thresholding one feature row.

    Training scans every feature (row of the sample matrix), places the
    threshold midway between the weighted class means of that feature, and
    keeps the feature/direction pair with the lowest weighted error.
    """

    def __init__(self, Mat=None, Tag=None, W=None, train=True):
        """Create a stump ready for training, or an empty one for cached data.

        Parameter:
            @Mat   : numpy.ndarray of shape
                     (number of features, number of samples).
            @Tag   : numpy.ndarray with one label per sample
                     (LABEL_POSITIVE or LABEL_NEGATIVE).
            @W     : numpy.ndarray with one weight per sample, or None to
                     give each class a total weight of 0.5 spread evenly
                     over its samples.
            @train : If false, no training data is attached; the caller is
                     expected to fill the optimum in with `constructor`.
        """
        if train:
            # The implementation depends heavily on numpy.ndarray semantics.
            assert type(Mat) is numpy.ndarray, "Mat must be a numpy.ndarray"
            assert type(Tag) is numpy.ndarray, "Tag must be a numpy.ndarray"
            assert W is None or type(W) is numpy.ndarray, \
                "W must be a numpy.ndarray or None"

            # Deliberately keep references instead of copies to save memory;
            # this class never writes to the sample matrix or the labels.
            self._mat = Mat
            self._label = Tag

            # sampleDim == the number of features
            self.sampleDim, self.sampleNum = self._mat.shape

            if W is None:
                is_pos = self._label == LABEL_POSITIVE
                is_neg = self._label == LABEL_NEGATIVE
                self.numPos = numpy.count_nonzero(is_pos)
                self.numNeg = numpy.count_nonzero(is_neg)

                # Assign by label, not by position, so the weights stay
                # correct when positive and negative samples are interleaved.
                weight = numpy.zeros(self._label.shape, dtype=float)
                if self.numPos:
                    weight[is_pos] = 1.0 / (2 * self.numPos)
                if self.numNeg:
                    weight[is_neg] = 1.0 / (2 * self.numNeg)
                self.weight = weight
            else:
                self.weight = W

            # Scratch buffer reused by `optimal`; all zeros between calls.
            # (builtin int: the `numpy.int` alias was removed in NumPy 1.24)
            self.output = numpy.zeros(self.sampleNum, dtype=int)

        self.opt_errorRate = 1.
        self.opt_dimension = 0
        self.opt_threshold = None
        self.opt_direction = 0

    def _midpoint_threshold(self, d):
        """Return the midpoint of the weighted class means of feature @d.

        Samples labelled LABEL_POSITIVE form one class, all others the
        other class.
        """
        row = self._mat[d]
        is_pos = self._label == LABEL_POSITIVE

        # Multiplying by the boolean mask zeroes the other class's weights.
        pos_w = self.weight * is_pos
        neg_w = self.weight * ~is_pos

        miuPos = pos_w.dot(row) / pos_w.sum()
        miuNeg = neg_w.dot(row) / neg_w.sum()
        return (miuPos + miuNeg) / 2

    def optimal(self, d):
        """Evaluate feature @d and return its best split.

        Returns:
            (minErrRate, threshold, bestDirection), where a sample is
            predicted positive when
            `value * direction < threshold * direction`, and minErrRate is
            the summed weight of the misclassified training samples.
        """
        threshold = self._midpoint_threshold(d)
        row = self._mat[d]

        minErrRate = numpy.inf
        bestDirection = None

        for direction in [-1, 1]:
            is_positive = row * direction < threshold * direction
            is_negative = row * direction >= threshold * direction
            self.output[is_positive] = LABEL_POSITIVE
            self.output[is_negative] = LABEL_NEGATIVE

            errorRate = self.weight[self.output != self._label].sum()

            self.output.fill(0)  # reset the scratch buffer

            if errorRate < minErrRate:
                minErrRate = errorRate
                bestDirection = direction

        return minErrRate, threshold, bestDirection

    def train(self):
        """Pick the feature with the lowest weighted error and return it.

        Stores the winner in opt_errorRate / opt_dimension / opt_threshold /
        opt_direction. Asserts that the resulting error is below 0.5.
        """
        for dim in range(self.sampleDim):
            err, threshold, direction = self.optimal(dim)
            if err < self.opt_errorRate:
                self.opt_errorRate = err
                self.opt_dimension = dim
                self.opt_threshold = threshold
                self.opt_direction = direction

        assert self.opt_errorRate < 0.5
        return self.opt_errorRate

    def prediction(self, Mat):
        """Label every column of @Mat with the stored optimum.

        @Mat has the same layout as the training matrix (features x samples).
        Returns an integer array holding LABEL_POSITIVE or LABEL_NEGATIVE
        for each sample.
        """
        sampleNum = Mat.shape[1]

        dim = self.opt_dimension
        threshold = self.opt_threshold
        direction = self.opt_direction

        output = numpy.zeros(sampleNum, dtype=int)
        output[Mat[dim] * direction < direction * threshold] = LABEL_POSITIVE
        output[Mat[dim] * direction >= direction * threshold] = LABEL_NEGATIVE
        return output

    def show(self, dim=None):
        """Plot per-class response histograms of feature @dim with pyplot.

        @dim defaults to the trained optimal feature. Each curve is the
        fraction of that class's samples falling into each of 10 equal-width
        bins; the vertical line marks the midpoint threshold.
        """
        if dim is None:
            dim = self.opt_dimension

        N = 10  # the number of bins
        row = self._mat[dim]
        is_pos = self._label == LABEL_POSITIVE
        span = (float(numpy.min(row)), float(numpy.max(row)))

        # Bins cover [min, max] completely, so extreme responses are counted.
        posCount, edges = numpy.histogram(row[is_pos], bins=N, range=span)
        negCount, _ = numpy.histogram(row[~is_pos], bins=N, range=span)
        centers = (edges[:-1] + edges[1:]) / 2.

        # Normalise to frequencies; an empty class stays at zero.
        posVal = posCount / float(posCount.sum() or 1)
        negVal = negCount / float(negCount.sum() or 1)

        pyplot.title("A simple weak classifier")
        pyplot.plot(centers, posVal, "r-o", label="Face class")
        pyplot.plot(centers, negVal, "b-o", label="Non-Face class")
        pyplot.xlabel("feature response")
        pyplot.ylabel("frequency")

        # plot threshold line
        threshold = self._midpoint_threshold(dim)
        heights = numpy.arange(0.0, 0.5, 0.05)
        pyplot.plot([threshold for _ in range(10)], heights,
                    label="threshold")
        pyplot.legend()
        pyplot.show()

    def __str__(self):
        """Return the stored optimum and the sample weights as text."""
        # A classifier built with train=False carries no weights.
        weight = getattr(self, "weight", None)
        return "".join([
            "opt_errorRate:", str(self.opt_errorRate), "\n",
            "opt_threshold:", str(self.opt_threshold), "\n",
            "opt_dimension:", str(self.opt_dimension), "\n",
            "opt_direction:", str(self.opt_direction), "\n",
            "weights :", str(weight), "\n",
        ])

    def constructor(self, dimension, direction, threshold):
        """Load a previously trained optimum and return self."""
        self.opt_dimension = dimension
        self.opt_threshold = threshold
        self.opt_direction = direction
        return self