# features.py
# -----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


from collections import deque

import numpy as np

import util
import samples

DIGIT_DATUM_WIDTH = 28
DIGIT_DATUM_HEIGHT = 28

def basicFeatureExtractor(datum):
    """
    Returns a binarized and flattened version of the image datum.

    Args:
        datum: 2-dimensional numpy.array representing a single image.

    Returns:
        A 1-dimensional numpy.array of features indicating whether each pixel
        in the provided datum is white (0) or gray/black (1).
    """
    features = np.zeros_like(datum, dtype=int)
    features[datum > 0] = 1
    return features.flatten()

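# A minimal usage sketch for the basic extractor (the array below is a
# stand-in for a real datum loaded through the course's data helpers):
#
#   datum = np.zeros((DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT), dtype=int)
#   datum[10:18, 10:18] = 2                       # an 8x8 gray block
#   feats = basicFeatureExtractor(datum)
#   feats.shape                                   # -> (784,)
#   int(feats.sum())                              # -> 64, one per gray pixel
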
def enhancedFeatureExtractor(datum):
    """
    Returns a feature vector of the image datum.

    Args:
        datum: 2-dimensional numpy.array representing a single image.

    Returns:
        A 1-dimensional numpy.array of features designed by you. The features
        can have any length.

    ## DESCRIBE YOUR ENHANCED FEATURES HERE...
    In addition to the basic binary pixel features, append a one-hot encoding
    of the number of connected white regions in the image (1, 2, or 3+),
    found with a breadth-first search over the white pixels.
    ##
    """
    # Basic features: binarize the pixel values, as in basicFeatureExtractor.
    features = np.zeros_like(datum, dtype=int)
    features[datum > 0] = 1

    # ===== Setup for the enhanced extraction =====
    # Count connected white regions (features == 0) with repeated BFS:
    # pick an unvisited white pixel, flood-fill its region, and repeat
    # until every white pixel has been visited.
    m, n = datum.shape[0], datum.shape[1]
    num_w_regions = 0
    visited = set()
    i, j = newStartPoint(visited, features, m, n)

    queue = deque()
    while i >= 0 and j >= 0:
        queue.append((i, j))
        num_w_regions += 1

        # Flood-fill the region containing (i, j) using 4-connectivity.
        while queue:
            x, y = queue.popleft()
            if y > 0 and features[x][y - 1] == 0 and (x, y - 1) not in visited:
                visited.add((x, y - 1))
                queue.append((x, y - 1))

            if y + 1 < n and features[x][y + 1] == 0 and (x, y + 1) not in visited:
                visited.add((x, y + 1))
                queue.append((x, y + 1))

            if x + 1 < m and features[x + 1][y] == 0 and (x + 1, y) not in visited:
                visited.add((x + 1, y))
                queue.append((x + 1, y))

            if x > 0 and features[x - 1][y] == 0 and (x - 1, y) not in visited:
                visited.add((x - 1, y))
                queue.append((x - 1, y))

        i, j = newStartPoint(visited, features, m, n)

    # One-hot encode the region count: exactly one, exactly two, or more.
    extra_features = np.array([0, 0, 0])
    if num_w_regions == 1:
        extra_features = np.array([1, 0, 0])
    elif num_w_regions == 2:
        extra_features = np.array([0, 1, 0])
    elif num_w_regions > 2:
        extra_features = np.array([0, 0, 1])

    return np.concatenate((features.flatten(), extra_features), axis=0)

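# A minimal sketch of how the extra features behave (assuming clean, closed
# strokes; real digits with gaps may merge or split white regions):
#
#   tiny = np.array([[0, 0, 0, 0],
#                    [0, 2, 2, 0],
#                    [0, 2, 2, 0],
#                    [0, 0, 0, 0]])
#   enhancedFeatureExtractor(tiny)[-3:]   # -> array([1, 0, 0]): one white region
#
# A filled blob leaves only the background white, while a ring-shaped stroke
# (like a clean "0") encloses a second white region and would yield [0, 1, 0].
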
def newStartPoint(visited, features, m, n):
    """
    Returns the coordinates of an unvisited white pixel (marking it visited),
    or (-1, -1) if every white pixel has already been visited.
    """
    for i in range(m):
        for j in range(n):
            if features[i][j] == 0 and (i, j) not in visited:
                visited.add((i, j))
                return i, j
    return -1, -1

def analysis(model, trainData, trainLabels, trainPredictions, valData, valLabels, validationPredictions):
    """
    This function is called after learning.
    Include any code that you want here to help you analyze your results.

    Use the print_digit(numpy array representing a training example) function
    to print the digit.

    An example of use has been given to you.

    - model is the trained model
    - trainData is a numpy array where each row is a training example
    - trainLabels is a list of training labels
    - trainPredictions is a list of training predictions
    - valData is a numpy array where each row is a validation example
    - valLabels is the list of validation labels
    - validationPredictions is a list of validation predictions

    This code won't be evaluated. It is for your own optional use
    (and you can modify the signature if you want).
    """

    # Put any code here...
    # Example of use:
    # for i in range(len(trainPredictions)):
    #     prediction = trainPredictions[i]
    #     truth = trainLabels[i]
    #     if (prediction != truth):
    #         print("===================================")
    #         print("Mistake on example %d" % i)
    #         print("Predicted %d; truth is %d" % (prediction, truth))
    #         print("Image: ")
    #         print_digit(trainData[i, :])

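# A hedged sketch of the kind of analysis you might add to analysis() above
# (it only uses the parameters documented in that function's docstring):
#
#   valPredictions = np.asarray(validationPredictions)
#   valTruth = np.asarray(valLabels)
#   print("Validation accuracy: %.3f" % np.mean(valPredictions == valTruth))
#   for i in np.flatnonzero(valPredictions != valTruth):
#       print("Mistake on validation example %d: predicted %d, truth is %d"
#             % (i, valPredictions[i], valTruth[i]))
#       print_digit(valData[i, :])
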
## =====================
## You don't have to modify any code below.
## =====================


def print_features(features):
    # features is expected to be a collection of the *indices* of the active
    # features (e.g., a list, set, or numpy array of pixel indices).
    str = ''
    width = DIGIT_DATUM_WIDTH
    height = DIGIT_DATUM_HEIGHT
    for i in range(width):
        for j in range(height):
            feature = i * height + j
            if feature in features:
                str += '#'
            else:
                str += ' '
        str += '\n'
    print(str)

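# A possible way to visualize an extractor's output with the helper above:
# pass the indices of the non-zero features rather than the raw binary
# vector (the `datum` name is a placeholder for a loaded image):
#
#   print_features(np.flatnonzero(basicFeatureExtractor(datum)))
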
def print_digit(pixels):
    width = DIGIT_DATUM_WIDTH
    height = DIGIT_DATUM_HEIGHT
    pixels = pixels[:width * height]
    image = pixels.reshape((width, height))
    datum = samples.Datum(samples.convertToTrinary(image), width, height)
    print(datum)

def _test():
    import datasets
    train_data = datasets.tinyMnistDataset()[0]
    for i, datum in enumerate(train_data):
        print_digit(datum)


if __name__ == "__main__":
    _test()