You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

172 lines
5.5 KiB

# features.py
# -----------
# Licensing Information: You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).
import numpy as np
import util
import samples
# Grid dimensions used by print_features and print_digit below when laying
# out a flat pixel/feature vector as a 2-D digit image.
DIGIT_DATUM_WIDTH=28
DIGIT_DATUM_HEIGHT=28
def basicFeatureExtractor(datum):
    """
    Binarize an image and flatten it into a feature vector.

    Args:
        datum: 2-dimensional numpy.array representing a single image.

    Returns:
        A 1-dimensional numpy.array of ints with one entry per pixel, in
        row-major order: 1 where the pixel value is greater than 0
        (gray/black), 0 otherwise (white).
    """
    # The boolean mask cast to int is exactly the 0/1 indicator we want.
    is_inked = datum > 0
    return is_inked.astype(int).flatten()
def enhancedFeatureExtractor(datum):
    """
    Returns a feature vector of the image datum.

    Args:
        datum: 2-dimensional numpy.array representing a single image.

    Returns:
        A 1-dimensional numpy.array of ints: the binarized pixels (1 where
        the pixel value is greater than 0, else 0) in row-major order,
        followed by three indicator features describing the number of
        white regions.

    ## DESCRIBE YOUR ENHANCED FEATURES HERE...
    The three extra features one-hot encode the number of contiguous white
    (zero-valued) regions in the image, where pixels are connected through
    their up/down/left/right neighbours:
        [1, 0, 0] -> exactly one white region
        [0, 1, 0] -> exactly two white regions
        [0, 0, 1] -> three or more white regions
        [0, 0, 0] -> no white pixel at all
    ##
    """
    from collections import deque

    features = np.zeros_like(datum, dtype=int)
    features[datum > 0] = 1

    m, n = datum.shape[0], datum.shape[1]
    visited = np.zeros((m, n), dtype=bool)
    num_w_regions = 0

    # One row-major sweep: every white pixel that has not been reached yet
    # seeds a breadth-first flood fill that marks its whole region, so the
    # image is never rescanned from the origin when looking for the next seed.
    for i in range(m):
        for j in range(n):
            if features[i, j] != 0 or visited[i, j]:
                continue
            num_w_regions += 1
            visited[i, j] = True
            queue = deque([(i, j)])
            while queue:
                x, y = queue.popleft()
                # 4-connectivity: diagonal neighbours do not join regions.
                for nx, ny in ((x, y - 1), (x, y + 1), (x + 1, y), (x - 1, y)):
                    if not (0 <= nx < m and 0 <= ny < n):
                        continue
                    if features[nx, ny] == 0 and not visited[nx, ny]:
                        # Mark on enqueue so a pixel is queued at most once.
                        visited[nx, ny] = True
                        queue.append((nx, ny))

    # One-hot bucket for 1, 2, or 3+ regions; all zeros when there are none.
    extra_features = np.zeros(3, dtype=int)
    if num_w_regions > 0:
        extra_features[min(num_w_regions, 3) - 1] = 1

    return np.concatenate((features.flatten(), extra_features), axis=0)
def newStartPoint(visited, features, m, n):
    """
    Find the next unvisited zero-valued cell, scanning in row-major order.

    Args:
        visited: set of (row, col) tuples already claimed; the cell that is
            found is added to it in place.
        features: 2-dimensional grid indexable as features[row][col].
        m: number of rows to scan.
        n: number of columns to scan.

    Returns:
        The (row, col) of the first cell that equals 0 and is not in
        ``visited``, or (-1, -1) when no such cell exists.
    """
    seed = next(
        (
            (row, col)
            for row in range(m)
            for col in range(n)
            if features[row][col] == 0 and (row, col) not in visited
        ),
        None,
    )
    if seed is None:
        return -1, -1
    visited.add(seed)
    return seed
def analysis(model, trainData, trainLabels, trainPredictions, valData, valLabels, validationPredictions):
    """
    This function is called after learning.

    Include any code that you want here to help you analyze your results.
    Use the print_digit(numpy array representing a training example)
    function to display the digit. An example of use is given in the
    comments below.

    - model is the trained model
    - trainData is a numpy array where each row is a training example
    - trainLabels is a list of training labels
    - trainPredictions is a list of training predictions
    - valData is a numpy array where each row is a validation example
    - valLabels is the list of validation labels
    - validationPredictions is a list of validation predictions

    This code won't be evaluated. It is for your own optional use
    (and you can modify the signature if you want).
    """
    # Put any code here...
    # Example of use:
    # for i in range(len(trainPredictions)):
    #     prediction = trainPredictions[i]
    #     truth = trainLabels[i]
    #     if (prediction != truth):
    #         print("===================================")
    #         print("Mistake on example %d" % i)
    #         print("Predicted %d; truth is %d" % (prediction, truth))
    #         print("Image: ")
    #         print_digit(trainData[i,:])
## =====================
## You don't have to modify any code below.
## =====================
def print_features(features):
    """
    Print a text picture of which feature indices are present.

    The picture has DIGIT_DATUM_WIDTH lines of DIGIT_DATUM_HEIGHT characters.
    The character at line i, position j stands for feature index
    ``i * DIGIT_DATUM_HEIGHT + j`` and is '#' when that index is
    ``in features``, otherwise a space.

    Args:
        features: any object supporting the ``in`` operator for integer
            feature indices.
    """
    width = DIGIT_DATUM_WIDTH
    height = DIGIT_DATUM_HEIGHT
    lines = []
    for i in range(width):
        cells = [
            '#' if (i * height + j) in features else ' '
            for j in range(height)
        ]
        # Every line, including the last, ends with a newline.
        lines.append(''.join(cells) + '\n')
    # Join once rather than growing a string with +=, and avoid naming the
    # buffer `str`, which would shadow the builtin.
    print(''.join(lines))
def print_digit(pixels):
    """
    Print a flat pixel vector as a digit image.

    Only the first DIGIT_DATUM_WIDTH * DIGIT_DATUM_HEIGHT entries are used,
    so any trailing entries are ignored. They are reshaped into a
    (DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT) grid, passed through
    samples.convertToTrinary, wrapped in a samples.Datum and printed.

    Args:
        pixels: 1-dimensional numpy.array holding at least
            DIGIT_DATUM_WIDTH * DIGIT_DATUM_HEIGHT values.
    """
    w, h = DIGIT_DATUM_WIDTH, DIGIT_DATUM_HEIGHT
    grid = pixels[:w * h].reshape((w, h))
    trinary = samples.convertToTrinary(grid)
    print(samples.Datum(trinary, w, h))
def _test():
    """Print every datum in the first element of datasets.tinyMnistDataset()."""
    # Imported here so that `datasets` is only needed when the module is run
    # as a script.
    import datasets
    train_data = datasets.tinyMnistDataset()[0]
    # The index was never used, so iterate over the data directly.
    for datum in train_data:
        print_digit(datum)
# When executed as a script (not imported), run the _test() smoke test.
if __name__ == "__main__":
    _test()