AI_learning/cs188-projects-master/P6 Classification/features.py

# features.py
# -----------
# Licensing Information:  You are free to use or extend these projects for
# educational purposes provided that (1) you do not distribute or publish
# solutions, (2) you retain this notice, and (3) you provide clear
# attribution to UC Berkeley, including a link to http://ai.berkeley.edu.
#
# Attribution Information: The Pacman AI projects were developed at UC Berkeley.
# The core projects and autograders were primarily created by John DeNero
# (denero@cs.berkeley.edu) and Dan Klein (klein@cs.berkeley.edu).
# Student side autograding was added by Brad Miller, Nick Hay, and
# Pieter Abbeel (pabbeel@cs.berkeley.edu).


import numpy as np
import util
import samples

DIGIT_DATUM_WIDTH=28
DIGIT_DATUM_HEIGHT=28

def basicFeatureExtractor(datum):
    """
    Returns a binarized and flattened version of the image datum.

    Args:
        datum: 2-dimensional numpy.array representing a single image.

    Returns:
        A 1-dimensional numpy.array of features indicating whether each pixel
            in the provided datum is white (0) or gray/black (1).
    """
    features = np.zeros_like(datum, dtype=int)
    features[datum > 0] = 1
    return features.flatten()

def enhancedFeatureExtractor(datum):
    """
    Returns a feature vector of the image datum.

    Args:
        datum: 2-dimensional numpy.array representing a single image.

    Returns:
        A 1-dimensional numpy.array of features designed by you. The features
            can have any length.

    ## DESCRIBE YOUR ENHANCED FEATURES HERE...
        add an extra feature that shows the number of continuous white regions in the graph.
    ##
    """
    features = np.zeros_like(datum, dtype=int)
    features[datum > 0] = 1
    """===== setup for enhanced extraction ===="""
    m, n = datum.shape[0], datum.shape[1]
    num_w_regions = 0
    visited = set()
    i, j = newStartPoint(visited, features, m, n)

    from collections import deque
    queue = deque()
    while i >= 0 and j >= 0:
        queue.append((i, j))
        num_w_regions += 1

        while queue:
            x, y = queue.popleft()
            if y > 0 and features[x][y - 1] == 0 and (x, y - 1) not in visited:
                visited.add((x, y - 1))
                queue.append((x, y - 1))

            if y + 1 < n and features[x][y + 1] == 0 and (x, y + 1) not in visited:
                visited.add((x, y + 1))
                queue.append((x, y + 1))

            if x + 1 < m and features[x + 1][y] == 0 and (x + 1, y) not in visited:
                visited.add((x + 1, y))
                queue.append((x + 1, y))

            if x > 0 and features[x - 1][y] == 0 and (x - 1, y) not in visited:
                visited.add((x - 1, y))
                queue.append((x - 1, y))

        i, j = newStartPoint(visited, features, m, n)

    extra_features = np.array([0, 0, 0])
    if num_w_regions == 1:
        extra_features = np.array([1, 0, 0])
    elif num_w_regions == 2:
        extra_features = np.array([0, 1, 0])
    elif num_w_regions > 2:
        extra_features = np.array([0, 0, 1])
    return np.concatenate((features.flatten(), extra_features), axis = 0)

def newStartPoint(visited, features, m, n):
    for i in range(m):
        for j in range(n):
            if features[i][j] == 0 and (i, j) not in visited:
                visited.add((i, j))
                return i, j
    return -1, -1

def analysis(model, trainData, trainLabels, trainPredictions, valData, valLabels, validationPredictions):
    """
    This function is called after learning.
    Include any code that you want here to help you analyze your results.

    Use the print_digit(numpy array representing a training example) function
    to the digit

    An example of use has been given to you.

    - model is the trained model
    - trainData is a numpy array where each row is a training example
    - trainLabel is a list of training labels
    - trainPredictions is a list of training predictions
    - valData is a numpy array where each row is a validation example
    - valLabels is the list of validation labels
    - valPredictions is a list of validation predictions

    This code won't be evaluated. It is for your own optional use
    (and you can modify the signature if you want).
    """

    # Put any code here...
    # Example of use:
    # for i in range(len(trainPredictions)):
    #     prediction = trainPredictions[i]
    #     truth = trainLabels[i]
    #     if (prediction != truth):
    #         print "==================================="
    #         print "Mistake on example %d" % i
    #         print "Predicted %d; truth is %d" % (prediction, truth)
    #         print "Image: "
    #         print_digit(trainData[i,:])


## =====================
## You don't have to modify any code below.
## =====================

def print_features(features):
    str = ''
    width = DIGIT_DATUM_WIDTH
    height = DIGIT_DATUM_HEIGHT
    for i in range(width):
        for j in range(height):
            feature = i*height + j
            if feature in features:
                str += '#'
            else:
                str += ' '
        str += '\n'
    print(str)

def print_digit(pixels):
    width = DIGIT_DATUM_WIDTH
    height = DIGIT_DATUM_HEIGHT
    pixels = pixels[:width*height]
    image = pixels.reshape((width, height))
    datum = samples.Datum(samples.convertToTrinary(image),width,height)
    print(datum)

def _test():
    import datasets
    train_data = datasets.tinyMnistDataset()[0]
    for i, datum in enumerate(train_data):
        print_digit(datum)

if __name__ == "__main__":
    _test()