from django.contrib import admin
# Register your models here.
from django.apps import AppConfig
class DjangowebConfig(AppConfig):
    """Application configuration for the ``DjangoWeb`` app."""

    # Primary-key field class used for models that do not declare their own.
    default_auto_field = 'django.db.models.BigAutoField'
    # Dotted path of the application package this configuration belongs to.
    name = 'DjangoWeb'
from django.db import models
# Create your models here.
from django.test import TestCase
# Create your tests here.
from django.shortcuts import render
from django.http import HttpResponse
from django.shortcuts import render
def index(request):
    """Render the ``index.html`` template for the incoming request.

    :param request: the request object handed over by the URL dispatcher
    :return: whatever ``render`` produces for the ``index.html`` template
    """
    return render(request, 'index.html')
# Create your views here.
"""
ASGI config for DjongoTest project.

It exposes the ASGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/3.2/howto/deployment/asgi/
"""

import os

from django.core.asgi import get_asgi_application

# Only set the settings module when the environment has not chosen one already.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'DjongoTest.settings')

# Module-level ASGI callable picked up by the ASGI server.
application = get_asgi_application()
"""
Django settings for DjongoTest project.

Generated by 'django-admin startproject' using Django 3.2.9.

For more information on this file, see
https://docs.djangoproject.com/en/3.2/topics/settings/

For the full list of settings and their values, see
https://docs.djangoproject.com/en/3.2/ref/settings/
"""

import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
BASE_DIR = Path(__file__).resolve().parent.parent

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/3.2/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
# The key can be supplied through the DJANGO_SECRET_KEY environment variable;
# the literal below is only the development fallback and must not be used in
# production.
SECRET_KEY = os.environ.get(
    'DJANGO_SECRET_KEY',
    'django-insecure-#$hv8r*tx!d9*l=!genwf9i%54k_j9qvgbp2vs7tkm!v(z!kqn',
)

# SECURITY WARNING: don't run with debug turned on in production!
# Defaults to True (development); set DJANGO_DEBUG to 0/false/no/off to disable.
DEBUG = os.environ.get('DJANGO_DEBUG', 'True').strip().lower() in ('1', 'true', 'yes', 'on')
# Application definition
ROOT_URLCONF = 'DjongoTest.urls'
'BACKEND': 'django.template.backends.django.DjangoTemplates',
'DIRS': [Path(BASE_DIR, 'templates')]
'APP_DIRS': True,
'context_processors': [
'DjangoWeb', # App的名字
WSGI_APPLICATION = 'DjongoTest.wsgi.application'
# Database
'default': {
'ENGINE': 'django.db.backends.sqlite3',
'NAME': BASE_DIR / 'db.sqlite3',
# Password validation
'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
# Internationalization
# All three switches are enabled for this project; see the Django settings
# reference for the exact effect of each flag.
USE_I18N = True
USE_L10N = True
USE_TZ = True
# Static files (CSS, JavaScript, Images)
# URL prefix under which static files are referenced.
STATIC_URL = '/static/'
# Default primary key field type
DEFAULT_AUTO_FIELD = 'django.db.models.BigAutoField'
"""DjongoTest URL Configuration

The `urlpatterns` list routes URLs to views. For more information please see:
    https://docs.djangoproject.com/en/3.2/topics/http/urls/
Examples:
Function views
    1. Add an import:  from my_app import views
    2. Add a URL to urlpatterns:  path('', views.home, name='home')
Class-based views
    1. Add an import:  from other_app.views import Home
    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
Including another URLconf
    1. Import the include() function: from django.urls import include, path
    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
"""
from django.contrib import admin
from django.urls import path

from DjangoWeb import views

# A single route table: the reviewed text assigned ``urlpatterns`` twice (the
# second assignment silently replaced the first) and neither list was closed.
# NOTE(review): the admin route is assumed from the ``admin`` import (it is the
# stock entry created by ``startproject``) - confirm against the deployed
# URLconf.
urlpatterns = [
    path('admin/', admin.site.urls),
    path('index/', views.index),
]
"""
WSGI config for DjongoTest project.

It exposes the WSGI callable as a module-level variable named ``application``.

For more information on this file, see
https://docs.djangoproject.com/en/3.2/howto/deployment/wsgi/
"""

import os

from django.core.wsgi import get_wsgi_application

# Only set the settings module when the environment has not chosen one already.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'DjongoTest.settings')

# Module-level WSGI callable picked up by the WSGI server.
application = get_wsgi_application()
#!/usr/bin/env python
"""Django's command-line utility for administrative tasks."""
import os
import sys


def main():
    """Run administrative tasks.

    Selects ``DjongoTest.settings`` unless ``DJANGO_SETTINGS_MODULE`` is already
    set, then hands ``sys.argv`` to Django's command-line dispatcher.

    :raises ImportError: when Django cannot be imported; the original import
        error is chained as the cause.
    """
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'DjongoTest.settings')
    try:
        # Imported lazily so that a missing Django yields the helpful message
        # below instead of a bare import failure at module load time.
        from django.core.management import execute_from_command_line
    except ImportError as exc:
        raise ImportError(
            "Couldn't import Django. Are you sure it's installed and "
            "available on your PYTHONPATH environment variable? Did you "
            "forget to activate a virtual environment?"
        ) from exc
    execute_from_command_line(sys.argv)


if __name__ == '__main__':
    main()
# encoding: UTF-8
# Copyright 2017
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import glob
import sys
# size of the alphabet that we work with
# Specification of the supported alphabet (subset of ASCII-7)
# 9 horizontal tab TAB
# 10 line feed LF
# 32-64 space, numbers and punctuation
# 65-90 upper-case letters
# 91-96 more punctuation
# 97-122 lower-case letters
# 123-126 more punctuation
def convert_from_alphabet(a):
    """Encode a character.

    :param a: the ordinal of one character (an integer, e.g. ``ord("A")``)
    :return: the encoded value: 1 for TAB (9), ``127 - 30`` for LF (10),
        ``a - 30`` for the printable range 32..126 and 0 for anything else
    """
    if a == 9:
        return 1  # TAB
    if a == 10:
        return 127 - 30  # LF
    if 32 <= a <= 126:
        return a - 30
    return 0  # unknown
# encoded values:
# unknown = 0
# tab = 1
# space = 2
# all chars from 32 to 126 = c-30
# LF mapped to 127-30
def convert_to_alphabet(c, avoid_tab_and_lf=False):
    """Decode a code point.

    :param c: code point (an encoded value as produced by the encoder)
    :param avoid_tab_and_lf: if True, a tab is decoded as a space (32) and a
        line feed as a backslash (92) so the result stays on one line
    :return: the ordinal of the decoded character, or 0 when the code point is
        unknown
    """
    if c == 1:
        return 32 if avoid_tab_and_lf else 9  # space instead of TAB
    if c == 127 - 30:
        return 92 if avoid_tab_and_lf else 10  # backslash instead of LF
    if 32 <= c + 30 <= 126:
        return c + 30
    return 0  # unknown
def encode_text(s):
    """Encode a string.

    :param s: a text string
    :return: encoded list of code points, one per character of ``s``
    """
    return [convert_from_alphabet(ord(ch)) for ch in s]
def decode_to_text(c, avoid_tab_and_lf=False):
    """Decode an encoded string.

    :param c: encoded list of code points
    :param avoid_tab_and_lf: if True, tabs are decoded as spaces and line feeds
        as backslashes (passed through to ``convert_to_alphabet``)
    :return: the decoded text string
    """
    return "".join(chr(convert_to_alphabet(code, avoid_tab_and_lf)) for code in c)
def sample_from_probabilities(probabilities, topn=ALPHASIZE):
    """Roll the dice to produce a random integer in the [0..ALPHASIZE) range,
    according to the provided probabilities. If topn is specified, only the
    topn highest probabilities are taken into account.

    :param probabilities: a list of size ALPHASIZE with individual probabilities
    :param topn: the number of highest probabilities to consider. Defaults to all of them.
    :return: a random integer
    """
    # Copy first: np.squeeze may hand back a view, and zeroing entries of a
    # view would silently modify the caller's array.
    p = np.array(np.squeeze(probabilities))
    # Zero everything except the topn largest entries, then renormalise.
    p[np.argsort(p)[:-topn]] = 0
    p = p / np.sum(p)
    return np.random.choice(ALPHASIZE, 1, p=p)[0]
def rnn_minibatch_sequencer(raw_data, batch_size, sequence_size, nb_epochs):
    """
    Divides the data into batches of sequences so that all the sequences in one batch
    continue in the next batch. This is a generator that will keep returning batches
    until the input data has been seen nb_epochs times. Sequences are continued even
    between epochs, apart from one, the one corresponding to the end of raw_data.
    The remainder at the end of raw_data that does not fit in a full batch is ignored.

    :param raw_data: the training text
    :param batch_size: the size of a training minibatch
    :param sequence_size: the unroll size of the RNN
    :param nb_epochs: number of epochs to train on
    :return:
        x: one batch of training sequences
        y: one batch of target sequences, i.e. training sequences shifted by 1
        epoch: the current epoch number (starting at 0)
    :raises AssertionError: on first iteration, when the data does not contain
        even a single full batch
    """
    data = np.array(raw_data)
    data_len = data.shape[0]
    # using (data_len-1) because we must provide for the sequence shifted by 1 too
    nb_batches = (data_len - 1) // (batch_size * sequence_size)
    assert nb_batches > 0, "Not enough data, even for a single batch. Try using a smaller batch_size."
    rounded_data_len = nb_batches * batch_size * sequence_size
    # One row per sequence "lane": consecutive batches read consecutive column
    # windows, so every lane continues where it stopped in the previous batch.
    xdata = np.reshape(data[0:rounded_data_len], [batch_size, nb_batches * sequence_size])
    ydata = np.reshape(data[1:rounded_data_len + 1], [batch_size, nb_batches * sequence_size])

    for epoch in range(nb_epochs):
        for batch in range(nb_batches):
            x = xdata[:, batch * sequence_size:(batch + 1) * sequence_size]
            y = ydata[:, batch * sequence_size:(batch + 1) * sequence_size]
            x = np.roll(x, -epoch, axis=0)  # to continue the text from epoch to epoch (do not reset rnn state!)
            y = np.roll(y, -epoch, axis=0)
            yield x, y, epoch
def find_book(index, bookranges):
    """Return the name of the book whose range contains ``index``.

    :param index: position in the encoded text
    :param bookranges: list of dicts with "start", "end" and "name" keys;
        "start" is inclusive, "end" is exclusive
    :return: the "name" of the first matching range
    :raises StopIteration: when no range contains ``index``
    """
    return next(
        book["name"] for book in bookranges if (book["start"] <= index < book["end"]))
def find_book_index(index, bookranges):
    """Return the position in ``bookranges`` of the book containing ``index``.

    :param index: position in the encoded text
    :param bookranges: list of dicts with "start" (inclusive) and "end"
        (exclusive) keys
    :return: the list index of the first matching range
    :raises StopIteration: when no range contains ``index``
    """
    return next(
        i for i, book in enumerate(bookranges) if (book["start"] <= index < book["end"]))
def print_learning_learned_comparison(X, Y, losses, bookranges, batch_loss, batch_accuracy, epoch_size, index, epoch):
    """Display utility for printing learning statistics.

    Prints one row per sequence of the batch (position, book name, training
    sequence, predicted sequence, loss), then a footer labelling the columns
    and a summary line for the whole batch.

    :param X: batch of encoded training sequences, indexed [sequence, character]
    :param Y: batch of encoded predicted sequences, same layout as X
    :param losses: one loss value per sequence of the batch
    :param bookranges: list of dicts with "start", "end" and "name" keys
    :param batch_loss: loss of the whole batch
    :param batch_accuracy: accuracy of the whole batch
    :param epoch_size: epoch size in number of batches
    :param index: character position of the first sequence of the batch
    :param epoch: current epoch number
    """
    # epoch_size in number of batches
    batch_size = X.shape[0]  # batch_size in number of sequences
    sequence_len = X.shape[1]  # sequence_len in number of characters
    start_index_in_epoch = index % (epoch_size * batch_size * sequence_len)
    for k in range(batch_size):
        index_in_epoch = index % (epoch_size * batch_size * sequence_len)
        decx = decode_to_text(X[k], avoid_tab_and_lf=True)
        decy = decode_to_text(Y[k], avoid_tab_and_lf=True)
        bookname = find_book(index_in_epoch, bookranges)
        formatted_bookname = "{: <10.40}".format(bookname)  # min 10 and max 40 chars
        epoch_string = "{:4d}".format(index) + " (epoch {}) ".format(epoch)
        loss_string = "loss: {:.5f}".format(losses[k])
        print_string = epoch_string + formatted_bookname + " │ {} │ {} │ {}"
        print(print_string.format(decx, decy, loss_string))
        index += sequence_len
    # box formatting characters:
    # │ \u2502
    # ─ \u2500
    # └ \u2514
    # ┘ \u2518
    # ┴ \u2534
    # ┌ \u250C
    # ┐ \u2510
    # The footer columns are sized from the strings of the last printed row.
    format_string = "└{:─^" + str(len(epoch_string)) + "}"
    format_string += "{:─^" + str(len(formatted_bookname)) + "}"
    format_string += "┴{:─^" + str(len(decx) + 2) + "}"
    format_string += "┴{:─^" + str(len(decy) + 2) + "}"
    format_string += "┴{:─^" + str(len(loss_string)) + "}┘"
    footer = format_string.format('INDEX', 'BOOK NAME', 'TRAINING SEQUENCE', 'PREDICTED SEQUENCE', 'LOSS')
    # The footer was built but never displayed; print it under the rows.
    print(footer)
    # print statistics
    batch_index = start_index_in_epoch // (batch_size * sequence_len)
    batch_string = "batch {}/{} in epoch {},".format(batch_index, epoch_size, epoch)
    stats = "{: <28} batch loss: {:.5f}, batch accuracy: {:.5f}".format(batch_string, batch_loss, batch_accuracy)
    print("TRAINING STATS: {}".format(stats))
class Progress:
    """Text mode progress bar.

    Usage:
        p = Progress(30)
        p.step()
        p.step()
        p.step(reset=True)  # to restart from 0%
    The progress bar displays a new header at each restart."""

    def __init__(self, maxi, size=100, msg=""):
        """
        :param maxi: the number of steps required to reach 100%
        :param size: the number of characters taken on the screen by the progress bar
        :param msg: the message displayed in the header of the progress bar
        """
        self.maxi = maxi
        self.p = self.__start_progress(maxi)()  # () to get the iterator from the generator
        self.header_printed = False
        self.msg = msg
        self.size = size

    def step(self, reset=False):
        """Advance the bar by one step.

        :param reset: if True, restart from 0% (a new header is printed) before
            taking the step
        """
        if reset:
            self.__init__(self.maxi, self.size, self.msg)
        if not self.header_printed:
            self.__print_header()
        # Stepping past 100% is a no-op instead of leaking StopIteration.
        next(self.p, None)

    def __print_header(self):
        """Print the "0% ... 100%" header line with the message centred in it."""
        # Bar characters are printed without a newline, so start on a fresh line.
        print()
        format_string = "0%{: ^" + str(self.size - 6) + "}100%"
        print(format_string.format(self.msg))
        self.header_printed = True

    def __start_progress(self, maxi):
        """Return a generator function that prints the bar incrementally."""
        def print_progress():
            # Bresenham's algorithm. Yields the number of characters printed.
            # Over maxi invocations this prints self.size characters in total.
            dx = maxi
            dy = self.size
            d = dy - dx
            for x in range(maxi):
                k = 0
                while d >= 0:
                    print('=', end="", flush=True)
                    k += 1
                    d -= dx
                d += dy
                yield k

        return print_progress
def _count_trailing_books(bookranges, max_len):
    """Count books, walking from the last one, until their total length exceeds max_len."""
    total = 0
    count = 0
    for book in reversed(bookranges):
        total += book["end"] - book["start"]
        count += 1
        if total > max_len:
            break
    return count


def read_data_files(directory, validation=True):
    """Read data files according to the specified glob pattern.
    Optionally set aside the last file(s) as validation data.
    No validation data is returned if there are 5 files or less.

    :param directory: for example "data/*.txt"
    :param validation: if True (default), sets the last file(s) aside as validation data
    :return: training data, validation data, list of loaded file names with ranges.
        The validation data is an empty list when validation is disabled or
        when there are too few files to set any aside.
    """
    codetext = []
    bookranges = []
    for shakefile in glob.glob(directory, recursive=True):
        print("Loading file " + shakefile)
        start = len(codetext)
        # The context manager closes the file even if reading or encoding fails.
        with open(shakefile, "r") as shaketext:
            codetext.extend(encode_text(shaketext.read()))
        end = len(codetext)
        bookranges.append({"start": start, "end": end, "name": shakefile.rsplit("/", 1)[-1]})

    if len(bookranges) == 0:
        sys.exit("No training data has been found. Aborting.")

    # For validation, use roughly 90K of text,
    # but no more than 10% of the entire text
    # and no more than 1 book in 5 => no validation at all for 5 files or fewer.

    # 10% of the text is how many files ?
    nb_books1 = _count_trailing_books(bookranges, len(codetext) // 10)
    # 90K of text is how many books ?
    nb_books2 = _count_trailing_books(bookranges, 90 * 1024)
    # 20% of the books is how many books ?
    nb_books3 = len(bookranges) // 5

    # pick the smallest
    nb_books = min(nb_books1, nb_books2, nb_books3)

    if nb_books == 0 or not validation:
        cutoff = len(codetext)
    else:
        cutoff = bookranges[-nb_books]["start"]
    valitext = codetext[cutoff:]
    codetext = codetext[:cutoff]
    return codetext, valitext, bookranges
def print_data_stats(datalen, valilen, epoch_size):
    """Print the size of the training and validation texts.

    :param datalen: length of the training text, in characters
    :param valilen: length of the validation text, in characters
    :param epoch_size: number of batches per epoch
    """
    datalen_mb = datalen / 1024.0 / 1024.0
    valilen_kb = valilen / 1024.0
    print("Training text size is {:.2f}MB with {:.2f}KB set aside for validation.".format(datalen_mb, valilen_kb)
          + " There will be {} batches per epoch".format(epoch_size))
def print_validation_header(validation_start, bookranges):
    """Print the names of the books used for validation.

    :param validation_start: position in the text where the validation data starts
    :param bookranges: list of dicts with "start", "end" and "name" keys
    """
    bookindex = find_book_index(validation_start, bookranges)
    # Every book from the one containing validation_start to the last one.
    books = ", ".join(book["name"] for book in bookranges[bookindex:])
    print("{: <60}".format("Validating on " + books), flush=True)
def print_validation_stats(loss, accuracy):
    """Print the validation loss and accuracy with five decimals.

    :param loss: validation loss
    :param accuracy: validation accuracy
    """
    print("VALIDATION STATS: loss: {:.5f}, accuracy: {:.5f}".format(loss, accuracy))
def print_text_generation_header():
print("┌{:─^111}┐".format('Generating random text from learned state'))
def print_text_generation_footer():
    """Print the bottom border of the box that frames generated text."""
    print("└{:─^111}┘".format('End of generation'))
def frequency_limiter(n, multiple=1, modulo=0):
    """Build a predicate that is true once every ``multiple * n`` values.

    :param n: base period
    :param multiple: factor applied to both the period and the offset
    :param modulo: offset inside the period, in units of ``multiple``
    :return: a function ``limit(i)`` returning True when
        ``i % (multiple * n) == modulo * multiple``
    """
    def limit(i):
        return i % (multiple * n) == modulo * multiple

    return limit
# encoding: UTF-8
# Copyright 2017
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import tensorflow.compat.v1 as tf
from code import my_txtutils
# these must match what was saved !
# Data files can be downloaded from the following locations:
# - Fully trained on Shakespeare or Tensorflow Python source:
# - Partially trained, to see how they make progress in training:
# Checkpoint path prefixes that can be assigned to `author` below. The inline
# notes describe the kind of text each checkpoint generates.
shakespeareC0 = "checkpoints/rnn_train_1495455686-0" # random
shakespeareC1 = "checkpoints/rnn_train_1495455686-150000" # lower case gibberish
shakespeareC2 = "checkpoints/rnn_train_1495455686-300000" # words, paragraphs
shakespeareC3 = "checkpoints/rnn_train_1495455686-450000" # structure of a play, unintelligible words
shakespeareC4 = "checkpoints/rnn_train_1495447371-15000000" # better structure of a play, character names (not very good), 4-letter words in correct English
shakespeareC5 = "checkpoints/rnn_train_1495447371-45000000" # good names, even when invented (ex: SIR NATHANIS LORD OF SYRACUSE), correct 6-8 letter words
shakespeareB10 = "checkpoints/rnn_train_1495440473-102000000" # ACT V SCENE IV, [Re-enter KING JOHN with MARDIAN], DON ADRIANO DRAGHAMONE <- invented!
# most scene directions correct: [Enter FERDINAND] [Dies] [Exit ROSALIND] [To COMINIUS with me] [Enter PRINCE HENRY, and Attendants], correct English.
# Checkpoints trained on Python source code instead of Shakespeare.
pythonA0 = "checkpoints/rnn_train_1495458538-300000" # gibberish
pythonA1 = "checkpoints/rnn_train_1495458538-1200000" # some function calls with parameters and ()
pythonA2 = "checkpoints/rnn_train_1495458538-10200000" # starts looking Tensorflow Python, nested () and [] not perfect yet
pythonB10 = "checkpoints/rnn_train_1495458538-201600000" # can even recite the Apache license
# use topn=10 for all but the last one which works with topn=2 for Shakespeare and topn=3 for Python
author = shakespeareB10

ncnt = 0  # number of characters printed on the current output line
with tf.Session() as sess:
    # Rebuild the graph from the saved meta file, then load the chosen weights.
    new_saver = tf.train.import_meta_graph('checkpoints/rnn_train_1495455686-0.meta')
    new_saver.restore(sess, author)
    x = my_txtutils.convert_from_alphabet(ord("L"))
    x = np.array([[x]])  # shape [BATCHSIZE, SEQLEN] with BATCHSIZE=1 and SEQLEN=1

    # initial values
    y = x
    h = np.zeros([1, INTERNALSIZE * NLAYERS], dtype=np.float32)  # [ BATCHSIZE, INTERNALSIZE * NLAYERS]
    for i in range(1000000000):
        # Feed the last character and state, get the next-character
        # probabilities and the new state back.
        yo, h = sess.run(['Yo:0', 'H:0'], feed_dict={'X:0': y, 'pkeep:0': 1., 'Hin:0': h, 'batchsize:0': 1})

        # If sampling is done from the topn most likely characters, the generated text
        # is more credible and more "english". If topn is not set, it defaults to the full
        # distribution (ALPHASIZE)

        # Recommended: topn = 10 for intermediate checkpoints, topn=2 or 3 for fully trained checkpoints

        c = my_txtutils.sample_from_probabilities(yo, topn=2)
        y = np.array([[c]])  # shape [BATCHSIZE, SEQLEN] with BATCHSIZE=1 and SEQLEN=1
        c = chr(my_txtutils.convert_to_alphabet(c))
        print(c, end="")

        # Hard-wrap the output: force a line break after 100 characters
        # without a newline.
        if c == '\n':
            ncnt = 0
        else:
            ncnt += 1
        if ncnt == 100:
            print("")
            ncnt = 0
# SCENE III An ante-chamber. The COUNT's palace.
# [Enter CLEOMENES, with the Lord SAY]
# Chamberlain Let me see your worshing in my hands.
# LUCETTA I am a sign of me, and sorrow sounds it.
# What manner of mine is mad, and soon arise?
# JULIA What shall by these things were a secret fool,
# That still shall see me with the best and force?
# Second Watchman Ay, but we see them not at home: the strong and fair of thee,
# The seasons are as safe as the time will be a soul,
# That works out of this fearful sore of feather
# To tell her with a storm of something storms
# That have some men of man is now the subject.
# What says the story, well say we have said to thee,
# That shall she not, though that the way of hearts,
# We have seen his service that we may be sad.
# [Retains his house]
# ADRIANA What says my lord the Duke of Burgons of Tyre?
# DOMITIUS ENOBARBUS But, sir, you shall have such a sweet air from the state,
# There is not so much as you see the store,
# As if the base should be so foul as you.
# DOMITIUS ENOY If I do now, if you were not to seek to say,
# That you may be a soldier's father for the field.
# [Exit]
# encoding: UTF-8
# Copyright 2017
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import time
import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.compat.v1.contrib import layers
from tensorflow.compat.v1.contrib import rnn # rnn stuff temporarily in contrib, moving back to code in TF 1.1
from code import my_txtutils as txt
# model parameters
# Usage:
# Training only:
# Leave all the parameters as they are
# Disable validation to run a bit faster (set validation=False below)
# You can follow progress in Tensorboard: tensorboard --log-dir=log
# Training and experimentation (default):
# Keep validation enabled
# You can now play with the parameters and follow the effects in Tensorboard
# A good choice of parameters ensures that the testing and validation curves stay close
# To see the curves drift apart ("overfitting") try to use an insufficient amount of
# training data (shakedir = "shakespeare/t*.txt" for example)
# Training hyper-parameters; both are fed to the graph through placeholders in
# the training loop.
learning_rate = 0.001 # fixed learning rate
dropout_pkeep = 0.8 # some dropout
# load data, either shakespeare, or the Python source of Tensorflow itself
shakedir = "shakespeare/*.txt"
#shakedir = "../tensorflow/**/*.py"
# Returns the encoded training text, the encoded validation text and the list
# of loaded files with their ranges.
codetext, valitext, bookranges = txt.read_data_files(shakedir, validation=True)
# display some stats on the data
# epoch_size is the number of full batches in the training text.
epoch_size = len(codetext) // (BATCHSIZE * SEQLEN)
txt.print_data_stats(len(codetext), len(valitext), epoch_size)
# the model (see the FAQ in the project README)
lr = tf.placeholder(tf.float32, name='lr')  # learning rate
pkeep = tf.placeholder(tf.float32, name='pkeep')  # dropout parameter
batchsize = tf.placeholder(tf.int32, name='batchsize')

# inputs
X = tf.placeholder(tf.uint8, [None, None], name='X')  # [ BATCHSIZE, SEQLEN ]
Xo = tf.one_hot(X, ALPHASIZE, 1.0, 0.0)  # [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# expected outputs = same sequence shifted by 1 since we are trying to predict the next character
Y_ = tf.placeholder(tf.uint8, [None, None], name='Y_')  # [ BATCHSIZE, SEQLEN ]
Yo_ = tf.one_hot(Y_, ALPHASIZE, 1.0, 0.0)  # [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# input state
Hin = tf.placeholder(tf.float32, [None, INTERNALSIZE*NLAYERS], name='Hin')  # [ BATCHSIZE, INTERNALSIZE * NLAYERS]

# using a NLAYERS=3 layers of GRU cells, unrolled SEQLEN=30 times
# dynamic_rnn infers SEQLEN from the size of the inputs Xo

# How to properly apply dropout in RNNs: see the project README
cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]
# "naive dropout" implementation
dropcells = [rnn.DropoutWrapper(cell, input_keep_prob=pkeep) for cell in cells]
multicell = rnn.MultiRNNCell(dropcells, state_is_tuple=False)
multicell = rnn.DropoutWrapper(multicell, output_keep_prob=pkeep)  # dropout for the softmax layer

Yr, H = tf.nn.dynamic_rnn(multicell, Xo, dtype=tf.float32, initial_state=Hin)
# Yr: [ BATCHSIZE, SEQLEN, INTERNALSIZE ]
# H:  [ BATCHSIZE, INTERNALSIZE*NLAYERS ] # this is the last state in the sequence

H = tf.identity(H, name='H')  # just to give it a name

# Softmax layer implementation:
# Flatten the first two dimension of the output [ BATCHSIZE, SEQLEN, INTERNALSIZE ] => [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
# then apply softmax readout layer. This way, the weights and biases are shared across unrolled time steps.
# From the readout point of view, a value coming from a sequence time step or a minibatch item is the same thing.

# Yflat was used below without ever being defined; it is the RNN output with
# the batch and time dimensions merged.
Yflat = tf.reshape(Yr, [-1, INTERNALSIZE])  # [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
Ylogits = layers.linear(Yflat, ALPHASIZE)  # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Yflat_ = tf.reshape(Yo_, [-1, ALPHASIZE])  # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Yflat_)  # [ BATCHSIZE x SEQLEN ]
loss = tf.reshape(loss, [batchsize, -1])  # [ BATCHSIZE, SEQLEN ]
Yo = tf.nn.softmax(Ylogits, name='Yo')  # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Y = tf.argmax(Yo, 1)  # [ BATCHSIZE x SEQLEN ]
Y = tf.reshape(Y, [batchsize, -1], name="Y")  # [ BATCHSIZE, SEQLEN ]
train_step = tf.train.AdamOptimizer(lr).minimize(loss)
# stats for display
seqloss = tf.reduce_mean(loss, 1)  # mean loss of each sequence in the batch
batchloss = tf.reduce_mean(seqloss)  # mean loss over the whole batch
# Fraction of positions where the predicted character equals the expected one.
accuracy = tf.reduce_mean(tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))
loss_summary = tf.summary.scalar("batch_loss", batchloss)
acc_summary = tf.summary.scalar("batch_accuracy", accuracy)
summaries = tf.summary.merge([loss_summary, acc_summary])
# Init Tensorboard stuff. This will save Tensorboard information into different
# folders at each run, named 'log/<timestamp>-training' and
# 'log/<timestamp>-validation'. Two sets of data are saved so that
# you can compare training and validation curves visually in Tensorboard.
timestamp = str(math.trunc(time.time()))
summary_writer = tf.summary.FileWriter("log/" + timestamp + "-training")
validation_writer = tf.summary.FileWriter("log/" + timestamp + "-validation")
# Init for saving models. They will be saved into a directory named 'checkpoints'.
# Up to 1000 of the most recent checkpoints are kept (max_to_keep=1000).
# exist_ok avoids failing when the directory already exists.
os.makedirs("checkpoints", exist_ok=True)
saver = tf.train.Saver(max_to_keep=1000)
# for display: init the progress bar
progress = txt.Progress(DISPLAY_FREQ, size=111+2, msg="Training on next "+str(DISPLAY_FREQ)+" batches")

# init
istate = np.zeros([BATCHSIZE, INTERNALSIZE*NLAYERS])  # initial zero input state
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
step = 0

# Sequence length for validation. State will be wrong at the start of each sequence.
VALI_SEQLEN = 1*1024

# training loop
for x, y_, epoch in txt.rnn_minibatch_sequencer(codetext, BATCHSIZE, SEQLEN, nb_epochs=10):

    # train on one minibatch
    feed_dict = {X: x, Y_: y_, Hin: istate, lr: learning_rate, pkeep: dropout_pkeep, batchsize: BATCHSIZE}
    _, y, ostate = sess.run([train_step, Y, H], feed_dict=feed_dict)

    # log training data for Tensorboard display a mini-batch of sequences (every 50 batches)
    if step % _50_BATCHES == 0:
        feed_dict = {X: x, Y_: y_, Hin: istate, pkeep: 1.0, batchsize: BATCHSIZE}  # no dropout for validation
        y, l, bl, acc, smm = sess.run([Y, seqloss, batchloss, accuracy, summaries], feed_dict=feed_dict)
        txt.print_learning_learned_comparison(x, y, l, bookranges, bl, acc, epoch_size, step, epoch)
        summary_writer.add_summary(smm, step)

    # run a validation step every 50 batches
    # The validation text should be a single sequence but that's too slow (1s per 1024 chars!),
    # so we cut it up and batch the pieces (slightly inaccurate)
    # tested: validating with 5K sequences instead of 1K is only slightly more accurate, but a lot slower.
    # (len - 1) because the sequencer also needs the text shifted by one
    # character; this keeps an exact multiple of VALI_SEQLEN from asking for one
    # sequence too many, and bsize > 0 skips validation texts that are too short.
    bsize = (len(valitext) - 1) // VALI_SEQLEN
    if step % _50_BATCHES == 0 and bsize > 0:
        txt.print_validation_header(len(codetext), bookranges)
        vali_x, vali_y, _ = next(txt.rnn_minibatch_sequencer(valitext, bsize, VALI_SEQLEN, 1))  # all data in 1 batch
        vali_nullstate = np.zeros([bsize, INTERNALSIZE*NLAYERS])
        feed_dict = {X: vali_x, Y_: vali_y, Hin: vali_nullstate, pkeep: 1.0,  # no dropout for validation
                     batchsize: bsize}
        ls, acc, smm = sess.run([batchloss, accuracy, summaries], feed_dict=feed_dict)
        txt.print_validation_stats(ls, acc)
        # save validation data for Tensorboard
        validation_writer.add_summary(smm, step)

    # display a short text generated with the current weights and biases (every 150 batches)
    if step // 3 % _50_BATCHES == 0:
        txt.print_text_generation_header()
        ry = np.array([[txt.convert_from_alphabet(ord("K"))]])
        rh = np.zeros([1, INTERNALSIZE * NLAYERS])
        for k in range(1000):
            ryo, rh = sess.run([Yo, H], feed_dict={X: ry, pkeep: 1.0, Hin: rh, batchsize: 1})
            rc = txt.sample_from_probabilities(ryo, topn=10 if epoch <= 1 else 2)
            print(chr(txt.convert_to_alphabet(rc)), end="")
            ry = np.array([[rc]])
        txt.print_text_generation_footer()

    # save a checkpoint (every 500 batches)
    if step // 10 % _50_BATCHES == 0:
        saved_file = saver.save(sess, 'checkpoints/rnn_train_' + timestamp, global_step=step)
        print("Saved file: " + saved_file)

    # display progress bar
    progress.step(reset=step % _50_BATCHES == 0)

    # loop state around
    istate = ostate
    # step counts characters seen so far; without this increment every
    # "every N batches" test above would fire on each iteration.
    step += BATCHSIZE * SEQLEN
# all runs: SEQLEN = 30, BATCHSIZE = 100, ALPHASIZE = 98, INTERNALSIZE = 512, NLAYERS = 3
# run 1477669632 decaying learning rate 0.001-0.0001-1e7 dropout 0.5: not good
# run 1477670023 lr=0.001 no dropout: very good
# Tensorflow runs:
# 1485434262
# trained on shakespeare/t*.txt only. Validation on 1K sequences
# validation loss goes up from step 5M (overfitting because of small dataset)
# 1485436038
# trained on shakespeare/t*.txt only. Validation on 5K sequences
# On 5K sequences validation accuracy is slightly higher and loss slightly lower
# => sequence breaks do introduce inaccuracies but the effect is small
# 1485437956
# Trained on shakespeare/*.txt. Validation on 1K sequences
# On this much larger dataset, validation loss still decreasing after 6 epochs (step 35M)
# 1495447371
# Trained on shakespeare/*.txt no dropout, 30 epochs
# Validation loss starts going up after 10 epochs (overfitting)
# 1495440473
# Trained on shakespeare/*.txt "naive dropout" pkeep=0.8, 30 epochs
# Dropout brings the validation loss under control, preventing it from
# going up but the effect is small.
# encoding: UTF-8
# Copyright 2017
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import os
import time
import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import rnn # rnn stuff temporarily in contrib, moving back to code in TF 1.1
from code import my_txtutils as txt
# Full comments are in the companion training script (the state_is_tuple=False version).
# This file implements the exact same model but using the state_is_tuple=True
# option in tf.nn.rnn_cell.MultiRNNCell. This option is enabled by default.
# It produces faster code (by ~10%) but handling the state as a tuple is a bit
# more cumbersome. Search for comments containing "state_is_tuple=True" for
# details.
# Training hyper-parameter, fed to the graph through the `lr` placeholder.
learning_rate = 0.001 # fixed learning rate
# load data, either shakespeare, or the Python source of Tensorflow itself
shakedir = "shakespeare/*.txt"
# shakedir = "../tensorflow/**/*.py"
# validation=False: no text is set aside for validation in this script.
codetext, valitext, bookranges = txt.read_data_files(shakedir, validation=False)
# display some stats on the data
# epoch_size is the number of full batches in the training text.
epoch_size = len(codetext) // (BATCHSIZE * SEQLEN)
txt.print_data_stats(len(codetext), len(valitext), epoch_size)
# the model
lr = tf.placeholder(tf.float32, name='lr')  # learning rate
batchsize = tf.placeholder(tf.int32, name='batchsize')

# inputs
X = tf.placeholder(tf.uint8, [None, None], name='X')  # [ BATCHSIZE, SEQLEN ]
Xo = tf.one_hot(X, ALPHASIZE, 1.0, 0.0)  # [ BATCHSIZE, SEQLEN, ALPHASIZE ]
# expected outputs = same sequence shifted by 1 since we are trying to predict the next character
Y_ = tf.placeholder(tf.uint8, [None, None], name='Y_')  # [ BATCHSIZE, SEQLEN ]
Yo_ = tf.one_hot(Y_, ALPHASIZE, 1.0, 0.0)  # [ BATCHSIZE, SEQLEN, ALPHASIZE ]

cells = [rnn.GRUCell(INTERNALSIZE) for _ in range(NLAYERS)]
multicell = rnn.MultiRNNCell(cells, state_is_tuple=True)

# When using state_is_tuple=True, you must use multicell.zero_state
# to create a tuple of placeholders for the input states (one state per layer).
# When executed with a session, this also returns the correctly
# shaped initial zero state to use when starting your training loop.
zerostate = multicell.zero_state(BATCHSIZE, dtype=tf.float32)

Yr, H = tf.nn.dynamic_rnn(multicell, Xo, dtype=tf.float32, initial_state=zerostate)
# Yr: [ BATCHSIZE, SEQLEN, INTERNALSIZE ]
# H: the last state in the sequence, one entry per layer (state_is_tuple=True).
# NOTE(review): tf.identity presumably packs the per-layer tuple into a single
# tensor indexed by layer on its first axis - confirm.
H = tf.identity(H, name='H')  # just to give it a name

# Softmax layer implementation:
# Flatten the first two dimension of the output [ BATCHSIZE, SEQLEN, INTERNALSIZE ] => [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
# then apply softmax readout layer. This way, the weights and biases are shared across unrolled time steps.
# From the readout point of view, a value coming from a cell or a minibatch is the same thing

# Yflat was used below without ever being defined; it is the RNN output with
# the batch and time dimensions merged.
Yflat = tf.reshape(Yr, [-1, INTERNALSIZE])  # [ BATCHSIZE x SEQLEN, INTERNALSIZE ]
Ylogits = layers.linear(Yflat, ALPHASIZE)  # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Yflat_ = tf.reshape(Yo_, [-1, ALPHASIZE])  # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
loss = tf.nn.softmax_cross_entropy_with_logits(logits=Ylogits, labels=Yflat_)  # [ BATCHSIZE x SEQLEN ]
loss = tf.reshape(loss, [batchsize, -1])  # [ BATCHSIZE, SEQLEN ]
Yo = tf.nn.softmax(Ylogits, name='Yo')  # [ BATCHSIZE x SEQLEN, ALPHASIZE ]
Y = tf.argmax(Yo, 1)  # [ BATCHSIZE x SEQLEN ]
Y = tf.reshape(Y, [batchsize, -1], name="Y")  # [ BATCHSIZE, SEQLEN ]
train_step = tf.train.AdamOptimizer(lr).minimize(loss)
# stats for display
seqloss = tf.reduce_mean(loss, 1)  # mean loss of each sequence in the batch
batchloss = tf.reduce_mean(seqloss)  # mean loss over the whole batch
# Fraction of positions where the predicted character equals the expected one.
accuracy = tf.reduce_mean(tf.cast(tf.equal(Y_, tf.cast(Y, tf.uint8)), tf.float32))
loss_summary = tf.summary.scalar("batch_loss", batchloss)
acc_summary = tf.summary.scalar("batch_accuracy", accuracy)
summaries = tf.summary.merge([loss_summary, acc_summary])
# Init Tensorboard stuff. This will save Tensorboard information into a different
# folder at each run named 'log/<timestamp>-training'.
timestamp = str(math.trunc(time.time()))
summary_writer = tf.summary.FileWriter("log/" + timestamp + "-training")
# Init for saving models. They will be saved into a directory named 'checkpoints'.
# Only the last checkpoint is kept.
# exist_ok avoids failing when the directory already exists.
os.makedirs("checkpoints", exist_ok=True)
saver = tf.train.Saver(max_to_keep=1)
# for display: init the progress bar
progress = txt.Progress(DISPLAY_FREQ, size=111+2, msg="Training on next "+str(DISPLAY_FREQ)+" batches")

# init
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
step = 0

# training loop
istate = sess.run(zerostate)  # initial zero input state (a tuple)
for x, y_, epoch in txt.rnn_minibatch_sequencer(codetext, BATCHSIZE, SEQLEN, nb_epochs=1000):

    # train on one minibatch
    feed_dict = {X: x, Y_: y_, lr: learning_rate, batchsize: BATCHSIZE}
    # This is how you add the input state to feed dictionary when state_is_tuple=True.
    # zerostate is a tuple of the placeholders for the NLAYERS=3 input states of our
    # multi-layer RNN cell. Those placeholders must be used as keys in feed_dict.
    # istate is a tuple holding the actual values of the input states (one per layer).
    # Iterate on the input state placeholders and use them as keys in the dictionary
    # to add actual input state values.
    for i, v in enumerate(zerostate):
        feed_dict[v] = istate[i]
    _, y, ostate, smm = sess.run([train_step, Y, H, summaries], feed_dict=feed_dict)

    # save training data for Tensorboard
    summary_writer.add_summary(smm, step)

    # display a visual validation of progress (every 50 batches)
    if step % _50_BATCHES == 0:
        feed_dict = {X: x, Y_: y_, batchsize: BATCHSIZE}  # same batch, without the training op
        for i, v in enumerate(zerostate):
            feed_dict[v] = istate[i]
        y, l, bl, acc = sess.run([Y, seqloss, batchloss, accuracy], feed_dict=feed_dict)
        # Only the first 5 sequences of the batch are displayed.
        txt.print_learning_learned_comparison(x[:5], y, l, bookranges, bl, acc, epoch_size, step, epoch)

    # save a checkpoint (every 500 batches)
    if step // 10 % _50_BATCHES == 0:
        saver.save(sess, 'checkpoints/rnn_train_' + timestamp, global_step=step)

    # display progress bar
    progress.step(reset=step % _50_BATCHES == 0)

    # loop state around
    istate = ostate
    # step counts characters seen so far; without this increment every
    # "every N batches" test above would fire on each iteration.
    step += BATCHSIZE * SEQLEN
# encoding: UTF-8
# Copyright 2017
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from code import my_txtutils as txt
class RnnMinibatchSequencerTest(unittest.TestCase):
def setUp(self):
# generate text of consecutive items
|||| = list(range(TST_TXTSIZE))
def check_seq_batch(batch1, batch2):
nb_errors = 0
for i in range(TST_BATCHSIZE):
ok = batch1[i, -1] + 1 == batch2[i, 0]
nb_errors += 0 if ok else 1
return nb_errors
def test_sequences(self):
for x, y, epoch in txt.rnn_minibatch_sequencer(, TST_BATCHSIZE, TST_SEQLEN, TST_EPOCHS):
for i in range(TST_BATCHSIZE):
self.assertListEqual(x[i, 1:].tolist(), y[i, :-1].tolist(),
msg="y sequences must be equal to x sequences shifted by -1")
def test_batches(self):
start = True
prev_x = np.zeros([TST_BATCHSIZE, TST_SEQLEN], np.int32)
prev_y = np.zeros([TST_BATCHSIZE, TST_SEQLEN], np.int32)
nb_errors = 0
nb_batches = 0
for x, y, epoch in txt.rnn_minibatch_sequencer(, TST_BATCHSIZE, TST_SEQLEN, TST_EPOCHS):
if not start:
nb_errors += self.check_seq_batch(prev_x, x)
nb_errors += self.check_seq_batch(prev_y, y)
prev_x = x
prev_y = y
start = False
nb_batches += 1
self.assertLessEqual(nb_errors, 2 * TST_EPOCHS,
msg="Sequences should be correctly continued, even between epochs. Only "
"one sequence is allowed to not continue from one epoch to the next.")
self.assertLess(TST_TXTSIZE - (nb_batches * TST_BATCHSIZE * TST_SEQLEN),
msg="Text ignored at the end of an epoch must be smaller than one batch of sequences")
class EncodingTest(unittest.TestCase):
def setUp(self):
self.test_text_known_chars = \
"\n" \
"By Jane Austen" \
"\n" \
"\n" \
"\n" \
"Chapter 1" \
"\n" \
"\n" \
"It is a truth universally acknowledged, that a single man in possession " \
"of a good fortune, must be in want of a wife." \
"\n\n" \
"However little known the feelings or views of such a man may be on his " \
"first entering a neighbourhood, this truth is so well fixed in the minds " \
"of the surrounding families, that he is considered the rightful property " \
"of some one or other of their daughters." \
"\n\n" \
"\"My dear Mr. Bennet,\" said his lady to him one day, \"have you heard that " \
"Netherfield Park is let at last?\"" \
"\n\n" \
"Mr. Bennet replied that he had not." \
"\n\n" \
"\"But it is,\" returned she; \"for Mrs. Long has just been here, and she " \
"told me all about it.\"" \
"\n\n" \
"Mr. Bennet made no answer." \
"\n\n" \
"\"Do you not want to know who has taken it?\" cried his wife impatiently." \
"\n\n" \
"\"_You_ want to tell me, and I have no objection to hearing it.\"" \
"\n\n" \
"This was invitation enough." \
"\n\n" \
"\"Why, my dear, you must know, Mrs. Long says that Netherfield is taken " \
"by a young man of large fortune from the north of England; that he came " \
"down on Monday in a chaise and four to see the place, and was so much " \
"delighted with it, that he agreed with Mr. Morris immediately; that he " \
"is to take possession before Michaelmas, and some of his servants are to " \
"be in the house by the end of next week.\"" \
"\n\n" \
"\"What is his name?\"" \
"\n\n" \
"\"Bingley.\"" \
"\n\n" \
"Testing punctuation: !\"#$%&\'()*+,-./0123456789:;<=>?@[\\]^_`{|}~" \
"\n" \
"Tab\x09Tab\x09Tab\x09Tab" \
self.test_text_unknown_char = "Unknown char: \x0C" # the unknown char 'new page'
def test_encoding(self):
encoded = txt.encode_text(self.test_text_known_chars)
decoded = txt.decode_to_text(encoded)
self.assertEqual(self.test_text_known_chars, decoded,
msg="On a sequence of supported characters, encoding, "
"then decoding should yield the original string.")
def test_unknown_encoding(self):
encoded = txt.encode_text(self.test_text_unknown_char)
decoded = txt.decode_to_text(encoded)
original_fix = self.test_text_unknown_char[:-1] + chr(0)
self.assertEqual(original_fix, decoded,
msg="The last character of the test sequence is an unsupported "
"character and should be encoded and decoded as 0.")
class TxtProgressTest(unittest.TestCase):
def test_progress_indicator(self):
print("If the printed output of this test is incorrect, the test will fail. No need to check visually.", end='')
test_cases = (50, 51, 49, 1, 2, 3, 1000, 333, 101)
p = txt.Progress(100)
for maxi in test_cases:
m, cent = self.check_progress_indicator(p, maxi)
self.assertEqual(m, maxi, msg="Incorrect number of steps.")
self.assertEqual(cent, 100, msg="Incorrect number of steps.")
def check_progress_indicator(p, maxi):
progress = p._Progress__start_progress(maxi)
total = 0
n = 0
for k in progress():
total += k
n += 1
return n, total
1. 管理员,管理用户账户,监督和处理用户违规行为。
2. 管理员,定期检查和维护网站以及服务器运行情况。
3. 管理员,及时更新人工智能拥有的剧本数量,拓宽剧本编写的广度。
4. 用户,选择剧本需求,支付后查看人工智能编写出的剧本。
5. 用户,在支付剧本费用后可查看人工智能写出的剧本,共计四个。
6. 用户,选择剧本后可进行编辑,保存和下载。
7. 用户,可以查看自己所有已购买和编辑的剧本。
# 项目沟通计划
| ------------ | ------------ | ------------ | ------------ |
# 风险管理计划
| ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ | ------------ |
Reference in new issue