import os
import re
import sys
import json
import math
import time
import unicodedata
from functools import partial

import numpy as np
import tensorflow.compat.v1 as tf
from tensorflow.python.framework import function
from tqdm import tqdm

tf.disable_v2_behavior()


def encode_dataset(*splits, encoder):
    encoded_splits = []
    for split in splits[0]:
        fields = []
        for field in split:
            if isinstance(field[0], str):
                field = encoder.encode(field)
            fields.append(field)
        encoded_splits.append(fields)
    return encoded_splits
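
# Illustrative usage (an assumption about the calling convention, not from
# the original file): `encoder` is expected to expose
# encode(list_of_strings) -> list of token-id lists; string fields are
# encoded while numeric fields such as labels pass through unchanged:
#   splits = ((train_texts, train_labels), (test_texts, test_labels))
#   (trX, trY), (teX, teY) = encode_dataset(splits, encoder=text_encoder)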


def stsb_label_encoding(labels, nclass=6):
    """
    Label encoding from Tree LSTM paper (Tai, Socher, Manning)
    """
    Y = np.zeros((len(labels), nclass)).astype(np.float32)
    for j, y in enumerate(labels):
        for i in range(nclass):
            if i == np.floor(y) + 1:
                Y[j, i] = y - np.floor(y)
            if i == np.floor(y):
                Y[j, i] = np.floor(y) - y + 1
    return Y
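
# Worked example (illustrative): a similarity score of 2.6 is split between
# its two neighboring integer classes, 0.4 on class 2 and 0.6 on class 3:
#   stsb_label_encoding([2.6])
#   # -> array([[0. , 0. , 0.4, 0.6, 0. , 0. ]], dtype=float32)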


def shape_list(x):
    """
    Deal with dynamic shapes in TensorFlow cleanly.
    """
    ps = x.get_shape().as_list()
    ts = tf.shape(x)
    return [ts[i] if ps[i] is None else ps[i] for i in range(len(ps))]
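
# Illustrative: statically known dimensions come back as Python ints, unknown
# ones as tf.shape() components, so the result can be used anywhere a mixed
# static/dynamic shape is needed (e.g. tf.reshape):
#   x = tf.placeholder(tf.float32, [None, 512])
#   shape_list(x)  # -> [<dynamic batch-size tensor>, 512]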


def np_softmax(x, t=1):
    x = x / t
    x = x - np.max(x, axis=-1, keepdims=True)
    ex = np.exp(x)
    return ex / np.sum(ex, axis=-1, keepdims=True)
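
# Worked example (illustrative): a numerically stable softmax with
# temperature t; larger t flattens the distribution:
#   np_softmax(np.array([1.0, 2.0]))        # -> [0.269, 0.731]
#   np_softmax(np.array([1.0, 2.0]), t=10)  # -> [0.475, 0.525]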


def make_path(f):
    d = os.path.dirname(f)
    if d and not os.path.exists(d):
        os.makedirs(d)
    return f


def _identity_init(shape, dtype, partition_info, scale):
    n = shape[-1]
    w = np.eye(n) * scale
    if len([s for s in shape if s != 1]) == 2:
        w = w.reshape(shape)
    return w.astype(np.float32)


def identity_init(scale=1.0):
    return partial(_identity_init, scale=scale)


def _np_init(shape, dtype, partition_info, w):
    return w


def np_init(w):
    return partial(_np_init, w=w)
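
# Illustrative usage (an assumption, not from the original file): both
# factories return callables with the TF1 initializer signature
# (shape, dtype, partition_info), so they can be handed to tf.get_variable;
# the variable names and sizes here are hypothetical:
#   w = tf.get_variable("w", [1, 768, 768], initializer=identity_init(scale=1.0))
#   b = tf.get_variable("b", [768], initializer=np_init(np.zeros(768, dtype=np.float32)))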


class ResultLogger(object):
    def __init__(self, path, *args, **kwargs):
        if 'time' not in kwargs:
            kwargs['time'] = time.time()
        self.f_log = open(make_path(path), 'w')
        self.f_log.write(json.dumps(kwargs) + '\n')

    def log(self, **kwargs):
        if 'time' not in kwargs:
            kwargs['time'] = time.time()
        self.f_log.write(json.dumps(kwargs) + '\n')
        self.f_log.flush()

    def close(self):
        self.f_log.close()
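
# Illustrative usage: writes one JSON object per line (JSONL), adding a
# timestamp unless the caller supplies one; the path and fields here are
# hypothetical:
#   logger = ResultLogger("logs/run1.jsonl", desc="baseline")
#   logger.log(epoch=0, train_loss=2.31)
#   logger.close()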


def find_trainable_variables(key):
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, ".*{}.*".format(key))


def flatten(outer):
    return [el for inner in outer for el in inner]


def remove_none(l):
    return [e for e in l if e is not None]


def iter_data(*datas, n_batch=128, truncate=False, verbose=False, max_batches=float("inf")):
    n = len(datas[0])
    if truncate:
        n = (n // n_batch) * n_batch
    n = min(n, max_batches * n_batch)
    n_batches = 0
    if verbose:
        f = sys.stderr
    else:
        f = open(os.devnull, 'w')
    for i in tqdm(range(0, n, n_batch), total=n // n_batch, file=f, ncols=80, leave=False):
        if n_batches >= max_batches:
            # PEP 479: raising StopIteration inside a generator is a
            # RuntimeError on Python 3.7+; returning ends the generator.
            return
        if len(datas) == 1:
            yield datas[0][i:i + n_batch]
        else:
            yield (d[i:i + n_batch] for d in datas)
        n_batches += 1
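
# Illustrative usage: yields aligned slices over several arrays at once; with
# more than one array, each step yields a generator that unpacks into
# per-array batches:
#   for xmb, ymb in iter_data(X, Y, n_batch=32, truncate=True):
#       ...  # xmb and ymb are aligned 32-element slices of X and Y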


@function.Defun(
    python_grad_func=lambda x, dy: tf.convert_to_tensor(dy),
    shape_func=lambda op: [op.inputs[0].get_shape()])
def convert_gradient_to_tensor(x):
    """Force the gradient to be a dense tensor.

    It's often faster to compute a dense embedding gradient on GPU than a
    sparse one on CPU.
    """
    return x
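
# Illustrative usage (a sketch, assuming a gather-based embedding lookup):
# wrapping the embedding matrix before tf.gather makes the backward pass
# produce a dense tensor instead of the usual sparse IndexedSlices; the
# variable names are hypothetical:
#   we = tf.get_variable("we", [n_vocab, n_embd])
#   h = tf.gather(convert_gradient_to_tensor(we), token_ids)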


def assign_to_gpu(gpu=0, ps_dev="/device:CPU:0"):
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op == "Variable":
            return ps_dev
        else:
            return "/gpu:%d" % gpu
    return _assign
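
# Illustrative usage: tf.device also accepts a function mapping each op to a
# device string, so variables land on ps_dev while compute ops are pinned to
# the chosen GPU (build_tower is a hypothetical model-building function):
#   with tf.device(assign_to_gpu(gpu=i)):
#       loss_i = build_tower(xs[i])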


def average_grads(tower_grads):
    def average_dense(grad_and_vars):
        if len(grad_and_vars) == 1:
            return grad_and_vars[0][0]

        grad = grad_and_vars[0][0]
        for g, _ in grad_and_vars[1:]:
            grad += g
        return grad / len(grad_and_vars)

    def average_sparse(grad_and_vars):
        if len(grad_and_vars) == 1:
            return grad_and_vars[0][0]

        indices = []
        values = []
        for g, _ in grad_and_vars:
            indices += [g.indices]
            values += [g.values]
        indices = tf.concat(indices, 0)
        values = tf.concat(values, 0)
        return tf.IndexedSlices(values, indices, grad_and_vars[0][0].dense_shape)

    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        if grad_and_vars[0][0] is None:
            grad = None
        elif isinstance(grad_and_vars[0][0], tf.IndexedSlices):
            grad = average_sparse(grad_and_vars)
        else:
            grad = average_dense(grad_and_vars)
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
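
# Illustrative usage (an assumption about the calling convention): tower_grads
# holds one compute_gradients result per GPU tower. Note that, as written
# above, sparse gradients are concatenated (a sum across towers) rather than
# divided by the tower count:
#   tower_grads = [opt.compute_gradients(loss) for loss in tower_losses]
#   train_op = opt.apply_gradients(average_grads(tower_grads))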