import os
import re
import sys
import json
import math
import time
import unicodedata
from functools import partial

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()
from tensorflow.python.framework import function
from tqdm import tqdm


def encode_dataset(*splits, encoder):
    """Encode every string-valued field of each split with the given text encoder."""
    encoded_splits = []
    for split in splits[0]:
        fields = []
        for field in split:
            if isinstance(field[0], str):
                field = encoder.encode(field)
            fields.append(field)
        encoded_splits.append(fields)
    return encoded_splits


def stsb_label_encoding(labels, nclass=6):
    """
    Label encoding from Tree LSTM paper (Tai, Socher, Manning)
    """
    Y = np.zeros((len(labels), nclass)).astype(np.float32)
    for j, y in enumerate(labels):
        for i in range(nclass):
            if i == np.floor(y) + 1:
                Y[j, i] = y - np.floor(y)
            if i == np.floor(y):
                Y[j, i] = np.floor(y) - y + 1
    return Y


def shape_list(x):
    """
    Deal with dynamic shape in tensorflow cleanly.
    """
    ps = x.get_shape().as_list()
    ts = tf.shape(x)
    return [ts[i] if ps[i] is None else ps[i] for i in range(len(ps))]


def np_softmax(x, t=1):
    # Temperature-scaled, numerically stable softmax over the last axis.
    x = x / t
    x = x - np.max(x, axis=-1, keepdims=True)
    ex = np.exp(x)
    return ex / np.sum(ex, axis=-1, keepdims=True)


def make_path(f):
    # Create the parent directory of f if it does not exist yet.
    d = os.path.dirname(f)
    if d and not os.path.exists(d):
        os.makedirs(d)
    return f


def _identity_init(shape, dtype, partition_info, scale):
    n = shape[-1]
    w = np.eye(n) * scale
    if len([s for s in shape if s != 1]) == 2:
        w = w.reshape(shape)
    return w.astype(np.float32)


def identity_init(scale=1.0):
    # Initializer that returns a (scaled) identity matrix.
    return partial(_identity_init, scale=scale)


def _np_init(shape, dtype, partition_info, w):
    return w


def np_init(w):
    # Initializer that returns a fixed numpy array.
    return partial(_np_init, w=w)


class ResultLogger(object):
    """JSON-lines result logger; a timestamp is added to each record if not supplied."""

    def __init__(self, path, *args, **kwargs):
        if 'time' not in kwargs:
            kwargs['time'] = time.time()
        self.f_log = open(make_path(path), 'w')
        self.f_log.write(json.dumps(kwargs) + '\n')

    def log(self, **kwargs):
        if 'time' not in kwargs:
            kwargs['time'] = time.time()
        self.f_log.write(json.dumps(kwargs) + '\n')
        self.f_log.flush()

    def close(self):
        self.f_log.close()


def find_trainable_variables(key):
    # All trainable variables whose name matches the given key.
    return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, ".*{}.*".format(key))


def flatten(outer):
    return [el for inner in outer for el in inner]


def remove_none(l):
    return [e for e in l if e is not None]


def iter_data(*datas, n_batch=128, truncate=False, verbose=False, max_batches=float("inf")):
    """Yield minibatches of size n_batch from one or more aligned sequences."""
    n = len(datas[0])
    if truncate:
        n = (n // n_batch) * n_batch
    n = min(n, max_batches * n_batch)
    n_batches = 0
    if verbose:
        f = sys.stderr
    else:
        f = open(os.devnull, 'w')
    for i in tqdm(range(0, n, n_batch), total=n // n_batch, file=f, ncols=80, leave=False):
        if n_batches >= max_batches:
            # Raising StopIteration inside a generator is an error since PEP 479
            # (Python 3.7+); just stop iterating instead.
            break
        if len(datas) == 1:
            yield datas[0][i:i + n_batch]
        else:
            yield (d[i:i + n_batch] for d in datas)
        n_batches += 1


@function.Defun(
    python_grad_func=lambda x, dy: tf.convert_to_tensor(dy),
    shape_func=lambda op: [op.inputs[0].get_shape()])
def convert_gradient_to_tensor(x):
    """Force gradient to be a dense tensor.

    It's often faster to do dense embedding gradient on GPU than sparse on CPU.
    """
    return x


def assign_to_gpu(gpu=0, ps_dev="/device:CPU:0"):
    # Device function: keep Variable ops on ps_dev, everything else on the given GPU.
    def _assign(op):
        node_def = op if isinstance(op, tf.NodeDef) else op.node_def
        if node_def.op == "Variable":
            return ps_dev
        else:
            return "/gpu:%d" % gpu
    return _assign


def average_grads(tower_grads):
    """Combine per-tower (gradient, variable) pairs for multi-GPU training."""
    def average_dense(grad_and_vars):
        if len(grad_and_vars) == 1:
            return grad_and_vars[0][0]

        grad = grad_and_vars[0][0]
        for g, _ in grad_and_vars[1:]:
            grad += g
        return grad / len(grad_and_vars)

    def average_sparse(grad_and_vars):
        if len(grad_and_vars) == 1:
            return grad_and_vars[0][0]
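        # Concatenate the per-tower indices and values into one IndexedSlices.
        # Note the values are not rescaled here, so entries at duplicate indices
        # effectively sum across towers when the sparse gradient is applied.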
        indices = []
        values = []
        for g, _ in grad_and_vars:
            indices += [g.indices]
            values += [g.values]
        indices = tf.concat(indices, 0)
        values = tf.concat(values, 0)
        return tf.IndexedSlices(values, indices, grad_and_vars[0][0].dense_shape)

    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        if grad_and_vars[0][0] is None:
            grad = None
        elif isinstance(grad_and_vars[0][0], tf.IndexedSlices):
            grad = average_sparse(grad_and_vars)
        else:
            grad = average_dense(grad_and_vars)
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
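

# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original utilities): a small smoke test
# of the pure NumPy / Python helpers above. The toy labels, logits, and data
# below are illustrative assumptions, not values used elsewhere in the codebase.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # stsb_label_encoding spreads a real-valued similarity score over the two
    # neighbouring integer classes, e.g. 2.6 -> 0.4 on class 2 and 0.6 on class 3.
    print(stsb_label_encoding([2.6, 4.0]))

    # np_softmax: temperature-scaled softmax, each row sums to 1.
    print(np_softmax(np.array([[1.0, 2.0, 3.0]]), t=2.0))

    # iter_data yields aligned minibatches from several sequences at once;
    # each yielded item unpacks into one slice per input sequence.
    xs = list(range(10))
    ys = list(range(10, 20))
    for xb, yb in iter_data(xs, ys, n_batch=4, truncate=True):
        print(list(xb), list(yb))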