You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

1651 lines
58 KiB

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""This file exports ONNX ops for opset 11."""
from __future__ import annotations
import functools
import sys
import warnings
from typing import Optional, Sequence
import torch
from torch import _C
from torch._C import _onnx as _C_onnx
from torch.onnx import (
_type_utils,
errors,
symbolic_helper,
symbolic_opset10 as opset10,
symbolic_opset9 as opset9,
utils,
)
from torch.onnx._globals import GLOBALS
from torch.onnx._internal import _beartype, jit_utils, registration
# EDITING THIS FILE? READ THIS FIRST!
# see Note [Edit Symbolic Files] in README.md
# Names exported from this module.
__all__ = [
    "add",
    "append",
    "arange",
    "argsort",
    "atleast_1d",
    "atleast_2d",
    "atleast_3d",
    "cat",
    "chunk",
    "clamp_max",
    "clamp_min",
    "clamp",
    "constant_pad_nd",
    "cumsum",
    "Delete",
    "embedding_bag",
    "embedding_renorm",
    "flatten",
    "gather",
    "hardtanh",
    "hstack",
    "im2col",
    "index_fill",
    "index",
    "index_copy",
    "index_put",
    "insert",
    "linalg_det",
    "linalg_vector_norm",
    "logdet",
    "masked_scatter",
    "masked_select",
    "mm",
    "narrow",
    "normal",
    "pad",
    "pixel_shuffle",
    "pop",
    "prim_constant_chunk",
    "reflection_pad",
    "relu6",
    "remainder",
    "replication_pad",
    "round",
    "scatter",
    "select",
    "size",
    "sort",
    "split_with_sizes",
    "split",
    "squeeze",
    "stack",
    "topk",
    "unbind",
    "unique_dim",
    "unsqueeze",
    "vstack",
]

# Registration decorator with the opset argument pre-bound to 11.
_onnx_symbolic = functools.partial(registration.onnx_symbolic, opset=11)
def _apply_params(*args, **kwargs):
"""Returns a decorator that calls the decorated (higher-order) function with the given parameters."""
def _apply(fn):
return fn(*args, **kwargs)
return _apply
@_onnx_symbolic("aten::hardtanh")
@symbolic_helper.quantized_args(True)
@symbolic_helper.parse_args("v", "f", "f")
@_beartype.beartype
def hardtanh(g: jit_utils.GraphContext, self: _C.Value, min_val: float, max_val: float):
    """Emit a ``Clip`` node for ``aten::hardtanh``.

    ``min_val`` and ``max_val`` become ``Constant`` inputs whose dtype is taken
    from the scalar type of ``self`` (FLOAT is passed to ``from_value`` as the
    default).
    """
    bound_dtype = _type_utils.JitScalarType.from_value(
        self, _type_utils.JitScalarType.FLOAT
    ).dtype()
    lower = g.op("Constant", value_t=torch.tensor(min_val, dtype=bound_dtype))
    upper = g.op("Constant", value_t=torch.tensor(max_val, dtype=bound_dtype))
    return opset9._op_with_optional_float_cast(
        g, "Clip", self, lower, upper, opset_before=12
    )
@_onnx_symbolic("aten::clamp")
@_beartype.beartype
def clamp(g: jit_utils.GraphContext, self, min, max):
    """Emit the graph for ``aten::clamp``.

    Bounds that are present are first cast to the scalar type of ``self`` when
    that type is known. A missing bound delegates to ``clamp_max`` or
    ``clamp_min``; two rank-0 bounds produce a single ``Clip``; otherwise the
    result is ``clamp_max(clamp_min(self, min), max)``.
    """

    @_beartype.beartype
    def _cast_if_not_none(tensor, dtype):
        # Leave absent bounds untouched so the _is_none checks below still work.
        if tensor is None or symbolic_helper._is_none(tensor):
            return tensor
        return g.op("Cast", tensor, to_i=dtype.onnx_type())

    self_type = _type_utils.JitScalarType.from_value(
        self, _type_utils.JitScalarType.UNDEFINED
    )
    if self_type != _type_utils.JitScalarType.UNDEFINED:
        min = _cast_if_not_none(min, self_type)
        max = _cast_if_not_none(max, self_type)

    if symbolic_helper._is_none(min):
        return clamp_max(g, self, max)
    if symbolic_helper._is_none(max):
        return clamp_min(g, self, min)

    both_bounds_scalar = (
        symbolic_helper._get_tensor_rank(min) == 0
        and symbolic_helper._get_tensor_rank(max) == 0
    )
    if both_bounds_scalar:
        return opset9._op_with_optional_float_cast(
            g, "Clip", self, min, max, opset_before=12
        )
    return clamp_max(g, clamp_min(g, self, min), max)
@_onnx_symbolic("aten::clamp_min")
@symbolic_helper.parse_args("v", "v")
@_beartype.beartype
def clamp_min(g: jit_utils.GraphContext, self, min):
    """Emit the graph for ``aten::clamp_min``.

    ``min`` is cast to the scalar type of ``self``. A rank-0 bound yields a
    ``Clip`` with an unused max input; any other bound yields ``Max``.
    """
    self_onnx_type = _type_utils.JitScalarType.from_value(self).onnx_type()
    min = g.op("Cast", min, to_i=self_onnx_type)
    if symbolic_helper._get_tensor_rank(min) != 0:
        return opset9._op_with_optional_float_cast(
            g, "Max", self, min, opset_before=12
        )
    no_upper_bound = opset9.unused(g)
    return opset9._op_with_optional_float_cast(
        g, "Clip", self, min, no_upper_bound, opset_before=12
    )
@_onnx_symbolic("aten::clamp_max")
@symbolic_helper.parse_args("v", "v")
@_beartype.beartype
def clamp_max(g: jit_utils.GraphContext, self, max):
    """Emit the graph for ``aten::clamp_max``.

    ``max`` is cast to the scalar type of ``self``. A rank-0 bound yields a
    ``Clip`` with an unused min input; any other bound yields ``Min``.
    """
    self_onnx_type = _type_utils.JitScalarType.from_value(self).onnx_type()
    max = g.op("Cast", max, to_i=self_onnx_type)
    if symbolic_helper._get_tensor_rank(max) != 0:
        return opset9._op_with_optional_float_cast(
            g, "Min", self, max, opset_before=12
        )
    no_lower_bound = opset9.unused(g)
    return opset9._op_with_optional_float_cast(
        g, "Clip", self, no_lower_bound, max, opset_before=12
    )
@_onnx_symbolic("aten::relu6")
@_beartype.beartype
def relu6(g: jit_utils.GraphContext, input):
    """Emit ``aten::relu6`` as ``clamp(input, 0, 6)``.

    The two bounds are ``Constant`` nodes whose dtype follows the scalar type
    of ``input`` (FLOAT is passed to ``from_value`` as the default).
    """
    bound_dtype = _type_utils.JitScalarType.from_value(
        input, _type_utils.JitScalarType.FLOAT
    ).dtype()
    zero = g.op("Constant", value_t=torch.tensor(0, dtype=bound_dtype))
    six = g.op("Constant", value_t=torch.tensor(6, dtype=bound_dtype))
    return clamp(g, input, zero, six)
@_onnx_symbolic("aten::select")
@symbolic_helper.quantized_args(True)
@symbolic_helper.parse_args("v", "i", "v")
@_beartype.beartype
def select(g: jit_utils.GraphContext, self, dim, index):
    """Emit ``aten::select`` as a ``Gather`` along ``dim``.

    Opset 11 gather accepts negative indices, so ``index`` is passed through
    unchanged.
    """
    gathered = g.op("Gather", self, index, axis_i=dim)
    return gathered
@_onnx_symbolic("aten::index_put")
@_beartype.beartype
def index_put(
    g: jit_utils.GraphContext, self, indices_list_value, values, accumulate=False
):
    """Emit the graph for ``aten::index_put``.

    Args:
        g: Graph context used to create nodes.
        self: Tensor being written into.
        indices_list_value: A packed list of index tensors, or a single index.
        values: Values to write.
        accumulate: Whether to add into ``self`` instead of overwriting.

    Returns:
        ``values`` when the index list is empty; a ``masked_fill`` or
        ``masked_scatter`` result for a single boolean index; otherwise the
        output of ``ScatterND`` (added to ``self`` when ``accumulate`` is set).

    Raises:
        errors.SymbolicValueError: If ``accumulate`` is set and the scalar type
            of ``self`` is undefined.
    """
    if symbolic_helper._is_packed_list(indices_list_value):
        indices_list = symbolic_helper._unpack_list(indices_list_value)
    else:
        indices_list = [indices_list_value]
    if symbolic_helper.is_caffe2_aten_fallback():
        args = [self] + indices_list + [values, accumulate]
        return g.at("index_put", *args)

    accumulate = symbolic_helper._parse_arg(accumulate, "b")

    if len(indices_list) == 0:
        return values

    if len(indices_list) > 1:
        # Boolean indices are converted to NonZero outputs in place.
        for position, candidate in enumerate(indices_list):
            if symbolic_helper._is_bool(candidate):
                indices_list[position] = g.op("NonZero", candidate)
        index = indices_list[0]

        # Adding all indices together yields a tensor with the broadcast shape.
        for ind in indices_list[1:]:
            index = opset9.add(g, index, ind)
        broadcast_index_shape = g.op("Shape", index)
        indices_list = [
            symbolic_helper._unsqueeze_helper(
                g, opset9.expand(g, ind, broadcast_index_shape, None), [-1]
            )
            for ind in indices_list
        ]
        index = g.op("Concat", *indices_list, axis_i=-1)
    else:
        # Replace index_put node with masked_scatter or masked_fill
        # when inputs to the index_put node contains a single boolean input.
        #
        # index_put -> masked_fill
        #   * input index contains single tensor of Bool type (e.g.: %24 <- %23).
        #   * input value contains single element (e.g.: %18).
        #
        # Torch IR
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %16 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #               aten::to(%8, %26, %27, %11, %12, %28, %29, %15)
        #   %18 : Float(requires_grad=0, device=cpu) = prim::Constant[value={1}]()
        #   %23 : Bool(8, strides=[1], device=cpu) = aten::view(%16, %22)
        #   %24 : Tensor?[] = prim::ListConstruct(%23)
        #   %25 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) =
        #                aten::index_put(%mask, %24, %18, %30)
        #   return (%25)
        #
        #
        # index_put -> masked_scatter
        #   * input index contains single tensor of Bool type (e.g.: %32 <- %31).
        #   * input value contains multiple elements (e.g.: %28).
        #
        # Torch IR
        #   %mask : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu) = aten::clone(%0, %6)
        #   %28 : Float(8, strides=[1], requires_grad=0, device=cpu)
        #                = prim::Constant[value= 1  1  1  1  1  1  1  1 [ CPUFloatType{8} ]]()
        #   %15 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #                = aten::ne(%mask, %some_const)
        #   %23 : Bool(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #                = aten::to(%15, %34, %35, %18, %19, %36, %37, %22)
        #   %38 : Long(requires_grad=0, device=cpu) = prim::Constant[value={0}]()
        #   %30 : int[] = prim::Constant[value=[-1]]()
        #   %31 : Bool(8, strides=[1], device=cpu) = aten::view(%23, %30)
        #   %32 : Tensor?[] = prim::ListConstruct(%31)
        #   %33 : Float(2, 2, 2, strides=[4, 2, 1], requires_grad=0, device=cpu)
        #               = aten::index_put(%mask, %32, %28, %38)
        #   return (%33)
        index = indices_list[0]
        bool_inp = index
        if symbolic_helper._is_bool(bool_inp):
            if symbolic_helper._get_tensor_rank(values) == 0:
                return opset9.masked_fill(g, self, bool_inp, values)
            mask_rank = symbolic_helper._get_tensor_rank(bool_inp)
            self_rank = symbolic_helper._get_tensor_rank(self)
            if (
                mask_rank is not None
                and self_rank is not None
                and self_rank > mask_rank
            ):
                # Unsqueeze 'bool_inp' to be broadcastable to shape of 'self'.
                bool_inp = symbolic_helper._unsqueeze_helper(
                    g, bool_inp, list(range(mask_rank, self_rank))
                )
            return masked_scatter(g, self, bool_inp, values)
        broadcast_index_shape = g.op("Shape", index)
        index = symbolic_helper._unsqueeze_helper(g, index, [-1])

    # Shape of the trailing dimensions of `self` that are not indexed.
    sub_data_shape = symbolic_helper._slice_helper(
        g, g.op("Shape", self), axes=[0], starts=[len(indices_list)], ends=[sys.maxsize]
    )
    values_shape = g.op("Concat", broadcast_index_shape, sub_data_shape, axis_i=0)
    # Check if values is a singular value and expand accordingly
    if symbolic_helper._get_tensor_rank(values) == 0:
        values = opset9.expand(g, values, values_shape, None)
    values = symbolic_helper._reshape_helper(g, values, values_shape)

    self_scalar_type = _type_utils.JitScalarType.from_value(
        self, _type_utils.JitScalarType.UNDEFINED
    )
    if self_scalar_type != _type_utils.JitScalarType.UNDEFINED:
        values_scalar_type = _type_utils.JitScalarType.from_value(
            values, _type_utils.JitScalarType.UNDEFINED
        )
        if self_scalar_type != values_scalar_type:
            values = g.op("Cast", values, to_i=self_scalar_type.onnx_type())
    elif accumulate:
        # The zeros tensor below needs a concrete dtype.
        raise errors.SymbolicValueError("self does not have a valid scalar type.", self)

    if not accumulate:
        return g.op("ScatterND", self, index, values)

    # Scatter into zeros, then add to `self` so the values accumulate.
    zeros = g.op(
        "ConstantOfShape",
        g.op("Shape", self),
        value_t=torch.tensor([0], dtype=self_scalar_type.dtype()),
    )
    scattered = g.op("ScatterND", zeros, index, values)
    return add(g, self, scattered)
@_onnx_symbolic("aten::pixel_shuffle")
@symbolic_helper.parse_args("v", "i")
@_beartype.beartype
def pixel_shuffle(g: jit_utils.GraphContext, self, upscale_factor):
    """Emit ``aten::pixel_shuffle`` as ``DepthToSpace`` in CRD mode.

    Inputs whose rank is known and is not 4 are reported as unimplemented.
    """
    input_rank = symbolic_helper._get_tensor_rank(self)
    rank_is_supported = input_rank is None or input_rank == 4
    if not rank_is_supported:
        return symbolic_helper._unimplemented("pixel_shuffle", "only support 4d input")
    return g.op("DepthToSpace", self, blocksize_i=upscale_factor, mode_s="CRD")
@_onnx_symbolic(
    "aten::upsample_nearest1d",
    decorate=[_apply_params("upsample_nearest1d", 3, "nearest")],
)
@_onnx_symbolic(
    "aten::upsample_nearest2d",
    decorate=[_apply_params("upsample_nearest2d", 4, "nearest")],
)
@_onnx_symbolic(
    "aten::upsample_nearest3d",
    decorate=[_apply_params("upsample_nearest3d", 5, "nearest")],
)
@_onnx_symbolic(
    "aten::upsample_linear1d",
    decorate=[_apply_params("upsample_linear1d", 3, "linear")],
)
@_onnx_symbolic(
    "aten::upsample_bilinear2d",
    decorate=[_apply_params("upsample_bilinear2d", 4, "linear")],
)
@_onnx_symbolic(
    "aten::upsample_trilinear3d",
    decorate=[_apply_params("upsample_trilinear3d", 5, "linear")],
)
@_onnx_symbolic(
    "aten::upsample_bicubic2d",
    decorate=[_apply_params("upsample_bicubic2d", 4, "cubic")],
)
@_beartype.beartype
def _interpolate(name: str, dim: int, interpolate_mode: str):
    """Return what ``symbolic_helper._interpolate_helper`` builds for one upsample op.

    Each registration above passes its own ``(name, dim, interpolate_mode)``
    triple through ``_apply_params``.
    """
    symbolic_fn = symbolic_helper._interpolate_helper(name, dim, interpolate_mode)
    return symbolic_fn
@_onnx_symbolic("aten::__interpolate")
@symbolic_helper.quantized_args(True, False, False, False, False, False, False)
@_beartype.beartype
def __interpolate(
    g: jit_utils.GraphContext,
    input,
    size,
    scale_factor,
    mode,
    align_corners,
    recompute_scale_factor,
    antialias,
):
    """Forward ``aten::__interpolate`` to ``symbolic_helper.__interpolate_helper``.

    ``antialias`` is accepted but is not forwarded to the helper.
    """
    forwarded = (
        input,
        size,
        scale_factor,
        mode,
        align_corners,
        recompute_scale_factor,
    )
    return symbolic_helper.__interpolate_helper(g, *forwarded)
@_onnx_symbolic("aten::gather")
@symbolic_helper.parse_args("v", "i", "v", "v")
@_beartype.beartype
def gather(g: jit_utils.GraphContext, self, dim, index, sparse_grad=False):
    """Emit ``aten::gather`` as ``GatherElements`` along ``dim``.

    A truthy constant ``sparse_grad`` is reported as unimplemented. Under the
    Caffe2 ATen fallback the op is emitted through ``g.at`` instead.
    """
    wants_sparse_grad = symbolic_helper._maybe_get_const(sparse_grad, "i")
    if wants_sparse_grad:
        return symbolic_helper._unimplemented("gather", "sparse_grad == True")
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at("gather", self, dim, index, sparse_grad)
    return g.op("GatherElements", self, index, axis_i=dim)
@_onnx_symbolic("aten::scatter")
@symbolic_helper.parse_args("v", "i", "v", "v")
@_beartype.beartype
def scatter(g: jit_utils.GraphContext, self, dim, index, src):
    """Emit ``aten::scatter`` as ``ScatterElements`` along ``dim``.

    A ``src`` that is still a graph value is used directly. A scalar ``src`` is
    cast to the type of ``self`` if the types differ, then expanded to the
    shape of ``index``.
    """
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at("scatter", self, dim, index, src, overload_name="src")

    # Record the type before src is possibly unwrapped into a scalar.
    src_type = _type_utils.JitScalarType.from_value(src)
    src = symbolic_helper._maybe_get_scalar(src)
    if symbolic_helper._is_value(src):
        return g.op("ScatterElements", self, index, src, axis_i=dim)

    # Check if scalar "src" has same type as self (PyTorch allows different
    # type for scalar src (but not when src is tensor)). If not, insert Cast node.
    if _type_utils.JitScalarType.from_value(self) != src_type:
        self_onnx_type = _type_utils.JitScalarType.from_value(self).onnx_type()
        src = g.op("Cast", src, to_i=self_onnx_type)
    expanded_src = opset9.expand_as(g, src, index)
    return g.op("ScatterElements", self, index, expanded_src, axis_i=dim)
@_onnx_symbolic("aten::cumsum")
@symbolic_helper.parse_args("v", "i", "none")
@_beartype.beartype
def cumsum(g: jit_utils.GraphContext, self, dim, dtype=None):
    """Emit ``aten::cumsum`` as ``CumSum`` with ``dim`` as a constant axis input.

    ``self`` is cast first only when ``dtype`` is truthy and its producing
    node is not a ``prim::Constant``.
    """
    axis = g.op("Constant", value_t=torch.tensor(dim, dtype=torch.int))
    operand = self
    if dtype and dtype.node().kind() != "prim::Constant":
        parsed_dtype = symbolic_helper._get_const(dtype, "i", "dtype")
        target_type = _type_utils.JitScalarType(parsed_dtype).onnx_type()
        operand = g.op("Cast", self, to_i=target_type)
    return g.op("CumSum", operand, axis)
@_onnx_symbolic("aten::masked_select")
@_beartype.beartype
def masked_select(g: jit_utils.GraphContext, self, mask):
    """Emit ``aten::masked_select`` as ``GatherND`` over the non-zero mask positions.

    ``mask`` is expanded to the shape of ``self`` before ``nonzero`` is taken.
    """
    expanded_mask = opset9.expand_as(g, mask, self)
    selected_positions = opset9.nonzero(g, expanded_mask)
    return g.op("GatherND", self, selected_positions)
@_onnx_symbolic("aten::masked_scatter")
@_beartype.beartype
def masked_scatter(g: jit_utils.GraphContext, self, mask, source):
    """Emit ``aten::masked_scatter`` as ``ScatterND`` at the non-zero mask positions."""
    expanded_mask = opset9.expand_as(g, mask, self)
    index = opset9.nonzero(g, expanded_mask)
    # NOTE: source can have more elements than needed.
    # It could also have arbitrary shape.
    # This is not supported by ONNX::ScatterND, so we need to flatten and slice source tensor.
    flat_source = symbolic_helper._reshape_helper(g, source, torch.LongTensor([-1]))
    # Keep as many leading elements as there are rows in `index`.
    needed_count = opset9.size(g, index, torch.LongTensor([0]))
    trimmed_source = symbolic_helper._slice_helper(
        g,
        flat_source,
        axes=torch.LongTensor([0]),
        starts=torch.LongTensor([0]),
        ends=needed_count,
    )
    return g.op("ScatterND", self, index, trimmed_source)
@_onnx_symbolic("aten::len")
@_beartype.beartype
def _len(g: jit_utils.GraphContext, self):
    """Emit the graph for ``aten::len``.

    Tensor lists and ``onnx::SplitToSequence`` outputs use ``SequenceLength``.
    Anything else uses the size of dimension 0, squeezed on axis 0.
    """
    is_sequence = (
        symbolic_helper._is_tensor_list(self)
        or self.node().kind() == "onnx::SplitToSequence"
    )
    if is_sequence:
        return g.op("SequenceLength", self)
    first_dim = g.op("Constant", value_t=torch.LongTensor([0]))
    leading_size = size(g, self, first_dim)
    return symbolic_helper._squeeze_helper(g, leading_size, [0])
@_onnx_symbolic("aten::__getitem_")
@_beartype.beartype
def __getitem_(g: jit_utils.GraphContext, self, i):
    """Emit ``aten::__getitem_``.

    Tensor lists use ``SequenceAt``; everything else is delegated to the
    opset 9 symbolic.
    """
    if not symbolic_helper._is_tensor_list(self):
        from torch.onnx.symbolic_opset9 import __getitem_ as getitem

        return getitem(g, self, i)
    # SequenceAt requires that the input be a List of Tensors
    return g.op("SequenceAt", self, i)
@_onnx_symbolic("aten::_set_item")
@_beartype.beartype
def _set_item(g: jit_utils.GraphContext, tensor_list, i, v):
    """Emit ``aten::_set_item`` as ``SequenceErase`` at ``i`` followed by ``SequenceInsert`` of ``v`` at ``i``."""
    without_old_item = g.op("SequenceErase", tensor_list, i)
    return g.op("SequenceInsert", without_old_item, v, i)
@_onnx_symbolic("aten::append")
@_beartype.beartype
def append(g: jit_utils.GraphContext, self, tensor):
    """Emit ``aten::append`` as ``SequenceInsert`` with no position input."""
    extended = g.op("SequenceInsert", self, tensor)
    return extended
@_onnx_symbolic("aten::add")
@_beartype.beartype
def add(g: jit_utils.GraphContext, self, other, alpha=None):
    """Emit the graph for ``aten::add``.

    When ``self`` is a tensor list, ``other`` must come from a
    ``prim::ListConstruct`` node; each of its elements is inserted into the
    sequence with ``SequenceInsert``. Any other ``other`` is reported as
    unimplemented. Non-list inputs are delegated to the opset 9 ``add``.
    """
    if symbolic_helper._is_value(self) and symbolic_helper._is_tensor_list(self):
        tensor_list_node = other.node()
        if tensor_list_node.kind() != "prim::ListConstruct":
            return symbolic_helper._unimplemented(
                "add", "does not support adding dynamic tensor list to another"
            )
        sequence = self
        for tensor in symbolic_helper._unpack_list(other):
            sequence = g.op("SequenceInsert", sequence, tensor)
        return sequence

    return opset9.add(g, self, other, alpha)
@_onnx_symbolic("aten::insert")
@_beartype.beartype
def insert(g: jit_utils.GraphContext, self, pos, tensor):
    """Emit ``aten::insert`` as ``SequenceInsert`` of ``tensor`` at ``pos``."""
    updated = g.op("SequenceInsert", self, tensor, pos)
    return updated
@_onnx_symbolic("aten::pop")
@_beartype.beartype
def pop(g: jit_utils.GraphContext, tensor_list, dim):
    """Emit ``aten::pop`` as ``SequenceErase`` at position ``dim``."""
    remaining = g.op("SequenceErase", tensor_list, dim)
    return remaining
@_onnx_symbolic("aten::Delete")
@_beartype.beartype
def Delete(g: jit_utils.GraphContext, tensor_list, dim):
    """Emit ``aten::Delete`` as ``SequenceErase`` at position ``dim``."""
    remaining = g.op("SequenceErase", tensor_list, dim)
    return remaining
@_onnx_symbolic("aten::cat")
@symbolic_helper.quantized_args(True)
@_beartype.beartype
def cat(g: jit_utils.GraphContext, tensor_list, dim):
    """Emit ``aten::cat``.

    Packed lists are delegated to the opset 9 ``cat``. Other inputs use
    ``ConcatFromSequence`` along the constant ``dim``.
    """
    if not symbolic_helper._is_packed_list(tensor_list):
        axis = symbolic_helper._get_const(dim, "i", "dim")
        return g.op("ConcatFromSequence", tensor_list, axis_i=axis)
    return opset9.cat(g, tensor_list, dim)
@_onnx_symbolic("aten::stack")
@_beartype.beartype
def stack(g: jit_utils.GraphContext, tensor_list, dim):
    """Emit ``aten::stack``.

    Packed lists are delegated to the opset 9 ``stack``. Other inputs use
    ``ConcatFromSequence`` with ``new_axis=1`` along the constant ``dim``.
    """
    if not symbolic_helper._is_packed_list(tensor_list):
        axis = symbolic_helper._get_const(dim, "i", "dim")
        return g.op("ConcatFromSequence", tensor_list, axis_i=axis, new_axis_i=1)
    return opset9.stack(g, tensor_list, dim)
@_onnx_symbolic("aten::_unique2")
@symbolic_helper.parse_args("v", "i", "i", "i")
@_beartype.beartype
def _unique2(g: jit_utils.GraphContext, self, sorted, return_inverse, return_counts):
    """Emit ``aten::_unique2`` as a four-output ``Unique`` node.

    Returns:
        The first, third and fourth ``Unique`` outputs (unique values, inverse
        indices, counts). The second output is not returned.
        ``return_inverse`` and ``return_counts`` are not consulted.
    """
    unique_values, _, inverse_indices, counts = g.op(
        "Unique", self, sorted_i=sorted, outputs=4
    )
    return unique_values, inverse_indices, counts
@_onnx_symbolic("aten::unique_dim")
@symbolic_helper.parse_args("v", "i", "i", "i", "i")
@_beartype.beartype
def unique_dim(
    g: jit_utils.GraphContext, self, dim, sorted, return_inverse, return_counts
):
    """Emit ``aten::unique_dim`` as a four-output ``Unique`` node along ``dim``.

    Returns:
        The first, third and fourth ``Unique`` outputs (unique values, inverse
        indices, counts). The second output is not returned.
        ``return_inverse`` and ``return_counts`` are not consulted.
    """
    unique_values, _, inverse_indices, counts = g.op(
        "Unique", self, axis_i=dim, sorted_i=sorted, outputs=4
    )
    return unique_values, inverse_indices, counts
@_onnx_symbolic("aten::topk")
@symbolic_helper.parse_args("v", "v", "i", "i", "i", "none")
@_beartype.beartype
def topk(g: jit_utils.GraphContext, self, k, dim, largest, sorted, out=None):
    """Forward ``aten::topk`` to ``symbolic_helper._topk_helper``."""
    options = {"largest": largest, "sorted": sorted, "out": out}
    return symbolic_helper._topk_helper(g, self, k, dim, **options)
@_onnx_symbolic("aten::sort")
@symbolic_helper.parse_args("v", "i", "i", "none")
@_beartype.beartype
def sort(g: jit_utils.GraphContext, self, dim, decending, out=None):
    """Forward ``aten::sort`` to ``symbolic_helper._sort_helper``."""
    sorted_outputs = symbolic_helper._sort_helper(
        g, self, dim, decending=decending, out=out
    )
    return sorted_outputs
@_onnx_symbolic("aten::argsort")
@symbolic_helper.parse_args("v", "i", "i", "none")
@_beartype.beartype
def argsort(g: jit_utils.GraphContext, self, dim, decending, out=None):
    """Emit ``aten::argsort`` by returning the second output of ``symbolic_helper._sort_helper``."""
    _values, sorted_indices = symbolic_helper._sort_helper(
        g, self, dim, decending=decending, out=out
    )
    return sorted_indices
@_onnx_symbolic("aten::round")
@symbolic_helper.parse_args("v", "i")
@_beartype.beartype
def round(g: jit_utils.GraphContext, self, decimals=0):
    """Emit the graph for ``aten::round``.

    Non-floating-point inputs are returned unchanged. With ``decimals == 0`` a
    single ``Round`` is emitted. Otherwise the input is multiplied by
    ``10**decimals``, rounded, and multiplied by ``10**-decimals``.
    """
    if not symbolic_helper._is_fp(self):
        return self
    if decimals == 0:
        return g.op("Round", self)
    scale_up = g.op("Constant", value_t=torch.tensor(pow(10, decimals)))
    scaled = g.op("Mul", self, scale_up)
    # Named `rounded` so it does not shadow this function or the builtin.
    rounded = g.op("Round", scaled)
    scale_down = g.op("Constant", value_t=torch.tensor(pow(10, -1 * decimals)))
    return g.op("Mul", rounded, scale_down)
@_onnx_symbolic("aten::remainder")
@_beartype.beartype
def remainder(g: jit_utils.GraphContext, input, other):
    """Emit ``aten::remainder``.

    If either operand is floating point the opset 9 symbolic is used;
    otherwise a ``Mod`` node with ``fmod=0`` is emitted.
    """
    any_operand_is_fp = symbolic_helper._is_fp(input) or symbolic_helper._is_fp(other)
    if not any_operand_is_fp:
        return g.op("Mod", input, other, fmod_i=0)
    return opset9.remainder(g, input, other)
@_onnx_symbolic("aten::split")
@symbolic_helper.parse_args("v", "v", "i", "i")
@_beartype.beartype
def split(g: jit_utils.GraphContext, self, split_size_or_sizes, dim, _outputs=None):
    """Emit the graph for ``aten::split``.

    Static splits are delegated to the opset 9 symbolic. Otherwise a
    ``SplitToSequence`` node is created and, depending on ``_outputs``, either
    returned as is, replaced by per-output ``Slice`` nodes, or unpacked with
    ``SequenceAt``.
    """
    if symbolic_helper._is_split_static(split_size_or_sizes, _outputs):
        return opset9.split(g, self, split_size_or_sizes, dim, _outputs)

    sequence = g.op("SplitToSequence", self, split_size_or_sizes, axis_i=dim)
    if _outputs is None:
        return sequence

    # Convert to multiple slice nodes iff number of splits and number of outputs are statically known.
    sizes_match_outputs = (
        symbolic_helper._is_packed_list(split_size_or_sizes)
        and len(symbolic_helper._unpack_list(split_size_or_sizes)) == _outputs
    )
    if sizes_match_outputs:
        piece_sizes = [
            symbolic_helper._unsqueeze_helper(g, v, [0])
            for v in symbolic_helper._unpack_list(split_size_or_sizes)
        ]
        cursor = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))
        axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
        slices = []
        # piece_sizes is a list of same length as _outputs
        for piece_size in piece_sizes:
            stop = g.op("Add", cursor, piece_size)
            slices.append(g.op("Slice", self, cursor, stop, axis))
            cursor = stop
        return slices

    return [
        g.op(
            "SequenceAt",
            sequence,
            g.op("Constant", value_t=torch.tensor([position], dtype=torch.long)),
        )
        for position in range(_outputs)
    ]
@_onnx_symbolic("aten::split_with_sizes")
@symbolic_helper.parse_args("v", "v", "i", "i")
@_beartype.beartype
def split_with_sizes(g: jit_utils.GraphContext, self, split_sizes, dim, _outputs=None):
    """Emit ``aten::split_with_sizes`` by delegating to ``split``."""
    pieces = split(g, self, split_sizes, dim, _outputs)
    return pieces
@_onnx_symbolic("aten::unbind")
@symbolic_helper.parse_args("v", "i", "i")
@_beartype.beartype
def unbind(g: jit_utils.GraphContext, self, dim=0, _outputs=None):
    """Emit ``aten::unbind``.

    With a known ``_outputs`` the opset 9 symbolic is used. Otherwise a
    ``SplitToSequence`` with split size 1 and ``keepdims=0`` is emitted
    along ``dim``.
    """
    if _outputs is not None:
        return opset9.unbind(g, self, dim, _outputs)
    unit_split = g.op("Constant", value_t=torch.tensor(1, dtype=torch.long))
    return g.op("SplitToSequence", self, unit_split, axis_i=dim, keepdims_i=0)
@_beartype.beartype
def _prepare_onnx_paddings(g: jit_utils.GraphContext, input, pad):
    """Generate paddings in ONNX order based on pad in pytorch.

    Args:
        input: the input tensor.
        pad: the paddings in pytorch.
            The order is dim_n_begin, dim_n_end, dim_n-1_begin, dim_n-1_end, ..., dim_m_begin, dim_m_end,
            where m is in range [0, n].

    Returns:
        A 1-D int64 graph value holding the paddings in the order
        dim_0_begin, dim_1_begin, ... , dim_0_end, ..., dim_n_end.
    """
    is_unpacked_scalar_list = (
        not symbolic_helper._is_packed_list(pad)
        and symbolic_helper._is_list(pad)
        and symbolic_helper._is_scalar_list(pad)
    )
    if is_unpacked_scalar_list:
        pad = g.op("ConcatFromSequence", pad, axis_i=0, new_axis_i=1)

    # The desired order of paddings is
    # dim_0_begin, dim_1_begin, ... , dim_0_end, ..., dim_n_end.
    # n is the dimension of input.
    # Assume zero-dimensions in the beginning, pad the "pad" sequence with zeros in the beginning
    first_dim = g.op("Constant", value_t=torch.tensor([0]))
    pad_len = opset9.size(g, pad, first_dim)

    # Set extension = [0] * (dim * 2 - len(pad))
    static_rank = symbolic_helper._get_tensor_rank(input)
    if static_rank is None:
        rank = g.op("Size", g.op("Shape", input))
    else:
        rank = g.op("Constant", value_t=torch.tensor(static_rank, dtype=torch.int64))
    two = g.op("Constant", value_t=torch.tensor(2, dtype=torch.int64))
    full_len = g.op("Mul", rank, two)
    extension = g.op("Sub", full_len, pad_len)

    # Concat pad with extension: paddings = [dim_n_begin, dim_n_end, dim_n-1_begin, dim_n-1_end, 0, 0, ... ]
    # Currently ONNX only supports int64 type for Pad
    pad = g.op("Cast", pad, to_i=_C_onnx.TensorProtoDataType.INT64)
    zero_fill = g.op(
        "ConstantOfShape", extension, value_t=torch.tensor([0], dtype=torch.int64)
    )
    paddings = g.op("Concat", pad, zero_fill, axis_i=0)

    # Reshape and reverse order and collate first beginnings and then ends
    # paddings = [[..., 0, dim_n-1_begin, dim_n_begin],
    #               [..., 0, dim_n-1_end, dim_n_end]]
    # Reshape back to 1-D paddings = [..., 0, dim_n - 1_begin, dim_n_begin, ..., 0, dim_n - 1_end, dim_n_end]
    pair_shape = g.op("Constant", value_t=torch.tensor([-1, 2]))
    paddings = symbolic_helper._reshape_helper(g, paddings, pair_shape)
    flipped = opset10.flip(g, paddings, [0])
    paddings = g.op("Transpose", flipped, perm_i=[1, 0])
    flat_shape = g.op("Constant", value_t=torch.tensor([-1]))
    paddings = symbolic_helper._reshape_helper(g, paddings, flat_shape)
    return g.op("Cast", paddings, to_i=_C_onnx.TensorProtoDataType.INT64)
@_onnx_symbolic("aten::constant_pad_nd")
@_beartype.beartype
def constant_pad_nd(g: jit_utils.GraphContext, input, padding, value=None):
    """Emit ``aten::constant_pad_nd`` as a ``Pad`` node in ``constant`` mode.

    ``value`` is unwrapped to a scalar when possible and passed through
    ``_if_scalar_type_as`` with ``input`` before being used as the fill input.
    """
    fill_value = symbolic_helper._if_scalar_type_as(
        symbolic_helper._maybe_get_scalar(value), input
    )
    onnx_paddings = _prepare_onnx_paddings(g, input, padding)
    return g.op("Pad", input, onnx_paddings, fill_value, mode_s="constant")
@_onnx_symbolic("aten::reflection_pad1d")
@_onnx_symbolic("aten::reflection_pad2d")
@_onnx_symbolic("aten::reflection_pad3d")
@_beartype.beartype
def reflection_pad(g: jit_utils.GraphContext, input, padding):
    """Emit the reflection padding ops as a ``Pad`` node in ``reflect`` mode."""
    onnx_paddings = _prepare_onnx_paddings(g, input, padding)
    return g.op("Pad", input, onnx_paddings, mode_s="reflect")
@_onnx_symbolic("aten::replication_pad1d")
@_onnx_symbolic("aten::replication_pad2d")
@_onnx_symbolic("aten::replication_pad3d")
@_beartype.beartype
def replication_pad(g: jit_utils.GraphContext, input, padding):
    """Emit the replication padding ops as a ``Pad`` node in ``edge`` mode."""
    onnx_paddings = _prepare_onnx_paddings(g, input, padding)
    return g.op("Pad", input, onnx_paddings, mode_s="edge")
@_onnx_symbolic("aten::pad")
@_beartype.beartype
def pad(
    g: jit_utils.GraphContext,
    input: _C.Value,
    pad: _C.Value,
    mode: _C.Value,
    value: _C.Value,
):
    """Dispatch ``aten::pad`` to the symbolic for the requested padding mode.

    Raises:
        errors.SymbolicValueError: If ``mode`` is not one of ``replicate``,
            ``reflect``, ``constant`` or ``circular``.
    """
    mode = symbolic_helper._parse_arg(mode, "s")
    if mode == "replicate":
        return replication_pad(g, input, pad)
    if mode == "reflect":
        return reflection_pad(g, input, pad)
    if mode == "constant":
        return constant_pad_nd(g, input, pad, value)
    if mode == "circular":
        return opset9._pad_circular(g, input, pad)
    raise errors.SymbolicValueError(f"Unrecognized padding mode {mode}", input)
@_onnx_symbolic("aten::linalg_det")
@_beartype.beartype
def linalg_det(g: jit_utils.GraphContext, self):
    """Emit ``aten::linalg_det`` as a ``Det`` node."""
    determinant = g.op("Det", self)
    return determinant
@_onnx_symbolic("aten::logdet")
@_beartype.beartype
def logdet(g: jit_utils.GraphContext, input):
    """Emit ``aten::logdet`` as the opset 9 ``log`` of ``linalg_det(input)``."""
    determinant = linalg_det(g, input)
    return opset9.log(g, determinant)
@_onnx_symbolic("aten::arange")
@_beartype.beartype
def arange(g: jit_utils.GraphContext, *args):
    """Emit ``aten::arange`` as a ``Range`` node.

    The overload is selected from the number of positional arguments:

    * 2 Python ints: ``(start, end)`` with int64 constants and a step of 1.
    * 2 or 5: ``(end, out)`` or ``(end, dtype, layout, device, pin_memory)``.
    * 4 or 7: ``(start, end, step, out)`` or
      ``(start, end, step, dtype, layout, device, pin_memory)``.
    * 6: ``(start, end, dtype, layout, device, pin_memory)``.

    Any other argument count is reported as unimplemented.
    """

    def _get_arange_dtype(dtype):
        return symbolic_helper._maybe_get_const(dtype, "i")

    num_args = len(args)

    if num_args == 2 and all(isinstance(val, int) for val in args):
        # aten::arange(Scalar start, Scalar end)
        dtype = torch.int64
        # Start index.
        start = g.op("Constant", value_t=torch.tensor(args[0], dtype=dtype))
        # End (exclusive) index.
        end = g.op("Constant", value_t=torch.tensor(args[1], dtype=dtype))
        # Step size from start to end indexes.
        delta_default = g.op("Constant", value_t=torch.tensor(1, dtype=dtype))
        return g.op("Range", start, end, delta_default)

    if num_args in (2, 5):
        if num_args == 2:
            # aten::arange(Scalar end, Tensor out)
            dtype = None
        else:
            # aten::arange(Scalar end, ScalarType dtype, Layout, Device, bool pin_memory)
            dtype = _get_arange_dtype(args[1])
        # Only the resolved type and `end` are needed; start and step use defaults.
        type_, end, _, _ = symbolic_helper._arange_cast_helper(
            g, end=args[0], dtype=dtype
        )
        start_default = g.op(
            "Constant", value_t=torch.tensor(0, dtype=type_.dtype())
        )
        delta_default = g.op(
            "Constant", value_t=torch.tensor(1, dtype=type_.dtype())
        )
        return g.op("Range", start_default, end, delta_default)

    if num_args in (4, 7):
        if num_args == 4:
            # aten::arange(Scalar start, Scalar end, Scalar step, Tensor out)
            dtype = None
        else:
            # aten::arange(Scalar start, Scalar end, Scalar step, ScalarType dtype, Layout, Device, bool pin_memory)
            dtype = _get_arange_dtype(args[3])
        _, end, start, step = symbolic_helper._arange_cast_helper(
            g, start=args[0], end=args[1], step=args[2], dtype=dtype
        )
        return g.op("Range", start, end, step)

    if num_args == 6:
        # aten::arange(Scalar start, Scalar end, ScalarType dtype, Layout, Device, bool pin_memory)
        dtype = _get_arange_dtype(args[2])
        type_, end, start, _ = symbolic_helper._arange_cast_helper(
            g, start=args[0], end=args[1], dtype=dtype
        )
        delta_default = g.op(
            "Constant", value_t=torch.tensor(1, dtype=type_.dtype())
        )
        return g.op("Range", start, end, delta_default)

    return symbolic_helper._unimplemented(
        "aten::arange", f"with {len(args)} arguments"
    )
@_onnx_symbolic("aten::_dim_arange")
@symbolic_helper.parse_args("v", "i")
@_beartype.beartype
def _dim_arange(g: jit_utils.GraphContext, like, dim):
    """Emit ``aten::_dim_arange``: a range whose end is the size of ``like`` along ``dim``.

    Under the Caffe2 ATen fallback a ``_caffe2::Range`` node is emitted instead.
    """
    shape_of_like = g.op("Shape", like)
    dim_index = g.op("Constant", value_t=torch.tensor(dim))
    stop = g.op("Gather", shape_of_like, dim_index, axis_i=0)
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.op("_caffe2::Range", stop)
    # Five-argument arange overload; 4 is passed in the dtype position
    # (presumably the int64 ScalarType code - TODO confirm).
    return arange(g, stop, 4, None, None, None)
@_onnx_symbolic("aten::size")
@symbolic_helper.quantized_args(True, quantize_output=False)
@_beartype.beartype
def size(g: jit_utils.GraphContext, self, dim=None):
    """Emit ``aten::size``.

    Without ``dim`` this is a ``Shape`` node; with ``dim`` the work is
    delegated to ``symbolic_helper._size_helper``.
    """
    if dim is not None:
        return symbolic_helper._size_helper(g, self, dim)
    return g.op("Shape", self)
@_onnx_symbolic("aten::squeeze")
@_beartype.beartype
def squeeze(g: jit_utils.GraphContext, self, dim=None):
    """Emit the graph for ``aten::squeeze``.

    * No ``dim``: a plain ``Squeeze``.
    * Non-constant ``dim``: delegated to ``_squeeze_helper`` with ``dim`` as a value.
    * Constant ``dim`` with unknown static size: an ``If`` node that squeezes
      only when the runtime size equals 1.
    * Constant ``dim`` with static size greater than 1: a warning is issued and
      ``self`` is returned unchanged.
    * Otherwise: a ``Squeeze`` on the non-negative axis.
    """
    if dim is None:
        return g.op("Squeeze", self)

    # dim as a tensor
    if not symbolic_helper._is_constant(dim):
        return symbolic_helper._squeeze_helper(g, self, [dim])

    dim = symbolic_helper._get_const(dim, "i", "dim")

    input_rank = symbolic_helper._get_tensor_rank(self)
    # A negative dim can only be normalized when the rank is known.
    adjusted_dim = dim + input_rank if (input_rank is not None and dim < 0) else dim
    dim_size = symbolic_helper._get_tensor_dim_size(self, adjusted_dim)

    needs_runtime_check = (dim < 0 and input_rank is None) or dim_size is None
    if needs_runtime_check:
        # If onnx shape inference is not on, export always as dynamic.
        # Because we cannot tell if observed static shape is also static at runtime.
        # create "cond" node (condition is shape[i]==1)
        dim_constant = g.op("Constant", value_t=torch.tensor([dim]))
        runtime_size = symbolic_helper._size_helper(g, self, dim_constant)
        const_one = g.op("Constant", value_t=torch.ones(1, dtype=torch.int64))
        cond = g.op("Equal", runtime_size, const_one)
        # create the "If" node and add the "then" and "else" blocks to it.
        if_op, (then_context, else_context), _ = jit_utils.add_op_with_blocks(
            g, "If", cond, n_blocks=2
        )
        squeezed = symbolic_helper._squeeze_helper(then_context, self, [dim])
        utils._add_output_to_block(then_context.block, squeezed)
        passthrough = else_context.op("Identity", self)
        utils._add_output_to_block(else_context.block, passthrough)
        return if_op

    # For static input shape
    dim = adjusted_dim
    if dim_size <= 1:
        return symbolic_helper._squeeze_helper(g, self, [dim])

    warnings.warn(
        "This model contains a squeeze operation on dimension "
        + str(dim)
        + ". The size of "
        + "this dimension in the given input is "
        + str(dim_size)
        + ". The model will "
        + "be exported without the squeeze node. If the model is intended to be used with dynamic "
        + "input shapes, please export with dynamic_axes argument."
    )
    return self
@_onnx_symbolic("aten::unsqueeze")
@_beartype.beartype
def unsqueeze(g: jit_utils.GraphContext, self, dim):
    """Emit ``aten::unsqueeze`` through ``symbolic_helper._unsqueeze_helper``.

    A constant ``dim`` is converted to a Python int first; a non-constant
    ``dim`` is passed on as a graph value.
    """
    axis = dim
    if symbolic_helper._is_constant(dim):
        axis = symbolic_helper._get_const(dim, "i", "dim")
    return symbolic_helper._unsqueeze_helper(g, self, [axis])
@_onnx_symbolic("aten::mm")
@_beartype.beartype
def mm(g: jit_utils.GraphContext, self, other):
    """Emit ``aten::mm`` as a two-input ``Gemm`` with ``alpha=1.0`` and ``beta=0.0``."""
    product = g.op("Gemm", self, other, beta_f=0.0, alpha_f=1.0)
    return product
@_onnx_symbolic("aten::index")
@_beartype.beartype
def index(g: jit_utils.GraphContext, self, index):
    """Emit the graph for ``aten::index``.

    A single Bool or UINT8 index is handled here as ``GatherND`` over its
    non-zero positions. Everything else is delegated to the opset 9 symbolic.
    """
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at("index", self, index, overload_name="Tensor")

    indices = (
        symbolic_helper._unpack_list(index)
        if symbolic_helper._is_packed_list(index)
        else [index]
    )

    # Handle single mask index.
    if len(indices) == 1:
        # Deliberately rebinds `index`: the opset 9 call below receives the
        # single unpacked element when it is not a mask.
        index = indices[0]
        is_mask = not symbolic_helper._is_none(index) and (
            symbolic_helper._is_bool(index)
            or _type_utils.JitScalarType.from_value(index)
            == _type_utils.JitScalarType.UINT8
        )
        if is_mask:
            index = opset9.nonzero(g, index)
            return g.op("GatherND", self, index)
    return opset9.index(g, self, index)
@_onnx_symbolic("aten::index_fill")
@_beartype.beartype
def index_fill(g: jit_utils.GraphContext, self, dim, index, value):
    """Emit ``aten::index_fill`` as a ``scatter`` of ``value`` expanded to the index shape.

    Under the Caffe2 ATen fallback the op is emitted through ``g.at`` instead.
    """
    dim_value = symbolic_helper._parse_arg(dim, "i")
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at(
            "index_fill",
            self,
            index,
            value,
            overload_name="int_Scalar",
            dim_i=dim_value,
        )

    expanded_index_shape, expanded_index = symbolic_helper._index_fill_reshape_helper(
        g, self, dim, index
    )
    fill = symbolic_helper._if_scalar_type_as(
        symbolic_helper._maybe_get_scalar(value), self
    )
    expanded_fill = opset9.expand(g, fill, expanded_index_shape, None)
    return scatter(g, self, dim, expanded_index, expanded_fill)
@_onnx_symbolic("aten::index_copy")
@_beartype.beartype
def index_copy(g: jit_utils.GraphContext, self, dim, index, source):
    """Emit ``aten::index_copy`` as a ``scatter`` of ``source`` at the expanded index.

    Under the Caffe2 ATen fallback the op is emitted through ``g.at`` instead.
    """
    dim_value = symbolic_helper._parse_arg(dim, "i")
    if symbolic_helper.is_caffe2_aten_fallback():
        return g.at("index_copy", self, index, source, dim_i=dim_value)
    # Only the expanded index is needed; the expanded shape is discarded.
    _, expanded_index = symbolic_helper._index_fill_reshape_helper(
        g, self, dim, index
    )
    return scatter(g, self, dim, expanded_index, source)
@_onnx_symbolic("aten::__rshift_")
@_beartype.beartype
def __rshift_(g: jit_utils.GraphContext, self, other):
    """Emit the graph for ``aten::__rshift_``.

    UINT8 inputs use ``BitShift`` with direction ``RIGHT``. All other types
    divide ``self`` by ``2 ** other``, with the power cast to the type of
    ``self``.
    """
    # make sure to cast other to self's type
    # (when self is long, make sure that other is not float)
    other_type = _type_utils.JitScalarType.from_value(
        other, _type_utils.JitScalarType.UNDEFINED
    )
    if other_type != _type_utils.JitScalarType.from_value(self):
        other = g.op(
            "Cast",
            other,
            to_i=_type_utils.JitScalarType.from_value(self).onnx_type(),
        )

    self_type = _type_utils.JitScalarType.from_value(
        self, _type_utils.JitScalarType.UNDEFINED
    )
    if self_type == _type_utils.JitScalarType.UINT8:
        return g.op("BitShift", self, other, direction_s="RIGHT")

    two = g.op("Constant", value_t=torch.tensor(2, dtype=torch.float32))
    # exponent (same type as self) has to be float or double in onnx::Pow
    if not symbolic_helper._is_fp(self):
        other = g.op("Cast", other, to_i=_C_onnx.TensorProtoDataType.FLOAT)
    divisor = g.op("Pow", two, other)
    divisor = g.op(
        "Cast",
        divisor,
        to_i=_type_utils.JitScalarType.from_value(self).onnx_type(),
    )
    return g.op("Div", self, divisor)
@_onnx_symbolic("aten::__lshift_")
@_beartype.beartype
def __lshift_(g: jit_utils.GraphContext, self, other):
    """Export a left shift as ``BitShift`` (uint8) or ``Mul`` by ``2 ** other``.

    Args:
        g: Graph context the nodes are appended to.
        self: Value being shifted.
        other: Shift amount.

    Returns:
        A ``BitShift`` output with direction ``LEFT`` when ``self`` is uint8,
        otherwise the output of ``Mul(self, Cast(Pow(2, other)))``.
    """
    scalar_type = _type_utils.JitScalarType

    # make sure to cast other to self's type
    # (when self is long, make sure that other is not float)
    other_type = scalar_type.from_value(other, scalar_type.UNDEFINED)
    self_type = scalar_type.from_value(self)
    if other_type != self_type:
        other = g.op("Cast", other, to_i=self_type.onnx_type())

    if self_type == scalar_type.UINT8:
        return g.op("BitShift", self, other, direction_s="LEFT")

    base = g.op("Constant", value_t=torch.tensor(2, dtype=torch.float32))
    # exponent (same type as self) has to be float or double in onnx::Pow
    if not symbolic_helper._is_fp(self):
        other = g.op("Cast", other, to_i=_C_onnx.TensorProtoDataType.FLOAT)
    multiplier = g.op("Pow", base, other)
    # Bring the power of two back to self's type before multiplying.
    multiplier = g.op("Cast", multiplier, to_i=self_type.onnx_type())
    return g.op("Mul", self, multiplier)
@_beartype.beartype
def _get_im2col_indices_along_dim(
    g: jit_utils.GraphContext, input_d, kernel_size_d, dilation_d, padding_d, stride_d
):
    """Build the sliding-block gather indices for one spatial dimension.

    Args:
        g: Graph context the nodes are appended to.
        input_d: Graph value holding the input extent along the dimension.
        kernel_size_d: Kernel size along the dimension (Python number).
        dilation_d: Dilation along the dimension (Python number).
        padding_d: Padding along the dimension (Python number).
        stride_d: Stride along the dimension (Python number).

    Returns:
        The sum of the kernel offsets (reshaped to ``[-1, 1]``) and the block
        start positions (unsqueezed at axis 0), as a graph value.
    """
    # Input is always 4-D (N, C, H, W)
    # Calculate indices of sliding blocks along spatial dimension
    # Slide kernel over input each dim d:
    # each dimension d ranges from 0 to input[d]+2xpadding[d]-dilation[d]x(kernel_size[d]-1)
    # with steps = stride
    total_padding = g.op("Constant", value_t=torch.tensor(padding_d * 2))
    upper_bound = g.op("Add", input_d, total_padding)
    kernel_extent = g.op(
        "Constant", value_t=torch.tensor(dilation_d * (kernel_size_d - 1))
    )
    upper_bound = g.op("Sub", upper_bound, kernel_extent)

    # Stride kernel over input and find starting indices along dim d
    range_start = g.op("Constant", value_t=torch.tensor(0))
    range_step = g.op("Constant", value_t=torch.tensor(stride_d))
    block_starts = g.op("Range", range_start, upper_bound, range_step)

    # Apply dilation on kernel and find its indices along dim d
    kernel_offsets = torch.arange(0, kernel_size_d * dilation_d, dilation_d)
    kernel_offsets = g.op("Constant", value_t=kernel_offsets.unsqueeze(0))

    # Broadcast and add kernel starting positions (indices) with
    # the kernel offsets along dim d, to get block indices along dim d
    block_starts = symbolic_helper._unsqueeze_helper(
        g, block_starts, [0]
    )  # Reshape to [1, -1]
    column_shape = g.op("Constant", value_t=torch.tensor([-1, 1]))
    kernel_column = symbolic_helper._reshape_helper(g, kernel_offsets, column_shape)
    return g.op("Add", block_starts, kernel_column)
@_beartype.beartype
def _get_im2col_padded_input(g: jit_utils.GraphContext, input, padding_h, padding_w):
    """Pad the last two dimensions of ``input`` with an ONNX ``Pad`` node.

    Args:
        g: Graph context the nodes are appended to.
        input: Value to pad.
        padding_h: Padding applied at both ends of dimension 2.
        padding_w: Padding applied at both ends of dimension 3.

    Returns:
        The output of the emitted ``Pad`` node.
    """
    # Input is always 4-D tensor (N, C, H, W)
    # Padding tensor has the following format: (padding_h, padding_w)
    # Reshape the padding to follow ONNX format: (dim1_begin, dim2_begin,...,dim1_end, dim2_end,...)
    one_sided = [0, 0, padding_h, padding_w]
    pads = g.op("Constant", value_t=torch.LongTensor(one_sided + one_sided))
    return g.op("Pad", input, pads)
@_beartype.beartype
def _get_im2col_output_shape(g: jit_utils.GraphContext, input, kernel_h, kernel_w):
    """Build the reshape target ``[N, C * kernel_h * kernel_w, -1]`` as a graph value.

    Args:
        g: Graph context the nodes are appended to.
        input: Value whose dimension 0 and dimension 1 sizes are read.
        kernel_h: Kernel height (Python number).
        kernel_w: Kernel width (Python number).

    Returns:
        The output of a ``Concat`` along axis 0 of the three shape entries.
    """
    batch = size(g, input, g.op("Constant", value_t=torch.tensor(0)))
    channels = size(g, input, g.op("Constant", value_t=torch.tensor(1)))

    kernel_area = g.op("Constant", value_t=torch.tensor(kernel_h * kernel_w))
    unfolded_channels = g.op("Mul", channels, kernel_area)

    batch_1d = symbolic_helper._unsqueeze_helper(g, batch, [0])
    unfolded_1d = symbolic_helper._unsqueeze_helper(g, unfolded_channels, [0])
    # The trailing -1 lets the reshape infer the number of blocks.
    inferred = g.op("Constant", value_t=torch.tensor([-1]))
    return g.op("Concat", batch_1d, unfolded_1d, inferred, axis_i=0)
@_onnx_symbolic("aten::im2col")
@symbolic_helper.parse_args("v", "is", "is", "is", "is")
@_beartype.beartype
def im2col(g: jit_utils.GraphContext, input, kernel_size, dilation, padding, stride):
    """Export ``aten::im2col`` using ``Pad``, two ``Gather`` ops, ``Transpose`` and a reshape.

    Args:
        g: Graph context the nodes are appended to.
        input: Value to unfold.
        kernel_size: Two ints, ``(kernel_h, kernel_w)``.
        dilation: Two ints, ``(dilation_h, dilation_w)``.
        padding: Two ints, ``(padding_h, padding_w)``.
        stride: Two ints, ``(stride_h, stride_w)``.

    Returns:
        The unfolded value, reshaped to the shape built by
        ``_get_im2col_output_shape``.
    """
    # Input is always 4-D tensor (N, C, H, W)
    # All other args are int[2]
    height = size(g, input, g.op("Constant", value_t=torch.tensor(2)))
    width = size(g, input, g.op("Constant", value_t=torch.tensor(3)))

    stride_h, stride_w = stride[0], stride[1]
    padding_h, padding_w = padding[0], padding[1]
    dilation_h, dilation_w = dilation[0], dilation[1]
    kernel_h, kernel_w = kernel_size[0], kernel_size[1]

    row_indices = _get_im2col_indices_along_dim(
        g, height, kernel_h, dilation_h, padding_h, stride_h
    )
    col_indices = _get_im2col_indices_along_dim(
        g, width, kernel_w, dilation_w, padding_w, stride_w
    )

    target_shape = _get_im2col_output_shape(g, input, kernel_h, kernel_w)
    padded = _get_im2col_padded_input(g, input, padding_h, padding_w)

    # For a 4D matrix of size (1, 1, 3, 3) as below with kernel_size=2, stride=1, and dilation=1
    # [[[[1., 2., 3.,],
    #    [4., 5., 6.,],
    #    [7., 8., 9.,]]]]
    # First gather indices along rows (dim=2) with row indices = [[0,1], [1,2]] to get:
    # [[[[[1., 2., 3.],
    #     [4., 5., 6.]],
    #    [[4., 5., 6.],
    #     [7., 8., 9.]]]]]
    # And then gather along cols (dim=4) with col indices = [[0,1], [1,2]] to get:
    # [[[[[[1., 2.],
    #      [4., 5.]],
    #     [[2., 3.],
    #      [5., 6]]],
    #    [[[4., 5.],
    #      [7., 8.]],
    #     [[5., 6.],
    #      [8., 9.]]]]]]
    # Transpose dims 3 (depth) and 4 (rows), and then reshape to output shape (1, 1, 4, 4) to get:
    # [[[1., 2., 4., 5.],
    #   [2., 3., 5., 6.],
    #   [4., 5., 7., 8.],
    #   [5., 6., 8., 9.]]]
    gathered = g.op("Gather", padded, row_indices, axis_i=2)
    gathered = g.op("Gather", gathered, col_indices, axis_i=4)
    transposed = g.op("Transpose", gathered, perm_i=[0, 1, 2, 4, 3, 5])
    return symbolic_helper._reshape_helper(g, transposed, target_shape)
@_onnx_symbolic("aten::narrow")
@_beartype.beartype
def narrow(g: jit_utils.GraphContext, input, dim, start, length):
    """Export ``aten::narrow`` as a slice from ``start`` to ``start + length``.

    Args:
        g: Graph context the nodes are appended to.
        input: Value to slice.
        dim: Axis passed to the slice helper.
        start: Graph value holding the first index.
        length: Graph value holding the number of elements to keep.

    Returns:
        The value returned by ``symbolic_helper._slice_helper``.
    """
    # The end bound is computed in-graph, so start/length may be dynamic.
    stop = g.op("Add", start, length)
    return symbolic_helper._slice_helper(g, input, axes=dim, starts=start, ends=stop)
@_onnx_symbolic("aten::flatten")
@symbolic_helper.quantized_args(True, False, False)
@symbolic_helper.parse_args("v", "i", "i")
@_beartype.beartype
def flatten(g: jit_utils.GraphContext, input, start_dim, end_dim):
    """Export ``aten::flatten``, preferring ONNX ``Flatten`` for 2D results.

    Args:
        g: Graph context the nodes are appended to.
        input: Value to flatten.
        start_dim: First dimension to flatten (int).
        end_dim: Last dimension to flatten (int, may be negative).

    Returns:
        ``input`` unchanged when its rank is 1; a ``Flatten`` output when the
        result is 2D; otherwise the result of
        ``symbolic_helper._flatten_helper``. When the rank is unknown and the
        2D shortcuts do not apply, the ``_unimplemented`` result is returned.
    """
    rank = symbolic_helper._get_tensor_rank(input)
    if rank == 1:
        return input

    rank_known = rank is not None

    # use ONNX's Flatten operator for cases where the output shape is 2D
    if start_dim == 1 and (end_dim == -1 or (rank_known and end_dim == rank - 1)):
        return g.op("Flatten", input, axis_i=start_dim)
    if start_dim == 0 and (end_dim == -2 or (rank_known and end_dim == rank - 2)):
        return g.op("Flatten", input, axis_i=end_dim + 1)

    if not rank_known:
        return symbolic_helper._unimplemented(
            "dim",
            "ONNX and PyTorch use different strategies to split the input. "
            "Input rank must be known at export time.",
        )

    # if end_dim is negative add dim
    if end_dim < 0:
        end_dim += rank
    return symbolic_helper._flatten_helper(g, input, start_dim, end_dim, rank)
@_onnx_symbolic("aten::linalg_vector_norm")
@symbolic_helper.parse_args("v", "f", "is", "b", "v")
@_beartype.beartype
def linalg_vector_norm(
    g: jit_utils.GraphContext,
    self,
    ord,
    dim: Optional[Sequence[int]],
    keepdim: bool,
    dtype,
):
    """Export ``aten::linalg_vector_norm``, handling ``ord == 0`` locally.

    For ``ord == 0`` the result is the sum of a "not equal to zero" mask,
    cast to ``self``'s scalar type. Any other ``ord`` is delegated to
    ``opset9.linalg_vector_norm``.

    Args:
        g: Graph context the nodes are appended to.
        self: Input value.
        ord: Order of the norm (float).
        dim: Axes to reduce over, or ``None`` to reduce over everything.
        keepdim: Whether reduced axes are kept.
        dtype: Forwarded to the opset 9 implementation only.

    Returns:
        The value holding the norm.
    """
    if ord != 0:
        return opset9.linalg_vector_norm(g, self, ord, dim, keepdim, dtype)

    if dim is None:
        # Reduce over all elements: flatten first and drop keepdim.
        flat_shape = g.op("Constant", value_t=torch.tensor([-1], dtype=torch.int64))
        self = symbolic_helper._reshape_helper(g, self, flat_shape)
        keepdim = False

    zero = g.op("Constant", value_t=torch.LongTensor([0]))
    nonzero_mask = g.op("Not", g.op("Equal", self, zero))
    nonzero_mask = g.op(
        "Cast",
        nonzero_mask,
        to_i=_type_utils.JitScalarType.from_value(self).onnx_type(),
    )
    return symbolic_helper._reducesum_helper(
        g, nonzero_mask, axes_i=dim, keepdims_i=keepdim
    )
@_onnx_symbolic("aten::embedding_bag")
@symbolic_helper.parse_args("v", "v", "v", "i", "i", "i", "v", "i", "i")
@_beartype.beartype
def embedding_bag(
    g: jit_utils.GraphContext,
    embedding_matrix,
    indices,
    offsets,
    scale_grad_by_freq,
    mode,
    sparse,
    per_sample_weights,
    include_last_offset,
    padding_idx,
):
    """Export ``aten::embedding_bag`` as an ONNX ``Loop`` that reduces one bag per iteration.

    Args:
        g: Graph context the nodes are appended to.
        embedding_matrix: Value gathered from along axis 0.
        indices: Value sliced per bag to select rows of ``embedding_matrix``.
        offsets: Value holding the bag boundaries within ``indices``.
        scale_grad_by_freq: Int flag; unsupported when exporting in training mode.
        mode: Int reduction selector: 0 uses the reduce-sum helper, 1 uses
            ``ReduceMean``, any other value uses ``ReduceMax``.
        sparse: Not read by this function.
        per_sample_weights: Optional value multiplied into the gathered rows.
        include_last_offset: When falsy, the length of ``indices`` is appended
            to ``offsets``.
        padding_idx: Must be ``None`` or negative.

    Returns:
        A 4-tuple ``(loop_output, None, None, None)``.

    Raises:
        RuntimeError: If ``padding_idx`` is not ``None`` and is ``>= 0``.
    """
    if scale_grad_by_freq and GLOBALS.export_training:
        return symbolic_helper._onnx_unsupported(
            "embedding_bag with scale_grad_by_freq for training mode"
        )
    if padding_idx is not None and padding_idx >= 0:
        raise RuntimeError("embedding_bag with padding_idx")
    # Loop condition: a constant 1 cast to BOOL.
    loop_condition = g.op("Constant", value_t=torch.tensor(1))
    loop_condition = g.op("Cast", loop_condition, to_i=_C_onnx.TensorProtoDataType.BOOL)
    # Passed as the fourth input of the Slice ops below
    # (the axes input in the ONNX Slice specification).
    zero = g.op("Constant", value_t=torch.tensor([0]))
    # Size of `indices` along dimension 0, unsqueezed so it can be concatenated.
    indices_len = symbolic_helper._unsqueeze_helper(
        g,
        symbolic_helper._size_helper(
            g, indices, g.op("Constant", value_t=torch.tensor(0))
        ),
        [0],
    )
    if not include_last_offset:
        # Append the end position of the last bag.
        offsets = [offsets, indices_len]
        offsets = g.op("Concat", *offsets, axis_i=0)
    # Offsets holds the starting index position of each bag. So we create a list of the indices slices (determined by
    # offsets) and gather those indices in indices_row. Then we use this subset of indices to gather from embeddings.
    # The embeddings output is a loop scan output, so we can avoid creating a sequence and inserting elements in.
    offsets_starts = symbolic_helper._slice_helper(
        g, offsets, axes=[0], starts=[0], ends=[sys.maxsize], steps=[1]
    )
    offsets_ends = symbolic_helper._slice_helper(
        g, offsets, axes=[0], starts=[1], ends=[sys.maxsize], steps=[1]
    )
    # The trip count is the size of offsets_ends along dimension 0.
    loop_len = symbolic_helper._size_helper(
        g, offsets_ends, g.op("Constant", value_t=torch.tensor(0))
    )
    loop, (loop_context,), _ = jit_utils.add_op_with_blocks(
        g, "Loop", loop_len, loop_condition, n_blocks=1
    )
    loop_block = loop_context.block
    # FIXME(justinchuby): We need to handle what happens when we call b.op on a node return
    # NOTE(review): the two block inputs are presumably the iteration number and
    # the incoming condition of the ONNX Loop body; `cond` is not read afterwards.
    block_input_iter = utils._add_input_to_block(loop_block)
    cond = utils._add_input_to_block(loop_block)
    # Look up this iteration's [start, end) bounds and slice `indices` with them.
    indices_start = loop_context.op(
        "Gather", offsets_starts, block_input_iter, axis_i=0
    )
    indices_end = loop_context.op("Gather", offsets_ends, block_input_iter, axis_i=0)
    indices_start = symbolic_helper._unsqueeze_helper(loop_context, indices_start, [0])
    indices_end = symbolic_helper._unsqueeze_helper(loop_context, indices_end, [0])
    indices_row = loop_context.op("Slice", indices, indices_start, indices_end, zero)
    embeddings = loop_context.op("Gather", embedding_matrix, indices_row, axis_i=0)
    if not symbolic_helper._is_none(per_sample_weights):
        # Slice the matching weights and unsqueeze at axis 1 before multiplying.
        per_sample_weights_row = loop_context.op(
            "Slice", per_sample_weights, indices_start, indices_end, zero
        )
        per_sample_weights_row = symbolic_helper._unsqueeze_helper(
            loop_context, per_sample_weights_row, [1]
        )
        embeddings = loop_context.op("Mul", embeddings, per_sample_weights_row)
    # Reduce the bag's rows along axis 0 according to `mode`.
    if mode == 0:
        embeddings = symbolic_helper._reducesum_helper(
            loop_context, embeddings, axes_i=[0], keepdims_i=0
        )
    elif mode == 1:
        embeddings = loop_context.op("ReduceMean", embeddings, axes_i=[0], keepdims_i=0)
    else:
        embeddings = loop_context.op("ReduceMax", embeddings, axes_i=[0], keepdims_i=0)
    # Block outputs, in order: the condition, then the reduced bag.
    cond_out = loop_context.op(
        "Cast", loop_condition, to_i=_C_onnx.TensorProtoDataType.BOOL
    )
    utils._add_output_to_block(loop_block, cond_out)
    utils._add_output_to_block(loop_block, embeddings)
    # aten::embedding_bag returns a tuple of 4 elements: output, offset2bag, bag_size, max_indices.
    # But the last three outputs are not used in torch.nn.EmbeddingBag or torch.nn.functional.embedding_bag.
    return loop.node().output(), None, None, None
@_onnx_symbolic("aten::embedding_renorm")
@symbolic_helper.parse_args("v", "v", "f", "f")
@_beartype.beartype
def embedding_renorm(g: jit_utils.GraphContext, weight, indices, max_norm, norm_type):
    """Export ``aten::embedding_renorm`` for L1 and L2 norms.

    Rows of ``weight`` selected by the unique ``indices`` whose norm exceeds
    ``max_norm`` are rescaled by ``max_norm / (norm + 1e-7)`` and written back
    with ``ScatterND``.

    Args:
        g: Graph context the nodes are appended to.
        weight: Embedding weight value.
        indices: Value holding the rows to renormalise.
        max_norm: Maximum allowed norm (float).
        norm_type: Norm order (float); only values truncating to 1 or 2 are accepted.

    Returns:
        The output of the emitted ``ScatterND`` node.

    Raises:
        errors.SymbolicValueError: If ``int(norm_type)`` is neither 1 nor 2.
    """
    unique_indices = g.op("Unique", indices)
    selected_rows = g.op("Gather", weight, unique_indices)

    norm_i = int(norm_type)
    reduce_op = {1: "ReduceL1", 2: "ReduceL2"}.get(norm_i)
    if reduce_op is None:
        raise errors.SymbolicValueError(
            f"Unsupported: ONNX export of embedding_renorm with norm: {norm_i}. "
            "Only 1. and 2. are supported.",
            weight,
        )

    row_norms = g.op(reduce_op, selected_rows, axes_i=[1], keepdims_i=1)
    # https://github.com/pytorch/pytorch/blob/0a07488ed2c47765e337e290bd138c0e6e459cbd/aten/src/ATen/native/Embedding.cpp#L177
    # Add 1e-7 to prevent division by zero.
    epsilon = g.op("Constant", value_t=torch.tensor(1e-7))
    safe_norms = g.op("Add", row_norms, epsilon)

    max_norm = torch.tensor(max_norm)
    scales = g.op("Div", max_norm, safe_norms)
    rescaled_rows = g.op("Mul", selected_rows, scales)

    # Keep the original row wherever its norm does not exceed max_norm.
    exceeds_max = g.op("Greater", row_norms, max_norm)
    updated_rows = g.op("Where", exceeds_max, rescaled_rows, selected_rows)

    scatter_indices = symbolic_helper._unsqueeze_helper(g, unique_indices, [1])
    return g.op("ScatterND", weight, scatter_indices, updated_rows)
@_onnx_symbolic("aten::chunk")
@_beartype.beartype
def chunk(g: jit_utils.GraphContext, self, chunks, dim):
    """Export ``aten::chunk`` by computing the split sizes inside the graph.

    Args:
        g: Graph context the nodes are appended to.
        self: Value to split.
        chunks: Graph value holding the requested number of chunks.
        dim: Graph value holding the dimension to split along.

    Returns:
        The result of this module's ``split`` symbolic applied with the
        computed split-size vector.
    """
    # Calculate chunk size for dynamic chunk
    dim_size = g.op("Gather", g.op("Shape", self), dim, axis_i=0)
    one = g.op("Constant", value_t=torch.tensor([1], dtype=torch.long))
    chunks_minus_one = g.op("Sub", chunks, one)
    # (dim_size + chunks - 1) / chunks
    chunk_size = g.op("Div", g.op("Add", dim_size, chunks_minus_one), chunks)

    # Create splits vector: (chunks - 1) full-size entries plus the remainder.
    full_chunks = opset9.expand(g, chunk_size, chunks_minus_one, None)
    remainder = g.op("Sub", dim_size, g.op("Mul", chunk_size, chunks_minus_one))
    split_sizes = g.op("Concat", full_chunks, remainder, axis_i=0)
    return split(g, self, split_sizes, dim)
@_onnx_symbolic("aten::normal")
@_beartype.beartype
def normal(
    g: jit_utils.GraphContext,
    mean,
    std,
    sizes=None,
    generator=None,
    dtype=None,
    layout=None,
    device=None,
    pin_memory=None,
):
    """Export ``aten::normal`` as ``std * RandomNormalLike(mean) + mean``.

    Args:
        g: Graph context the nodes are appended to.
        mean: Mean of the distribution.
        std: Standard deviation of the distribution.
        sizes: Optional output size; when given, ``mean`` is expanded to it.
        generator: Not read by this function.
        dtype: Not read by this function.
        layout: Not read by this function.
        device: Not read by this function.
        pin_memory: Not read by this function.

    Returns:
        The value holding the sampled result.
    """
    # If you can sample from a given distribution with mean 0 and variance 1, then you can easily sample from a
    # scale-location transformation of that distribution, which has mean μ and variance σ's square. If x is a sample
    # from a mean 0 and variance 1 distribution then
    #       σx+μ
    # is a sample with mean μ and variance σ's square.
    sizes_missing = sizes is None or symbolic_helper._is_none(sizes)
    if not sizes_missing:
        mean = opset9.expand(g, mean, sizes, None)
    unit_sample = g.op("RandomNormalLike", mean)
    scaled_sample = opset9.mul(g, std, unit_sample)
    return add(g, scaled_sample, mean)
@_onnx_symbolic("aten::atleast_1d")
@_beartype.beartype
def atleast_1d(g: jit_utils.GraphContext, self: torch._C.Value):
    """Export ``aten::atleast_1d`` for a single tensor or a packed tensor list.

    Args:
        g: Graph context the nodes are appended to.
        self: A tensor value or a packed list of tensors.

    Returns:
        For a packed list, a ``SequenceConstruct`` output over the promoted
        tensors; otherwise the promoted tensor itself.
    """

    def _promote(tensor):
        # NOTE: If it's 0D, reshape to 1D
        if symbolic_helper._get_tensor_rank(tensor) == 0:
            return symbolic_helper._reshape_helper(
                g, tensor, g.op("Constant", value_t=torch.tensor([1]))
            )
        return tensor

    # NOTE: self could be a packed list or a tensor
    if symbolic_helper._is_value(self) and symbolic_helper._is_packed_list(self):
        promoted = [_promote(item) for item in symbolic_helper._unpack_list(self)]
        return g.op("SequenceConstruct", *promoted)

    return _promote(self)
@_onnx_symbolic("aten::atleast_2d")
@_beartype.beartype
def atleast_2d(g: jit_utils.GraphContext, self: torch._C.Value):
    """Export ``aten::atleast_2d`` for a single tensor or a packed tensor list.

    Args:
        g: Graph context the nodes are appended to.
        self: A tensor value or a packed list of tensors.

    Returns:
        For a packed list, a ``SequenceConstruct`` output over the promoted
        tensors; otherwise the promoted tensor itself.
    """

    def _promote(tensor):
        rank = symbolic_helper._get_tensor_rank(tensor)
        # NOTE: If it's 0D, reshape to 2D
        if rank == 0:
            return symbolic_helper._reshape_helper(
                g, tensor, g.op("Constant", value_t=torch.tensor([1, 1]))
            )
        # If it's 1D, unsqueeze to 2D
        if rank == 1:
            return symbolic_helper._unsqueeze_helper(g, tensor, axes_i=[0])
        return tensor

    # NOTE: self could be a packed list or a tensor
    if symbolic_helper._is_value(self) and symbolic_helper._is_packed_list(self):
        promoted = [_promote(item) for item in symbolic_helper._unpack_list(self)]
        return g.op("SequenceConstruct", *promoted)

    return _promote(self)
@_onnx_symbolic("aten::atleast_3d")
@_beartype.beartype
def atleast_3d(g: jit_utils.GraphContext, self: torch._C.Value):
    """Export ``aten::atleast_3d`` for a single tensor or a packed tensor list.

    Args:
        g: Graph context the nodes are appended to.
        self: A tensor value or a packed list of tensors.

    Returns:
        For a packed list, a ``SequenceConstruct`` output over the promoted
        tensors; otherwise the promoted tensor itself.
    """

    def _promote(tensor):
        rank = symbolic_helper._get_tensor_rank(tensor)
        # NOTE: If it's 0D, reshape to 3D
        if rank == 0:
            return symbolic_helper._reshape_helper(
                g, tensor, g.op("Constant", value_t=torch.tensor([1, 1, 1]))
            )
        # If it's 1D, unsqueeze to 3D (leading axis first, then trailing axis)
        if rank == 1:
            tensor = symbolic_helper._unsqueeze_helper(g, tensor, axes_i=[0])
            return symbolic_helper._unsqueeze_helper(g, tensor, axes_i=[-1])
        # If it's 2D, unsqueeze to 3D
        if rank == 2:
            return symbolic_helper._unsqueeze_helper(g, tensor, axes_i=[-1])
        return tensor

    # NOTE: self could be a packed list or a tensor
    if symbolic_helper._is_value(self) and symbolic_helper._is_packed_list(self):
        promoted = [_promote(item) for item in symbolic_helper._unpack_list(self)]
        return g.op("SequenceConstruct", *promoted)

    return _promote(self)
@_onnx_symbolic("prim::ConstantChunk")
@_beartype.beartype
def prim_constant_chunk(g: jit_utils.GraphContext, self, chunks, dim):
    """Export ``prim::ConstantChunk`` as ``chunks`` consecutive ``Slice`` nodes.

    Args:
        g: Graph context the nodes are appended to.
        self: Value to split.
        chunks: Number of chunks (Python int).
        dim: Dimension to split along (Python int).

    Returns:
        A list with one ``Slice`` output per chunk.
    """
    shape = g.op("Shape", self)
    axis = g.op("Constant", value_t=torch.tensor([dim], dtype=torch.long))
    dim_extent = g.op("Gather", shape, axis, axis_i=0)
    start = g.op("Constant", value_t=torch.tensor([0], dtype=torch.long))

    # Per-chunk length: (extent + chunks - 1) / chunks.
    chunk_count = g.op("Constant", value_t=torch.tensor([chunks], dtype=torch.long))
    chunk_count_minus_1 = g.op(
        "Constant", value_t=torch.tensor([chunks - 1], dtype=torch.long)
    )
    shifted_extent = g.op("Add", dim_extent, chunk_count_minus_1)
    chunk_len = g.op("Div", shifted_extent, chunk_count)

    pieces = []
    for ordinal in range(1, chunks + 1):
        multiplier = g.op(
            "Constant", value_t=torch.tensor([ordinal], dtype=torch.long)
        )
        end = g.op("Mul", chunk_len, multiplier)
        pieces.append(g.op("Slice", self, start, end, axis))
        # The next chunk begins where this one ended.
        start = end
    return pieces
@_onnx_symbolic("aten::hstack")
@_beartype.beartype
def hstack(g: jit_utils.GraphContext, tensor_list: _C.Value):
    """Export ``aten::hstack`` as an ``If`` choosing the concatenation axis.

    The first tensor of the sequence is inspected in-graph: when the size of
    its shape equals 1 the sequence is concatenated along axis 0, otherwise
    along axis 1.

    Args:
        g: Graph context the nodes are appended to.
        tensor_list: Tensors to stack; passed through ``atleast_1d`` first.

    Returns:
        The output of the emitted ``If`` node.
    """
    tensor_list = atleast_1d(g, tensor_list)

    first_position = g.op("Constant", value_t=torch.tensor(0, dtype=torch.long))
    first_tensor = g.op("SequenceAt", tensor_list, first_position)
    # Size of the Shape output, i.e. the number of dimensions of the first tensor.
    first_rank = g.op("Size", g.op("Shape", first_tensor))
    one = g.op("Constant", value_t=torch.tensor(1, dtype=torch.long))
    rank_is_one = g.op("Equal", first_rank, one)

    if_node, branch_contexts, _ = jit_utils.add_op_with_blocks(
        g, "If", rank_is_one, n_blocks=2, outputs=1
    )
    then_context, else_context = branch_contexts

    then_result = then_context.op(
        "ConcatFromSequence", tensor_list, axis_i=0, new_axis_i=0
    )
    utils._add_output_to_block(then_context.block, then_result)

    else_result = else_context.op(
        "ConcatFromSequence", tensor_list, axis_i=1, new_axis_i=0
    )
    utils._add_output_to_block(else_context.block, else_result)

    return if_node.node().output()
@_onnx_symbolic("aten::vstack")
@_beartype.beartype
def vstack(g: jit_utils.GraphContext, tensor_list: _C.Value):
    """Export ``aten::vstack`` as ``ConcatFromSequence`` along axis 0.

    Args:
        g: Graph context the nodes are appended to.
        tensor_list: Tensors to stack; passed through ``atleast_2d`` first.

    Returns:
        The output of the emitted ``ConcatFromSequence`` node.
    """
    promoted = atleast_2d(g, tensor_list)
    return g.op("ConcatFromSequence", promoted, axis_i=0, new_axis_i=0)