# Source code for augpy._augpy

"""Python bindings for image processing CUDA functions"""


from typing import Tuple
from typing import List


class pybind11_object:
    """Empty placeholder class that the binding classes in this module derive from."""


class buffer:
    """Stand-in type for any object that supports the buffer interface,
    such as bytearray or numpy.ndarray."""


class capsule:
    """Stand-in type for a Python object that holds a reference to a C object."""


class CuRandError(Exception):
    """Exception type exported by the bindings; by its name it reports cuRAND failures (unconfirmed)."""


class DLDataTypeCode:
    """
    :ref:`cpp/tensor:dlpack` type code enum.

    Members:

      kDLInt :
        Signed integer.

      kDLUInt :
        Unsigned integer.

      kDLFloat :
        Floating point number.
    """

    def __init__(self: 'DLDataTypeCode', arg0: int) -> None:
        """__init__(self: augpy._augpy.DLDataTypeCode, arg0: int) -> None"""

    @property
    def kDLInt(self):
        """Member of the :ref:`cpp/tensor:dlpack` type code enum: signed integer."""

    @property
    def kDLUInt(self):
        """Member of the :ref:`cpp/tensor:dlpack` type code enum: unsigned integer."""

    @property
    def kDLFloat(self):
        """Member of the :ref:`cpp/tensor:dlpack` type code enum: floating point number."""


# Module-level handle for the signed-integer type code, constructed from integer 0.
kDLInt = DLDataTypeCode(0)


# Module-level handle for the floating-point type code, constructed from integer 2.
kDLFloat = DLDataTypeCode(2)


class DLDataType(pybind11_object):
    """
    :ref:`cpp/tensor:dlpack` data type for :py:class:`CudaTensors <CudaTensor>`.

    Parameters:
        code: See :py:class:`DLDataTypeCode`
        bits: Number of bits
        lanes: Number of elements for vector types;
            must be 1 to use with :py:class:`CudaTensor`
    """

    def __init__(self: 'DLDataType', code: int, bits: int, lanes: int = 1) -> None:
        """__init__(self: augpy._augpy.DLDataType, code: int, bits: int, lanes: int = 1) -> None"""

    @property
    def bits(self):
        """
        Number of bits.
        """

    @property
    def code(self):
        """
        See :py:class:`DLDataTypeCode`.
        """

    @property
    def itemsize(self):
        """
        Number of bytes per element with this data type.
        """

    @property
    def lanes(self):
        """
        Number of elements for vector types.
        Must be 1 to use with :py:class:`CudaTensor`.
        """


# Module-level handle for the unsigned-integer type code, constructed from integer 1.
# CudaTensor.__init__ uses it in its default dtype (code=kDLUInt, bits=8).
kDLUInt = DLDataTypeCode(1)


class CudaDevice(pybind11_object):
    """
    Create a new CudaDevice with the given Cuda device ID.
    0 is the default and typically fastest device in the system.

    Parameters:
        device_id: GPU device ID
    """

    def __init__(self: 'CudaDevice', device_id: int) -> None:
        """__init__(self: augpy._augpy.CudaDevice, device_id: int) -> None"""

    # NOTE(review): the activate/deactivate texts below talk about the current
    # *stream* even though this class is a device; the wording matches
    # CudaStream.activate/deactivate verbatim and may have been copied from
    # there. Confirm against the C++ documentation before rewording.
    def activate(self: 'CudaDevice') -> None:
        """activate(self: augpy._augpy.CudaDevice) -> None

        Make this the :ref:`py/core:current_stream`
        and remember the previous stream.
        """

    def deactivate(self: 'CudaDevice') -> None:
        """deactivate(self: augpy._augpy.CudaDevice) -> None

        Make the previous stream the :ref:`py/core:current_stream`.
        """

    def get_device(self: 'CudaDevice') -> int:
        """get_device(self: augpy._augpy.CudaDevice) -> int

        Return the device ID.
        """

    def get_properties(self: 'CudaDevice') -> 'CudaDeviceProp':
        """get_properties(self: augpy._augpy.CudaDevice) -> augpy._augpy.CudaDeviceProp

        Return the device properties,
        see :ref:`py/core:get_device_properties` for more details.
        """

    def synchronize(self: 'CudaDevice') -> None:
        """synchronize(self: augpy._augpy.CudaDevice) -> None

        Block until all work on this device has finished.
        Cuda uses busy waiting to achieve this.
        See synchronization method of
        :ref:`py/core:CudaStream` or :ref:`py/core:CudaEvent`
        to avoid the CPU load this incurs.
        """


class CudaDeviceProp(pybind11_object):
    """
    The `cudaDeviceProp <https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaDeviceProp.html>`_
    struct extended with stream priority fields
    :py:attr:`leastStreamPriority` and :py:attr:`greatestStreamPriority`,
    :py:attr:`coresPerMultiprocessor`, and :py:attr:`maxGridSize`.
    """

    def __init__(self):
        """Initialize self.  See help(type(self)) for accurate signature."""

    @property
    def coresPerMultiprocessor(self):
        """
        Number of Cuda cores per multiprocessor
        """

    @property
    def coresPerSM(self):
        """
        Number of Cuda cores per SM.
        """

    @property
    def greatestStreamPriority(self):
        """
        Highest priority a Cuda stream on this device can have.
        """

    @property
    def l2CacheSize(self):
        """
        Size of L2 cache in bytes
        """

    @property
    def leastStreamPriority(self):
        """
        Lowest priority a Cuda stream on this device can have.
        """

    @property
    def major(self):
        """
        Major compute capability
        """

    @property
    def maxGridSize(self):
        """
        Max number of blocks in each grid dimension
        """

    @property
    def maxThreadsDim(self):
        """
        Maximum size of each dimension of a block
        """

    @property
    def maxThreadsPerBlock(self):
        """
        Maximum number of threads per block
        """

    @property
    def maxThreadsPerMultiProcessor(self):
        """
        Maximum resident threads per multiprocessor
        """

    @property
    def minor(self):
        """
        Minor compute capability
        """

    @property
    def multiProcessorCount(self):
        """
        Number of multiprocessors on device
        """

    @property
    def name(self):
        """
        ASCII string identifying device
        """

    @property
    def numCudaCores(self):
        """
        Total number of Cuda cores.
        """

    @property
    def regsPerBlock(self):
        """
        32-bit registers available per block
        """

    @property
    def regsPerMultiprocessor(self):
        """
        32-bit registers available per multiprocessor
        """

    @property
    def sharedMemPerBlock(self):
        """
        Shared memory available per block in bytes
        """

    @property
    def sharedMemPerMultiprocessor(self):
        """
        Shared memory available per multiprocessor in bytes
        """

    @property
    def streamPrioritiesSupported(self):
        """
        Device supports stream priorities
        """

    @property
    def totalConstMem(self):
        """
        Constant memory available on device in bytes
        """

    @property
    def totalGlobalMem(self):
        """
        Global memory available on device in bytes
        """

    @property
    def warpSize(self):
        """
        Warp size in threads
        """


class CudaError(Exception):
    """Exception type exported by the bindings; by its name it reports Cuda failures (unconfirmed)."""


class CudaEvent(pybind11_object):
    """
    Convenience wrapper for the
    `cudaEvent_t <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html>`_.

    Creating a new CudaEvent retrieves an event from the event pool of the
    :ref:`py/core:current_device`.
    """

    def __init__(self: 'CudaEvent') -> None:
        """__init__(self: augpy._augpy.CudaEvent) -> None"""

    def query(self: 'CudaEvent') -> bool:
        """query(self: augpy._augpy.CudaEvent) -> bool

        Returns ``True`` if event has occurred.
        """

    def record(self: 'CudaEvent') -> None:
        """record(self: augpy._augpy.CudaEvent) -> None

        Record wrapped event on :ref:`py/core:current_stream`.
        """

    def synchronize(self: 'CudaEvent', microseconds: int = 100) -> None:
        """synchronize(self: augpy._augpy.CudaEvent, microseconds: int = 100) -> None

        Block until event has occurred.
        Checks in ``microseconds`` interval.
        Faster intervals make this more accurate, but increase CPU load.
        Uses standard Cuda busy-waiting method if ``microseconds <= 0``.

        Parameters:
            microseconds: check interval
        """


class CudaStream(pybind11_object):
    """
    Convenience wrapper for the
    `cudaStream_t <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html>`_
    type.

    Creates a new Cuda stream on the given device.
    Lower numbers mean higher priority,
    and values are clipped to the valid range.
    Use :py:func:`get_device_properties`
    to get the range of possible values for a device.

    See:
        `cudaStreamCreateWithPriority <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1ge2be9e9858849bf62ba4a8b66d1c3540>`_

    Use ``device_id=-1`` and ``priority=-1`` to get the
    :py:attr:`default_stream`.

    Parameters:
        device_id: GPU device ID
        priority: stream priority
    """

    def __init__(self: 'CudaStream', device_id: int = 0, priority: int = 0) -> None:
        """__init__(self: augpy._augpy.CudaStream, device_id: int = 0, priority: int = 0) -> None"""

    def activate(self: 'CudaStream') -> None:
        """activate(self: augpy._augpy.CudaStream) -> None

        Make this the :ref:`py/core:current_stream`
        and remember the previous stream.
        """

    def deactivate(self: 'CudaStream') -> None:
        """deactivate(self: augpy._augpy.CudaStream) -> None

        Make the previous stream the :ref:`py/core:current_stream`.
        """

    def synchronize(self: 'CudaStream', microseconds: int = 100) -> None:
        """synchronize(self: augpy._augpy.CudaStream, microseconds: int = 100) -> None

        Block until all work on this stream has finished.
        Checks in ``microseconds`` interval.
        Faster intervals make this more accurate, but increase CPU load.
        Uses standard Cuda busy-waiting method if ``microseconds <= 0``.
        """


class CudaTensor(pybind11_object):
    """
    Create a new, empty tensor on a GPU device.

    Parameters:
        shape: shape of the tensor
        dtype: data type
        device_id: Cuda device id
    """

    def __init__(self: 'CudaTensor', shape: List[int], dtype: DLDataType = DLDataType(code=kDLUInt, bits=8), device_id: int = 0) -> None:
        """__init__(self: augpy._augpy.CudaTensor, shape: List[int], dtype: augpy._augpy.DLDataType = DLDataType(code=kDLUInt, bits=8), device_id: int = 0) -> None"""

    @property
    def byte_offset(self):
        """
        Starting offset in bytes for the data pointer.
        """

    @property
    def dtype(self):
        """
        Tensor data type.
        """

    def fill(self, *args, **kwargs):
        """fill(*args, **kwargs)
        Overloaded function.

        1. fill(self: augpy._augpy.CudaTensor, scalar: float) -> augpy._augpy.CudaTensor

            Fill the tensor with the given scalar value.

            :returns: this tensor

        2. fill(self: augpy._augpy.CudaTensor, other: augpy._augpy.CudaTensor) -> augpy._augpy.CudaTensor

            Copy the given tensor into this tensor.

            :returns: this tensor
        """

    @property
    def is_contiguous(self):
        """
        ``True`` if the tensor is contiguous, i.e.,
        elements are located next to each other in memory.
        """

    @property
    def itemsize(self):
        """
        Size of one element in bytes.
        """

    @property
    def ndim(self):
        """
        Number of dimensions.
        """

    def numpy(self, *args, **kwargs):
        """numpy(*args, **kwargs)
        Overloaded function.

        1. numpy(self: augpy._augpy.CudaTensor) -> array

            Create a new numpy array and start copying data from
            the device to host memory.

        2. numpy(self: augpy._augpy.CudaTensor, array: buffer = None) -> array

            Create a new numpy array from the given buffer and
            start copying data from the device to host memory.

            :param array: buffer to create new array from
        """

    @property
    def ptr(self):
        """
        Data pointer.
        """

    def reshape(self: 'CudaTensor', shape: List[int]) -> 'CudaTensor':
        """reshape(self: augpy._augpy.CudaTensor, shape: List[int]) -> augpy._augpy.CudaTensor

        Return a new tensor that uses the same backing memory
        with a different shape. Shape must have same number
        of elements. Only contiguous tensors can be reshaped.

        Parameters:
            shape: new shape
        """

    @property
    def shape(self):
        """
        Tensor shape.
        """

    @property
    def size(self):
        """
        Number of elements in the tensor.
        """

    @property
    def strides(self):
        """
        Tensor strides, i.e., the number of elements to add
        to a flat tensor to reach the next element for each
        dimension.
        """

    def sum(self, *args, **kwargs):
        """sum(*args, **kwargs)
        Overloaded function.

        1. sum(self: augpy._augpy.CudaTensor, upcast: bool = False) -> augpy._augpy.CudaTensor

            Sum all values in the tensor.

            :param upcast: if ``True``, the output scalar tensor will
                be promoted to a more expressive data type to avoid saturation

            :returns: sum as scalar tensor

        2. sum(self: augpy._augpy.CudaTensor, axis: int, keepdim: bool = False, upcast: bool = False, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

            Sum all values in the tensor along an axis.

            :param axis: which axis to sum along
            :param keepdim: keep the summed dimension with size 1
            :param upcast: if ``True``, the output scalar tensor will
                be promoted to a more expressive data type to avoid saturation
            :param out: use this tensor as output, must have correct
                shape, and same data type if ``upcast`` is ``False``,
                otherwise promoted type is required

            :returns: tensor summed along axis
        """


class CutlassError(Exception):
    """Exception type exported by the bindings; by its name it reports CUTLASS failures (unconfirmed)."""


class Decoder(pybind11_object):
    """
    Wrapper for Nvjpeg-based JPEG decoding,
    created on the :ref:`py/core:current_device`.

    See:
        `Nvjpeg docs <https://docs.nvidia.com/cuda/nvjpeg/index.html#nvjpeg-set-device-mem-padding>`_

    Parameters:
        device_padding: memory padding on the device
        host_padding: memory padding on the host
        gpu_huffman: enable Huffman decoding on the GPU;
            not recommended unless you really need
            to offload from CPU
    """

    def __init__(self: 'Decoder', device_padding: int = 16777216, host_padding: int = 8388608, gpu_huffman: bool = False) -> None:
        """__init__(self: augpy._augpy.Decoder, device_padding: int = 16777216, host_padding: int = 8388608, gpu_huffman: bool = False) -> None"""

    def decode(self: 'Decoder', data: str, buffer: CudaTensor = None) -> CudaTensor:
        """decode(self: augpy._augpy.Decoder, data: str, buffer: augpy._augpy.CudaTensor = None) -> augpy._augpy.CudaTensor

        Decode a JPEG image using Nvjpeg.
        Output is in :math:`(H,W,C)` format and resides on the GPU device.

        Parameters:
            data: compressed JPEG image as a JFIF string, i.e.,
                the full file contents
            buffer: optional buffer to use; may be ``None``;
                if not ``None`` must be big enough to contain
                the decoded image

        Returns:
            new tensor with decoded image on GPU in :math:`(H,W,C)` format
        """


# The name shadows the builtin MemoryError inside this module; it is kept
# because it is part of the module's public interface.
class MemoryError(Exception):
    """Exception type exported by the bindings; derives from Exception, not from the builtin MemoryError."""


class NvJpegError(Exception):
    """Exception type exported by the bindings; by its name it reports Nvjpeg failures (unconfirmed)."""


class RandomNumberGenerator(pybind11_object):
    """
    A convenient wrapper for cuRAND methods
    that fill tensors with pseudo-random numbers.

    Parameters:
        device_id: GPU device ID;
            if ``None``, :ref:`py/core:current_device` is used
        seed: random seed;
            if ``None``, read values from
            `std::random_device <https://en.cppreference.com/w/cpp/numeric/random/random_device>`_
            to create a random seed.
    """

    def __init__(self: 'RandomNumberGenerator', device_id: object = None, seed: object = None) -> None:
        """__init__(self: augpy._augpy.RandomNumberGenerator, device_id: object = None, seed: object = None) -> None"""

    def gaussian(self: 'RandomNumberGenerator', target: CudaTensor, mean: float = 0.0, std: float = 1.0, blocks_per_sm: int = 8, threads: int = 0) -> None:
        """gaussian(self: augpy._augpy.RandomNumberGenerator, target: augpy._augpy.CudaTensor, mean: float = 0.0, std: float = 1.0, blocks_per_sm: int = 8, threads: int = 0) -> None

        Fill ``target`` tensor with Gaussian distributed numbers
        with specified ``mean`` and standard deviation ``std``.

        .. note::
            This is supported for integer tensors. Values are
            drawn from the given distribution, then rounded and
            cast to the data type of the tensor with saturation.
            The values in an integer tensor are thus only
            approximately Gaussian distributed.

        Parameters:
            target: tensor to fill
            mean: Gaussian mean
            std: Gaussian standard deviation
        """

    def uniform(self: 'RandomNumberGenerator', target: CudaTensor, vmin: float, vmax: float, blocks_per_sm: int = 8, threads: int = 0) -> None:
        # Raw docstring: in a normal string literal the ``\f`` of ``\frac``
        # is a form-feed escape, which corrupts the math markup.
        r"""uniform(self: augpy._augpy.RandomNumberGenerator, target: augpy._augpy.CudaTensor, vmin: float, vmax: float, blocks_per_sm: int = 8, threads: int = 0) -> None

        Fill ``target`` tensor with uniformly distributed numbers
        in :math:`[v_{min}, v_{max})`.

        .. note::
            This is supported for integer tensors. Values are
            cast from float or double down to the integer type.
            The mean of the values is approximately
            :math:`\frac{v_{max} + v_{min}}{2}`.

        .. warning::
            Saturation is not used.
            :math:`v_{min}` and :math:`v_{max}` must be
            representable in the target tensor data type,
            else values may under or overflow.

        Parameters:
            target: tensor to fill
            vmin: minimum value; can occur
            vmax: maximum value; does not occur
        """


class WarpScaleMode:
    """
    Enum whether to scale relative to the
    shortest or longest side of the image.

    Members:

      WARP_SCALE_SHORTEST :
        Scaling is relative to the shortest side of the image.

      WARP_SCALE_LONGEST :
        Scaling is relative to the longest side of the image.
    """

    def __init__(self: 'WarpScaleMode', arg0: int) -> None:
        """__init__(self: augpy._augpy.WarpScaleMode, arg0: int) -> None"""

    @property
    def WARP_SCALE_SHORTEST(self):
        """Scale mode member: scaling is relative to the shortest side of the image."""

    @property
    def WARP_SCALE_LONGEST(self):
        """Scale mode member: scaling is relative to the longest side of the image."""


# Module-level handle for the "longest side" scale mode, constructed from integer 1.
WARP_SCALE_LONGEST = WarpScaleMode(1)


# Module-level handle for the "shortest side" scale mode, constructed from integer 0.
WARP_SCALE_SHORTEST = WarpScaleMode(0)


def add(*args, **kwargs):
    """add(*args, **kwargs)
    Overloaded function.

    1. add(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

        Add a ``scalar`` value to a ``tensor``.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

    2. add(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

        Add ``tensor2`` to ``tensor1``.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``
    """


# The name shadows the builtin ``all`` inside this module; it is kept because
# it is part of the module's public interface.
def all(tensor: CudaTensor) -> CudaTensor:
    """all(tensor: augpy._augpy.CudaTensor) -> augpy._augpy.CudaTensor

    Check whether all elements in a tensor are greater than zero.

    Parameters:
        tensor: input tensor, must be contiguous

    Returns:
        ``0`` or ``1`` as scalar ``uint8`` tensor
    """


def array_to_tensor(*args, **kwargs):
    """array_to_tensor(*args, **kwargs)
    Overloaded function.

    1. array_to_tensor(array: buffer, device_id: int = 0) -> augpy._augpy.CudaTensor

        Copy a Python buffer into a new tensor on the specified GPU device.
        This initiates an asynchronous copy from host to device memory.

    2. array_to_tensor(array: buffer, tensor: augpy._augpy.CudaTensor) -> augpy._augpy.CudaTensor

        Copy a Python buffer to a tensor created from the given buffer ``tensor``.
        This initiates an asynchronous copy from host to device memory.
    """


def box_blur_single(input: CudaTensor, ksize: int, out: CudaTensor = None) -> CudaTensor:
    """box_blur_single(input: augpy._augpy.CudaTensor, ksize: int, out: augpy._augpy.CudaTensor = None) -> augpy._augpy.CudaTensor

    Apply box blur to a single image.

    Kernel size describes both width and height in pixels
    of the area in the input that is averaged for each
    output pixel.
    Odd values are recommended for best results.
    For even values, the center of the kernel is below
    and to the right of the true center.
    This means the output is shifted up and left by half
    a pixel.

    Parameters:
        input: image tensor in channel-first format
        ksize: kernel size in pixels
        out: output tensor (may be ``None``)

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """


def cast(*args, **kwargs):
    """cast(*args, **kwargs)
    Overloaded function.

    1. cast(tensor: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

        Read values from ``tensor``, cast them to the data type of
        ``out`` and store them there.
        ``tensor`` and ``out`` must have the same shape.

        :param tensor: source tensor
        :param out: output tensor

    2. cast(tensor: augpy._augpy.CudaTensor, dtype: augpy._augpy.DLDataType, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

        Create a new tensor with values from ``tensor``
        cast to the given data type ``dtype``.

        :param tensor: source tensor
        :param dtype: target data type
        :returns: new tensor with given data type
    """


def copy(src: CudaTensor, dst: CudaTensor, blocks_per_sm: int = 8, threads: int = 0) -> CudaTensor:
    """copy(src: augpy._augpy.CudaTensor, dst: augpy._augpy.CudaTensor, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

    Copy ``src`` into ``dst``.
    Supports broadcasting.
    """


# Stream built with device_id=-1 and priority=-1, the combination the
# CudaStream docstring names as the way to obtain the default stream.
default_stream = CudaStream(device_id=-1, priority=-1)


def disable_profiler() -> None:
    """disable_profiler() -> None

    Disable the Cuda profiler.
    """


def div(*args, **kwargs):
    """div(*args, **kwargs)
    Overloaded function.

    1. div(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

        Divide a ``tensor`` by a ``scalar`` value.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

    2. div(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

        Divide ``tensor1`` by ``tensor2``.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``
    """


def empty_like(tensor: CudaTensor) -> CudaTensor:
    """empty_like(tensor: augpy._augpy.CudaTensor) -> augpy._augpy.CudaTensor

    Create a new tensor with the same shape,
    dtype and on the same device as ``tensor``.
    """


def enable_profiler() -> None:
    """enable_profiler() -> None

    Turn the Cuda profiler on.
    """
    # Signature-only stub: nothing to execute here.
    ...


def eq(*args, **kwargs):
    """eq(*args, **kwargs)
Overloaded function.

1. eq(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor == scalar`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``


2. eq(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor1 == tensor2`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    pass


def export_dltensor(tensor: object, name: str = 'dltensor', destruct: bool = True) -> capsule:
    """export_dltensor(tensor: object, name: str = 'dltensor', destruct: bool = True) -> capsule


        Export a GPU tensor to be used by another library.

        Parameters:
            tensor: Python-wrapped CudaTensor
            name: name under which the tensor is stored in the returned
                :py:ref:`capsule <Capsules>`, e.g., ``"dltensor"`` for Pytorch
            destruct: if ``True``, add a destructor to the
                :py:ref:`capsule <Capsules>` which will delete the tensor
                when the capsule is deleted; only set to ``False`` if you
                know what you're doing

        Returns:
            :py:ref:`capsule <Capsules>` with exported :py:class:`CudaTensor`
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def fill(scalar: float, dst: CudaTensor, blocks_per_sm: int = 8, threads: int = 0) -> CudaTensor:
    """fill(scalar: float, dst: augpy._augpy.CudaTensor, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor


        Fill ``dst`` with the given ``scalar`` value.

        :param scalar: value written to the tensor
        :param dst: tensor to fill
    """
    # Signature-only stub: the body is intentionally empty.
    # NOTE(review): blocks_per_sm/threads look like kernel launch settings — confirm.
    pass


# Floating point data type constants: type code kDLFloat with 16, 32 and 64 bits.
# 16-bit float
float16 = DLDataType(code=kDLFloat, bits=16)


# 32-bit float
float32 = DLDataType(code=kDLFloat, bits=32)


# 64-bit float
float64 = DLDataType(code=kDLFloat, bits=64)


def fma(scalar: float, tensor1: CudaTensor, tensor2: CudaTensor, out: CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> CudaTensor:
    r"""fma(scalar: float, tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute a fused multiply-add on a scalar and two tensors, i.e.,

        .. math::

            r = s \cdot t_1 + t_2

        If ``tensor1`` has an unsigned integer data type,
        then ``tensor2`` must have the signed version of the same type,
        e.g., a ``uint8`` tensor must be paired with a ``int8`` tensor.

        Parameters:
            scalar: scalar factor :math:`s`
            tensor1: tensor :math:`t_1`
            tensor2: tensor :math:`t_2`
            out: optional output tensor :math:`r`

        Returns:
            new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    # The docstring is raw so the LaTeX backslashes reach Sphinx unchanged.
    pass


def gaussian_blur(input: CudaTensor, sigmas: CudaTensor, max_ksize: int, out: CudaTensor = None) -> CudaTensor:
    """gaussian_blur(input: augpy._augpy.CudaTensor, sigmas: augpy._augpy.CudaTensor, max_ksize: int, out: augpy._augpy.CudaTensor = None) -> augpy._augpy.CudaTensor


        Apply Gaussian blur to a batch of images.

        Maximum kernel size can be calculated like this:

        ``ksize = max(3, int(max(sigmas) * 6.6 - 2.3) | 1)``

        I.e., ``ksize`` is at least 3 and always odd.

        The given kernel size defines the upper limit.
        The actual kernel size is calculated with the
        formula above and clipped at the given maximum.

        Smaller values can be given to trade speed vs quality.
        Bigger values typically do not visibly improve quality.

        Odd values are strongly recommended for best results.
        For even values, the center of the kernel is below
        and to the right of the true center.
        This means the output is shifted up and left by half
        a pixel.
        This can lead to inconsistencies between images
        in the batch.
        Images with large sigmas may be shifted, while smaller
        sigmas mean no shift occurs.

        Parameters:
            input: batch tensor with images in first dimension
            sigmas: float tensor with one sigma value per image in the batch
            max_ksize: maximum kernel size in pixels
            out: output tensor (may be ``None``)

        Returns:
            new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def gaussian_blur_single(input: CudaTensor, sigma: float, out: CudaTensor = None) -> CudaTensor:
    """gaussian_blur_single(input: augpy._augpy.CudaTensor, sigma: float, out: augpy._augpy.CudaTensor = None) -> augpy._augpy.CudaTensor


        Apply Gaussian blur to a single image.

        Kernel size is calculated like this:

        ``ksize = max(3, int(sigma * 6.6 - 2.3) | 1)``

        I.e., ``ksize`` is at least 3 and always odd.

        Parameters:
            input: image tensor in channel-first format
            sigma: standard deviation of the kernel
            out: output tensor (may be ``None``)

        Returns:
            new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def ge(tensor: CudaTensor, scalar: float, out: CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> CudaTensor:
    """ge(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor >= scalar`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def gemm(A: CudaTensor, B: CudaTensor, C: CudaTensor = None, alpha: float = 1.0, beta: float = 0.0) -> CudaTensor:
    r"""gemm(A: augpy._augpy.CudaTensor, B: augpy._augpy.CudaTensor, C: augpy._augpy.CudaTensor = None, alpha: float = 1.0, beta: float = 0.0) -> augpy._augpy.CudaTensor


        Calculate the matrix multiplication of two 2D tensors.
        More specifically calculates

        .. math::

            C = A \times (\alpha \cdot B) + \beta \cdot C

        Only ``float`` and ``double`` are supported.

        All tensors must have the same data type.

        All tensors must be contiguous.

        Parameters:
            A: left 2D tensor :math:`A`
            B: right 2D tensor :math:`B`
            C: optional output tensor :math:`C`
            alpha: scalar factor :math:`\alpha`
            beta: scalar factor :math:`\beta`

        Returns:
            new output tensor if ``C`` is ``None``, otherwise ``C``
    """
    # Signature-only stub: the body is intentionally empty.
    # The docstring is raw: in a normal string "\t", "\a" and "\b" would turn
    # \times, \alpha and \beta into control characters.
    pass


def get_current_device() -> int:
    """get_current_device() -> int


        Returns the active device ID.

        See:
            :ref:`py/core:current_device`.
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def get_current_stream() -> CudaStream:
    """get_current_stream() -> augpy._augpy.CudaStream


        Returns the active :py:class:`CudaStream`.

        See:
            :ref:`py/core:current_stream`
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def get_device_properties(device_id: int) -> CudaDeviceProp:
    """get_device_properties(device_id: int) -> augpy._augpy.CudaDeviceProp


        Get :py:class:`CudaDeviceProp` for given device.

        Parameters:
            device_id: Cuda device id

        Returns:
            CudaDeviceProp: properties of device
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def gt(tensor: CudaTensor, scalar: float, out: CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> CudaTensor:
    """gt(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor > scalar`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def import_dltensor(tensor_capsule: capsule, name: str) -> CudaTensor:
    """import_dltensor(tensor_capsule: capsule, name: str) -> augpy._augpy.CudaTensor


        Import a GPU tensor from another library into augpy.

        Parameters:
            tensor_capsule: a Python :py:ref:`capsule <Capsules>` object that contains
                a :any:`DLManagedTensor`
            name: name under which the tensor is stored in the
                :py:ref:`capsule <Capsules>`, e.g., ``"dltensor"`` for Pytorch

        Returns:
            other tensor wrapped in a :py:class:`CudaTensor`
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def init() -> None:
    """init() -> None

    Enable the `cudaDeviceScheduleYield
    <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g69e73c7dda3fc05306ae7c811a690fac>`_
    flag on the :ref:`py/core:current_device`.

    .. warning::

        EXPERIMENTAL! MAY REDUCE GPU THROUGHPUT AND BREAK MANY THINGS!
    """
    # Signature-only stub: nothing to execute here.
    ...


# Signed integer data type constants: type code kDLInt with 16, 32, 64 and 8 bits.
# 16-bit signed integer
int16 = DLDataType(code=kDLInt, bits=16)


# 32-bit signed integer
int32 = DLDataType(code=kDLInt, bits=32)


# 64-bit signed integer
int64 = DLDataType(code=kDLInt, bits=64)


# 8-bit signed integer
int8 = DLDataType(code=kDLInt, bits=8)


def le(*args, **kwargs):
    """le(*args, **kwargs)
Overloaded function.

1. le(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor <= scalar`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``


2. le(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor1 <= tensor2`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    pass


def lighting(imtensor: CudaTensor, gammagrays: CudaTensor, gammacolors: CudaTensor, contrasts: CudaTensor, vmin: float, vmax: float, out: CudaTensor = None) -> CudaTensor:
    r"""lighting(imtensor: augpy._augpy.CudaTensor, gammagrays: augpy._augpy.CudaTensor, gammacolors: augpy._augpy.CudaTensor, contrasts: augpy._augpy.CudaTensor, vmin: float, vmax: float, out: augpy._augpy.CudaTensor = None) -> augpy._augpy.CudaTensor


        Apply lighting augmentation to a batch of images.
        This is a four-step process:

        #. Normalize values :math:`v_{norm} = \frac{v - v_{min}}{v_{max}-v_{min}}`
           with :math:`v_{min}` the minimum and :math:`v_{max}` the maximum
           lightness value
        #. Apply contrast change
        #. Apply gamma correction
        #. Denormalize values :math:`v' = v_{norm} * (v_{max}-v_{min}) + v_{min}`

        To change contrast two reference functions are used.
        With contrast :math:`\mathcal{c} \ge 0`, i.e., increased contrast,
        the following function is used:

        .. math::

            f_{pos}(v) =
            \frac{1.0037575963899724}{1 + exp(6.279 + v \cdot 12.558)} - 0.0018787981949862

        With contrast :math:`\mathcal{c} < 0`, i.e., decreased contrast,
        the following function is used:

        .. math::

            f_{neg}(v) =
            0.1755606108304832 \cdot atanh(v \cdot 1.986608 - 0.993304) + 0.5

        The final value is
        :math:`v' = (1-\mathcal{c}) \cdot v + \mathcal{c} \cdot f(v)`.

        Brightness and color changes are done via gamma correction.

        .. math::

            v' = v^{\gamma_{gray} \cdot \gamma_c}

        with :math:`\gamma_{gray}` the gamma for overall lightness and
        :math:`\gamma_{c}` the per-channel gamma.

        Parameters:
            imtensor: image tensor in :math:`(N,C,H,W)` format
            gammagrays: tensor of :math:`N` gamma gray values
            gammacolors: tensor of :math:`C\cdot N` gamma values in the format
                :math:`\gamma_{1,1}, \gamma_{1,2}, ..., \gamma_{1,C},
                \gamma_{2,1}, \gamma_{2,2}, ... \gamma_{N,C-1}, \gamma_{N,C}`
            contrasts: tensor of :math:`N` contrast values in :math:`[-1, 1]`
            vmin: minimum lightness value in images
            vmax: maximum lightness value in images
            out: output tensor (may be ``None``)

        Returns:
            new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    # The docstring is raw: in a normal string "\f" in \frac is a form feed and
    # the other LaTeX backslashes are invalid escape sequences.
    pass


def lt(*args, **kwargs):
    """lt(*args, **kwargs)
Overloaded function.

1. lt(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor < scalar`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``


2. lt(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Compute ``tensor1 < tensor2`` as ``uint8`` tensor,
        where ``1`` means the condition is met and ``0`` otherwise.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    pass


def make_affine_matrix(out: buffer, source_height: int, source_width: int, target_height: int, target_width: int, angle: float = 0.0, scale: float = 1.0, aspect: float = 1.0, shifty: float = 0.0, shiftx: float = 0.0, sheary: float = 0.0, shearx: float = 0.0, hmirror: bool = False, vmirror: bool = False, scale_mode: WarpScaleMode = WarpScaleMode.WARP_SCALE_SHORTEST, max_supersampling: int = 3) -> int:
    r"""make_affine_matrix(out: buffer, source_height: int, source_width: int, target_height: int, target_width: int, angle: float = 0.0, scale: float = 1.0, aspect: float = 1.0, shifty: float = 0.0, shiftx: float = 0.0, sheary: float = 0.0, shearx: float = 0.0, hmirror: bool = False, vmirror: bool = False, scale_mode: augpy._augpy.WarpScaleMode = WarpScaleMode.WARP_SCALE_SHORTEST, max_supersampling: int = 3) -> int


        Create a :math:`2 \times 3` matrix for a set of affine
        transformations.
        This matrix is compatible with the `warpAffine
        <https://docs.opencv.org/3.4/da/d54/group__imgproc__transform.html#ga0203d9ee5fcd28d40dbc4a1ea4451983>`_
        function of OpenCV with the `WARP_INVERSE_MAP
        <https://docs.opencv.org/3.4/da/d54/group__imgproc__transform.html#gga5bb5a1fea74ea38e1a5445ca803ff121aa48be1c433186c4eae1ea86aa0ca75ba>`_
        flag set.

        Transforms are applied in the following order:

        #. shear
        #. scale & aspect ratio
        #. horizontal & vertical mirror
        #. rotation
        #. horizontal & vertical shift

        See:
            :py:func:`make_transform` for a more convenient version of this function.

        Parameters:
            out: output buffer that matrix is written to;
                must be a writeable :math:`2 \times 3` ``float`` buffer
            source_height: :math:`h_s` height of the image in pixels
            source_width: :math:`w_s` width of the image in pixels
            target_height: :math:`h_t` height of the output canvas in pixels
            target_width: :math:`w_t` width of the output canvas in pixels
            angle: clockwise angle in degrees
                with image center as rotation axis
            scale: scale factor relative to output size;
                1 means fill target height or width wise depending
                on ``scale_mode`` and whichever is longest/shortest;
                larger values will crop,
                smaller values leave empty space in the output canvas
            aspect: controls the aspect ratio;
                1 means same as input, values greater than 1
                increase the width and reduce the height
            shifty: shift the image in y direction (vertical);
                0 centers the image on the output canvas;
                -1 means shift up as much as possible;
                1 means shift down as much as possible;
                the maximum distance to shift is
                :math:`max(scale \cdot h_s - h_t, h_t - scale \cdot h_s)`
            shiftx: same as ``shifty``, but in x direction (horizontal)
            sheary: controls up/down shear;
                for every pixel in the x direction move ``sheary`` pixels
                in y direction
            shearx: same as ``sheary`` but controls left/right shear
            hmirror: if ``True`` flip image horizontally
            vmirror: if ``True`` flip image vertically
            scale_mode: if :py:attr:`WarpScaleMode.WARP_SCALE_SHORTEST` scale
                is relative to shortest side;
                this fills the output canvas, cropping the image
                if necessary;
                if :py:attr:`WarpScaleMode.WARP_SCALE_LONGEST` scale
                is relative to longest side;
                this ensures the image is contained inside the
                output canvas, but leaves empty space
            max_supersampling: upper limit for recommended supersampling

        Returns:
            recommended supersampling factor for the warp
    """
    # Signature-only stub: the body is intentionally empty.
    # The docstring is raw: in a normal string "\t" would turn \times into a TAB.
    pass


def meminfo(device_id: int = 0) -> Tuple[int, int, int]:
    """meminfo(device_id: int = 0) -> Tuple[int, int, int]


        For the device defined by ``device_id``,
        return the current used, free, and total memory in bytes.

        :param device_id: ID of the device to query
        :returns: tuple of used, free, and total memory in bytes
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def mul(*args, **kwargs):
    """mul(*args, **kwargs)
Overloaded function.

1. mul(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Multiply a ``tensor`` by a ``scalar`` value.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``


2. mul(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Multiply ``tensor1`` by ``tensor2``.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    pass


def nvtx_range_end(end: int) -> None:
    """nvtx_range_end(end: int) -> None

    Ask the Nvidia profiler to close the given `nvtx
    <https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx>`_
    range.

    Parameters:
        end: ID of the range to end
    """
    # Signature-only stub: nothing to execute here.
    ...


def nvtx_range_start(msg: str) -> int:
    """nvtx_range_start(msg: str) -> int

    Ask the Nvidia profiler to open a new `nvtx
    <https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx>`_
    range.
    Can be used to place marks in profiling output.

    Parameters:
        msg: Message attached to the range

    Returns:
        range ID to be used with :py:func:`nvtx_range_end`
    """
    # Signature-only stub: nothing to execute here.
    ...


def rdiv(tensor: CudaTensor, scalar: float, out: CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> CudaTensor:
    """rdiv(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Divide a ``scalar`` value by a ``tensor``.

        Parameters:
            tensor: tensor
            scalar: scalar value
            out: optional output tensor

        Returns:
            new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def release() -> None:
    """release() -> None


        Release all allocated memory on all GPUs.
        All :py:class:`CudaTensors <CudaTensor>` become invalid immediately.
        Do I have to tell you this is dangerous?
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def rsub(tensor: CudaTensor, scalar: float, out: CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> CudaTensor:
    """rsub(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Subtract a ``tensor`` from a ``scalar`` value.

        Parameters:
            tensor: tensor
            scalar: scalar value
            out: optional output tensor

        Returns:
            new tensor if ``out`` is ``None``, else ``out``
    """
    # Signature-only stub: the body is intentionally empty.
    pass


def sub(*args, **kwargs):
    """sub(*args, **kwargs)
Overloaded function.

1. sub(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Subtract a ``scalar`` value from a ``tensor``.

        :param tensor: tensor
        :param scalar: scalar value
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``


2. sub(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor


        Subtract ``tensor2`` from ``tensor1``.

        :param tensor1: first tensor
        :param tensor2: second tensor
        :param out: optional output tensor

        :returns: new tensor if ``out`` is ``None``, else ``out``

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    pass


def sum(*args, **kwargs):
    """sum(*args, **kwargs)
Overloaded function.

1. sum(tensor: augpy._augpy.CudaTensor, upcast: bool = False) -> augpy._augpy.CudaTensor


        Sum all elements in a tensor with saturation.

        :param tensor: tensor to sum, must be contiguous
        :param upcast: if ``True``, returns tensor with
            ``float`` or ``double`` type

        :returns: sum value as scalar tensor


2. sum(tensor: augpy._augpy.CudaTensor, axis: int, keepdim: bool = False, upcast: bool = False, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, num_threads: int = 0) -> augpy._augpy.CudaTensor


        Sum of all elements along an axis in a tensor with saturation.

        :param tensor: tensor to sum, may be strided
        :param axis: axis index to sum along
        :param keepdim: if ``True``, keep sum axis dimension with length 1
        :param upcast: if ``True``, returns tensor with
            ``float`` or ``double`` type
        :param out: output tensor (may be ``None``)

        :returns: tensor with values summed along axis,
            or ``None`` if ``out`` is tensor

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    # NOTE(review): this name shadows the builtin sum() inside the module.
    pass


def tensor_to_array(*args, **kwargs):
    """tensor_to_array(*args, **kwargs)
Overloaded function.

1. tensor_to_array(tensor: augpy._augpy.CudaTensor) -> array


        Copy a given tensor to a new numpy array.
        This initiates an asynchronous copy from device to host memory.


2. tensor_to_array(tensor: augpy._augpy.CudaTensor, array: buffer) -> array


        Copy a given tensor to a numpy array created from the given buffer ``array``.
        This initiates an asynchronous copy from device to host memory.

"""
    # Signature-only stub for two overloads; the body is intentionally empty.
    pass


# Unsigned integer data type constants: type code kDLUInt with 16, 32, 64 and 8 bits.
# 16-bit unsigned integer
uint16 = DLDataType(code=kDLUInt, bits=16)


# 32-bit unsigned integer
uint32 = DLDataType(code=kDLUInt, bits=32)


# 64-bit unsigned integer
uint64 = DLDataType(code=kDLUInt, bits=64)


# 8-bit unsigned integer
uint8 = DLDataType(code=kDLUInt, bits=8)


def warp_affine(src: CudaTensor, dst: CudaTensor, matrix: buffer, background: CudaTensor, supersampling: int) -> None:
    r"""warp_affine(src: augpy._augpy.CudaTensor, dst: augpy._augpy.CudaTensor, matrix: buffer, background: augpy._augpy.CudaTensor, supersampling: int) -> None


        Takes an image in channels-last format :math:`(H, W, C)`
        and affine warps it into a given output tensor in
        channels-first format :math:`(C, H, W)`.
        Any blank canvas is filled with a background color.
        The warp is performed with bi-linear interpolation and supersampling.

        Parameters:
            src: image tensor
            dst: target tensor
            matrix: :math:`2 \times 3` ``float`` transformation matrix,
                see :py:func:`make_affine_matrix` for details
            background: background color to fill empty canvas
            supersampling: supersampling factor, e.g., 3 means
                9 samples are taken in a :math:`3 \times 3` grid
    """
    # Signature-only stub: the body is intentionally empty.
    # The docstring is raw: in a normal string "\t" would turn \times into a TAB.
    pass


# Public API of the module. The list must be bound to ``__all__``: a plain
# ``all`` shadows the builtin and overwrites the exported name 'all' that the
# list itself contains.
__all__ = [
    'CuRandError',
    'CudaDevice',
    'CudaDeviceProp',
    'CudaError',
    'CudaEvent',
    'CudaStream',
    'CudaTensor',
    'CutlassError',
    'kDLInt',
    'kDLFloat',
    'DLDataTypeCode',
    'DLDataType',
    'kDLUInt',
    'Decoder',
    'MemoryError',
    'NvJpegError',
    'RandomNumberGenerator',
    'WarpScaleMode',
    'WARP_SCALE_LONGEST',
    'WARP_SCALE_SHORTEST',
    'add',
    'all',
    'array_to_tensor',
    'box_blur_single',
    'cast',
    'copy',
    'default_stream',
    'disable_profiler',
    'div',
    'empty_like',
    'enable_profiler',
    'eq',
    'export_dltensor',
    'fill',
    'float16',
    'float32',
    'float64',
    'fma',
    'gaussian_blur',
    'gaussian_blur_single',
    'ge',
    'gemm',
    'get_current_device',
    'get_current_stream',
    'get_device_properties',
    'gt',
    'import_dltensor',
    'init',
    'int16',
    'int32',
    'int64',
    'int8',
    'le',
    'lighting',
    'lt',
    'make_affine_matrix',
    'meminfo',
    'mul',
    'nvtx_range_end',
    'nvtx_range_start',
    'rdiv',
    'release',
    'rsub',
    'sub',
    'sum',
    'tensor_to_array',
    'uint16',
    'uint32',
    'uint64',
    'uint8',
    'warp_affine',
]