# Source code for augpy._augpy

"""Python bindings for image processing CUDA functions"""


from typing import Tuple
from typing import List


class pybind11_object:
    """Placeholder base class that the binding classes in this stub module derive from."""


class buffer:
    """
    Stand-in type for any object that supports the buffer interface,
    e.g., ``bytearray`` or ``numpy.ndarray``.
    """


class capsule:
    """Stand-in type for a Python object that holds a reference to a C object."""


class CuRandError(Exception):
    """Exception type exported by the bindings (presumably signals cuRAND failures -- TODO confirm)."""


class DLDataTypeCode:
    """
    Enum of :ref:`cpp/tensor:dlpack` type codes.

    Members:

      kDLInt : Signed integer.

      kDLUInt : Unsigned integer.

      kDLFloat : Floating point number.
    """

    def __init__(self: 'DLDataTypeCode', arg0: int) -> None:
        """Construct a type code from its integer value ``arg0``."""

    @property
    def kDLInt(self):
        """Enum member: signed integer."""

    @property
    def kDLUInt(self):
        """Enum member: unsigned integer."""

    @property
    def kDLFloat(self):
        """Enum member: floating point number."""
# Module-level shortcuts for DLDataTypeCode members.
kDLInt = DLDataTypeCode(0)  # signed integer type code
kDLFloat = DLDataTypeCode(2)  # floating point type code
class DLDataType(pybind11_object):
    """
    :ref:`cpp/tensor:dlpack` data type for :py:class:`CudaTensors <CudaTensor>`.

    Parameters:
        code: See :py:class:`DLDataTypeCode`
        bits: Number of bits
        lanes: Number of elements for vector types; must be 1 to use
            with :py:class:`CudaTensor`
    """

    def __init__(self: 'DLDataType', code: int, bits: int, lanes: int = 1) -> None:
        """Create a data type from a type code, a bit width and a lane count."""

    @property
    def bits(self):
        """Number of bits."""

    @property
    def code(self):
        """Type code, see :py:class:`DLDataTypeCode`."""

    @property
    def itemsize(self):
        """Number of bytes per element with this data type."""

    @property
    def lanes(self):
        """
        Number of elements for vector types.
        Must be 1 to use with :py:class:`CudaTensor`.
        """
# Module-level shortcut for the unsigned integer DLDataTypeCode member.
kDLUInt = DLDataTypeCode(1)
class CudaDevice(pybind11_object):
    """
    Create a new CudaDevice with the given Cuda device ID.
    0 is the default and typically fastest device in the system.

    Parameters:
        device_id: GPU device ID
    """

    def __init__(self: 'CudaDevice', device_id: int) -> None:
        """Create the wrapper for the Cuda device with ID ``device_id``."""

    # NOTE(review): the generated text for activate/deactivate spoke of the
    # "current_stream", word for word the same as on CudaStream. It is
    # reworded for devices here on the assumption that this was a copy-paste
    # slip -- confirm against the C++ binding.
    def activate(self: 'CudaDevice') -> None:
        """
        Make this the :ref:`py/core:current_device` and remember the
        previous device.
        """

    def deactivate(self: 'CudaDevice') -> None:
        """Make the previous device the :ref:`py/core:current_device`."""

    def get_device(self: 'CudaDevice') -> int:
        """Return the device ID."""

    def get_properties(self: 'CudaDevice') -> 'CudaDeviceProp':
        """
        Return the device properties, see
        :ref:`py/core:get_device_properties` for more details.
        """

    def synchronize(self: 'CudaDevice') -> None:
        """
        Block until all work on this device has finished.

        Cuda uses busy waiting to achieve this. See the synchronization
        methods of :ref:`py/core:CudaStream` or :ref:`py/core:CudaEvent`
        to avoid the CPU load this incurs.
        """
class CudaDeviceProp(pybind11_object):
    """
    The `cudaDeviceProp <https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaDeviceProp.html>`_
    struct extended with stream priority fields
    :py:attr:`leastStreamPriority` and :py:attr:`greatestStreamPriority`,
    :py:attr:`coresPerMultiprocessor`, and :py:attr:`maxGridSize`.
    """

    def __init__(self):
        """Initialize self.  See help(type(self)) for accurate signature."""

    @property
    def coresPerMultiprocessor(self):
        """Number of Cuda cores per multiprocessor."""

    @property
    def coresPerSM(self):
        """Number of Cuda cores per SM."""

    @property
    def greatestStreamPriority(self):
        """Highest priority a Cuda stream on this device can have."""

    @property
    def l2CacheSize(self):
        """Size of L2 cache in bytes."""

    @property
    def leastStreamPriority(self):
        """Lowest priority a Cuda stream on this device can have."""

    @property
    def major(self):
        """Major compute capability."""

    @property
    def maxGridSize(self):
        """Max number of blocks in each grid dimension."""

    @property
    def maxThreadsDim(self):
        """Maximum size of each dimension of a block."""

    @property
    def maxThreadsPerBlock(self):
        """Maximum number of threads per block."""

    @property
    def maxThreadsPerMultiProcessor(self):
        """Maximum resident threads per multiprocessor."""

    @property
    def minor(self):
        """Minor compute capability."""

    @property
    def multiProcessorCount(self):
        """Number of multiprocessors on device."""

    @property
    def name(self):
        """ASCII string identifying device."""

    @property
    def numCudaCores(self):
        """Total number of Cuda cores."""

    @property
    def regsPerBlock(self):
        """32-bit registers available per block."""

    @property
    def regsPerMultiprocessor(self):
        """32-bit registers available per multiprocessor."""

    @property
    def sharedMemPerBlock(self):
        """Shared memory available per block in bytes."""

    @property
    def sharedMemPerMultiprocessor(self):
        """Shared memory available per multiprocessor in bytes."""

    @property
    def streamPrioritiesSupported(self):
        """Device supports stream priorities."""

    @property
    def totalConstMem(self):
        """Constant memory available on device in bytes."""

    @property
    def totalGlobalMem(self):
        """Global memory available on device in bytes."""

    @property
    def warpSize(self):
        """Warp size in threads."""
class CudaError(Exception):
    """Exception type exported by the bindings (presumably signals Cuda failures -- TODO confirm)."""
class CudaEvent(pybind11_object):
    """
    Convenience wrapper for the
    `cudaEvent_t <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__EVENT.html>`_.

    Creating a new CudaEvent retrieves an event from the event pool
    of the :ref:`py/core:current_device`.
    """

    def __init__(self: 'CudaEvent') -> None:
        """Create a new event wrapper; takes no arguments."""

    def query(self: 'CudaEvent') -> bool:
        """Returns ``True`` if event has occurred."""

    def record(self: 'CudaEvent') -> None:
        """Record wrapped event on :ref:`py/core:current_stream`."""

    def synchronize(self: 'CudaEvent', microseconds: int = 100) -> None:
        """
        Block until event has occurred.

        Checks in ``microseconds`` interval. Faster intervals make this
        more accurate, but increase CPU load. Uses standard Cuda
        busy-waiting method if ``microseconds <= 0``.

        Parameters:
            microseconds: check interval
        """
class CudaStream(pybind11_object):
    """
    Convenience wrapper for the
    `cudaStream_t <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html>`_
    type.

    Creates a new Cuda stream on the given device. Lower numbers mean
    higher priority, and values are clipped to the valid range.
    Use :py:func:`get_device_properties` to get the range of possible
    values for a device.

    See: `cudaStreamCreateWithPriority <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1ge2be9e9858849bf62ba4a8b66d1c3540>`_

    Use ``device_id=-1`` and ``priority=-1`` to get the
    :py:attr:`default_stream`.

    Parameters:
        device_id: GPU device ID
        priority: stream priority
    """

    def __init__(self: 'CudaStream', device_id: int = 0, priority: int = 0) -> None:
        """Create a stream on device ``device_id`` with the given ``priority``."""

    def activate(self: 'CudaStream') -> None:
        """
        Make this the :ref:`py/core:current_stream` and remember the
        previous stream.
        """

    def deactivate(self: 'CudaStream') -> None:
        """Make the previous stream the :ref:`py/core:current_stream`."""

    def synchronize(self: 'CudaStream', microseconds: int = 100) -> None:
        """
        Block until all work on this stream has finished.

        Checks in ``microseconds`` interval. Faster intervals make this
        more accurate, but increase CPU load. Uses standard Cuda
        busy-waiting method if ``microseconds <= 0``.
        """
class CudaTensor(pybind11_object):
    """
    Create a new, empty tensor on a GPU device.

    Parameters:
        shape: shape of the tensor
        dtype: data type
        device_id: Cuda device id
    """

    def __init__(
        self: 'CudaTensor',
        shape: List[int],
        dtype: DLDataType = DLDataType(code=kDLUInt, bits=8),
        device_id: int = 0,
    ) -> None:
        """Create an empty tensor with the given ``shape`` and ``dtype`` on device ``device_id``."""

    @property
    def byte_offset(self):
        """Starting offset in bytes for the data pointer."""

    @property
    def dtype(self):
        """Tensor data type."""

    def fill(self, *args, **kwargs):
        """
        Overloaded function.

        1. fill(self: augpy._augpy.CudaTensor, scalar: float) -> augpy._augpy.CudaTensor

        Fill the tensor with the given scalar value.

        :returns: this tensor

        2. fill(self: augpy._augpy.CudaTensor, other: augpy._augpy.CudaTensor) -> augpy._augpy.CudaTensor

        Copy the given tensor into this tensor.

        :returns: this tensor
        """

    @property
    def is_contiguous(self):
        """
        ``True`` if the tensor is contiguous, i.e., elements are located
        next to each other in memory.
        """

    @property
    def itemsize(self):
        """Size of one element in bytes."""

    @property
    def ndim(self):
        """Number of dimensions."""

    def numpy(self, *args, **kwargs):
        """
        Overloaded function.

        1. numpy(self: augpy._augpy.CudaTensor) -> array

        Create a new numpy array and start copying data from the
        device to host memory.

        2. numpy(self: augpy._augpy.CudaTensor, array: buffer = None) -> array

        Create a new numpy array from the given buffer and start copying
        data from the device to host memory.

        :param array: buffer to create new array from
        """

    @property
    def ptr(self):
        """Data pointer."""

    def reshape(self: 'CudaTensor', shape: List[int]) -> 'CudaTensor':
        """
        Return a new tensor that uses the same backing memory
        with a different shape.

        Shape must have same number of elements.
        Only contiguous tensors can be reshaped.

        Parameters:
            shape: new shape
        """

    @property
    def shape(self):
        """Tensor shape."""

    @property
    def size(self):
        """Number of elements in the tensor."""

    @property
    def strides(self):
        """
        Tensor strides, i.e., the number of elements to add to a flat
        tensor to reach the next element for each dimension.
        """

    def sum(self, *args, **kwargs):
        """
        Overloaded function.

        1. sum(self: augpy._augpy.CudaTensor, upcast: bool = False) -> augpy._augpy.CudaTensor

        Sum all values in the tensor.

        :param upcast: if ``True``, the output scalar tensor will be
            promoted to a more expressive data type to avoid saturation
        :returns: sum as scalar tensor

        2. sum(self: augpy._augpy.CudaTensor, axis: int, keepdim: bool = False, upcast: bool = False, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

        Sum all values in the tensor along an axis.

        :param axis: which axis to sum along
        :param keepdim: keep the summed dimension with size 1
        :param upcast: if ``True``, the output scalar tensor will be
            promoted to a more expressive data type to avoid saturation
        :param out: use this tensor as output, must have correct shape,
            and same data type if ``upcast`` is ``False``, otherwise
            promoted type is required
        :returns: tensor summed along axis
        """
class CutlassError(Exception):
    """Exception type exported by the bindings (presumably signals CUTLASS failures -- TODO confirm)."""
class Decoder(pybind11_object):
    """
    Wrapper for Nvjpeg-based JPEG decoding, created on the
    :ref:`py/core:current_device`.

    See: `Nvjpeg docs <https://docs.nvidia.com/cuda/nvjpeg/index.html#nvjpeg-set-device-mem-padding>`_

    Parameters:
        device_padding: memory padding on the device
        host_padding: memory padding on the host
        gpu_huffman: enable Huffman decoding on the GPU; not recommended
            unless you really need to offload from CPU
    """

    def __init__(
        self: 'Decoder',
        device_padding: int = 16777216,
        host_padding: int = 8388608,
        gpu_huffman: bool = False,
    ) -> None:
        """Create a decoder with the given paddings and Huffman decoding mode."""

    def decode(self: 'Decoder', data: str, buffer: CudaTensor = None) -> CudaTensor:
        """
        Decode a JPEG image using Nvjpeg.
        Output is in :math:`(H,W,C)` format and resides on the GPU device.

        Parameters:
            data: compressed JPEG image as a JFIF string,
                i.e., the full file contents
            buffer: optional buffer to use; may be ``None``;
                if not ``None`` must be big enough to contain
                the decoded image

        Returns:
            new tensor with decoded image on GPU in :math:`(H,W,C)` format
        """
# NOTE: this class shares its name with the builtin ``MemoryError``; within
# this module the name resolves to the class defined here.
class MemoryError(Exception):
    """Exception type exported by the bindings (presumably signals memory failures -- TODO confirm)."""


class NvJpegError(Exception):
    """Exception type exported by the bindings (presumably signals Nvjpeg failures -- TODO confirm)."""
class RandomNumberGenerator(pybind11_object):
    """
    A convenient wrapper for cuRAND methods that fill tensors with
    pseudo-random numbers.

    Parameters:
        device_id: GPU device ID; if ``None``,
            :ref:`py/core:current_device` is used
        seed: random seed; if ``None``, read values from
            `std::random_device <https://en.cppreference.com/w/cpp/numeric/random/random_device>`_
            to create a random seed.
    """

    def __init__(self: 'RandomNumberGenerator', device_id: object = None, seed: object = None) -> None:
        """Create a generator for ``device_id`` seeded with ``seed``; both may be ``None``."""

    def gaussian(
        self: 'RandomNumberGenerator',
        target: CudaTensor,
        mean: float = 0.0,
        std: float = 1.0,
        blocks_per_sm: int = 8,
        threads: int = 0,
    ) -> None:
        """
        Fill ``target`` tensor with Gaussian distributed numbers with
        specified ``mean`` and standard deviation ``std``.

        .. note::
            This is supported for integer tensors. Values are drawn from
            the given distribution, then rounded and cast to the data
            type of the tensor with saturation. The values in an integer
            tensor are thus only approximately Gaussian distributed.

        Parameters:
            target: tensor to fill
            mean: Gaussian mean
            std: Gaussian standard deviation
        """

    def uniform(
        self: 'RandomNumberGenerator',
        target: CudaTensor,
        vmin: float,
        vmax: float,
        blocks_per_sm: int = 8,
        threads: int = 0,
    ) -> None:
        # Raw docstring: in a normal string literal the ``\f`` of ``\frac``
        # would be read as a form-feed escape and corrupt the formula.
        r"""
        Fill ``target`` tensor with uniformly distributed number in
        :math:`[v_{min}, v_{max})`.

        .. note::
            This is supported for integer tensors. Values are cast from
            float or double down to the integer type. The mean of the
            values is approximately :math:`\frac{v_{max} + v_{min}}{2}`.

        .. warning::
            Saturation is not used. :math:`v_{min}` and :math:`v_{max}`
            must be representable in the target tensor data type, else
            values may under or overflow.

        Parameters:
            target: tensor to fill
            vmin: minimum value; can occur
            vmax: maximum value; does not occur
        """
class WarpScaleMode:
    """
    Enum whether to scale relative to the shortest or longest side
    of the image.

    Members:

      WARP_SCALE_SHORTEST : Scaling is relative to the shortest side of the image.

      WARP_SCALE_LONGEST : Scaling is relative to the longest side of the image.
    """

    def __init__(self: 'WarpScaleMode', arg0: int) -> None:
        """Construct a scale mode from its integer value ``arg0``."""

    @property
    def WARP_SCALE_SHORTEST(self):
        """Enum member: scaling is relative to the shortest side of the image."""

    @property
    def WARP_SCALE_LONGEST(self):
        """Enum member: scaling is relative to the longest side of the image."""
# Module-level shortcuts for WarpScaleMode members.
WARP_SCALE_LONGEST = WarpScaleMode(1)  # scale relative to the longest side
WARP_SCALE_SHORTEST = WarpScaleMode(0)  # scale relative to the shortest side
def add(*args, **kwargs):
    """
    Overloaded function.

    1. add(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Add a ``scalar`` value to a ``tensor``.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. add(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Add ``tensor2`` to ``tensor1``.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
def all(tensor: CudaTensor) -> CudaTensor:
    """
    Check whether all elements in a tensor are greater than zero.

    Parameters:
        tensor: tensor to check, must be contiguous

    Returns:
        ``0`` or ``1`` as scalar ``uint8`` tensor
    """
def array_to_tensor(*args, **kwargs):
    """
    Overloaded function.

    1. array_to_tensor(array: buffer, device_id: int = 0) -> augpy._augpy.CudaTensor

    Copy a Python buffer into a new tensor on the specified GPU device.
    This initiates an asynchronous copy from host to device memory.

    2. array_to_tensor(array: buffer, tensor: augpy._augpy.CudaTensor) -> augpy._augpy.CudaTensor

    Copy a Python buffer to a tensor created from the given buffer
    ``tensor``. This initiates an asynchronous copy from host to
    device memory.
    """
def box_blur_single(
    input: CudaTensor,
    ksize: int,
    out: CudaTensor = None,
) -> CudaTensor:
    """
    Apply box blur to a single image.

    Kernel size describes both width and height in pixels of the area
    in the input that is averaged for each output pixel. Odd values are
    recommended for best results. For even values, the center of the
    kernel is below and to the right of the true center. This means the
    output is shifted up and left by half a pixel.

    Parameters:
        input: image tensor in channel-first format
        ksize: kernel size in pixels
        out: output tensor (may be ``None``)

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
def cast(*args, **kwargs):
    """
    Overloaded function.

    1. cast(tensor: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

    Read values from ``tensor``, cast them to the data type of ``out``
    and store them there. ``tensor`` and ``out`` must have the same shape.

    :param tensor: source tensor
    :param out: output tensor

    2. cast(tensor: augpy._augpy.CudaTensor, dtype: augpy._augpy.DLDataType, blocks_per_sm: int = 8, threads: int = 0) -> augpy._augpy.CudaTensor

    Create a new tensor with values from ``tensor`` cast to the given
    data type ``dtype``.

    :param tensor: source tensor
    :param dtype: target data type
    :returns: new tensor with given data type
    """
def copy(
    src: CudaTensor,
    dst: CudaTensor,
    blocks_per_sm: int = 8,
    threads: int = 0,
) -> CudaTensor:
    """Copy ``src`` into ``dst``. Supports broadcasting."""
# Module-level stream object built with device_id=-1 and priority=-1.
default_stream = CudaStream(device_id=-1, priority=-1)


def disable_profiler() -> None:
    """Disable the Cuda profiler."""
def div(*args, **kwargs):
    """
    Overloaded function.

    1. div(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Divide a ``tensor`` by a ``scalar`` value.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. div(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Divide ``tensor1`` by ``tensor2``.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
def empty_like(tensor: CudaTensor) -> CudaTensor:
    """
    Create a new tensor with the same shape, dtype and on the same
    device as ``tensor``.
    """
def enable_profiler() -> None:
    """Enable the Cuda profiler."""
def eq(*args, **kwargs):
    """
    Overloaded function.

    1. eq(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Compute ``tensor == scalar`` as ``uint8`` tensor, where ``1`` means
    the condition is met and ``0`` otherwise.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. eq(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Compute ``tensor1 == tensor2`` as ``uint8`` tensor, where ``1`` means
    the condition is met and ``0`` otherwise.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
def export_dltensor(
    tensor: object,
    name: str = 'dltensor',
    destruct: bool = True,
) -> capsule:
    """
    Export a GPU tensor to be used by another library.

    Parameters:
        tensor: Python-wrapped CudaTensor
        name: name under which the tensor is stored in the returned
            :py:ref:`capsule <Capsules>`, e.g., ``"dltensor"`` for Pytorch
        destruct: if ``True``, add a destructor to the
            :py:ref:`capsule <Capsules>` which will delete the tensor
            when the capsule is deleted; only set to ``False`` if you
            know what you're doing

    Returns:
        :py:ref:`capsule <Capsules>` with exported :py:class:`CudaTensor`
    """
def fill(
    scalar: float,
    dst: CudaTensor,
    blocks_per_sm: int = 8,
    threads: int = 0,
) -> CudaTensor:
    """Fill ``dst`` with the given ``scalar`` value."""
# Floating point data types, one per supported bit width.
float16 = DLDataType(code=kDLFloat, bits=16)
float32 = DLDataType(code=kDLFloat, bits=32)
float64 = DLDataType(code=kDLFloat, bits=64)
# NOTE(review): the formula below is a product of three factors although the
# summary calls it a fused multiply-add -- confirm against the Cuda kernel
# before relying on either wording.
def fma(
    scalar: float,
    tensor1: CudaTensor,
    tensor2: CudaTensor,
    out: CudaTensor = None,
    blocks_per_sm: int = 8,
    threads: int = 512,
) -> CudaTensor:
    # Raw docstring: ``\c`` in ``\cdot`` is an invalid escape sequence in a
    # normal string literal.
    r"""
    Compute a fused multiply-add on a scalar and two tensors, i.e.,

    .. math::
        r = s \cdot t_1 \cdot t_2

    If ``tensor1`` has an unsigned integer data type, then ``tensor2``
    must have the signed version of the same type, e.g., a ``uint8``
    tensor must be paired with a ``int8`` tensor.

    Parameters:
        scalar: scalar factor
        tensor1: tensor :math:`t_1`
        tensor2: tensor :math:`t_2`
        out: optional output tensor :math:`r`

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
def gaussian_blur(
    input: CudaTensor,
    sigmas: CudaTensor,
    max_ksize: int,
    out: CudaTensor = None,
) -> CudaTensor:
    """
    Apply Gaussian blur to a batch of images.

    Maximum kernel size can be calculated like this:

    ``ksize = max(3, int(max(sigmas) * 6.6 - 2.3) | 1)``

    I.e., ``ksize`` is at least 3 and always odd.

    The given kernel size defines the upper limit. The actual kernel
    size is calculated with the formula above and clipped at the given
    maximum. Smaller values can be given to trade speed vs quality.
    Bigger values typically do not visibly improve quality.

    Odd values are strongly recommended for best results. For even
    values, the center of the kernel is below and to the right of the
    true center. This means the output is shifted up and left by half
    a pixel. This can lead to inconsistencies between images in the
    batch. Images with large sigmas may be shifted, while smaller
    sigmas mean no shift occurs.

    Parameters:
        input: batch tensor with images in first dimension
        sigmas: float tensor with one sigma value per image in the batch
        max_ksize: maximum kernel size in pixels
        out: output tensor (may be ``None``)

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
def gaussian_blur_single(
    input: CudaTensor,
    sigma: float,
    out: CudaTensor = None,
) -> CudaTensor:
    """
    Apply Gaussian blur to a single image.

    Kernel size is calculated like this:

    ``ksize = max(3, int(sigma * 6.6 - 2.3) | 1)``

    I.e., ``ksize`` is at least 3 and always odd.

    Parameters:
        input: image tensor in channel-first format
        sigma: standard deviation of the kernel
        out: output tensor (may be ``None``)

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
def ge(
    tensor: CudaTensor,
    scalar: float,
    out: CudaTensor = None,
    blocks_per_sm: int = 8,
    threads: int = 512,
) -> CudaTensor:
    """
    Compute ``tensor >= scalar`` as ``uint8`` tensor, where ``1`` means
    the condition is met and ``0`` otherwise.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
def gemm(
    A: CudaTensor,
    B: CudaTensor,
    C: CudaTensor = None,
    alpha: float = 1.0,
    beta: float = 0.0,
) -> CudaTensor:
    # Raw docstring: in a normal string literal ``\t``, ``\a`` and ``\b``
    # (from ``\times``, ``\alpha``, ``\beta``) are read as TAB, BEL and
    # backspace and would corrupt the formula.
    r"""
    Calculate the matrix multiplication of two 2D tensors.
    More specifically calculates

    .. math::
        C = A \times (\alpha \cdot B) + \beta \cdot C

    Only ``float`` and ``double`` are supported.
    All tensors must have the same data type.
    All tensors must be contiguous.

    Returns:
        new output tensor if ``C`` is ``None``, otherwise ``C``
    """
def get_current_device() -> int:
    """
    Returns the active device ID.

    See: :ref:`py/core:current_device`.
    """
def get_current_stream() -> CudaStream:
    """
    Returns the active :py:class:`CudaStream`.

    See: :ref:`py/core:current_stream`
    """
def get_device_properties(device_id: int) -> CudaDeviceProp:
    """
    Get :py:class:`CudaDeviceProp` for given device.

    Parameters:
        device_id: Cuda device id

    Returns:
        CudaDeviceProp: properties of device
    """
def gt(
    tensor: CudaTensor,
    scalar: float,
    out: CudaTensor = None,
    blocks_per_sm: int = 8,
    threads: int = 512,
) -> CudaTensor:
    """
    Compute ``tensor > scalar`` as ``uint8`` tensor, where ``1`` means
    the condition is met and ``0`` otherwise.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
def import_dltensor(tensor_capsule: capsule, name: str) -> CudaTensor:
    """
    Import a GPU tensor from another library into augpy.

    Parameters:
        tensor_capsule: a Python :py:ref:`capsule <Capsules>` object
            that contains a :any:`DLManagedTensor`
        name: name under which the tensor is stored in the
            :py:ref:`capsule <Capsules>`, e.g., ``"dltensor"`` for Pytorch

    Returns:
        other tensor wrapped in a :py:class:`CudaTensor`
    """
def init() -> None:
    """init() -> None

    Set the `cudaDeviceScheduleYield <https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__DEVICE.html#group__CUDART__DEVICE_1g69e73c7dda3fc05306ae7c811a690fac>`_
    flag for the :ref:`py/core:current_device`.

    .. warning::
        EXPERIMENTAL! MAY REDUCE GPU THROUGHPUT AND BREAK MANY THINGS!
    """
    pass


# Signed integer data types, one module-level constant per bit width.
int8 = DLDataType(code=kDLInt, bits=8)
int16 = DLDataType(code=kDLInt, bits=16)
int32 = DLDataType(code=kDLInt, bits=32)
int64 = DLDataType(code=kDLInt, bits=64)
def le(*args, **kwargs):
    """le(*args, **kwargs)
    Overloaded function.

    1. le(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Compute ``tensor <= scalar`` as ``uint8`` tensor, where ``1`` means the
    condition is met and ``0`` otherwise.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. le(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Compute ``tensor1 <= tensor2`` as ``uint8`` tensor, where ``1`` means the
    condition is met and ``0`` otherwise.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def lighting(
    imtensor: CudaTensor,
    gammagrays: CudaTensor,
    gammacolors: CudaTensor,
    contrasts: CudaTensor,
    vmin: float,
    vmax: float,
    out: CudaTensor = None,
) -> CudaTensor:
    # Raw docstring: the LaTeX below contains ``\f`` (as in ``\frac``), which
    # a normal string literal would turn into a form feed.
    r"""lighting(imtensor: augpy._augpy.CudaTensor, gammagrays: augpy._augpy.CudaTensor, gammacolors: augpy._augpy.CudaTensor, contrasts: augpy._augpy.CudaTensor, vmin: float, vmax: float, out: augpy._augpy.CudaTensor = None) -> augpy._augpy.CudaTensor

    Apply lighting augmentation to a batch of images.
    This is a four-step process:

    #. Normalize values :math:`v_{norm} = \frac{v - v_{min}}{v_{max}-v_{min}}`
       with :math:`v_{min}` the minimum and :math:`v_{max}` the maximum
       lightness value
    #. Apply contrast change
    #. Apply gamma correction
    #. Denormalize values :math:`v' = v_{norm} * (v_{max}-v_{min}) + v_{min}`

    To change contrast two reference functions are used.
    With contrast :math:`\mathcal{c} \ge 0`, i.e., increased contrast,
    the following function is used:

    .. math::
        f_{pos}(v) = \frac{1.0037575963899724}{1 + exp(6.279 + v \cdot 12.558)} - 0.0018787981949862

    With contrast :math:`\mathcal{c} < 0`, i.e., decreased contrast,
    the following function is used:

    .. math::
        f_{neg}(v) = 0.1755606108304832 \cdot atanh(v \cdot 1.986608 - 0.993304) + 0.5

    The final value is :math:`v' = (1-\mathcal{c}) \cdot v + \mathcal{c} \cdot f(v)`.

    Brightness and color changes are done via gamma correction.

    .. math::
        v' = v^{\gamma_{gray} \cdot \gamma_c}

    with :math:`\gamma_{gray}` the gamma for overall lightness and
    :math:`\gamma_{c}` the per-channel gamma.

    Parameters:
        imtensor: image tensor in :math:`(N,C,H,W)` format
        gammagrays: tensor of :math:`N` gamma gray values
        gammacolors: tensor of :math:`C\cdot N` gamma values in the format
            :math:`\gamma_{1,1}, \gamma_{1,2}, ..., \gamma_{1,C}, \gamma_{2,1}, \gamma_{2,2}, ... \gamma_{N,C-1}, \gamma_{N,C}`
        contrasts: tensor of :math:`N` contrast values in :math:`[-1, 1]`
        vmin: minimum lightness value in images
        vmax: maximum lightness value in images
        out: output tensor (may be ``None``)

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def lt(*args, **kwargs):
    """lt(*args, **kwargs)
    Overloaded function.

    1. lt(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Compute ``tensor < scalar`` as ``uint8`` tensor, where ``1`` means the
    condition is met and ``0`` otherwise.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. lt(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Compute ``tensor1 < tensor2`` as ``uint8`` tensor, where ``1`` means the
    condition is met and ``0`` otherwise.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def make_affine_matrix(
    out: buffer,
    source_height: int,
    source_width: int,
    target_height: int,
    target_width: int,
    angle: float = 0.0,
    scale: float = 1.0,
    aspect: float = 1.0,
    shifty: float = 0.0,
    shiftx: float = 0.0,
    sheary: float = 0.0,
    shearx: float = 0.0,
    hmirror: bool = False,
    vmirror: bool = False,
    scale_mode: WarpScaleMode = WarpScaleMode.WARP_SCALE_SHORTEST,
    max_supersampling: int = 3,
) -> int:
    # Raw docstring: ``\times`` below would otherwise start with a TAB.
    r"""make_affine_matrix(out: buffer, source_height: int, source_width: int, target_height: int, target_width: int, angle: float = 0.0, scale: float = 1.0, aspect: float = 1.0, shifty: float = 0.0, shiftx: float = 0.0, sheary: float = 0.0, shearx: float = 0.0, hmirror: bool = False, vmirror: bool = False, scale_mode: augpy._augpy.WarpScaleMode = WarpScaleMode.WARP_SCALE_SHORTEST, max_supersampling: int = 3) -> int

    Create a :math:`2 \times 3` matrix for a set of affine transformations.
    This matrix is compatible with the
    `warpAffine <https://docs.opencv.org/3.4/da/d54/group__imgproc__transform.html#ga0203d9ee5fcd28d40dbc4a1ea4451983>`_
    function of OpenCV with the
    `WARP_INVERSE_MAP <https://docs.opencv.org/3.4/da/d54/group__imgproc__transform.html#gga5bb5a1fea74ea38e1a5445ca803ff121aa48be1c433186c4eae1ea86aa0ca75ba>`_
    flag set.

    Transforms are applied in the following order:

    #. shear
    #. scale & aspect ratio
    #. horizontal & vertical mirror
    #. rotation
    #. horizontal & vertical shift

    See:
        :py:func:`make_transform` for a more convenient version of this function.

    Parameters:
        out: output buffer that matrix is written to;
            must be a writeable :math:`2 \times 3` ``float`` buffer
        source_height: :math:`h_s` height of the image in pixels
        source_width: :math:`w_s` width of the image in pixels
        target_height: :math:`h_t` height of the output canvas in pixels
        target_width: :math:`w_t` width of the output canvas in pixels
        angle: clockwise angle in degrees with image center as rotation axis
        scale: scale factor relative to output size;
            1 means fill target height or width wise depending on
            ``scale_mode`` and whichever is longest/shortest;
            larger values will crop, smaller values leave empty space
            in the output canvas
        aspect: controls the aspect ratio; 1 means same as input,
            values greater than 1 increase the width and reduce the height
        shifty: shift the image in y direction (vertical);
            0 centers the image on the output canvas;
            -1 means shift up as much as possible;
            1 means shift down as much as possible;
            the maximum distance to shift is
            :math:`max(scale \cdot h_s - h_t, h_t - scale \cdot h_s)`
        shiftx: same as ``shifty``, but in x direction (horizontal)
        sheary: controls up/down shear; for every pixel in the x direction
            move ``sheary`` pixels in y direction
        shearx: same as ``sheary`` but controls left/right shear
        hmirror: if ``True`` flip image horizontally
        vmirror: if ``True`` flip image vertically
        scale_mode: if :py:attr:`WarpScaleMode.WARP_SCALE_SHORTEST`
            scale is relative to shortest side;
            this fills the output canvas, cropping the image if necessary;
            if :py:attr:`WarpScaleMode.WARP_SCALE_LONGEST`
            scale is relative to longest side;
            this ensures the image is contained inside the output canvas,
            but leaves empty space
        max_supersampling: upper limit for recommended supersampling

    Returns:
        recommended supersampling factor for the warp
    """
    pass
def meminfo(device_id: int = 0) -> Tuple[int, int, int]:
    """meminfo(device_id: int = 0) -> Tuple[int, int, int]

    For the device defined by ``device_id``, return the current used, free,
    and total memory in bytes.
    """
    pass
def mul(*args, **kwargs):
    """mul(*args, **kwargs)
    Overloaded function.

    1. mul(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Multiply a ``tensor`` by a ``scalar`` value.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. mul(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Multiply ``tensor1`` by ``tensor2``.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def nvtx_range_end(end: int) -> None:
    """nvtx_range_end(end: int) -> None

    Tell the Nvidia profiler to end the given
    `nvtx <https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx>`_
    range.

    Parameters:
        end: ID of the range to end
    """
    pass


def nvtx_range_start(msg: str) -> int:
    """nvtx_range_start(msg: str) -> int

    Tell the Nvidia profiler to start a new
    `nvtx <https://docs.nvidia.com/cuda/profiler-users-guide/index.html#nvtx>`_
    range. Can be used to place marks in profiling output.

    Parameters:
        msg: Message attached to the range

    Returns:
        range ID to be used with :py:func:`nvtx_range_end`
    """
    pass
def rdiv(
    tensor: CudaTensor,
    scalar: float,
    out: CudaTensor = None,
    blocks_per_sm: int = 8,
    threads: int = 512,
) -> CudaTensor:
    """rdiv(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Divide a ``scalar`` value by a ``tensor``.

    Parameters:
        tensor: tensor
        scalar: scalar value
        out: optional output tensor

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def release() -> None:
    """release() -> None

    Release all allocated memory on all GPUs.
    All :py:class:`CudaTensors <CudaTensor>` become invalid immediately.
    Do I have to tell you this is dangerous?
    """
    pass
def rsub(
    tensor: CudaTensor,
    scalar: float,
    out: CudaTensor = None,
    blocks_per_sm: int = 8,
    threads: int = 512,
) -> CudaTensor:
    """rsub(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Subtract a ``tensor`` from a ``scalar`` value.

    Parameters:
        tensor: tensor
        scalar: scalar value
        out: optional output tensor

    Returns:
        new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def sub(*args, **kwargs):
    """sub(*args, **kwargs)
    Overloaded function.

    1. sub(tensor: augpy._augpy.CudaTensor, scalar: float, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Subtract a ``scalar`` value from a ``tensor``.

    :param tensor: tensor
    :param scalar: scalar value
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``

    2. sub(tensor1: augpy._augpy.CudaTensor, tensor2: augpy._augpy.CudaTensor, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, threads: int = 512) -> augpy._augpy.CudaTensor

    Subtract ``tensor2`` from ``tensor1``.

    :param tensor1: first tensor
    :param tensor2: second tensor
    :param out: optional output tensor
    :returns: new tensor if ``out`` is ``None``, else ``out``
    """
    pass
def sum(*args, **kwargs):
    """sum(*args, **kwargs)
    Overloaded function.

    1. sum(tensor: augpy._augpy.CudaTensor, upcast: bool = False) -> augpy._augpy.CudaTensor

    Sum all elements in a tensor with saturation.

    :param tensor: tensor to sum, must be contiguous
    :param upcast: if ``True``, returns tensor with ``float`` or ``double`` type
    :returns: sum value as scalar tensor

    2. sum(tensor: augpy._augpy.CudaTensor, axis: int, keepdim: bool = False, upcast: bool = False, out: augpy._augpy.CudaTensor = None, blocks_per_sm: int = 8, num_threads: int = 0) -> augpy._augpy.CudaTensor

    Sum of all elements along an axis in a tensor with saturation.

    :param tensor: tensor to sum, may be strided
    :param axis: axis index to sum along
    :param keepdim: if ``True``, keep sum axis dimension with length 1
    :param upcast: if ``True``, returns tensor with ``float`` or ``double`` type
    :param out: output tensor (may be ``None``)
    :returns: tensor with values summed along axis, or ``None`` if ``out`` is tensor
    """
    pass
def tensor_to_array(*args, **kwargs):
    """tensor_to_array(*args, **kwargs)
    Overloaded function.

    1. tensor_to_array(tensor: augpy._augpy.CudaTensor) -> array

    Copy a given tensor to a new numpy array.
    This initiates an asynchronous copy from device to host memory.

    2. tensor_to_array(tensor: augpy._augpy.CudaTensor, array: buffer) -> array

    Copy a given tensor to a numpy array created from the given buffer
    ``array``.
    This initiates an asynchronous copy from device to host memory.
    """
    pass
# Unsigned integer data types, one module-level constant per bit width.
uint8 = DLDataType(code=kDLUInt, bits=8)
uint16 = DLDataType(code=kDLUInt, bits=16)
uint32 = DLDataType(code=kDLUInt, bits=32)
uint64 = DLDataType(code=kDLUInt, bits=64)
def warp_affine(
    src: CudaTensor,
    dst: CudaTensor,
    matrix: buffer,
    background: CudaTensor,
    supersampling: int,
) -> None:
    # Raw docstring: ``\times`` below would otherwise start with a TAB.
    r"""warp_affine(src: augpy._augpy.CudaTensor, dst: augpy._augpy.CudaTensor, matrix: buffer, background: augpy._augpy.CudaTensor, supersampling: int) -> None

    Takes an image in channels-last format :math:`(H, W, C)` and affine warps
    it into a given output tensor in channels-first format :math:`(C, H, W)`.
    Any blank canvas is filled with a background color.
    The warp is performed with bi-linear interpolation and supersampling.

    Parameters:
        src: image tensor
        dst: target tensor
        matrix: :math:`2 \times 3` ``float`` transformation matrix,
            see :py:func:`make_affine_matrix` for details
        background: background color to fill empty canvas
        supersampling: supersampling factor, e.g., 3 means 9 samples are
            taken in a :math:`3 \times 3` grid
    """
    pass
# Public API of the module. This must be bound to ``__all__``: assigning the
# list to ``all`` would instead overwrite the ``all`` function exported below.
__all__ = [
    'CuRandError',
    'CudaDevice',
    'CudaDeviceProp',
    'CudaError',
    'CudaEvent',
    'CudaStream',
    'CudaTensor',
    'CutlassError',
    'kDLInt',
    'kDLFloat',
    'DLDataTypeCode',
    'DLDataType',
    'kDLUInt',
    'Decoder',
    'MemoryError',
    'NvJpegError',
    'RandomNumberGenerator',
    'WarpScaleMode',
    'WARP_SCALE_LONGEST',
    'WARP_SCALE_SHORTEST',
    'add',
    'all',
    'array_to_tensor',
    'box_blur_single',
    'cast',
    'copy',
    'default_stream',
    'disable_profiler',
    'div',
    'empty_like',
    'enable_profiler',
    'eq',
    'export_dltensor',
    'fill',
    'float16',
    'float32',
    'float64',
    'fma',
    'gaussian_blur',
    'gaussian_blur_single',
    'ge',
    'gemm',
    'get_current_device',
    'get_current_stream',
    'get_device_properties',
    'gt',
    'import_dltensor',
    'init',
    'int16',
    'int32',
    'int64',
    'int8',
    'le',
    'lighting',
    'lt',
    'make_affine_matrix',
    'meminfo',
    'mul',
    'nvtx_range_end',
    'nvtx_range_start',
    'rdiv',
    'release',
    'rsub',
    'sub',
    'sum',
    'tensor_to_array',
    'uint16',
    'uint32',
    'uint64',
    'uint8',
    'warp_affine',
]