"""
Makes CuPy's functions and classes available for use in Chainer.

Device, context and memory management on CuPy.

Chainer uses `CuPy <https://cupy.chainer.org/>`_ (with a very thin wrapper)
to exploit the speed of GPU computation. The following modules and classes
defined in CuPy are imported into the :mod:`chainer.cuda` module for
convenience (refer to this table when reading Chainer's source code).
============================ =================================
imported name original name
============================ =================================
``chainer.cuda.cupy`` :mod:`cupy`
``chainer.cuda.ndarray`` :class:`cupy.ndarray`
``chainer.cuda.cupy.cuda`` :mod:`cupy.cuda`
``chainer.cuda.Device`` :class:`cupy.cuda.Device`
``chainer.cuda.Event`` :class:`cupy.cuda.Event`
``chainer.cuda.Stream`` :class:`cupy.cuda.Stream`
============================ =================================
For more efficient memory management during data storage and computation,
Chainer replaces the default allocator of CuPy with its memory pool
implementation. This enables the reuse of device memory over multiple
forward/backward computations, and of temporary arrays for consecutive
elementwise operations.
"""
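
# Illustrative sketch (not executed here): the memory-pool setup described
# above amounts to routing CuPy's device allocations through a pool object,
# roughly:
#
#     pool = cupy.cuda.MemoryPool()
#     cupy.cuda.set_allocator(pool.malloc)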
import functools
import os
import warnings
import numpy
import six
import chainer
available = False
cudnn_enabled = False
# Import CuPy modules and classes when CuPy is available.
try:
import cupy
from cupy import cuda # NOQA
from cupy.cuda import cublas # NOQA
from cupy import ndarray # NOQA
from cupy.cuda import Device # NOQA
from cupy.cuda import Event # NOQA
from cupy.cuda import Stream # NOQA
from . import cuda_fusion as fusion # NOQA
available = True
except Exception as e:
_resolution_error = e
fusion = numpy
class ndarray(object):
pass # for type testing
# for `xp is cuda.cupy` to always work
cupy = object()
# Determine whether cuDNN should be used.
if available:
_cudnn_disabled_by_user = int(os.environ.get('CHAINER_CUDNN', '1')) == 0
try:
import cupy.cudnn
cudnn = cupy.cudnn
cudnn_enabled = not _cudnn_disabled_by_user
except Exception as e:
_resolution_error = e
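
# For example, cuDNN can be disabled without reinstalling CuPy by setting the
# environment variable checked above before launching Python (``train.py`` is
# a hypothetical user script):
#
#     $ CHAINER_CUDNN=0 python train.py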
# Function that checks whether CUDA is available.
def check_cuda_available():
"""Checks if CUDA is available.
When CUDA is correctly set up, nothing happens.
    Otherwise it raises ``RuntimeError``.

    """
if not available:
msg = ('CUDA environment is not correctly set up\n'
'(see https://github.com/chainer/chainer#installation).')
msg += str(_resolution_error)
raise RuntimeError(msg)
if (not cudnn_enabled and
not _cudnn_disabled_by_user and
not getattr(check_cuda_available, '_already_warned', False)):
warnings.warn(
'cuDNN is not enabled.\n'
'Please reinstall CuPy after you install cudnn\n'
'(see https://docs-cupy.chainer.org/en/stable/install.html'
'#install-cupy-with-cudnn-and-nccl).')
check_cuda_available._already_warned = True
# Class representing the CPU device (instantiated as a singleton below).
class DummyDeviceType(object):
"""Dummy device class that does nothing with cupy.cuda.Device interface.
    This class is used to represent the CPU device.

    """
id = -1
def __int__(self):
return -1
def __enter__(self):
return self
    def __exit__(self, *args):
        pass

    def use(self):
        pass

    def synchronize(self):
        pass
def __eq__(self, other):
return isinstance(other, DummyDeviceType)
def __ne__(self, other):
return not (self == other)
DummyDevice = DummyDeviceType()
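
# Example (illustrative): ``DummyDevice`` lets device-agnostic code use the
# same context-manager protocol on CPU that ``cupy.cuda.Device`` provides on
# GPU:
#
#     with get_device_from_array(x):  # DummyDevice when `x` is a NumPy array
#         y = x * 2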
# Handling for backward compatibility with previous versions.
# ------------------------------------------------------------------------------
# Global states
# ------------------------------------------------------------------------------
if available:
# This is for backward compatibility
memory_pool = cupy.get_default_memory_pool()
pinned_memory_pool = cupy.get_default_pinned_memory_pool()
if six.PY2:
try:
        from future.types.newint import newint as _newint
_integer_types = six.integer_types + (_newint,)
except ImportError:
_integer_types = six.integer_types
else:
_integer_types = six.integer_types
# Utilities to obtain device (GPU/CPU) information.
# ------------------------------------------------------------------------------
# Device selection
# ------------------------------------------------------------------------------
def get_device_from_id(device_id):
"""Gets the device from an ID integer.
Args:
        device_id (int or None): The ID of the device that this function
            returns.

    Returns:
        cupy.cuda.Device: The device with the given ID, or
        :data:`DummyDevice` if ``device_id`` is ``None``.

    """
if device_id is not None:
check_cuda_available()
return Device(device_id)
else:
return DummyDevice
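
# Example (illustrative; assumes a CUDA-capable GPU 0):
#     get_device_from_id(0)     # -> cupy.cuda.Device(0)
#     get_device_from_id(None)  # -> DummyDevice (CPU)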
def get_device_from_array(*arrays):
"""Gets the device from a list of CuPy array or a single CuPy array.
The device on which the given CuPy array reside is returned.
Args:
array (cupy.ndarray or list of cupy.ndarray):
A CuPy array which this function returns the device corresponding
to. If a list of :class:`cupy.ndarray`\\ s are given, it returns
the first device object of an array in the list.
for array in arrays:
if isinstance(array, ndarray) and array.device is not None:
return array.device
return DummyDevice
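
# Example (illustrative):
#     get_device_from_array(numpy.ones(3))   # -> DummyDevice (CPU array)
#     # and, when CUDA is available:
#     # get_device_from_array(cupy.ones(3))  # -> device holding the array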
def get_device(*args):
"""Gets the device from a device object, an ID integer or an array object.
.. note::
This API is deprecated. Please use
:func:`~chainer.cuda.get_device_from_id`
or :func:`~chainer.cuda.get_device_from_array` instead.
    This is a convenient utility to select a correct device if the type of
    ``args`` is unknown (i.e., one can use this function on arrays that may
    be on CPU or GPU). The returned device object supports the context
    management protocol of Python for the *with* statement.
Args:
args: Values to specify a GPU device. The first device object, integer
or :class:`cupy.ndarray` object is used to select a device.
If it is a device object, it is returned. If it is an integer,
            the corresponding device is returned. If it is a CuPy array,
            the device on which this array resides is returned. If none of
            the arguments is an integer, a device object, or a CuPy array,
            a dummy device object representing the CPU is returned.
Returns:
Device object specified by given ``args``.
.. seealso::
        See :class:`cupy.cuda.Device` for selecting a device without using
        arrays.

    """
warnings.warn('get_device is deprecated. Please use get_device_from_id or'
' get_device_from_array instead.', DeprecationWarning)
return _get_device(*args)
def _get_device(*args):
for arg in args:
if type(arg) in _integer_types:
check_cuda_available()
return Device(arg)
if isinstance(arg, ndarray):
if arg.device is None:
continue
return arg.device
if available and isinstance(arg, Device):
return arg
return DummyDevice
# Transfer data between GPU and CPU devices.
# ------------------------------------------------------------------------------
# cupy.ndarray allocation and copy
# ------------------------------------------------------------------------------
def to_gpu(array, device=None, stream=None):
"""Copies the given CPU array to the specified device.
Args:
array: Array to be sent to GPU.
device: Device specifier.
stream (~cupy.cuda.Stream): *(deprecated since v3.0.0)*
CUDA stream. If not ``None``, the copy runs asynchronously.
Returns:
cupy.ndarray: Array on GPU.
If ``array`` is already on the GPU device specified by ``device``,
        this function just returns ``array`` without performing any copy.

    """
if stream is not None:
warnings.warn(
'The stream option is deprecated in chainer.cuda.to_gpu. '
'Please remove it.', DeprecationWarning)
check_cuda_available()
if isinstance(array, (numpy.number, numpy.bool_)):
array = numpy.asarray(array)
if not isinstance(array, (cupy.ndarray, numpy.ndarray)):
raise TypeError(
'The array sent to gpu must be numpy.ndarray or cupy.ndarray, '
'or a NumPy scalar.'
'\nActual type: {0}.'.format(type(array)))
with _get_device(device):
array_dev = get_device_from_array(array)
if array_dev.id == cupy.cuda.device.get_device_id():
return array
if stream is not None and stream.ptr != 0:
ret = cupy.empty_like(array)
if array_dev.id == -1:
# cpu to gpu
mem = cupy.cuda.alloc_pinned_memory(array.nbytes)
src = numpy.frombuffer(
mem, array.dtype, array.size).reshape(array.shape)
src[...] = array
ret.set(src, stream)
cupy.cuda.pinned_memory._add_to_watch_list(
stream.record(), mem)
else:
# gpu to gpu
with array_dev:
src = array.copy()
event = Stream.null.record()
stream.wait_event(event)
ret.data.copy_from_device_async(
src.data, src.nbytes, stream)
# to hold a reference until the end of the asynchronous
# memcpy
stream.add_callback(lambda *x: None, (src, ret))
return ret
if array_dev.id == -1:
return cupy.asarray(array)
# Need to make a copy when an array is copied to another device
return cupy.array(array, copy=True)
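
# Example (illustrative; assumes CUDA and GPU 0 are available):
#     x = numpy.arange(6, dtype=numpy.float32)
#     x_gpu = to_gpu(x, device=0)       # cupy.ndarray on GPU 0
#     to_gpu(x_gpu, device=0) is x_gpu  # True: already on the target device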
def to_cpu(array, stream=None):
"""Copies the given GPU array to host CPU.
Args:
array: Array to be sent to CPU.
stream (cupy.cuda.Stream): CUDA stream.
Returns:
numpy.ndarray: Array on CPU.
        If the given ``array`` is already on the CPU, then this function
        just returns ``array`` without performing any copy.

    """
if isinstance(array, ndarray):
check_cuda_available()
with get_device_from_array(array):
return array.get(stream)
elif isinstance(array, (numpy.number, numpy.bool_)):
return numpy.asarray(array)
elif isinstance(array, numpy.ndarray):
return array
else:
raise TypeError(
'The array sent to cpu must be numpy.ndarray or cupy.ndarray, '
'or a NumPy scalar.'
'\nActual type: {0}.'.format(type(array)))
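
# Example (illustrative):
#     to_cpu(numpy.ones(3))    # returned as-is, no copy
#     # and, when CUDA is available:
#     # to_cpu(cupy.ones(3))   # -> numpy.ndarray copied to the host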
def copy(array, out=None, out_device=None, stream=None):
"""Copies a :class:`cupy.ndarray` object using the default stream.
This function can copy the device array to the destination array on another
device.
Args:
array (cupy.ndarray): Array to be copied.
out (cupy.ndarray): Destination array.
If it is not ``None``, then ``out_device`` argument is ignored.
out_device: Destination device specifier. Actual device object is
obtained by passing this value to :func:`get_device`.
stream (cupy.cuda.Stream): CUDA stream.
Returns:
cupy.ndarray: Copied array.
        If ``out`` is not specified, then the array is allocated on the
        device specified by the ``out_device`` argument.

    """
check_cuda_available()
assert stream is None # TODO(beam2d): FIX IT
if out is None:
if out_device is None:
out_device = array
with _get_device(out_device):
out = cupy.empty_like(array)
with get_device_from_array(array):
cupy.copyto(out, array)
return out
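
# Example (illustrative; assumes two GPUs, 0 and 1, are available):
#     with get_device_from_id(0):
#         x = cupy.arange(5)
#     y = copy(x, out_device=1)  # copy of `x` allocated on GPU 1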
# Memoize the results of expensive computations (e.g. kernel compilation and
# cuDNN setup) so they can be reused on later calls.
# ------------------------------------------------------------------------------
# Function result memoization
# ------------------------------------------------------------------------------
def memoize(for_each_device=False):
"""Makes a function memoizing the result for each argument and device.
    This is similar to :func:`cupy.memoize`. The difference is that this
    function can be used in the global scope even if CUDA is not available.
    In such a case, this function does nothing.

    .. note::
        This decorator acts as a dummy if CUDA is not available. It cannot
        be used for general-purpose memoization even if ``for_each_device``
        is set to ``False``.

    """
if available:
return cupy.memoize(for_each_device)
def dummy_decorator(f):
@functools.wraps(f)
def ret(*args, **kwargs):
return f(*args, **kwargs)
return ret
return dummy_decorator
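
# Example (illustrative): build a kernel once per argument combination and
# CUDA device instead of on every call (``_square_kernel`` is hypothetical):
#
#     @memoize(for_each_device=True)
#     def _square_kernel(name):
#         return cupy.ElementwiseKernel('T x', 'T y', 'y = x * x', name)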
def clear_memo():
"""Clears the memoized results for all functions decorated by memoize.
This function works like :func:`cupy.clear_memo` as a counterpart for
:func:`chainer.cuda.memoize`. It can be used even if CUDA is not available.
    In such a case, this function does nothing.

    """
if available:
cupy.clear_memo()
# ------------------------------------------------------------------------------
# Kernel definition utility
# ------------------------------------------------------------------------------
@memoize(for_each_device=True)
def elementwise(in_params, out_params, operation, name, **kwargs):
"""Creates an elementwise kernel function.
This function uses :func:`~chainer.cuda.memoize` to cache the
kernel object, i.e. the resulting kernel object is cached for each argument
combination and CUDA device.
The arguments are the same as those for
    :class:`cupy.ElementwiseKernel`, except that the ``name`` argument is
    mandatory.

    """
check_cuda_available()
return cupy.ElementwiseKernel(
in_params, out_params, operation, name, **kwargs)
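
# Example (illustrative; assumes CUDA is available and x_gpu, y_gpu are
# float32 cupy.ndarrays):
#     squared_diff = elementwise(
#         'float32 x, float32 y', 'float32 z',
#         'z = (x - y) * (x - y)', 'squared_diff')
#     z = squared_diff(x_gpu, y_gpu)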
@memoize(for_each_device=True)
def reduce(in_params, out_params, map_expr, reduce_expr, post_map_expr,
identity, name, **kwargs):
"""Creates a global reduction kernel function.
This function uses :func:`~chainer.cuda.memoize` to cache the resulting
kernel object, i.e. the resulting kernel object is cached for each argument
combination and CUDA device.
The arguments are the same as those for
:class:`cupy.ReductionKernel`, except that the ``name`` argument is
    mandatory.

    """
check_cuda_available()
return cupy.ReductionKernel(
in_params, out_params, map_expr, reduce_expr, post_map_expr,
identity, name, **kwargs)
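
# Example (illustrative; assumes CUDA is available): an L2-norm kernel, with
# the arguments in the cupy.ReductionKernel order used above.
#     l2norm = reduce(
#         'T x', 'T y', 'x * x', 'a + b', 'y = sqrt(a)', '0', 'l2norm')
#     y = l2norm(x_gpu, axis=1)  # x_gpu: a 2-D cupy.ndarray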
# ------------------------------------------------------------------------------
# numpy/cupy compatible coding
# ------------------------------------------------------------------------------
def get_array_module(*args):
"""Gets an appropriate one from :mod:`numpy` or :mod:`cupy`.
    This is almost equivalent to :func:`cupy.get_array_module`. The
    differences are that this function can be used even if CUDA is not
    available and that, for :class:`~chainer.Variable` arguments, it returns
    the array module of their underlying data arrays.
Args:
args: Values to determine whether NumPy or CuPy should be used.
Returns:
module: :mod:`cupy` or :mod:`numpy` is returned based on the types of
        the arguments.

    """
if available:
args = [arg.data if isinstance(arg, chainer.variable.Variable) else arg
for arg in args]
return cupy.get_array_module(*args)
else:
return numpy
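
# Example (illustrative): writing CPU/GPU-agnostic code.
#     def log1p_exp(x):
#         xp = get_array_module(x)  # numpy or cupy, matching `x`
#         return xp.log1p(xp.exp(x))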
_max_workspace_size = 8 * 1024 * 1024
def get_max_workspace_size():
"""Gets the workspace size for cuDNN.
Check "cuDNN Library User Guide" for detail.
Returns:
int: The workspace size for cuDNN.
return _max_workspace_size
def set_max_workspace_size(size):
"""Sets the workspace size for cuDNN.
Check "cuDNN Library User Guide" for detail.
Args:
size: The workspace size for cuDNN.
global _max_workspace_size
_max_workspace_size = size
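
# Example (illustrative): allow cuDNN to use up to 64 MiB of workspace when
# selecting convolution algorithms.
#     set_max_workspace_size(64 * 1024 * 1024)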
def fuse(*args, **kwargs):
"""Function fusing decorator.
    It calls :func:`cupy.fuse` when CuPy is available to make a fused
    function and does nothing otherwise.

    .. seealso::
        :func:`cupy.fuse`

    """
    # Delegate to the fusion module when CuPy is available; otherwise return
    # a decorator that leaves the function unchanged.
    if available:
        return fusion.fuse(*args, **kwargs)
    else:
        return lambda f: f
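
# Example (illustrative; assumes CUDA is available):
#     @fuse()
#     def squared_diff(x, y):
#         return (x - y) * (x - y)
#     z = squared_diff(x_gpu, y_gpu)  # x_gpu, y_gpu: cupy.ndarrays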