# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=no-name-in-module, c-extension-no-member
from typing import Tuple, Union, Optional
from cuda import cudart
from cuda.cudart import cudaStream_t, cudaMemcpyKind
from .stream import Stream, current_stream


def memory_info() -> Tuple[int, int]:
    """
    Get the free and total memory on the current device in bytes.

    Returns
    -------
    (free, total): Tuple[int, int]
        The free and total memory on the current device in bytes.
    """
    err, free_bytes, total_bytes = cudart.cudaMemGetInfo()
    assert err == 0, err
    return free_bytes, total_bytes
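
# Illustrative usage sketch (kept as a comment so nothing runs at import time;
# the threshold below is an arbitrary example): check available device memory
# before attempting a large allocation.
#
#   free_bytes, total_bytes = memory_info()
#   if free_bytes < (1 << 30):
#       raise RuntimeError('less than 1 GiB of free device memory')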


def malloc(num_bytes: int) -> int:
    """
    Allocate memory on the current device.

    Parameters
    ----------
    num_bytes: int
        The number of bytes to allocate.

    Returns
    -------
    addr: int
        The address of the allocated memory.
    """
    err, addr = cudart.cudaMalloc(num_bytes)
    assert err == 0, err
    return addr


def free(addr: int) -> None:
    """
    Free memory on the current cuda device.

    Parameters
    ----------
    addr: int
        The address of the memory to free. This must be the address of memory allocated with :func:`malloc` or
        :func:`malloc_async`.
    """
    (err,) = cudart.cudaFree(addr)
    assert err == 0, err
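
# Illustrative usage sketch (comment only; the size is an arbitrary example):
# a synchronous allocate/free round trip. Addresses returned by malloc must be
# released with free, not free_host.
#
#   addr = malloc(16 * 1024)   # 16 KiB of device memory
#   ...                        # use addr in kernels or copies
#   free(addr)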


def malloc_async(num_bytes: int, stream: Optional[Union[Stream, cudaStream_t, int]] = None) -> int:
    """
    Allocate memory on the current device asynchronously.

    Parameters
    ----------
    num_bytes: int
        The number of bytes to allocate.
    stream: Union[Stream, cudaStream_t, int], optional
        The stream to use for the allocation. If None, the current stream is used.

    Returns
    -------
    addr: int
        The address of the allocated memory. When the allocation failed due to insufficient memory, 0 is returned.
    """
    if stream is None:
        stream = current_stream()
    err, addr = cudart.cudaMallocAsync(num_bytes, int(stream))
    if err == cudart.cudaError_t.cudaErrorMemoryAllocation:
        return 0
    assert err == 0, err
    return addr


def free_async(addr: int, stream: Optional[Union[Stream, cudaStream_t, int]] = None) -> None:
    """
    Free memory on the current cuda device asynchronously.

    Parameters
    ----------
    addr: int
        The address of the memory to free. This must be the address of memory allocated with :func:`malloc` or
        :func:`malloc_async`.
    stream: Union[Stream, cudaStream_t, int], optional
        The stream to use for the free. If None, the current stream is used.
    """
    if stream is None:
        stream = current_stream()
    (err,) = cudart.cudaFreeAsync(addr, int(stream))
    assert err == 0, err
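
# Illustrative usage sketch (comment only; the size is an arbitrary example):
# stream-ordered allocation and release. Both calls are enqueued on the same
# stream, so the memory only becomes reusable once the stream reaches the free.
#
#   stream = current_stream()
#   addr = malloc_async(1024, stream)
#   if addr == 0:
#       raise MemoryError('device out of memory')
#   free_async(addr, stream)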


def malloc_host(num_bytes: int) -> int:
    """
    Allocate pinned host memory.

    Parameters
    ----------
    num_bytes: int
        The number of bytes to allocate.

    Returns
    -------
    addr: int
        The address of the allocated memory. When the allocation failed due to insufficient memory, 0 is returned.
    """
    err, addr = cudart.cudaMallocHost(num_bytes)
    # Check the out-of-memory status before asserting; otherwise the assert would fire
    # and the "return 0 on allocation failure" path could never be reached.
    if err == cudart.cudaError_t.cudaErrorMemoryAllocation:
        return 0
    assert err == 0, err
    return addr


def free_host(addr: int) -> None:
    """
    Free pinned host memory.

    Parameters
    ----------
    addr: int
        The address of the memory to free. This must be the address of memory allocated with :func:`malloc_host`.
    """
    (err,) = cudart.cudaFreeHost(addr)
    assert err == 0, err
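
# Illustrative usage sketch (comment only; the size is an arbitrary example):
# a pinned (page-locked) host staging buffer. Pinned memory allows asynchronous
# host<->device copies to overlap with compute, and must be released with
# free_host rather than free.
#
#   host_addr = malloc_host(4096)
#   if host_addr == 0:
#       raise MemoryError('pinned host allocation failed')
#   ...
#   free_host(host_addr)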


def memset(addr: int, value: int, num_bytes: int) -> None:
    """
    Set the gpu memory to a given value.

    Parameters
    ----------
    addr: int
        The start address of the memory region to set.
    value: int
        The byte value to set the memory region to.
    num_bytes: int
        The number of bytes to set.
    """
    (err,) = cudart.cudaMemset(addr, value, num_bytes)
    assert err == 0, err


def memset_async(
    addr: int, value: int, num_bytes: int, stream: Optional[Union[Stream, cudaStream_t, int]] = None
) -> None:
    """
    Set the gpu memory to a given value asynchronously.

    Parameters
    ----------
    addr: int
        The start address of the memory region to set.
    value: int
        The byte value to set the memory region to.
    num_bytes: int
        The number of bytes to set.
    stream: Union[Stream, cudaStream_t, int], optional
        The stream to use for the memset. If None, the current stream is used.
    """
    if stream is None:
        stream = current_stream()
    (err,) = cudart.cudaMemsetAsync(addr, value, num_bytes, int(stream))
    assert err == 0, err
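
# Illustrative usage sketch (comment only; names are examples): zero-fill a
# freshly allocated buffer on the current stream, so the fill is ordered after
# the allocation and before any kernel enqueued later on the same stream.
#
#   addr = malloc_async(num_bytes)
#   memset_async(addr, 0, num_bytes)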


def memcpy(dst: int, src: int, num_bytes: int) -> None:
    """
    Copy gpu memory from one location to another.

    Parameters
    ----------
    dst: int
        The destination address.
    src: int
        The source address.
    num_bytes: int
        The number of bytes to copy.
    """
    (err,) = cudart.cudaMemcpy(dst, src, num_bytes, cudaMemcpyKind.cudaMemcpyDefault)
    if err != 0:
        raise RuntimeError(f"cudaMemcpy failed with error code {err.name}")


def memcpy_async(dst: int, src: int, num_bytes: int, stream: Optional[Union[Stream, cudaStream_t, int]] = None) -> None:
    """
    Copy gpu memory from one location to another asynchronously.

    Parameters
    ----------
    dst: int
        The destination address.
    src: int
        The source address.
    num_bytes: int
        The number of bytes to copy.
    stream: Union[Stream, cudaStream_t, int], optional
        The stream to use for the memcpy. If None, the current stream is used.
    """
    if stream is None:
        stream = current_stream()
    (err,) = cudart.cudaMemcpyAsync(dst, src, num_bytes, cudaMemcpyKind.cudaMemcpyDefault, int(stream))
    assert err == 0, err
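
# Illustrative usage sketch (comment only; names are examples): stage data
# through pinned host memory. Since both helpers pass cudaMemcpyDefault, the
# runtime infers the copy direction from the addresses, so the same calls cover
# host-to-device, device-to-host and device-to-device copies.
#
#   host_addr = malloc_host(num_bytes)             # pinned source buffer
#   dev_addr = malloc(num_bytes)
#   memcpy(dev_addr, host_addr, num_bytes)         # blocking copy
#   memcpy_async(dev_addr, host_addr, num_bytes)   # enqueued on the current stream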


def memcpy_peer(dst: int, dst_id: int, src: int, src_id: int, num_bytes: int) -> None:
    """
    Copy gpu memory from one device to another.

    Parameters
    ----------
    dst: int
        The destination address.
    dst_id: int
        The id of the destination device.
    src: int
        The source address.
    src_id: int
        The id of the source device.
    num_bytes: int
        The number of bytes to copy.
    """
    (err,) = cudart.cudaMemcpyPeer(dst, dst_id, src, src_id, num_bytes)
    assert err == 0, err


def memcpy_peer_async(
    dst: int,
    dst_id: int,
    src: int,
    src_id: int,
    num_bytes: int,
    stream: Optional[Union[Stream, cudaStream_t, int]] = None,
) -> None:
    """
    Copy gpu memory from one device to another asynchronously.

    Parameters
    ----------
    dst: int
        The destination address.
    dst_id: int
        The id of the destination device.
    src: int
        The source address.
    src_id: int
        The id of the source device.
    num_bytes: int
        The number of bytes to copy.
    stream: Union[Stream, cudaStream_t, int], optional
        The stream to use for the memcpy. If None, the current stream is used.
    """
    if stream is None:
        stream = current_stream()
    (err,) = cudart.cudaMemcpyPeerAsync(dst, dst_id, src, src_id, num_bytes, int(stream))
    assert err == 0, err
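
# Illustrative usage sketch (comment only; it assumes two devices, ids 0 and 1,
# and that the named addresses were allocated on those devices): copy a buffer
# that lives on device 0 into a buffer on device 1.
#
#   memcpy_peer(dst_addr_on_1, 1, src_addr_on_0, 0, num_bytes)
#   # or, enqueued on a stream instead of blocking:
#   memcpy_peer_async(dst_addr_on_1, 1, src_addr_on_0, 0, num_bytes, stream)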