
I'm consistently encountering an "invalid argument" error when calling cudaMemcpyBatchAsync for host-to-device transfers.

CUDA error at btest.cu:43 - invalid argument

Line 43 is the CUDA_CHECK(cudaMemcpyBatchAsync(...)) call. The API signature and description are from the official documentation: https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1gc02716b3bd21f3d83640ab102bf089f9

Code:

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

// CUDA error checking macro
#define CUDA_CHECK(call) \
   {\
   const cudaError_t err = call; \
   if (err != cudaSuccess) { \
     fprintf(stderr, "CUDA error at %s:%d - %s\n", __FILE__, __LINE__, cudaGetErrorString(err)); \
     exit(EXIT_FAILURE); \
   } \
   }

int main() {
    // --- Configuration ---
    const int NUM_COPIES = 1000;
    const size_t COPY_SIZE_BYTES = 1024;

    // --- Allocate Host and Device Memory ---
    // We need multiple host and device pointers for individual copies
    std::vector<void*> h_src;
    std::vector<void*> d_dst;
    std::vector<size_t> sizes(NUM_COPIES, COPY_SIZE_BYTES);

    for (int i = 0; i < NUM_COPIES; ++i) {
        void* ptr_h, *ptr_d;
        CUDA_CHECK(cudaMallocHost(&ptr_h, COPY_SIZE_BYTES));
        CUDA_CHECK(cudaMalloc    (&ptr_d, COPY_SIZE_BYTES));
        h_src.push_back(ptr_h);
        d_dst.push_back(ptr_d);
    }

    std::vector<cudaMemcpyAttributes> attrs(1);
    attrs[0].srcLocHint.type = cudaMemLocationTypeHost;
    attrs[0].dstLocHint.type = cudaMemLocationTypeDevice;
    attrs[0].srcAccessOrder = cudaMemcpySrcAccessOrderAny;
    attrs[0].flags = 0;
    std::vector<size_t> attrsIdxs = {0};
    size_t numAttrs = attrs.size();

    size_t fail_idx=0; // Variable to store the index of the failed copy if any
    CUDA_CHECK(cudaMemcpyBatchAsync(
        d_dst.data(),
        h_src.data(),
        sizes.data(),
        NUM_COPIES,
        attrs.data(),
        attrsIdxs.data(),
        numAttrs,
        &fail_idx,
        0        // Default stream
    ));

    if( fail_idx!=SIZE_MAX ) throw std::runtime_error("Failed MemcpyBatchAsync at fail_idx = " + std::to_string(fail_idx) + "\n");

    // --- Cleanup ---
    for (int i = 0; i < NUM_COPIES; ++i) {
        CUDA_CHECK(cudaFreeHost(h_src[i]));
        CUDA_CHECK(cudaFree(d_dst[i]));
    }

    return 0;
}

Environment:

  • GPU: NVIDIA GeForce RTX 5090
  • Driver Version: 575.57.08
  • CUDA Version (from nvidia-smi): 12.9
  • Compilation: /usr/local/cuda-12.9/bin/nvcc -arch=sm_90 btest.cu -o a.out
  • Operating System: Ubuntu 24.04

Also tested on an RTX 3090 with CUDA 12.8, compiled for sm_86: same "invalid argument" error.

What else could cause an "invalid argument" error for cudaMemcpyBatchAsync in this scenario? Are there any subtle requirements or unusual environmental factors I might be missing?

You didn't initialize attrs[0].dstLocHint.id to the device number. Same with srcLocHint.id, but that one is ignored for cudaMemLocationTypeHost. (Commented Jul 1 at 6:02)
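For illustration, a hedged sketch of what this comment suggests, applied to the attribute setup from the question; the device ordinal 0 is an assumption for a single-GPU system, not something stated in the question or the docs:

// Fully initialize both location hints, including the device ordinal.
attrs[0].srcLocHint.type = cudaMemLocationTypeHost;    // .id is ignored for host locations
attrs[0].dstLocHint.type = cudaMemLocationTypeDevice;
attrs[0].dstLocHint.id   = 0;                          // ordinal of the GPU that owns d_dst (assumed 0)
attrs[0].srcAccessOrder  = cudaMemcpySrcAccessOrderAny;
attrs[0].flags           = 0;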

1 Answer

The likely problem is that the stream argument cannot be 0 (i.e. the default stream). You need to pass a named stream that was created with cudaStreamCreate*().

You also don't need to specify the location hints, because per the documentation: "The cudaMemcpyAttributes::srcLocHint and cudaMemcpyAttributes::dstLocHint allows applications to specify hint locations for operands of a copy when the operand doesn't have a fixed location. That is, these hints are only applicable for managed memory pointers on devices where cudaDevAttrConcurrentManagedAccess is true or system-allocated pageable memory on devices where cudaDevAttrPageableMemoryAccess is true."
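As a minimal sketch of this suggestion, here is how the attribute setup and the call from the question could look with a named stream and without the location hints. It reuses the question's variables (h_src, d_dst, sizes, NUM_COPIES, CUDA_CHECK) and leaves the hint fields zero-initialized by the vector; treat it as an untested outline rather than a verified fix.

// Create a named stream instead of using the default stream.
cudaStream_t stream;
CUDA_CHECK(cudaStreamCreate(&stream));

// Only the access order and flags are set; the location hints are omitted
// because the operands (cudaMallocHost / cudaMalloc allocations) already
// have fixed locations.
std::vector<cudaMemcpyAttributes> attrs(1);
attrs[0].srcAccessOrder = cudaMemcpySrcAccessOrderAny;
attrs[0].flags = 0;
std::vector<size_t> attrsIdxs = {0};

size_t fail_idx = 0;
CUDA_CHECK(cudaMemcpyBatchAsync(
    d_dst.data(),
    h_src.data(),
    sizes.data(),
    NUM_COPIES,
    attrs.data(),
    attrsIdxs.data(),
    attrs.size(),
    &fail_idx,
    stream));          // named stream instead of 0

CUDA_CHECK(cudaStreamSynchronize(stream));
CUDA_CHECK(cudaStreamDestroy(stream));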


1 Comment

Well spotted.
