In Debug mode the whole program runs well, but in Release mode I get an error in a cudaMalloc operation

-2

In Debug mode the program runs well, but in Release mode I get an error in a memcpy operation.

 // Stage the contents of CellList in a malloc'ed host buffer, then copy that buffer into a device allocation.
 CellBot *hCellList;
        CellBot *dCellList;
        size_t CellSize = WorldConst.numberOfCells * sizeof(CellBot);
        // NOTE(review): the host allocation is performed inside the assert() argument.
        // The standard assert macro expands to nothing when NDEBUG is defined, so in
        // such a build this malloc would not be executed and hCellList would never be assigned.
        assert(!((hCellList = (CellBot *)malloc(CellSize)) == NULL));
        // Device buffer of the same byte size. gpuAssert is defined outside this snippet —
        // presumably it checks the returned cudaError_t; TODO confirm.
        gpuAssert(cudaMalloc((void**)&dCellList, CellSize));


        // Host-side copy from CellList's storage into the staging buffer.
        ::memcpy(hCellList, CellList.data(), CellSize);
        // Upload the staging buffer to the device allocation.
        gpuAssert(cudaMemcpy(dCellList, hCellList, CellSize, cudaMemcpyHostToDevice));

I don't have any idea what causes this. I always worked in Debug mode, and when I decided to make a Release build I got this error. The program stops on this line of code:

::memcpy(hCellList, CellList.data(), CellSize);

Exception thrown at 0x00007FFB9820C447 (vcruntime140.dll) in OgreCu_0.01.exe: 0xC0000005: Access violation writing location 0x0000000000000000.

Visual Studio shows the error in memcpy.asm, at the line marked with **:

 CopyUp:
        cmp     r8, 128
        jbe     XmmCopySmall

        bt      __favor, __FAVOR_ENFSTRG ; check for ENFSTRG (enhanced fast strings)
        jnc     XmmCopyUp               ; If Enhanced Fast String not available, use XMM

        ; use Enhanced Fast Strings
        ; but first align the destination dst to 16 byte alignment
        mov     rax, r11                ; return original destination pointer
        mov     r11, rdi                ; save rdi in r11
        mov     rdi, rcx                ; move destination pointer to rdi
        mov     rcx, r8                 ; move length to rcx
        mov     r8, rsi                 ; save rsi in r8
        mov     rsi, r10                ; move source pointer to rsi
        **rep     movsb                   ; copy source to destination buffer**
        mov     rsi, r8                 ; restore rsi
        mov     rdi, r11                ; restore rdi
        ret

I changed ::memcpy(hCellList, CellList.data(), CellSize); to

// Element-by-element copy tried in place of the ::memcpy call; each iteration
// still writes through hCellList, one CellBot at a time.
for (int e = 0; e < WorldConst.numberOfCells; e++)
    {
        hCellList[e] = CellList[e];
    }

And I get the same error at hCellList[e] = CellList[e];

Structure of CellBot:

// Plain data record for one cell: public fields only, no member functions.
// Declaration order determines member layout, so keep it stable if instances
// are copied byte-wise.
struct CellBot
{
    // int identifiers
    int mainId, subId;

    // Vec3-valued fields
    Vec3 coord, speed, nspeed, velocity, nvelocity;

    // float radii (names suggest interaction ranges — TODO confirm)
    float radiusView, radiusAttraction, radiusRepulsion;

    // float force coefficients
    float forceAttraction, forceRepulsion;

    // float per-cell scalars
    float radius, mass, frictionBounce;

    // int tags
    int colorId, groupId;

};

Vec3:

// Three-component vector template with element type T (float by default).
// Every member is declared __host__ __device__.
//
// VEC3_VALIDATE() is a macro defined outside this excerpt; presumably a debug
// sanity check of this object's x, y, z — TODO confirm. It is invoked only
// where it is reachable and after the members hold their final values.
template <typename T=float>
class XVector3
{
public:

    typedef T value_type;

    // Zero vector.
    __host__ __device__ inline XVector3() : x(0.0f), y(0.0f), z(0.0f) {}
    // All three components set to the same value a.
    __host__ __device__ inline XVector3(T a) : x(a), y(a), z(a) {}
    // Reads p[0], p[1], p[2]; p must point to at least three elements.
    __host__ __device__ inline XVector3(const T* p) : x(p[0]), y(p[1]), z(p[2]) {}
    // Component-wise construction; validates the freshly initialized members.
    __host__ __device__ inline XVector3(T x_, T y_, T z_) : x(x_), y(y_), z(z_)
    {
        VEC3_VALIDATE();
    }

    // Implicit conversion to a pointer to the first component (&x).
    __host__ __device__ inline operator T* () { return &x; }
    __host__ __device__ inline operator const T* () const { return &x; }

    // Assigns all three components, then validates the NEW values
    // (validating before the assignment would only check the old state).
    __host__ __device__ inline void Set(T x_, T y_, T z_) { x = x_; y = y_; z = z_; VEC3_VALIDATE(); }

    // Binary operators: copy *this, apply the matching compound assignment
    // (which runs VEC3_VALIDATE() on the result), and return the copy.
    __host__ __device__ inline XVector3<T> operator * (T scale) const { XVector3<T> r(*this); r *= scale; return r; }
    __host__ __device__ inline XVector3<T> operator / (T scale) const { XVector3<T> r(*this); r /= scale; return r; }
    __host__ __device__ inline XVector3<T> operator + (const XVector3<T>& v) const { XVector3<T> r(*this); r += v; return r; }
    __host__ __device__ inline XVector3<T> operator - (const XVector3<T>& v) const { XVector3<T> r(*this); r -= v; return r; }
    __host__ __device__ inline XVector3<T> operator /(const XVector3<T>& v) const { XVector3<T> r(*this); r /= v; return r; }
    __host__ __device__ inline XVector3<T> operator *(const XVector3<T>& v) const { XVector3<T> r(*this); r *= v; return r; }

    // Compound assignments: update in place, validate, return *this.
    __host__ __device__ inline XVector3<T>& operator *=(T scale) {x *= scale; y *= scale; z*= scale; VEC3_VALIDATE(); return *this;}
    // Scalar division multiplies by the reciprocal 1/scale.
    __host__ __device__ inline XVector3<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; z *= s; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator +=(const XVector3<T>& v) {x += v.x; y += v.y; z += v.z; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator -=(const XVector3<T>& v) {x -= v.x; y -= v.y; z -= v.z; VEC3_VALIDATE(); return *this;}
    // Component-wise division and multiplication.
    __host__ __device__ inline XVector3<T>& operator /=(const XVector3<T>& v) {x /= v.x; y /= v.y; z /= v.z; VEC3_VALIDATE(); return *this; }
    __host__ __device__ inline XVector3<T>& operator *=(const XVector3<T>& v) {x *= v.x; y *= v.y; z *= v.z; VEC3_VALIDATE(); return *this; }

    // True when any component differs.
    __host__ __device__ inline bool operator != (const XVector3<T>& v) const { return (x != v.x || y != v.y || z != v.z); }

    // negate
    __host__ __device__ inline XVector3<T> operator -() const { VEC3_VALIDATE(); return XVector3<T>(-x, -y, -z); }

    // Runs VEC3_VALIDATE() on the current components.
    __host__ __device__ void Validate()
    {
        VEC3_VALIDATE();
    }

    T x,y,z;
};

// Single-precision aliases.
typedef XVector3<float> Vec3;
typedef XVector3<float> Vector3;

// lhs scalar scale
// Scalar-on-the-left scaling: returns a copy of rhs scaled by lhs via XVector3<T>::operator*=.
template <typename T>
__host__ __device__ XVector3<T> operator *(T lhs, const XVector3<T>& rhs)
{
    XVector3<T> scaled = rhs;
    scaled *= lhs;
    return scaled;
}

// Exact component-wise equality; components are compared in x, y, z order and
// the comparison stops at the first mismatch.
template <typename T>
__host__ __device__ bool operator==(const XVector3<T>& lhs, const XVector3<T>& rhs)
{
    if (!(lhs.x == rhs.x))
        return false;
    if (!(lhs.y == rhs.y))
        return false;
    return (lhs.z == rhs.z);
}
c++
c
asked on Stack Overflow Jul 27, 2018 by Geo Evclid • edited Jul 27, 2018 by Geo Evclid

1 Answer

3

It is very hard to understand your question. Next time, please include a more complete error message and an explanation of what you are doing!

My guess, however, is that you ran into the following problem: assert statements are slow and are therefore usually only compiled into your code in debug mode. In release mode they are usually simply ignored.

In your code, however, you are calling malloc inside an assert. Therefore, in the debug version you get the memory you want, while in the release version the allocation never happens, you get nothing, and the program crashes. The line is:

assert(!((hCellList = (CellBot *)malloc(CellSize)) == NULL));

What you should do instead is:

// Allocate unconditionally, outside of assert(), so the call is present in every build configuration.
hCellList = static_cast<CellBot *>(malloc(CellSize));
// Diagnostic only: reports a failed allocation in builds where assert() is active.
assert(hCellList != NULL);
answered on Stack Overflow Jul 27, 2018 by jan.sende • edited Jul 28, 2018 by jan.sende

User contributions licensed under CC BY-SA 3.0