In Debug mode the program runs well, but in Release mode a memcpy operation fails with an error.
// Host-side staging buffer (hCellList) and device-side buffer (dCellList) for the cell array.
CellBot *hCellList;
CellBot *dCellList;
// Size in bytes of WorldConst.numberOfCells CellBot records.
size_t CellSize = WorldConst.numberOfCells * sizeof(CellBot);
// NOTE(review): the malloc call is the argument of assert(). When assert is compiled out
// (NDEBUG defined, the usual Release configuration) its argument is not evaluated,
// so the allocation and the assignment to hCellList never happen.
assert(!((hCellList = (CellBot *)malloc(CellSize)) == NULL));
// Allocate the device buffer; the returned status is passed to gpuAssert.
gpuAssert(cudaMalloc((void**)&dCellList, CellSize));
// Copy CellSize bytes from CellList's storage into the host staging buffer ...
::memcpy(hCellList, CellList.data(), CellSize);
// ... then upload the staging buffer to the device buffer.
gpuAssert(cudaMemcpy(dCellList, hCellList, CellSize, cudaMemcpyHostToDevice));
I don't have any ideas about the cause. I have always worked in Debug mode, and when I decided to make a Release build I got this error. The program stops on this line of code:
::memcpy(hCellList, CellList.data(), CellSize); // NOTE(review): hCellList is the destination; the reported fault is a write to address 0
Exception thrown at 0x00007FFB9820C447 (vcruntime140.dll) in OgreCu_0.01.exe: 0xC0000005: Access violation writing location 0x0000000000000000.
Visual Studio shows the error in memcpy.asm, on the line marked with **:
CopyUp:
cmp r8, 128
jbe XmmCopySmall
bt __favor, __FAVOR_ENFSTRG ; check for ENFSTRG (enhanced fast strings)
jnc XmmCopyUp ; If Enhanced Fast String not available, use XMM
; use Enhanced Fast Strings
; but first align the destination dst to 16 byte alignment
mov rax, r11 ; return original destination pointer
mov r11, rdi ; save rdi in r11
mov rdi, rcx ; move destination pointer to rdi
mov rcx, r8 ; move length to rcx
mov r8, rsi ; save rsi in r8
mov rsi, r10 ; move source pointer to rsi
**rep movsb ; copy source to destination buffer**
mov rsi, r8 ; restore rsi
mov rdi, r11 ; restore rdi
ret
I changed ::memcpy(hCellList, CellList.data(), CellSize);
to
// Element-wise replacement for the memcpy: assigns each of the
// WorldConst.numberOfCells entries of CellList into hCellList.
// NOTE(review): this still writes through hCellList, so it depends on that
// pointer being valid exactly as the memcpy did.
for (int e = 0; e < WorldConst.numberOfCells; e++)
{
hCellList[e] = CellList[e];
}
And I get the same error at hCellList[e] = CellList[e];
Structure of CellBot:
// Per-cell record: two ids, five Vec3 values, eight float parameters and two
// more ids. Members are declared in the same order as before, so the object
// layout is unchanged (arrays of CellBot are copied as raw bytes elsewhere).
// NOTE(review): units and exact meaning of the fields are not visible here;
// the grouping below follows the field names only.
struct CellBot
{
    // Identifiers.
    int mainId, subId;

    // Vector-valued state.
    Vec3 coord, speed, nspeed, velocity, nvelocity;

    // Scalar parameters.
    float radiusView, radiusAttraction, radiusRepulsion;
    float forceAttraction, forceRepulsion;
    float radius, mass, frictionBounce;

    // Further identifiers.
    int colorId, groupId;
};
Vec3:
// Three-component vector template whose members are all callable from host
// and device code (__host__ __device__).
//
// VEC3_VALIDATE() is a macro defined outside this excerpt.
// NOTE(review): presumably a debug-time sanity check of x/y/z -- confirm
// against its definition.
template <typename T=float>
class XVector3
{
public:

    typedef T value_type;

    // Default: all components zero.
    __host__ __device__ inline XVector3() : x(0.0f), y(0.0f), z(0.0f) {}
    // Splat: every component set to a.
    __host__ __device__ inline XVector3(T a) : x(a), y(a), z(a) {}
    // From array: reads p[0], p[1], p[2]; p must point to at least three values.
    __host__ __device__ inline XVector3(const T* p) : x(p[0]), y(p[1]), z(p[2]) {}
    // From components; validates the freshly stored values.
    __host__ __device__ inline XVector3(T x_, T y_, T z_) : x(x_), y(y_), z(z_)
    {
        VEC3_VALIDATE();
    }

    // Pointer conversions: return the address of x.
    // NOTE(review): indexing the result as T[3] relies on x, y, z being laid
    // out contiguously.
    __host__ __device__ inline operator T* () { return &x; }
    __host__ __device__ inline operator const T* () const { return &x; }

    // Overwrites all three components. Validation runs AFTER the assignment so
    // that it checks the new values, matching the 3-argument constructor
    // (previously it ran first and checked the values being discarded).
    __host__ __device__ inline void Set(T x_, T y_, T z_) { x = x_; y = y_; z = z_; VEC3_VALIDATE(); }

    // Binary operators: copy *this, apply the matching compound assignment
    // (which validates the result) and return the copy. The unreachable
    // VEC3_VALIDATE() calls that used to follow `return r;` were removed.
    __host__ __device__ inline XVector3<T> operator * (T scale) const { XVector3<T> r(*this); r *= scale; return r; }
    __host__ __device__ inline XVector3<T> operator / (T scale) const { XVector3<T> r(*this); r /= scale; return r; }
    __host__ __device__ inline XVector3<T> operator + (const XVector3<T>& v) const { XVector3<T> r(*this); r += v; return r; }
    __host__ __device__ inline XVector3<T> operator - (const XVector3<T>& v) const { XVector3<T> r(*this); r -= v; return r; }
    __host__ __device__ inline XVector3<T> operator /(const XVector3<T>& v) const { XVector3<T> r(*this); r /= v; return r; }
    __host__ __device__ inline XVector3<T> operator *(const XVector3<T>& v) const { XVector3<T> r(*this); r *= v; return r; }

    // Compound assignments: component-wise update, validate, return *this.
    __host__ __device__ inline XVector3<T>& operator *=(T scale) {x *= scale; y *= scale; z*= scale; VEC3_VALIDATE(); return *this;}
    // Scalar division multiplies by the reciprocal, computed once.
    __host__ __device__ inline XVector3<T>& operator /=(T scale) {T s(1.0f/scale); x *= s; y *= s; z *= s; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator +=(const XVector3<T>& v) {x += v.x; y += v.y; z += v.z; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator -=(const XVector3<T>& v) {x -= v.x; y -= v.y; z -= v.z; VEC3_VALIDATE(); return *this;}
    __host__ __device__ inline XVector3<T>& operator /=(const XVector3<T>& v) {x /= v.x; y /= v.y; z /= v.z; VEC3_VALIDATE(); return *this; }
    __host__ __device__ inline XVector3<T>& operator *=(const XVector3<T>& v) {x *= v.x; y *= v.y; z *= v.z; VEC3_VALIDATE(); return *this; }

    // True when any component differs (exact comparison, no tolerance).
    __host__ __device__ inline bool operator != (const XVector3<T>& v) const { return (x != v.x || y != v.y || z != v.z); }

    // negate
    __host__ __device__ inline XVector3<T> operator -() const { VEC3_VALIDATE(); return XVector3<T>(-x, -y, -z); }

    // Runs VEC3_VALIDATE() on the current components.
    __host__ __device__ void Validate()
    {
        VEC3_VALIDATE();
    }

    T x,y,z;
};
// Single-precision aliases: Vec3 and Vector3 both name XVector3<float>.
typedef XVector3<float> Vec3, Vector3;
// Scalar-on-the-left scaling (lhs * rhs): copies rhs, scales the copy in place
// with operator*=, and returns it. T is deduced from both arguments, so the
// scalar must have exactly the vector's component type.
template <typename T>
__host__ __device__ XVector3<T> operator *(T lhs, const XVector3<T>& rhs)
{
    return XVector3<T>(rhs) *= lhs;
}
// Exact component-wise equality (no tolerance). Components are compared in
// x, y, z order and the comparison stops at the first mismatch.
template <typename T>
__host__ __device__ bool operator==(const XVector3<T>& lhs, const XVector3<T>& rhs)
{
    if (!(lhs.x == rhs.x))
        return false;
    if (!(lhs.y == rhs.y))
        return false;
    return lhs.z == rhs.z;
}
It is very hard to understand your question. Please write a more complete error message and explanation of what you are doing next time!
My guess, however, is that you are running into the following problem: assert statements add run-time overhead and are therefore usually only compiled into your code in debug mode. In release mode (when NDEBUG is defined) they are removed entirely, which means the expression inside the assert is never evaluated. In your code, however, you are calling malloc inside an assert. Therefore in the debug version you get the memory you want, while in the release version the allocation never happens, hCellList is never assigned, and the program crashes as soon as it writes through that pointer. The line is:
// The allocation is the assert's own argument, so it disappears together with the assert when asserts are compiled out.
assert(!((hCellList = (CellBot *)malloc(CellSize)) == NULL));
What you should do instead is:
hCellList = (CellBot *)malloc(CellSize);
assert(!(hCellList == NULL));
User contributions licensed under CC BY-SA 3.0