OpenACC, cuRAND, CMake: undefined reference to `__pgicudalib_curandUniformXORWOW


Question I am trying to generate random numbers with OpenACC and the cuRAND library. I have a simple piece of code (just trying a few things), which is basically a copy of the PGI cuRAND examples (/opt/pgi/linux86-64/2018/examples/CUDA-Libraries/cuRAND). The problem is that I run into two errors: undefined reference to `__pgicudalib_curandInitXORWOW' and undefined reference to `__pgicudalib_curandUniformXORWOW'. Those symbols are declared in the openacc_curand.h file:

#define curand_init                 __pgicudalib_curandInitXORWOW
#define curand_uniform              __pgicudalib_curandUniformXORWOW

#pragma acc routine(__pgicudalib_curandInitXORWOW) seq
extern void __pgicudalib_curandInitXORWOW(unsigned long long, unsigned long long, unsigned long long, curandStateXORWOW_t *);
#pragma acc routine(__pgicudalib_curandUniformXORWOW) seq
extern float __pgicudalib_curandUniformXORWOW(curandStateXORWOW_t *);

This is the source code: openacc-test.cpp

#include <openacc.h>
#include <array>
#include "openacc_curand.h" 

constexpr int SIZE = 16;

std::array<float, SIZE> data;
float* d_data;

void init(int x){
    for(int i = 0; i < SIZE; ++i){
        data[i] = x;

void print(){
    printf("Host: [");
    for(int i = 0; i < SIZE; ++i){
        printf("data: %.5f; ", data[i]);

void do_stuff_on_gpu(){
    unsigned long long seed;
    unsigned long long seq;
    unsigned long long offset;
    curandState_t state;
    #pragma acc parallel deviceptr(d_data) private(state)
      seed = 12345ULL;
      seq = 0ULL;
      offset = 0ULL;
      curand_init(seed, seq, offset, &state);
      #pragma acc loop seq
      for(int i = 0; i < SIZE; ++i){
        d_data[i] = curand_uniform(&state); 

int main(int argc, char** argv) {
    d_data = static_cast<float*>(acc_malloc(SIZE * sizeof(float)));

    acc_map_data(, d_data, SIZE * sizeof(float));
    acc_update_device(, SIZE * sizeof(float));


    acc_update_self(, SIZE * sizeof(float));


    return EXIT_SUCCESS;

This is the cmake file:

cmake_minimum_required(VERSION 3.10)
project(openacc-test VERSION 1.0.0 LANGUAGES CXX)

SET( CMAKE_CXX_FLAGS_DEV "-g -O0 -Minfo=accel -ta=tesla,nollvm -Mcudalib=curand" )


find_package(OpenACC REQUIRED)

add_executable(openacc-test ${PROJECT_SOURCE_DIR}/src/openacc-test.cpp)
target_compile_features(openacc-test PRIVATE cxx_std_14)
target_include_directories(openacc-test PRIVATE ${PROJECT_SOURCE_DIR}/include)
target_compile_options(openacc-test PRIVATE ${OpenACC_CXX_FLAGS})
target_link_libraries(openacc-test PRIVATE ${OpenACC_CXX_FLAGS} -lcurand -L/opt/pgi/linux86-64/2018/cuda/10.0/lib64)

And I build it like this from the build folder:

cmake -G "Unix Makefiles" -D CMAKE_BUILD_TYPE=Dev -D CMAKE_CXX_COMPILER=pgc++ ../ && \

make openacc-test && \


Since there are undefined reference errors, I guess there is something wrong with the linking process. I used the "-ta=tesla,nollvm -Mcudalib=curand" flags for PGI and manually set the path to the CUDA libraries with "-lcurand -L/opt/pgi/linux86-64/2018/cuda/10.0/lib64". I also tried the FindCUDA module and the native CUDA support in CMake, but neither seems to work. Any idea what is wrong here?

Edits: Fixed the syntax according to Mat's answer, but the error is still there.

If I build from the command line, I get the following output:

pgc++ -v -fast -ta=tesla:nollvm --c++11 -Minfo=accel openacc-test.cpp

Export PGI_CURR_CUDA_HOME=/opt/pgi/linux86-64/2018/cuda/10.0
Export PGI=/opt/pgi

/opt/pgi/linux86-64/18.10/bin/pggpp1 --llalign -Dunix -D__unix -D__unix__ -Dlinux -D__linux -D__linux__ -D__NO_MATH_INLINES -D__LP64__ -D__x86_64 -D__x86_64__ -D__LONG_MAX__=9223372036854775807L '-D__SIZE_TYPE__=unsigned long int' '-D__PTRDIFF_TYPE__=long int' -D__extension__= -D__amd_64__amd64__ -D__k8 -D__k8__ -D__SSE__ -D__MMX__ -D__SSE2__ -D__SSE3__ -D__SSE4A__ -D__ABM__ -D__PGI -D_GNU_SOURCE -D_PGCG_SOURCE -I- -I/opt/pgi/linux86-64/18.10/include-gcc70 -I/opt/pgi/linux86-64/18.10/include -I/usr/include/c++/7 -I/usr/include/x86_64-linux-gnu/c++/7 -I/usr/include/c++/7/backward -I/usr/lib/gcc/x86_64-linux-gnu/7/include -I/usr/local/include -I/usr/lib/gcc/x86_64-linux-gnu/7/include-fixed -I/usr/include/x86_64-linux-gnu -I/usr/include -I/opt/pgi/linux86-64/2018/cuda/10.0/include -D_ACCEL=201003 -D_OPENACC=201711 -D__CUDA_API_VERSION=10000 -DPGI_TESLA_TARGET --preinclude _cplus_preinclude.h --preinclude_macros _cplus_macros.h --gnu_version=70300 -D__pgnu_vsn=70300 --accel --preinclude openacc_predef.h --c++11 -q -o /tmp/ openacc-test.cpp

/opt/pgi/linux86-64/18.10/bin/pggpp2 openacc-test.cpp -opt 2 -x 119 0xa10000 -x 122 0x40 -x 123 0x1000 -x 127 4 -x 127 17 -x 19 0x400000 -x 28 0x40000 -x 120 0x10000000 -x 70 0x8000 -x 122 1 -x 125 0x20000 -quad -vect 56 -y 34 16 -x 34 0x8 -x 32 6291456 -y 19 8 -y 35 0 -x 42 0x30 -x 39 0x40 -x 199 10 -x 39 0x80 -x 59 4 -tp shanghai -x 120 0x1000 -astype 0 -x 121 1 -fn openacc-test.cpp -il /tmp/ -x 117 0x600 -x 123 0x80000000 -x 123 4 -x 119 0x20 -def __pgnu_vsn=70300 -autoinl 10 -x 168 400 -x 174 128000 -x 14 0x200000 -x 46 4 -x 14 0x400000 -x 120 0x200000 -x 70 0x40000000 -x 164 0x800000 -accel tesla -x 180 0x4000400 -x 121 0xc00 -x 186 0x80 -x 163 0x1 -x 186 0x80000 -cudaver 10000 -x 194 0x40000 -y 189 0x10 -cudaroot /opt/pgi/linux86-64/2018/cuda/10.0 -x 176 0x100 -cudacap 60 -x 189 0x8000 -y 163 0xc0000000 -y 189 0x4000000 -cudaroot /opt/pgi/linux86-64/2018/cuda/10.0 -x 9 1 -x 42 0x14200000 -x 72 0x1 -x 136 0x11 -quad -x 119 0x10000000 -x 129 0x40000000 -x 129 2 -x 164 0x1000 -x 0 0x1000000 -x 2 0x100000 -x 0 0x2000000 -x 161 16384 -x 162 16384 -gnuvsn 70300 -x 69 0x200 -cmdline '+pgc++ /tmp/ -v -fast -Mvect=sse -Mcache_align -Mflushz -Mpre -ta=tesla:nollvm --c++11 -Minfo=accel' -asm /tmp/pgc++3YTcLZpe7Hgh.s
     93, Accelerator kernel generated
         Generating Tesla code
         99, #pragma acc loop seq
     93, CUDA shared memory used for state
 /opt/pgi/linux86-64/18.10/bin/pgnvd -dcuda /opt/pgi/linux86-64/2018/cuda/10.0 -reloc /tmp/pgacc62TcUtvwk7F_.gpu -computecap=60 -ptx /tmp/pgaccA2Tco99QQ3zu.ptx -o /tmp/pgaccQ2Tc_MT360Ow.bin -ftz -cuda10000
/usr/lib/gcc/x86_64-linux-gnu/7/include/stddef.h(444): error: identifier "nullptr" is undefined

/usr/lib/gcc/x86_64-linux-gnu/7/include/stddef.h(444): error: expected a ";"

/usr/include/x86_64-linux-gnu/c++/7/bits/c++config.h(235): error: expected a ";"

/usr/include/c++/7/bits/exception.h(63): error: expected a ";"

/usr/include/c++/7/bits/exception.h(69): error: expected a ";"

/usr/include/c++/7/exception(49): error: expected a ";"

/usr/include/c++/7/exception(57): error: expected a ";"

/usr/include/c++/7/exception(67): error: expected a "{"

/usr/include/c++/7/bits/cxxabi_init_exception.h(63): error: expected a "{"

/usr/include/c++/7/typeinfo(99): error: expected a ";"

/usr/include/c++/7/typeinfo(187): error: not a class or struct name

/usr/include/c++/7/typeinfo(190): error: expected a ";"

/usr/include/c++/7/typeinfo(197): error: expected a ";"

/usr/include/c++/7/typeinfo(204): error: not a class or struct name

/usr/include/c++/7/typeinfo(207): error: expected a ";"

/usr/include/c++/7/typeinfo(214): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(63): error: function "__cxxabiv1::std::current_exception" returns incomplete type "__cxxabiv1::std::__exception_ptr::exception_ptr"

/usr/include/c++/7/bits/exception_ptr.h(63): error: expected a "{"

/usr/include/c++/7/bits/exception_ptr.h(73): error: namespace "__cxxabiv1::std" has no member "rethrow_exception"

/usr/include/c++/7/bits/exception_ptr.h(83): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(85): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(86): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(88): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(90): error: declaration is incompatible with previous "__cxxabiv1::std::current_exception"
(63): here

/usr/include/c++/7/bits/exception_ptr.h(90): error: use of a local type to declare a function

/usr/include/c++/7/bits/exception_ptr.h(90): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(91): error: use of a local type to declare a function

/usr/include/c++/7/bits/exception_ptr.h(93): error: namespace "__cxxabiv1::std" has no member "make_exception_ptr"

/usr/include/c++/7/bits/exception_ptr.h(93): error: a template friend declaration cannot be declared in a local class

/usr/include/c++/7/bits/exception_ptr.h(93): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(96): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(98): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(101): error: incomplete type is not allowed

/usr/include/c++/7/bits/exception_ptr.h(101): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(122): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(132): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(149): error: use of a local type to declare a function

/usr/include/c++/7/bits/exception_ptr.h(150): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(153): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(158): error: use of a local type to declare a function

/usr/include/c++/7/bits/exception_ptr.h(159): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(162): error: use of a local type to declare a function

/usr/include/c++/7/bits/exception_ptr.h(163): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(166): error: use of a local type to declare a function

/usr/include/c++/7/bits/exception_ptr.h(167): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(179): error: expected a ";"

/usr/include/c++/7/bits/exception_ptr.h(220): error: expected a ";"

/usr/include/c++/7/bits/move.h(46): error: identifier "constexpr" is undefined

/usr/include/c++/7/bits/move.h(46): error: "_Tp" is not a function or static data member

/usr/include/c++/7/bits/move.h(51): error: expected a ";"

/usr/include/c++/7/type_traits(71): error: identifier "constexpr" is undefined

/usr/include/c++/7/type_traits(71): error: template parameter "_Tp" may not be redeclared in this scope

/usr/include/c++/7/type_traits(71): error: expected a ";"

/usr/include/c++/7/type_traits(72): error: member "__cxxabiv1::std::integral_constant<_Tp, __v>::_Tp" is not a type name

/usr/include/c++/7/type_traits(73): error: member "__cxxabiv1::std::integral_constant<_Tp, __v>::_Tp" is not a type name

/usr/include/c++/7/type_traits(74): error: identifier "constexpr" is undefined

/usr/include/c++/7/type_traits(74): error: expected a ";"

/usr/include/c++/7/type_traits(84): error: identifier "constexpr" is undefined

/usr/include/c++/7/type_traits(84): error: "_Tp" is not a function or static data member

/usr/include/c++/7/type_traits(93): error: expected a declaration

/usr/include/c++/7/type_traits(93): error: expected a ";"

/usr/include/c++/7/type_traits(126): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(154): error: __bool_constant is not a template

/usr/include/c++/7/type_traits(154): error: not a class or struct name

/usr/include/c++/7/type_traits(245): error: identifier "char16_t" is undefined

/usr/include/c++/7/type_traits(249): error: identifier "char32_t" is undefined

/usr/include/c++/7/type_traits(249): error: class "__cxxabiv1::std::__is_integral_helper<<error-type>>" has already been defined

/usr/include/c++/7/type_traits(362): error: namespace "__cxxabiv1::std" has no member "size_t"

/usr/include/c++/7/type_traits(463): error: expected a ">"

/usr/include/c++/7/type_traits(467): error: expected a ">"

/usr/include/c++/7/type_traits(475): error: expected a ">"

/usr/include/c++/7/type_traits(479): error: expected a ">"

/usr/include/c++/7/type_traits(487): error: expected a ">"

/usr/include/c++/7/type_traits(491): error: expected a ">"

/usr/include/c++/7/type_traits(499): error: expected a ">"

/usr/include/c++/7/type_traits(503): error: expected a ">"

/usr/include/c++/7/type_traits(511): error: expected a ">"

/usr/include/c++/7/type_traits(515): error: expected a ">"

/usr/include/c++/7/type_traits(523): error: expected a ">"

/usr/include/c++/7/type_traits(527): error: expected a ">"

/usr/include/c++/7/type_traits(535): error: expected a ">"

/usr/include/c++/7/type_traits(539): error: expected a ">"

/usr/include/c++/7/type_traits(547): error: expected a ">"

/usr/include/c++/7/type_traits(551): error: expected a ">"

/usr/include/c++/7/type_traits(561): error: namespace "__cxxabiv1::std" has no member "nullptr_t"

/usr/include/c++/7/type_traits(582): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(588): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(595): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(602): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(612): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(638): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(748): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(762): error: expected a ";"

/usr/include/c++/7/type_traits(777): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

/usr/include/c++/7/type_traits(787): error: expected a ")"

/usr/include/c++/7/type_traits(798): error: an explicit template argument list is not allowed on this declaration

/usr/include/c++/7/type_traits(798): error: expected a type specifier

/usr/include/c++/7/type_traits(798): error: function returning function is not allowed

/usr/include/c++/7/type_traits(798): error: expected a ";"

/usr/include/c++/7/type_traits(804): error: space required between adjacent ">" delimiters of nested template argument lists (">>" is the right shift operator)

Error limit reached.
100 errors detected in the compilation of "/tmp/pgnvdP3Tc7ZGSTVCf.ii".
Compilation terminated.
PGCC-F-0155-Compiler failed to translate accelerator region (see -Minfo messages): Device compiler exited with error status code (openacc-test.cpp: 1)
PGCC/x86 Linux 18.10-1: compilation aborted
pgc++-Fatal-cpp2 completed with exit code 1

Unlinking /tmp/
Unlinking /tmp/pgc++3YTcLZpe7Hgh.s
Unlinking /tmp/pgc++VYTcnqU9SlJ_.ll
asked on Stack Overflow Mar 13, 2019 by Fabian • edited Mar 14, 2019 by Fabian

2 Answers


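The undefined references are actually from the host code. The problem is that you're missing braces around your parallel region, so you're only offloading the first statement, i.e. "seed = 12345ULL;". Everything after it, including the curand_init and curand_uniform calls, is compiled as host code, where those device routines have no definition.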

To fix:

void do_stuff_on_gpu(){
    unsigned long long seed;
    unsigned long long seq;
    unsigned long long offset;
    curandState_t state;
    #pragma acc parallel deviceptr(d_data) private(state)
    {  // << Add here
      seed = 12345ULL;
      seq = 0ULL;
      offset = 0ULL;
      curand_init(seed, seq, offset, &state);
      #pragma acc loop seq
      for(int i = 0; i < SIZE; ++i){
        d_data[i] = curand_uniform(&state);
    } // << Add here

% pgc++ -fast -ta=tesla:nollvm --c++11 test.cpp -Minfo=accel
     29, Generating Tesla code
         36, #pragma acc loop seq
     29, CUDA shared memory used for state
answered on Stack Overflow Mar 13, 2019 by Mat Colgrove

The second error is actually unrelated to the first and will require a bit of detail to explain.

PGI has two back-end device code generation paths, LLVM and CUDA (nollvm), with the default being LLVM. Calling cuRAND from device code is one of the few cases where the CUDA back-end is required, since the cuRAND device code is contained in a CUDA header file that needs to get inlined. Unfortunately, we don't have a way to do this yet from the LLVM path.

In general, when you see something like the second error, it's due to compiling C++14 (or C++11/17) code without the appropriate language flag. The problem here is that the PGI driver isn't passing the correct language flag to the CUDA back-end compiler (cicc). It worked for me since I have GNU 4.8.5 installed, so C++11 isn't on by default. However, you're using GNU 7, in which C++14 is enabled by default, but since we're not passing "--c++14" to cicc, you get the error.

I've filed a problem report (TPR#26979) to track this issue and have asked our engineers to pass the appropriate language flag when using newer GNU versions.

As a work-around, we can update one of PGI's configuration files (pgnvdrc) so you can pass the correct flag via an environment variable. In your PGI installation, find the file "$PGI/linux86-64/18.10/bin/pgnvdrc" and make the following two-line change:

% diff -u pgnvdrc
--- 2019-03-14 13:12:45.232168580 -0700
+++ pgnvdrc     2019-03-14 13:12:57.026220144 -0700
@@ -18,6 +18,8 @@
 variable LDLIB is environment(LD_LIBRARY_PATH);
 variable NEWLDLIB is default($LDLIB);

+variable CICCFLAG is environment(CICCFLAG);
 variable DYLDLIB is environment(DYLD_LIBRARY_PATH);
 variable NEWDYLDLIB is default($DYLDLIB);

@@ -547,6 +549,7 @@
        -arch $COMPCAP -m$CUWIDTH -ftz=$FTZ -prec_div=$NOFASTMATH -prec_sqrt=$NOFASTMATH -fmad=$USEFMA
+       $CICCFLAG
        $NVVMARGS -O$CUOPT $input -o $out3

Next set the environment variable "CICCFLAG=--c++14" and recompile.

Here I've updated my 18.10 compiler to use GNU 7.2. I can recreate the error, but after setting CICCFLAG, the code compiles correctly.

% pgc++ -fast -ta=tesla:nollvm --c++14 openacc-test.cpp
/home/sw/thirdparty/gcc/gcc-7.2.0/linux86-64/lib/gcc/x86_64-pc-linux-gnu/7.2.0/include/stddef.h(444): error: identifier "nullptr" is undefined

/home/sw/thirdparty/gcc/gcc-7.2.0/linux86-64/lib/gcc/x86_64-pc-linux-gnu/7.2.0/include/stddef.h(444): error: expected a ";"

/home/sw/thirdparty/gcc/gcc-7.2.0/linux86-64/include/c++/7.2.0/x86_64-pc-linux-gnu/bits/c++config.h(235): error: expected a ";"
... more errors ...

% setenv CICCFLAG "--c++14"
% pgc++ -fast -ta=tesla:nollvm --c++14 -Minfo=accel openacc-test.cpp
     30, Accelerator kernel generated
         Generating Tesla code
         36, #pragma acc loop seq
     30, CUDA shared memory used for state
% a.out
Host: [data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; ]
Host: [data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; data: 42.00000; ]
Host: [data: 0.29890; data: 0.38100; data: 0.28855; data: 0.40197; data: 0.74258; data: 0.26742; data: 0.35657; data: 0.70735; data: 0.55123; data: 0.72577; data: 0.64131; data: 0.48502; data: 0.09711; data: 0.14655; data: 0.15180; data: 0.35960; ]
answered on Stack Overflow Mar 14, 2019 by Mat Colgrove

User contributions licensed under CC BY-SA 3.0