issue in using #pragma omp simd

cancel
Showing results for 
Show  only  | Search instead for 
Did you mean: 

issue in using #pragma omp simd

555 Views
mohamedhusain
Contributor I

I am not getting AltiVec instructions when compiling the following code.

testsimd.cpp

////////////////////////////////////////

#include <omp.h>

// Test case: element-wise multiply of a[] by b[] (result stored back into a[])
// in a loop annotated with `#pragma omp simd`.
// NOTE(review): the compile command quoted in this post has no -O option, and
// the generated listing reloads the loop counter from the stack on every
// iteration, which looks like unoptimized output -- presumably the vectorizer
// never ran; confirm by rebuilding with -O2 or -O3.
int main()
{
    int LEN=3200;
    // LEN is not a constant, so these are variable-length arrays. a and b are
    // read below without ever being initialized, and c is never used.
    int __attribute__ ((aligned(16))) a[LEN], b[LEN], c[LEN];
    // The pragma applies to the for-loop that immediately follows it.
#pragma omp simd safelen(16) aligned(a:16) aligned(b:16) aligned(c:16)
    for( int i=0; i<LEN; i++ )
       a[i] = a[i]*b[i];

    // The products stored in a[] are never read after the loop.
    // NOTE(review): an optimizing build may therefore drop the loop entirely --
    // TODO confirm, and consume the result if the loop must survive.
    return 0;
}

///////////////

compiled using

 $CXX -S testsimd.cpp -fopenmp -fopenmp-simd -maltivec -mabi=altivec -I /opt/fsl-qoriq/2.0/sysroots/ppc64e6500-fsl-linux/usr/lib64/gcc/powerpc64-fsl-linux/4.9.2/include/

echo $CXX
powerpc64-fsl-linux-g++ -mhard-float -m64 -mcpu=e6500 --sysroot=/opt/fsl-qoriq/2.0/sysroots/ppc64e6500-fsl-linux

$CXX -v
Using built-in specs.
COLLECT_GCC=powerpc64-fsl-linux-g++
COLLECT_LTO_WRAPPER=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/libexec/powerpc64-fsl-linux/gcc/powerpc64-fsl-linux/4.9.2/lto-wrapper
Target: powerpc64-fsl-linux
Configured with: /home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/work-shared/gcc-4.9.2-r0/gcc-4.9.2/configure --build=x86_64-linux --host=x86_64-fslsdk-linux --target=powerpc64-fsl-linux --prefix=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --exec_prefix=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --bindir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/bin/powerpc64-fsl-linux --sbindir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/bin/powerpc64-fsl-linux --libexecdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/libexec/powerpc64-fsl-linux --datadir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share --sysconfdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/etc --sharedstatedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/com --localstatedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/var --libdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/lib/powerpc64-fsl-linux --includedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/include --oldincludedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/include --infodir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share/info --mandir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share/man --disable-silent-rules --disable-dependency-tracking --with-libtool-sysroot=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --with-gnu-ld --enable-shared --enable-languages=c,c++ --enable-threads=posix --enable-multilib --enable-c99 --enable-long-long --enable-symvers=gnu --enable-libstdcxx-pch --program-prefix=powerpc64-fsl-linux- --without-local-prefix --enable-target-optspace --enable-lto --enable-libssp --disable-bootstrap --disable-libmudflap --with-system-zlib --with-linker-hash-style=gnu --enable-linker-build-id --with-ppl=no --enable-checking=release --enable-cheaders=c_global --with-gxx-include-dir=/not/exist/usr/include/c++/4.9.2 
--with-build-time-tools=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-linux/usr/powerpc64-fsl-linux/bin --with-sysroot=/not/exist --with-build-sysroot=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/b4860qds-64b --with-long-double-128 --enable-poison-system-directories --with-mpfr=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --with-mpc=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --enable-nls --enable-__cxa_atexit --with-isl=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --with-cloog=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr
Thread model: posix
gcc version 4.9.2 (GCC)

testsimd.s

//////////////////////////

...

li 10,0
stw 10,48(31)
b .L2
.L3:
ld 8,64(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,8,10
lwz 10,0(10)
extsw 8,10
ld 7,80(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,7,10
lwz 10,0(10)
extsw 10,10
mullw 10,8,10
extsw 8,10
ld 7,64(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,7,10
stw 8,0(10)
lwz 10,48(31)
addi 10,10,1
stw 10,48(31)
.L2:
lwz 10,48(31)
cmpw 7,10,9
blt 7,.L3
li 10,0

0 Kudos
1 Reply

428 Views
yipingwang
NXP TechSupport
NXP TechSupport

Please create the program as follows.

$ cat test_altivec.c
#include <altivec.h>
/* Minimal example: a single explicit `vector float` multiply, with no loop. */
int main()
{
int LEN=3200;
/* LEN is not a constant, so these are variable-length arrays; a and b are
   read below without being initialized. */
float __attribute__ ((aligned(16))) a[LEN], b[LEN], c[LEN];
/* Treats the start of each array as one `vector float`: multiplies the first
   vector of a by the first vector of b and stores the product at the start of
   c. The remaining elements of the arrays are not touched. */
*(vector float*)c = (*(vector float*)a)*(*(vector float*)b);
return 0;
}

Build the program with the following commands.

$ source /opt/fsl-qoriq/2.0/environment-setup-ppc64e6500-fsl-linux

$ ${CC} test_altivec.c -maltivec -mabi=altivec -o test_altivec

$ powerpc64-fsl-linux-objdump -S test_altivec >test_altivec.s

You will find the following in part of the disassembly.

   10000700:   f9 3f 00 60     std     r9,96(r31)

   10000704:   e9 3f 00 60     ld     r9,96(r31)

   10000708:   e9 5f 00 40     ld     r10,64(r31)

   1000070c:   7d a0 50 ce     lvx     v13,0,r10

   10000710:   e9 5f 00 50     ld     r10,80(r31)

   10000714:   7c 20 50 ce     lvx     v1,0,r10

   10000718:   10 1f 03 8c     vspltisw v0,-1

   1000071c:   10 00 01 84     vslw   v0,v0,v0

   10000720:   10 0d 00 6e     vmaddfp v0,v13,v1,v0

   10000724:   7c 00 49 ce     stvx   v0,0,r9


Have a great day,
TIC

-----------------------------------------------------------------------------------------------------------------------
Note: If this post answers your question, please click the Correct Answer button. Thank you!
-----------------------------------------------------------------------------------------------------------------------

0 Kudos