Answered (Assumed Answered)

Issue using #pragma omp simd

Question asked by Mohamed Husain on Sep 25, 2018
Latest reply on Sep 29, 2018 by Yiping Wang

I am not getting AltiVec instructions when compiling the following code:

testsimd.cpp

////////////////////////////////////////

#include <omp.h>

// Minimal reproducer for the vectorization question: multiplies a[] by b[]
// element-wise inside an OpenMP `simd` loop and stores each product back
// into a[]. Always returns 0; the products are never read after the loop.
int main()
{
    // LEN is a plain, non-const int, so the arrays declared below have a
    // runtime bound (variable-length arrays; not standard C++, accepted by
    // GCC as an extension).
    int LEN=3200;
    // Three int arrays of LEN elements declared with a 16-byte alignment
    // attribute. None of them is initialized, and c is not referenced in the
    // loop body (it appears only in the `aligned` clause of the pragma).
    int __attribute__ ((aligned(16))) a[LEN], b[LEN], c[LEN];
// Ask the compiler to vectorize the following loop: safelen(16) states that
// up to 16 iterations may be executed concurrently, and each aligned(...:16)
// clause asserts that the named array is 16-byte aligned.
#pragma omp simd safelen(16) aligned(a:16) aligned(b:16) aligned(c:16)
    for( int i=0; i<LEN; i++ )
       // Reads a[i] and b[i], which were never assigned, so the operands are
       // indeterminate values.
       // NOTE(review): nothing consumes a[] afterwards, so an optimizing
       // build may be free to drop this loop entirely - confirm before
       // relying on the generated assembly.
       a[i] = a[i]*b[i];

    return 0;
}

 

///////////////

Compiled using:

 $CXX -S testsimd.cpp -fopenmp -fopenmp-simd -maltivec -mabi=altivec -I /opt/fsl-qoriq/2.0/sysroots/ppc64e6500-fsl-linux/usr/lib64/gcc/powerpc64-fsl-linux/4.9.2/include/

 

echo $CXX
powerpc64-fsl-linux-g++ -mhard-float -m64 -mcpu=e6500 --sysroot=/opt/fsl-qoriq/2.0/sysroots/ppc64e6500-fsl-linux

 

$CXX -v
Using built-in specs.
COLLECT_GCC=powerpc64-fsl-linux-g++
COLLECT_LTO_WRAPPER=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/libexec/powerpc64-fsl-linux/gcc/powerpc64-fsl-linux/4.9.2/lto-wrapper
Target: powerpc64-fsl-linux
Configured with: /home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/work-shared/gcc-4.9.2-r0/gcc-4.9.2/configure --build=x86_64-linux --host=x86_64-fslsdk-linux --target=powerpc64-fsl-linux --prefix=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --exec_prefix=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --bindir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/bin/powerpc64-fsl-linux --sbindir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/bin/powerpc64-fsl-linux --libexecdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/libexec/powerpc64-fsl-linux --datadir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share --sysconfdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/etc --sharedstatedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/com --localstatedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/var --libdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/lib/powerpc64-fsl-linux --includedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/include --oldincludedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/include --infodir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share/info --mandir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share/man --disable-silent-rules --disable-dependency-tracking --with-libtool-sysroot=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --with-gnu-ld --enable-shared --enable-languages=c,c++ --enable-threads=posix --enable-multilib --enable-c99 --enable-long-long --enable-symvers=gnu --enable-libstdcxx-pch --program-prefix=powerpc64-fsl-linux- --without-local-prefix --enable-target-optspace --enable-lto --enable-libssp --disable-bootstrap --disable-libmudflap --with-system-zlib --with-linker-hash-style=gnu --enable-linker-build-id --with-ppl=no --enable-checking=release --enable-cheaders=c_global --with-gxx-include-dir=/not/exist/usr/include/c++/4.9.2 
--with-build-time-tools=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-linux/usr/powerpc64-fsl-linux/bin --with-sysroot=/not/exist --with-build-sysroot=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/b4860qds-64b --with-long-double-128 --enable-poison-system-directories --with-mpfr=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --with-mpc=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --enable-nls --enable-__cxa_atexit --with-isl=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --with-cloog=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr
Thread model: posix
gcc version 4.9.2 (GCC)

 

testsimd.s

//////////////////////////

...

li 10,0
stw 10,48(31)
b .L2
.L3:
ld 8,64(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,8,10
lwz 10,0(10)
extsw 8,10
ld 7,80(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,7,10
lwz 10,0(10)
extsw 10,10
mullw 10,8,10
extsw 8,10
ld 7,64(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,7,10
stw 8,0(10)
lwz 10,48(31)
addi 10,10,1
stw 10,48(31)
.L2:
lwz 10,48(31)
cmpw 7,10,9
blt 7,.L3
li 10,0

Attachments

Outcomes