I am not getting AltiVec instructions when compiling the following code:
testsimd.cpp
////////////////////////////////////////
#include <omp.h>
// Minimal reproducer: element-wise integer multiply of two stack arrays
// inside a loop annotated with OpenMP `simd`.
int main()
{
int LEN=3200;
// LEN is a non-const int, so these are variable-length arrays (a GNU
// extension in C++). 16-byte alignment is requested for all three. None of
// them is initialized, and c is not read or written by the loop body.
int __attribute__ ((aligned(16))) a[LEN], b[LEN], c[LEN];
// Marks the loop as a SIMD loop and asserts that a, b and c are 16-byte aligned.
// NOTE(review): the build command shown in this post passes no -O option;
// GCC's loop vectorizer normally runs only when optimizing - confirm by
// rebuilding with -O2 or -O3 before concluding AltiVec code cannot be generated.
#pragma omp simd safelen(16) aligned(a:16) aligned(b:16) aligned(c:16)
for( int i=0; i<LEN; i++ )
// In-place product: a[i] is overwritten with a[i]*b[i].
a[i] = a[i]*b[i];
return 0;
}
///////////////
Compiled using:
$CXX -S testsimd.cpp -fopenmp -fopenmp-simd -maltivec -mabi=altivec -I /opt/fsl-qoriq/2.0/sysroots/ppc64e6500-fsl-linux/usr/lib64/gcc/powerpc64-fsl-linux/4.9.2/include/
echo $CXX
powerpc64-fsl-linux-g++ -mhard-float -m64 -mcpu=e6500 --sysroot=/opt/fsl-qoriq/2.0/sysroots/ppc64e6500-fsl-linux
$CXX -v
Using built-in specs.
COLLECT_GCC=powerpc64-fsl-linux-g++
COLLECT_LTO_WRAPPER=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/libexec/powerpc64-fsl-linux/gcc/powerpc64-fsl-linux/4.9.2/lto-wrapper
Target: powerpc64-fsl-linux
Configured with: /home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/work-shared/gcc-4.9.2-r0/gcc-4.9.2/configure --build=x86_64-linux --host=x86_64-fslsdk-linux --target=powerpc64-fsl-linux --prefix=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --exec_prefix=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --bindir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/bin/powerpc64-fsl-linux --sbindir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/bin/powerpc64-fsl-linux --libexecdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/libexec/powerpc64-fsl-linux --datadir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share --sysconfdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/etc --sharedstatedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/com --localstatedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/var --libdir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/lib/powerpc64-fsl-linux --includedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/include --oldincludedir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/include --infodir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share/info --mandir=/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr/share/man --disable-silent-rules --disable-dependency-tracking --with-libtool-sysroot=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --with-gnu-ld --enable-shared --enable-languages=c,c++ --enable-threads=posix --enable-multilib --enable-c99 --enable-long-long --enable-symvers=gnu --enable-libstdcxx-pch --program-prefix=powerpc64-fsl-linux- --without-local-prefix --enable-target-optspace --enable-lto --enable-libssp --disable-bootstrap --disable-libmudflap --with-system-zlib --with-linker-hash-style=gnu --enable-linker-build-id --with-ppl=no --enable-checking=release --enable-cheaders=c_global --with-gxx-include-dir=/not/exist/usr/include/c++/4.9.2 
--with-build-time-tools=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-linux/usr/powerpc64-fsl-linux/bin --with-sysroot=/not/exist --with-build-sysroot=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/b4860qds-64b --with-long-double-128 --enable-poison-system-directories --with-mpfr=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --with-mpc=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux --enable-nls --enable-__cxa_atexit --with-isl=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr --with-cloog=/home/jenkins/ci/iso-make/master/b4860qds-64b/build_b4860qds-64b/tmp/sysroots/x86_64-nativesdk-fslsdk-linux/opt/fsl-qoriq/2.0/sysroots/x86_64-fslsdk-linux/usr
Thread model: posix
gcc version 4.9.2 (GCC)
testsimd.s
//////////////////////////
...
li 10,0
stw 10,48(31)
b .L2
.L3:
ld 8,64(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,8,10
lwz 10,0(10)
extsw 8,10
ld 7,80(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,7,10
lwz 10,0(10)
extsw 10,10
mullw 10,8,10
extsw 8,10
ld 7,64(31)
lwz 10,48(31)
extsw 10,10
sldi 10,10,2
add 10,7,10
stw 8,0(10)
lwz 10,48(31)
addi 10,10,1
stw 10,48(31)
.L2:
lwz 10,48(31)
cmpw 7,10,9
blt 7,.L3
li 10,0
Please create the program as follows. It uses explicit AltiVec vector types, so it does not depend on the compiler auto-vectorizing a loop.
$ cat test_altivec.c
#include
int main()
{
int LEN=3200;
float __attribute__ ((aligned(16))) a[LEN], b[LEN], c[LEN];
*(vector float*)c = (*(vector float*)a)*(*(vector float*)b);
return 0;
}
Build the program with the following commands:
$ source /opt/fsl-qoriq/2.0/environment-setup-ppc64e6500-fsl-linux
$ ${CC} test_altivec.c -maltivec -mabi=altivec -o test_altivec
$ powerpc64-fsl-linux-objdump -S test_altivec >test_altivec.s
Part of the resulting disassembly looks like the following:
10000700: f9 3f 00 60 std r9,96(r31)
10000704: e9 3f 00 60 ld r9,96(r31)
10000708: e9 5f 00 40 ld r10,64(r31)
1000070c: 7d a0 50 ce lvx v13,0,r10
10000710: e9 5f 00 50 ld r10,80(r31)
10000714: 7c 20 50 ce lvx v1,0,r10
10000718: 10 1f 03 8c vspltisw v0,-1
1000071c: 10 00 01 84 vslw v0,v0,v0
10000720: 10 0d 00 6e vmaddfp v0,v13,v1,v0
10000724: 7c 00 49 ce stvx v0,0,r9
Have a great day,
TIC
-----------------------------------------------------------------------------------------------------------------------
Note: If this post answers your question, please click the Correct Answer button. Thank you!
-----------------------------------------------------------------------------------------------------------------------