Altivec vector comparison - GCC AmigaOS4 SDK

6 posts / 0 new

Last post

Sat, 2018-04-21 16:36

flash

Offline

Last seen: 3 weeks 3 days ago

Joined: 2018-02-24 17:25

Altivec vector comparison - GCC AmigaOS4 SDK

Hello friends,
I'd like to ask you if the following code is ok.
It uses altivec extensions under GCC AmigaOS4 compiler/SDK.

I'm using AmigaOS under Winuae and can't test Altivec code :-(

I'd like to draw your attention to the following line: "if (vec_all_gt (vec_add (zi2, zr2), maxdist)) break;"

I know vector comparisons are a bit strange, and in some other examples on the internet I have found more complex ways to do the same job using masking techniques.

Here is the whole function:

#include "exec/types.h"
#include "FlashMandel.h"
 
#ifdef __ALTIVEC__
#include "altivec.h"
 
/*
 * MandelnAltivec - Mandelbrot-style iteration written with AltiVec intrinsics.
 *
 * Starting from z = (Cre, Cim), every step runs the inner loop (which squares
 * the complex value held in zr/zi once per pass), tests zr2 + zi2 against
 * maxdist, and then adds (Cre, Cim) back onto z.  The step is written out
 * twice inside the do/while, so one trip of the outer loop consumes two
 * counts of Iterazioni.
 *
 * Iterazioni  iteration budget; decremented once per step.
 * Power       the inner loop counts Exp from Power down to 0 inclusive.
 * Cre, Cim    cast to float; used as the start value and as the constant
 *             added after each step.
 *
 * Returns 0 on every path, whether the loop ends through a break or by
 * running out of Iterazioni.
 * NOTE(review): no iteration count reaches the caller in this version -
 * presumably something derived from Iterazioni is meant to be returned;
 * confirm against the callers.
 */
__inline WORD MandelnAltivec (LONG Iterazioni, LONG Power, LDouble Cre, LDouble Cim)
{
      REGISTER  WORD Exp;
      /* NOTE(review): these brace initialisers supply a single value;
       * presumably GCC puts it in element 0 only and zero-fills the other
       * elements (unlike the parenthesised AltiVec literal form) - confirm. */
      const vector float zero = (vector float) {0};      
      const vector float maxdist = (vector float) {4.0};
 
      vector float vCre = (vector float) {(float) Cre};
      vector float vCim = (vector float) {(float) Cim};
      vector float zr = vCre;
      vector float zi = vCim;
      vector float zi2, zr2;
 
     do
     {
        /* Complex squaring, repeated for Exp = Power .. 0.  vec_madd with a
         * zero addend is used as a plain multiply.  If Power is -1 the body
         * never runs and zi2/zr2 are read below without having been set. */
        for (Exp = Power; Exp != -1; Exp--)
        {
            zi2 = vec_madd (zi, zi, zero);
            zi = vec_madd (zi, zr, zero);
            zr2 = vec_madd (zr, zr, zero);
            zr = vec_sub (zr2, zi2);
            zi = vec_add (zi, zi);
        }
 
        /* zi2/zr2 are the component squares taken before the last squaring.
         * NOTE(review): vec_all_gt requires EVERY element to exceed maxdist;
         * if elements 1..3 are zero on both sides (see the initialiser note
         * above) this can never be true and the loop never exits early.
         * vec_any_gt looks like the intended predicate - confirm on real
         * AltiVec hardware. */
        if (vec_all_gt (vec_add (zi2, zr2), maxdist)) break;
 
        zi = vec_add (zi, vCim);
        zr = vec_add (zr, vCre);
 
        /* Budget check between the two unrolled steps. */
        if (--Iterazioni == 0) break;
 
        /* Second copy of the step above. */
        for (Exp = Power; Exp != -1; Exp--)
        {
            zi2 = vec_madd (zi, zi, zero);
            zi = vec_madd (zi, zr, zero);
            zr2 = vec_madd (zr, zr, zero);
            zr = vec_sub (zr2, zi2);
            zi = vec_add (zi, zi);
        }
 
        if (vec_all_gt (vec_add (zi2, zr2), maxdist)) break;
 
        zi = vec_add (zi, vCim);
        zr = vec_add (zr, vCre);
 
     } while (--Iterazioni);
 
     return 0;
}
#endif /* __ALTIVEC__ */

Sun, 2018-04-29 18:23

flash

Offline

Last seen: 3 weeks 3 days ago

Joined: 2018-02-24 17:25

Re: Altivec vector comparison - GCC AmigaOS4 SDK

Ok friends, I want to update the thread and reply to myself because I got the answer.
Since I don't have a real Amiga and the WinUAE platform doesn't support the Altivec extensions, I had to try it on my PowerMac G5 Quad under the Xcode development tool.

..Maybe this is the only existing Mandelbrot program that uses Altivec and outputs ASCII text as its result :-)

#include "stdio.h" 
#include "altivec.h"
 
/*
 * Prints an ASCII Mandelbrot picture (33 rows of 86 characters) to stdout,
 * doing the per-point arithmetic with AltiVec vector intrinsics.
 *
 * A point that never trips the escape test within the iteration cap is drawn
 * as a blank; any other point is drawn with a palette character selected by
 * the low four bits of the iteration number at which it escaped.
 *
 * Returns 0.
 */
int main()
{
    static const char palette[] = " .:-;!/>)|&IH%*#";
    const int maxiter = 32;

    const vector float zero = (vector float) {0};
    const vector float maxdist = (vector float) {4.0};

    /* loopMask keeps only bit 0 of the first element of the compare result. */
    vector bool int loopMask = {1};
    vector bool int zeroInt = {0};

    int row, col;

    for (row = 0; row <= 32; row++)
    {
        vector float cim = (vector float) {((row - 16) / 10.0)};

        for (col = 0; col <= 85; col++)
        {
            vector float cre = (vector float) {((col - 50) / 20.0)};
            vector float zr = cre;
            vector float zi = cim;
            vector float zr2, zi2;
            vector bool int exitMask;
            int iter;

            /* maxiter is above 1, so the first pass always runs. */
            for (iter = 1; iter < maxiter; iter++)
            {
                /* z = z * z; vec_madd with a zero addend is a plain multiply. */
                zi2 = vec_madd (zi, zi, zero);
                zi = vec_madd (zi, zr, zero);
                zr2 = vec_madd (zr, zr, zero);
                zr = vec_sub (zr2, zi2);
                zi = vec_add (zi, zi);

                /* Escape test: compare, mask, then look for a non-zero element. */
                exitMask = vec_and (vec_cmpgt (vec_add (zi2, zr2), maxdist), loopMask);
                if (vec_any_ne (exitMask, zeroInt))
                    break;

                /* z = z + c */
                zi = vec_add (zi, cim);
                zr = vec_add (zr, cre);
            }

            if (iter == maxiter)
                putchar (' ');
            else
                putchar (palette[iter & 15]);
        }

        putchar ('\n');
    }

    return 0;
}

As you can see, there's no simple way to compare vectors and I needed to use masking.
If someone knows a faster way, you are welcome to share it.

I'm looking for fortunate Amigans with a G4 CPU and/or Altivec-enabled extensions for testing purposes..

Mon, 2018-04-30 17:04

(Reply to #2) #3

TSK

Offline

Last seen: 1 year 10 months ago

Joined: 2011-06-28 02:06

Re: Altivec vector comparison - GCC AmigaOS4 SDK

The code from post #2 compiles and runs, and produces this output on an X1000 (with Altivec):

..........................:::::::::::::::::::::::::::::::::::::::::::::::::...........
........................:::::::::::::::::::::::::::::::::::::::::::::::::::::.........
......................:::::::::::::::::::::::::::::::::::::::::::::::::::::::::.......
....................:::::::::::::::-----------:::::::::::::::::::::::::::::::::::.....
..................::::::::::-------------------------::::::::::::::::::::::::::::::...
.................:::::::----------------;;;;;!-)!!;;;----:::::::::::::::::::::::::::..
................:::::----------------;;;;;;!!/>I|I !;;;;----:::::::::::::::::::::::::.
...............:::----------------;;;;;;;!!!/)| )I)/!!;;;;-----:::::::::::::::::::::::
..............::----------------;;;;;;;!!//>)*    :)/!!!!;;------:::::::::::::::::::::
.............::--------------;;;;;;!!/>))))|&*    %&|)///:!;;-----::::::::::::::::::::
............::-------------;;;;!!!!//>H   &          ;H*# #!;------:::::::::::::::::::
............:-----------;;!!!!!!!///)*H                  ;>/!;------::::::::::::::::::
...........:-------;;;!!>H>>>>>>>>>)I;                   .|-!;;------:::::::::::::::::
...........---;;;;;;!!!/>& ;H% %I||I)                     #)/;;-------::::::::::::::::
...........-;;;;;;!!!!/>)I:       #                       #>!;;;------::::::::::::::::
...........;;;;;////>)H%*/                                )/!;;;------::::::::::::::::
..........                                              H)>/!;;;-------:::::::::::::::
...........;;;;;////>)H%*/                                )/!;;;------::::::::::::::::
...........-;;;;;;!!!!/>)I:       #                       #>!;;;------::::::::::::::::
...........---;;;;;;!!!/>& ;H% %I||I)                     #)/;;-------::::::::::::::::
...........:-------;;;!!>H>>>>>>>>>)I;                   .|-!;;------:::::::::::::::::
............:-----------;;!!!!!!!///)*H                  ;>/!;------::::::::::::::::::
............::-------------;;;;!!!!//>H   &          ;H*# #!;------:::::::::::::::::::
.............::--------------;;;;;;!!/>))))|&*    %&|)///:!;;-----::::::::::::::::::::
..............::----------------;;;;;;;!!//>)*    :)/!!!!;;------:::::::::::::::::::::
...............:::----------------;;;;;;;!!!/)| )I)/!!;;;;-----:::::::::::::::::::::::
................:::::----------------;;;;;;!!/>I|I !;;;;----:::::::::::::::::::::::::.
.................:::::::----------------;;;;;!-)!!;;;----:::::::::::::::::::::::::::..
..................::::::::::-------------------------::::::::::::::::::::::::::::::...
....................:::::::::::::::-----------:::::::::::::::::::::::::::::::::::.....
......................:::::::::::::::::::::::::::::::::::::::::::::::::::::::::.......
........................:::::::::::::::::::::::::::::::::::::::::::::::::::::.........
..........................:::::::::::::::::::::::::::::::::::::::::::::::::...........

Tue, 2018-05-01 01:45

(Reply to #3) #4

flash

Offline

Last seen: 3 weeks 3 days ago

Joined: 2018-02-24 17:25

Re: Altivec vector comparison - GCC AmigaOS4 SDK

It was tested on a PowerMac G5 Quad; I'm very glad to know it runs fine on the Amiga X1000 too :-)
I uploaded both the Altivec and the scalar FPU versions to OS4Depot and Aminet.
As always, the C source code is included.

@TSK
I sent you a PM

Fri, 2018-11-02 17:21

(Reply to #4) #5

flash

Offline

Last seen: 3 weeks 3 days ago

Joined: 2018-02-24 17:25

Re: Altivec vector comparison - GCC AmigaOS4 SDK

Well, I decided to review the Altivec Mandelbrot routine because I was sure it could be further optimized.
Then I got rid of 2 variables and one instruction!
But I also got a big surprise: the scalar routine is about 30% faster than the Altivec one!
Maybe when the Altivec unit is used just as a regular FPU there isn't any performance improvement, while I guess that if it were an SPE unit (like on Tabor or the PS3) it could be faster than the FPU.
Is that right?
If you want, you can recompile and try it. It's tested under Mac OS X Leopard/Xcode 3 and Ubuntu MATE 16.04/Code::Blocks (both on a PowerMac G5 Quad).
Of course it also compiles fine under AmigaOS 4, and in the next few days I'll update the AsciiMandelbrot program on Aminet/OS4Depot, adding a benchmark switch.
Meanwhile I have updated FlashMandelNG to version 1.8 to reflect the speed improvements in the Altivec version.

***

#include <stdio.h>
 
/* Program name/version/author tag kept as a string in the binary.
 * NOTE(review): the "$VER:" prefix looks like the AmigaOS version-string
 * convention - confirm. */
char VERSION_STRING [] = "$VER: AsciiMandelbrot 1.1 by Dino Papararo ©2018\0 ";
 
#define ALTIVEC
#ifdef ALTIVEC
#include <altivec.h>
 
/*
 * Prints an ASCII Mandelbrot picture (33 rows of 86 characters) to stdout
 * using AltiVec intrinsics, followed by a one-line banner.
 *
 * For each character cell, c = (cre, cim) is taken from the grid position:
 * cre = (a - 50) / 20 and cim = (b - 16) / 10.  z starts at c and is
 * repeatedly squared and offset by c until zr*zr + zi*zi exceeds maxdist or
 * the iteration cap is reached.  Capped points print as a blank; escaped
 * points print a palette character chosen by the low four bits of the
 * iteration number.
 *
 * Returns 0.
 */
int main()
{
    int a, b;
    vector float cre, cim, zi, zr, zi2, zr2;
    const int maxiter = 800000;
    int iter;

    const vector float zero = {0.0}, maxdist = {4.0};

    for (b = 0; b <= 32; b++)
    {
        cim = (vector float) {((b - 16) / 10.0)};

        for (a = 0; a <= 85; a++)
        {
            iter = 1;

            cre = (vector float) {((a - 50) / 20.0)};
            zr = cre;
            zi = cim;

            do {
                    /* z = z * z; vec_madd with a zero addend is a plain multiply. */
                    zi2 = vec_madd (zi, zi, zero);
                    zi = vec_madd (zi, zr, zero);
                    zr2 = vec_madd (zr, zr, zero);
                    zr = vec_sub (zr2, zi2);
                    zi = vec_add (zi, zi);

                    /* vec_any_gt is true as soon as one element of the sum
                     * exceeds the matching element of maxdist, so no separate
                     * compare mask or zero vector is needed for the test. */
                    if (vec_any_gt (vec_add (zi2, zr2), maxdist)) break;

                    /* z = z + c */
                    zi = vec_add (zi, cim);
                    zr = vec_add (zr, cre);

                    iter++;
            } while (iter < maxiter);

            if (iter == maxiter) putchar(' ');
            else putchar (" .:-;!/>)|&IH%*#"[iter & 15]);
        }

        putchar ('\n');
    }

    printf("\nComputed with Altivec unit!\n");

    return 0;
}
 
#else // NOT ALTIVEC
 
/*
 * Scalar (plain float) build: prints an ASCII Mandelbrot picture of 33 rows
 * by 86 columns to stdout, followed by a one-line banner.
 *
 * Points that reach the iteration cap print as a blank; all others print a
 * palette character chosen by the low four bits of the iteration number at
 * which zr*zr + zi*zi first exceeded maxdist.
 *
 * Returns 0.
 */
int main()
{
    static const char palette[] = " .:-;!/>)|&IH%*#";
    const float maxdist = 4.0;
    const int maxiter = 800000;
    int row, col;

    for (row = 0; row <= 32; row++)
    {
        float cim = (row - 16.0) / 10.0;

        for (col = 0; col <= 85; col++)
        {
            float cre = (col - 50) / 20.0;
            float zi = cim;
            float zr = cre;
            float zr2, zi2;
            int iter;

            /* maxiter is above 1, so the first pass always runs. */
            for (iter = 1; iter < maxiter; iter++)
            {
                /* z = z * z */
                zi2 = zi*zi;
                zi = zi*zr;
                zr2 = zr*zr;
                zr = zr2-zi2;
                zi += zi;

                if ((zi2+zr2) > maxdist)
                    break;

                /* z = z + c */
                zi += cim;
                zr += cre;
            }

            if (iter == maxiter)
                putchar (' ');
            else
                putchar (palette[iter & 15]);
        }

        putchar ('\n');
    }

    printf ("\nComputed without Altivec unit\n");

    return 0;
}
#endif

Sat, 2018-11-03 20:31

(Reply to #5) #6

flash

Offline

Last seen: 3 weeks 3 days ago

Joined: 2018-02-24 17:25

Re: Altivec vector comparison - GCC AmigaOS4 SDK

So I have just released a new AsciiMandelbrot V1.4 on both Aminet and OS4Depot, just for fun and speed-test purposes.
I have also included timings to track the speed on different Amiga flavors.
Now there's a single executable for both the Altivec and non-Altivec routines; of course the source code is included.

When/if Tabor is released, I'd like to add support for it too :-)

Amiga rulez! :-)

Search form

User login