Altivec vector comparison - GCC AmigaOS4 SDK

6 posts / 0 new
Last post
flash
flash's picture
Offline
Last seen: 5 months 4 weeks ago
Joined: 2018-02-24 17:25
Altivec vector comparison - GCC AmigaOS4 SDK

Hello friends,
I'd like to ask you if the following code is ok.
It uses altivec extensions under GCC AmigaOS4 compiler/SDK.

I'm using AmigaOS under Winuae and can't test Altivec code :-(

I'd like to draw your attention to the following line: "if (vec_all_gt (vec_add (zi2, zr2), maxdist)) break;"

I know vector comparisons are a bit strange, and in some other examples on the internet I found other, more complex ways to do the same job using masking techniques.

Here is the whole function:

  1. #include "exec/types.h"
  2. #include "FlashMandel.h"
  3.  
  4. #ifdef __ALTIVEC__
  5. #include "altivec.h"
  6.  
  /*
   * MandelnAltivec - escape-time iteration for one point, done with AltiVec
   * vector arithmetic.
   *
   * Iterazioni  iteration budget; it is decremented once per pass and the
   *             loop ends when it reaches zero.
   * Power       the inner squaring loop runs Power + 1 times per pass
   *             (for non-negative Power).
   * Cre, Cim    real and imaginary part of the constant; both are narrowed
   *             to float before use.
   *
   * Returns 0 on every path.
   * NOTE(review): the number of iterations performed is never reported to
   * the caller - confirm whether Iterazioni should be returned instead.
   */
  __inline WORD MandelnAltivec (LONG Iterazioni, LONG Power, LDouble Cre, LDouble Cim)
  {
    REGISTER WORD Exp;
    const float cre = (float) Cre;
    const float cim = (float) Cim;

    /*
     * A brace initializer with a single value, e.g. {4.0}, sets lane 0 only
     * and zero-fills lanes 1-3.  With such vectors vec_all_gt can never be
     * true (lanes 1-3 compare 0 > 0), so the escape test would never fire.
     * Every constant is therefore written out for all four lanes.
     */
    const vector float zero = (vector float) {0.0f, 0.0f, 0.0f, 0.0f};
    const vector float maxdist = (vector float) {4.0f, 4.0f, 4.0f, 4.0f};
    const vector float vCre = (vector float) {cre, cre, cre, cre};
    const vector float vCim = (vector float) {cim, cim, cim, cim};

    vector float zr = vCre;
    vector float zi = vCim;

    /* Start from zero so the escape test reads defined values even when
       the inner loop body does not execute. */
    vector float zi2 = zero;
    vector float zr2 = zero;

    /* The loop body is unrolled twice; each half is one full pass. */
    do
    {
      for (Exp = Power; Exp != -1; Exp--)
      {
        zi2 = vec_madd (zi, zi, zero);   /* zi^2                     */
        zi = vec_madd (zi, zr, zero);    /* zi * zr                  */
        zr2 = vec_madd (zr, zr, zero);   /* zr^2                     */
        zr = vec_sub (zr2, zi2);         /* new real part            */
        zi = vec_add (zi, zi);           /* new imaginary: 2 * zi*zr */
      }

      /* All lanes hold the same value, so "all greater" is the escape test. */
      if (vec_all_gt (vec_add (zi2, zr2), maxdist)) break;

      zi = vec_add (zi, vCim);
      zr = vec_add (zr, vCre);

      if (--Iterazioni == 0) break;

      for (Exp = Power; Exp != -1; Exp--)
      {
        zi2 = vec_madd (zi, zi, zero);
        zi = vec_madd (zi, zr, zero);
        zr2 = vec_madd (zr, zr, zero);
        zr = vec_sub (zr2, zi2);
        zi = vec_add (zi, zi);
      }

      if (vec_all_gt (vec_add (zi2, zr2), maxdist)) break;

      zi = vec_add (zi, vCim);
      zr = vec_add (zr, vCre);

    } while (--Iterazioni);

    return 0;
  }
  55. #endif /* __ALTIVEC__ */
flash
flash's picture
Offline
Last seen: 5 months 4 weeks ago
Joined: 2018-02-24 17:25
Re: Altivec vector comparison - GCC AmigaOS4 SDK

OK friends, I want to update the thread and reply to myself because I got the answer.
Since I don't have a real Amiga and the WinUAE platform doesn't support the Altivec extensions, I had to try it on my PowerMac G5 Quad under the Xcode development tool.

...Maybe this is the only existing Mandelbrot program that uses Altivec and outputs ASCII text as its result :-)

  1. #include "stdio.h"
  2. #include "altivec.h"
  3.  
  /*
   * Prints an ASCII rendering of the Mandelbrot set: 33 rows (b = 0..32) by
   * 86 columns (a = 0..85).  Each character encodes the iteration at which
   * the point escaped; a blank means it did not escape within maxiter.
   *
   * The arithmetic is done in AltiVec vectors whose four lanes all hold the
   * same point.
   */
  int main()
  {
    const int maxiter = 32;

    /*
     * All four lanes are written out explicitly.  A one-value initializer
     * such as {4.0} fills lane 0 only and zeroes the rest, which is what
     * previously forced the extra vec_cmpgt/vec_and/vec_any_ne masking.
     */
    const vector float zero = (vector float) {0.0f, 0.0f, 0.0f, 0.0f};
    const vector float maxdist = (vector float) {4.0f, 4.0f, 4.0f, 4.0f};

    vector float cre, cim, zi, zr, zi2, zr2;
    int a, b;
    int iter;

    for (b = 0; b <= 32; b++)
    {
      /* Imaginary part of the constant for this row. */
      const float im = (float) ((b - 16) / 10.0);

      cim = (vector float) {im, im, im, im};

      for (a = 0; a <= 85; a++)
      {
        /* Real part of the constant for this column. */
        const float re = (float) ((a - 50) / 20.0);

        cre = (vector float) {re, re, re, re};

        iter = 1;
        zr = cre;
        zi = cim;

        do {
          zi2 = vec_madd (zi, zi, zero);   /* zi^2                     */
          zi = vec_madd (zi, zr, zero);    /* zi * zr                  */
          zr2 = vec_madd (zr, zr, zero);   /* zr^2                     */
          zr = vec_sub (zr2, zi2);         /* new real part            */
          zi = vec_add (zi, zi);           /* new imaginary: 2 * zi*zr */

          /* The vec_any_gt predicate returns a plain int, so no mask
             vector is needed for the escape test. */
          if (vec_any_gt (vec_add (zi2, zr2), maxdist)) break;

          zi = vec_add (zi, cim);
          zr = vec_add (zr, cre);

          iter++;
        } while (iter < maxiter);

        if (iter == maxiter)
          putchar (' ');
        else
          putchar (" .:-;!/>)|&IH%*#"[iter & 15]);
      }
      putchar ('\n');
    }
    return 0;
  }

As you can see, there's no simple way to compare vectors, and I needed to use masking.
If someone knows a faster way, they are welcome to share it.

I'm looking for fortunate Amigans with a G4 CPU and/or Altivec extensions enabled, for testing purposes...

TSK
TSK's picture
Offline
Last seen: 1 year 1 month ago
Joined: 2011-06-28 02:06
Re: Altivec vector comparison - GCC AmigaOS4 SDK

The code from post #2 compiles and runs, and returns this on an X1000 (with Altivec):

  1. ..........................:::::::::::::::::::::::::::::::::::::::::::::::::...........
  2. ........................:::::::::::::::::::::::::::::::::::::::::::::::::::::.........
  3. ......................:::::::::::::::::::::::::::::::::::::::::::::::::::::::::.......
  4. ....................:::::::::::::::-----------:::::::::::::::::::::::::::::::::::.....
  5. ..................::::::::::-------------------------::::::::::::::::::::::::::::::...
  6. .................:::::::----------------;;;;;!-)!!;;;----:::::::::::::::::::::::::::..
  7. ................:::::----------------;;;;;;!!/>I|I !;;;;----:::::::::::::::::::::::::.
  8. ...............:::----------------;;;;;;;!!!/)| )I)/!!;;;;-----:::::::::::::::::::::::
  9. ..............::----------------;;;;;;;!!//>)* :)/!!!!;;------:::::::::::::::::::::
  10. .............::--------------;;;;;;!!/>))))|&* %&|)///:!;;-----::::::::::::::::::::
  11. ............::-------------;;;;!!!!//>H & ;H*# #!;------:::::::::::::::::::
  12. ............:-----------;;!!!!!!!///)*H ;>/!;------::::::::::::::::::
  13. ...........:-------;;;!!>H>>>>>>>>>)I; .|-!;;------:::::::::::::::::
  14. ...........---;;;;;;!!!/>& ;H% %I||I) #)/;;-------::::::::::::::::
  15. ...........-;;;;;;!!!!/>)I: # #>!;;;------::::::::::::::::
  16. ...........;;;;;////>)H%*/ )/!;;;------::::::::::::::::
  17. .......... H)>/!;;;-------:::::::::::::::
  18. ...........;;;;;////>)H%*/ )/!;;;------::::::::::::::::
  19. ...........-;;;;;;!!!!/>)I: # #>!;;;------::::::::::::::::
  20. ...........---;;;;;;!!!/>& ;H% %I||I) #)/;;-------::::::::::::::::
  21. ...........:-------;;;!!>H>>>>>>>>>)I; .|-!;;------:::::::::::::::::
  22. ............:-----------;;!!!!!!!///)*H ;>/!;------::::::::::::::::::
  23. ............::-------------;;;;!!!!//>H & ;H*# #!;------:::::::::::::::::::
  24. .............::--------------;;;;;;!!/>))))|&* %&|)///:!;;-----::::::::::::::::::::
  25. ..............::----------------;;;;;;;!!//>)* :)/!!!!;;------:::::::::::::::::::::
  26. ...............:::----------------;;;;;;;!!!/)| )I)/!!;;;;-----:::::::::::::::::::::::
  27. ................:::::----------------;;;;;;!!/>I|I !;;;;----:::::::::::::::::::::::::.
  28. .................:::::::----------------;;;;;!-)!!;;;----:::::::::::::::::::::::::::..
  29. ..................::::::::::-------------------------::::::::::::::::::::::::::::::...
  30. ....................:::::::::::::::-----------:::::::::::::::::::::::::::::::::::.....
  31. ......................:::::::::::::::::::::::::::::::::::::::::::::::::::::::::.......
  32. ........................:::::::::::::::::::::::::::::::::::::::::::::::::::::.........
  33. ..........................:::::::::::::::::::::::::::::::::::::::::::::::::...........
flash
flash's picture
Offline
Last seen: 5 months 4 weeks ago
Joined: 2018-02-24 17:25
Re: Altivec vector comparison - GCC AmigaOS4 SDK

It was tested on a PowerMac G5 Quad; I'm very glad to know it runs OK on the Amiga X1000 too :-)
I uploaded a version with both the Altivec and the scalar FPU variants to OS4Depot and Aminet.
As always, the C source code is included.

@TSK
I sent you a PM

flash
flash's picture
Offline
Last seen: 5 months 4 weeks ago
Joined: 2018-02-24 17:25
Re: Altivec vector comparison - GCC AmigaOS4 SDK

Well, I decided to review the Altivec Mandelbrot routine because I was sure it could be further optimized.
Then I got rid of 2 variables and one instruction!
But I also got a big surprise: the scalar routine is about 30% faster than the Altivec one!
Maybe when the Altivec unit is used just as a regular FPU there isn't any performance improvement, while I guess that with an SPE unit (like Tabor or PS3) it could be faster than the FPU...
Is that right?
If you want, you can recompile and try it. It's tested under Mac OS X Leopard/Xcode 3 and Ubuntu MATE 16.04/Code::Blocks (both on a PowerMac G5 Quad).
Of course it also compiles fine under AmigaOS 4, and in the next few days I'll update the AsciiMandelbrot program on Aminet/OS4Depot, adding a benchmark switch.
Meanwhile I have updated FlashMandelNG to version 1.8 to reflect the speed improvements in the Altivec version.

***

  1. #include <stdio.h>
  2.  
  3. char VERSION_STRING [] = "$VER: AsciiMandelbrot 1.1 by Dino Papararo ©2018\0 ";
  4.  
  5. #define ALTIVEC
  6. #ifdef ALTIVEC
  7. #include <altivec.h>
  8.  
  /*
   * AltiVec build of the benchmark: prints a 33-row by 86-column ASCII
   * Mandelbrot rendering, then a line naming the unit that computed it.
   * A blank cell means the point did not escape within maxiter iterations.
   *
   * All four vector lanes carry the same point, so this does the same
   * amount of useful work per iteration as the scalar build.
   */
  int main()
  {
    const int maxiter = 800000;

    /*
     * Constants are spelled out for all four lanes; a one-value initializer
     * such as {4.0} would set lane 0 and zero-fill the others.
     */
    const vector float zero = {0.0f, 0.0f, 0.0f, 0.0f};
    const vector float maxdist = {4.0f, 4.0f, 4.0f, 4.0f};

    vector float cre, cim, zi, zr, zi2, zr2;
    int a, b;
    int iter;

    for (b = 0; b <= 32; b++)
    {
      /* Imaginary part of the constant for this row. */
      const float im = (float) ((b - 16) / 10.0);

      cim = (vector float) {im, im, im, im};

      for (a = 0; a <= 85; a++)
      {
        /* Real part of the constant for this column. */
        const float re = (float) ((a - 50) / 20.0);

        cre = (vector float) {re, re, re, re};

        iter = 1;
        zr = cre;
        zi = cim;

        do {
          zi2 = vec_madd (zi, zi, zero);   /* zi^2                     */
          zi = vec_madd (zi, zr, zero);    /* zi * zr                  */
          zr2 = vec_madd (zr, zr, zero);   /* zr^2                     */
          zr = vec_sub (zr2, zi2);         /* new real part            */
          zi = vec_add (zi, zi);           /* new imaginary: 2 * zi*zr */

          /* vec_any_gt yields an int directly; there is no need to build
             a compare mask and test it against a zero vector. */
          if (vec_any_gt (vec_add (zi2, zr2), maxdist)) break;

          zi = vec_add (zi, cim);
          zr = vec_add (zr, cre);

          iter++;
        } while (iter < maxiter);

        if (iter == maxiter) putchar (' ');
        else putchar (" .:-;!/>)|&IH%*#"[iter & 15]);
      }

      putchar ('\n');
    }

    printf("\nComputed with Altivec unit!\n");

    return 0;
  }
  60.  
  61. #else // NOT ALTIVEC
  62.  
  /*
   * Scalar (FPU) build of the benchmark: prints a 33-row by 86-column ASCII
   * Mandelbrot rendering, then a line saying no Altivec unit was used.
   * A blank cell means the point did not escape within the iteration limit.
   */
  int main()
  {
    const int iter_limit = 800000;
    const float escape = 4.0;
    int row, col;

    for (row = 0; row <= 32; row++)
    {
      /* Imaginary part of the constant for this row. */
      const float c_im = (row - 16.0) / 10.0;

      for (col = 0; col <= 85; col++)
      {
        /* Real part of the constant for this column. */
        const float c_re = (col - 50) / 20.0;
        float im = c_im;
        float re = c_re;
        float im_sq, re_sq;
        int count = 1;

        for (;;)
        {
          im_sq = im * im;
          im = im * re;
          re_sq = re * re;
          re = re_sq - im_sq;
          im += im;

          /* Escaped: stop with count pointing at the escaping iteration. */
          if ((im_sq + re_sq) > escape)
            break;

          im += c_im;
          re += c_re;

          /* Budget exhausted: count equals iter_limit on exit. */
          if (++count >= iter_limit)
            break;
        }

        if (count != iter_limit)
          putchar (" .:-;!/>)|&IH%*#"[count & 15]);
        else
          putchar (' ');
      }

      putchar ('\n');
    }

    printf ("\nComputed without Altivec unit\n");

    return 0;
  }
  112. #endif
flash
flash's picture
Offline
Last seen: 5 months 4 weeks ago
Joined: 2018-02-24 17:25
Re: Altivec vector comparison - GCC AmigaOS4 SDK

So I have just released a new AsciiMandelbrot version, V1.4, on both Aminet and OS4Depot, just for fun and speed-test purposes.
I have also included timings to track speed on different Amiga flavors.
Now there's a single executable for both the Altivec and non-Altivec routines; of course the source code is included.

When/if Tabor is released, I'd like to add support for it too :-)

Amiga rulez! :-)

Log in or register to post comments