 


#include <stdio.h>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <mmintrin.h>


/*
 * Timing state shared by the benchmark routines in this file:
 *   starttime - dtime() value sampled just before a timed copy
 *   benchtime - dtime() difference measured across a timed copy
 *   overhead  - not referenced anywhere in the visible code
 */
double starttime, benchtime, overhead;
/*
 * Raw opcode emitters written in the `__asm __emit` inline-assembly syntax:
 * 0F A2 is the CPUID opcode and 0F 31 is the RDTSC opcode.
 * NOTE(review): in the visible code these macros are only used inside the
 * commented-out Windows variant of dtime(); confirm before removing them.
 */
#define CPUID __asm __emit 0fh __asm __emit 0a2h
#define RDTSC __asm __emit 0fh __asm __emit 031h

/* Dimension of the square N x N matrices copied by the benchmarks. */
#define N 1000

/* ICC on Windows
#define CPUID __asm __emit 0fh __asm __emit 0a2h
#define RDTSC __asm __emit 0fh __asm __emit 031h

unsigned __int64 dtime(){
		unsigned cycles_low, cycles_high;

		__asm {
				pushad
				CPUID			
				RDTSC
				mov	cycles_high, edx			
				mov	cycles_low, eax	
				popad
				}
			return ((unsigned __int64)cycles_high << 32) | cycles_low;
	}
// ICC Windows (end) */

// ICC on Linux
long long readTSC (void);

/*
 * Read the processor time-stamp counter with the RDTSC instruction.
 *
 * RDTSC places the low 32 bits of the counter in EAX and the high 32 bits in
 * EDX.  The two halves are captured in separate registers and combined by
 * hand: the "A" constraint designates the EDX:EAX pair only on 32-bit x86,
 * whereas on x86-64 it lets the compiler pick a single register, which
 * silently drops half of the counter.
 *
 * __asm__/__volatile__ are used instead of asm/volatile so the code also
 * builds in strict ISO C modes, where the plain `asm` keyword is disabled.
 *
 * Returns the 64-bit counter value.  No serializing instruction is issued
 * before the read.
 */
long long readTSC (void)
{
  unsigned int lo, hi;

  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

  return (long long) (((unsigned long long) hi << 32) | lo);
}
/*
 * Current time stamp as a double.
 *
 * Thin wrapper around readTSC(): the integer counter value is converted to
 * double so callers can subtract and divide time stamps in floating point.
 */
double dtime()
{
  long long ticks = readTSC();

  return (double) ticks;
}
// ICC Linux (end) */

/*
 * Benchmark matrices, all N x N and defined at file scope (so they start out
 * zero-initialized; the visible code never fills them with other data).
 *
 *   XB -> YB : source / destination of the byte copy  (main_copy_byte)
 *   XI -> YI : source / destination of the int copies (main_copy_ij, main_copy_ji)
 *   XF -> YF : source / destination of the float copy (main_copy_float)
 *
 * ZI, SI, ZF and SF are not referenced anywhere in the visible code.
 */
unsigned char XB[N][N], YB[N][N];
int XI[N][N], YI[N][N], ZI[N][N], SI;
float XF[N][N], YF[N][N], ZF[N][N], SF;

/*
 * Timing state used by the benchmarks: time stamp taken before a copy and
 * the elapsed dtime() difference.  Both names are also declared near the top
 * of the file; this line repeats that file-scope declaration.
 */
double starttime, benchtime;


/*
 * Benchmark: element-by-element copy of the int matrix XI into YI with the
 * row index in the outer loop and the column index in the inner loop, i.e.
 * consecutive accesses touch adjacent elements of a row.
 *
 * The full N x N copy is timed 10 times.  After each run the dtime()
 * difference divided by the number of elements is printed together with N;
 * a blank line is printed once all runs are done.
 */
void main_copy_ij()
{
	int row, col, run;

	for (run = 0; run < 10; run++) {
		starttime = dtime();

		for (row = 0; row < N; row++) {
			for (col = 0; col < N; col++) {
				YI[row][col] = XI[row][col];
			}
		}

		benchtime = dtime() - starttime;
		printf("Temps copie par element_ij %f  N %d \n",
		       (double) (benchtime) / (double) (N * N), N);
	}

	printf("\n");
}

/*
 * Benchmark: element-by-element copy of the int matrix XI into YI with the
 * column index in the outer loop and the row index in the inner loop, i.e.
 * consecutive accesses are a whole row (N elements) apart in memory.
 *
 * The full N x N copy is timed 10 times.  After each run the dtime()
 * difference divided by the number of elements is printed together with N;
 * a blank line is printed once all runs are done.
 */
void main_copy_ji()
{
	int row, col, run;

	for (run = 0; run < 10; run++) {
		starttime = dtime();

		for (col = 0; col < N; col++) {
			for (row = 0; row < N; row++) {
				YI[row][col] = XI[row][col];
			}
		}

		benchtime = dtime() - starttime;
		printf("Temps copie par element_ji %f  N %d \n",
		       (double) (benchtime) / (double) (N * N), N);
	}

	printf("\n");
}

/*
 * Benchmark: element-by-element copy of the unsigned char matrix XB into YB,
 * row index in the outer loop and column index in the inner loop.
 *
 * The full N x N copy is timed 10 times.  After each run the dtime()
 * difference divided by the number of elements is printed together with N;
 * a blank line is printed once all runs are done.
 */
void main_copy_byte()
{
	int row, col, run;

	for (run = 0; run < 10; run++) {
		starttime = dtime();

		for (row = 0; row < N; row++) {
			for (col = 0; col < N; col++) {
				YB[row][col] = XB[row][col];
			}
		}

		benchtime = dtime() - starttime;
		printf("Temps copie par element_byte %f  N %d \n",
		       (double) (benchtime) / (double) (N * N), N);
	}

	printf("\n");
}

/*
 * Benchmark: element-by-element copy of the float matrix XF into YF,
 * row index in the outer loop and column index in the inner loop.
 *
 * The full N x N copy is timed 10 times.  After each run the dtime()
 * difference divided by the number of elements is printed together with N;
 * a blank line is printed once all runs are done.
 */
void main_copy_float()
{
	int row, col, run;

	for (run = 0; run < 10; run++) {
		starttime = dtime();

		for (row = 0; row < N; row++) {
			for (col = 0; col < N; col++) {
				YF[row][col] = XF[row][col];
			}
		}

		benchtime = dtime() - starttime;
		printf("Temps copie par element_float %f  N %d \n",
		       (double) (benchtime) / (double) (N * N), N);
	}

	printf("\n");
}

/*
 * Program entry point: runs the four copy benchmarks in sequence
 * (int row-wise, int column-wise, byte, float).
 *
 * The return type is spelled out because implicit `int` was removed from the
 * language in C99, and success is reported to the host explicitly.
 */
int main(void)
{
	main_copy_ij();
	main_copy_ji();
	main_copy_byte();
	main_copy_float();

	return 0;
}









