pthreads implemented (fucking slow!)
[physik/posic.git] / potentials / albe_fast.c
index 29c303b..952f1fe 100644 (file)
 #include <omp.h>
 #endif
 
-#ifdef PTHREAD
+#ifdef PTHREADS
 #include <pthread.h>
+#define MAX_THREADS 4
 #endif
 
 #include "../moldyn.h"
 #include "../math/math.h"
 #include "albe.h"
 
+#ifdef PTHREADS
+extern pthread_mutex_t *amutex;
+extern pthread_mutex_t emutex;
+#endif
+
 /*
  * virial calculation
  */
@@ -1078,14 +1084,20 @@ void *potential_force_thread(void *ptr) {
        /* force contribution for atom i */
        scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
        v3_scale(&force,&(dist_ij),scale);
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        v3_add(&(ai->f),&(ai->f),&force);
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* force contribution for atom j */
        v3_scale(&force,&force,-1.0); // dri rij = - drj rij
+       pthread_mutex_lock(&(amutex[jtom->tag]));       
        v3_add(&(jtom->f),&(jtom->f),&force);
+       pthread_mutex_unlock(&(amutex[jtom->tag]));     
 
        /* virial */
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        virial_calc(ai,&force,&(dist_ij));
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1108,8 +1120,12 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 
        /* energy contribution */
        energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
+       pthread_mutex_lock(&emutex);
        moldyn->energy+=energy;
+       pthread_mutex_unlock(&emutex);
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        ai->e+=energy;
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* reset k counter for second k loop */
        kcount=0;
@@ -1204,7 +1220,9 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
 
        /* force contribution */
+       pthread_mutex_lock(&(amutex[jtom->tag]));       
        v3_add(&(jtom->f),&(jtom->f),&force);
+       pthread_mutex_unlock(&(amutex[jtom->tag]));     
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1219,11 +1237,13 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        virial_calc(ai,&force,&dist_ij);
 
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* derivative wrt k */
        v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
@@ -1232,7 +1252,9 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        v3_scale(&force,&force,pre_dzeta);
 
        /* force contribution */
+       pthread_mutex_lock(&(amutex[ktom->tag]));       
        v3_add(&(ktom->f),&(ktom->f),&force);
+       pthread_mutex_unlock(&(amutex[ktom->tag]));     
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1247,11 +1269,13 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
+       pthread_mutex_lock(&(amutex[ai->tag])); 
        virial_calc(ai,&force,&dist_ik);
        
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
+       pthread_mutex_unlock(&(amutex[ai->tag]));       
 
        /* increase k counter */
        kcount++;       
@@ -1298,35 +1322,17 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        }
 #endif
 
-       /* some postprocessing */
-#ifdef PARALLEL
-       #pragma omp parallel for
-#endif
-       for(i=0;i<count;i++) {
-               /* calculate global virial */
-               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
-               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
-               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
-               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
-               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
-               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
-
-               /* check forces regarding the given timestep */
-               if(v3_norm(&(itom[i].f))>\
-                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
-                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
-                              i);
-       }
+       pthread_exit(NULL);
 
        return 0;
 }
 
 int albe_potential_force_calc(t_moldyn *moldyn) {
 
-       int i,ret;
-       t_pft_data *pft_data;
+       int i,j,ret;
+       t_pft_data pft_data[MAX_THREADS];
        int count;
-       pthread_t *pft_thread;
+       pthread_t pft_thread[MAX_THREADS];
        t_atom *itom;
        t_virial *virial;
 
@@ -1359,42 +1365,70 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
 
        }
 
-       /* alloc thread memory */
-       pft_thread=malloc(count*sizeof(pthread_t));
-       if(pft_thread==NULL) {
-               perror("[albe fast] alloc thread mem");
-               return -1;
-       }
-       pft_data=malloc(count*sizeof(t_pft_data));
-       if(pft_data==NULL) {
-               perror("[albe fast] alloc thread mem");
-               return -1;
-       }
+       i=0;
+       while(i<count) {
 
-       /* start threads */
-       for(i=0;i<count;i++) {
+               /* start threads */
+               for(j=0;j<MAX_THREADS;j++) {
 
-               /* prepare thread data */
-               pft_data[i].moldyn=moldyn;
-               pft_data[i].i=i;
+                       /* break if all atoms are processed */
+                       if(j+i==count)
+                               break;
 
-               ret=pthread_create(&(pft_thread[i]),NULL,
-                                   potential_force_thread,&(pft_data[i]));
-               if(ret)  {
-                       perror("[albe fast] pf thread create");
-                       return ret;
+                       /* prepare thread data */
+                       pft_data[j].moldyn=moldyn;
+                       pft_data[j].i=j+i;
+
+                       ret=pthread_create(&(pft_thread[j]),NULL,
+                                           potential_force_thread,
+                                           &(pft_data[j]));
+                       if(ret)  {
+                               perror("[albe fast] pf thread create");
+                               return ret;
+                       }
+               }
+
+               //printf("threads created! %d\n",j);
+
+               /* join threads */
+               for(j=0;j<MAX_THREADS;j++) {
+
+                       if(j+i==count)
+                               break;
+
+                       ret=pthread_join(pft_thread[j],NULL);
+                       if(ret) {
+                               perror("[albe fast] pf thread join");
+                               return ret;
+                       }
                }
+
+               /* increment counter */
+               i+=MAX_THREADS;
+
+               //printf("threads joined! -> %d\n",i);
+
        }
 
-       /* join threads */
+       /* some postprocessing */
        for(i=0;i<count;i++) {
-               ret=pthread_join(pft_thread[i],NULL);
-               if(ret) {
-                       perror("[albe fast] pf thread join");
-                       return ret;
-               }
+               /* calculate global virial */
+               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
+               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
+               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
+               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
+               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
+               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
+
+               /* check forces regarding the given timestep */
+               if(v3_norm(&(itom[i].f))>\
+                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
+                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
+                              i);
        }
 
+       pthread_exit(NULL);
+
        return 0;
 }