Merge branch 'leadoff'
[physik/posic.git] / potentials / albe_fast.c
index 952f1fe..79162b1 100644 (file)
@@ -23,7 +23,7 @@
 
 #ifdef PTHREADS
 #include <pthread.h>
-#define MAX_THREADS 4
+#define MAX_THREADS 2
 #endif
 
 #include "../moldyn.h"
@@ -39,12 +39,12 @@ extern pthread_mutex_t emutex;
  * virial calculation
  */
 
-#define albe_v_calc(a,f,d)     a->virial.xx+=f->x*d->x; \
-                               a->virial.yy+=f->y*d->y; \
-                               a->virial.zz+=f->z*d->z; \
-                               a->virial.xy+=f->x*d->y; \
-                               a->virial.xz+=f->x*d->z; \
-                               a->virial.yz+=f->y*d->z
+#define albe_v_calc(a,f,d)     (a)->virial.xx+=(f)->x*(d)->x; \
+                               (a)->virial.yy+=(f)->y*(d)->y; \
+                               (a)->virial.zz+=(f)->z*(d)->z; \
+                               (a)->virial.xy+=(f)->x*(d)->y; \
+                               (a)->virial.xz+=(f)->x*(d)->z; \
+                               (a)->virial.yz+=(f)->y*(d)->z
 
 #ifndef PTHREADS
 
@@ -452,7 +452,11 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
        }
 
        /* force contribution for atom i */
+#ifdef MATTONI
+       scale=-0.5*(f_c*(df_r-b*df_a)); // - in albe formalism
+#else
        scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
+#endif
        v3_scale(&force,&(dist_ij),scale);
        v3_add(&(ai->f),&(ai->f),&force);
 
@@ -461,7 +465,8 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
        v3_add(&(jtom->f),&(jtom->f),&force);
 
        /* virial */
-       virial_calc(ai,&force,&(dist_ij));
+       albe_v_calc(ai,&force,&(dist_ij));
+       //virial_calc(ai,&force,&(dist_ij));
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -595,17 +600,23 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       virial_calc(ai,&force,&dist_ij);
+       albe_v_calc(ai,&force,&dist_ij);
+       //virial_calc(ai,&force,&dist_ij);
 
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
 
        /* derivative wrt k */
+#ifdef MATTONI
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_scale(&force,&tmp,pre_dzeta);
+#else
        v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
        v3_scale(&tmp,&dcosdrk,fcdg);
        v3_add(&force,&force,&tmp);
        v3_scale(&force,&force,pre_dzeta);
+#endif
 
        /* force contribution */
        v3_add(&(ktom->f),&(ktom->f),&force);
@@ -623,7 +634,8 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       virial_calc(ai,&force,&dist_ik);
+       albe_v_calc(ai,&force,&dist_ik);
+       //virial_calc(ai,&force,&dist_ik);
        
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
@@ -703,7 +715,7 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 
 typedef struct s_pft_data { /* argument passed to one potential_force_thread */
        t_moldyn *moldyn; /* simulation data the thread works on */
-       int i;
+       int start,end; /* atom indices handled by the thread: start <= i < end */
 } t_pft_data;
 
 void *potential_force_thread(void *ptr) {
@@ -785,23 +797,20 @@ void *potential_force_thread(void *ptr) {
        // optimized
        params=moldyn->pot_params;
 
+       /* get energy, force and virial for atoms */
 
-       /* get energy, force and virial of every atom */
-
-       /* first (and only) loop over atoms i */
-       for(i=0;i<count;i++) {
+       for(i=pft_data->start;i<pft_data->end;i++) {
 
                if(!(itom[i].attr&ATOM_ATTR_3BP))
-                       continue;
+                       continue; // skip this atom only; return would drop the rest of [start,end)
 
-               link_cell_neighbour_index(moldyn,
+               // thread safe this way!
+               dnlc=link_cell_neighbour_index(moldyn,
                                          (itom[i].r.x+moldyn->dim.x/2)/lc->x,
                                          (itom[i].r.y+moldyn->dim.y/2)/lc->y,
                                          (itom[i].r.z+moldyn->dim.z/2)/lc->z,
                                          neighbour_i);
 
-               dnlc=lc->dnlc;
-
                /* copy the neighbour lists */
 #ifdef STATIC_LISTS
 #elif LOWMEM_LISTS
@@ -829,7 +838,7 @@ void *potential_force_thread(void *ptr) {
                        while(p!=-1) {
 
                                jtom=&(itom[p]);
-                               p=lc->subcell->list[p];
+                               p=lc->subcell->list[p]; // thread safe!
 #else
                        this=&(neighbour_i[j]);
                        list_reset_f(this);
@@ -1082,22 +1091,33 @@ void *potential_force_thread(void *ptr) {
        }
 
        /* force contribution for atom i */
+#ifdef MATTONI
+       scale=-0.5*(f_c*(df_r-b*df_a)); // - in albe formalism
+#else
        scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
+#endif
        v3_scale(&force,&(dist_ij),scale);
-       pthread_mutex_lock(&(amutex[ai->tag])); 
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (1)\n");
        v3_add(&(ai->f),&(ai->f),&force);
-       pthread_mutex_unlock(&(amutex[ai->tag]));       
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (1)\n");
 
        /* force contribution for atom j */
        v3_scale(&force,&force,-1.0); // dri rij = - drj rij
-       pthread_mutex_lock(&(amutex[jtom->tag]));       
+       if(pthread_mutex_lock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex lock (2)\n");
        v3_add(&(jtom->f),&(jtom->f),&force);
-       pthread_mutex_unlock(&(amutex[jtom->tag]));     
+       if(pthread_mutex_unlock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex unlock (2)\n");
 
        /* virial */
-       pthread_mutex_lock(&(amutex[ai->tag])); 
-       virial_calc(ai,&force,&(dist_ij));
-       pthread_mutex_unlock(&(amutex[ai->tag]));       
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (3)\n");
+       albe_v_calc(ai,&force,&(dist_ij));
+       //virial_calc(ai,&force,&(dist_ij));
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (3)\n");
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1120,12 +1140,16 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 
        /* energy contribution */
        energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
-       pthread_mutex_lock(&emutex);
+       if(pthread_mutex_lock(&emutex))
+               perror("[albe fast] mutex lock (energy)\n");
        moldyn->energy+=energy;
-       pthread_mutex_unlock(&emutex);
-       pthread_mutex_lock(&(amutex[ai->tag])); 
+       if(pthread_mutex_unlock(&emutex))
+               perror("[albe fast] mutex unlock (energy)\n");
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (4)\n");
        ai->e+=energy;
-       pthread_mutex_unlock(&(amutex[ai->tag]));       
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (4)\n");
 
        /* reset k counter for second k loop */
        kcount=0;
@@ -1220,9 +1244,11 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
 
        /* force contribution */
-       pthread_mutex_lock(&(amutex[jtom->tag]));       
+       if(pthread_mutex_lock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex lock (5)\n");
        v3_add(&(jtom->f),&(jtom->f),&force);
-       pthread_mutex_unlock(&(amutex[jtom->tag]));     
+       if(pthread_mutex_unlock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex unlock (5)\n");
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1237,24 +1263,34 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       pthread_mutex_lock(&(amutex[ai->tag])); 
-       virial_calc(ai,&force,&dist_ij);
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (6)\n");
+       albe_v_calc(ai,&force,&dist_ij);
+       //virial_calc(ai,&force,&dist_ij);
 
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
-       pthread_mutex_unlock(&(amutex[ai->tag]));       
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (6)\n");
 
        /* derivative wrt k */
+#ifdef MATTONI
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_scale(&force,&tmp,pre_dzeta);
+#else
        v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
        v3_scale(&tmp,&dcosdrk,fcdg);
        v3_add(&force,&force,&tmp);
        v3_scale(&force,&force,pre_dzeta);
+#endif
 
        /* force contribution */
-       pthread_mutex_lock(&(amutex[ktom->tag]));       
+       if(pthread_mutex_lock(&(amutex[ktom->tag])))
+               perror("[albe fast] mutex lock (7)\n");
        v3_add(&(ktom->f),&(ktom->f),&force);
-       pthread_mutex_unlock(&(amutex[ktom->tag]));     
+       if(pthread_mutex_unlock(&(amutex[ktom->tag])))
+               perror("[albe fast] mutex unlock (7)\n");
 
 #ifdef DEBUG
 if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1269,13 +1305,16 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
 
        /* virial */
-       pthread_mutex_lock(&(amutex[ai->tag])); 
-       virial_calc(ai,&force,&dist_ik);
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (8)\n");
+       albe_v_calc(ai,&force,&dist_ik);
+       //virial_calc(ai,&force,&dist_ik);
        
        /* force contribution to atom i */
        v3_scale(&force,&force,-1.0);
        v3_add(&(ai->f),&(ai->f),&force);
-       pthread_mutex_unlock(&(amutex[ai->tag]));       
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (8)\n");
 
        /* increase k counter */
        kcount++;       
@@ -1302,6 +1341,8 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
 #endif
                
                }
+
+       } // i loop
                
 #ifdef DEBUG
        //printf("\n\n");
@@ -1310,8 +1351,6 @@ if(moldyn->time>DSTART&&moldyn->time<DEND) {
        printf("\n\n");
 #endif
 
-       }
-
 #ifdef DEBUG
        //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
        if(moldyn->time>DSTART&&moldyn->time<DEND) {
@@ -1365,49 +1404,37 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
 
        }
 
-       i=0;
-       while(i<count) {
-
-               /* start threads */
-               for(j=0;j<MAX_THREADS;j++) {
+       /* start threads */
+       for(j=0;j<MAX_THREADS;j++) {
 
-                       /* break if all atoms are processed */
-                       if(j+i==count)
-                               break;
-
-                       /* prepare thread data */
-                       pft_data[j].moldyn=moldyn;
-                       pft_data[j].i=j+i;
-
-                       ret=pthread_create(&(pft_thread[j]),NULL,
-                                           potential_force_thread,
-                                           &(pft_data[j]));
-                       if(ret)  {
-                               perror("[albe fast] pf thread create");
-                               return ret;
-                       }
+               /* prepare thread data */
+               pft_data[j].moldyn=moldyn;
+               pft_data[j].start=j*(count/MAX_THREADS);
+               if(j==MAX_THREADS-1) {
+                       pft_data[j].end=count;
                }
-
-               //printf("threads created! %d\n",j);
-
-               /* join threads */
-               for(j=0;j<MAX_THREADS;j++) {
-
-                       if(j+i==count)
-                               break;
-
-                       ret=pthread_join(pft_thread[j],NULL);
-                       if(ret) {
-                               perror("[albe fast] pf thread join");
-                               return ret;
-                       }
+               else {
+                       pft_data[j].end=pft_data[j].start;
+                       pft_data[j].end+=count/MAX_THREADS;
                }
 
-               /* increment counter */
-               i+=MAX_THREADS;
+               ret=pthread_create(&(pft_thread[j]),NULL,
+                                   potential_force_thread,
+                                   &(pft_data[j]));
+               if(ret)  {
+                       perror("[albe fast] pf thread create");
+                       return ret;
+               }
+       }
 
-               //printf("threads joined! -> %d\n",i);
+       /* join threads */
+       for(j=0;j<MAX_THREADS;j++) {
 
+               ret=pthread_join(pft_thread[j],NULL);
+               if(ret) {
+                       perror("[albe fast] pf thread join");
+                       return ret;
+               }
        }
 
        /* some postprocessing */
@@ -1427,8 +1454,6 @@ int albe_potential_force_calc(t_moldyn *moldyn) {
                               i);
        }
 
-       pthread_exit(NULL);
-
        return 0;
 }