Merge branch 'leadoff'
[physik/posic.git] / potentials / albe_fast.c
diff --git a/potentials/albe_fast.c b/potentials/albe_fast.c
new file mode 100644 (file)
index 0000000..79162b1
--- /dev/null
@@ -0,0 +1,1460 @@
+/*
+ * test: albe_new.c
+ *
+ * author: Frank Zirkelbach <frank.zirkelbach@physik.uni-augsburg.de>
+ *
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <time.h>
+#include <math.h>
+
+#ifdef PARALLEL
+#include <omp.h>
+#endif
+
+#ifdef PTHREADS
+#include <pthread.h>
+#define MAX_THREADS 2
+#endif
+
+#include "../moldyn.h"
+#include "../math/math.h"
+#include "albe.h"
+
+#ifdef PTHREADS
+extern pthread_mutex_t *amutex;
+extern pthread_mutex_t emutex;
+#endif
+
+/*
+ * virial calculation
+ */
+
+#define albe_v_calc(a,f,d)     (a)->virial.xx+=(f)->x*(d)->x; \
+                               (a)->virial.yy+=(f)->y*(d)->y; \
+                               (a)->virial.zz+=(f)->z*(d)->z; \
+                               (a)->virial.xy+=(f)->x*(d)->y; \
+                               (a)->virial.xz+=(f)->x*(d)->z; \
+                               (a)->virial.yz+=(f)->y*(d)->z
+
+#ifndef PTHREADS
+
+int albe_potential_force_calc(t_moldyn *moldyn) {
+
+       int i,j,k,count;
+       t_atom *itom,*jtom,*ktom;
+       t_virial *virial;
+       t_linkcell *lc;
+#ifdef STATIC_LISTS
+       int *neighbour_i[27];
+       int p,q;
+#elif LOWMEM_LISTS
+       int neighbour_i[27];
+       int p,q;
+#else
+       t_list neighbour_i[27];
+       t_list neighbour_i2[27];
+       t_list *this,*that;
+#endif
+       u8 bc_ij,bc_ik;
+       int dnlc;
+#ifdef PTHREADS
+       int ret;
+       t_kdata kdata[27];
+       pthread_t kthread[27];
+#endif
+
+       // needed to work
+       t_atom *ai;
+
+       // optimized
+       t_albe_mult_params *params;
+       t_albe_exchange *exchange;
+       t_3dvec dist_ij;
+       double d_ij2;
+       double d_ij;
+       u8 brand_i;
+       double S2;
+       int kcount;
+       double zeta_ij;
+       double pre_dzeta;
+
+       // more ...
+       double Rk,Sk,Sk2;
+       t_3dvec dist_ik;
+       double d_ik2,d_ik;
+       double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
+       double f_c_ik,df_c_ik;
+
+       t_3dvec force;
+       double f_a,df_a,b,db,f_c,df_c;
+       double f_r,df_r;
+       double scale;
+       double mu,B;
+       double lambda,A;
+       double r0;
+       double S,R;
+       double energy;
+
+       double dijdik_inv,fcdg,dfcg;
+       t_3dvec dcosdrj,dcosdrk;
+       t_3dvec tmp;
+
+       // even more ...
+       double gamma_i;
+       double c_i;
+       double d_i;
+       double h_i;
+       double ci2;
+       double di2;
+       double ci2di2;
+
+       count=moldyn->count;
+       itom=moldyn->atom;
+       lc=&(moldyn->lc);
+
+       // optimized
+       params=moldyn->pot_params;
+       exchange=&(params->exchange);
+
+
+       /* reset energy */
+       moldyn->energy=0.0;
+
+       /* reset global virial */
+       memset(&(moldyn->gvir),0,sizeof(t_virial));
+
+       /* reset force, site energy and virial of every atom */
+#ifdef PARALLEL
+       #pragma omp parallel for private(virial)
+#endif
+       for(i=0;i<count;i++) {
+
+               /* reset force */
+               v3_zero(&(itom[i].f));
+
+               /* reset virial */
+               virial=(&(itom[i].virial));
+               virial->xx=0.0;
+               virial->yy=0.0;
+               virial->zz=0.0;
+               virial->xy=0.0;
+               virial->xz=0.0;
+               virial->yz=0.0;
+       
+               /* reset site energy */
+               itom[i].e=0.0;
+
+       }
+
+       /* get energy, force and virial of every atom */
+
+       /* first (and only) loop over atoms i */
+       for(i=0;i<count;i++) {
+
+               if(!(itom[i].attr&ATOM_ATTR_3BP))
+                       continue;
+
+               link_cell_neighbour_index(moldyn,
+                                         (itom[i].r.x+moldyn->dim.x/2)/lc->x,
+                                         (itom[i].r.y+moldyn->dim.y/2)/lc->y,
+                                         (itom[i].r.z+moldyn->dim.z/2)/lc->z,
+                                         neighbour_i);
+
+               dnlc=lc->dnlc;
+
+               /* copy the neighbour lists */
+#ifdef STATIC_LISTS
+#elif LOWMEM_LISTS
+#else
+               memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
+#endif
+
+               ai=&(itom[i]);
+               brand_i=ai->brand;
+
+               /* loop over atoms j */
+               for(j=0;j<27;j++) {
+
+                       bc_ij=(j<dnlc)?0:1;
+#ifdef STATIC_LISTS
+                       p=0;
+
+                       while(neighbour_i[j][p]!=-1) {
+
+                               jtom=&(itom[neighbour_i[j][p]]);
+                               p++;
+#elif LOWMEM_LISTS
+                       p=neighbour_i[j];
+
+                       while(p!=-1) {
+
+                               jtom=&(itom[p]);
+                               p=lc->subcell->list[p];
+#else
+                       this=&(neighbour_i[j]);
+                       list_reset_f(this);
+
+                       if(this->start==NULL)
+                               continue;
+
+                       do {
+
+                               jtom=this->current->data;
+#endif
+
+                               if(jtom==&(itom[i]))
+                                       continue;
+
+                               if(!(jtom->attr&ATOM_ATTR_3BP))
+                                       continue;
+
+                               /* reset 3bp run */
+                               moldyn->run3bp=1;
+
+
+/* j1 func here ... */
+/* albe 3 body potential function (first ij loop) */
+
+       /* reset zeta sum */
+       zeta_ij=0.0;
+
+       /*
+        * set ij depending values
+        */
+
+       if(brand_i==jtom->brand) {
+               S2=params->S2[brand_i];
+       }
+       else {
+               S2=params->S2mixed;
+       }
+
+       /* dist_ij, d_ij2 */
+       v3_sub(&dist_ij,&(jtom->r),&(ai->r));
+       if(bc_ij) check_per_bound(moldyn,&dist_ij);
+       d_ij2=v3_absolute_square(&dist_ij);
+
+       /* if d_ij2 > S2 => no force & potential energy contribution */
+       if(d_ij2>S2)
+               continue;
+
+       /* d_ij */
+       d_ij=sqrt(d_ij2);
+
+       /* reset k counter for first k loop */
+       kcount=0;
+
+                               /* first loop over atoms k */
+                               for(k=0;k<27;k++) {
+
+                                       bc_ik=(k<dnlc)?0:1;
+#ifdef STATIC_LISTS
+                                       q=0;
+
+                                       while(neighbour_i[k][q]!=-1) {
+
+                                               ktom=&(itom[neighbour_i[k][q]]);
+                                               q++;
+#elif LOWMEM_LISTS
+                                       q=neighbour_i[k];
+
+                                       while(q!=-1) {
+
+                                               ktom=&(itom[q]);
+                                               q=lc->subcell->list[q];
+#else
+                                       that=&(neighbour_i2[k]);
+                                       list_reset_f(that);
+                                       
+                                       if(that->start==NULL)
+                                               continue;
+
+                                       do {
+                                               ktom=that->current->data;
+#endif
+
+                                               if(!(ktom->attr&ATOM_ATTR_3BP))
+                                                       continue;
+
+                                               if(ktom==jtom)
+                                                       continue;
+
+                                               if(ktom==&(itom[i]))
+                                                       continue;
+
+/* k1 func here ... */
+/* albe 3 body potential function (first k loop) */
+
+       if(kcount>ALBE_MAXN) {
+               printf("FATAL: neighbours = %d\n",kcount);
+               printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
+       }
+
+       /* ik constants */
+       if(brand_i==ktom->brand) {
+               Rk=params->R[brand_i];
+               Sk=params->S[brand_i];
+               Sk2=params->S2[brand_i];
+               /* albe needs i,k depending c,d,h and gamma values */
+               gamma_i=params->gamma[brand_i];
+               c_i=params->c[brand_i];
+               d_i=params->d[brand_i];
+               h_i=params->h[brand_i];
+               ci2=params->c2[brand_i];
+               di2=params->d2[brand_i];
+               ci2di2=params->c2d2[brand_i];
+       }
+       else {
+               Rk=params->Rmixed;
+               Sk=params->Smixed;
+               Sk2=params->S2mixed;
+               /* albe needs i,k depending c,d,h and gamma values */
+               gamma_i=params->gamma_m;
+               c_i=params->c_mixed;
+               d_i=params->d_mixed;
+               h_i=params->h_mixed;
+               ci2=params->c2_mixed;
+               di2=params->d2_mixed;
+               ci2di2=params->c2d2_m;
+       }
+
+       /* dist_ik, d_ik2 */
+       v3_sub(&dist_ik,&(ktom->r),&(ai->r));
+       if(bc_ik) check_per_bound(moldyn,&dist_ik);
+       d_ik2=v3_absolute_square(&dist_ik);
+
+       /* store data for second k loop */
+       exchange->dist_ik[kcount]=dist_ik;
+       exchange->d_ik2[kcount]=d_ik2;
+
+       /* return if not within cutoff */
+       if(d_ik2>Sk2) {
+               kcount++;
+               continue;
+       }
+
+       /* d_ik */
+       d_ik=sqrt(d_ik2);
+
+       /* cos theta */
+       cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
+
+       /* g_ijk 
+       h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
+       d2_h_cos2=exchange->di2+(h_cos*h_cos);
+       frac=exchange->ci2/d2_h_cos2;
+       g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
+       dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
+       */
+
+       h_cos=h_i+cos_theta; // + in albe formalism
+       d2_h_cos2=di2+(h_cos*h_cos);
+       frac=ci2/d2_h_cos2;
+       g=gamma_i*(1.0+ci2di2-frac);
+       dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
+
+       /* zeta sum += f_c_ik * g_ijk */
+       if(d_ik<=Rk) {
+               zeta_ij+=g;
+               f_c_ik=1.0;
+               df_c_ik=0.0;
+       }
+       else {
+               s_r=Sk-Rk;
+               arg=M_PI*(d_ik-Rk)/s_r;
+               f_c_ik=0.5+0.5*cos(arg);
+               df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
+               zeta_ij+=f_c_ik*g;
+       }
+
+       /* store even more data for second k loop */
+       exchange->g[kcount]=g;
+       exchange->dg[kcount]=dg;
+       exchange->d_ik[kcount]=d_ik;
+       exchange->cos_theta[kcount]=cos_theta;
+       exchange->f_c_ik[kcount]=f_c_ik;
+       exchange->df_c_ik[kcount]=df_c_ik;
+
+       /* increase k counter */
+       kcount++;
+
+#ifdef STATIC_LISTS
+                                       }
+#elif LOWMEM_LISTS
+                                       }
+#else
+                                       } while(list_next_f(that)!=\
+                                               L_NO_NEXT_ELEMENT);
+#endif
+
+                               }
+
+/* j2 func here ... */
+
+
+       if(brand_i==jtom->brand) {
+               S=params->S[brand_i];
+               R=params->R[brand_i];
+               B=params->B[brand_i];
+               A=params->A[brand_i];
+               r0=params->r0[brand_i];
+               mu=params->mu[brand_i];
+               lambda=params->lambda[brand_i];
+       }
+       else {
+               S=params->Smixed;
+               R=params->Rmixed;
+               B=params->Bmixed;
+               A=params->Amixed;
+               r0=params->r0_mixed;
+               mu=params->mu_m;
+               lambda=params->lambda_m;
+       }
+
+       /* f_c, df_c */
+       if(d_ij<R) {
+               f_c=1.0;
+               df_c=0.0;
+       }
+       else {
+               s_r=S-R;
+               arg=M_PI*(d_ij-R)/s_r;
+               f_c=0.5+0.5*cos(arg);
+               df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
+       }
+
+       /* f_a, df_a */
+       f_a=-B*exp(-mu*(d_ij-r0));
+       df_a=mu*f_a/d_ij;
+
+       /* f_r, df_r */
+       f_r=A*exp(-lambda*(d_ij-r0));
+       df_r=lambda*f_r/d_ij;
+
+       /* b, db */
+       if(zeta_ij==0.0) {
+               b=1.0;
+               db=0.0;
+       }
+       else {
+               b=1.0/sqrt(1.0+zeta_ij);
+               db=-0.5*b/(1.0+zeta_ij);
+       }
+
+       /* force contribution for atom i */
+#ifdef MATTONI
+       scale=-0.5*(f_c*(df_r-b*df_a)); // - in albe formalism
+#else
+       scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
+#endif
+       v3_scale(&force,&(dist_ij),scale);
+       v3_add(&(ai->f),&(ai->f),&force);
+
+       /* force contribution for atom j */
+       v3_scale(&force,&force,-1.0); // dri rij = - drj rij
+       v3_add(&(jtom->f),&(jtom->f),&force);
+
+       /* virial */
+       albe_v_calc(ai,&force,&(dist_ij));
+       //virial_calc(ai,&force,&(dist_ij));
+
+#ifdef DEBUG
+if(moldyn->time>DSTART&&moldyn->time<DEND) {
+       if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
+               printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
+               printf("  adding %f %f %f\n",force.x,force.y,force.z);
+               if(ai==&(moldyn->atom[0]))
+                       printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
+               if(jtom==&(moldyn->atom[0]))
+                       printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
+               printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
+                                                   f_c,b,f_a,f_r);
+               printf("          %f %f %f\n",zeta_ij,.0,.0);
+       }
+}
+#endif
+
+       /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
+       pre_dzeta=0.5*f_a*f_c*db;
+
+       /* energy contribution */
+       energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
+       moldyn->energy+=energy;
+       ai->e+=energy;
+
+       /* reset k counter for second k loop */
+       kcount=0;
+               
+
+                               /* second loop over atoms k */
+                               for(k=0;k<27;k++) {
+
+                                       bc_ik=(k<dnlc)?0:1;
+#ifdef STATIC_LISTS
+                                       q=0;
+
+                                       while(neighbour_i[k][q]!=-1) {
+
+                                               ktom=&(itom[neighbour_i[k][q]]);
+                                               q++;
+#elif LOWMEM_LISTS
+                                       q=neighbour_i[k];
+
+                                       while(q!=-1) {
+
+                                               ktom=&(itom[q]);
+                                               q=lc->subcell->list[q];
+#else
+                                       that=&(neighbour_i2[k]);
+                                       list_reset_f(that);
+                                       
+                                       if(that->start==NULL)
+                                               continue;
+
+                                       do {
+                                               ktom=that->current->data;
+#endif
+
+                                               if(!(ktom->attr&ATOM_ATTR_3BP))
+                                                       continue;
+
+                                               if(ktom==jtom)
+                                                       continue;
+
+                                               if(ktom==&(itom[i]))
+                                                       continue;
+
+
+/* k2 func here ... */
+/* albe 3 body potential function (second k loop) */
+
+       if(kcount>ALBE_MAXN)
+               printf("FATAL: neighbours!\n");
+
+       /* d_ik2 */
+       d_ik2=exchange->d_ik2[kcount];
+
+       if(brand_i==ktom->brand)
+               Sk2=params->S2[brand_i];
+       else
+               Sk2=params->S2mixed;
+
+       /* return if d_ik > S */
+       if(d_ik2>Sk2) {
+               kcount++;
+               continue;
+       }
+
+       /* dist_ik, d_ik */
+       dist_ik=exchange->dist_ik[kcount];
+       d_ik=exchange->d_ik[kcount];
+
+       /* f_c_ik, df_c_ik */
+       f_c_ik=exchange->f_c_ik[kcount];
+       df_c_ik=exchange->df_c_ik[kcount];
+
+       /* g, dg, cos_theta */
+       g=exchange->g[kcount];
+       dg=exchange->dg[kcount];
+       cos_theta=exchange->cos_theta[kcount];
+
+       /* cos_theta derivatives wrt j,k */
+       dijdik_inv=1.0/(d_ij*d_ik);
+       v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
+       v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
+       v3_add(&dcosdrj,&dcosdrj,&tmp);
+       v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
+       v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
+       v3_add(&dcosdrk,&dcosdrk,&tmp);
+
+       /* f_c_ik * dg, df_c_ik * g */
+       fcdg=f_c_ik*dg;
+       dfcg=df_c_ik*g;
+
+       /* derivative wrt j */
+       v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
+
+       /* force contribution */
+       v3_add(&(jtom->f),&(jtom->f),&force);
+
+#ifdef DEBUG
+if(moldyn->time>DSTART&&moldyn->time<DEND) {
+       if(jtom==&(moldyn->atom[DATOM])) {
+               printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
+               printf("  adding %f %f %f\n",force.x,force.y,force.z);
+               printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
+               printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
+               printf("    d ij ik = %f %f\n",d_ij,d_ik);
+       }
+}
+#endif
+
+       /* virial */
+       albe_v_calc(ai,&force,&dist_ij);
+       //virial_calc(ai,&force,&dist_ij);
+
+       /* force contribution to atom i */
+       v3_scale(&force,&force,-1.0);
+       v3_add(&(ai->f),&(ai->f),&force);
+
+       /* derivative wrt k */
+#ifdef MATTONI
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_scale(&force,&tmp,pre_dzeta);
+#else
+       v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_add(&force,&force,&tmp);
+       v3_scale(&force,&force,pre_dzeta);
+#endif
+
+       /* force contribution */
+       v3_add(&(ktom->f),&(ktom->f),&force);
+
+#ifdef DEBUG
+if(moldyn->time>DSTART&&moldyn->time<DEND) {
+       if(ktom==&(moldyn->atom[DATOM])) {
+               printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
+               printf("  adding %f %f %f\n",force.x,force.y,force.z);
+               printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
+               printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
+               printf("    d ij ik = %f %f\n",d_ij,d_ik);
+       }
+}
+#endif
+
+       /* virial */
+       albe_v_calc(ai,&force,&dist_ik);
+       //virial_calc(ai,&force,&dist_ik);
+       
+       /* force contribution to atom i */
+       v3_scale(&force,&force,-1.0);
+       v3_add(&(ai->f),&(ai->f),&force);
+
+       /* increase k counter */
+       kcount++;       
+
+
+
+#ifdef STATIC_LISTS
+                                       }
+#elif LOWMEM_LISTS
+                                       }
+#else
+                                       } while(list_next_f(that)!=\
+                                               L_NO_NEXT_ELEMENT);
+#endif
+
+                               }
+                               
+#ifdef STATIC_LISTS
+                       }
+#elif LOWMEM_LISTS
+                       }
+#else
+                       } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
+#endif
+               
+               }
+               
+#ifdef DEBUG
+       //printf("\n\n");
+#endif
+#ifdef VDEBUG
+       printf("\n\n");
+#endif
+
+       }
+
+#ifdef DEBUG
+       //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
+       if(moldyn->time>DSTART&&moldyn->time<DEND) {
+               printf("force:\n");
+               printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
+               printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
+               printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
+       }
+#endif
+
+       /* some postprocessing */
+#ifdef PARALLEL
+       #pragma omp parallel for
+#endif
+       for(i=0;i<count;i++) {
+               /* calculate global virial */
+               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
+               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
+               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
+               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
+               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
+               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
+
+               /* check forces regarding the given timestep */
+               if(v3_norm(&(itom[i].f))>\
+                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
+                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
+                              i);
+       }
+
+       return 0;
+}
+
+
+#else // PTHREADS
+
+
+typedef struct s_pft_data {
+       t_moldyn *moldyn;
+       int start,end;
+} t_pft_data;
+
+void *potential_force_thread(void *ptr) {
+
+       t_pft_data *pft_data;
+       t_moldyn *moldyn;
+       t_albe_exchange ec;
+
+       int i,j,k,count;
+       t_atom *itom,*jtom,*ktom;
+       t_linkcell *lc;
+#ifdef STATIC_LISTS
+       int *neighbour_i[27];
+       int p,q;
+#elif LOWMEM_LISTS
+       int neighbour_i[27];
+       int p,q;
+#else
+       t_list neighbour_i[27];
+       t_list neighbour_i2[27];
+       t_list *this,*that;
+#endif
+       u8 bc_ij,bc_ik;
+       int dnlc;
+
+       // needed to work
+       t_atom *ai;
+
+       // optimized
+       t_albe_mult_params *params;
+       t_albe_exchange *exchange;
+       t_3dvec dist_ij;
+       double d_ij2;
+       double d_ij;
+       u8 brand_i;
+       double S2;
+       int kcount;
+       double zeta_ij;
+       double pre_dzeta;
+
+       // more ...
+       double Rk,Sk,Sk2;
+       t_3dvec dist_ik;
+       double d_ik2,d_ik;
+       double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
+       double f_c_ik,df_c_ik;
+
+       t_3dvec force;
+       double f_a,df_a,b,db,f_c,df_c;
+       double f_r,df_r;
+       double scale;
+       double mu,B;
+       double lambda,A;
+       double r0;
+       double S,R;
+       double energy;
+
+       double dijdik_inv,fcdg,dfcg;
+       t_3dvec dcosdrj,dcosdrk;
+       t_3dvec tmp;
+
+       // even more ...
+       double gamma_i;
+       double c_i;
+       double d_i;
+       double h_i;
+       double ci2;
+       double di2;
+       double ci2di2;
+
+       pft_data=ptr;
+       moldyn=pft_data->moldyn;
+       exchange=&ec;
+
+       count=moldyn->count;
+       itom=moldyn->atom;
+       lc=&(moldyn->lc);
+
+       // optimized
+       params=moldyn->pot_params;
+
+       /* get energy, force and virial for atoms */
+
+       for(i=pft_data->start;i<pft_data->end;i++) {
+
+               if(!(itom[i].attr&ATOM_ATTR_3BP))
+                       return 0;
+
+               // thread safe this way!
+               dnlc=link_cell_neighbour_index(moldyn,
+                                         (itom[i].r.x+moldyn->dim.x/2)/lc->x,
+                                         (itom[i].r.y+moldyn->dim.y/2)/lc->y,
+                                         (itom[i].r.z+moldyn->dim.z/2)/lc->z,
+                                         neighbour_i);
+
+               /* copy the neighbour lists */
+#ifdef STATIC_LISTS
+#elif LOWMEM_LISTS
+#else
+               memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
+#endif
+
+               ai=&(itom[i]);
+               brand_i=ai->brand;
+
+               /* loop over atoms j */
+               for(j=0;j<27;j++) {
+
+                       bc_ij=(j<dnlc)?0:1;
+#ifdef STATIC_LISTS
+                       p=0;
+
+                       while(neighbour_i[j][p]!=-1) {
+
+                               jtom=&(itom[neighbour_i[j][p]]);
+                               p++;
+#elif LOWMEM_LISTS
+                       p=neighbour_i[j];
+
+                       while(p!=-1) {
+
+                               jtom=&(itom[p]);
+                               p=lc->subcell->list[p]; // thread safe!
+#else
+                       this=&(neighbour_i[j]);
+                       list_reset_f(this);
+
+                       if(this->start==NULL)
+                               continue;
+
+                       do {
+
+                               jtom=this->current->data;
+#endif
+
+                               if(jtom==&(itom[i]))
+                                       continue;
+
+                               if(!(jtom->attr&ATOM_ATTR_3BP))
+                                       continue;
+
+                               /* reset 3bp run */
+                               moldyn->run3bp=1;
+
+
+/* j1 func here ... */
+/* albe 3 body potential function (first ij loop) */
+
+       /* reset zeta sum */
+       zeta_ij=0.0;
+
+       /*
+        * set ij depending values
+        */
+
+       if(brand_i==jtom->brand) {
+               S2=params->S2[brand_i];
+       }
+       else {
+               S2=params->S2mixed;
+       }
+
+       /* dist_ij, d_ij2 */
+       v3_sub(&dist_ij,&(jtom->r),&(ai->r));
+       if(bc_ij) check_per_bound(moldyn,&dist_ij);
+       d_ij2=v3_absolute_square(&dist_ij);
+
+       /* if d_ij2 > S2 => no force & potential energy contribution */
+       if(d_ij2>S2)
+               continue;
+
+       /* d_ij */
+       d_ij=sqrt(d_ij2);
+
+       /* reset k counter for first k loop */
+       kcount=0;
+
+                               /* first loop over atoms k */
+                               for(k=0;k<27;k++) {
+
+                                       bc_ik=(k<dnlc)?0:1;
+#ifdef STATIC_LISTS
+                                       q=0;
+
+                                       while(neighbour_i[k][q]!=-1) {
+
+                                               ktom=&(itom[neighbour_i[k][q]]);
+                                               q++;
+#elif LOWMEM_LISTS
+                                       q=neighbour_i[k];
+
+                                       while(q!=-1) {
+
+                                               ktom=&(itom[q]);
+                                               q=lc->subcell->list[q];
+#else
+                                       that=&(neighbour_i2[k]);
+                                       list_reset_f(that);
+                                       
+                                       if(that->start==NULL)
+                                               continue;
+
+                                       do {
+                                               ktom=that->current->data;
+#endif
+
+                                               if(!(ktom->attr&ATOM_ATTR_3BP))
+                                                       continue;
+
+                                               if(ktom==jtom)
+                                                       continue;
+
+                                               if(ktom==&(itom[i]))
+                                                       continue;
+
+/* k1 func here ... */
+/* albe 3 body potential function (first k loop) */
+
+       if(kcount>ALBE_MAXN) {
+               printf("FATAL: neighbours = %d\n",kcount);
+               printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
+       }
+
+       /* ik constants */
+       if(brand_i==ktom->brand) {
+               Rk=params->R[brand_i];
+               Sk=params->S[brand_i];
+               Sk2=params->S2[brand_i];
+               /* albe needs i,k depending c,d,h and gamma values */
+               gamma_i=params->gamma[brand_i];
+               c_i=params->c[brand_i];
+               d_i=params->d[brand_i];
+               h_i=params->h[brand_i];
+               ci2=params->c2[brand_i];
+               di2=params->d2[brand_i];
+               ci2di2=params->c2d2[brand_i];
+       }
+       else {
+               Rk=params->Rmixed;
+               Sk=params->Smixed;
+               Sk2=params->S2mixed;
+               /* albe needs i,k depending c,d,h and gamma values */
+               gamma_i=params->gamma_m;
+               c_i=params->c_mixed;
+               d_i=params->d_mixed;
+               h_i=params->h_mixed;
+               ci2=params->c2_mixed;
+               di2=params->d2_mixed;
+               ci2di2=params->c2d2_m;
+       }
+
+       /* dist_ik, d_ik2 */
+       v3_sub(&dist_ik,&(ktom->r),&(ai->r));
+       if(bc_ik) check_per_bound(moldyn,&dist_ik);
+       d_ik2=v3_absolute_square(&dist_ik);
+
+       /* store data for second k loop */
+       exchange->dist_ik[kcount]=dist_ik;
+       exchange->d_ik2[kcount]=d_ik2;
+
+       /* return if not within cutoff */
+       if(d_ik2>Sk2) {
+               kcount++;
+               continue;
+       }
+
+       /* d_ik */
+       d_ik=sqrt(d_ik2);
+
+       /* cos theta */
+       cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
+
+       /* g_ijk 
+       h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
+       d2_h_cos2=exchange->di2+(h_cos*h_cos);
+       frac=exchange->ci2/d2_h_cos2;
+       g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
+       dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
+       */
+
+       h_cos=h_i+cos_theta; // + in albe formalism
+       d2_h_cos2=di2+(h_cos*h_cos);
+       frac=ci2/d2_h_cos2;
+       g=gamma_i*(1.0+ci2di2-frac);
+       dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
+
+       /* zeta sum += f_c_ik * g_ijk */
+       if(d_ik<=Rk) {
+               zeta_ij+=g;
+               f_c_ik=1.0;
+               df_c_ik=0.0;
+       }
+       else {
+               s_r=Sk-Rk;
+               arg=M_PI*(d_ik-Rk)/s_r;
+               f_c_ik=0.5+0.5*cos(arg);
+               df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
+               zeta_ij+=f_c_ik*g;
+       }
+
+       /* store even more data for second k loop */
+       exchange->g[kcount]=g;
+       exchange->dg[kcount]=dg;
+       exchange->d_ik[kcount]=d_ik;
+       exchange->cos_theta[kcount]=cos_theta;
+       exchange->f_c_ik[kcount]=f_c_ik;
+       exchange->df_c_ik[kcount]=df_c_ik;
+
+       /* increase k counter */
+       kcount++;
+
+#ifdef STATIC_LISTS
+                                       }
+#elif LOWMEM_LISTS
+                                       }
+#else
+                                       } while(list_next_f(that)!=\
+                                               L_NO_NEXT_ELEMENT);
+#endif
+
+                               }
+
+/* j2 func here ... */
+
+
+       if(brand_i==jtom->brand) {
+               S=params->S[brand_i];
+               R=params->R[brand_i];
+               B=params->B[brand_i];
+               A=params->A[brand_i];
+               r0=params->r0[brand_i];
+               mu=params->mu[brand_i];
+               lambda=params->lambda[brand_i];
+       }
+       else {
+               S=params->Smixed;
+               R=params->Rmixed;
+               B=params->Bmixed;
+               A=params->Amixed;
+               r0=params->r0_mixed;
+               mu=params->mu_m;
+               lambda=params->lambda_m;
+       }
+
+       /* f_c, df_c */
+       if(d_ij<R) {
+               f_c=1.0;
+               df_c=0.0;
+       }
+       else {
+               s_r=S-R;
+               arg=M_PI*(d_ij-R)/s_r;
+               f_c=0.5+0.5*cos(arg);
+               df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
+       }
+
+       /* f_a, df_a */
+       f_a=-B*exp(-mu*(d_ij-r0));
+       df_a=mu*f_a/d_ij;
+
+       /* f_r, df_r */
+       f_r=A*exp(-lambda*(d_ij-r0));
+       df_r=lambda*f_r/d_ij;
+
+       /* b, db */
+       if(zeta_ij==0.0) {
+               b=1.0;
+               db=0.0;
+       }
+       else {
+               b=1.0/sqrt(1.0+zeta_ij);
+               db=-0.5*b/(1.0+zeta_ij);
+       }
+
+       /* force contribution for atom i */
+#ifdef MATTONI
+       scale=-0.5*(f_c*(df_r-b*df_a)); // - in albe formalism
+#else
+       scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
+#endif
+       v3_scale(&force,&(dist_ij),scale);
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (1)\n");
+       v3_add(&(ai->f),&(ai->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (1)\n");
+
+       /* force contribution for atom j */
+       v3_scale(&force,&force,-1.0); // dri rij = - drj rij
+       if(pthread_mutex_lock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex lock (2)\n");
+       v3_add(&(jtom->f),&(jtom->f),&force);
+       if(pthread_mutex_unlock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex unlock (2)\n");
+
+       /* virial */
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (3)\n");
+       albe_v_calc(ai,&force,&(dist_ij));
+       //virial_calc(ai,&force,&(dist_ij));
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (3)\n");
+
+#ifdef DEBUG
+if(moldyn->time>DSTART&&moldyn->time<DEND) {
+       if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
+               printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
+               printf("  adding %f %f %f\n",force.x,force.y,force.z);
+               if(ai==&(moldyn->atom[0]))
+                       printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
+               if(jtom==&(moldyn->atom[0]))
+                       printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
+               printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
+                                                   f_c,b,f_a,f_r);
+               printf("          %f %f %f\n",zeta_ij,.0,.0);
+       }
+}
+#endif
+
+       /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
+       pre_dzeta=0.5*f_a*f_c*db;
+
+       /* energy contribution */
+       energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
+       if(pthread_mutex_lock(&emutex))
+               perror("[albe fast] mutex lock (energy)\n");
+       moldyn->energy+=energy;
+       if(pthread_mutex_unlock(&emutex))
+               perror("[albe fast] mutex unlock (energy)\n");
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (4)\n");
+       ai->e+=energy;
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (4)\n");
+
+       /* reset k counter for second k loop */
+       kcount=0;
+               
+
+                               /* second loop over atoms k */
+                               for(k=0;k<27;k++) {
+
+                                       bc_ik=(k<dnlc)?0:1;
+#ifdef STATIC_LISTS
+                                       q=0;
+
+                                       while(neighbour_i[k][q]!=-1) {
+
+                                               ktom=&(itom[neighbour_i[k][q]]);
+                                               q++;
+#elif LOWMEM_LISTS
+                                       q=neighbour_i[k];
+
+                                       while(q!=-1) {
+
+                                               ktom=&(itom[q]);
+                                               q=lc->subcell->list[q];
+#else
+                                       that=&(neighbour_i2[k]);
+                                       list_reset_f(that);
+                                       
+                                       if(that->start==NULL)
+                                               continue;
+
+                                       do {
+                                               ktom=that->current->data;
+#endif
+
+                                               if(!(ktom->attr&ATOM_ATTR_3BP))
+                                                       continue;
+
+                                               if(ktom==jtom)
+                                                       continue;
+
+                                               if(ktom==&(itom[i]))
+                                                       continue;
+
+
+/* k2 func here ... */
+/* albe 3 body potential function (second k loop) */
+
+       if(kcount>ALBE_MAXN)
+               printf("FATAL: neighbours!\n");
+
+       /* d_ik2 */
+       d_ik2=exchange->d_ik2[kcount];
+
+       if(brand_i==ktom->brand)
+               Sk2=params->S2[brand_i];
+       else
+               Sk2=params->S2mixed;
+
+       /* return if d_ik > S */
+       if(d_ik2>Sk2) {
+               kcount++;
+               continue;
+       }
+
+       /* dist_ik, d_ik */
+       dist_ik=exchange->dist_ik[kcount];
+       d_ik=exchange->d_ik[kcount];
+
+       /* f_c_ik, df_c_ik */
+       f_c_ik=exchange->f_c_ik[kcount];
+       df_c_ik=exchange->df_c_ik[kcount];
+
+       /* g, dg, cos_theta */
+       g=exchange->g[kcount];
+       dg=exchange->dg[kcount];
+       cos_theta=exchange->cos_theta[kcount];
+
+       /* cos_theta derivatives wrt j,k */
+       dijdik_inv=1.0/(d_ij*d_ik);
+       v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
+       v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
+       v3_add(&dcosdrj,&dcosdrj,&tmp);
+       v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
+       v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
+       v3_add(&dcosdrk,&dcosdrk,&tmp);
+
+       /* f_c_ik * dg, df_c_ik * g */
+       fcdg=f_c_ik*dg;
+       dfcg=df_c_ik*g;
+
+       /* derivative wrt j */
+       v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
+
+       /* force contribution */
+       if(pthread_mutex_lock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex lock (5)\n");
+       v3_add(&(jtom->f),&(jtom->f),&force);
+       if(pthread_mutex_unlock(&(amutex[jtom->tag])))
+               perror("[albe fast] mutex unlock (5)\n");
+
+#ifdef DEBUG
+if(moldyn->time>DSTART&&moldyn->time<DEND) {
+       if(jtom==&(moldyn->atom[DATOM])) {
+               printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
+               printf("  adding %f %f %f\n",force.x,force.y,force.z);
+               printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
+               printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
+               printf("    d ij ik = %f %f\n",d_ij,d_ik);
+       }
+}
+#endif
+
+       /* virial */
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (6)\n");
+       albe_v_calc(ai,&force,&dist_ij);
+       //virial_calc(ai,&force,&dist_ij);
+
+       /* force contribution to atom i */
+       v3_scale(&force,&force,-1.0);
+       v3_add(&(ai->f),&(ai->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (6)\n");
+
+       /* derivative wrt k */
+#ifdef MATTONI
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_scale(&force,&tmp,pre_dzeta);
+#else
+       v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
+       v3_scale(&tmp,&dcosdrk,fcdg);
+       v3_add(&force,&force,&tmp);
+       v3_scale(&force,&force,pre_dzeta);
+#endif
+
+       /* force contribution */
+       if(pthread_mutex_lock(&(amutex[ktom->tag])))
+               perror("[albe fast] mutex lock (7)\n");
+       v3_add(&(ktom->f),&(ktom->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ktom->tag])))
+               perror("[albe fast] mutex unlock (7)\n");
+
+#ifdef DEBUG
+if(moldyn->time>DSTART&&moldyn->time<DEND) {
+       if(ktom==&(moldyn->atom[DATOM])) {
+               printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
+               printf("  adding %f %f %f\n",force.x,force.y,force.z);
+               printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
+               printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
+               printf("    d ij ik = %f %f\n",d_ij,d_ik);
+       }
+}
+#endif
+
+       /* virial */
+       if(pthread_mutex_lock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex lock (8)\n");
+       albe_v_calc(ai,&force,&dist_ik);
+       //virial_calc(ai,&force,&dist_ik);
+       
+       /* force contribution to atom i */
+       v3_scale(&force,&force,-1.0);
+       v3_add(&(ai->f),&(ai->f),&force);
+       if(pthread_mutex_unlock(&(amutex[ai->tag])))
+               perror("[albe fast] mutex unlock (8)\n");
+
+       /* increase k counter */
+       kcount++;       
+
+
+
+#ifdef STATIC_LISTS
+                                       }
+#elif LOWMEM_LISTS
+                                       }
+#else
+                                       } while(list_next_f(that)!=\
+                                               L_NO_NEXT_ELEMENT);
+#endif
+
+                               }
+                               
+#ifdef STATIC_LISTS
+                       }
+#elif LOWMEM_LISTS
+                       }
+#else
+                       } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
+#endif
+               
+               }
+
+       } // i loop
+               
+#ifdef DEBUG
+       //printf("\n\n");
+#endif
+#ifdef VDEBUG
+       printf("\n\n");
+#endif
+
+#ifdef DEBUG
+       //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
+       if(moldyn->time>DSTART&&moldyn->time<DEND) {
+               printf("force:\n");
+               printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
+               printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
+               printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
+       }
+#endif
+
+       pthread_exit(NULL);
+
+       return 0;
+}
+
+int albe_potential_force_calc(t_moldyn *moldyn) {
+
+       int i,j,ret;
+       t_pft_data pft_data[MAX_THREADS];
+       int count;
+       pthread_t pft_thread[MAX_THREADS];
+       t_atom *itom;
+       t_virial *virial;
+
+       count=moldyn->count;
+       itom=moldyn->atom;
+
+       /* reset energy */
+       moldyn->energy=0.0;
+
+       /* reset global virial */
+       memset(&(moldyn->gvir),0,sizeof(t_virial));
+
+       /* reset force, site energy and virial of every atom */
+       for(i=0;i<count;i++) {
+
+               /* reset force */
+               v3_zero(&(itom[i].f));
+
+               /* reset virial */
+               virial=&(itom[i].virial);
+               virial->xx=0.0;
+               virial->yy=0.0;
+               virial->zz=0.0;
+               virial->xy=0.0;
+               virial->xz=0.0;
+               virial->yz=0.0;
+       
+               /* reset site energy */
+               itom[i].e=0.0;
+
+       }
+
+       /* start threads */
+       for(j=0;j<MAX_THREADS;j++) {
+
+               /* prepare thread data */
+               pft_data[j].moldyn=moldyn;
+               pft_data[j].start=j*(count/MAX_THREADS);
+               if(j==MAX_THREADS-1) {
+                       pft_data[j].end=count;
+               }
+               else {
+                       pft_data[j].end=pft_data[j].start;
+                       pft_data[j].end+=count/MAX_THREADS;
+               }
+
+               ret=pthread_create(&(pft_thread[j]),NULL,
+                                   potential_force_thread,
+                                   &(pft_data[j]));
+               if(ret)  {
+                       perror("[albe fast] pf thread create");
+                       return ret;
+               }
+       }
+
+       /* join threads */
+       for(j=0;j<MAX_THREADS;j++) {
+
+               ret=pthread_join(pft_thread[j],NULL);
+               if(ret) {
+                       perror("[albe fast] pf thread join");
+                       return ret;
+               }
+       }
+
+       /* some postprocessing */
+       for(i=0;i<count;i++) {
+               /* calculate global virial */
+               moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
+               moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
+               moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
+               moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
+               moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
+               moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
+
+               /* check forces regarding the given timestep */
+               if(v3_norm(&(itom[i].f))>\
+                   0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
+                       printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
+                              i);
+       }
+
+       return 0;
+}
+
+#endif // PTHREADS