new imp of pthread approach
[physik/posic.git] / potentials / albe_fast.c
1 /*
2  * test: albe_new.c
3  *
4  * author: Frank Zirkelbach <frank.zirkelbach@physik.uni-augsburg.de>
5  *
6  */
7
8 #define _GNU_SOURCE
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 #include <sys/time.h>
17 #include <time.h>
18 #include <math.h>
19
20 #ifdef PARALLEL
21 #include <omp.h>
22 #endif
23
24 #ifdef PTHREAD
25 #include <pthread.h>
26 #endif
27
28 #include "../moldyn.h"
29 #include "../math/math.h"
30 #include "albe.h"
31
32 /*
33  * virial calculation
34  */
35
36 #define albe_v_calc(a,f,d)      a->virial.xx+=f->x*d->x; \
37                                 a->virial.yy+=f->y*d->y; \
38                                 a->virial.zz+=f->z*d->z; \
39                                 a->virial.xy+=f->x*d->y; \
40                                 a->virial.xz+=f->x*d->z; \
41                                 a->virial.yz+=f->y*d->z
42
43 #ifndef PTHREADS
44
45 int albe_potential_force_calc(t_moldyn *moldyn) {
46
47         int i,j,k,count;
48         t_atom *itom,*jtom,*ktom;
49         t_virial *virial;
50         t_linkcell *lc;
51 #ifdef STATIC_LISTS
52         int *neighbour_i[27];
53         int p,q;
54 #elif LOWMEM_LISTS
55         int neighbour_i[27];
56         int p,q;
57 #else
58         t_list neighbour_i[27];
59         t_list neighbour_i2[27];
60         t_list *this,*that;
61 #endif
62         u8 bc_ij,bc_ik;
63         int dnlc;
64 #ifdef PTHREADS
65         int ret;
66         t_kdata kdata[27];
67         pthread_t kthread[27];
68 #endif
69
70         // needed to work
71         t_atom *ai;
72
73         // optimized
74         t_albe_mult_params *params;
75         t_albe_exchange *exchange;
76         t_3dvec dist_ij;
77         double d_ij2;
78         double d_ij;
79         u8 brand_i;
80         double S2;
81         int kcount;
82         double zeta_ij;
83         double pre_dzeta;
84
85         // more ...
86         double Rk,Sk,Sk2;
87         t_3dvec dist_ik;
88         double d_ik2,d_ik;
89         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
90         double f_c_ik,df_c_ik;
91
92         t_3dvec force;
93         double f_a,df_a,b,db,f_c,df_c;
94         double f_r,df_r;
95         double scale;
96         double mu,B;
97         double lambda,A;
98         double r0;
99         double S,R;
100         double energy;
101
102         double dijdik_inv,fcdg,dfcg;
103         t_3dvec dcosdrj,dcosdrk;
104         t_3dvec tmp;
105
106         // even more ...
107         double gamma_i;
108         double c_i;
109         double d_i;
110         double h_i;
111         double ci2;
112         double di2;
113         double ci2di2;
114
115         count=moldyn->count;
116         itom=moldyn->atom;
117         lc=&(moldyn->lc);
118
119         // optimized
120         params=moldyn->pot_params;
121         exchange=&(params->exchange);
122
123
124         /* reset energy */
125         moldyn->energy=0.0;
126
127         /* reset global virial */
128         memset(&(moldyn->gvir),0,sizeof(t_virial));
129
130         /* reset force, site energy and virial of every atom */
131 #ifdef PARALLEL
132         #pragma omp parallel for private(virial)
133 #endif
134         for(i=0;i<count;i++) {
135
136                 /* reset force */
137                 v3_zero(&(itom[i].f));
138
139                 /* reset virial */
140                 virial=(&(itom[i].virial));
141                 virial->xx=0.0;
142                 virial->yy=0.0;
143                 virial->zz=0.0;
144                 virial->xy=0.0;
145                 virial->xz=0.0;
146                 virial->yz=0.0;
147         
148                 /* reset site energy */
149                 itom[i].e=0.0;
150
151         }
152
153         /* get energy, force and virial of every atom */
154
155         /* first (and only) loop over atoms i */
156         for(i=0;i<count;i++) {
157
158                 if(!(itom[i].attr&ATOM_ATTR_3BP))
159                         continue;
160
161                 link_cell_neighbour_index(moldyn,
162                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
163                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
164                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
165                                           neighbour_i);
166
167                 dnlc=lc->dnlc;
168
169                 /* copy the neighbour lists */
170 #ifdef STATIC_LISTS
171 #elif LOWMEM_LISTS
172 #else
173                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
174 #endif
175
176                 ai=&(itom[i]);
177                 brand_i=ai->brand;
178
179                 /* loop over atoms j */
180                 for(j=0;j<27;j++) {
181
182                         bc_ij=(j<dnlc)?0:1;
183 #ifdef STATIC_LISTS
184                         p=0;
185
186                         while(neighbour_i[j][p]!=-1) {
187
188                                 jtom=&(itom[neighbour_i[j][p]]);
189                                 p++;
190 #elif LOWMEM_LISTS
191                         p=neighbour_i[j];
192
193                         while(p!=-1) {
194
195                                 jtom=&(itom[p]);
196                                 p=lc->subcell->list[p];
197 #else
198                         this=&(neighbour_i[j]);
199                         list_reset_f(this);
200
201                         if(this->start==NULL)
202                                 continue;
203
204                         do {
205
206                                 jtom=this->current->data;
207 #endif
208
209                                 if(jtom==&(itom[i]))
210                                         continue;
211
212                                 if(!(jtom->attr&ATOM_ATTR_3BP))
213                                         continue;
214
215                                 /* reset 3bp run */
216                                 moldyn->run3bp=1;
217
218
219 /* j1 func here ... */
220 /* albe 3 body potential function (first ij loop) */
221
222         /* reset zeta sum */
223         zeta_ij=0.0;
224
225         /*
226          * set ij depending values
227          */
228
229         if(brand_i==jtom->brand) {
230                 S2=params->S2[brand_i];
231         }
232         else {
233                 S2=params->S2mixed;
234         }
235
236         /* dist_ij, d_ij2 */
237         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
238         if(bc_ij) check_per_bound(moldyn,&dist_ij);
239         d_ij2=v3_absolute_square(&dist_ij);
240
241         /* if d_ij2 > S2 => no force & potential energy contribution */
242         if(d_ij2>S2)
243                 continue;
244
245         /* d_ij */
246         d_ij=sqrt(d_ij2);
247
248         /* reset k counter for first k loop */
249         kcount=0;
250
251                                 /* first loop over atoms k */
252                                 for(k=0;k<27;k++) {
253
254                                         bc_ik=(k<dnlc)?0:1;
255 #ifdef STATIC_LISTS
256                                         q=0;
257
258                                         while(neighbour_i[k][q]!=-1) {
259
260                                                 ktom=&(itom[neighbour_i[k][q]]);
261                                                 q++;
262 #elif LOWMEM_LISTS
263                                         q=neighbour_i[k];
264
265                                         while(q!=-1) {
266
267                                                 ktom=&(itom[q]);
268                                                 q=lc->subcell->list[q];
269 #else
270                                         that=&(neighbour_i2[k]);
271                                         list_reset_f(that);
272                                         
273                                         if(that->start==NULL)
274                                                 continue;
275
276                                         do {
277                                                 ktom=that->current->data;
278 #endif
279
280                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
281                                                         continue;
282
283                                                 if(ktom==jtom)
284                                                         continue;
285
286                                                 if(ktom==&(itom[i]))
287                                                         continue;
288
289 /* k1 func here ... */
290 /* albe 3 body potential function (first k loop) */
291
292         if(kcount>ALBE_MAXN) {
293                 printf("FATAL: neighbours = %d\n",kcount);
294                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
295         }
296
297         /* ik constants */
298         if(brand_i==ktom->brand) {
299                 Rk=params->R[brand_i];
300                 Sk=params->S[brand_i];
301                 Sk2=params->S2[brand_i];
302                 /* albe needs i,k depending c,d,h and gamma values */
303                 gamma_i=params->gamma[brand_i];
304                 c_i=params->c[brand_i];
305                 d_i=params->d[brand_i];
306                 h_i=params->h[brand_i];
307                 ci2=params->c2[brand_i];
308                 di2=params->d2[brand_i];
309                 ci2di2=params->c2d2[brand_i];
310         }
311         else {
312                 Rk=params->Rmixed;
313                 Sk=params->Smixed;
314                 Sk2=params->S2mixed;
315                 /* albe needs i,k depending c,d,h and gamma values */
316                 gamma_i=params->gamma_m;
317                 c_i=params->c_mixed;
318                 d_i=params->d_mixed;
319                 h_i=params->h_mixed;
320                 ci2=params->c2_mixed;
321                 di2=params->d2_mixed;
322                 ci2di2=params->c2d2_m;
323         }
324
325         /* dist_ik, d_ik2 */
326         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
327         if(bc_ik) check_per_bound(moldyn,&dist_ik);
328         d_ik2=v3_absolute_square(&dist_ik);
329
330         /* store data for second k loop */
331         exchange->dist_ik[kcount]=dist_ik;
332         exchange->d_ik2[kcount]=d_ik2;
333
334         /* return if not within cutoff */
335         if(d_ik2>Sk2) {
336                 kcount++;
337                 continue;
338         }
339
340         /* d_ik */
341         d_ik=sqrt(d_ik2);
342
343         /* cos theta */
344         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
345
346         /* g_ijk 
347         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
348         d2_h_cos2=exchange->di2+(h_cos*h_cos);
349         frac=exchange->ci2/d2_h_cos2;
350         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
351         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
352         */
353
354         h_cos=h_i+cos_theta; // + in albe formalism
355         d2_h_cos2=di2+(h_cos*h_cos);
356         frac=ci2/d2_h_cos2;
357         g=gamma_i*(1.0+ci2di2-frac);
358         dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
359
360         /* zeta sum += f_c_ik * g_ijk */
361         if(d_ik<=Rk) {
362                 zeta_ij+=g;
363                 f_c_ik=1.0;
364                 df_c_ik=0.0;
365         }
366         else {
367                 s_r=Sk-Rk;
368                 arg=M_PI*(d_ik-Rk)/s_r;
369                 f_c_ik=0.5+0.5*cos(arg);
370                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
371                 zeta_ij+=f_c_ik*g;
372         }
373
374         /* store even more data for second k loop */
375         exchange->g[kcount]=g;
376         exchange->dg[kcount]=dg;
377         exchange->d_ik[kcount]=d_ik;
378         exchange->cos_theta[kcount]=cos_theta;
379         exchange->f_c_ik[kcount]=f_c_ik;
380         exchange->df_c_ik[kcount]=df_c_ik;
381
382         /* increase k counter */
383         kcount++;
384
385 #ifdef STATIC_LISTS
386                                         }
387 #elif LOWMEM_LISTS
388                                         }
389 #else
390                                         } while(list_next_f(that)!=\
391                                                 L_NO_NEXT_ELEMENT);
392 #endif
393
394                                 }
395
396 /* j2 func here ... */
397
398
399         if(brand_i==jtom->brand) {
400                 S=params->S[brand_i];
401                 R=params->R[brand_i];
402                 B=params->B[brand_i];
403                 A=params->A[brand_i];
404                 r0=params->r0[brand_i];
405                 mu=params->mu[brand_i];
406                 lambda=params->lambda[brand_i];
407         }
408         else {
409                 S=params->Smixed;
410                 R=params->Rmixed;
411                 B=params->Bmixed;
412                 A=params->Amixed;
413                 r0=params->r0_mixed;
414                 mu=params->mu_m;
415                 lambda=params->lambda_m;
416         }
417
418         /* f_c, df_c */
419         if(d_ij<R) {
420                 f_c=1.0;
421                 df_c=0.0;
422         }
423         else {
424                 s_r=S-R;
425                 arg=M_PI*(d_ij-R)/s_r;
426                 f_c=0.5+0.5*cos(arg);
427                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
428         }
429
430         /* f_a, df_a */
431         f_a=-B*exp(-mu*(d_ij-r0));
432         df_a=mu*f_a/d_ij;
433
434         /* f_r, df_r */
435         f_r=A*exp(-lambda*(d_ij-r0));
436         df_r=lambda*f_r/d_ij;
437
438         /* b, db */
439         if(zeta_ij==0.0) {
440                 b=1.0;
441                 db=0.0;
442         }
443         else {
444                 b=1.0/sqrt(1.0+zeta_ij);
445                 db=-0.5*b/(1.0+zeta_ij);
446         }
447
448         /* force contribution for atom i */
449         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
450         v3_scale(&force,&(dist_ij),scale);
451         v3_add(&(ai->f),&(ai->f),&force);
452
453         /* force contribution for atom j */
454         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
455         v3_add(&(jtom->f),&(jtom->f),&force);
456
457         /* virial */
458         virial_calc(ai,&force,&(dist_ij));
459
460 #ifdef DEBUG
461 if(moldyn->time>DSTART&&moldyn->time<DEND) {
462         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
463                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
464                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
465                 if(ai==&(moldyn->atom[0]))
466                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
467                 if(jtom==&(moldyn->atom[0]))
468                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
469                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
470                                                     f_c,b,f_a,f_r);
471                 printf("          %f %f %f\n",zeta_ij,.0,.0);
472         }
473 }
474 #endif
475
476         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
477         pre_dzeta=0.5*f_a*f_c*db;
478
479         /* energy contribution */
480         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
481         moldyn->energy+=energy;
482         ai->e+=energy;
483
484         /* reset k counter for second k loop */
485         kcount=0;
486                 
487
488                                 /* second loop over atoms k */
489                                 for(k=0;k<27;k++) {
490
491                                         bc_ik=(k<dnlc)?0:1;
492 #ifdef STATIC_LISTS
493                                         q=0;
494
495                                         while(neighbour_i[k][q]!=-1) {
496
497                                                 ktom=&(itom[neighbour_i[k][q]]);
498                                                 q++;
499 #elif LOWMEM_LISTS
500                                         q=neighbour_i[k];
501
502                                         while(q!=-1) {
503
504                                                 ktom=&(itom[q]);
505                                                 q=lc->subcell->list[q];
506 #else
507                                         that=&(neighbour_i2[k]);
508                                         list_reset_f(that);
509                                         
510                                         if(that->start==NULL)
511                                                 continue;
512
513                                         do {
514                                                 ktom=that->current->data;
515 #endif
516
517                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
518                                                         continue;
519
520                                                 if(ktom==jtom)
521                                                         continue;
522
523                                                 if(ktom==&(itom[i]))
524                                                         continue;
525
526
527 /* k2 func here ... */
528 /* albe 3 body potential function (second k loop) */
529
530         if(kcount>ALBE_MAXN)
531                 printf("FATAL: neighbours!\n");
532
533         /* d_ik2 */
534         d_ik2=exchange->d_ik2[kcount];
535
536         if(brand_i==ktom->brand)
537                 Sk2=params->S2[brand_i];
538         else
539                 Sk2=params->S2mixed;
540
541         /* return if d_ik > S */
542         if(d_ik2>Sk2) {
543                 kcount++;
544                 continue;
545         }
546
547         /* dist_ik, d_ik */
548         dist_ik=exchange->dist_ik[kcount];
549         d_ik=exchange->d_ik[kcount];
550
551         /* f_c_ik, df_c_ik */
552         f_c_ik=exchange->f_c_ik[kcount];
553         df_c_ik=exchange->df_c_ik[kcount];
554
555         /* g, dg, cos_theta */
556         g=exchange->g[kcount];
557         dg=exchange->dg[kcount];
558         cos_theta=exchange->cos_theta[kcount];
559
560         /* cos_theta derivatives wrt j,k */
561         dijdik_inv=1.0/(d_ij*d_ik);
562         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
563         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
564         v3_add(&dcosdrj,&dcosdrj,&tmp);
565         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
566         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
567         v3_add(&dcosdrk,&dcosdrk,&tmp);
568
569         /* f_c_ik * dg, df_c_ik * g */
570         fcdg=f_c_ik*dg;
571         dfcg=df_c_ik*g;
572
573         /* derivative wrt j */
574         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
575
576         /* force contribution */
577         v3_add(&(jtom->f),&(jtom->f),&force);
578
579 #ifdef DEBUG
580 if(moldyn->time>DSTART&&moldyn->time<DEND) {
581         if(jtom==&(moldyn->atom[DATOM])) {
582                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
583                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
584                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
585                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
586                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
587         }
588 }
589 #endif
590
591         /* virial */
592         virial_calc(ai,&force,&dist_ij);
593
594         /* force contribution to atom i */
595         v3_scale(&force,&force,-1.0);
596         v3_add(&(ai->f),&(ai->f),&force);
597
598         /* derivative wrt k */
599         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
600         v3_scale(&tmp,&dcosdrk,fcdg);
601         v3_add(&force,&force,&tmp);
602         v3_scale(&force,&force,pre_dzeta);
603
604         /* force contribution */
605         v3_add(&(ktom->f),&(ktom->f),&force);
606
607 #ifdef DEBUG
608 if(moldyn->time>DSTART&&moldyn->time<DEND) {
609         if(ktom==&(moldyn->atom[DATOM])) {
610                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
611                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
612                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
613                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
614                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
615         }
616 }
617 #endif
618
619         /* virial */
620         virial_calc(ai,&force,&dist_ik);
621         
622         /* force contribution to atom i */
623         v3_scale(&force,&force,-1.0);
624         v3_add(&(ai->f),&(ai->f),&force);
625
626         /* increase k counter */
627         kcount++;       
628
629
630
631 #ifdef STATIC_LISTS
632                                         }
633 #elif LOWMEM_LISTS
634                                         }
635 #else
636                                         } while(list_next_f(that)!=\
637                                                 L_NO_NEXT_ELEMENT);
638 #endif
639
640                                 }
641                                 
642 #ifdef STATIC_LISTS
643                         }
644 #elif LOWMEM_LISTS
645                         }
646 #else
647                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
648 #endif
649                 
650                 }
651                 
652 #ifdef DEBUG
653         //printf("\n\n");
654 #endif
655 #ifdef VDEBUG
656         printf("\n\n");
657 #endif
658
659         }
660
661 #ifdef DEBUG
662         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
663         if(moldyn->time>DSTART&&moldyn->time<DEND) {
664                 printf("force:\n");
665                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
666                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
667                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
668         }
669 #endif
670
671         /* some postprocessing */
672 #ifdef PARALLEL
673         #pragma omp parallel for
674 #endif
675         for(i=0;i<count;i++) {
676                 /* calculate global virial */
677                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
678                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
679                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
680                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
681                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
682                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
683
684                 /* check forces regarding the given timestep */
685                 if(v3_norm(&(itom[i].f))>\
686                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
687                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
688                                i);
689         }
690
691         return 0;
692 }
693
694
695 #else // PTHREADS
696
697
698 typedef struct s_pft_data {
699         t_moldyn *moldyn;
700         int i;
701 } t_pft_data;
702
703 void *potential_force_thread(void *ptr) {
704
705         t_pft_data *pft_data;
706         t_moldyn *moldyn;
707         t_albe_exchange ec;
708
709         int i,j,k,count;
710         t_atom *itom,*jtom,*ktom;
711         t_linkcell *lc;
712 #ifdef STATIC_LISTS
713         int *neighbour_i[27];
714         int p,q;
715 #elif LOWMEM_LISTS
716         int neighbour_i[27];
717         int p,q;
718 #else
719         t_list neighbour_i[27];
720         t_list neighbour_i2[27];
721         t_list *this,*that;
722 #endif
723         u8 bc_ij,bc_ik;
724         int dnlc;
725
726         // needed to work
727         t_atom *ai;
728
729         // optimized
730         t_albe_mult_params *params;
731         t_albe_exchange *exchange;
732         t_3dvec dist_ij;
733         double d_ij2;
734         double d_ij;
735         u8 brand_i;
736         double S2;
737         int kcount;
738         double zeta_ij;
739         double pre_dzeta;
740
741         // more ...
742         double Rk,Sk,Sk2;
743         t_3dvec dist_ik;
744         double d_ik2,d_ik;
745         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
746         double f_c_ik,df_c_ik;
747
748         t_3dvec force;
749         double f_a,df_a,b,db,f_c,df_c;
750         double f_r,df_r;
751         double scale;
752         double mu,B;
753         double lambda,A;
754         double r0;
755         double S,R;
756         double energy;
757
758         double dijdik_inv,fcdg,dfcg;
759         t_3dvec dcosdrj,dcosdrk;
760         t_3dvec tmp;
761
762         // even more ...
763         double gamma_i;
764         double c_i;
765         double d_i;
766         double h_i;
767         double ci2;
768         double di2;
769         double ci2di2;
770
771         pft_data=ptr;
772         moldyn=pft_data->moldyn;
773         exchange=&ec;
774
775         count=moldyn->count;
776         itom=moldyn->atom;
777         lc=&(moldyn->lc);
778
779         // optimized
780         params=moldyn->pot_params;
781
782
783         /* get energy, force and virial of every atom */
784
785         /* first (and only) loop over atoms i */
786         for(i=0;i<count;i++) {
787
788                 if(!(itom[i].attr&ATOM_ATTR_3BP))
789                         continue;
790
791                 link_cell_neighbour_index(moldyn,
792                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
793                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
794                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
795                                           neighbour_i);
796
797                 dnlc=lc->dnlc;
798
799                 /* copy the neighbour lists */
800 #ifdef STATIC_LISTS
801 #elif LOWMEM_LISTS
802 #else
803                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
804 #endif
805
806                 ai=&(itom[i]);
807                 brand_i=ai->brand;
808
809                 /* loop over atoms j */
810                 for(j=0;j<27;j++) {
811
812                         bc_ij=(j<dnlc)?0:1;
813 #ifdef STATIC_LISTS
814                         p=0;
815
816                         while(neighbour_i[j][p]!=-1) {
817
818                                 jtom=&(itom[neighbour_i[j][p]]);
819                                 p++;
820 #elif LOWMEM_LISTS
821                         p=neighbour_i[j];
822
823                         while(p!=-1) {
824
825                                 jtom=&(itom[p]);
826                                 p=lc->subcell->list[p];
827 #else
828                         this=&(neighbour_i[j]);
829                         list_reset_f(this);
830
831                         if(this->start==NULL)
832                                 continue;
833
834                         do {
835
836                                 jtom=this->current->data;
837 #endif
838
839                                 if(jtom==&(itom[i]))
840                                         continue;
841
842                                 if(!(jtom->attr&ATOM_ATTR_3BP))
843                                         continue;
844
845                                 /* reset 3bp run */
846                                 moldyn->run3bp=1;
847
848
849 /* j1 func here ... */
850 /* albe 3 body potential function (first ij loop) */
851
852         /* reset zeta sum */
853         zeta_ij=0.0;
854
855         /*
856          * set ij depending values
857          */
858
859         if(brand_i==jtom->brand) {
860                 S2=params->S2[brand_i];
861         }
862         else {
863                 S2=params->S2mixed;
864         }
865
866         /* dist_ij, d_ij2 */
867         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
868         if(bc_ij) check_per_bound(moldyn,&dist_ij);
869         d_ij2=v3_absolute_square(&dist_ij);
870
871         /* if d_ij2 > S2 => no force & potential energy contribution */
872         if(d_ij2>S2)
873                 continue;
874
875         /* d_ij */
876         d_ij=sqrt(d_ij2);
877
878         /* reset k counter for first k loop */
879         kcount=0;
880
881                                 /* first loop over atoms k */
882                                 for(k=0;k<27;k++) {
883
884                                         bc_ik=(k<dnlc)?0:1;
885 #ifdef STATIC_LISTS
886                                         q=0;
887
888                                         while(neighbour_i[k][q]!=-1) {
889
890                                                 ktom=&(itom[neighbour_i[k][q]]);
891                                                 q++;
892 #elif LOWMEM_LISTS
893                                         q=neighbour_i[k];
894
895                                         while(q!=-1) {
896
897                                                 ktom=&(itom[q]);
898                                                 q=lc->subcell->list[q];
899 #else
900                                         that=&(neighbour_i2[k]);
901                                         list_reset_f(that);
902                                         
903                                         if(that->start==NULL)
904                                                 continue;
905
906                                         do {
907                                                 ktom=that->current->data;
908 #endif
909
910                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
911                                                         continue;
912
913                                                 if(ktom==jtom)
914                                                         continue;
915
916                                                 if(ktom==&(itom[i]))
917                                                         continue;
918
919 /* k1 func here ... */
920 /* albe 3 body potential function (first k loop) */
921
922         if(kcount>ALBE_MAXN) {
923                 printf("FATAL: neighbours = %d\n",kcount);
924                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
925         }
926
927         /* ik constants */
928         if(brand_i==ktom->brand) {
929                 Rk=params->R[brand_i];
930                 Sk=params->S[brand_i];
931                 Sk2=params->S2[brand_i];
932                 /* albe needs i,k depending c,d,h and gamma values */
933                 gamma_i=params->gamma[brand_i];
934                 c_i=params->c[brand_i];
935                 d_i=params->d[brand_i];
936                 h_i=params->h[brand_i];
937                 ci2=params->c2[brand_i];
938                 di2=params->d2[brand_i];
939                 ci2di2=params->c2d2[brand_i];
940         }
941         else {
942                 Rk=params->Rmixed;
943                 Sk=params->Smixed;
944                 Sk2=params->S2mixed;
945                 /* albe needs i,k depending c,d,h and gamma values */
946                 gamma_i=params->gamma_m;
947                 c_i=params->c_mixed;
948                 d_i=params->d_mixed;
949                 h_i=params->h_mixed;
950                 ci2=params->c2_mixed;
951                 di2=params->d2_mixed;
952                 ci2di2=params->c2d2_m;
953         }
954
955         /* dist_ik, d_ik2 */
956         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
957         if(bc_ik) check_per_bound(moldyn,&dist_ik);
958         d_ik2=v3_absolute_square(&dist_ik);
959
960         /* store data for second k loop */
961         exchange->dist_ik[kcount]=dist_ik;
962         exchange->d_ik2[kcount]=d_ik2;
963
964         /* return if not within cutoff */
965         if(d_ik2>Sk2) {
966                 kcount++;
967                 continue;
968         }
969
970         /* d_ik */
971         d_ik=sqrt(d_ik2);
972
973         /* cos theta */
974         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
975
976         /* g_ijk 
977         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
978         d2_h_cos2=exchange->di2+(h_cos*h_cos);
979         frac=exchange->ci2/d2_h_cos2;
980         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
981         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
982         */
983
984         h_cos=h_i+cos_theta; // + in albe formalism
985         d2_h_cos2=di2+(h_cos*h_cos);
986         frac=ci2/d2_h_cos2;
987         g=gamma_i*(1.0+ci2di2-frac);
988         dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
989
990         /* zeta sum += f_c_ik * g_ijk */
991         if(d_ik<=Rk) {
992                 zeta_ij+=g;
993                 f_c_ik=1.0;
994                 df_c_ik=0.0;
995         }
996         else {
997                 s_r=Sk-Rk;
998                 arg=M_PI*(d_ik-Rk)/s_r;
999                 f_c_ik=0.5+0.5*cos(arg);
1000                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
1001                 zeta_ij+=f_c_ik*g;
1002         }
1003
1004         /* store even more data for second k loop */
1005         exchange->g[kcount]=g;
1006         exchange->dg[kcount]=dg;
1007         exchange->d_ik[kcount]=d_ik;
1008         exchange->cos_theta[kcount]=cos_theta;
1009         exchange->f_c_ik[kcount]=f_c_ik;
1010         exchange->df_c_ik[kcount]=df_c_ik;
1011
1012         /* increase k counter */
1013         kcount++;
1014
1015 #ifdef STATIC_LISTS
1016                                         }
1017 #elif LOWMEM_LISTS
1018                                         }
1019 #else
1020                                         } while(list_next_f(that)!=\
1021                                                 L_NO_NEXT_ELEMENT);
1022 #endif
1023
1024                                 }
1025
1026 /* j2 func here ... */
1027
1028
1029         if(brand_i==jtom->brand) {
1030                 S=params->S[brand_i];
1031                 R=params->R[brand_i];
1032                 B=params->B[brand_i];
1033                 A=params->A[brand_i];
1034                 r0=params->r0[brand_i];
1035                 mu=params->mu[brand_i];
1036                 lambda=params->lambda[brand_i];
1037         }
1038         else {
1039                 S=params->Smixed;
1040                 R=params->Rmixed;
1041                 B=params->Bmixed;
1042                 A=params->Amixed;
1043                 r0=params->r0_mixed;
1044                 mu=params->mu_m;
1045                 lambda=params->lambda_m;
1046         }
1047
1048         /* f_c, df_c */
1049         if(d_ij<R) {
1050                 f_c=1.0;
1051                 df_c=0.0;
1052         }
1053         else {
1054                 s_r=S-R;
1055                 arg=M_PI*(d_ij-R)/s_r;
1056                 f_c=0.5+0.5*cos(arg);
1057                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
1058         }
1059
1060         /* f_a, df_a */
1061         f_a=-B*exp(-mu*(d_ij-r0));
1062         df_a=mu*f_a/d_ij;
1063
1064         /* f_r, df_r */
1065         f_r=A*exp(-lambda*(d_ij-r0));
1066         df_r=lambda*f_r/d_ij;
1067
1068         /* b, db */
1069         if(zeta_ij==0.0) {
1070                 b=1.0;
1071                 db=0.0;
1072         }
1073         else {
1074                 b=1.0/sqrt(1.0+zeta_ij);
1075                 db=-0.5*b/(1.0+zeta_ij);
1076         }
1077
1078         /* force contribution for atom i */
1079         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
1080         v3_scale(&force,&(dist_ij),scale);
1081         v3_add(&(ai->f),&(ai->f),&force);
1082
1083         /* force contribution for atom j */
1084         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
1085         v3_add(&(jtom->f),&(jtom->f),&force);
1086
1087         /* virial */
1088         virial_calc(ai,&force,&(dist_ij));
1089
1090 #ifdef DEBUG
1091 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1092         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
1093                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
1094                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1095                 if(ai==&(moldyn->atom[0]))
1096                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
1097                 if(jtom==&(moldyn->atom[0]))
1098                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
1099                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
1100                                                     f_c,b,f_a,f_r);
1101                 printf("          %f %f %f\n",zeta_ij,.0,.0);
1102         }
1103 }
1104 #endif
1105
1106         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
1107         pre_dzeta=0.5*f_a*f_c*db;
1108
1109         /* energy contribution */
1110         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
1111         moldyn->energy+=energy;
1112         ai->e+=energy;
1113
1114         /* reset k counter for second k loop */
1115         kcount=0;
1116                 
1117
1118                                 /* second loop over atoms k */
1119                                 for(k=0;k<27;k++) {
1120
1121                                         bc_ik=(k<dnlc)?0:1;
1122 #ifdef STATIC_LISTS
1123                                         q=0;
1124
1125                                         while(neighbour_i[k][q]!=-1) {
1126
1127                                                 ktom=&(itom[neighbour_i[k][q]]);
1128                                                 q++;
1129 #elif LOWMEM_LISTS
1130                                         q=neighbour_i[k];
1131
1132                                         while(q!=-1) {
1133
1134                                                 ktom=&(itom[q]);
1135                                                 q=lc->subcell->list[q];
1136 #else
1137                                         that=&(neighbour_i2[k]);
1138                                         list_reset_f(that);
1139                                         
1140                                         if(that->start==NULL)
1141                                                 continue;
1142
1143                                         do {
1144                                                 ktom=that->current->data;
1145 #endif
1146
1147                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
1148                                                         continue;
1149
1150                                                 if(ktom==jtom)
1151                                                         continue;
1152
1153                                                 if(ktom==&(itom[i]))
1154                                                         continue;
1155
1156
1157 /* k2 func here ... */
1158 /* albe 3 body potential function (second k loop) */
1159
1160         if(kcount>ALBE_MAXN)
1161                 printf("FATAL: neighbours!\n");
1162
1163         /* d_ik2 */
1164         d_ik2=exchange->d_ik2[kcount];
1165
1166         if(brand_i==ktom->brand)
1167                 Sk2=params->S2[brand_i];
1168         else
1169                 Sk2=params->S2mixed;
1170
1171         /* return if d_ik > S */
1172         if(d_ik2>Sk2) {
1173                 kcount++;
1174                 continue;
1175         }
1176
1177         /* dist_ik, d_ik */
1178         dist_ik=exchange->dist_ik[kcount];
1179         d_ik=exchange->d_ik[kcount];
1180
1181         /* f_c_ik, df_c_ik */
1182         f_c_ik=exchange->f_c_ik[kcount];
1183         df_c_ik=exchange->df_c_ik[kcount];
1184
1185         /* g, dg, cos_theta */
1186         g=exchange->g[kcount];
1187         dg=exchange->dg[kcount];
1188         cos_theta=exchange->cos_theta[kcount];
1189
1190         /* cos_theta derivatives wrt j,k */
1191         dijdik_inv=1.0/(d_ij*d_ik);
1192         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
1193         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
1194         v3_add(&dcosdrj,&dcosdrj,&tmp);
1195         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
1196         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
1197         v3_add(&dcosdrk,&dcosdrk,&tmp);
1198
1199         /* f_c_ik * dg, df_c_ik * g */
1200         fcdg=f_c_ik*dg;
1201         dfcg=df_c_ik*g;
1202
1203         /* derivative wrt j */
1204         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
1205
1206         /* force contribution */
1207         v3_add(&(jtom->f),&(jtom->f),&force);
1208
1209 #ifdef DEBUG
1210 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1211         if(jtom==&(moldyn->atom[DATOM])) {
1212                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
1213                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1214                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
1215                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
1216                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
1217         }
1218 }
1219 #endif
1220
1221         /* virial */
1222         virial_calc(ai,&force,&dist_ij);
1223
1224         /* force contribution to atom i */
1225         v3_scale(&force,&force,-1.0);
1226         v3_add(&(ai->f),&(ai->f),&force);
1227
1228         /* derivative wrt k */
1229         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
1230         v3_scale(&tmp,&dcosdrk,fcdg);
1231         v3_add(&force,&force,&tmp);
1232         v3_scale(&force,&force,pre_dzeta);
1233
1234         /* force contribution */
1235         v3_add(&(ktom->f),&(ktom->f),&force);
1236
1237 #ifdef DEBUG
1238 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1239         if(ktom==&(moldyn->atom[DATOM])) {
1240                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
1241                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1242                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
1243                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
1244                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
1245         }
1246 }
1247 #endif
1248
1249         /* virial */
1250         virial_calc(ai,&force,&dist_ik);
1251         
1252         /* force contribution to atom i */
1253         v3_scale(&force,&force,-1.0);
1254         v3_add(&(ai->f),&(ai->f),&force);
1255
1256         /* increase k counter */
1257         kcount++;       
1258
1259
1260
1261 #ifdef STATIC_LISTS
1262                                         }
1263 #elif LOWMEM_LISTS
1264                                         }
1265 #else
1266                                         } while(list_next_f(that)!=\
1267                                                 L_NO_NEXT_ELEMENT);
1268 #endif
1269
1270                                 }
1271                                 
1272 #ifdef STATIC_LISTS
1273                         }
1274 #elif LOWMEM_LISTS
1275                         }
1276 #else
1277                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
1278 #endif
1279                 
1280                 }
1281                 
1282 #ifdef DEBUG
1283         //printf("\n\n");
1284 #endif
1285 #ifdef VDEBUG
1286         printf("\n\n");
1287 #endif
1288
1289         }
1290
1291 #ifdef DEBUG
1292         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
1293         if(moldyn->time>DSTART&&moldyn->time<DEND) {
1294                 printf("force:\n");
1295                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
1296                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
1297                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
1298         }
1299 #endif
1300
1301         /* some postprocessing */
1302 #ifdef PARALLEL
1303         #pragma omp parallel for
1304 #endif
1305         for(i=0;i<count;i++) {
1306                 /* calculate global virial */
1307                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
1308                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
1309                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
1310                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
1311                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
1312                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
1313
1314                 /* check forces regarding the given timestep */
1315                 if(v3_norm(&(itom[i].f))>\
1316                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
1317                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
1318                                i);
1319         }
1320
1321         return 0;
1322 }
1323
1324 int albe_potential_force_calc(t_moldyn *moldyn) {
1325
1326         int i,ret;
1327         t_pft_data *pft_data;
1328         int count;
1329         pthread_t *pft_thread;
1330         t_atom *itom;
1331         t_virial *virial;
1332
1333         count=moldyn->count;
1334         itom=moldyn->atom;
1335
1336         /* reset energy */
1337         moldyn->energy=0.0;
1338
1339         /* reset global virial */
1340         memset(&(moldyn->gvir),0,sizeof(t_virial));
1341
1342         /* reset force, site energy and virial of every atom */
1343         for(i=0;i<count;i++) {
1344
1345                 /* reset force */
1346                 v3_zero(&(itom[i].f));
1347
1348                 /* reset virial */
1349                 virial=&(itom[i].virial);
1350                 virial->xx=0.0;
1351                 virial->yy=0.0;
1352                 virial->zz=0.0;
1353                 virial->xy=0.0;
1354                 virial->xz=0.0;
1355                 virial->yz=0.0;
1356         
1357                 /* reset site energy */
1358                 itom[i].e=0.0;
1359
1360         }
1361
1362         /* alloc thread memory */
1363         pft_thread=malloc(count*sizeof(pthread_t));
1364         if(pft_thread==NULL) {
1365                 perror("[albe fast] alloc thread mem");
1366                 return -1;
1367         }
1368         pft_data=malloc(count*sizeof(t_pft_data));
1369         if(pft_data==NULL) {
1370                 perror("[albe fast] alloc thread mem");
1371                 return -1;
1372         }
1373
1374         /* start threads */
1375         for(i=0;i<count;i++) {
1376
1377                 /* prepare thread data */
1378                 pft_data[i].moldyn=moldyn;
1379                 pft_data[i].i=i;
1380
1381                 ret=pthread_create(&(pft_thread[i]),NULL,
1382                                    potential_force_thread,&(pft_data[i]));
1383                 if(ret)  {
1384                         perror("[albe fast] pf thread create");
1385                         return ret;
1386                 }
1387         }
1388
1389         /* join threads */
1390         for(i=0;i<count;i++) {
1391                 ret=pthread_join(pft_thread[i],NULL);
1392                 if(ret) {
1393                         perror("[albe fast] pf thread join");
1394                         return ret;
1395                 }
1396         }
1397
1398         return 0;
1399 }
1400
1401 #endif // PTHREADS