pthredas working now (by partitioning atoms into count/#THREADS blocks)
[physik/posic.git] / potentials / albe_fast.c
1 /*
2  * test: albe_new.c
3  *
4  * author: Frank Zirkelbach <frank.zirkelbach@physik.uni-augsburg.de>
5  *
6  */
7
8 #define _GNU_SOURCE
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <sys/types.h>
13 #include <sys/stat.h>
14 #include <fcntl.h>
15 #include <unistd.h>
16 #include <sys/time.h>
17 #include <time.h>
18 #include <math.h>
19
20 #ifdef PARALLEL
21 #include <omp.h>
22 #endif
23
24 #ifdef PTHREADS
25 #include <pthread.h>
26 #define MAX_THREADS 4
27 #endif
28
29 #include "../moldyn.h"
30 #include "../math/math.h"
31 #include "albe.h"
32
33 #ifdef PTHREADS
34 extern pthread_mutex_t *amutex;
35 extern pthread_mutex_t emutex;
36 #endif
37
38 /*
39  * virial calculation
40  */
41
42 #define albe_v_calc(a,f,d)      a->virial.xx+=f->x*d->x; \
43                                 a->virial.yy+=f->y*d->y; \
44                                 a->virial.zz+=f->z*d->z; \
45                                 a->virial.xy+=f->x*d->y; \
46                                 a->virial.xz+=f->x*d->z; \
47                                 a->virial.yz+=f->y*d->z
48
49 #ifndef PTHREADS
50
51 int albe_potential_force_calc(t_moldyn *moldyn) {
52
53         int i,j,k,count;
54         t_atom *itom,*jtom,*ktom;
55         t_virial *virial;
56         t_linkcell *lc;
57 #ifdef STATIC_LISTS
58         int *neighbour_i[27];
59         int p,q;
60 #elif LOWMEM_LISTS
61         int neighbour_i[27];
62         int p,q;
63 #else
64         t_list neighbour_i[27];
65         t_list neighbour_i2[27];
66         t_list *this,*that;
67 #endif
68         u8 bc_ij,bc_ik;
69         int dnlc;
70 #ifdef PTHREADS
71         int ret;
72         t_kdata kdata[27];
73         pthread_t kthread[27];
74 #endif
75
76         // needed to work
77         t_atom *ai;
78
79         // optimized
80         t_albe_mult_params *params;
81         t_albe_exchange *exchange;
82         t_3dvec dist_ij;
83         double d_ij2;
84         double d_ij;
85         u8 brand_i;
86         double S2;
87         int kcount;
88         double zeta_ij;
89         double pre_dzeta;
90
91         // more ...
92         double Rk,Sk,Sk2;
93         t_3dvec dist_ik;
94         double d_ik2,d_ik;
95         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
96         double f_c_ik,df_c_ik;
97
98         t_3dvec force;
99         double f_a,df_a,b,db,f_c,df_c;
100         double f_r,df_r;
101         double scale;
102         double mu,B;
103         double lambda,A;
104         double r0;
105         double S,R;
106         double energy;
107
108         double dijdik_inv,fcdg,dfcg;
109         t_3dvec dcosdrj,dcosdrk;
110         t_3dvec tmp;
111
112         // even more ...
113         double gamma_i;
114         double c_i;
115         double d_i;
116         double h_i;
117         double ci2;
118         double di2;
119         double ci2di2;
120
121         count=moldyn->count;
122         itom=moldyn->atom;
123         lc=&(moldyn->lc);
124
125         // optimized
126         params=moldyn->pot_params;
127         exchange=&(params->exchange);
128
129
130         /* reset energy */
131         moldyn->energy=0.0;
132
133         /* reset global virial */
134         memset(&(moldyn->gvir),0,sizeof(t_virial));
135
136         /* reset force, site energy and virial of every atom */
137 #ifdef PARALLEL
138         #pragma omp parallel for private(virial)
139 #endif
140         for(i=0;i<count;i++) {
141
142                 /* reset force */
143                 v3_zero(&(itom[i].f));
144
145                 /* reset virial */
146                 virial=(&(itom[i].virial));
147                 virial->xx=0.0;
148                 virial->yy=0.0;
149                 virial->zz=0.0;
150                 virial->xy=0.0;
151                 virial->xz=0.0;
152                 virial->yz=0.0;
153         
154                 /* reset site energy */
155                 itom[i].e=0.0;
156
157         }
158
159         /* get energy, force and virial of every atom */
160
161         /* first (and only) loop over atoms i */
162         for(i=0;i<count;i++) {
163
164                 if(!(itom[i].attr&ATOM_ATTR_3BP))
165                         continue;
166
167                 link_cell_neighbour_index(moldyn,
168                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
169                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
170                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
171                                           neighbour_i);
172
173                 dnlc=lc->dnlc;
174
175                 /* copy the neighbour lists */
176 #ifdef STATIC_LISTS
177 #elif LOWMEM_LISTS
178 #else
179                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
180 #endif
181
182                 ai=&(itom[i]);
183                 brand_i=ai->brand;
184
185                 /* loop over atoms j */
186                 for(j=0;j<27;j++) {
187
188                         bc_ij=(j<dnlc)?0:1;
189 #ifdef STATIC_LISTS
190                         p=0;
191
192                         while(neighbour_i[j][p]!=-1) {
193
194                                 jtom=&(itom[neighbour_i[j][p]]);
195                                 p++;
196 #elif LOWMEM_LISTS
197                         p=neighbour_i[j];
198
199                         while(p!=-1) {
200
201                                 jtom=&(itom[p]);
202                                 p=lc->subcell->list[p];
203 #else
204                         this=&(neighbour_i[j]);
205                         list_reset_f(this);
206
207                         if(this->start==NULL)
208                                 continue;
209
210                         do {
211
212                                 jtom=this->current->data;
213 #endif
214
215                                 if(jtom==&(itom[i]))
216                                         continue;
217
218                                 if(!(jtom->attr&ATOM_ATTR_3BP))
219                                         continue;
220
221                                 /* reset 3bp run */
222                                 moldyn->run3bp=1;
223
224
225 /* j1 func here ... */
226 /* albe 3 body potential function (first ij loop) */
227
228         /* reset zeta sum */
229         zeta_ij=0.0;
230
231         /*
232          * set ij depending values
233          */
234
235         if(brand_i==jtom->brand) {
236                 S2=params->S2[brand_i];
237         }
238         else {
239                 S2=params->S2mixed;
240         }
241
242         /* dist_ij, d_ij2 */
243         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
244         if(bc_ij) check_per_bound(moldyn,&dist_ij);
245         d_ij2=v3_absolute_square(&dist_ij);
246
247         /* if d_ij2 > S2 => no force & potential energy contribution */
248         if(d_ij2>S2)
249                 continue;
250
251         /* d_ij */
252         d_ij=sqrt(d_ij2);
253
254         /* reset k counter for first k loop */
255         kcount=0;
256
257                                 /* first loop over atoms k */
258                                 for(k=0;k<27;k++) {
259
260                                         bc_ik=(k<dnlc)?0:1;
261 #ifdef STATIC_LISTS
262                                         q=0;
263
264                                         while(neighbour_i[k][q]!=-1) {
265
266                                                 ktom=&(itom[neighbour_i[k][q]]);
267                                                 q++;
268 #elif LOWMEM_LISTS
269                                         q=neighbour_i[k];
270
271                                         while(q!=-1) {
272
273                                                 ktom=&(itom[q]);
274                                                 q=lc->subcell->list[q];
275 #else
276                                         that=&(neighbour_i2[k]);
277                                         list_reset_f(that);
278                                         
279                                         if(that->start==NULL)
280                                                 continue;
281
282                                         do {
283                                                 ktom=that->current->data;
284 #endif
285
286                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
287                                                         continue;
288
289                                                 if(ktom==jtom)
290                                                         continue;
291
292                                                 if(ktom==&(itom[i]))
293                                                         continue;
294
295 /* k1 func here ... */
296 /* albe 3 body potential function (first k loop) */
297
298         if(kcount>ALBE_MAXN) {
299                 printf("FATAL: neighbours = %d\n",kcount);
300                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
301         }
302
303         /* ik constants */
304         if(brand_i==ktom->brand) {
305                 Rk=params->R[brand_i];
306                 Sk=params->S[brand_i];
307                 Sk2=params->S2[brand_i];
308                 /* albe needs i,k depending c,d,h and gamma values */
309                 gamma_i=params->gamma[brand_i];
310                 c_i=params->c[brand_i];
311                 d_i=params->d[brand_i];
312                 h_i=params->h[brand_i];
313                 ci2=params->c2[brand_i];
314                 di2=params->d2[brand_i];
315                 ci2di2=params->c2d2[brand_i];
316         }
317         else {
318                 Rk=params->Rmixed;
319                 Sk=params->Smixed;
320                 Sk2=params->S2mixed;
321                 /* albe needs i,k depending c,d,h and gamma values */
322                 gamma_i=params->gamma_m;
323                 c_i=params->c_mixed;
324                 d_i=params->d_mixed;
325                 h_i=params->h_mixed;
326                 ci2=params->c2_mixed;
327                 di2=params->d2_mixed;
328                 ci2di2=params->c2d2_m;
329         }
330
331         /* dist_ik, d_ik2 */
332         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
333         if(bc_ik) check_per_bound(moldyn,&dist_ik);
334         d_ik2=v3_absolute_square(&dist_ik);
335
336         /* store data for second k loop */
337         exchange->dist_ik[kcount]=dist_ik;
338         exchange->d_ik2[kcount]=d_ik2;
339
340         /* return if not within cutoff */
341         if(d_ik2>Sk2) {
342                 kcount++;
343                 continue;
344         }
345
346         /* d_ik */
347         d_ik=sqrt(d_ik2);
348
349         /* cos theta */
350         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
351
352         /* g_ijk 
353         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
354         d2_h_cos2=exchange->di2+(h_cos*h_cos);
355         frac=exchange->ci2/d2_h_cos2;
356         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
357         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
358         */
359
360         h_cos=h_i+cos_theta; // + in albe formalism
361         d2_h_cos2=di2+(h_cos*h_cos);
362         frac=ci2/d2_h_cos2;
363         g=gamma_i*(1.0+ci2di2-frac);
364         dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
365
366         /* zeta sum += f_c_ik * g_ijk */
367         if(d_ik<=Rk) {
368                 zeta_ij+=g;
369                 f_c_ik=1.0;
370                 df_c_ik=0.0;
371         }
372         else {
373                 s_r=Sk-Rk;
374                 arg=M_PI*(d_ik-Rk)/s_r;
375                 f_c_ik=0.5+0.5*cos(arg);
376                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
377                 zeta_ij+=f_c_ik*g;
378         }
379
380         /* store even more data for second k loop */
381         exchange->g[kcount]=g;
382         exchange->dg[kcount]=dg;
383         exchange->d_ik[kcount]=d_ik;
384         exchange->cos_theta[kcount]=cos_theta;
385         exchange->f_c_ik[kcount]=f_c_ik;
386         exchange->df_c_ik[kcount]=df_c_ik;
387
388         /* increase k counter */
389         kcount++;
390
391 #ifdef STATIC_LISTS
392                                         }
393 #elif LOWMEM_LISTS
394                                         }
395 #else
396                                         } while(list_next_f(that)!=\
397                                                 L_NO_NEXT_ELEMENT);
398 #endif
399
400                                 }
401
402 /* j2 func here ... */
403
404
405         if(brand_i==jtom->brand) {
406                 S=params->S[brand_i];
407                 R=params->R[brand_i];
408                 B=params->B[brand_i];
409                 A=params->A[brand_i];
410                 r0=params->r0[brand_i];
411                 mu=params->mu[brand_i];
412                 lambda=params->lambda[brand_i];
413         }
414         else {
415                 S=params->Smixed;
416                 R=params->Rmixed;
417                 B=params->Bmixed;
418                 A=params->Amixed;
419                 r0=params->r0_mixed;
420                 mu=params->mu_m;
421                 lambda=params->lambda_m;
422         }
423
424         /* f_c, df_c */
425         if(d_ij<R) {
426                 f_c=1.0;
427                 df_c=0.0;
428         }
429         else {
430                 s_r=S-R;
431                 arg=M_PI*(d_ij-R)/s_r;
432                 f_c=0.5+0.5*cos(arg);
433                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
434         }
435
436         /* f_a, df_a */
437         f_a=-B*exp(-mu*(d_ij-r0));
438         df_a=mu*f_a/d_ij;
439
440         /* f_r, df_r */
441         f_r=A*exp(-lambda*(d_ij-r0));
442         df_r=lambda*f_r/d_ij;
443
444         /* b, db */
445         if(zeta_ij==0.0) {
446                 b=1.0;
447                 db=0.0;
448         }
449         else {
450                 b=1.0/sqrt(1.0+zeta_ij);
451                 db=-0.5*b/(1.0+zeta_ij);
452         }
453
454         /* force contribution for atom i */
455         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
456         v3_scale(&force,&(dist_ij),scale);
457         v3_add(&(ai->f),&(ai->f),&force);
458
459         /* force contribution for atom j */
460         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
461         v3_add(&(jtom->f),&(jtom->f),&force);
462
463         /* virial */
464         virial_calc(ai,&force,&(dist_ij));
465
466 #ifdef DEBUG
467 if(moldyn->time>DSTART&&moldyn->time<DEND) {
468         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
469                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
470                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
471                 if(ai==&(moldyn->atom[0]))
472                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
473                 if(jtom==&(moldyn->atom[0]))
474                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
475                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
476                                                     f_c,b,f_a,f_r);
477                 printf("          %f %f %f\n",zeta_ij,.0,.0);
478         }
479 }
480 #endif
481
482         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
483         pre_dzeta=0.5*f_a*f_c*db;
484
485         /* energy contribution */
486         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
487         moldyn->energy+=energy;
488         ai->e+=energy;
489
490         /* reset k counter for second k loop */
491         kcount=0;
492                 
493
494                                 /* second loop over atoms k */
495                                 for(k=0;k<27;k++) {
496
497                                         bc_ik=(k<dnlc)?0:1;
498 #ifdef STATIC_LISTS
499                                         q=0;
500
501                                         while(neighbour_i[k][q]!=-1) {
502
503                                                 ktom=&(itom[neighbour_i[k][q]]);
504                                                 q++;
505 #elif LOWMEM_LISTS
506                                         q=neighbour_i[k];
507
508                                         while(q!=-1) {
509
510                                                 ktom=&(itom[q]);
511                                                 q=lc->subcell->list[q];
512 #else
513                                         that=&(neighbour_i2[k]);
514                                         list_reset_f(that);
515                                         
516                                         if(that->start==NULL)
517                                                 continue;
518
519                                         do {
520                                                 ktom=that->current->data;
521 #endif
522
523                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
524                                                         continue;
525
526                                                 if(ktom==jtom)
527                                                         continue;
528
529                                                 if(ktom==&(itom[i]))
530                                                         continue;
531
532
533 /* k2 func here ... */
534 /* albe 3 body potential function (second k loop) */
535
536         if(kcount>ALBE_MAXN)
537                 printf("FATAL: neighbours!\n");
538
539         /* d_ik2 */
540         d_ik2=exchange->d_ik2[kcount];
541
542         if(brand_i==ktom->brand)
543                 Sk2=params->S2[brand_i];
544         else
545                 Sk2=params->S2mixed;
546
547         /* return if d_ik > S */
548         if(d_ik2>Sk2) {
549                 kcount++;
550                 continue;
551         }
552
553         /* dist_ik, d_ik */
554         dist_ik=exchange->dist_ik[kcount];
555         d_ik=exchange->d_ik[kcount];
556
557         /* f_c_ik, df_c_ik */
558         f_c_ik=exchange->f_c_ik[kcount];
559         df_c_ik=exchange->df_c_ik[kcount];
560
561         /* g, dg, cos_theta */
562         g=exchange->g[kcount];
563         dg=exchange->dg[kcount];
564         cos_theta=exchange->cos_theta[kcount];
565
566         /* cos_theta derivatives wrt j,k */
567         dijdik_inv=1.0/(d_ij*d_ik);
568         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
569         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
570         v3_add(&dcosdrj,&dcosdrj,&tmp);
571         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
572         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
573         v3_add(&dcosdrk,&dcosdrk,&tmp);
574
575         /* f_c_ik * dg, df_c_ik * g */
576         fcdg=f_c_ik*dg;
577         dfcg=df_c_ik*g;
578
579         /* derivative wrt j */
580         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
581
582         /* force contribution */
583         v3_add(&(jtom->f),&(jtom->f),&force);
584
585 #ifdef DEBUG
586 if(moldyn->time>DSTART&&moldyn->time<DEND) {
587         if(jtom==&(moldyn->atom[DATOM])) {
588                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
589                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
590                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
591                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
592                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
593         }
594 }
595 #endif
596
597         /* virial */
598         virial_calc(ai,&force,&dist_ij);
599
600         /* force contribution to atom i */
601         v3_scale(&force,&force,-1.0);
602         v3_add(&(ai->f),&(ai->f),&force);
603
604         /* derivative wrt k */
605         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
606         v3_scale(&tmp,&dcosdrk,fcdg);
607         v3_add(&force,&force,&tmp);
608         v3_scale(&force,&force,pre_dzeta);
609
610         /* force contribution */
611         v3_add(&(ktom->f),&(ktom->f),&force);
612
613 #ifdef DEBUG
614 if(moldyn->time>DSTART&&moldyn->time<DEND) {
615         if(ktom==&(moldyn->atom[DATOM])) {
616                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
617                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
618                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
619                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
620                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
621         }
622 }
623 #endif
624
625         /* virial */
626         virial_calc(ai,&force,&dist_ik);
627         
628         /* force contribution to atom i */
629         v3_scale(&force,&force,-1.0);
630         v3_add(&(ai->f),&(ai->f),&force);
631
632         /* increase k counter */
633         kcount++;       
634
635
636
637 #ifdef STATIC_LISTS
638                                         }
639 #elif LOWMEM_LISTS
640                                         }
641 #else
642                                         } while(list_next_f(that)!=\
643                                                 L_NO_NEXT_ELEMENT);
644 #endif
645
646                                 }
647                                 
648 #ifdef STATIC_LISTS
649                         }
650 #elif LOWMEM_LISTS
651                         }
652 #else
653                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
654 #endif
655                 
656                 }
657                 
658 #ifdef DEBUG
659         //printf("\n\n");
660 #endif
661 #ifdef VDEBUG
662         printf("\n\n");
663 #endif
664
665         }
666
667 #ifdef DEBUG
668         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
669         if(moldyn->time>DSTART&&moldyn->time<DEND) {
670                 printf("force:\n");
671                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
672                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
673                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
674         }
675 #endif
676
677         /* some postprocessing */
678 #ifdef PARALLEL
679         #pragma omp parallel for
680 #endif
681         for(i=0;i<count;i++) {
682                 /* calculate global virial */
683                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
684                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
685                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
686                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
687                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
688                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
689
690                 /* check forces regarding the given timestep */
691                 if(v3_norm(&(itom[i].f))>\
692                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
693                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
694                                i);
695         }
696
697         return 0;
698 }
699
700
701 #else // PTHREADS
702
703
704 typedef struct s_pft_data {
705         t_moldyn *moldyn;
706         int start,end;
707 } t_pft_data;
708
709 void *potential_force_thread(void *ptr) {
710
711         t_pft_data *pft_data;
712         t_moldyn *moldyn;
713         t_albe_exchange ec;
714
715         int i,j,k,count;
716         t_atom *itom,*jtom,*ktom;
717         t_linkcell *lc;
718 #ifdef STATIC_LISTS
719         int *neighbour_i[27];
720         int p,q;
721 #elif LOWMEM_LISTS
722         int neighbour_i[27];
723         int p,q;
724 #else
725         t_list neighbour_i[27];
726         t_list neighbour_i2[27];
727         t_list *this,*that;
728 #endif
729         u8 bc_ij,bc_ik;
730         int dnlc;
731
732         // needed to work
733         t_atom *ai;
734
735         // optimized
736         t_albe_mult_params *params;
737         t_albe_exchange *exchange;
738         t_3dvec dist_ij;
739         double d_ij2;
740         double d_ij;
741         u8 brand_i;
742         double S2;
743         int kcount;
744         double zeta_ij;
745         double pre_dzeta;
746
747         // more ...
748         double Rk,Sk,Sk2;
749         t_3dvec dist_ik;
750         double d_ik2,d_ik;
751         double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg;
752         double f_c_ik,df_c_ik;
753
754         t_3dvec force;
755         double f_a,df_a,b,db,f_c,df_c;
756         double f_r,df_r;
757         double scale;
758         double mu,B;
759         double lambda,A;
760         double r0;
761         double S,R;
762         double energy;
763
764         double dijdik_inv,fcdg,dfcg;
765         t_3dvec dcosdrj,dcosdrk;
766         t_3dvec tmp;
767
768         // even more ...
769         double gamma_i;
770         double c_i;
771         double d_i;
772         double h_i;
773         double ci2;
774         double di2;
775         double ci2di2;
776
777         pft_data=ptr;
778         moldyn=pft_data->moldyn;
779         exchange=&ec;
780
781         count=moldyn->count;
782         itom=moldyn->atom;
783         lc=&(moldyn->lc);
784
785         // optimized
786         params=moldyn->pot_params;
787
788         /* get energy, force and virial for atoms */
789
790         for(i=pft_data->start;i<pft_data->end;i++) {
791
792                 if(!(itom[i].attr&ATOM_ATTR_3BP))
793                         return 0;
794
795                 link_cell_neighbour_index(moldyn,
796                                           (itom[i].r.x+moldyn->dim.x/2)/lc->x,
797                                           (itom[i].r.y+moldyn->dim.y/2)/lc->y,
798                                           (itom[i].r.z+moldyn->dim.z/2)/lc->z,
799                                           neighbour_i);
800
801                 dnlc=lc->dnlc;
802
803                 /* copy the neighbour lists */
804 #ifdef STATIC_LISTS
805 #elif LOWMEM_LISTS
806 #else
807                 memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list));
808 #endif
809
810                 ai=&(itom[i]);
811                 brand_i=ai->brand;
812
813                 /* loop over atoms j */
814                 for(j=0;j<27;j++) {
815
816                         bc_ij=(j<dnlc)?0:1;
817 #ifdef STATIC_LISTS
818                         p=0;
819
820                         while(neighbour_i[j][p]!=-1) {
821
822                                 jtom=&(itom[neighbour_i[j][p]]);
823                                 p++;
824 #elif LOWMEM_LISTS
825                         p=neighbour_i[j];
826
827                         while(p!=-1) {
828
829                                 jtom=&(itom[p]);
830                                 p=lc->subcell->list[p];
831 #else
832                         this=&(neighbour_i[j]);
833                         list_reset_f(this);
834
835                         if(this->start==NULL)
836                                 continue;
837
838                         do {
839
840                                 jtom=this->current->data;
841 #endif
842
843                                 if(jtom==&(itom[i]))
844                                         continue;
845
846                                 if(!(jtom->attr&ATOM_ATTR_3BP))
847                                         continue;
848
849                                 /* reset 3bp run */
850                                 moldyn->run3bp=1;
851
852
853 /* j1 func here ... */
854 /* albe 3 body potential function (first ij loop) */
855
856         /* reset zeta sum */
857         zeta_ij=0.0;
858
859         /*
860          * set ij depending values
861          */
862
863         if(brand_i==jtom->brand) {
864                 S2=params->S2[brand_i];
865         }
866         else {
867                 S2=params->S2mixed;
868         }
869
870         /* dist_ij, d_ij2 */
871         v3_sub(&dist_ij,&(jtom->r),&(ai->r));
872         if(bc_ij) check_per_bound(moldyn,&dist_ij);
873         d_ij2=v3_absolute_square(&dist_ij);
874
875         /* if d_ij2 > S2 => no force & potential energy contribution */
876         if(d_ij2>S2)
877                 continue;
878
879         /* d_ij */
880         d_ij=sqrt(d_ij2);
881
882         /* reset k counter for first k loop */
883         kcount=0;
884
885                                 /* first loop over atoms k */
886                                 for(k=0;k<27;k++) {
887
888                                         bc_ik=(k<dnlc)?0:1;
889 #ifdef STATIC_LISTS
890                                         q=0;
891
892                                         while(neighbour_i[k][q]!=-1) {
893
894                                                 ktom=&(itom[neighbour_i[k][q]]);
895                                                 q++;
896 #elif LOWMEM_LISTS
897                                         q=neighbour_i[k];
898
899                                         while(q!=-1) {
900
901                                                 ktom=&(itom[q]);
902                                                 q=lc->subcell->list[q];
903 #else
904                                         that=&(neighbour_i2[k]);
905                                         list_reset_f(that);
906                                         
907                                         if(that->start==NULL)
908                                                 continue;
909
910                                         do {
911                                                 ktom=that->current->data;
912 #endif
913
914                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
915                                                         continue;
916
917                                                 if(ktom==jtom)
918                                                         continue;
919
920                                                 if(ktom==&(itom[i]))
921                                                         continue;
922
923 /* k1 func here ... */
924 /* albe 3 body potential function (first k loop) */
925
926         if(kcount>ALBE_MAXN) {
927                 printf("FATAL: neighbours = %d\n",kcount);
928                 printf("  -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag);
929         }
930
931         /* ik constants */
932         if(brand_i==ktom->brand) {
933                 Rk=params->R[brand_i];
934                 Sk=params->S[brand_i];
935                 Sk2=params->S2[brand_i];
936                 /* albe needs i,k depending c,d,h and gamma values */
937                 gamma_i=params->gamma[brand_i];
938                 c_i=params->c[brand_i];
939                 d_i=params->d[brand_i];
940                 h_i=params->h[brand_i];
941                 ci2=params->c2[brand_i];
942                 di2=params->d2[brand_i];
943                 ci2di2=params->c2d2[brand_i];
944         }
945         else {
946                 Rk=params->Rmixed;
947                 Sk=params->Smixed;
948                 Sk2=params->S2mixed;
949                 /* albe needs i,k depending c,d,h and gamma values */
950                 gamma_i=params->gamma_m;
951                 c_i=params->c_mixed;
952                 d_i=params->d_mixed;
953                 h_i=params->h_mixed;
954                 ci2=params->c2_mixed;
955                 di2=params->d2_mixed;
956                 ci2di2=params->c2d2_m;
957         }
958
959         /* dist_ik, d_ik2 */
960         v3_sub(&dist_ik,&(ktom->r),&(ai->r));
961         if(bc_ik) check_per_bound(moldyn,&dist_ik);
962         d_ik2=v3_absolute_square(&dist_ik);
963
964         /* store data for second k loop */
965         exchange->dist_ik[kcount]=dist_ik;
966         exchange->d_ik2[kcount]=d_ik2;
967
968         /* return if not within cutoff */
969         if(d_ik2>Sk2) {
970                 kcount++;
971                 continue;
972         }
973
974         /* d_ik */
975         d_ik=sqrt(d_ik2);
976
977         /* cos theta */
978         cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik);
979
980         /* g_ijk 
981         h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism
982         d2_h_cos2=exchange->di2+(h_cos*h_cos);
983         frac=exchange->ci2/d2_h_cos2;
984         g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac);
985         dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f..
986         */
987
988         h_cos=h_i+cos_theta; // + in albe formalism
989         d2_h_cos2=di2+(h_cos*h_cos);
990         frac=ci2/d2_h_cos2;
991         g=gamma_i*(1.0+ci2di2-frac);
992         dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f..
993
994         /* zeta sum += f_c_ik * g_ijk */
995         if(d_ik<=Rk) {
996                 zeta_ij+=g;
997                 f_c_ik=1.0;
998                 df_c_ik=0.0;
999         }
1000         else {
1001                 s_r=Sk-Rk;
1002                 arg=M_PI*(d_ik-Rk)/s_r;
1003                 f_c_ik=0.5+0.5*cos(arg);
1004                 df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik));
1005                 zeta_ij+=f_c_ik*g;
1006         }
1007
1008         /* store even more data for second k loop */
1009         exchange->g[kcount]=g;
1010         exchange->dg[kcount]=dg;
1011         exchange->d_ik[kcount]=d_ik;
1012         exchange->cos_theta[kcount]=cos_theta;
1013         exchange->f_c_ik[kcount]=f_c_ik;
1014         exchange->df_c_ik[kcount]=df_c_ik;
1015
1016         /* increase k counter */
1017         kcount++;
1018
1019 #ifdef STATIC_LISTS
1020                                         }
1021 #elif LOWMEM_LISTS
1022                                         }
1023 #else
1024                                         } while(list_next_f(that)!=\
1025                                                 L_NO_NEXT_ELEMENT);
1026 #endif
1027
1028                                 }
1029
1030 /* j2 func here ... */
1031
1032
1033         if(brand_i==jtom->brand) {
1034                 S=params->S[brand_i];
1035                 R=params->R[brand_i];
1036                 B=params->B[brand_i];
1037                 A=params->A[brand_i];
1038                 r0=params->r0[brand_i];
1039                 mu=params->mu[brand_i];
1040                 lambda=params->lambda[brand_i];
1041         }
1042         else {
1043                 S=params->Smixed;
1044                 R=params->Rmixed;
1045                 B=params->Bmixed;
1046                 A=params->Amixed;
1047                 r0=params->r0_mixed;
1048                 mu=params->mu_m;
1049                 lambda=params->lambda_m;
1050         }
1051
1052         /* f_c, df_c */
1053         if(d_ij<R) {
1054                 f_c=1.0;
1055                 df_c=0.0;
1056         }
1057         else {
1058                 s_r=S-R;
1059                 arg=M_PI*(d_ij-R)/s_r;
1060                 f_c=0.5+0.5*cos(arg);
1061                 df_c=0.5*sin(arg)*(M_PI/(s_r*d_ij));
1062         }
1063
1064         /* f_a, df_a */
1065         f_a=-B*exp(-mu*(d_ij-r0));
1066         df_a=mu*f_a/d_ij;
1067
1068         /* f_r, df_r */
1069         f_r=A*exp(-lambda*(d_ij-r0));
1070         df_r=lambda*f_r/d_ij;
1071
1072         /* b, db */
1073         if(zeta_ij==0.0) {
1074                 b=1.0;
1075                 db=0.0;
1076         }
1077         else {
1078                 b=1.0/sqrt(1.0+zeta_ij);
1079                 db=-0.5*b/(1.0+zeta_ij);
1080         }
1081
1082         /* force contribution for atom i */
1083         scale=-0.5*(f_c*(df_r-b*df_a)+df_c*(f_r-b*f_a)); // - in albe formalism
1084         v3_scale(&force,&(dist_ij),scale);
1085         pthread_mutex_lock(&(amutex[ai->tag])); 
1086         v3_add(&(ai->f),&(ai->f),&force);
1087         pthread_mutex_unlock(&(amutex[ai->tag]));       
1088
1089         /* force contribution for atom j */
1090         v3_scale(&force,&force,-1.0); // dri rij = - drj rij
1091         pthread_mutex_lock(&(amutex[jtom->tag]));       
1092         v3_add(&(jtom->f),&(jtom->f),&force);
1093         pthread_mutex_unlock(&(amutex[jtom->tag]));     
1094
1095         /* virial */
1096         pthread_mutex_lock(&(amutex[ai->tag])); 
1097         virial_calc(ai,&force,&(dist_ij));
1098         pthread_mutex_unlock(&(amutex[ai->tag]));       
1099
1100 #ifdef DEBUG
1101 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1102         if((ai==&(moldyn->atom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) {
1103                 printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag);
1104                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1105                 if(ai==&(moldyn->atom[0]))
1106                         printf("  total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z);
1107                 if(jtom==&(moldyn->atom[0]))
1108                         printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
1109                 printf("  energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r),
1110                                                     f_c,b,f_a,f_r);
1111                 printf("          %f %f %f\n",zeta_ij,.0,.0);
1112         }
1113 }
1114 #endif
1115
1116         /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */
1117         pre_dzeta=0.5*f_a*f_c*db;
1118
1119         /* energy contribution */
1120         energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism
1121         pthread_mutex_lock(&emutex);
1122         moldyn->energy+=energy;
1123         pthread_mutex_unlock(&emutex);
1124         pthread_mutex_lock(&(amutex[ai->tag])); 
1125         ai->e+=energy;
1126         pthread_mutex_unlock(&(amutex[ai->tag]));       
1127
1128         /* reset k counter for second k loop */
1129         kcount=0;
1130                 
1131
1132                                 /* second loop over atoms k */
1133                                 for(k=0;k<27;k++) {
1134
1135                                         bc_ik=(k<dnlc)?0:1;
1136 #ifdef STATIC_LISTS
1137                                         q=0;
1138
1139                                         while(neighbour_i[k][q]!=-1) {
1140
1141                                                 ktom=&(itom[neighbour_i[k][q]]);
1142                                                 q++;
1143 #elif LOWMEM_LISTS
1144                                         q=neighbour_i[k];
1145
1146                                         while(q!=-1) {
1147
1148                                                 ktom=&(itom[q]);
1149                                                 q=lc->subcell->list[q];
1150 #else
1151                                         that=&(neighbour_i2[k]);
1152                                         list_reset_f(that);
1153                                         
1154                                         if(that->start==NULL)
1155                                                 continue;
1156
1157                                         do {
1158                                                 ktom=that->current->data;
1159 #endif
1160
1161                                                 if(!(ktom->attr&ATOM_ATTR_3BP))
1162                                                         continue;
1163
1164                                                 if(ktom==jtom)
1165                                                         continue;
1166
1167                                                 if(ktom==&(itom[i]))
1168                                                         continue;
1169
1170
1171 /* k2 func here ... */
1172 /* albe 3 body potential function (second k loop) */
1173
1174         if(kcount>ALBE_MAXN)
1175                 printf("FATAL: neighbours!\n");
1176
1177         /* d_ik2 */
1178         d_ik2=exchange->d_ik2[kcount];
1179
1180         if(brand_i==ktom->brand)
1181                 Sk2=params->S2[brand_i];
1182         else
1183                 Sk2=params->S2mixed;
1184
1185         /* return if d_ik > S */
1186         if(d_ik2>Sk2) {
1187                 kcount++;
1188                 continue;
1189         }
1190
1191         /* dist_ik, d_ik */
1192         dist_ik=exchange->dist_ik[kcount];
1193         d_ik=exchange->d_ik[kcount];
1194
1195         /* f_c_ik, df_c_ik */
1196         f_c_ik=exchange->f_c_ik[kcount];
1197         df_c_ik=exchange->df_c_ik[kcount];
1198
1199         /* g, dg, cos_theta */
1200         g=exchange->g[kcount];
1201         dg=exchange->dg[kcount];
1202         cos_theta=exchange->cos_theta[kcount];
1203
1204         /* cos_theta derivatives wrt j,k */
1205         dijdik_inv=1.0/(d_ij*d_ik);
1206         v3_scale(&dcosdrj,&dist_ik,dijdik_inv);         // j
1207         v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2);
1208         v3_add(&dcosdrj,&dcosdrj,&tmp);
1209         v3_scale(&dcosdrk,&dist_ij,dijdik_inv);         // k
1210         v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2);
1211         v3_add(&dcosdrk,&dcosdrk,&tmp);
1212
1213         /* f_c_ik * dg, df_c_ik * g */
1214         fcdg=f_c_ik*dg;
1215         dfcg=df_c_ik*g;
1216
1217         /* derivative wrt j */
1218         v3_scale(&force,&dcosdrj,fcdg*pre_dzeta);
1219
1220         /* force contribution */
1221         pthread_mutex_lock(&(amutex[jtom->tag]));       
1222         v3_add(&(jtom->f),&(jtom->f),&force);
1223         pthread_mutex_unlock(&(amutex[jtom->tag]));     
1224
1225 #ifdef DEBUG
1226 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1227         if(jtom==&(moldyn->atom[DATOM])) {
1228                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
1229                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1230                 printf("  total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z);
1231                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
1232                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
1233         }
1234 }
1235 #endif
1236
1237         /* virial */
1238         pthread_mutex_lock(&(amutex[ai->tag])); 
1239         virial_calc(ai,&force,&dist_ij);
1240
1241         /* force contribution to atom i */
1242         v3_scale(&force,&force,-1.0);
1243         v3_add(&(ai->f),&(ai->f),&force);
1244         pthread_mutex_unlock(&(amutex[ai->tag]));       
1245
1246         /* derivative wrt k */
1247         v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik
1248         v3_scale(&tmp,&dcosdrk,fcdg);
1249         v3_add(&force,&force,&tmp);
1250         v3_scale(&force,&force,pre_dzeta);
1251
1252         /* force contribution */
1253         pthread_mutex_lock(&(amutex[ktom->tag]));       
1254         v3_add(&(ktom->f),&(ktom->f),&force);
1255         pthread_mutex_unlock(&(amutex[ktom->tag]));     
1256
1257 #ifdef DEBUG
1258 if(moldyn->time>DSTART&&moldyn->time<DEND) {
1259         if(ktom==&(moldyn->atom[DATOM])) {
1260                 printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag);
1261                 printf("  adding %f %f %f\n",force.x,force.y,force.z);
1262                 printf("  total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z);
1263                 printf("  angle: %f\n",acos(cos_theta)*360.0/(2*M_PI));
1264                 printf("    d ij ik = %f %f\n",d_ij,d_ik);
1265         }
1266 }
1267 #endif
1268
1269         /* virial */
1270         pthread_mutex_lock(&(amutex[ai->tag])); 
1271         virial_calc(ai,&force,&dist_ik);
1272         
1273         /* force contribution to atom i */
1274         v3_scale(&force,&force,-1.0);
1275         v3_add(&(ai->f),&(ai->f),&force);
1276         pthread_mutex_unlock(&(amutex[ai->tag]));       
1277
1278         /* increase k counter */
1279         kcount++;       
1280
1281
1282
1283 #ifdef STATIC_LISTS
1284                                         }
1285 #elif LOWMEM_LISTS
1286                                         }
1287 #else
1288                                         } while(list_next_f(that)!=\
1289                                                 L_NO_NEXT_ELEMENT);
1290 #endif
1291
1292                                 }
1293                                 
1294 #ifdef STATIC_LISTS
1295                         }
1296 #elif LOWMEM_LISTS
1297                         }
1298 #else
1299                         } while(list_next_f(this)!=L_NO_NEXT_ELEMENT);
1300 #endif
1301                 
1302                 }
1303
1304         } // i loop
1305                 
1306 #ifdef DEBUG
1307         //printf("\n\n");
1308 #endif
1309 #ifdef VDEBUG
1310         printf("\n\n");
1311 #endif
1312
1313 #ifdef DEBUG
1314         //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z);
1315         if(moldyn->time>DSTART&&moldyn->time<DEND) {
1316                 printf("force:\n");
1317                 printf("  x: %0.40f\n",moldyn->atom[DATOM].f.x);
1318                 printf("  y: %0.40f\n",moldyn->atom[DATOM].f.y);
1319                 printf("  z: %0.40f\n",moldyn->atom[DATOM].f.z);
1320         }
1321 #endif
1322
1323         pthread_exit(NULL);
1324
1325         return 0;
1326 }
1327
1328 int albe_potential_force_calc(t_moldyn *moldyn) {
1329
1330         int i,j,ret;
1331         t_pft_data pft_data[MAX_THREADS];
1332         int count;
1333         pthread_t pft_thread[MAX_THREADS];
1334         t_atom *itom;
1335         t_virial *virial;
1336
1337         count=moldyn->count;
1338         itom=moldyn->atom;
1339
1340         /* reset energy */
1341         moldyn->energy=0.0;
1342
1343         /* reset global virial */
1344         memset(&(moldyn->gvir),0,sizeof(t_virial));
1345
1346         /* reset force, site energy and virial of every atom */
1347         for(i=0;i<count;i++) {
1348
1349                 /* reset force */
1350                 v3_zero(&(itom[i].f));
1351
1352                 /* reset virial */
1353                 virial=&(itom[i].virial);
1354                 virial->xx=0.0;
1355                 virial->yy=0.0;
1356                 virial->zz=0.0;
1357                 virial->xy=0.0;
1358                 virial->xz=0.0;
1359                 virial->yz=0.0;
1360         
1361                 /* reset site energy */
1362                 itom[i].e=0.0;
1363
1364         }
1365
1366         /* start threads */
1367         for(j=0;j<MAX_THREADS;j++) {
1368
1369                 /* prepare thread data */
1370                 pft_data[j].moldyn=moldyn;
1371                 pft_data[j].start=j*count/MAX_THREADS;
1372                 if(j==MAX_THREADS-1) {
1373                         pft_data[j].end=count;
1374                 }
1375                 else {
1376                         pft_data[j].end=pft_data[j].start;
1377                         pft_data[j].end+=count/MAX_THREADS;
1378                 }
1379
1380                 ret=pthread_create(&(pft_thread[j]),NULL,
1381                                    potential_force_thread,
1382                                    &(pft_data[j]));
1383                 if(ret)  {
1384                         perror("[albe fast] pf thread create");
1385                         return ret;
1386                 }
1387         }
1388
1389         /* join threads */
1390         for(j=0;j<MAX_THREADS;j++) {
1391
1392                 ret=pthread_join(pft_thread[j],NULL);
1393                 if(ret) {
1394                         perror("[albe fast] pf thread join");
1395                         return ret;
1396                 }
1397         }
1398
1399         /* some postprocessing */
1400         for(i=0;i<count;i++) {
1401                 /* calculate global virial */
1402                 moldyn->gvir.xx+=itom[i].r.x*itom[i].f.x;
1403                 moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y;
1404                 moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z;
1405                 moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x;
1406                 moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x;
1407                 moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y;
1408
1409                 /* check forces regarding the given timestep */
1410                 if(v3_norm(&(itom[i].f))>\
1411                    0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square)
1412                         printf("[moldyn] WARNING: pfc (high force: atom %d)\n",
1413                                i);
1414         }
1415
1416         return 0;
1417 }
1418
1419 #endif // PTHREADS