X-Git-Url: https://hackdaworld.org/gitweb/?p=physik%2Fposic.git;a=blobdiff_plain;f=potentials%2Falbe_fast.c;fp=potentials%2Falbe_fast.c;h=79162b1f31c62b2d95d01cded216e9a2ffc1584d;hp=0000000000000000000000000000000000000000;hb=6e6d7126ea9a845f11637d8e1b8eb2b570ac4dc9;hpb=97dc63eb6a519b8e1f4fbfaa9760dd94539436b0 diff --git a/potentials/albe_fast.c b/potentials/albe_fast.c new file mode 100644 index 0000000..79162b1 --- /dev/null +++ b/potentials/albe_fast.c @@ -0,0 +1,1460 @@ +/* + * test: albe_new.c + * + * author: Frank Zirkelbach + * + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef PARALLEL +#include +#endif + +#ifdef PTHREADS +#include +#define MAX_THREADS 2 +#endif + +#include "../moldyn.h" +#include "../math/math.h" +#include "albe.h" + +#ifdef PTHREADS +extern pthread_mutex_t *amutex; +extern pthread_mutex_t emutex; +#endif + +/* + * virial calculation + */ + +#define albe_v_calc(a,f,d) (a)->virial.xx+=(f)->x*(d)->x; \ + (a)->virial.yy+=(f)->y*(d)->y; \ + (a)->virial.zz+=(f)->z*(d)->z; \ + (a)->virial.xy+=(f)->x*(d)->y; \ + (a)->virial.xz+=(f)->x*(d)->z; \ + (a)->virial.yz+=(f)->y*(d)->z + +#ifndef PTHREADS + +int albe_potential_force_calc(t_moldyn *moldyn) { + + int i,j,k,count; + t_atom *itom,*jtom,*ktom; + t_virial *virial; + t_linkcell *lc; +#ifdef STATIC_LISTS + int *neighbour_i[27]; + int p,q; +#elif LOWMEM_LISTS + int neighbour_i[27]; + int p,q; +#else + t_list neighbour_i[27]; + t_list neighbour_i2[27]; + t_list *this,*that; +#endif + u8 bc_ij,bc_ik; + int dnlc; +#ifdef PTHREADS + int ret; + t_kdata kdata[27]; + pthread_t kthread[27]; +#endif + + // needed to work + t_atom *ai; + + // optimized + t_albe_mult_params *params; + t_albe_exchange *exchange; + t_3dvec dist_ij; + double d_ij2; + double d_ij; + u8 brand_i; + double S2; + int kcount; + double zeta_ij; + double pre_dzeta; + + // more ... + double Rk,Sk,Sk2; + t_3dvec dist_ik; + double d_ik2,d_ik; + double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg; + double f_c_ik,df_c_ik; + + t_3dvec force; + double f_a,df_a,b,db,f_c,df_c; + double f_r,df_r; + double scale; + double mu,B; + double lambda,A; + double r0; + double S,R; + double energy; + + double dijdik_inv,fcdg,dfcg; + t_3dvec dcosdrj,dcosdrk; + t_3dvec tmp; + + // even more ... + double gamma_i; + double c_i; + double d_i; + double h_i; + double ci2; + double di2; + double ci2di2; + + count=moldyn->count; + itom=moldyn->atom; + lc=&(moldyn->lc); + + // optimized + params=moldyn->pot_params; + exchange=&(params->exchange); + + + /* reset energy */ + moldyn->energy=0.0; + + /* reset global virial */ + memset(&(moldyn->gvir),0,sizeof(t_virial)); + + /* reset force, site energy and virial of every atom */ +#ifdef PARALLEL + #pragma omp parallel for private(virial) +#endif + for(i=0;ixx=0.0; + virial->yy=0.0; + virial->zz=0.0; + virial->xy=0.0; + virial->xz=0.0; + virial->yz=0.0; + + /* reset site energy */ + itom[i].e=0.0; + + } + + /* get energy, force and virial of every atom */ + + /* first (and only) loop over atoms i */ + for(i=0;idim.x/2)/lc->x, + (itom[i].r.y+moldyn->dim.y/2)/lc->y, + (itom[i].r.z+moldyn->dim.z/2)/lc->z, + neighbour_i); + + dnlc=lc->dnlc; + + /* copy the neighbour lists */ +#ifdef STATIC_LISTS +#elif LOWMEM_LISTS +#else + memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list)); +#endif + + ai=&(itom[i]); + brand_i=ai->brand; + + /* loop over atoms j */ + for(j=0;j<27;j++) { + + bc_ij=(jsubcell->list[p]; +#else + this=&(neighbour_i[j]); + list_reset_f(this); + + if(this->start==NULL) + continue; + + do { + + jtom=this->current->data; +#endif + + if(jtom==&(itom[i])) + continue; + + if(!(jtom->attr&ATOM_ATTR_3BP)) + continue; + + /* reset 3bp run */ + moldyn->run3bp=1; + + +/* j1 func here ... */ +/* albe 3 body potential function (first ij loop) */ + + /* reset zeta sum */ + zeta_ij=0.0; + + /* + * set ij depending values + */ + + if(brand_i==jtom->brand) { + S2=params->S2[brand_i]; + } + else { + S2=params->S2mixed; + } + + /* dist_ij, d_ij2 */ + v3_sub(&dist_ij,&(jtom->r),&(ai->r)); + if(bc_ij) check_per_bound(moldyn,&dist_ij); + d_ij2=v3_absolute_square(&dist_ij); + + /* if d_ij2 > S2 => no force & potential energy contribution */ + if(d_ij2>S2) + continue; + + /* d_ij */ + d_ij=sqrt(d_ij2); + + /* reset k counter for first k loop */ + kcount=0; + + /* first loop over atoms k */ + for(k=0;k<27;k++) { + + bc_ik=(ksubcell->list[q]; +#else + that=&(neighbour_i2[k]); + list_reset_f(that); + + if(that->start==NULL) + continue; + + do { + ktom=that->current->data; +#endif + + if(!(ktom->attr&ATOM_ATTR_3BP)) + continue; + + if(ktom==jtom) + continue; + + if(ktom==&(itom[i])) + continue; + +/* k1 func here ... */ +/* albe 3 body potential function (first k loop) */ + + if(kcount>ALBE_MAXN) { + printf("FATAL: neighbours = %d\n",kcount); + printf(" -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag); + } + + /* ik constants */ + if(brand_i==ktom->brand) { + Rk=params->R[brand_i]; + Sk=params->S[brand_i]; + Sk2=params->S2[brand_i]; + /* albe needs i,k depending c,d,h and gamma values */ + gamma_i=params->gamma[brand_i]; + c_i=params->c[brand_i]; + d_i=params->d[brand_i]; + h_i=params->h[brand_i]; + ci2=params->c2[brand_i]; + di2=params->d2[brand_i]; + ci2di2=params->c2d2[brand_i]; + } + else { + Rk=params->Rmixed; + Sk=params->Smixed; + Sk2=params->S2mixed; + /* albe needs i,k depending c,d,h and gamma values */ + gamma_i=params->gamma_m; + c_i=params->c_mixed; + d_i=params->d_mixed; + h_i=params->h_mixed; + ci2=params->c2_mixed; + di2=params->d2_mixed; + ci2di2=params->c2d2_m; + } + + /* dist_ik, d_ik2 */ + v3_sub(&dist_ik,&(ktom->r),&(ai->r)); + if(bc_ik) check_per_bound(moldyn,&dist_ik); + d_ik2=v3_absolute_square(&dist_ik); + + /* store data for second k loop */ + exchange->dist_ik[kcount]=dist_ik; + exchange->d_ik2[kcount]=d_ik2; + + /* return if not within cutoff */ + if(d_ik2>Sk2) { + kcount++; + continue; + } + + /* d_ik */ + d_ik=sqrt(d_ik2); + + /* cos theta */ + cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik); + + /* g_ijk + h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism + d2_h_cos2=exchange->di2+(h_cos*h_cos); + frac=exchange->ci2/d2_h_cos2; + g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac); + dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f.. + */ + + h_cos=h_i+cos_theta; // + in albe formalism + d2_h_cos2=di2+(h_cos*h_cos); + frac=ci2/d2_h_cos2; + g=gamma_i*(1.0+ci2di2-frac); + dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f.. + + /* zeta sum += f_c_ik * g_ijk */ + if(d_ik<=Rk) { + zeta_ij+=g; + f_c_ik=1.0; + df_c_ik=0.0; + } + else { + s_r=Sk-Rk; + arg=M_PI*(d_ik-Rk)/s_r; + f_c_ik=0.5+0.5*cos(arg); + df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik)); + zeta_ij+=f_c_ik*g; + } + + /* store even more data for second k loop */ + exchange->g[kcount]=g; + exchange->dg[kcount]=dg; + exchange->d_ik[kcount]=d_ik; + exchange->cos_theta[kcount]=cos_theta; + exchange->f_c_ik[kcount]=f_c_ik; + exchange->df_c_ik[kcount]=df_c_ik; + + /* increase k counter */ + kcount++; + +#ifdef STATIC_LISTS + } +#elif LOWMEM_LISTS + } +#else + } while(list_next_f(that)!=\ + L_NO_NEXT_ELEMENT); +#endif + + } + +/* j2 func here ... */ + + + if(brand_i==jtom->brand) { + S=params->S[brand_i]; + R=params->R[brand_i]; + B=params->B[brand_i]; + A=params->A[brand_i]; + r0=params->r0[brand_i]; + mu=params->mu[brand_i]; + lambda=params->lambda[brand_i]; + } + else { + S=params->Smixed; + R=params->Rmixed; + B=params->Bmixed; + A=params->Amixed; + r0=params->r0_mixed; + mu=params->mu_m; + lambda=params->lambda_m; + } + + /* f_c, df_c */ + if(d_ijf),&(ai->f),&force); + + /* force contribution for atom j */ + v3_scale(&force,&force,-1.0); // dri rij = - drj rij + v3_add(&(jtom->f),&(jtom->f),&force); + + /* virial */ + albe_v_calc(ai,&force,&(dist_ij)); + //virial_calc(ai,&force,&(dist_ij)); + +#ifdef DEBUG +if(moldyn->time>DSTART&&moldyn->timeatom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) { + printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag); + printf(" adding %f %f %f\n",force.x,force.y,force.z); + if(ai==&(moldyn->atom[0])) + printf(" total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z); + if(jtom==&(moldyn->atom[0])) + printf(" total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z); + printf(" energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r), + f_c,b,f_a,f_r); + printf(" %f %f %f\n",zeta_ij,.0,.0); + } +} +#endif + + /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */ + pre_dzeta=0.5*f_a*f_c*db; + + /* energy contribution */ + energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism + moldyn->energy+=energy; + ai->e+=energy; + + /* reset k counter for second k loop */ + kcount=0; + + + /* second loop over atoms k */ + for(k=0;k<27;k++) { + + bc_ik=(ksubcell->list[q]; +#else + that=&(neighbour_i2[k]); + list_reset_f(that); + + if(that->start==NULL) + continue; + + do { + ktom=that->current->data; +#endif + + if(!(ktom->attr&ATOM_ATTR_3BP)) + continue; + + if(ktom==jtom) + continue; + + if(ktom==&(itom[i])) + continue; + + +/* k2 func here ... */ +/* albe 3 body potential function (second k loop) */ + + if(kcount>ALBE_MAXN) + printf("FATAL: neighbours!\n"); + + /* d_ik2 */ + d_ik2=exchange->d_ik2[kcount]; + + if(brand_i==ktom->brand) + Sk2=params->S2[brand_i]; + else + Sk2=params->S2mixed; + + /* return if d_ik > S */ + if(d_ik2>Sk2) { + kcount++; + continue; + } + + /* dist_ik, d_ik */ + dist_ik=exchange->dist_ik[kcount]; + d_ik=exchange->d_ik[kcount]; + + /* f_c_ik, df_c_ik */ + f_c_ik=exchange->f_c_ik[kcount]; + df_c_ik=exchange->df_c_ik[kcount]; + + /* g, dg, cos_theta */ + g=exchange->g[kcount]; + dg=exchange->dg[kcount]; + cos_theta=exchange->cos_theta[kcount]; + + /* cos_theta derivatives wrt j,k */ + dijdik_inv=1.0/(d_ij*d_ik); + v3_scale(&dcosdrj,&dist_ik,dijdik_inv); // j + v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2); + v3_add(&dcosdrj,&dcosdrj,&tmp); + v3_scale(&dcosdrk,&dist_ij,dijdik_inv); // k + v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2); + v3_add(&dcosdrk,&dcosdrk,&tmp); + + /* f_c_ik * dg, df_c_ik * g */ + fcdg=f_c_ik*dg; + dfcg=df_c_ik*g; + + /* derivative wrt j */ + v3_scale(&force,&dcosdrj,fcdg*pre_dzeta); + + /* force contribution */ + v3_add(&(jtom->f),&(jtom->f),&force); + +#ifdef DEBUG +if(moldyn->time>DSTART&&moldyn->timeatom[DATOM])) { + printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag); + printf(" adding %f %f %f\n",force.x,force.y,force.z); + printf(" total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z); + printf(" angle: %f\n",acos(cos_theta)*360.0/(2*M_PI)); + printf(" d ij ik = %f %f\n",d_ij,d_ik); + } +} +#endif + + /* virial */ + albe_v_calc(ai,&force,&dist_ij); + //virial_calc(ai,&force,&dist_ij); + + /* force contribution to atom i */ + v3_scale(&force,&force,-1.0); + v3_add(&(ai->f),&(ai->f),&force); + + /* derivative wrt k */ +#ifdef MATTONI + v3_scale(&tmp,&dcosdrk,fcdg); + v3_scale(&force,&tmp,pre_dzeta); +#else + v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik + v3_scale(&tmp,&dcosdrk,fcdg); + v3_add(&force,&force,&tmp); + v3_scale(&force,&force,pre_dzeta); +#endif + + /* force contribution */ + v3_add(&(ktom->f),&(ktom->f),&force); + +#ifdef DEBUG +if(moldyn->time>DSTART&&moldyn->timeatom[DATOM])) { + printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag); + printf(" adding %f %f %f\n",force.x,force.y,force.z); + printf(" total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z); + printf(" angle: %f\n",acos(cos_theta)*360.0/(2*M_PI)); + printf(" d ij ik = %f %f\n",d_ij,d_ik); + } +} +#endif + + /* virial */ + albe_v_calc(ai,&force,&dist_ik); + //virial_calc(ai,&force,&dist_ik); + + /* force contribution to atom i */ + v3_scale(&force,&force,-1.0); + v3_add(&(ai->f),&(ai->f),&force); + + /* increase k counter */ + kcount++; + + + +#ifdef STATIC_LISTS + } +#elif LOWMEM_LISTS + } +#else + } while(list_next_f(that)!=\ + L_NO_NEXT_ELEMENT); +#endif + + } + +#ifdef STATIC_LISTS + } +#elif LOWMEM_LISTS + } +#else + } while(list_next_f(this)!=L_NO_NEXT_ELEMENT); +#endif + + } + +#ifdef DEBUG + //printf("\n\n"); +#endif +#ifdef VDEBUG + printf("\n\n"); +#endif + + } + +#ifdef DEBUG + //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z); + if(moldyn->time>DSTART&&moldyn->timeatom[DATOM].f.x); + printf(" y: %0.40f\n",moldyn->atom[DATOM].f.y); + printf(" z: %0.40f\n",moldyn->atom[DATOM].f.z); + } +#endif + + /* some postprocessing */ +#ifdef PARALLEL + #pragma omp parallel for +#endif + for(i=0;igvir.xx+=itom[i].r.x*itom[i].f.x; + moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y; + moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z; + moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x; + moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x; + moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y; + + /* check forces regarding the given timestep */ + if(v3_norm(&(itom[i].f))>\ + 0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square) + printf("[moldyn] WARNING: pfc (high force: atom %d)\n", + i); + } + + return 0; +} + + +#else // PTHREADS + + +typedef struct s_pft_data { + t_moldyn *moldyn; + int start,end; +} t_pft_data; + +void *potential_force_thread(void *ptr) { + + t_pft_data *pft_data; + t_moldyn *moldyn; + t_albe_exchange ec; + + int i,j,k,count; + t_atom *itom,*jtom,*ktom; + t_linkcell *lc; +#ifdef STATIC_LISTS + int *neighbour_i[27]; + int p,q; +#elif LOWMEM_LISTS + int neighbour_i[27]; + int p,q; +#else + t_list neighbour_i[27]; + t_list neighbour_i2[27]; + t_list *this,*that; +#endif + u8 bc_ij,bc_ik; + int dnlc; + + // needed to work + t_atom *ai; + + // optimized + t_albe_mult_params *params; + t_albe_exchange *exchange; + t_3dvec dist_ij; + double d_ij2; + double d_ij; + u8 brand_i; + double S2; + int kcount; + double zeta_ij; + double pre_dzeta; + + // more ... + double Rk,Sk,Sk2; + t_3dvec dist_ik; + double d_ik2,d_ik; + double cos_theta,h_cos,d2_h_cos2,frac,g,dg,s_r,arg; + double f_c_ik,df_c_ik; + + t_3dvec force; + double f_a,df_a,b,db,f_c,df_c; + double f_r,df_r; + double scale; + double mu,B; + double lambda,A; + double r0; + double S,R; + double energy; + + double dijdik_inv,fcdg,dfcg; + t_3dvec dcosdrj,dcosdrk; + t_3dvec tmp; + + // even more ... + double gamma_i; + double c_i; + double d_i; + double h_i; + double ci2; + double di2; + double ci2di2; + + pft_data=ptr; + moldyn=pft_data->moldyn; + exchange=&ec; + + count=moldyn->count; + itom=moldyn->atom; + lc=&(moldyn->lc); + + // optimized + params=moldyn->pot_params; + + /* get energy, force and virial for atoms */ + + for(i=pft_data->start;iend;i++) { + + if(!(itom[i].attr&ATOM_ATTR_3BP)) + return 0; + + // thread safe this way! + dnlc=link_cell_neighbour_index(moldyn, + (itom[i].r.x+moldyn->dim.x/2)/lc->x, + (itom[i].r.y+moldyn->dim.y/2)/lc->y, + (itom[i].r.z+moldyn->dim.z/2)/lc->z, + neighbour_i); + + /* copy the neighbour lists */ +#ifdef STATIC_LISTS +#elif LOWMEM_LISTS +#else + memcpy(neighbour_i2,neighbour_i,27*sizeof(t_list)); +#endif + + ai=&(itom[i]); + brand_i=ai->brand; + + /* loop over atoms j */ + for(j=0;j<27;j++) { + + bc_ij=(jsubcell->list[p]; // thread safe! +#else + this=&(neighbour_i[j]); + list_reset_f(this); + + if(this->start==NULL) + continue; + + do { + + jtom=this->current->data; +#endif + + if(jtom==&(itom[i])) + continue; + + if(!(jtom->attr&ATOM_ATTR_3BP)) + continue; + + /* reset 3bp run */ + moldyn->run3bp=1; + + +/* j1 func here ... */ +/* albe 3 body potential function (first ij loop) */ + + /* reset zeta sum */ + zeta_ij=0.0; + + /* + * set ij depending values + */ + + if(brand_i==jtom->brand) { + S2=params->S2[brand_i]; + } + else { + S2=params->S2mixed; + } + + /* dist_ij, d_ij2 */ + v3_sub(&dist_ij,&(jtom->r),&(ai->r)); + if(bc_ij) check_per_bound(moldyn,&dist_ij); + d_ij2=v3_absolute_square(&dist_ij); + + /* if d_ij2 > S2 => no force & potential energy contribution */ + if(d_ij2>S2) + continue; + + /* d_ij */ + d_ij=sqrt(d_ij2); + + /* reset k counter for first k loop */ + kcount=0; + + /* first loop over atoms k */ + for(k=0;k<27;k++) { + + bc_ik=(ksubcell->list[q]; +#else + that=&(neighbour_i2[k]); + list_reset_f(that); + + if(that->start==NULL) + continue; + + do { + ktom=that->current->data; +#endif + + if(!(ktom->attr&ATOM_ATTR_3BP)) + continue; + + if(ktom==jtom) + continue; + + if(ktom==&(itom[i])) + continue; + +/* k1 func here ... */ +/* albe 3 body potential function (first k loop) */ + + if(kcount>ALBE_MAXN) { + printf("FATAL: neighbours = %d\n",kcount); + printf(" -> %d %d %d\n",ai->tag,jtom->tag,ktom->tag); + } + + /* ik constants */ + if(brand_i==ktom->brand) { + Rk=params->R[brand_i]; + Sk=params->S[brand_i]; + Sk2=params->S2[brand_i]; + /* albe needs i,k depending c,d,h and gamma values */ + gamma_i=params->gamma[brand_i]; + c_i=params->c[brand_i]; + d_i=params->d[brand_i]; + h_i=params->h[brand_i]; + ci2=params->c2[brand_i]; + di2=params->d2[brand_i]; + ci2di2=params->c2d2[brand_i]; + } + else { + Rk=params->Rmixed; + Sk=params->Smixed; + Sk2=params->S2mixed; + /* albe needs i,k depending c,d,h and gamma values */ + gamma_i=params->gamma_m; + c_i=params->c_mixed; + d_i=params->d_mixed; + h_i=params->h_mixed; + ci2=params->c2_mixed; + di2=params->d2_mixed; + ci2di2=params->c2d2_m; + } + + /* dist_ik, d_ik2 */ + v3_sub(&dist_ik,&(ktom->r),&(ai->r)); + if(bc_ik) check_per_bound(moldyn,&dist_ik); + d_ik2=v3_absolute_square(&dist_ik); + + /* store data for second k loop */ + exchange->dist_ik[kcount]=dist_ik; + exchange->d_ik2[kcount]=d_ik2; + + /* return if not within cutoff */ + if(d_ik2>Sk2) { + kcount++; + continue; + } + + /* d_ik */ + d_ik=sqrt(d_ik2); + + /* cos theta */ + cos_theta=v3_scalar_product(&dist_ij,&dist_ik)/(d_ij*d_ik); + + /* g_ijk + h_cos=*(exchange->h_i)+cos_theta; // + in albe formalism + d2_h_cos2=exchange->di2+(h_cos*h_cos); + frac=exchange->ci2/d2_h_cos2; + g=*(exchange->gamma_i)*(1.0+exchange->ci2di2-frac); + dg=2.0*frac**(exchange->gamma_i)*h_cos/d2_h_cos2; // + in albe f.. + */ + + h_cos=h_i+cos_theta; // + in albe formalism + d2_h_cos2=di2+(h_cos*h_cos); + frac=ci2/d2_h_cos2; + g=gamma_i*(1.0+ci2di2-frac); + dg=2.0*frac*gamma_i*h_cos/d2_h_cos2; // + in albe f.. + + /* zeta sum += f_c_ik * g_ijk */ + if(d_ik<=Rk) { + zeta_ij+=g; + f_c_ik=1.0; + df_c_ik=0.0; + } + else { + s_r=Sk-Rk; + arg=M_PI*(d_ik-Rk)/s_r; + f_c_ik=0.5+0.5*cos(arg); + df_c_ik=0.5*sin(arg)*(M_PI/(s_r*d_ik)); + zeta_ij+=f_c_ik*g; + } + + /* store even more data for second k loop */ + exchange->g[kcount]=g; + exchange->dg[kcount]=dg; + exchange->d_ik[kcount]=d_ik; + exchange->cos_theta[kcount]=cos_theta; + exchange->f_c_ik[kcount]=f_c_ik; + exchange->df_c_ik[kcount]=df_c_ik; + + /* increase k counter */ + kcount++; + +#ifdef STATIC_LISTS + } +#elif LOWMEM_LISTS + } +#else + } while(list_next_f(that)!=\ + L_NO_NEXT_ELEMENT); +#endif + + } + +/* j2 func here ... */ + + + if(brand_i==jtom->brand) { + S=params->S[brand_i]; + R=params->R[brand_i]; + B=params->B[brand_i]; + A=params->A[brand_i]; + r0=params->r0[brand_i]; + mu=params->mu[brand_i]; + lambda=params->lambda[brand_i]; + } + else { + S=params->Smixed; + R=params->Rmixed; + B=params->Bmixed; + A=params->Amixed; + r0=params->r0_mixed; + mu=params->mu_m; + lambda=params->lambda_m; + } + + /* f_c, df_c */ + if(d_ijtag]))) + perror("[albe fast] mutex lock (1)\n"); + v3_add(&(ai->f),&(ai->f),&force); + if(pthread_mutex_unlock(&(amutex[ai->tag]))) + perror("[albe fast] mutex unlock (1)\n"); + + /* force contribution for atom j */ + v3_scale(&force,&force,-1.0); // dri rij = - drj rij + if(pthread_mutex_lock(&(amutex[jtom->tag]))) + perror("[albe fast] mutex lock (2)\n"); + v3_add(&(jtom->f),&(jtom->f),&force); + if(pthread_mutex_unlock(&(amutex[jtom->tag]))) + perror("[albe fast] mutex unlock (2)\n"); + + /* virial */ + if(pthread_mutex_lock(&(amutex[ai->tag]))) + perror("[albe fast] mutex lock (3)\n"); + albe_v_calc(ai,&force,&(dist_ij)); + //virial_calc(ai,&force,&(dist_ij)); + if(pthread_mutex_unlock(&(amutex[ai->tag]))) + perror("[albe fast] mutex unlock (3)\n"); + +#ifdef DEBUG +if(moldyn->time>DSTART&&moldyn->timeatom[DATOM]))|(jtom==&(moldyn->atom[DATOM]))) { + printf("force 3bp (j2): [%d %d sum]\n",ai->tag,jtom->tag); + printf(" adding %f %f %f\n",force.x,force.y,force.z); + if(ai==&(moldyn->atom[0])) + printf(" total i: %f %f %f\n",ai->f.x,ai->f.y,ai->f.z); + if(jtom==&(moldyn->atom[0])) + printf(" total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z); + printf(" energy: %f = %f %f %f %f\n",0.5*f_c*(b*f_a+f_r), + f_c,b,f_a,f_r); + printf(" %f %f %f\n",zeta_ij,.0,.0); + } +} +#endif + + /* dzeta prefactor = - f_c f_a db, (* -0.5 due to force calc) */ + pre_dzeta=0.5*f_a*f_c*db; + + /* energy contribution */ + energy=0.5*f_c*(f_r-b*f_a); // - in albe formalism + if(pthread_mutex_lock(&emutex)) + perror("[albe fast] mutex lock (energy)\n"); + moldyn->energy+=energy; + if(pthread_mutex_unlock(&emutex)) + perror("[albe fast] mutex unlock (energy)\n"); + if(pthread_mutex_lock(&(amutex[ai->tag]))) + perror("[albe fast] mutex lock (4)\n"); + ai->e+=energy; + if(pthread_mutex_unlock(&(amutex[ai->tag]))) + perror("[albe fast] mutex unlock (4)\n"); + + /* reset k counter for second k loop */ + kcount=0; + + + /* second loop over atoms k */ + for(k=0;k<27;k++) { + + bc_ik=(ksubcell->list[q]; +#else + that=&(neighbour_i2[k]); + list_reset_f(that); + + if(that->start==NULL) + continue; + + do { + ktom=that->current->data; +#endif + + if(!(ktom->attr&ATOM_ATTR_3BP)) + continue; + + if(ktom==jtom) + continue; + + if(ktom==&(itom[i])) + continue; + + +/* k2 func here ... */ +/* albe 3 body potential function (second k loop) */ + + if(kcount>ALBE_MAXN) + printf("FATAL: neighbours!\n"); + + /* d_ik2 */ + d_ik2=exchange->d_ik2[kcount]; + + if(brand_i==ktom->brand) + Sk2=params->S2[brand_i]; + else + Sk2=params->S2mixed; + + /* return if d_ik > S */ + if(d_ik2>Sk2) { + kcount++; + continue; + } + + /* dist_ik, d_ik */ + dist_ik=exchange->dist_ik[kcount]; + d_ik=exchange->d_ik[kcount]; + + /* f_c_ik, df_c_ik */ + f_c_ik=exchange->f_c_ik[kcount]; + df_c_ik=exchange->df_c_ik[kcount]; + + /* g, dg, cos_theta */ + g=exchange->g[kcount]; + dg=exchange->dg[kcount]; + cos_theta=exchange->cos_theta[kcount]; + + /* cos_theta derivatives wrt j,k */ + dijdik_inv=1.0/(d_ij*d_ik); + v3_scale(&dcosdrj,&dist_ik,dijdik_inv); // j + v3_scale(&tmp,&dist_ij,-cos_theta/d_ij2); + v3_add(&dcosdrj,&dcosdrj,&tmp); + v3_scale(&dcosdrk,&dist_ij,dijdik_inv); // k + v3_scale(&tmp,&dist_ik,-cos_theta/d_ik2); + v3_add(&dcosdrk,&dcosdrk,&tmp); + + /* f_c_ik * dg, df_c_ik * g */ + fcdg=f_c_ik*dg; + dfcg=df_c_ik*g; + + /* derivative wrt j */ + v3_scale(&force,&dcosdrj,fcdg*pre_dzeta); + + /* force contribution */ + if(pthread_mutex_lock(&(amutex[jtom->tag]))) + perror("[albe fast] mutex lock (5)\n"); + v3_add(&(jtom->f),&(jtom->f),&force); + if(pthread_mutex_unlock(&(amutex[jtom->tag]))) + perror("[albe fast] mutex unlock (5)\n"); + +#ifdef DEBUG +if(moldyn->time>DSTART&&moldyn->timeatom[DATOM])) { + printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag); + printf(" adding %f %f %f\n",force.x,force.y,force.z); + printf(" total j: %f %f %f\n",jtom->f.x,jtom->f.y,jtom->f.z); + printf(" angle: %f\n",acos(cos_theta)*360.0/(2*M_PI)); + printf(" d ij ik = %f %f\n",d_ij,d_ik); + } +} +#endif + + /* virial */ + if(pthread_mutex_lock(&(amutex[ai->tag]))) + perror("[albe fast] mutex lock (6)\n"); + albe_v_calc(ai,&force,&dist_ij); + //virial_calc(ai,&force,&dist_ij); + + /* force contribution to atom i */ + v3_scale(&force,&force,-1.0); + v3_add(&(ai->f),&(ai->f),&force); + if(pthread_mutex_unlock(&(amutex[ai->tag]))) + perror("[albe fast] mutex unlock (6)\n"); + + /* derivative wrt k */ +#ifdef MATTONI + v3_scale(&tmp,&dcosdrk,fcdg); + v3_scale(&force,&tmp,pre_dzeta); +#else + v3_scale(&force,&dist_ik,-1.0*dfcg); // dri rik = - drk rik + v3_scale(&tmp,&dcosdrk,fcdg); + v3_add(&force,&force,&tmp); + v3_scale(&force,&force,pre_dzeta); +#endif + + /* force contribution */ + if(pthread_mutex_lock(&(amutex[ktom->tag]))) + perror("[albe fast] mutex lock (7)\n"); + v3_add(&(ktom->f),&(ktom->f),&force); + if(pthread_mutex_unlock(&(amutex[ktom->tag]))) + perror("[albe fast] mutex unlock (7)\n"); + +#ifdef DEBUG +if(moldyn->time>DSTART&&moldyn->timeatom[DATOM])) { + printf("force 3bp (k2): [%d %d %d]\n",ai->tag,jtom->tag,ktom->tag); + printf(" adding %f %f %f\n",force.x,force.y,force.z); + printf(" total k: %f %f %f\n",ktom->f.x,ktom->f.y,ktom->f.z); + printf(" angle: %f\n",acos(cos_theta)*360.0/(2*M_PI)); + printf(" d ij ik = %f %f\n",d_ij,d_ik); + } +} +#endif + + /* virial */ + if(pthread_mutex_lock(&(amutex[ai->tag]))) + perror("[albe fast] mutex lock (8)\n"); + albe_v_calc(ai,&force,&dist_ik); + //virial_calc(ai,&force,&dist_ik); + + /* force contribution to atom i */ + v3_scale(&force,&force,-1.0); + v3_add(&(ai->f),&(ai->f),&force); + if(pthread_mutex_unlock(&(amutex[ai->tag]))) + perror("[albe fast] mutex unlock (8)\n"); + + /* increase k counter */ + kcount++; + + + +#ifdef STATIC_LISTS + } +#elif LOWMEM_LISTS + } +#else + } while(list_next_f(that)!=\ + L_NO_NEXT_ELEMENT); +#endif + + } + +#ifdef STATIC_LISTS + } +#elif LOWMEM_LISTS + } +#else + } while(list_next_f(this)!=L_NO_NEXT_ELEMENT); +#endif + + } + + } // i loop + +#ifdef DEBUG + //printf("\n\n"); +#endif +#ifdef VDEBUG + printf("\n\n"); +#endif + +#ifdef DEBUG + //printf("\nATOM 0: %f %f %f\n\n",itom->f.x,itom->f.y,itom->f.z); + if(moldyn->time>DSTART&&moldyn->timeatom[DATOM].f.x); + printf(" y: %0.40f\n",moldyn->atom[DATOM].f.y); + printf(" z: %0.40f\n",moldyn->atom[DATOM].f.z); + } +#endif + + pthread_exit(NULL); + + return 0; +} + +int albe_potential_force_calc(t_moldyn *moldyn) { + + int i,j,ret; + t_pft_data pft_data[MAX_THREADS]; + int count; + pthread_t pft_thread[MAX_THREADS]; + t_atom *itom; + t_virial *virial; + + count=moldyn->count; + itom=moldyn->atom; + + /* reset energy */ + moldyn->energy=0.0; + + /* reset global virial */ + memset(&(moldyn->gvir),0,sizeof(t_virial)); + + /* reset force, site energy and virial of every atom */ + for(i=0;ixx=0.0; + virial->yy=0.0; + virial->zz=0.0; + virial->xy=0.0; + virial->xz=0.0; + virial->yz=0.0; + + /* reset site energy */ + itom[i].e=0.0; + + } + + /* start threads */ + for(j=0;jgvir.xx+=itom[i].r.x*itom[i].f.x; + moldyn->gvir.yy+=itom[i].r.y*itom[i].f.y; + moldyn->gvir.zz+=itom[i].r.z*itom[i].f.z; + moldyn->gvir.xy+=itom[i].r.y*itom[i].f.x; + moldyn->gvir.xz+=itom[i].r.z*itom[i].f.x; + moldyn->gvir.yz+=itom[i].r.z*itom[i].f.y; + + /* check forces regarding the given timestep */ + if(v3_norm(&(itom[i].f))>\ + 0.1*moldyn->nnd*itom[i].mass/moldyn->tau_square) + printf("[moldyn] WARNING: pfc (high force: atom %d)\n", + i); + } + + return 0; +} + +#endif // PTHREADS