/* \make plb2diff

This utility automates diffusion (D) and conductivity (kappa) calculations.

07/2025: update to match new features of cook, small improvements
09/2019: bit of cleaning
05/2017: V3.0 compatible with cook V3.0a and newer, viscosity removed
         env.variable PLB2DIFF removed, synopsis changed
07/2011: env.variable PLB2DIFF==3: fit MSD(t) to a+bt+c/t^1/2
06/2005: unsuccessful attempt to extend to viscosity via Einstein relation
         (it is likely impossible)
10/2000: interface to new option -g of cook

Requirements:
~~~~~~~~~~~~~
  Cubic periodic b.c. (NOT TESTED with a general rectangular box)
  Stored configurations:
    either in a plb-file (SIMNAME.plb, generated by cook* - any version)
    or SIMNAME.1,SIMNAME.2,... (generated by cook* -r, NOT TESTED)
  Utility plot to view graphs
  cook V3.0a or later

Synopsis - call plb2diff without args!
~~~~~~~~

Results:

  SIMNAME.m0.dat, SIMNAME.m1.dat ... : results of linear regression of
    individual atom msd of all blocks.  The points (column 2 on the files)
    are the slopes with -o points from the block omitted, i.e. after
    dividing by dt.plb [[or dt.cfg]] the diffusion coefficients D.
    Note that the larger coverage, the more correlated data.
  SIMNAME.difm.dat : these data should be very small because of momentum
    conservation!!!
  These files are plotted in the first window named "diffusion SIMNAME".
  Three curves of the same color correspond to linear regression error bars.

  SIMNAME.q0.dat, SIMNAME.q1.dat, as SIMNAME.m0.dat,... with charge weights,
    see mscd
  SIMNAME.cond.dat : total conductivities estimated from blocks (if -q given)

  These files are plotted in the 2nd window named "conductivity SIMNAME"

  SIMNAME.msd.dat : <MSD(t)> averaged over all blocks (if -m# given)
    Column 1 = t/dt, column 2 = species 0, ..., last column = difm (should be
    around zero)
  This file (without the last column) is plotted in the 3rd window
    "<MSD(t)-b*t SIMNAME>"

  SIMNAME.mscd.dat : <MSCD(t)> averaged over all blocks, giving partial
    conductivities and the conductivity (last column=cond) (if -q given)
  This file (all columns) is plotted in the 4th window "<MSCD(t)-b*t SIMNAME>"

  SIMNAME.cdf :
    1/ coverage = see above
    2/ block info
    3/ averages and error analysis over linear regression results of
       individual blocks (=data in SIMNAME.m0.dat, ..., SIMNAME.cond.dat).
       The errors should be estimated from the statistical analysis!!!
       NEW: Approximate error estimates (stdef*sqrt(coverage)) are given.
    4/ linear regression of averaged data (=files SIMNAME.msd.dat and
       SIMNAME.mscd.dat)
       NOTE: the `l.r.err' should NOT be used as the estimate of error
       because the data are correlated!!!

Problems:
  If difm is not around zero, a warning is printed.  It may be caused by:
  - too long dt.plb so that particles travel by more than L/2 between
    frames and the algorithm cannot follow the trajectory over
    periodic images.  A separate warning may be printed if jumps close
    to L/2 are found, these warnings can be safely ignored if difm
    oscillates uniformly around zero and is tiny
  - changes of parameters during simulation (like noint, dt.plb, tau.P,...)
  - a restart from crash (more frames in a plb-file)
  - wrong SIMNAME.cfg (only with tau.P)
  - wrong newt used instead of SIMNAME.cfg (only with tau.P)
  - wrong (shifted) convergence profile, .cp x .cpz clash (only with tau.P)

Optimization:
  Using 256 molecules of tip4pew water, 80ps blocks and 10ps omitted,
  3ns total length,  the following dependence of the relative error on
  the "true" coverage was obtained:
# coverage  error/stdev
  10        0.71471
   5        0.71683
   3.33333  0.72299
   2.5      0.73523
   2        0.74933
   1.66667  0.74498
   1.42857  0.79346
   1.25     0.82888
   1.11111  0.88120
   1        0.98133
   0.83333  1.13038
   0.625    1.25066
   0.5      1.39198
   0.4      1.58114
   1        1 (independent blocks)
It means that coverage around 3 gives about 0.73 times smaller error
  than if the same data were generated as independent blocks (i.e.,
  almost 0.73^2~0.5 CPU time is needed).
Efficiency increases by a marginal 1-2% if the coverage is increased to 5
  or more.

Bugs:
  multiplication factors are not calculated automatically and must be given
  by options -m, -q
*/

#include "ground.h"
#include "linregr.h"
#include "sdfit.h"
#include "cpmark.h"
#include "statics.h"
#include "maxjump.h"
#include <unistd.h>

FILE *sh;

void Setenv(char *name,char *value) /******************************** Setenv */
{
  if (setenv(name,value,1)) ERROR(("setenv(%s,%s,1)",name,value))
  fprintf(sh,"export %s=\"%s\"\n",name,getenv(name));
}

int plbno(char *fn) /************************************************* plbno */
{
  FILE *f=fopen(fn,"r");
  float h[2];
  int len;

  if (!f) ERROR(("cannot open %s",fn))
  if (fread(h,4,2,f)!=2) ERROR(("%s too short",fn))
  fseek(f,0,SEEK_END);

  len=ftell(f)/(long)(((int)h[0]+(h[1]<0.))*12);
  fclose(f);

  fprintf(stderr,"%d frames detected in %s\n",len,fn);

  return len;
}

int main(int narg,char **arg) /**************************************** main */
{
  int NO=0,START=0,NBLOCKS=0;
  double MFACTOR=0,QFACTOR=0,BLOCK=0.25,OMIT=0.25,erasetemp=1,boxfollow=0;
  int PLOT=15,NPT=0;
  int init,no,iblock,ncps[128],ncp,icp,pass,iarg,ir;
  FILE *get=NULL,**mf=NULL,**qf=NULL;
  float *r=NULL;
  double **mr=NULL,**qr=NULL,**xr;
  struct sum_s {
    double s,q;
    int n;
  } *summ,*sumq,*sum;
  double ERR;
  char *COOK=NULL,*SYSNAME=NULL,*SIMNAME=NULL,*fn,*cmd,*mode;
  static char plotm[1024]="plot ",plotq[1024]="plot ";
  double av=0,err,cov,COVERAGE=0;
  double msdb=0,mscdb=0;
  char *defaultopt="-m1 -y0 -w0 -r1";
  char *extraopt="",*geometry="800x600";
  static char progress[80]="-------------------------------------------------------------------------------";

  void (*MYAdd)(char *name,double weight,double X,double Y);
  double (*MYRes)(char *name,char *key);
  void (*MYFree)(void);

#if CHECKHEAP==2
  AllocRange=128;
#endif

  memset(ncps,0,sizeof(ncps));

  if (narg<2) {
    fprintf(stderr,"\
*** plb2diff V3.0, compatible with cook V3.0a and later ***\n\
\n\
  Difusivity and conductivity are calculated from stored SIMNAME.plb.\n\
  The trajectory is split into (generally overlapping) blocks, which are\n\
  re-read by COOK to calculate square displacements via Einstein formulas.\n\
  Averaging and statistics is finished by plb2diff.\n\
\n\
CALL BY:\n\
  plb2diff [OPTIONS] COOK SYSNAME SIMNAME [OPTIONS]\n\
\n\
ARGUMENTS:\n\
  COOK    cook version (or a script calling cook) for block calculations\n\
  SYSNAME refers to SYSNAME.ble (and SYSNAME.def if SIMNAME.def is missing)\n\
  SIMNAME simulation name of the original simulation (SIMNAME.*)\n\
\n\
OPTIONS (#=integer or real number):\n\
  -a\'COOK_OPTIONS\'\n\
       additional options appended after \'-y0 -w0 -m1 -r1\'\n\
       -a\'-a0\' is recommended for POLAR except Gaussian charges on springs\n\
  -b#  int: block size (# of frames) for 1 analysis\n\
       #<1: as a fraction of the full length given by -n#\n\
  -B#  number of blocks (the blocks may overlap), exclusive with -c\n\
  -c#  coverage of blocks over the trajectory [default=3]\n\
       for coverage<1 there are unused data\n\
       for coverage>1 the blocks overlap, data become correlated\n\
  -f#  fit Mean Square (Charge) Displacements (MSD, MSCD) to [2]\n\
         2=a+b*t, incl. error estimates [default]\n\
         3=a+b*t+c/sqrt(t) (hydrodynamic tail, no error estimates)\n\
  -F#  box.follow for cook [default=0]\n\
  -g#x# plot geometry (size without position) [800x600]\n\
  -k   keep all temporary files [default=remove]\n\
  -m#  calculate mass diffusivity by species [0=do not calculate]\n\
       #=factor to multiply, in cook program units ps,AA,K as follows:\n\
         NVT simulation: 1/dt.plb\n\
         NPT simulation: <V>^(2/3)/dt.plb  NB: use option -P\n\
  -n#  # of frames to read [default=0=whole SIMNAME.plb]\n\
  -o#  int: # of frames not used for linear regression at block beginning\n\
       #<1: in the units of block size (see -b) [0.25]\n\
       Hint: check the <X(t)> graphs for linear parts\n\
  -p#  launch plots, sum of bits [15=all]:\n\
         1=diffusivity/block as a function of block #\n\
         2=conductivity/block as a function of block #\n\
         4=MSD(t) (mean square displacement), averaged over blocks\n\
         8=MSCD(t) (mean square charge displacement), averaged over blocks\n\
       Plots of MSD,MSCD have a linear term substracted, with b exported;\n\
       for MSD of the 1st species, for MSCD of the sum (giving the bulk\n\
       conductivity). Use hotkeys b B ^B / * or button [b] to change b\n\
       The last column for MSD should be noisy zero (with b=0)\n\
  -P#  tau.P (or any nonzero) for NPT [default=0=NVT]\n\
  -q#  calculate conductivity [0=do not calculate]\n\
       #=factor to multiply, in cook program units ps,AA,K as follows:\n\
       NVT simulation: 1/(V*T*dt.plb)\n\
       NPT simulation: 1/(<V>^(1/3)*T*dt.plb)  NB: use option -P\n\
  -s#  # of frames skipped from start [0=none]\n\
       use #>0 if the beginning is unequilibrated\n\
\n\
INPUT FILES (as used and written by simulation COOK SYSNAME SIMNAME)\n\
  SIMNAME.plb, SIMNAME.cfg, SYSNAME.ble, SYSNAME.def or SIMNAME.def\n\
\n\
OUTPUT FILES:\n\
  SIMNAME~d#.get : temporary file created by plb2diff for cook -m1\n\
                   # = 0,1,...; see option -k\n\
  SIMNAME~d#.def : symlink of SIMNAME.def\n\
  SIMNAME~d#.cfg : symlink of SIMNAME.cfg\n\
  SIMNAME~d#.[mq].cp : block MSD, MSCD from cook\n\
  SIMNAME~d#.* : and other cook -m1 output files:\n\
  SIMNAME.cdf : plb2diff protocol\n\
  SIMNAME.msd.dat : <MSD(t)>-b*t averaged over all blocks\n\
  SIMNAME.mscd.dat : <MSCD(t)>-b*t averaged over all blocks\n\
    MSD and MSCD are divided by 2*DIM = 6, so that slope = diffusivity\n\
    b is obtained by regression, type ctrl-b or [b] [=0] to set b:=0\n\
  SIMNAME.m#.dat : diffusivity results by species by blocks, #=0,1..=species\n\
  SIMNAME.q#.dat : as above, recalculated to partial conductivities\n\
  SIMNAME.difm.dat : total diffusivity: should be \"numerical zero\"\n\
             peaks indicate problems with following atoms over periodic b.c.\n\
  SIMNAME.difm.dat : conductivity by blocks (with standard errors)\n\
\n\
BUGS:\n\
  version cook -m0 (stored SIMNAME.1,.. instead of SIMNAME.plb) not tested\n\
  multiplication factors are not calculated and must be given as options -m,-q\n\
\n\
EXAMPLE:\n\
  plb2diff cookpol -a-a0 ions salt -n2000 -b.1 -o.2 -c3 -m13171 -q.0007807\n\
\n\
SEE ALSO:\n\
  plb2diff2 (older version) cook* plbinfo plbmsd plbbox plot showcp\n");
   exit(0); }

  initscroll(0);
  stringinit(128,5);

  MYFree=LRFree;
  MYAdd=LRAdd;
  MYRes=LRRes;

  loop (iarg,1,narg)
    if (arg[iarg][0]=='-') {
      double f=atof(arg[iarg]+2);
      switch (arg[iarg][1]) {
        case 'f': switch ((int)f) {
          case 2: break;
          case 3:
            fprintf(stderr,"WARNING: using fit to  a+b*t+c/sqrt(t), error of fit not available\n");
            MYFree=SDFree;
            MYAdd=SDAdd;
            MYRes=SDRes;
            break;
          default:
            ERROR(("only -f2 or -f3 supported")) }
          break;
        case 'F': boxfollow=f; break;
        case 'n': NO=f; break;
          NO=f;
          break;
        case 's': START=f; break;
        case 'b': BLOCK=f; break;
        case 'o': OMIT=f; break;
        case 'c': COVERAGE=f; break;
        case 'B': NBLOCKS=f; break;
        case 'm': MFACTOR=f; break;
        case 'q': QFACTOR=f; break;
        case 'p': PLOT=f; break;
        case 'k': erasetemp=0; break;
        case 'a': extraopt=arg[iarg]+2; break;
        case 'g': geometry=arg[iarg]+2; break;
        case 'P': NPT=f; break;
        default: ERROR(("%s: unknown option",arg[iarg])) } }
    else {
      if (COOK) ERROR(("%s: third position arguments (not -OPTION) invalid",arg[iarg]))
      COOK=SYSNAME;
      SYSNAME=SIMNAME;
      SIMNAME=arg[iarg]; }

  if (!COOK) ERROR(("three position arguments (COOK SYSNAME SIMNAME) expected\n\
*** (if your COOK is a script not accepting arguments, use dummy ones)"))

  if (NO<0) ERROR(("invalid NO=%d",NO))
  if (NO==0) NO=plbno(string("%s.plb",SIMNAME));

  if (BLOCK<0) { ERROR(("invalid option -b%g",BLOCK)) BLOCK=0; }
  if (BLOCK<=1) {
    BLOCK=(int)(BLOCK*NO);
    prt("BLOCK=%g frames set",BLOCK); }

  if (OMIT<0) { ERROR(("invalid option -o%g",OMIT)) OMIT=0; }
  if (OMIT<1) {
    OMIT=(int)(OMIT*BLOCK);
    prt("OMIT=%d frames set",OMIT); }

  fprintf(stderr,"%s %s %s %s %s <SIMNAME>\n",COOK,defaultopt,extraopt,SYSNAME,SIMNAME);

  if (NO<BLOCK) ERROR(("NO=%d < BLOCK=%d",NO,BLOCK))
  if (OMIT>=BLOCK) ERROR(("OMIT=%d >= BLOCK=%d",OMIT,BLOCK))
  if (START>=NO) ERROR(("START=%d >= NO=%d",START,NO))

  if (COVERAGE && NBLOCKS) ERROR(("only one of options -c and -B allowed"))
  if (!COVERAGE && !NBLOCKS) COVERAGE=3;

  if (COVERAGE)
    for (NBLOCKS=1; COVERAGE>(double)BLOCK*(NBLOCKS-0.999999999999999)/(NO-BLOCK-START); NBLOCKS++) ;

  fprintf(stderr,"NO=%d START=%d BLOCK=%g OMIT=%g NBLOCKS=%d\n",
          NO,START,BLOCK,OMIT,NBLOCKS);

  if (MFACTOR==0 && QFACTOR==0) ERROR(("nothing to do because none of option -m,-q specified"))

  out=fopen(string("%s.cdf",SIMNAME),"wt");

  if (OMIT==0) {
    if (MYRes==LRRes) WARNING(("OMIT=0 means that MSD(0)=0 is included in regression"))
    else ERROR(("OMIT=0 would lead to division by zero in c/sqrt(t) because MSD(0)=0")) }

  prt_("Called by:\n ");
  loop (ir,0,narg)
    if (strpbrk(arg[ir]," \t")) prt_(" \"%s\"",arg[ir]);
    else prt_(" %s",arg[ir]);

  prt("\nno=%d frames",NO);
  prt("Coverage = (frames in all blocks)/(all frames) = %g",
      cov=(double)BLOCK/(NO-BLOCK-START)*(NBLOCKS-1));
  if (cov<0.9) WARNING(("Too small coverage - there are unused data, recommended 3"))
  if (cov>10) WARNING(("Too large coverage - data are correlated, recommended 3"))
  if (cov<2 || cov>5) fprintf(stderr,"Coverage=%g is outside the optimum range [2,5]\n",cov);

  StaSet(1,2,2,4);

  loop (iblock,0,NBLOCKS) {
    char BASENAME[256];

    sprintf(BASENAME,"%s~d%d",SIMNAME,iblock);

    if (NBLOCKS==1) init=1;
    else init=1+START+(long long int)iblock*(NO-BLOCK-START)/(NBLOCKS-1);
    no=init+BLOCK-1;
    prt("block %-3d: from=%-7d to=%-7d",iblock+1,init,no);

    progress[(int)((double)(iblock+1)/NBLOCKS*78.9)]='%';
    fprintf(stderr,"%s\n",progress);
    fprintf(stderr,"## %.2f%% = %d/%d ## frames %d-%d ##\n",
            (double)(iblock+1)/NBLOCKS*100.,
            iblock+1,NBLOCKS,
            init,no);

    cmd=string("%s %s %s %s %s %s.plb",
               COOK,defaultopt,extraopt,SYSNAME,BASENAME,SIMNAME);
    get=fopen(fn=string("%s.get",BASENAME),"wt");
    if (!get) ERROR(("cannot write to %s",fn))
                fprintf(get,"! %s",cmd);
    fprintf(get,"\n\
MSD.mode=%d\n\
thermostat=-1\n\
reread.from=%d reread.to=%d\n\
box.follow=%g\n\
%s\n\
dt.plb=1 init=2;\n",(MFACTOR!=0)+2*(QFACTOR!=0),
            init,no,
            boxfollow,
            NPT?"tau.P=1 tau.rho=0":"tau.P=0 tau.rho=1");
    if (fclose(get)) ERROR(("%s write error",fn))

    fn=string("ln -sf %s.def %s.def",SIMNAME,BASENAME);
    if (system(fn)) ERROR(("%s: execution error",fn))
    fn=string("ln -sf %s.cfg %s.cfg",SIMNAME,BASENAME);
    if (system(fn)) ERROR(("%s: execution error",fn))

    if (system(cmd)) ERROR(("%s: execution error",cmd))

    for (mode="mq"; *mode; mode++) {
      FILE *cp=fopen(fn=string("%s.%c.cp",BASENAME,*mode),"rb");
      char fn0[256];
      float h=0;

      strcpy(fn0,fn);

      if (cp) {
        fread(&h,4,1,cp);
        if (h!=CPmark) ERROR(("%s bad header",fn0))
        fread(&ncp,4,1,cp);
#define NVCP 14 /* normally 10 */
        if (ncp<2 || ncp>1000) {
          ERROR(("%s : NCP=%d read",fn0,ncp))
          ncp=2; }
        ncps[(int)*mode]=ncp;
        loop (icp,2,ncp) fread(&h,4,1,cp);

        if (ncps['q'] && ncps['m']!=ncps['q'])
          ERROR(("NCP=%d in %s.m.cp != NCP=%d in %s.q.cp",
                 ncps['m'],SIMNAME,ncps['q'],SIMNAME))

        /* allocations in 1st iblock only */
        if (iblock==0) switch (*mode) {
          case 'm':
            alloc(r,max(NVCP,ncp)*4);
            allocarray(mr,BLOCK);
            loop (ir,0,BLOCK) allocarrayzero(mr[ir],ncp);
            allocarrayzero(mf,ncp);
            allocarrayzero(summ,ncp);
            break;
          case 'q':
	    allocarray(qr,BLOCK);
	    loop (ir,0,BLOCK) allocarrayzero(qr[ir],ncp);
	    allocarrayzero(qf,ncp);
	    allocarrayzero(sumq,ncp);
	    break;
          default:
            ERROR((mode)); }

	loop (ir,0,BLOCK) {
	  if (ncp!=fread(r,4,ncp,cp)) ERROR(("%s truncated",fn0))
          if (ir>=OMIT) {
            loop (icp,0,ncp)
              MYAdd(icp<ncp-1 ? string("%c%d",*mode,icp)
                              : *mode=='m' ? "difm" : "cond",
                    1./(ir+(ir==0)), /* weight */
                    ir, r[icp]); }
          //          prt("%g %g %g DBG",ir+1.,r[0],1/(ir+1.));
          if (*mode=='m') loop (icp,0,ncp) mr[ir][icp] += r[icp];
          else            loop (icp,0,ncp) qr[ir][icp] += r[icp]; }

        loop (icp,0,ncp) {
	  FILE **f = *mode=='m' ? mf : qf;
	  char *name=icp<ncp-1
	    ? string("%c%d",*mode,icp)
	    : *mode=='m' ? "difm" : "cond";

          sum = *mode=='m' ? summ : sumq;
          //prt("%s a=%g b=%g c=%g",name,MYRes(name,"A"),MYRes(name,"B"),MYRes(name,"C"));
	  av=MYRes(name,"B");
	  sum[icp].s+=av;
	  sum[icp].q+=av*av;
	  sum[icp].n++;

	  err=MYRes(name,"DB");
	  if (!f[icp]) {
	    char *fn=string("%s.%s.dat",SIMNAME,name);

	    f[icp]=fopen(fn,"wt");
	    sprintf(strend(*mode=='m'?plotm:plotq),
		    " %s:A:B:-:C",fn);

	    fprintf(f[icp],"# ");

	    loop (ir,0,narg)
	      if (strpbrk(arg[ir]," \t")) fprintf(f[icp]," \"%s\"",arg[ir]);
	      else fprintf(f[icp]," %s",arg[ir]);

	    fprintf(f[icp],"\n# init lin.regr.slope lin.regr.err\n"); }
	  fprintf(f[icp],"%d %g %g\n",init,av,err);
	  StaAdd(name,av); }
	StaAdd(string("sqrt(|%c|)",*mode),sqrt(fabs(av)));
	fclose(cp); }
      else {
	static int pass;

	if (!pass++)
          WARNING(("missing %s (MORE WARNINGS SUPPRESSED)",fn)) }
      } /* mode */

    if (iblock) fn=string("cat %s.aux >> %s.aux",BASENAME,SIMNAME);
    else fn=string("mv %s.aux %s.aux",BASENAME,SIMNAME);
    if (system(fn)) ERROR(("%s: execution error",fn))

    if (erasetemp) {
      fn=string("rm %s.*",BASENAME);
      if (system(fn)) ERROR(("%s: execution error",fn)) }

    MYFree();
   } /* iblock */

  loop (icp,0,ncps['m'])
    if (!mf[icp]) ERROR(("close m %d",icp)) else fclose(mf[icp]);
  loop (icp,0,ncps['q'])
    if (!qf[icp]) ERROR(("close q %d",icp)) else fclose(qf[icp]);

  StaPrintAll(NULL);

  /* MSD(t) and MSCD(t): print files and calculate fits */
  for (mode="mq"; *mode; mode++) {
    FILE *msd;

    xr=*mode=='m' ? mr : qr;
    msd=fopen(string("%s.%s.dat",SIMNAME,*mode=='m'?"msd":"mscd"),"wt");
    fprintf(msd,"# %s %s %s\n",arg[3],arg[4],arg[5]);
    if (*mode=='m')
      fprintf(msd,"#t/dt  <MSCD_0(t)> ... <sum MSCD(t)> (NB: factor 1/6 included)\n");
    else {
      fprintf(msd,"#t/dt  <MSD_0(t)> ... <sum MSD(t)> (NB: factor 1/6 included)\n");
      fprintf(msd,"# the last column <sum MSD(t)> should be tiny; peaks indicate periodic b.c. problems\n"); }
    fprintf(msd,"#    indented points not used in the fit\n");

    pass=1;
    loop (ir,0,BLOCK) {
      if (ir>=OMIT) {
	loop (icp,0,ncps[(int)*mode]) {
          char *name=icp<ncps[(int)*mode]-1 ? string("%c%d",*mode,icp)
                                            : *mode=='m'?"difm":"cond";
          if (pass) fprintf(msd," %s",name);
          MYAdd(name, 1./(ir+(ir==0)), ir, xr[ir][icp]/NBLOCKS); }
        fprintf(msd,"%2d ",ir); }
      else
        fprintf(msd,"   %2d ",ir); /* line shifted: data not used in fit */
      loop (icp,0,ncps[(int)*mode])
	fprintf(msd,"%g%c", xr[ir][icp]/NBLOCKS,"\n "[icp<ncps[(int)*mode]-1]);
      if (pass) fprintf(msd,"\n");
      pass=0; }

    fclose(msd); }

  /* MSD(t) and MSCD(t): print fits */
  for (mode="mq"; *mode; mode++) {
    FILE *fit;
    double a,b,c,x,y;

    xr=*mode=='m' ? mr : qr;
    fit=fopen(string("%s.%s.fit",SIMNAME,*mode=='m'?"msd":"mscd"),"wt");
    fprintf(fit,"# %s %s %s\n",arg[3],arg[4],arg[5]);
    fprintf(fit,"#t/dt  <MSCD_0(t)> ... <sum MSCD(t)>=0\n");
    fprintf(fit,"#    indented points not used in the fit\n");
    loop (icp,0,ncps[(int)*mode]) {
      char *name=icp<ncps[(int)*mode]-1 ? string("%c%d",*mode,icp)
                                        : *mode=='m'?"difm":"cond";
      fprintf(fit,"# species %s: a=%.9g b=%.9g %s\n",
              icp==ncps[(int)*mode]-1?"sum":string("%d",icp),
              MYRes(name,"A"),MYRes(name,"B"),
              MYRes==SDRes?string("c=%.9g",MYRes(name,"C")):""); }

    pass=1;
    for (x=0.5; x<BLOCK-0.3; x+=0.25) {
      if (fabs(OMIT+0.5-x)<0.03) fprintf(fit,"# end of extrapolated line\n#");
      fprintf(fit,"%.2f ",x);
      loop (icp,0,ncps[(int)*mode]) {
        char *name=icp<ncps[(int)*mode]-1 ? string("%c%d",*mode,icp)
                                          : *mode=='m'?"difm":"cond";
        if (pass) fprintf(fit," %s",name);
        a=MYRes(name,"A");
        b=MYRes(name,"B");
        if (*mode=='m' && !icp) msdb=b; /* the 1st species */
        if (*mode=='q') mscdb=b; /* the sum */
        y=a+x*b;
        if (MYRes==SDRes) {
          c=MYRes(name,"C");
          y=a+x*b+c/sqrt(x); }

	fprintf(fit,"%g%c", y,"\n "[icp<ncps[(int)*mode]-1]); }

      if (pass) fprintf(fit,"\n");
      pass=0; }

    fclose(fit); }

  prt("\n\n\
In the following statistics, ERR was obtained from blocks (see data in\n\
files SIMNAME.m0.dat, SIMNAME.q0.dat, SIMNAME.m1.dat, ..., SIMNAME.visc.dat)\n\
treated as independent data and for coverage>1 multiplied by sqrt(coverage)\n\
Relative error=ERR/fit slope\n");

  if (MYAdd==LRAdd) prt("FITTING FUNCTION = a+b*t (no hydrodynamic tail)");
  else prt("FITTING FUNCTION = a+b*t+c/sqrt(t) (linear with with hydrodynamic tail)");

  for (mode="mq"; *mode; mode++) {
    int NCP=ncps[(int)*mode];

    switch (*mode) {
      case 'm':
	prt("\nDIFFUSION");
	header(" name   n   fit slope    fit err  stderr      D/MFACTOR     D[m^2/s]   stderr ");
	break;
      case 'q':
	prt("\nCONDUCTIVITY");
	header(" name   n   fit slope    fit err  stderr     kappa/QFACTOR  kappa[S/m] stderr ");
    }

    loop (icp,0,NCP) {
      char *name=icp<NCP-1
	? string("%c%d",*mode,icp)
	: *mode=='m' ? "difm" : "cond";
      sum = *mode=='m' ? summ : sumq;

      if (!sum) continue;

      av=MYRes(name,"B");
      err=MYRes(name,"DB");

      if (fabs(av-sum[icp].s/sum[icp].n)/av>3e-6)
	WARNING(("%s: average inconsistent: %g %g",name,av,sum[icp].s/sum[icp].n))
      if (sum[icp].n>1)
	ERR=sqrt(fmax(cov,1.0)*(sum[icp].q/sum[icp].n-Sqr(sum[icp].s/sum[icp].n))/(sum[icp].n-1));
      else
	ERR=0;

      prt_("%5s %4.0f %12.6g %8.2g %9.3g ",name,MYRes(name,"N"),av,err,ERR);
      switch (*mode) {
        case 'm':
	  prt(" %12.6g %12.6g %8.3g",
              av*1e-8,av*1e-8*MFACTOR, sqrt(err*err+ERR*ERR)*1e-8*MFACTOR);
	  break;
        case 'q':
	  prt(" %12.6g %12.6g %8.3g",
              av*111.26502, av*111.26502*QFACTOR, sqrt(err*err+ERR*ERR)*111.26502*QFACTOR);
      } }
    header("");

    switch (*mode) {
      case 'm':
        prt("diffusion coefficients [in m^2/s] are obtained by multiplying column\n\
D/MFACTOR by MFACTOR, which is:\n\
  1/dt.plb          for NVT simulation (tau.P=0)\n\
  <V>^(2/3)/dt.plb  for NPT simulation (tau.P>0) [??? for not-cube]\n\
<V> = averaged volume in AA^3 (see SIMNAME.prt)\n\
dt.plb is in ps [to be replaced by dt.cfg if SIMNAME.1,... are used and -f-1]\n\
NOTE: the generated .dat files are in program units and the additional factor\n\
(to get units m^2/s from the slope) is 1e-8");
	if (MFACTOR) prt("you specified MFACTOR=%g",MFACTOR);
	break;
      case 'q':
	prt("conductivities [in S/m] are obtained by multiplying column kappa/QFACTOR by\n\
QFACTOR, which is:\n\
  1/(V*T*dt.plb)          for NVT simulation (tau.P=0)\n\
  1/(<V>^(1/3)*T*dt.plb)  for NPT simulation (tau.P>0) [??? for not-cube]\n\
(T = temperature in K, V = volume in AA^3, dt.plb is in ps)\n\
NOTE: the generated .dat files are in program units and the additional factor\n\
(to get units S/m from the slope) is 111.26502");
	if (QFACTOR) prt("you specified QFACTOR=%g",QFACTOR);
	break; } }

  {
    static struct maxjump_s maxjump,max1jump;
    struct maxjump_s mj,m1j;
    int k;
    FILE *aux;

    prt("\nPERIODIC BOX STATISTICS");

    fn=string("%s.aux",SIMNAME);
    aux=fopen(fn,"rb");
    if (!aux) ERROR(("open %s",fn))

    prt("jump lengths over periodic b.c. separately in blocks");

    header(" max.Dx/Lx one.Dx/Lx max.Dy/Ly one.Dy/Ly max.Dz/Lz one.Dz/Lz  KEY ");

    loop (iblock,0,NBLOCKS) {

      if (!fread(&mj,sizeof(mj),1,aux)) ERROR(("%s truncated",fn))
      if (!fread(&m1j,sizeof(m1j),1,aux)) ERROR(("%s truncated",fn))

      loop (k,0,3) {
        prt_("%10.5f %8.5f ",mj.xi[k],m1j.xi[k]);
	if (fabs(mj.xi[k])>fabs(maxjump.xi[k])) {
	  maxjump.xi[k]=mj.xi[k];
	  maxjump.i[k]=mj.i[k];
	  maxjump.n[k]=mj.n[k];
	  maxjump.frame[k]=mj.frame[k];
	  maxjump.no  =mj.no; }
	if (fabs(m1j.xi[k])>fabs(max1jump.xi[k])) {
	  max1jump.xi[k]=m1j.xi[k];
	  max1jump.i[k]=m1j.i[k];
	  max1jump.n[k]=m1j.n[k];
	  max1jump.frame[k]=m1j.frame[k];
	  max1jump.no  =m1j.no; } }
      prt("  JUMP"); }

    header("");

    if (fread(&mj,sizeof(mj),1,aux)) ERROR(("unexpected extra data in %s",fn))

    loop (k,0,3) {
      prt("=== %c-axis ===\n\
max jump over periodic boxes in blocks:\n\
  %f*L (no=%d frame=%d mol.site=%d.%d)",
         k+'x',
         maxjump.xi[k],maxjump.no,maxjump.frame[k],maxjump.n[k],maxjump.i[k]);
     prt("max jump between consecutive frames:\n\
  %f*L (no=%d frames=%d->%d mol.site=%d.%d)",
     max1jump.xi[k],max1jump.no,max1jump.frame[k]-1,max1jump.frame[k],max1jump.n[k],max1jump.i[k]);
     if (fabs(max1jump.xi[k])>0.45)
       WARNING(("max jump between consecutive frames %.4f*L is very close to +-L/2\n\
*** Program cook* (called from plb2diff) checks the center-of-mass condition\n\
*** and may have fixed a jump of one molecule by more than half the box.\n\
*** You should check:\n\
    - table JUMP above\n\
    - check difm in table DIFFUSION and the last column of %s.msd.dat\n\
    - rerun plb2diff with -k and watch WARNING \'Center of mass has shifted\'",
                max1jump.xi[k],SIMNAME)) }

    if (fabs(MYRes("difm","B"))>3e-7)
      WARNING(("difm=%g is not tiny (up to 1e-5 acceptable for large systems)\n\
*** possible causes:\n\
*** - nonzero momentum (check variable  drift )\n\
*** - nearest image problems because of too long  dt.plb\n\
***   (see max jump between consecutive frames)",MYRes("difm","B")))

    fclose(aux);
    if (erasetemp) if (remove(fn)) ERROR(("cannot remove %s",fn))
  }

  sh=fopen(fn=string("%s.plot.sh",SIMNAME),"wt");
  if (!sh) ERROR(("open %s",fn))

  fprintf(sh,"#!/bin/sh\n");

  Setenv("PLOTGEOMETRY",string("%s+15+15",geometry));
  Setenv("PLOTNAME",string("diffusion %s",arg[2]));
  strcat(plotm," &");
  fprintf(sh,"%s\n\n",plotm);
  if (PLOT&1) { system(plotm); sleep(1); }

  Setenv("PLOTGEOMETRY",string("%s+30+30",geometry));
  Setenv("PLOTNAME",string("conductivity %s",arg[2]));
  strcat(plotq," &");
  fprintf(sh,"%s\n\n",plotq);
  if (PLOT&2) { system(plotq); sleep(1); }

  // NEW: omitting the last column
  sprintf(plotm,"plot %s.msd.dat",SIMNAME);
  fn=strend(plotm);
  loop (icp,0,ncp-1) sprintf(strend(plotm),":1:\"%c-b*A:%do\" ",'B'+icp,icp+1);
  Setenv("PLOTGEOMETRY",string("%s+45+45",geometry));
  Setenv("PLOTNAME",string("<MSD(t)-b*t %s>",arg[2]));
  Setenv("b",string("%.9g",msdb));
  sprintf(strend(plotm)," %s.msd.fit",SIMNAME);
  loop (icp,0,ncp-1) sprintf(strend(plotm),":1:\"%c-b*A:%d-\" ",'B'+icp,icp+1);
  strcat(plotm," &");
  fprintf(sh,"%s\n\n",plotm);
  if (PLOT&4) { system(plotm); sleep(1); }

  sprintf(plotm,"plot %s.mscd.dat",SIMNAME);
  fn=strend(plotm);
  loop (icp,0,ncp) sprintf(strend(plotm),":1:\"%c-b*A:%do\" ",'B'+icp,icp+1);
  Setenv("PLOTGEOMETRY",string("%s+60+60",geometry));
  Setenv("PLOTNAME",string("<MSCD(t)-b*t %s>",arg[2]));
  Setenv("b",string("%.9g",mscdb));
  sprintf(strend(plotm)," %s.mscd.fit",SIMNAME);
  loop (icp,0,ncp) sprintf(strend(plotm),":1:\"%c-b*A:%d-\" ",'B'+icp,icp+1);
  strcat(plotm," &");
  fprintf(sh,"%s\n\n",plotm);
  if (PLOT&8) system(plotm);

  fclose(sh);
  system(string("chmod +x %s.plot.sh",SIMNAME));

  if (!erasetemp) fprintf(stderr,"Suggested command to erase temporary files:\n\
\\rm %s.aux %s~d[1-9]*.*\n",SIMNAME,SIMNAME);

  return 0;
}
