; $Id: $
;#######################################################################
;
; NAME:
;  dm_step_bin
;
; PURPOSE:
;  bin data according to given step
;
; CATEGORY:
;  general
;
; AUTHOR:
;  Yiming Qiu
;  NIST Center for Neutron Research
;  100 Bureau Drive, Gaithersburg, MD 20899-6102
;  United States
;  yiming.qiu@nist.gov
;  June, 2023
;
; LICENSE:
;  The software in this file is written by an employee of
;  National Institute of Standards and Technology
;  as part of the DAVE software project.
;
;  The DAVE software package is not subject to copyright protection
;  and is in the public domain. It should be considered as an
;  experimental neutron scattering data reduction, visualization, and
;  analysis system. As such, the authors assume no responsibility
;  whatsoever for its use, and make no guarantees, expressed or
;  implied, about its quality, reliability, or any other
;  characteristic. The use of certain trade names or commercial
;  products does not imply any endorsement of a particular product,
;  nor does it imply that the named product is necessarily the best
;  product for the stated purpose. We would appreciate acknowledgment
;  if the DAVE software is used or if the code in this file is
;  included in another product.
;
;#######################################################################

; parameter:
;   in_xstep:      step of binning
;   xdat:          array of xdat
; keyword:
;   checkfinite:   flag for checking finite values of xdat,ydat. finite values of yerr will always be checked
;   group_leader:  dialog parent uid 
;   ydat:          array of ydat, using the same binning as xdat
;   yerr:          array of yerr, using the same binning as xdat
;   xstart:        binning starting point, if absent, use the min(xdat) as the starting point
;   uniq_val:      if set, then use the given uniq values instead of the xstep to determine the bins
;   avgsum:        -1:weighted mean 0: arithmetic mean 1:sum, default 0
;   weight:        used as weight for avgsum = -1. if not present, use the error bar
;   bintr:         binning threshold, bins with fewer data points than bintr*(max number of points in a bin) will be discarded
;   conststep:     0: use the average as the resulted binning values(default) 1: constant binning intervals
;   ewid:          energy width, used when e-integrated intensity option is choosed, input ydat=sum(I*ewid), output ydat=sum(I*ewid)/sum(ewid)
;   extlib:        0:not using external library 1:use external library
;   truncatemax:   if set, rebinned xdat will not exceed max(xdat)
;   zero_error:    if nonzero, use this value as error bar for zero intensities
;   bin_zeroerror: flag for how to treat zero intenisty error, default 0 (user specify) 
;   zoerrestrange: used when bin_zeroerror=1, the zero intensity error bar statistical estimate range: 0-automatic search till non-zero counts are found; n(>=1)-specified search range
;   estapplyall:   used when bin_zeroerror=1, if set, zoerrestrange is applied to all intensity error bar statistical estimate
;   foundrange:    output the range when zoerrestrange=0
pro dm_step_bin,in_xstep,xdat,ydat=ydat,yerr=yerr,xstart=xstart,checkfinite=checkfinite,uniq_val=uniq_val,debug=debug,avgsum=avgsum,bintr=bintr,conststep=conststep,ewid=ewid,$
    extlib=extlib,weight=weight,truncatemax=truncatemax,zero_error=zero_error,bin_zeroerror=bin_zeroerror,zoerrestrange=zoerrestrange,estapplyall=estapplyall,foundrange=foundrange,$
    group_leader=group_leader

    if n_elements(avgsum) eq 0 then avgsum = 0        ;default using arithmetic average in binning
    if n_elements(bintr)  eq 0 then bintr = 0.0
    if n_elements(conststep) eq 0 then conststep = 0  ;default is using average as the binned value
    if n_elements(extlib) eq 0 then extlib = 0b       ;default not using external binning library
    if n_elements(zero_error) eq 0 then zero_error = 0.0
    nxdat   = n_elements(xdat)
    ok_ydat = (n_elements(ydat) eq nxdat)
    ok_yerr = (n_elements(yerr) eq nxdat)
    ok_eint = (n_elements(ewid) eq nxdat)
    ok_wght = (n_elements(weight) eq nxdat)
    x_type  = size(xdat,/type)
    if ok_ydat then y_type = size(ydat,/type)
    if ok_yerr then e_type = size(yerr,/type)
    if ok_wght then w_type = size(weight,/type)
    if ok_eint then avgsum = 1
    if keyword_set(bin_zeroerror) and ok_yerr then begin
       if n_elements(zoerrestrange) eq 0 then zoerrestrange = 0
       if zoerrestrange eq 0 then foundrange = 0
    endif else bin_zeroerror = 0
    if (avgsum eq -1) and ((~ok_ydat) or ((~ok_wght) and (~ok_yerr))) then avgsum = 0    ;weighted average requires the ydat and weight or corresponding yerr, without either of them, switching to arithmetic mean
    if keyword_set(debug) then current = systime(/sec)
    ;check data format
    mesg = ''
    if (~ok_ydat) and n_elements(ydat) gt 0 then mesg = [mesg,'xdat and ydat sizes are different.']
    if (~ok_yerr) and n_elements(yerr) gt 0 then mesg = [mesg,'xdat and yerr sizes are different.']
    if (~ok_eint) and n_elements(ewid) gt 0 then mesg = [mesg,'xdat and ewid sizes are different.']
    if n_elements(mesg) gt 1 then begin
       ok = dialog_message(mesg[1:n_elements(mesg)-1],/error,/center,dialog_parent=group_leader)
       return
    endif
    ;check finite values
    indcount = 0
    if keyword_set(checkfinite) then begin
       ind = where(finite(xdat),indcount)
       if indcount eq 0 then mesg = [mesg,'xdat are all non-finite.']
       if indcount eq nxdat then begin
          ind = 0 & tmp = temporary(ind) & indcount = 0
       endif
       if ok_ydat then begin
          ind1 = where(finite(ydat),count) 
          if (count ne nxdat) and (count ne 0) then begin
             if n_elements(ind) eq 0 then begin
                ind = temporary(ind1) & indcount = count
             endif else ind = dm_common(ind,ind1,indcount,/no_copy)
          endif else begin
             if count eq 0 then mesg = [mesg,'ydat are all non-finite.']
             ind1 = 0
          endelse
       endif
    endif
    if ok_yerr then begin ;always check the finite values of yerr
       ind1 = where(finite(yerr),count) 
       if (count ne nxdat) and (count ne 0) then begin
          if n_elements(ind) eq 0 then begin
             ind = temporary(ind1) & indcount = count
          endif else ind = dm_common(ind,ind1,indcount,/no_copy)
       endif else begin
          if count eq 0 then mesg = [mesg,'yerr are all non-finite.']
          ind1 = 0
       endelse
    endif
    if n_elements(mesg) gt 1 then begin
       ok = dialog_message(mesg[1:n_elements(mesg)-1],/error,/center,dialog_parent=group_leader)
       return
    endif
    if indcount ne 0 then begin ;remove non-finite values
       xdat = xdat[ind]
       if ok_ydat then ydat = ydat[ind]
       if ok_yerr then yerr = yerr[ind]
       if ok_eint then ewid = ewid[ind]
       if ok_wght then weight = weight[ind]
       nxdat = indcount & ind = 0 
    endif
    xstep = float(abs(in_xstep))
    xmax  = max(xdat,min=xmin)
    if n_elements(xstart) eq 1 then begin
       if finite(xstart) then begin
          xmin = xstart
          ind = where(xdat ge xstart,count)
          if count ne nxdat then begin
             xdat = xdat[ind]
             if ok_ydat then ydat = ydat[ind]
             if ok_yerr then yerr = yerr[ind]
             if ok_eint then ewid = ewid[ind]
             if ok_wght then weight = weight[ind]
          endif
          ind = 0 & nxdat = count
       endif
    endif
    if (avgsum eq -1) and (~ok_wght) then begin
       weight = yerr
       ind = where(yerr eq 0,count,complement=ind1)
       if count ne 0 then begin
          tmp = min(abs(yerr[ind1]))
          if zero_error ne 0 then tmp = (tmp)<(zero_error)
          weight[ind] = tmp                 ;use yerr as weight, make sure no yerr=0 exists, otherwise replace them with smallest of nonzero value
       endif
       weight = 1./weight^2
       ok_wght = 1b
       ind = 0 & ind1 = 0
    endif    
    if (xstep eq 0) then xstep = ((xmax eq xmin)?(0.1):((xmax-xmin)/100.0))
    if keyword_set(truncatemax) then oldxmax = xmax
    xmax  = xmax+0.5*xstep
    xmin  = xmin-0.5*xstep
    xsize = ceil((xmax-xmin)/xstep)
    if xsize eq 2 then conststep=0
    n_unq = n_elements(uniq_val)
    if n_unq ne 0 then begin
       uniq_val = uniq_val[uniq(uniq_val,sort(uniq_val))]
       n_unq = n_elements(uniq_val) & xsize = n_unq
       conststep = 0
    endif
    tmp_x = dblarr(xsize)
    if ok_ydat then tmp_y = dblarr(xsize)
    if ok_eint then tmp_w = dblarr(xsize)
    if ok_yerr then tmp_e = dblarr(xsize)
    if ok_wght then tmp_m = dblarr(xsize)
    num_x = lonarr(xsize)
           
    defsysv,'!DAVE_AUXILIARY_DIR',exists=exists
    if exists and extlib then begin
       filename = 'rebinnd'+'_'+!version.os+'_'+strtrim(string(!VERSION.MEMORY_BITS),2)
       shlib = (FILE_SEARCH(!DAVE_AUXILIARY_DIR+'rebin_nd'+path_sep()+filename+'*'))[0]
    endif else shlib = ''
    if (strlen(shlib) eq 0) and extlib then begin
       mesg = 'The external binning library is not found. IDL binning code is used instead.'
       if n_elements(group_leader) ne 0 then begin
          if (group_leader eq LookupManagedWidget('dcs_mslice')) or (group_leader eq LookupManagedWidget('mslice')) then begin
             widget_control,group_leader,get_uvalue=obj_mslice
             if obj_valid(obj_mslice) then obj_mslice->setproperty,binextlib=0 $
             else mesg = [mesg,'','Please disable the external library in the menu Option->Binning Method->Use External Binning Library.']
          endif
       endif
       ok = dialog_message(mesg,dialog_parent=group_leader,/center,title='Please note:')
    endif
    
    if ((strlen(shlib) gt 0) and (n_unq eq 0) and ok_ydat)then begin
       if (ok_eint and ok_yerr) then begin
          r = CALL_EXTERNAL(shlib, 'rebin1d_yew', long(nxdat),double(xdat),double(ydat),$
                double(yerr),double(ewid),long(xsize),double(xmin),double(xstep),num_x, tmp_x, tmp_y,  tmp_e, tmp_w, /CDECL)       
       endif       
       if (ok_eint and ~ok_yerr) then begin
          r = CALL_EXTERNAL(shlib, 'rebin1d_yw', long(nxdat),double(xdat),double(ydat),$
                double(ewid),long(xsize),double(xmin),double(xstep),num_x, tmp_x, tmp_y, tmp_w, /CDECL)          
       endif       
       if (~ok_eint and ok_yerr) then begin
          r = CALL_EXTERNAL(shlib, 'rebin1d_ye', long(nxdat),double(xdat),double(ydat),$
                double(yerr), long(xsize),double(xmin),double(xstep),num_x, tmp_x, tmp_y,  tmp_e, /CDECL) 
       endif       
       if (~ok_eint and ~ok_yerr) then begin    
          r = CALL_EXTERNAL(shlib, 'rebin1d_y', long(nxdat),double(xdat),double(ydat),$
                long(xsize),double(xmin),double(xstep),num_x, tmp_x, tmp_y, /CDECL)       
       endif
       if avgsum eq -1 then avgsum = 0  ;weighted mean not implemented yet
    endif else begin  ;use histogram function to do the binning
       if n_unq ne 0 then begin
          ccnt = 0ll
          tmp_xdat = lonarr(nxdat)
          for i=0L,n_unq-1 do begin
              index = where(xdat eq uniq_val[i],count)
              if count ne 0 then tmp_xdat[index] = i
              ccnt = ccnt + count
              if ccnt eq nxdat then break
          endfor
          if ccnt ne nxdat then ok = dialog_message('There is a discrepancy between xdat and the unique binning values. This might affect the binning result.',dialog_parent=group_leader,/center,title='Please note:')  
          num_x = histogram(temporary(tmp_xdat),binsize=1L,min=0L,max=n_unq-1L,reverse_indices=rindx)
       endif else begin
          num_x = histogram(xdat,binsize=xstep,min=xmin,max=xmax,reverse_indices=rindx)
       endelse
       if ok_yerr then begin
          yerr = yerr^2
          if (zero_error ne 0) and (avgsum eq -1) then begin
             ind = where(yerr eq 0,cnt)
             if cnt ne 0 then zero_error = zero_error*min(weight[ind])
          endif
       endif
       n_rindx = n_elements(rindx)
       if (avgsum eq -1) and keyword_set(bin_zeroerror) then tmp_yerr = yerr ;need another copy
       for i=0LL,xsize-1L do begin
           if num_x[i] eq 0 then continue
           if n_unq ne 0 then tmp_x[i] = num_x[i]*uniq_val[i] $
           else if ~keyword_set(conststep) then tmp_x[i] = total(xdat[rindx[rindx[i]:(rindx[i+1]-1)]],/double)
           if avgsum eq -1 then begin  ;weighted mean
              if keyword_set(bin_zeroerror) then begin  ;special care for zero intensities
                 tmp1    = 0d
                 tmp2    = 0d
                 if keyword_set(estapplyall) and (zoerrestrange) gt 0 then begin
                    for j=0>(i-zoerrestrange),(i+zoerrestrange)<(xsize-1) do begin 
                        if rindx[j] lt n_rindx then begin
                           if rindx[j+1] gt rindx[j] then begin
                              tmp1 = temporary(tmp1)+total(tmp_yerr[rindx[rindx[j]:(rindx[j+1]-1)]],/double)
                              tmp2 = temporary(tmp2)+total(weight[rindx[rindx[j]:(rindx[j+1]-1)]],/double)
                           endif else begin
                              tmp1 = temporary(tmp1)+tmp_yerr[rindx[rindx[j]]]
                              tmp2 = temporary(tmp2)+weight[rindx[rindx[j]]]
                           endelse
                        endif
                    endfor
                    if tmp2 gt 0 then begin
                       tmp = tmp1/tmp2
                       yerr[rindx[rindx[i]:(rindx[i+1]-1)]] = weight[rindx[rindx[i]:(rindx[i+1]-1)]]*tmp  
                    endif
                 endif else begin
                    ind = where(tmp_yerr[rindx[rindx[i]:(rindx[i+1]-1)]] eq 0,cnt)
                    if cnt ne 0 then begin
                       if zoerrestrange eq 0 then begin
                          ;expanding the range gradually until finding non-zero intensities
                          found   = 0b
                          tmp_ran = 0l
                          tmp_max = i>(xsize-i)
                          while (~found) do begin
                                for j=-1,1,2 do begin
                                    tmp_in = i+tmp_ran*j
                                    if (tmp_in ge 0) and (tmp_in lt xsize) then begin
                                       if rindx[tmp_in] lt n_rindx then begin
                                          if rindx[tmp_in+1] gt rindx[tmp_in] then begin
                                             tmp1 = temporary(tmp1)+total(tmp_yerr[rindx[rindx[tmp_in]:(rindx[tmp_in+1]-1)]],/double)
                                             tmp2 = temporary(tmp2)+total(weight[rindx[rindx[tmp_in]:(rindx[tmp_in+1]-1)]],/double)
                                          endif else begin
                                             tmp1 = temporary(tmp1)+tmp_yerr[rindx[rindx[tmp_in]]]
                                             tmp2 = temporary(tmp2)+weight[rindx[rindx[tmp_in]]]
                                          endelse
                                       endif
                                    endif
                                endfor
                                if tmp1 gt 0 then begin
                                   tmp = tmp1/tmp2
                                   if keyword_set(estapplyall) then yerr[rindx[rindx[i]:(rindx[i+1]-1)]] = weight[rindx[rindx[i]:(rindx[i+1]-1)]]*tmp $
                                   else yerr[(rindx[rindx[i]:(rindx[i+1]-1)])[ind]] = weight[(rindx[rindx[i]:(rindx[i+1]-1)])[ind]]*tmp
                                   found = 1b
                                endif
                                tmp_ran = tmp_ran+1
                                if tmp_ran gt tmp_max then found = 1b
                          endwhile
                          foundrange = foundrange>(tmp_ran-1)
                       endif else begin 
                          for j=0>(i-zoerrestrange),(i+zoerrestrange)<(xsize-1) do begin
                              if rindx[j] lt n_rindx then begin
                                 if rindx[j+1] gt rindx[j] then begin
                                    tmp1 = temporary(tmp1)+total(tmp_yerr[rindx[rindx[j]:(rindx[j+1]-1)]],/double)
                                    tmp2 = temporary(tmp2)+total(weight[rindx[rindx[j]:(rindx[j+1]-1)]],/double)
                                 endif else begin
                                    tmp1 = temporary(tmp1)+tmp_yerr[rindx[rindx[j]]]
                                    tmp2 = temporary(tmp2)+weight[rindx[rindx[j]]]
                                 endelse
                              endif
                          endfor
                          if tmp2 gt 0 then begin
                             tmp = tmp1/tmp2
                             yerr[(rindx[rindx[i]:(rindx[i+1]-1)])[ind]] = weight[(rindx[rindx[i]:(rindx[i+1]-1)])[ind]]*tmp
                          endif
                       endelse
                    endif
                 endelse 
              endif
              tmp_y[i] = total(ydat[rindx[rindx[i]:(rindx[i+1]-1)]]*weight[rindx[rindx[i]:(rindx[i+1]-1)]],/double)
              if ok_yerr then tmp_e[i] = total(yerr[rindx[rindx[i]:(rindx[i+1]-1)]]*(weight[rindx[rindx[i]:(rindx[i+1]-1)]])^2,/double)
           endif else begin
              if ok_ydat then tmp_y[i] = total(ydat[rindx[rindx[i]:(rindx[i+1]-1)]],/double)
              if ok_yerr then tmp_e[i] = total(yerr[rindx[rindx[i]:(rindx[i+1]-1)]],/double)
              if ok_eint then tmp_w[i] = total(ewid[rindx[rindx[i]:(rindx[i+1]-1)]],/double) 
           endelse
           if ok_wght then tmp_m[i] = total(weight[rindx[rindx[i]:(rindx[i+1]-1)]],/double)
       endfor
       rindx = 0
       if (avgsum eq -1) and keyword_set(bin_zeroerror) then tmp_yerr = 0
    endelse
     
    index = where(num_x gt 0,count,complement=zindex,ncomplement=zcount)
    if count ne 0 then begin
       if keyword_set(conststep) then begin
          if zcount ne 0 then begin
             num_x[zindex] = 1.0
             if avgsum eq -1 then tmp_e[zindex] = 0.0
             if ok_eint then tmp_w[zindex] = 1.0
          endif
          minx = min(xdat) 
          xdat = xmin+(findgen(xsize)+0.5)*xstep
          indx = where(xdat ge (minx-0.1*xstep),count,ncomplement=zcount)
          if count eq 0 then zcount = 0                                       ;get rid of starting continuous 0's
          while((xsize gt 2) and (num_x[xsize-1] eq 0)) do xsize = xsize-1    ;get rid of trailing 0's
          xdat = xdat[zcount:(xsize-1)]
          num_x = num_x[zcount:(xsize-1)]
          if ok_ydat then tmp_y = tmp_y[zcount:(xsize-1)]
          if ok_yerr then tmp_e = tmp_e[zcount:(xsize-1)]
          if ok_eint then tmp_w = tmp_w[zcount:(xsize-1)]
          if ok_wght then tmp_m = tmp_m[zcount:(xsize-1)]
       endif else begin
          tmp_x = tmp_x[index]
          num_x = num_x[index]
          if ok_ydat then tmp_y = tmp_y[index]
          if ok_yerr then tmp_e = tmp_e[index]
          if ok_eint then tmp_w = tmp_w[index]
          if ok_wght then tmp_m = tmp_m[index]
          xdat = tmp_x/num_x
       endelse
       index = -1 & zindex=-1
       if ok_wght then weight = temporary(tmp_m)
       if avgsum eq -1 then begin  ;weighted mean
          n_x1 = weight
       endif else begin
          if avgsum eq 1 then num_x = 1
          if ok_eint then n_x1 = tmp_w else n_x1 = num_x
       endelse
       if ok_ydat then ydat = tmp_y/n_x1
       if ok_yerr then yerr = sqrt(tmp_e)/n_x1
       if ok_yerr and (zero_error ne 0) then begin
          ind = where(yerr eq 0,count)
          if count ne 0 then yerr[ind] = zero_error/n_x1[ind]
          ind = 0
       endif
    endif
    if (bintr gt 0.0) and (bintr lt 1.0) then begin
       min_num_x = bintr*(max(num_x)>1)
       ind = where(num_x gt min_num_x,ct1)
       if ct1 gt 0 then begin
          xdat = xdat[ind]
          if ok_ydat then ydat = ydat[ind]
          if ok_yerr then yerr = yerr[ind]
       endif
       ind = 0
    endif
    if keyword_set(truncatemax) then begin
       ind = where(xdat le oldxmax,count)
       if count ne 0 then begin
          xdat = xdat[ind]
          if ok_ydat then ydat = ydat[ind]
          if ok_yerr then yerr = yerr[ind]
       endif
       ind = 0
    endif
    if size(xdat,/type) ne x_type then xdat = fix(xdat,type=x_type)
    if ok_ydat then begin
       if size(ydat,/type) ne y_type then ydat = fix(ydat,type=y_type)
    endif
    if ok_yerr then begin
       if size(yerr,/type) ne e_type then yerr = fix(yerr,type=e_type)
    endif
    if ok_wght then begin
       if size(weight,/type) ne w_type then weight = fix(weight,type=w_type)
    endif
    if keyword_set(debug) then print,'step_bin finished in ',systime(/sec)-current,' secs.'
end