/*  Program gauss_smear
 *  This program convolves the complex frames in a complex FITS datacube 
 *  whose filename is specified by the first program argument with a real
 *  Gaussian whose radius at its maximum value/e is that specified by the 
 *  floating-point second program argument.  The integral of the Gaussian
 *  over the entire domain is assumed to be unity.  The convolution 
 *  is written to a file whose name is specified by the third argument.  
 *  Compile this program with the following commands:
 *
 *  "gcc -c  -o gauss_smear.o gauss_smear.c
 *  (gcc -c  -o do_fits.o do_fits.c)
 *   gcc -O3 -o gauss_smear gauss_smear.o do_fits.o -lm".
 */

#define _FILE_OFFSET_BITS 64

#include <limits.h>
#include <math.h>
#include <sys/types.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "do_fits.h"
 
/*  Negative status codes returned by frame_fft.  */
#define WRONG_NUM_COLS               -1
#define ROW_FAILURE                  -2
#define COL_FAILURE                  -3

/*  Values accepted by the "mode" argument of pad_image.  */
#define NULL_PAD                      0
#define CYC_PAD                       1
#define SOFT_PAD                      2

/*  Pi, written out to twelve significant digits.  */
#define PI                            3.14159265359

/*  Prototypes for the functions defined in this file.
 *  NOTE:  the parameter names in the unpad_image prototype do not match
 *  its definition, where the first argument is the padded source frame
 *  and the second is the unpadded destination frame.  */
int pad_image(double ampl_in[], double ampl_pad[], int mode);
int unpad_image(double ampl_in[], double ampl_pad[]);
int lp2ge(int arg);
int smear_image(double ampl[], double smear_radius);
int frame_fft(double dat[], int mode);
int count_bits(int npts);
int fft(int npts, double dat[], int mode);
int bit_rev(int npts, double dat[]);

/*  Number of 2-D frames in the cube; main computes it as the product of
 *  the axis lengths beyond the first two.  */
int num_frames;
/*  Frame dimensions taken from the input header (nx_in, ny_in) and the
 *  same dimensions after lp2ge has been applied to them (nx_pad, ny_pad).  */
int nx_in, ny_in, nx_pad, ny_pad;
/*  Bytes per frame and pixels per frame of the padded frame.  */
int bpf_pad, ppf_pad;
/*  Not referenced anywhere else in this file.  */
double *gradx, *grady, nu;
/*  Bytes per frame and pixels per frame of the input frame.  */
off_t  bpf_in, ppf_in;

/*  Run-time copies of PI and 2*PI (pi is used by fft, twopi by
 *  smear_image).  */
static double pi    = PI;
static double twopi = 2.*PI;

/*  Function main
 *  Usage:  gauss_smear input smear_radius(float) output
 *
 *  Reads the FITS header of the input file, then for every frame of the
 *  cube:  reads the frame, pads it to power-of-two dimensions, smears it
 *  with smear_image, clips the padding off again and writes the result to
 *  the output file at the same byte offset the frame had in the input.
 *  For integer data (bitpix > 0) the frames are afterwards re-scaled to a
 *  common BSCALE, and BZERO/BSCALE are revised in the header.  Finally the
 *  header is written to the start of the output file.
 *
 *  The FITS helpers (init_fits, get_head, read_head, input_data,
 *  output_data, min_max, set_scale, put_head, pad_file, ...) and the
 *  globals they share (nx, ny, naxis, nax, bitpix, dat_size, b_scale,
 *  b_zero, sig_min, sig_max, inhead, dfprogname) come from do_fits.h and
 *  are not defined in this file.
 *
 *  Exit status:  0 on success, when called without arguments (usage is
 *  printed to stdout), or when the user declines to overwrite the output
 *  file;  1 on any error.  */

int main(int argc, char *argv[])
{
    char   line[LINE_LENGTH + 1];
    char   *end;
    int    i, fid1, fid2;
    double smear_radius;
    double *ampl_in, *ampl_pad, *ampl_out;
    double *bs, bscale_in, bscale_out;
    double bzero_in, bzero_out;
    FILE   *dest;
    off_t  dat_start;

    /* Characterize the machine for FITS I/O.  */

    init_fits(argv[0]);

    /* Print usage statement if arguments are omitted or erroneous.  */

    if (argc != 4)
    {
        if (argc == 1)
            dest = stdout;
        else
            dest = stderr;

        fprintf(dest, "\nUsage:  %s input smear_radius(float) output\n", 
            dfprogname);

        if (argc == 1)
            exit(0);
        else
            exit(1);
    }

    /* Record the radius at which the Gaussian is supposed to be
     * 1/e times its maximum value.  An argument that does not start
     * with a number is rejected instead of silently becoming 0.  */

    smear_radius = strtod(argv[2], &end);
    if (end == argv[2])
    {
        fprintf(stderr, "%s:  Smear radius \"%s\" is not a number.  Exiting.\n",
            dfprogname, argv[2]);
        exit(1);
    }

    /*  Open the input file and read the fits head.  Squawk and exit
     *  if any of the parameters are inappropriate.  */

    if ((fid1 = open(argv[1], O_RDONLY)) == -1)
    {
        fprintf(stderr, "%s:  Cannot open file %s.  Exiting.\n", 
            dfprogname, argv[1]);
        exit(1);
    }

    if (get_head(fid1, &inhead) == EOF)
    {
        fprintf(stderr, "%s:  Defective fits head in file %s.  Exiting.\n",
            dfprogname, argv[1]);
        exit(1);
    }
    if (read_head(inhead) == EOF)
    {
        fprintf(stderr, "%s:  Cannot read fits head in file %s.  Exiting.\n",
            dfprogname, argv[1]);
        exit(1);
    }

    /*  The file position after the header is where the data start.  */

    dat_start = lseek(fid1, 0, SEEK_CUR);
    if (bitpix == 0)
    {
        fprintf(stderr, "%s:  Read failed:  BITPIX zero or unspecified.\n", 
            dfprogname);
        exit(1);
    }

    if (naxis != 2 && naxis != 3)
    {
        fprintf(stderr, 
            "%s:  NAXIS must be 2 or 3.  It is %d.  Exiting.\n", 
            dfprogname, naxis);
        exit(1);
    }

    /*  Record appropriate FITS parameters.  */

    bscale_in = b_scale;
    bzero_in  = b_zero;
    nx_in     = nx;
    ny_in     = ny;
    if (nx_in < 1 || ny_in < 1)
    {
        fprintf(stderr, "%s:  Invalid frame dimensions %d x %d.  Exiting.\n",
            dfprogname, nx_in, ny_in);
        exit(1);
    }
    dat_size  = bitpix/8;
    if (dat_size < 0)
        dat_size = -dat_size;
    ppf_in = (off_t)nx_in*ny_in;  /* widen before multiplying */
    bpf_in = ppf_in*dat_size;
    for (i = 2, num_frames = 1; i < naxis; i++)
        num_frames *= nax[i];

    nx_pad  = lp2ge(nx_in);
    ny_pad  = lp2ge(ny_in);
    if (nx_pad < 1 || ny_pad < 1)
    {
        fprintf(stderr, "%s:  Frame too large to pad to a power of 2.  "
            "Exiting.\n", dfprogname);
        exit(1);
    }
    ppf_pad = nx_pad*ny_pad;     /* Pixels per frame */
    bpf_pad = ppf_pad*dat_size;  /* Bytes per frame  */

    /*  Create the output file.  If exclusive creation fails, the file is
     *  taken to exist already and the user is asked before truncating.  */

    if ((fid2 = open(argv[3], O_RDWR | O_CREAT | O_EXCL, 0644)) == -1)
    {
        fprintf(stdout, "%s:  File %s already exists.  Overwrite?  ", 
            dfprogname, argv[3]);
        fflush(stdout);
        if (fgets(line, LINE_LENGTH + 1, stdin) == NULL || line[0] != 'y')
            exit(0);
        if ((fid2 = open(argv[3], 
            O_RDWR | O_CREAT | O_TRUNC, 0644)) == -1)
        {
            fprintf(stderr, "%s:  Cannot open file %s.  Exiting.\n", 
                dfprogname, argv[3]);   
            exit(1);
        }
    }

    /*  Allocate arrays for a real frame to be read, a padded complex 
     *  frame (real plane followed by imaginary plane), a smeared real
     *  frame, and the per-frame record of the scale used on output.  */

    ampl_in   = (double *)malloc(  (size_t)ppf_in *sizeof(double));
    ampl_pad  = (double *)malloc(2*(size_t)ppf_pad*sizeof(double));
    ampl_out  = (double *)malloc(  (size_t)ppf_in *sizeof(double));
    bs        = (double *)malloc((size_t)(num_frames > 0 ? num_frames : 1)
                                 *sizeof(double));
    if (ampl_in == NULL || ampl_pad == NULL || ampl_out == NULL 
    ||  bs == NULL)
    {
        fprintf(stderr, "%s:  Out of memory.  Exiting.\n", dfprogname);
        exit(1);
    }

    bscale_out = 0.;
    bzero_out  = 0.;

    /*  smear_image and frame_fft take their dimensions from nx and ny.  */

    nx = nx_pad;
    ny = ny_pad;

    /*  For each cycle of the following loop, read a frame from the 
     *  input file, pad its dimensions to a power of two, smear it, 
     *  clip the padded margin, and write the result to the output file. */

    for (i = 0; i < num_frames; i++)
    {
        printf("Smearing frame %d\n", i);

        b_scale = bscale_in;
        b_zero  = bzero_in;
        lseek(fid1, dat_start + bpf_in*i, SEEK_SET);
        input_data(fid1, ampl_in, ppf_in);

        pad_image(ampl_in, ampl_pad, SOFT_PAD);
        if (smear_image(ampl_pad, smear_radius) < 0)
        {
            fprintf(stderr, "%s:  Smearing of frame %d failed.  Exiting.\n",
                dfprogname, i);
            exit(1);
        }
        unpad_image(ampl_pad, ampl_out);

        /*  Write the frame with the largest scale seen so far, and
         *  remember which scale was used for it.  */

        min_max(ampl_out, ppf_in);
        set_scale(sig_min, sig_max);
        if (b_scale > bscale_out)
            bscale_out = b_scale;
        else
            b_scale = bscale_out;
        bs[i] = b_scale;

        lseek(fid2, dat_start + bpf_in*i, SEEK_SET);
        output_data(fid2, ampl_out, ppf_in);
    }

    /*  Read back each frame of the output file that was written with a
     *  scale other than the final one, and re-write it with the final
     *  scale.  bs[] never decreases, so the first frame already at the
     *  final scale ends the pass.  */

    b_zero  = 0.;
    if (bitpix > 0)
        for (i = 0; i < num_frames; i++)
        {
            if (bs[i] == bscale_out)
                break;

            printf("Rescaling frame %d\n", i);

            b_scale = bs[i];
            lseek(fid2, dat_start + bpf_in*i, SEEK_SET);
            input_data(fid2, ampl_out, ppf_in);

            b_scale = bscale_out;
            lseek(fid2, dat_start + bpf_in*i, SEEK_SET);
            output_data(fid2, ampl_out, ppf_in);
        }

    /*  Revise the scaling parameters in the FITS header.  */

    if (bitpix > 0)
    {
        if (revise_keyword_value(inhead, "BZERO",  "%e", bzero_out,  "")
        == EOF)
            insert_keyword_value(inhead, naxis + 4, "BZERO", "%e",
                bzero_out, "");
        if (revise_keyword_value(inhead, "BSCALE", "%e", bscale_out, "") 
        == EOF)
            insert_keyword_value(inhead, naxis + 5, "BSCALE", "%e", 
                bscale_out, "");
    }

    /*  Insert the revised FITS header into the output file, and pad the
     *  datacube to a multiple of BSIZE.  */

    lseek(fid2, 0, SEEK_SET);
    put_head(fid2, inhead);
    pad_file(fid2);

    /*  Close the input and output files, and free the work arrays.  */

    close(fid1);
    close(fid2);
    free(ampl_in);
    free(ampl_pad);
    free(ampl_out);
    free(bs);

    return 0;
}

/*  Function pad_image
 *  This function spatially pads the frame to whose data buffer the first
 *  argument points and writes the result into the array to which the 
 *  second argument points.  The dimensions of the input frame are specified
 *  by the external variables nx_in and ny_in.  The dimensions of the output 
 *  frame are specified by the external variables nx_pad and ny_pad.
 *  The input frame is centered on the extended domain;  where the center is
 *  ambiguous the left/bottom margin is the smaller one.
 *
 *  The variable "mode" specifies how the surrounding space is filled:
 *    NULL_PAD  the margin is set to zero;
 *    CYC_PAD   the input frame is repeated periodically;
 *    SOFT_PAD  the margin fades with a raised-cosine weight from the value
 *              on one edge of the image, across the (cyclic) boundary of
 *              the extended domain, to the value on the opposite edge.
 *
 *  Returns mode on success, or EOF if mode is not one of the above (in
 *  which case ampl_pad is left untouched).  No memory is allocated.  */

int pad_image(double ampl_in[], double ampl_pad[], int mode)
{
    int    i, j, i0, j0;
    int    index1, index2;
    int    ratio, xpad, ypad;
    double arg, alpha, beta;
    double *row, *first_row, *last_row;

    j0 = (nx_pad - nx_in)/2; /* Extent of the padding on the left side.  */
    i0 = (ny_pad - ny_in)/2; /* Extent of the padding on the bottom.     */

    if (mode == NULL_PAD || mode == SOFT_PAD)
    {
        /*  Copy the image into the middle of a zeroed frame.  */

        for (i = 0; i < ny_pad; i++)
        {
            index1 = i - i0;
            row    = ampl_pad + nx_pad*i;
            for (j = 0; j < nx_pad; j++)
            {
                index2 = j - j0;
                if (index1 < 0 || index1 > ny_in - 1
                ||  index2 < 0 || index2 > nx_in - 1)
                    row[j] = 0.0;
                else
                    row[j] = ampl_in[nx_in*index1 + index2];
            }
        }

        if (mode == SOFT_PAD)
        {
            /*  Horizontal fade:  for every image row, walk the margin
             *  columns from just right of the image, wrapping round to
             *  just left of it, blending right-edge into left-edge.  */

            for (j = nx_in; j < nx_pad; j++)
            {
                arg    = PI*(double)(j - nx_in + 1)
                           /(double)(nx_pad - nx_in + 1);
                alpha  = 0.5*(cos(arg) + 1.0);
                beta   = 1.0 - alpha;
                index2 = (j + j0)%nx_pad;
                for (i = 0; i < ny_in; i++)
                {
                    row = ampl_pad + nx_pad*(i + i0);
                    row[index2] = alpha*row[nx_in + j0 - 1] 
                                + beta *row[j0];
                }
            }

            /*  Vertical fade:  the same across the margin rows, applied
             *  to every column (including the margin columns just
             *  filled), blending the top image row into the bottom.  */

            last_row  = ampl_pad + nx_pad*(ny_in + i0 - 1);
            first_row = ampl_pad + nx_pad*i0;
            for (i = ny_in; i < ny_pad; i++)
            {
                arg    = PI*(double)(i - ny_in + 1)
                           /(double)(ny_pad - ny_in + 1);
                alpha  = 0.5*(cos(arg) + 1.0);
                beta   = 1.0 - alpha;
                row    = ampl_pad + nx_pad*((i + i0)%ny_pad);
                for (j = 0; j < nx_pad; j++)
                    row[j] = alpha*last_row[j] + beta*first_row[j];
            }
        }
    }
    else if (mode == CYC_PAD)
    {
        /*  xpad and ypad are the least multiples of nx_in and ny_in that
         *  are no smaller than the margins, so that the operands of the
         *  modulo operations below are never negative.  */

        ratio = j0/nx_in;
        if (ratio*nx_in < j0)
            ratio++;
        xpad = nx_in*ratio;

        ratio = i0/ny_in;
        if (ratio*ny_in < i0)
            ratio++;
        ypad = ny_in*ratio;

        for (i = 0; i < ny_pad; i++)
        {
            index1 = (i - i0 + ypad)%ny_in;
            for (j = 0; j < nx_pad; j++)
            {
                index2 = (j - j0 + xpad)%nx_in;
                ampl_pad[nx_pad*i + j] = ampl_in[nx_in*index1 + index2];
            }
        }
    }
    else
        mode = EOF;

    return mode;
}

/*  Function unpad_image
 *  This function reverses the function pad_image:  it copies the central
 *  nx_in by ny_in region of the padded frame to which its first argument
 *  points into the array to which its second argument points.  The
 *  dimensions of the padded frame are specified by the external variables
 *  nx_pad and ny_pad, those of the output frame by nx_in and ny_in.  The
 *  region copied starts (ny_pad - ny_in)/2 rows and (nx_pad - nx_in)/2
 *  columns into the padded frame, matching the placement used by
 *  pad_image.  Only the first nx_pad*ny_pad elements of ampl_pad are read.
 *
 *  Always returns 1.  No memory is allocated.  */

int unpad_image(double ampl_pad[], double ampl_out[])
{
    int    i, j, i0, j0;
    double *src, *dst;

    j0 = (nx_pad - nx_in)/2;
    i0 = (ny_pad - ny_in)/2;

    for (i = 0; i < ny_in; i++)
    {
        src = ampl_pad + nx_pad*(i + i0) + j0;
        dst = ampl_out + nx_in*i;
        for (j = 0; j < nx_in; j++)
            dst[j] = src[j];
    }

    return 1;
}

/*  Function lp2ge
 *  Returns the least power of 2 greater than or equal to the argument.
 *  Arguments less than or equal to 1 (including zero and negative
 *  values) yield 1.  If no such power is representable as an int the
 *  function returns -1 instead of overflowing.  */

int lp2ge(int arg)
{
    int power = 1;

    while (power < arg)
    {
        /*  Doubling would exceed INT_MAX.  */
        if (power > INT_MAX/2)
            return -1;
        power <<= 1;
    }

    return power;
}

/* Function smear_image
 * This function applies Gaussian smearing to the 2-D image held in the
 * buffer to which its first argument points.  The buffer holds a real
 * plane of ppf_pad values followed by an imaginary plane of ppf_pad values;
 * the imaginary plane is zeroed here, so only the real plane is input.
 * The dimensions of the image are the external integers nx and ny (not
 * defined in this file), and nx*ny must equal ppf_pad for the layout used
 * here to agree with the one used by frame_fft.
 *
 * The smearing is accomplished by Fourier-transforming the data,
 * multiplying the transform by exp(-smear_radius^2 * |k|^2 / 4), and
 * reversing the Fourier transform of the product, in accordance with the
 * convolution theorem.  The factor is 1 at k = 0, so the mean of the image
 * is preserved.  Wavenumber steps are 2*pi/(nx*dx) and 2*pi/(ny*dy);  dx
 * and dy are external (presumably the pixel spacings from do_fits.h --
 * TODO confirm).
 *
 * Returns 1 on success, or the negative status returned by frame_fft if
 * either transform fails.  */

int smear_image(double ampl[], double smear_radius)
{
    int    i, j, half_nx, half_ny, status;
    double kx, ky, dkx, dky, efact, exp_fact;
    double *ampl_real, *ampl_imag;

    efact  = 0.25*smear_radius*smear_radius;

    half_nx = nx/2;
    half_ny = ny/2;

    dkx = twopi/(nx*dx);
    dky = twopi/(ny*dy);

    ampl_real  = ampl;
    ampl_imag  = ampl + ppf_pad;

    for (i = 0; i < ppf_pad; i++)
        ampl_imag[i] = 0.;

    if ((status = frame_fft(ampl,  1)) < 0)
        return status;

    /*  Indices in the upper half of each axis stand for negative
     *  wavenumbers.  */

    for (i = 0; i < ny; i++)
    {
        if (i < half_ny)
            ky = i*dky;
        else
            ky = (i - ny)*dky;
        for (j = 0; j < nx; j++)
        {            
            if (j < half_nx)
                kx = j*dkx;
            else
                kx = (j - nx)*dkx;

            exp_fact             = exp(-efact*(kx*kx + ky*ky));
            ampl_real[nx*i + j] *= exp_fact;
            ampl_imag[nx*i + j] *= exp_fact;
        }            
    }

    if ((status = frame_fft(ampl, -1)) < 0)
        return status;

    return 1;
}

/*  Function frame_fft
 *  This function Fourier transforms, in place, the contents of the array
 *  dat, assuming it to be arranged as a rectangular array with ny rows
 *  and nx columns (nx and ny are external).  The real parts occupy the
 *  first nx*ny elements and the imaginary parts the following nx*ny
 *  elements.  Every row is transformed first, then every column.  The
 *  argument mode is passed on to fft and selects the direction of the
 *  transform.
 *
 *  Returns 1 on success, or one of:
 *    WRONG_NUM_COLS  nx is not a positive power of 2;
 *    ROW_FAILURE     the work buffer could not be allocated, or fft
 *                    failed on a row;
 *    COL_FAILURE     ny is not a positive power of 2, or fft failed on a
 *                    column.
 *  The work buffer is released on every return path.  */

int frame_fft(double dat[], int mode)
{
    int i, j, n_max;
    double *fast_dat;

    if (nx < 1 || count_bits(nx) < 0)
        return WRONG_NUM_COLS;
    if (ny < 1 || count_bits(ny) < 0)
        return COL_FAILURE;

    /*  One buffer, large enough for a complex row or a complex column.  */

    n_max    = (nx > ny) ? nx : ny;
    fast_dat = (double *)malloc(2*(size_t)n_max*sizeof(double));
    if (fast_dat == NULL)
        return ROW_FAILURE;

    /*  Fast-Fourier transform the data in the horizontal direction.  */

    for (i = 0; i < ny; i++)
    {
        for (j = 0; j < nx; j++)
        {
            fast_dat[j]      = dat[i*nx + j];
            fast_dat[nx + j] = dat[(ny + i)*nx + j];
        }
        if (fft(nx, fast_dat, mode) == -1)
        {
            free(fast_dat);
            return ROW_FAILURE;
        }
        for (j = 0; j < nx; j++)
        {
            dat[i*nx + j]        = fast_dat[j];
            dat[(ny + i)*nx + j] = fast_dat[nx + j];
        }
    }

    /*  Fast-Fourier transform the data in the vertical direction.  */

    for (i = 0; i < nx; i++)
    {
        for (j = 0; j < ny; j++)
        {
            fast_dat[j]          = dat[j*nx + i];
            fast_dat[ny + j]     = dat[(ny + j)*nx + i];
        }
        if (fft(ny, fast_dat, mode) == -1)
        {
            free(fast_dat);
            return COL_FAILURE;
        }
        for (j = 0; j < ny; j++)
        {
            dat[j*nx + i]        = fast_dat[j];    
            dat[(ny + j)*nx + i] = fast_dat[ny + j];
        }
    }

    free(fast_dat);

    return 1;
}

/*  Function count_bits
 *  If npts is a power of 2 this function returns its base-2 logarithm
 *  (0 for npts == 1).  Otherwise the result is negative:  for npts > 1
 *  it is minus the number of right shifts needed to reduce npts to 1,
 *  and for npts < 1 it is -1.  Callers need only test the sign.  */

int count_bits(int npts)
{
    int i, num_bits, sign;

    /*  Shifting a non-positive value right never reaches 1.  */
    if (npts < 1)
        return -1;

    for (num_bits = 0, i = npts, sign = 1; i != 1; i >>= 1)
    {
        /*  A set bit below the leading one means not a power of 2.  */
        if ((i & 1) != 0)
            sign = -1;
        num_bits++; 
    }

    return sign*num_bits;
}

/*  Function fft
 *  Fast-Fourier transforms, in place, the complex array dat of npts
 *  points.  The real parts are dat[0 .. npts-1] and the imaginary parts
 *  are dat[npts .. 2*npts-1] (see OFFSET below).  A non-negative mode
 *  applies the phase factor exp(+i*2*pi*j*k/npts), a negative mode its
 *  complex conjugate.  Both directions are scaled by 1/sqrt(npts), so a
 *  transform followed by the opposite transform restores the data.
 *
 *  Returns 1 on success.  If npts is not a positive power of 2 the value
 *  -1 is returned and dat is left untouched.  */

/*  OFFSET is the distance from a real part to its imaginary part and STEP
 *  is the stride between successive points;  both are also used by
 *  bit_rev, where OFFSET again expands to that function's npts.  */

#define OFFSET                 npts
#define STEP                      1

int fft(int npts, double dat[], int mode)
{
    unsigned int n, i, j, k_e, k_o, seg_length;
    double theta, wave_real, wave_imag, dwave_real, dwave_imag;
    double factor, tmp_real, tmp_imag, *dat_real, *dat_imag;

    /*  Reject lengths the radix-2 algorithm cannot handle.  */

    if (npts < 1 || (npts & (npts - 1)) != 0)
        return -1;
    n = (unsigned int)npts;

    dat_real = dat;
    dat_imag = dat + OFFSET;

    bit_rev(npts, dat);

    /*  Combine transforms of length seg_length pairwise into transforms
     *  of twice that length until the whole array is one transform.  */

    for (seg_length = 1; seg_length < n; seg_length <<= 1)
    {
        /*  (dwave_real, dwave_imag) = exp(+-i*theta) - 1, with the real
         *  part written as -2*sin^2(theta/2).  */

        theta       = pi/(double)seg_length;
        dwave_real  = sin(0.5*theta);
        dwave_real *= -2.*dwave_real;

        if (mode >= 0)
            dwave_imag =  sin(theta);
        else
            dwave_imag = -sin(theta);

        wave_real  = 1.0;
        wave_imag  = 0.0;
        for (j = 0; j < seg_length; j++)
        {
            for (k_e = STEP*j; k_e < STEP*n; k_e += (STEP*seg_length) << 1)
            {
                k_o = k_e + STEP*seg_length;

                tmp_real       = wave_real*dat_real[k_o]
                               - wave_imag*dat_imag[k_o];
                tmp_imag       = wave_imag*dat_real[k_o]
                               + wave_real*dat_imag[k_o];

                dat_real[k_o]  = dat_real[k_e] - tmp_real;
                dat_imag[k_o]  = dat_imag[k_e] - tmp_imag;
                dat_real[k_e] += tmp_real;
                dat_imag[k_e] += tmp_imag;
            }

            /*  Advance the phase factor by one step of theta.  */

            tmp_real   = wave_real;
            wave_real += tmp_real*dwave_real 
                       - wave_imag*dwave_imag;
            wave_imag += tmp_real*dwave_imag 
                       + wave_imag*dwave_real;
        }
    }

    factor = 1./sqrt((double)npts);
    for (i = 0; i < n; i += STEP)
    {
        dat_real[i] *= factor;
        dat_imag[i] *= factor;
    }

    return 1;
}

/* Function bit_rev
 * Permutes the npts complex points of dat into bit-reversed order:  the
 * point at index i is exchanged with the point at the index obtained by
 * reversing the bits of i.  Real and imaginary parts (separated by OFFSET)
 * are moved together.  npts is expected to be a power of 2.  Always
 * returns 1.  */

int bit_rev(int npts, double dat[])
{
    double *re = dat;
    double *im = dat + OFFSET;
    double hold;
    int    top_bit = npts >> 1;
    int    fwd, rev, bit, pos_fwd, pos_rev;

    rev = 0;
    for (fwd = 0; fwd < npts; fwd++)
    {
        /*  Exchange each pair once only, when the reversed index leads.  */
        if (rev > fwd)
        {
            pos_fwd = STEP*fwd;
            pos_rev = STEP*rev;

            hold        = re[pos_fwd];
            re[pos_fwd] = re[pos_rev];
            re[pos_rev] = hold;

            hold        = im[pos_fwd];
            im[pos_fwd] = im[pos_rev];
            im[pos_rev] = hold;
        }

        /*  Add one to the reversed index, carrying from the top bit
         *  downwards:  clear leading set bits, then set the first clear
         *  one (bit is 0 when every bit was set).  */
        bit = top_bit;
        while (bit > 0 && rev >= bit)
        {
            rev  -= bit;
            bit >>= 1;
        }
        rev += bit;
    }

    return 1;
}
