/*--------------------------------------------------------------------
 *    $Id: blockmedian.c,v 1.47 2007/06/27 23:31:20 guru Exp $
 *
 *	Copyright (c) 1991-2007 by P. Wessel and W. H. F. Smith
 *	See COPYING file for copying and redistribution conditions.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; version 2 of the License.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	Contact info: gmt.soest.hawaii.edu
 *--------------------------------------------------------------------*/

/*
   blockmedian.c
   Takes lon, lat, data, [weight] on GMT_stdin or file and writes out one value
   per cell, where cellular region is bounded by West East South North and
   cell dimensions are delta_x, delta_y.
      
   Author: 	Walter H. F. Smith
   Date:	28 June, 1988
   Modified	26 April, 1991 for gmt v2.0 by whfs smith;
		added dynamic memory allocation.
   Modified:	3 Jan 1995 by PW for gmt 3.0
   Modified:	3 May 1998 by PW for gmt 3.1
   Modified:	29 Oct 1998 by WHFS to add -Q option
   Modified:	3.3.5: 10 Jul 2000 by PW to add -L
   Version:	3.4 01-MAR-2001 by PW, Use -F instead of -N, and add -C
   Version:	4 01-MAR-2003 by PW
   Version:	4.1: 14-SEP-2005 by PW, Added enhanced -I
   Version	4.1.2: 24-MAR-2006 by PW: No longer defines global variables. Use double to hold data.
   		4-APR-2006 by PW: Added -E for L1 scale, low, and high value, and -T to set quartile
			Also implemented size_t counters to be 64-bit compatible.
*/

#define BLOCKMEDIAN

#include "gmt.h"
#include "block_subs.h"

/* main: entry point of blockmedian.
 *
 * 1. Parses the command line (common GMT options plus -C -E -F -I -L -N -Q -T -W).
 * 2. Reads (x, y, z[, w]) records from the named files, or from GMT_stdin when no
 *    file is given, and keeps those that fall inside the -R region, tagging each
 *    with the 1-D index of the block it belongs to.
 * 3. Sorts the kept records on block index and z, then walks the sorted array one
 *    block at a time, calling median_output for each block and writing one output
 *    record per non-empty block via GMT_output.
 *
 * Exits with EXIT_FAILURE on usage/syntax errors or on a field-count mismatch in
 * the input, otherwise with EXIT_SUCCESS.
 */
int main (int argc, char **argv)
{

	BOOLEAN	error = FALSE, nofile = TRUE, done = FALSE, first = TRUE;
	/* Default value for go_quickly = FALSE for backward compatibility with 3.0  */

	FILE *fp = NULL;

	double	*in, out[7], wesn[4], weight, *z_tmp = NULL;

	int	i, ix, iy, fno, n_files = 0, n_args, n_req, w_col;
	int	n_expected_fields, n_fields, n_out, go_quickly = 0;

	GMT_LONG	nz, n_read, n_pitched, n_cells_filled, n_lost, index;
	GMT_LONG	first_in_cell, first_in_new_cell;

	size_t	n_alloc, nz_alloc;

	char	modifier, buffer[BUFSIZ], format[BUFSIZ];

	struct GRD_HEADER h;

	struct BLK_DATA *data;
	struct BLOCKMEDIAN_CTRL *Ctrl;

	/* Defined after main in this file, hence the local prototype */
	void median_output (struct GRD_HEADER *h, GMT_LONG first_in_cell, GMT_LONG first_in_new_cell, double weight_sum, double *xx, double *yy, double *zz, int go_quickly, double quartile, struct BLK_DATA *data);

	argc = GMT_begin (argc, argv);

	Ctrl = (struct BLOCKMEDIAN_CTRL *) New_Blockmedian_Ctrl ();	/* Allocate and initialize a new control structure */

	GMT_grd_init (&h, argc, argv, FALSE);

	/* Pass 1 over argv: process options and count the non-option arguments (input files) */

	for (i = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {

				/* Common parameters */

				case 'H':
				case 'R':
				case 'V':
				case ':':
				case 'b':
				case 'f':
				case '\0':
					error += GMT_parse_common_options (argv[i], &h.x_min, &h.x_max, &h.y_min, &h.y_max);
					break;

				/* Supplemental parameters */

				case 'C':	/* Report block centers instead of median locations */
					Ctrl->C.active = TRUE;
					break;
				case 'E':
					Ctrl->E.active = TRUE;		/* Extended report with L1 scale, low, and high value in cols 4-6 */
					break;
				case 'I':
					Ctrl->I.active = TRUE;
					if (GMT_getinc (&argv[i][2], &Ctrl->I.xinc, &Ctrl->I.yinc)) {
						GMT_inc_syntax ('I', 1);
						error = TRUE;
					}
					break;
				case 'L':	/* Obsolete, but backward compatibility prevails [use -f instead] */
					GMT_io.in_col_type[GMT_X] = GMT_io.out_col_type[GMT_X] = GMT_IS_LON;
					GMT_io.in_col_type[GMT_Y] = GMT_io.out_col_type[GMT_Y] = GMT_IS_LAT;
					fprintf (stderr, "%s: Option -L is obsolete (but is processed correctly).  Please use -f instead\n", GMT_program);
					break;
				case 'N':	/* Backward compatible with 3.3.6 */
				case 'F':
					Ctrl->F.active = TRUE;
					break;
				case 'Q':
					Ctrl->Q.active = TRUE;		/* Quick mode: report the x,y found at the median z */
					break;
				case 'T':
					Ctrl->T.active = TRUE;		/* Report the given quartile of z instead of the median */
					Ctrl->T.quartile = atof (&argv[i][2]);	/* Range is validated below (0 < q < 1) */
					break;
				case 'W':
					Ctrl->W.active = TRUE;
					if ( (modifier = argv[i][2]) == 'i' || modifier == 'I')
						Ctrl->W.weighted[GMT_IN] = TRUE;
					else if (modifier == 'O' || modifier == 'o')
						Ctrl->W.weighted[GMT_OUT] = TRUE;
					else
						Ctrl->W.weighted[GMT_IN] = Ctrl->W.weighted[GMT_OUT] = TRUE;
					break;

				default:
					error = TRUE;
					GMT_default_error (argv[i][1]);
					break;
			}
		}
		else
			n_files++;
	}

	if (argc == 1 || GMT_give_synopsis_and_exit) {
		fprintf (stderr, "blockmedian %s - Block averaging by L1 norm\n\n", GMT_VERSION);
		fprintf (stderr, "usage: blockmedian [infile(s)] %s %s\n", GMT_I_OPT, GMT_Rgeo_OPT);
		fprintf (stderr, "\t[-C] [-E] [-F] [%s] [-Q] [-T<q>] [-V] [-W[i][o] ] [%s] [%s]\n", GMT_H_OPT, GMT_t_OPT, GMT_b_OPT);
		fprintf (stderr, "\t[%s]\n\n", GMT_f_OPT);

		if (GMT_give_synopsis_and_exit) exit (EXIT_FAILURE);

		GMT_inc_syntax ('I', 0);
		GMT_explain_option ('R');
		fprintf (stderr, "\n\tOPTIONS:\n");
		fprintf (stderr, "\t-C Output center of block and median z-value  [Default is median location (but see -Q)]\n");
		fprintf (stderr, "\t-E Extend output with L1 scale (s), low (l), and high (h) value per block, i,e,\n");
		fprintf (stderr, "\t   output (x,y,z,s,l,h[,w]) [Default outputs (x,y,z[,w]); see -W regarding w.\n");
		fprintf (stderr, "\t-F Offsets registration so block edges are on gridlines (pixel reg.).  [Default: grid reg.]\n");
		GMT_explain_option ('H');
		fprintf (stderr, "\t-Q Quicker; get median z and x,y at that z.  [Default gets median x, median y, median z.]\n");
		fprintf (stderr, "\t-T Set quartile (0 < q < 1) to report [Default is 0.5 which is the median z]\n");
		GMT_explain_option ('V');
		fprintf (stderr, "\t-W sets Weight options.\n");
		fprintf (stderr, "\t-  -Wi reads Weighted Input (4 cols: x,y,z,w) but writes only (x,y,z[,s,l,h]) Output.\n");
		fprintf (stderr, "\t   -Wo reads unWeighted Input (3 cols: x,y,z) but reports sum (x,y,z[,s,l,h],w) Output.\n");
		fprintf (stderr, "\t   -W with no modifier has both weighted Input and Output; Default is no weights used.\n");
		GMT_explain_option (':');
		GMT_explain_option ('i');
		GMT_explain_option ('n');
		fprintf (stderr, "\t   Default is 3 [6 with -E] columns (or 4 [7] if -W is set).\n");
		GMT_explain_option ('o');
		GMT_explain_option ('n');
		GMT_explain_option ('f');
		GMT_explain_option ('.');
		exit (EXIT_FAILURE);
	}

	/* go_quickly is not assigned until after the syntax checks, so the conflict
	 * between -C and -Q must be detected from the option flags themselves.
	 * The actual override happens where go_quickly is set below. */
	if (Ctrl->C.active && Ctrl->Q.active) {
		fprintf (stderr, "%s: GMT WARNING:  -C overrides -Q\n", GMT_program);
	}

	if (!project_info.region_supplied) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  Must specify -R option\n", GMT_program);
		error++;
	}
	if (Ctrl->T.quartile <= 0.0 || Ctrl->T.quartile >= 1.0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR:  0 < q < 1 for quartile in -T [0.5]\n", GMT_program);
		error++;
	}
	if (Ctrl->I.xinc <= 0.0 || Ctrl->I.yinc <= 0.0) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR -I option.  Must specify positive increment(s)\n", GMT_program);
		error = TRUE;
	}
	if (GMT_io.binary[GMT_IN] && GMT_io.io_header[GMT_IN]) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR.  Binary input data cannot have header -H\n", GMT_program);
		error++;
	}
	n_req = (Ctrl->W.weighted[GMT_IN]) ? 4 : 3;	/* x, y, z and optionally the weight */
	if (GMT_io.binary[GMT_IN] && GMT_io.ncol[GMT_IN] == 0) GMT_io.ncol[GMT_IN] = n_req;
	if (GMT_io.binary[GMT_IN] && n_req > GMT_io.ncol[GMT_IN]) {
		fprintf (stderr, "%s: GMT SYNTAX ERROR.  binary input data must have at least %d columns\n", GMT_program, n_req);
		error++;
	}

	if (error) exit (EXIT_FAILURE);

	if (GMT_io.binary[GMT_IN] && gmtdefs.verbose) {
		char *type[2] = {"double", "single"};
		fprintf (stderr, "%s: Expects %d-column %s-precision binary data\n", GMT_program, GMT_io.ncol[GMT_IN], type[GMT_io.single_precision[GMT_IN]]);
	}

#ifdef SET_IO_MODE
	GMT_setmode (GMT_OUT);
#endif

	h.x_inc = Ctrl->I.xinc;
	h.y_inc = Ctrl->I.yinc;
	h.node_offset = Ctrl->F.active;
	GMT_RI_prepare (&h);	/* Ensure -R -I consistency and set nx, ny */

	/* go_quickly: 0 = median x, y, z;  1 = x,y taken at median z (-Q);  2 = block center (-C, wins over -Q) */
	go_quickly = (Ctrl->Q.active) ? 1 : 0;
	if (Ctrl->C.active) go_quickly = 2;	/* Flag used in output calculation */
	h.xy_off = 0.5 * h.node_offset;		/* Use to calculate mean location of block */

	if (gmtdefs.verbose) {
		sprintf (format, "%%s: W: %s E: %s S: %s N: %s nx: %%d ny: %%d\n", gmtdefs.d_format, gmtdefs.d_format, gmtdefs.d_format, gmtdefs.d_format);
		fprintf (stderr, format, GMT_program, h.x_min, h.x_max, h.y_min, h.y_max, h.nx, h.ny);
	}

	n_read = n_pitched = 0;
	n_alloc = GMT_CHUNK;
	data = (struct BLK_DATA *) GMT_memory (VNULL, n_alloc, sizeof(struct BLK_DATA), GMT_program);

	GMT_set_xy_domain (wesn, &h);	/* May include some padding if gridline-registered */

	/* Read the input data  */

	n_expected_fields = (GMT_io.binary[GMT_IN]) ? GMT_io.ncol[GMT_IN] : 3 + Ctrl->W.weighted[GMT_IN];

	if (n_files > 0)
		nofile = FALSE;
	else
		n_files = 1;
	n_args = (argc > 1) ? argc : 2;	/* At least one trip through the loop so standard input gets read */

	for (fno = 1; !done && fno < n_args; fno++) {	/* Loop over input files, if any */
		if (!nofile && argv[fno][0] == '-') continue;	/* Skip options; only file names are opened */

		if (nofile) {	/* Just read standard input */
			fp = GMT_stdin;
			done = TRUE;
#ifdef SET_IO_MODE
			GMT_setmode (GMT_IN);
#endif
		}
		else if ((fp = GMT_fopen (argv[fno], GMT_io.r_mode)) == NULL) {
			fprintf (stderr, "%s: Cannot open file %s\n", GMT_program, argv[fno]);
			continue;
		}

		if (!nofile && gmtdefs.verbose) fprintf (stderr, "%s: Working on file %s\n", GMT_program, argv[fno]);

		if (GMT_io.io_header[GMT_IN]) {	/* Consume header records; echo those of the first file if output headers are on */
			for (i = 0; i < GMT_io.n_header_recs; i++) {
				size_t len;

				if (GMT_fgets (buffer, BUFSIZ, fp) == NULL) break;	/* File ended inside the header block */
				len = strlen (buffer);
				if (len > 0 && buffer[len-1] == '\n') buffer[len-1] = 0;	/* Strip the newline only if there is one */
				if (first && GMT_io.io_header[GMT_OUT]) {
					if (Ctrl->W.weighted[GMT_OUT] && !(Ctrl->W.weighted[GMT_IN]))	/* Output gains a weight column the input lacks */
						fprintf (GMT_stdout, "%s weights\n", buffer);
					else
						fprintf (GMT_stdout, "%s\n", buffer);
				}
			}
			first = FALSE;
		}

		while ((n_fields = GMT_input (fp, &n_expected_fields, &in)) >= 0 && !(GMT_io.status & GMT_IO_EOF)) {	/* Not yet EOF */

			if (GMT_io.status & GMT_IO_MISMATCH) {
				fprintf (stderr, "%s: Mismatch between actual (%d) and expected (%d) fields near line ", GMT_program, n_fields,  n_expected_fields);
				PRINT_SIZE_T (stderr, n_read);
				fprintf (stderr, "\n");
				exit (EXIT_FAILURE);
			}

			if (GMT_is_dnan (in[2])) continue;	/* Skip when z = NaN */

			n_read++;

			if (GMT_y_is_outside (in[GMT_Y],  wesn[2], wesn[3])) continue;	/* Outside y-range */
			if (GMT_x_is_outside (&in[GMT_X], wesn[0], wesn[1])) continue;	/* Outside x-range */

			ix = GMT_x_to_i (in[GMT_X], h.x_min, h.x_inc, h.xy_off, h.nx);
			if ( ix < 0 || ix >= h.nx ) continue;
			iy = GMT_y_to_j (in[GMT_Y], h.y_min, h.y_inc, h.xy_off, h.ny);
			if ( iy < 0 || iy >= h.ny ) continue;

			index = GMT_IJ (iy, ix, h.nx);		/* 64-bit safe 1-D index */

			data[n_pitched].i = index;
			data[n_pitched].a[BLK_W] = ((Ctrl->W.weighted[GMT_IN]) ? in[3] : 1.0);
			if (!Ctrl->C.active) {	/* With -C the block center is reported, so x and y are not stored */
				data[n_pitched].a[BLK_X] = in[GMT_X];
				data[n_pitched].a[BLK_Y] = in[GMT_Y];
			}
			data[n_pitched].a[BLK_Z] = in[2];

			n_pitched++;
			if (n_pitched == (GMT_LONG)n_alloc) {	/* Array is full; grow by another chunk */
				n_alloc += GMT_CHUNK;
				data = (struct BLK_DATA *) GMT_memory ((void *)data, n_alloc, sizeof (struct BLK_DATA), GMT_program);
			}
		}
		if (fp != GMT_stdin) GMT_fclose(fp);

	}

	/* Trim the array to the number of records actually kept */
	n_alloc = (size_t)n_pitched;
	data = (struct BLK_DATA *) GMT_memory ((void *)data, n_alloc, sizeof (struct BLK_DATA), GMT_program);

	n_lost = n_read - n_pitched;
	if (gmtdefs.verbose) fprintf(stderr,"%s: N read: %ld N used: %ld N outside_area: %ld\n", GMT_program, n_read, n_pitched, n_lost);

	/* Ready to go. */

	n_out = (Ctrl->W.weighted[GMT_OUT]) ? 4 : 3;
	if (Ctrl->E.active) n_out += 3;
	w_col = n_out - 1;	/* Weights always reported in last output column */
	nz_alloc = GMT_CHUNK;
	if (Ctrl->E.active) z_tmp = (double *) GMT_memory (VNULL, nz_alloc, sizeof (double), GMT_program);	/* tmp array for L1 scale calculation */

	/* Sort on index and Z value */

	qsort((void *)data, (size_t)n_pitched, sizeof (struct BLK_DATA), BLK_compare_index_z);

	/* Find n_in_cell and write appropriate output  */

	first_in_cell = 0;
	n_cells_filled = 0;
	while (first_in_cell < n_pitched) {
		/* Scan forward over all records sharing this block index, summing their weights */
		weight = data[first_in_cell].a[BLK_W];
		if (Ctrl->E.active) z_tmp[0] = data[first_in_cell].a[BLK_Z];
		nz = 1;
		first_in_new_cell = first_in_cell + 1;
		while ( (first_in_new_cell < n_pitched) && (data[first_in_new_cell].i == data[first_in_cell].i) ) {
			weight += data[first_in_new_cell].a[BLK_W];
			if (Ctrl->E.active) {	/* Must get a temporary copy of the sorted z array */
				z_tmp[nz] = data[first_in_new_cell].a[BLK_Z];
				nz++;
				if (nz == (GMT_LONG)nz_alloc) {
					nz_alloc += GMT_CHUNK;
					z_tmp = (double *) GMT_memory ((void *)z_tmp, nz_alloc, sizeof (double), GMT_program);
				}
			}
			first_in_new_cell++;
		}
		median_output(&h, first_in_cell, first_in_new_cell, weight, &out[GMT_X], &out[GMT_Y], &out[2], go_quickly, Ctrl->T.quartile, data);
		if (Ctrl->E.active) {
			out[4] = z_tmp[0];	/* Low value */
			out[5] = z_tmp[nz-1];	/* High value */
			/* Turn z_tmp into absolute deviations from the median (out[2]) */
			if (nz > 1) {
				for (index = 0; index < nz; index++) z_tmp[index] = fabs (z_tmp[index] - out[2]);
				qsort ((void *)z_tmp, (size_t)nz, sizeof (double), GMT_comp_double_asc);
				out[3] = (nz%2) ? z_tmp[nz/2] : 0.5 * (z_tmp[(nz-1)/2] + z_tmp[nz/2]);
				out[3] *= 1.4826;	/* This will be L1 MAD-based scale */
			}
			else	/* A single point has no spread */
				out[3] = GMT_d_NaN;
		}
		if (Ctrl->W.weighted[GMT_OUT]) out[w_col] = weight;

		GMT_output (GMT_stdout, n_out, out);

		n_cells_filled++;
		first_in_cell = first_in_new_cell;
	}

	if (gmtdefs.verbose) fprintf(stderr,"%s: N_cells_filled: %ld\n", GMT_program, n_cells_filled);

	GMT_free ((void *)data);
	if (Ctrl->E.active) GMT_free ((void *)z_tmp);

	GMT_end (argc, argv);

	Free_Blockmedian_Ctrl (Ctrl);	/* Deallocate control structure */

	exit (EXIT_SUCCESS);
}

/* blockmedian_find: walk the records data[first..last] (inclusive), accumulating
 * their weights until the running sum is no longer below target, and return the
 * index where the walk stopped.  The walk never leaves [first, last], so zero or
 * negative weights cannot push it past the end of the cell.
 * On return *straddle is nonzero when the running sum equals target exactly AND a
 * following record exists inside the cell, i.e. when the caller should average
 * the returned record with the next one.
 */
static GMT_LONG blockmedian_find (struct BLK_DATA *data, GMT_LONG first, GMT_LONG last, double target, int *straddle)
{
	GMT_LONG k = first;
	double running = data[first].a[BLK_W];

	while (running < target && k < last) {
		k++;
		running += data[k].a[BLK_W];
	}
	*straddle = (running == target && k < last);
	return (k);
}

/* median_output: compute the output location and z-value for one block.
 *
 *   h                  grid header; used only to convert the block index to a center (go_quickly == 2)
 *   first_in_cell      index in data of the first record of the block
 *   first_in_new_cell  index one past the last record of the block
 *   weight_sum         sum of the weights of the records in the block
 *   xx, yy, zz         receive the reported x, y and z
 *   go_quickly         0: weighted median of x and of y;  1: x,y of the record(s) at the
 *                      chosen z quartile;  2: center of the block
 *   quartile           fraction of weight_sum at which z is picked (0.5 gives the median)
 *   data               record array; the block's records must be sorted on z on entry.
 *                      When go_quickly == 0 and the block holds more than two records
 *                      they are re-sorted in place (on x, then on y).
 */
void median_output (struct GRD_HEADER *h, GMT_LONG first_in_cell, GMT_LONG first_in_new_cell, double weight_sum, double *xx, double *yy, double *zz, int go_quickly, double quartile, struct BLK_DATA *data)
{
	double	weight_half;
	GMT_LONG index, n_in_cell, last_in_cell;
	int	straddle;

	n_in_cell = first_in_new_cell - first_in_cell;
	last_in_cell = first_in_new_cell - 1;

	/* Data are already sorted on z  */

	/* Determine the point where we hit the desired quartile */

	weight_half = quartile * weight_sum;	/* Normally, quartile will be 0.5 hence the name of the variable */
	index = blockmedian_find (data, first_in_cell, last_in_cell, weight_half, &straddle);
	if (straddle) {	/* Quartile falls exactly between two records: average them */
		*xx = 0.5 * (data[index].a[BLK_X] + data[index + 1].a[BLK_X]);
		*yy = 0.5 * (data[index].a[BLK_Y] + data[index + 1].a[BLK_Y]);
		*zz = 0.5 * (data[index].a[BLK_Z] + data[index + 1].a[BLK_Z]);
	}
	else {
		*xx = data[index].a[BLK_X];
		*yy = data[index].a[BLK_Y];
		*zz = data[index].a[BLK_Z];
	}

	/* Now get median x and median y if quick x and quick y not wanted:  */

	if (go_quickly == 1) return;

	if (go_quickly == 2) {	/* Get center of block */
		int i, j;
		j = (int)(data[index].i / (GMT_LONG)h->nx);
		i = (int)(data[index].i % (GMT_LONG)h->nx);
		*xx = GMT_i_to_x (i, h->x_min, h->x_max, h->x_inc, h->xy_off, h->nx);
		*yy = GMT_j_to_y (j, h->y_min, h->y_max, h->y_inc, h->xy_off, h->ny);
		return;
	}

	/* We get here when we need median x,y locations */

	weight_half = 0.5 * weight_sum;	/* We want the median location */

	/* With one or two records the result does not depend on their order, so sorting is skipped */
	if (n_in_cell > 2) qsort((void *)&data[first_in_cell], (size_t)n_in_cell, sizeof (struct BLK_DATA), BLK_compare_x);
	index = blockmedian_find (data, first_in_cell, last_in_cell, weight_half, &straddle);
	if (straddle)
		*xx = 0.5 * (data[index].a[BLK_X] + data[index + 1].a[BLK_X]);
	else
		*xx = data[index].a[BLK_X];

	if (n_in_cell > 2) qsort((void *)&data[first_in_cell], (size_t)n_in_cell, sizeof (struct BLK_DATA), BLK_compare_y);
	index = blockmedian_find (data, first_in_cell, last_in_cell, weight_half, &straddle);
	if (straddle)
		*yy = 0.5 * (data[index].a[BLK_Y] + data[index + 1].a[BLK_Y]);
	else
		*yy = data[index].a[BLK_Y];
}

#include "block_subs.c"
