#!/usr/bin/python

"""
copy_dir_files_to_erddap.py

  - copies all the files of a directory to the erddap data server.
    ECN's ftp server doubles as the erddap data server for those data sets
    that include a datasets.xml data-set specification on the erddap server
    (data sets on the ftp server that do not have a datasets.xml are not
    discoverable by the erddap server)
  - if file is an HDF file, converts it to NetCDF-4 (netcdf-4 is preferred
    for ERDDAP)
  - if the data in the file is in mercator projection, converts it to
    geographic projection (geographic displays better on ERDDAP than mercator)
  - conversion occurs in a dedicated processing directory
            /disks/data563/scripts/utilities/erddap_tmp_ts/
    so that ERDDAP does not "discover" a file while it is in the process
    of being converted (which messes up ERDDAP)
  - after conversion, this script copies the .nc4 file from the dedicated
    processing directory to the ECN ftp directory, where this script renames
    it as .nc.  Again, this is to prevent ERDDAP from "discovering" files
    while they are in the process of being copied (which messes up ERDDAP) 
  - all the hdf files of the input directory are converted and copied to the
    ftp/erddap directory.  Therefore,
         THIS SCRIPT IS USED FOR COPYING ENTIRE TIME SERIES OF FILES
    (use a different script for copying NRT files to the ftp/erddap server)
  - conversion to netcdf4 and reprojection to geographic is performed with the
    CW Utilities Library

N.B.!!!  Unlike copy_to_ftp.sh (which copies NRT files), this script
         DOES NOT check for sensor/product or region to determine whether
         the product is an ERDDAP product or whether region is EC.
         Rather, this script simply reads all files in a directory and
         reprojects, converts format to nc4, and renames to nc.
         THEREFORE, ONLY USE THIS SCRIPT FOR DIRECTORIES THAT ARE INTENDED
         FOR ERDDAP AND ARE 'EC' REGION!

usage: ./copy_dir_files_to_erddap.py --from_dir [source_dir] --to_dir [destination_dir] --geogr_geomaster [dir location of geomaster + geomaster filename]

Ron Vogel, SMRC for NOAA CoastWatch
Dec 30, 2016 - original script: copy_dir_files_to_newdir.py

Oct 9, 2019
- modified from copy_dir_files_to_newdir.py for handling the data
  modifications for ERDDAP (netcdf format, geographic projection, file renaming)

  As of now, ECN's daily composite data are not included on ERDDAP. Only
  intervals higher than daily (3-day, 7day, monthly, etc) are included.
  IF DAILY IS EVENTUALLY INCLUDED, it is better for ERDDAP to have daily
  data split into yearly directories, like this
      /data/aftp/socd1/ecn/data/avhrr/sst/daily/ec/2008/<datafiles.nc>
      /data/aftp/socd1/ecn/data/avhrr/sst/daily/ec/2009/<datafiles.nc>
      etc.
  (The reason for this is that daily data has _many_ files, and ERDDAP
  handles display poorly if a data directory contains more than 1000's of
  files.)
  If yearly directories are needed, generate the directories manually on the
  FTP server, and then MODIFY this script to read the year from the filename
  and place the file in the correct yearly subdirectory.

Aug 17, 2021
- added geographic-projection geomaster file as required input argument.
  Geographic-projection geomaster files are needed to ensure output for
  ERDDAP maintains consistent lat/lon values across CW-Utilities versions.
    Note: the generic projection command 'cwregister2 --proj=geo' 
          CHANGED its computation between Utils v.3.5.1 and 3.6.1, 
          so lat/lons were different in the same data product's time series 
          when hdf files were made with different Utils version 

          Inconsistent lat/lon values cause ERDDAP to not display the
          full time series (the files with out-of-family lat/lons are not
          displayed)
"""

import argparse
import commands
import os
#import re
import shutil
import subprocess
import sys


def _run_command(argv):
  """Run an external program and return (status, output).

  argv is the program name followed by its arguments, passed without a
  shell so that paths containing spaces or shell metacharacters are safe.
  status is the program's exit code (non-zero means failure); output is the
  combined stdout/stderr text with surrounding whitespace stripped.
  If the program cannot be started at all, (127, <error text>) is returned
  instead of raising.
  """
  try:
    proc = subprocess.Popen(argv,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            universal_newlines=True)
  except OSError as err:
    # executable missing or not runnable: report it like a failed command
    return (127, '%s: %s' % (argv[0], err))
  output = proc.communicate()[0]
  return (proc.returncode, output.strip())


def _report_failure(message, tool_output):
  """Print an error line and write the tool's own message to stderr."""
  print(message)
  sys.stderr.write(tool_output)
  print('\n')


def export_ncdf(hdf_source_file, nc_destination_dir, geogr_proj_dirfile,
                erddap_tmp_ts='/disks/data563/scripts/utilities/erddap_tmp_ts'):
  """Convert one HDF file to NetCDF-4 and publish it to the ERDDAP/FTP dir.

  Steps:
    1) read the file's "projection" attribute with hdatt
    2) if the projection is mercator, reproject to geographic with
       cwregister2, using the geographic geomaster file (written to the
       dedicated ERDDAP processing directory)
    3) convert the (possibly reprojected) HDF file to NetCDF-4 with cwexport,
       writing the .nc4 file in the dedicated ERDDAP processing directory
    4) move the .nc4 file to the ERDDAP/FTP server directory
    5) rename the file from .nc4 to .nc at the ERDDAP/FTP server directory

  Input parameters:
    hdf_source_file    - HDF filename with source directory path
    nc_destination_dir - netCDF destination directory path
    geogr_proj_dirfile - geographic-projection geomaster file (with path),
                         passed to cwregister2 --master
    erddap_tmp_ts      - dedicated processing directory for intermediate
                         files (defaults to the standard ERDDAP tmp dir)

  Returns True if the .nc file was published.  Returns False, after printing
  the tool's error message, if cwregister2 or cwexport failed; nothing is
  moved to the destination in that case.  Errors from moving/renaming the
  file (e.g. missing destination directory) still propagate as exceptions.
  """

  hdf_file = os.path.basename(hdf_source_file)

  # file name without its extension (only the final extension is dropped,
  # so an 'hdf' appearing earlier in the name is left alone)
  stem = os.path.splitext(hdf_file)[0]

  # intermediate files in the processing directory
  hdf_geog_path = os.path.join(erddap_tmp_ts, stem + '.hdf_geog')
  nc_tmp_path = os.path.join(erddap_tmp_ts, stem + '.nc4')
  # final files in the ERDDAP/FTP directory (.nc4 while moving, then .nc)
  nc_dest_path = os.path.join(nc_destination_dir, stem + '.nc4')
  nc_rename_path = os.path.join(nc_destination_dir, stem + '.nc')

  print('Converting for erddap: ' + hdf_file)

  try:
    # ck if hdf file's data is in mercator projection
    (status, output) = _run_command(
        ['hdatt', '--name', 'projection', hdf_source_file])

    export_input = hdf_source_file
    if output in ('Mercator', 'mercator'):
      # run cwutils cwregister2 to convert mercator projection to geographic
      # (can't use --proj=geo because 'proj' computation may change in
      # future CW-Utils versions, which would make a data product's lat/lon
      # values inconsistent across the time series (which ERDDAP can't
      # display) )
      (status, output) = _run_command(
          ['cwregister2', '--master=' + geogr_proj_dirfile,
           hdf_source_file, hdf_geog_path])
      if status:
        _report_failure('ERROR CWREGISTER2: HDF file failed to convert from mercator to geographic: ' + hdf_source_file, output)
        return False
      export_input = hdf_geog_path

    # run cwutils cwexport to convert hdf file to netcdf4
    # write DCS metadata for all files except PODAAC MUR
    # - first check if "data_source" metadata field exists in HDF global
    #   attributes, and if so, do _not_ write DCS metadata (i.e. MUR)
    #   (cwexport --dcs option searches for "satellite" metadata field but
    #   "satellite" is missing when "data_source" is present in the HDF
    #   file, so --dcs option will not work)
    (status, output) = _run_command(
        ['hdatt', '--name', 'data_source', export_input])
    export_cmd = ['cwexport', '--cw', export_input, nc_tmp_path]
    if status:
      # hdatt failed (non-zero status): data_source not present,
      # so write DCS metadata
      export_cmd.insert(1, '--dcs')

    (status, output) = _run_command(export_cmd)
    if status:
      _report_failure('ERROR CWEXPORT: HDF file failed to convert to netcdf: ' + export_input, output)
      return False

    # move as .nc4 first and rename to .nc only once the file is complete,
    # so ERDDAP never discovers a partially copied file
    shutil.move(nc_tmp_path, nc_dest_path)
    shutil.move(nc_dest_path, nc_rename_path)
    return True

  finally:
    # remove whatever intermediates exist; the reprojected file is only
    # created for mercator input, and the .nc4 is gone after a good move
    for leftover in (hdf_geog_path, nc_tmp_path):
      if os.path.exists(leftover):
        os.remove(leftover)


def main(argv=None):
  """Copy every file of --from_dir to --to_dir and publish HDF files as .nc.

  argv is an optional list of command-line arguments (without the program
  name); when None, argparse reads sys.argv, which is the behaviour when the
  script is run from the command line.

  Every regular file in the source directory is copied unchanged to the
  destination directory.  Files whose name ends in 'hdf' are additionally
  passed to export_ncdf together with the destination directory and the
  geographic geomaster file.  Entries that are not regular files
  (e.g. subdirectories) are skipped with a message.
  """

  # get from_directory and to_directory from the arguments
  parser = argparse.ArgumentParser(description='Copies all the files of one directory to another directory')
  parser.add_argument('--from_dir',help='Directory to copy from',required=True)
  parser.add_argument('--to_dir',help='Directory to copy to',required=True)
  parser.add_argument('--geogr_geomaster',help='Geographic projection geomaster file for ERDDAP',required=True)
  args = parser.parse_args(argv)

  print ("From directory: %s" % args.from_dir )
  print ("To directory: %s" % args.to_dir )
  print ("Geographic geomaster: %s" % args.geogr_geomaster )

  source_dir = args.from_dir
  destination_dir = args.to_dir
  geogr_proj_file = args.geogr_geomaster

  # determine list of files to copy (sorted so the processing order and the
  # log output are the same on every run)
  for entry in sorted(os.listdir(source_dir)):

    # add path names to files
    source_file = os.path.join(source_dir, entry)
    destination_file = os.path.join(destination_dir, entry)

    # os.listdir also returns subdirectories; shutil.copy cannot copy those
    # and would abort the whole run, so skip anything that is not a file
    if not os.path.isfile(source_file):
      print ("Skipping (not a regular file): %s" % source_file )
      continue

    # copy each HDF & PNG file to destination
    #  - NOTE: for AVHRR SST, HDF data was updated to include a land mask
    #          so HDF file must be copied to the ERDDAP/FTP directory
    #    MAY NEED TO REMOVE THIS COPY FOR PRODUCTS WHERE HDF & PNG _DO NOT_
    #    NEED TO BE COPIED TO ERDDAP/FTP DIRECTORY
    shutil.copy(source_file, destination_file)

    # if file is HDF,
    #   (1) convert it to netCDF-4 at the dedicated ERDDAP
    #       processing directory /utilities/erddap_tmp_ts/
    #   (2) then copy the netCDF file to the destination ERDDAP/FTP directory
    #   (3) then rename it to .nc for ERDDAP discovery (as specified in
    #       datasets.xml)
    if entry.endswith('hdf'):
      export_ncdf(source_file, destination_dir, geogr_proj_file)

# Run the copy/conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
  main()

