#!/data/home004/ronald.vogel/.conda/envs/rv_env_main/bin/python3
#calc_CBP_36sta_median_JunJul_20162019.py
#
#calculates monthly median chlorophyll from CBP in situ data for each of
#36 individual mainstem stations, for June & July 2016
#and June & July 2019
#(to compare with high satellite chl in summer 2019)
#(note: CBP chl is LOW in 2016 compared to 2019)
#
#input:  CBP water quality monitoring yearly file (all stations in one
#        yearly file) as CSV file
#        e.g. CBP_WaterQualityStation_49mainstem_2016.csv
#        located in: 
#        /disks/data563/rvogel/chlor_cb_viirs-sq_timeseries_reports
#
#output: one text file per year (opened in append mode) containing the
#        monthly median chl for each of the 36 individual mainstem
#        stations: June & July 2016 in one file, June & July 2019 in the other
#
#usage:  ./calc_CBP_36sta_median_JunJul_20162019.py
#
#this code was based on: calc_CBP_all-bay_monthly_median_ts.py
#
#Ron Vogel, UMD/ESSIC
#Apr 12, 2023

import os
from datetime import datetime
from numpy import nanmedian  # calculates median ignoring NaN values
# modules not needed:
#import sys
#import re
#import shutil
#import commands
#import csv

# location of the yearly CBP water quality monitoring CSV files
cbp_dir = '/disks/data563/rvogel/chlor_cb_viirs-sq_timeseries_reports'

# years to process (one input file is read and one output file written per year)
file_yr = [2016, 2019]

# the 36 mainstem stations, in the order they are written to the output file
sta_list = [
   'CB2.1', 'CB2.2',
   'CB3.1', 'CB3.2', 'CB3.3E', 'CB3.3C', 'CB3.3W',
   'CB4.1E', 'CB4.1C', 'CB4.1W',
   'CB4.2E', 'CB4.2C', 'CB4.2W',
   'CB4.3E', 'CB4.3C', 'CB4.3W',
   'CB5.1', 'CB5.2', 'CB5.3', 'CB5.4', 'CB5.4W', 'CB5.5',
   'CB6.1', 'CB6.2', 'CB6.3', 'CB6.4',
   'CB7.1', 'CB7.1N', 'CB7.2', 'CB7.2E',
   'CB7.3', 'CB7.3E', 'CB7.4', 'CB7.4N',
   'CB8.1', 'CB8.1E',
]


# months of interest as integers (June, July)
_MONTHS = (6, 7)


def _collect_jun_jul_chl(cbp_records_list, last_line_to_read, stations):
   """Group surface-layer June/July chl samples by station in a single pass.

   Args:
      cbp_records_list: all lines of one yearly CBP CSV file; index 0 is the
         header line and is never read.
      last_line_to_read: index of the last sample line (the record count
         taken from the final line of the file).
      stations: station names to collect samples for.

   Returns:
      dict mapping each station name to {6: [chl, ...], 7: [chl, ...]},
      with chl as floats; an empty chl field is stored as nan.

   Raises:
      IndexError: a sample line has fewer than 20 comma-separated fields,
         or last_line_to_read points past the end of the list.
      ValueError: a date is not in m/d/Y form, or a chl value of a selected
         sample is not numeric.
   """
   chl_by_station = {sta: {mon: [] for mon in _MONTHS} for sta in stations}

   # do not read the first line (header); read up to and including the last
   # sample (the final line of the file holds the record count instead)
   for cbp_indx in range(1, last_line_to_read + 1):
      # NOTE(review): a plain split(',') assumes no quoted field contains a
      # comma -- confirm against the CBP export format
      cbp_info = cbp_records_list[cbp_indx].split(',')
      cbp_station = cbp_info[0].replace('"', '')   # removing quotes
      cbp_date    = cbp_info[8]
      cbp_layer   = cbp_info[14].replace('"', '').strip()
      cbp_chl     = cbp_info[19]

      # every date is parsed (even for records that are skipped below) so
      # that a malformed line is reported instead of being silently ignored;
      # .month is an int and avoids the platform-specific '%-m' format code
      cbp_mon = datetime.strptime(cbp_date, '%m/%d/%Y').month

      # only use surface "S" layer
      if cbp_layer != 'S':
         continue

      if cbp_mon not in _MONTHS:
         continue

      if cbp_station not in chl_by_station:
         continue

      # a missing chl value is kept as nan so nanmedian can ignore it
      if cbp_chl == '':
         cbp_chl = 'nan'

      chl_by_station[cbp_station][cbp_mon].append(float(cbp_chl))

   return chl_by_station


for yr in file_yr:

   # get CBP filenames
   cbp_file = 'CBP_WaterQualityStation_49mainstem_' + str(yr) + '.csv'
   cbp_path_file = os.path.join(cbp_dir, cbp_file)

   print('Now reading CBP data for year: ' + str(yr))

   # read all lines; 'with' closes the file even if reading fails
   with open(cbp_path_file, 'r') as cbp_records:
      cbp_records_list = cbp_records.readlines()

   # get total number of records: 'Total_Records' (last line of CBP file)
   total_records = cbp_records_list[-1]
   last_line_to_read = int(total_records.split()[-1])

   print('  number of lines in yearly file: ' + str(last_line_to_read))

   # parse the yearly file once for all stations (instead of once per station)
   chl_by_station = _collect_jun_jul_chl(cbp_records_list, last_line_to_read, sta_list)

   # open output file for appending (i.e. add new lines to an existing file);
   # opened only after the input was read successfully, so a missing input
   # file does not leave an empty output file behind
   ts_output_name = 'CBP_chl_monthly_median_36stations_JunJul_' + str(yr) + '.txt'
   with open(ts_output_name, 'a') as ts_output_file:

      # loop thru all stations
      for station in sta_list:

         # calc median for each month, ignoring nan values
         medians = [nanmedian(chl_by_station[station][mon]) for mon in _MONTHS]

         # write median for this station for each month to output file
         # separated by commas
         ts_output_file.write(station + ': ' + ', '.join(['{:.3f}'.format(x) for x in medians]) + '\n')

