Fix offset date in mpas_xarray #46


Merged 2 commits on Dec 1, 2016
140 changes: 79 additions & 61 deletions mpas_analysis/shared/mpas_xarray/mpas_xarray.py
@@ -4,13 +4,14 @@
==============================================================
Wrapper to handle importing MPAS files into xarray.

Module does
1. Converts MPAS "xtime" to xarray time. Time dimension is assigned via
`preprocess_mpas`.
2. Converts MPAS "timeSinceStartOfSim" to xarray time for MPAS fields coming from the
timeSeriesStatsAM. Time dimension is assigned via `preprocess_mpas(..., timeSeriesStats=True)`.
3. Provides capability to remove redundant time entries from reading of multiple netCDF
datasets via `remove_repeated_time_index`.
Module:
1. converts MPAS "xtime" to xarray time. Time dimension is assigned via
`preprocess_mpas(..., timeSeriesStats=False, ...)`.
2. converts MPAS "timeSinceStartOfSim" to xarray time for MPAS fields coming
from the timeSeriesStatsAM. Time dimension is assigned via
`preprocess_mpas(..., timeSeriesStats=True, ...)`.
3. provides capability to remove redundant time entries from reading of
multiple netCDF datasets via `remove_repeated_time_index`.

Example Usage:

@@ -19,8 +20,8 @@
>>> ds = xarray.open_mfdataset('globalStats*nc', preprocess=preprocess_mpas)
>>> ds = remove_repeated_time_index(ds)

Phillip J. Wolfram
12/01/2015
Phillip J. Wolfram, Xylar Asay-Davis
Last modified: 11/25/2016
"""

import datetime
@@ -29,7 +30,8 @@
import pandas as pd
import xarray as xr

def subset_variables(ds, vlist): #{{{

def subset_variables(ds, vlist): # {{{
"""
Reduces an xarray dataset ds to only contain the variables in vlist.

@@ -52,23 +54,27 @@ def subset_variables(ds, vlist): #{{{
# drop spurious coordinates
ds = ds.drop(dropcoords)

return ds #}}}
return ds # }}}


def assert_valid_datetimes(datetimes, yearoffset): #{{{
def assert_valid_datetimes(datetimes, yearoffset): # {{{
"""
Ensure that datetimes are compatible with xarray

Phillip J. Wolfram
04/20/2016
"""
assert datetimes[0].year > 1678, 'ERROR: yearoffset=%s'%(yearoffset) + \
' must be large enough to ensure datetimes larger than year 1678'
assert datetimes[-1].year < 2262, 'ERROR: yearoffset=%s'%(yearoffset) + \
' must be large enough to ensure datetimes larger than year 2262'
assert datetimes[0].year > 1678, \
'ERROR: yearoffset={}'.format(yearoffset) + \
' must be large enough to ensure datetimes larger than year 1678'
assert datetimes[-1].year < 2262, \
'ERROR: yearoffset={}'.format(yearoffset) + \
' must be small enough to ensure datetimes smaller than year 2262'

return # }}}

return #}}}

def assert_valid_selections(selvals, iselvals): #{{{
def assert_valid_selections(selvals, iselvals): # {{{
"""
Ensure that dataset selections are compatible.

@@ -83,12 +89,13 @@ def assert_valid_selections(selvals, iselvals): #{{{
if (selvals is not None) and (iselvals is not None):
duplicatedkeys = len(np.intersect1d(selvals.keys(), iselvals.keys()))
assert len(duplicatedkeys) == 0, \
'Duplicated selection of variables %s was found! ' + \
'Selection is ambiguous.'%(duplicatedkeys)
'Duplicated selection of variables {} was found! ' \
'Selection is ambiguous.'.format(duplicatedkeys)

return #}}}
return # }}}

def ensure_list(alist): #{{{

def ensure_list(alist): # {{{
"""
Ensure that variables used as a list are actually lists.

@@ -97,12 +104,13 @@ def ensure_list(alist): #{{{
"""

if isinstance(alist, str):
#print 'Warning, converting %s to a list'%(alist)
# print 'Warning, converting %s to a list'%(alist)
alist = [alist]

return alist #}}}
return alist # }}}


def time_series_stat_time(timestr, daysSinceStart): #{{{
def time_series_stat_time(timestr, daysSinceStart): # {{{
"""
Modifies daysSinceStart for uniformity based on differences
between MPAS-O and MPAS-Seaice.
@@ -116,11 +124,12 @@
else:
return pd.to_timedelta(daysSinceStart.values, unit='ns')

#}}}
# }}}


def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
timeSeriesStats=False, timestr=None,
yearoffset=1849, monthoffset=12, dayoffset=31): #{{{
timeSeriesStats=False, timestr=None,
yearoffset=1849): # {{{
Contributor:

We are losing an option here in exchange for hard-coded values. Why aren't we just setting the defaults to the new values? I think this is probably better long term because it keeps the code more general.
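An illustrative sketch (not part of this diff) of what that suggestion might look like: keep monthoffset and dayoffset as keyword arguments, as in the signature being removed, with hypothetical defaults that reproduce the new base date of Jan 1 of yearoffset+1; datetime and time_series_stat_time are the names already used in this module.

# Sketch only; monthoffset=1 and dayoffset=1 are assumed defaults, not from this PR
def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
                    timeSeriesStats=False, timestr=None,
                    yearoffset=1849, monthoffset=1, dayoffset=1):
    # ... unchanged body up to the timeSeriesStats branch ...
    daysSinceStart = ds[timestr]
    # base date stays configurable; the defaults give datetime(1850, 1, 1)
    datetimes = [datetime.datetime(yearoffset + 1, monthoffset, dayoffset) + x
                 for x in time_series_stat_time(timestr, daysSinceStart)]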

"""
Builds correct time specification for MPAS, allowing a date offset because
the time must be between 1678 and 2262 based on the xarray library.
@@ -130,8 +139,7 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
constant over the entire model simulation. Typical time-slice experiments
are run with 1850 (pre-industrial) conditions and 2000 (present-day)
conditions. Hence, a default date offset is chosen to be yearoffset=1849,
monthoffset=12, dayoffset=31 (day 1 of an 1850 run will be seen as
Jan 1st, 1850).
(year 0001 of an 1850 run will correspond with Jan 1st, 1850).

Note, for use with the timeSeriesStats analysis member fields set
timeSeriesStats=True and assign timestr.
@@ -141,37 +149,37 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
timestr='time_avg_daysSinceStartOfSim' and for MPAS-Seaice
timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'.

The onlyvars option reduces the dataset to only include variables in the onlyvars list.
If onlyvars=None, include all dataset variables.
The onlyvars option reduces the dataset to only include variables in the
onlyvars list. If onlyvars=None, include all dataset variables.

iselvals and selvals provide index and value-based slicing operations for individual datasets
prior to their merge via xarray.
iselvals is a dictionary, e.g., iselvals = {'nVertLevels': slice(0,3), 'nCells': cellIDs}
iselvals and selvals provide index and value-based slicing operations for
individual datasets prior to their merge via xarray.
iselvals is a dictionary, e.g., iselvals = {'nVertLevels': slice(0,3),
'nCells': cellIDs}
selvals is a dictionary, e.g., selvals = {'cellLon': 180.0}

Phillip J. Wolfram, Milena Veneziani, and Luke van Roekel
09/13/2016
Phillip J. Wolfram, Milena Veneziani, Luke van Roekel and Xylar Asay-Davis
11/25/2016
"""

# ensure timestr is specified when timeSeriesStats=True
if timeSeriesStats:
if timestr is None:
assert False, 'A value for timestr is required, e.g., ' + \
'for MPAS-O: time_avg_daysSinceStartOfSim, and ' + \
'for MPAS-Seaice: timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'
'for MPAS-Seaice: ' + \
'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'

# compute shifted datetimes
daysSinceStart = ds[timestr]
datetimes = [datetime.datetime(yearoffset, monthoffset, dayoffset) + x
datetimes = [datetime.datetime(yearoffset+1, 1, 1) + x
Contributor:

Please see the comment above about hard-coded values. I think we only want the defaults to be changed, to avoid hard-coded "magic" numbers.

for x in time_series_stat_time(timestr, daysSinceStart)]
else:
time = np.array([''.join(atime).strip() for atime in ds.xtime.values])
# note the one year difference here (e.g., 12-31 of 1849 is essentially
# 1850) breaks previous convention used if timeSeriesStats=False
# yearoffset=1849 instead of prior 1950
# comments above can be cleaned up on transition to v1.0
datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]), \
int(x[8:10]), int(x[11:13]), int(x[14:16]), int(x[17:19])) for x in time]
datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]),
int(x[8:10]), int(x[11:13]),
int(x[14:16]), int(x[17:19]))
for x in time]
Contributor:

Nice!
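For reference, a small worked example (not part of the diff) of what this comprehension does to a single MPAS xtime string, assuming the usual 'YYYY-MM-DD_hh:mm:ss' layout and the default yearoffset=1849:

import datetime

x = '0001-01-01_00:00:00'          # simulation year 0001
yearoffset = 1849
dt = datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]), int(x[8:10]),
                       int(x[11:13]), int(x[14:16]), int(x[17:19]))
# dt is datetime.datetime(1850, 1, 1, 0, 0), i.e. Jan 1st, 1850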


assert_valid_datetimes(datetimes, yearoffset)

@@ -191,9 +199,10 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
if iselvals is not None:
ds = ds.isel(**iselvals)

return ds #}}}
return ds # }}}


def remove_repeated_time_index(ds): #{{{
def remove_repeated_time_index(ds): # {{{
"""
Remove repeated times from xarray dataset.

@@ -218,23 +227,30 @@ def remove_repeated_time_index(ds): #{{{
# remove repeated indices
ds = ds.isel(Time=index)

return ds #}}}
return ds # }}}

def test_load_mpas_xarray_datasets(path): #{{{
ds = xr.open_mfdataset(path, preprocess=lambda x: preprocess_mpas(x, yearoffset=1850))

def test_load_mpas_xarray_datasets(path): # {{{
ds = xr.open_mfdataset(path, preprocess=lambda x:
preprocess_mpas(x, yearoffset=1850))
ds = remove_repeated_time_index(ds)

# make a simple plot from the data
ds.Time.plot()
plt.show()

return #}}}
return # }}}


def test_load_mpas_xarray_timeSeriesStats_datasets(path): #{{{
ds = xr.open_mfdataset(path, preprocess=lambda x: preprocess_mpas(x,
timeSeriesStats=True, timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'))
def test_load_mpas_xarray_timeSeriesStats_datasets(path): # {{{
timestr = 'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'
Contributor:

This could be an optional argument to the function above, which may be clearer and provide additional functionality.

Collaborator Author:

I'm not sure that change fits in the scope of this PR. I'm just trying to make the line PEP8 compliant, not add any new functionality. I pulled out timestr simply because the variable name is so long that there's no feasible way of keeping the line under 80 characters without assigning it to a variable.

If you want to upgrade the testing of mpas_xarray, that would be a fine thing to do in another PR.
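If that upgrade is picked up later, a minimal sketch of the reviewer's suggestion might look like the following (a hypothetical signature, not something this PR adds):

def test_load_mpas_xarray_timeSeriesStats_datasets(
        path,
        timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'):
    # timestr is now an optional argument defaulting to the MPAS-Seaice name
    ds = xr.open_mfdataset(path, preprocess=lambda x:
                           preprocess_mpas(x,
                                           timeSeriesStats=True,
                                           timestr=timestr))
    ds = remove_repeated_time_index(ds)
    # ... remainder of the existing test body is unchanged ...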

ds = xr.open_mfdataset(path, preprocess=lambda x:
preprocess_mpas(x,
timeSeriesStats=True,
timestr=timestr))
Contributor:

This makes the code a lot clearer, thanks for the cleanup.

ds = remove_repeated_time_index(ds)
ds2 = xr.open_mfdataset(path, preprocess=lambda x: preprocess_mpas(x, yearoffset=1850))
ds2 = xr.open_mfdataset(path, preprocess=lambda x:
preprocess_mpas(x, yearoffset=1850))
Contributor:

What was the reason for separating the lambda argument from its name? I think it looked clearer the previous way, but was this change made because the line overflowed the column limit, requiring indentation? If so, this change makes sense.

Collaborator Author:

I could not find a cleaner way to obey the indentation rules and still stay within the 80-character limit per line. It might actually be cleaner to explicitly define a function rather than using lambda. But I'm up for suggestions. This comes up as well when trying to make the analysis scripts PEP8 compliant.

Contributor:

I agree, this is certainly a gray area. The code is clear and aesthetically pleasing, so let's go with it and just keep an eye out for better ways to do this in the future (if they exist).
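For what it's worth, one way to avoid the lambda entirely is functools.partial, sketched below as an alternative style (not something this PR adopts), using the names already in this module:

from functools import partial

# bind the keyword arguments up front so the open_mfdataset call stays short
preprocess = partial(preprocess_mpas, yearoffset=1850)
ds2 = xr.open_mfdataset(path, preprocess=preprocess)
ds2 = remove_repeated_time_index(ds2)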

ds2 = remove_repeated_time_index(ds2)

# make a simple plot from the data
@@ -244,27 +260,29 @@ def plot_data(ds):

plot_data(ds)
plot_data(ds2)
plt.title("Curve centered around right times (b) \n "+\
plt.title("Curve centered around right times (b) \n " +
"Curve shifted towards end of avg period (g)")
plt.show()

return #}}}
return # }}}


if __name__ == "__main__":
from optparse import OptionParser

parser = OptionParser()
parser.add_option("-f", "--file", dest="inputfilename",
help="files to be opened with xarray, could be of form 'output*.nc'", \
help="files to be opened with xarray, could be of form "
"'output*.nc'",
metavar="FILE")
parser.add_option("--istimeavg", dest="istimeavg",
help="option to use the preprocess for timeSeriesStatsAM fields")
help="option to use the preprocess for "
"timeSeriesStatsAM fields")

options, args = parser.parse_args()
if not options.inputfilename:
parser.error("Input filename or expression ('-f') is a required input..."+\
" e.g., -f 'output*.npz'")
parser.error("Input filename or expression ('-f') is a required"
"input, e.g. -f 'output*.npz'")

if not options.istimeavg:
test_load_mpas_xarray_datasets(options.inputfilename)