Fix offset date in mpas_xarray #46
@@ -4,13 +4,14 @@
 ==============================================================
 Wrapper to handle importing MPAS files into xarray.

-Module does
-1. Converts MPAS "xtime" to xarray time. Time dimension is assigned via
-`preprocess_mpas`.
-2. Converts MPAS "timeSinceStartOfSim" to xarray time for MPAS fields coming from the
-timeSeriesStatsAM. Time dimension is assigned via `preprocess_mpas(..., timeSeriesStats=True)`.
-3. Provides capability to remove redundant time entries from reading of multiple netCDF
-datasets via `remove_repeated_time_index`.
+Module:
+1. converts MPAS "xtime" to xarray time. Time dimension is assigned via
+`preprocess_mpas(..., timeSeriestats=False, ...)`.
+2. converts MPAS "timeSinceStartOfSim" to xarray time for MPAS fields coming
+from the timeSeriesStatsAM. Time dimension is assigned via
+`preprocess_mpas(..., timeSeriesStats=True, ...)`.
+3. provides capability to remove redundant time entries from reading of
+multiple netCDF datasets via `remove_repeated_time_index`.

 Example Usage:
@@ -19,8 +20,8 @@
 >>> ds = xarray.open_mfdataset('globalStats*nc', preprocess=preprocess_mpas)
 >>> ds = remove_repeated_time_index(ds)

-Phillip J. Wolfram
-12/01/2015
+Phillip J. Wolfram, Xylar Asay-Davis
+Last modified: 11/25/2016
 """

 import datetime
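To complement the example usage above, here is a minimal sketch of the timeSeriesStats path described in the docstring. The file glob and import path are assumptions; the timestr value is the MPAS-O name quoted later in the diff.

```python
# Sketch only: opening timeSeriesStatsAM output with the options described in
# the docstring.  The file pattern and import path are illustrative.
import xarray as xr
from mpas_xarray import preprocess_mpas, remove_repeated_time_index

ds = xr.open_mfdataset(
    'timeSeriesStatsMonthly*.nc',  # assumed file pattern
    preprocess=lambda x: preprocess_mpas(
        x, timeSeriesStats=True,
        timestr='time_avg_daysSinceStartOfSim'))  # MPAS-O name from the docstring
ds = remove_repeated_time_index(ds)
```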
@@ -29,7 +30,8 @@
 import pandas as pd
 import xarray as xr

-def subset_variables(ds, vlist): #{{{
+
+def subset_variables(ds, vlist): # {{{
 """
 Reduces an xarray dataset ds to only contain the variables in vlist.

@@ -52,23 +54,27 @@ def subset_variables(ds, vlist): #{{{
 # drop spurious coordinates
 ds = ds.drop(dropcoords)

-return ds #}}}
+return ds # }}}

+
-def assert_valid_datetimes(datetimes, yearoffset): #{{{
+def assert_valid_datetimes(datetimes, yearoffset): # {{{
 """
 Ensure that datatimes are compatable with xarray

 Phillip J. Wolfram
 04/20/2016
 """
-assert datetimes[0].year > 1678, 'ERROR: yearoffset=%s'%(yearoffset) + \
-' must be large enough to ensure datetimes larger than year 1678'
-assert datetimes[-1].year < 2262, 'ERROR: yearoffset=%s'%(yearoffset) + \
-' must be large enough to ensure datetimes larger than year 2262'
+assert datetimes[0].year > 1678, \
+'ERROR: yearoffset={}'.format(yearoffset) + \
+' must be large enough to ensure datetimes larger than year 1678'
+assert datetimes[-1].year < 2262, \
+'ERROR: yearoffset={}'.format(yearoffset) + \
+' must be large enough to ensure datetimes larger than year 2262'

+return # }}}

-return #}}}
+
-def assert_valid_selections(selvals, iselvals): #{{{
+def assert_valid_selections(selvals, iselvals): # {{{
 """
 Ensure that dataset selections are compatable.

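For context on the 1678/2262 bounds checked in assert_valid_datetimes above: xarray stores times as nanosecond-precision datetime64 values, and pandas exposes the representable range directly. A quick illustration (not part of the diff):

```python
import pandas as pd

# datetime64[ns] can only represent dates from late 1677 through early 2262,
# which is why the asserts above check for years 1678 and 2262.
print(pd.Timestamp.min)  # approximately 1677-09-21
print(pd.Timestamp.max)  # approximately 2262-04-11
```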
@@ -83,12 +89,13 @@ def assert_valid_selections(selvals, iselvals): #{{{
 if (selvals is not None) and (iselvals is not None):
 duplicatedkeys = len(np.intersect1d(selvals.keys(), iselvals.keys()))
 assert len(duplicatedkeys) == 0, \
-'Duplicated selection of variables %s was found! ' + \
-'Selection is ambiguous.'%(duplicatedkeys)
+'Duplicated selection of variables {} was found! ' + \
+'Selection is ambiguous.'.format(duplicatedkeys)

-return #}}}
+return # }}}

-def ensure_list(alist): #{{{
+
+def ensure_list(alist): # {{{
 """
 Ensure that variables used as a list are actually lists.

@@ -97,12 +104,13 @@ def ensure_list(alist): #{{{
 """

 if isinstance(alist, str):
-#print 'Warning, converting %s to a list'%(alist)
+# print 'Warning, converting %s to a list'%(alist)
 alist = [alist]

-return alist #}}}
+return alist # }}}

+
-def time_series_stat_time(timestr, daysSinceStart): #{{{
+def time_series_stat_time(timestr, daysSinceStart): # {{{
 """
 Modifies daysSinceStart for uniformity based on between differences
 between MPAS-O and MPAS-Seaice.
@@ -116,11 +124,12 @@ def time_series_stat_time(timestr, daysSinceStart): #{{{
 else:
 return pd.to_timedelta(daysSinceStart.values, unit='ns')

-#}}}
+# }}}

+
 def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
-timeSeriesStats=False, timestr=None,
-yearoffset=1849, monthoffset=12, dayoffset=31): #{{{
+timeSeriesStats=False, timestr=None,
+yearoffset=1849): # {{{
 """
 Builds correct time specification for MPAS, allowing a date offset because
 the time must be between 1678 and 2262 based on the xarray library.
@@ -130,8 +139,7 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
 constant over the entire model simulation. Typical time-slice experiments
 are run with 1850 (pre-industrial) conditions and 2000 (present-day)
 conditions. Hence, a default date offset is chosen to be yearoffset=1849,
-monthoffset=12, dayoffset=31 (day 1 of an 1850 run will be seen as
-Jan 1st, 1850).
+(year 0001 of an 1850 run will correspond with Jan 1st, 1850).

 Note, for use with the timeSeriesStats analysis member fields set
 timeSeriesStats=True and assign timestr.
@@ -141,37 +149,37 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
 timestr='time_avg_daysSinceStartOfSim' and for MPAS-Seaice
 timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'.

-The onlyvars option reduces the dataset to only include variables in the onlyvars list.
-If onlyvars=None, include all dataset variables.
+The onlyvars option reduces the dataset to only include variables in the
+onlyvars list. If onlyvars=None, include all dataset variables.

-iselvals and selvals provide index and value-based slicing operations for individual datasets
-prior to their merge via xarray.
-iselvals is a dictionary, e.g., iselvals = {'nVertLevels': slice(0,3), 'nCells': cellIDs}
+iselvals and selvals provide index and value-based slicing operations for
+individual datasets prior to their merge via xarray.
+iselvals is a dictionary, e.g., iselvals = {'nVertLevels': slice(0,3),
+'nCells': cellIDs}
 selvals is a dictionary, e.g., selvals = {'cellLon': 180.0}

-Phillip J. Wolfram, Milena Veneziani, and Luke van Roekel
-09/13/2016
+Phillip J. Wolfram, Milena Veneziani, Luke van Roekel and Xylar Asay-Davis
+11/25/2016
 """

 # ensure timestr is specified used when timeSeriesStats=True
 if timeSeriesStats:
 if timestr is None:
 assert False, 'A value for timestr is required, e.g., ' + \
 'for MPAS-O: time_avg_daysSinceStartOfSim, and ' + \
-'for MPAS-Seaice: timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'
+'for MPAS-Seaice: ' + \
+'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'

 # compute shifted datetimes
 daysSinceStart = ds[timestr]
-datetimes = [datetime.datetime(yearoffset, monthoffset, dayoffset) + x
+datetimes = [datetime.datetime(yearoffset+1, 1, 1) + x
Review comment: Please see comment above about hard coding values. I think we only want the default to be changed to avoid hard-coded "magic" numbers.
 for x in time_series_stat_time(timestr, daysSinceStart)]
 else:
 time = np.array([''.join(atime).strip() for atime in ds.xtime.values])
-# note the one year difference here (e.g., 12-31 of 1849 is essentially
-# 1850) breaks previous convention used if timeSeriesStats=False
-# yearoffset=1849 instead of prior 1950
-# comments above can be cleaned up on transition to v1.0
-datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]), \
-int(x[8:10]), int(x[11:13]), int(x[14:16]), int(x[17:19])) for x in time]
+datetimes = [datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]),
+int(x[8:10]), int(x[11:13]),
+int(x[14:16]), int(x[17:19]))
+for x in time]
Review comment: Nice!
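For reference, a small worked check of the slicing above, assuming the usual MPAS xtime layout 'YYYY-MM-DD_hh:mm:ss' and the default yearoffset of 1849; the sample string is illustrative.

```python
import datetime

# Simulation year 0001 plus yearoffset=1849 maps to calendar year 1850.
x = '0001-01-01_00:00:00'  # assumed xtime layout: 'YYYY-MM-DD_hh:mm:ss'
yearoffset = 1849
parsed = datetime.datetime(yearoffset + int(x[:4]), int(x[5:7]), int(x[8:10]),
                           int(x[11:13]), int(x[14:16]), int(x[17:19]))
print(parsed)  # 1850-01-01 00:00:00
```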

 assert_valid_datetimes(datetimes, yearoffset)

@@ -191,9 +199,10 @@ def preprocess_mpas(ds, onlyvars=None, selvals=None, iselvals=None,
 if iselvals is not None:
 ds = ds.isel(**iselvals)

-return ds #}}}
+return ds # }}}

+
-def remove_repeated_time_index(ds): #{{{
+def remove_repeated_time_index(ds): # {{{
 """
 Remove repeated times from xarray dataset.

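As an aside, a minimal illustration of the kind of de-duplication remove_repeated_time_index performs. The index computation in the actual function lies outside the hunks shown here, so this is only a sketch of the general pattern.

```python
import numpy as np
import xarray as xr

# Toy dataset with a repeated time value, as can happen when concatenating
# overlapping output files.
ds = xr.Dataset({'v': ('Time', [10, 11, 11, 12])},
                coords={'Time': [0.0, 1.0, 1.0, 2.0]})

# Keep only the first occurrence of each time (general pattern, not
# necessarily the exact implementation in mpas_xarray).
_, index = np.unique(ds.Time.values, return_index=True)
ds = ds.isel(Time=index)
print(ds.Time.values)  # [0. 1. 2.]
```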
@@ -218,23 +227,30 @@ def remove_repeated_time_index(ds): #{{{
 # remove repeated indices
 ds = ds.isel(Time=index)

-return ds #}}}
+return ds # }}}

-def test_load_mpas_xarray_datasets(path): #{{{
-ds = xr.open_mfdataset(path, preprocess=lambda x: preprocess_mpas(x, yearoffset=1850))
+
+def test_load_mpas_xarray_datasets(path): # {{{
+ds = xr.open_mfdataset(path, preprocess=lambda x:
+preprocess_mpas(x, yearoffset=1850))
 ds = remove_repeated_time_index(ds)

 # make a simple plot from the data
 ds.Time.plot()
 plt.show()

-return #}}}
+return # }}}

+
-def test_load_mpas_xarray_timeSeriesStats_datasets(path): #{{{
-ds = xr.open_mfdataset(path, preprocess=lambda x: preprocess_mpas(x,
-timeSeriesStats=True, timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'))
+def test_load_mpas_xarray_timeSeriesStats_datasets(path): # {{{
+timestr = 'timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'
Review comment: This could be an optional argument to the function above, which may be clearer and provide additional functionality.

Reply: I'm not sure that change fits within the scope of this PR. I'm just trying to make the line PEP8 compliant, not add any new functionality. I pulled out timestr simply because the variable name is so long that there's no feasible way of keeping the line under 80 characters without assigning it to a variable. If you want to upgrade the testing of mpas_xarray, that would be a fine thing to do in another PR.
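A rough sketch of the reviewer's suggestion (deferred to a later PR), assuming the module-level imports and helpers already defined above; the default value mirrors the MPAS-Seaice name used in the test.

```python
# Hypothetical variant of the test helper with timestr as an optional argument.
# Assumes xr, preprocess_mpas and remove_repeated_time_index from this module.
def test_load_mpas_xarray_timeSeriesStats_datasets(
        path,
        timestr='timeSeriesStatsMonthly_avg_daysSinceStartOfSim_1'):  # {{{
    ds = xr.open_mfdataset(path, preprocess=lambda x:
                           preprocess_mpas(x,
                                           timeSeriesStats=True,
                                           timestr=timestr))
    ds = remove_repeated_time_index(ds)
    return ds  # }}}
```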
+ds = xr.open_mfdataset(path, preprocess=lambda x:
+preprocess_mpas(x,
+timeSeriesStats=True,
+timestr=timestr))
Review comment: This makes the code a lot clearer, thanks for the cleanup.
 ds = remove_repeated_time_index(ds)
-ds2 = xr.open_mfdataset(path, preprocess=lambda x: preprocess_mpas(x, yearoffset=1850))
+ds2 = xr.open_mfdataset(path, preprocess=lambda x:
+preprocess_mpas(x, yearoffset=1850))
Review comment: What was the reason for separating the lambda argument from its name? I think it looked clearer the previous way, but was this change made because the line overflowed the column limit, requiring indentation? If so, this change makes sense.

Reply: I could not find a cleaner way to obey the indentation rules and still stay within the 80-character limit per line. It might actually be cleaner to explicitly define a function rather than using a lambda, but I'm open to suggestions. This comes up as well when trying to make the analysis scripts PEP8 compliant.

Reply: I agree, this is certainly a gray area, and the code is clear and aesthetically pleasing, so we'll go with it and just keep an eye out for better ways to do this in the future (if they exist).
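One possible alternative to the lambda being discussed, using functools.partial so the call stays on short lines. This is only a sketch and assumes the same surrounding names (xr, preprocess_mpas, remove_repeated_time_index, path) as the test code above.

```python
from functools import partial

# partial(...) builds a one-argument preprocess function, so no lambda or
# continuation lines are needed inside open_mfdataset.
preprocess = partial(preprocess_mpas, yearoffset=1850)
ds2 = xr.open_mfdataset(path, preprocess=preprocess)
ds2 = remove_repeated_time_index(ds2)
```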
 ds2 = remove_repeated_time_index(ds2)

 # make a simple plot from the data
@@ -244,27 +260,29 @@ def plot_data(ds):

 plot_data(ds)
 plot_data(ds2)
-plt.title("Curve centered around right times (b) \n "+\
+plt.title("Curve centered around right times (b) \n " +
 "Curve shifted towards end of avg period (g)")
 plt.show()

-return #}}}
+return # }}}

+
 if __name__ == "__main__":
 from optparse import OptionParser

 parser = OptionParser()
 parser.add_option("-f", "--file", dest="inputfilename",
-help="files to be opened with xarray, could be of form 'output*.nc'", \
+help="files to be opened with xarray, could be of form "
+"'output*.nc'",
 metavar="FILE")
 parser.add_option("--istimeavg", dest="istimeavg",
-help="option to use the preprocess for timeSeriesStatsAM fields")
+help="option to use the preprocess for "
+"timeSeriesStatsAM fields")

 options, args = parser.parse_args()
 if not options.inputfilename:
-parser.error("Input filename or expression ('-f') is a required input..."+\
-" e.g., -f 'output*.npz'")
+parser.error("Input filename or expression ('-f') is a required"
+"input, e.g. -f 'output*.npz'")

 if not options.istimeavg:
 test_load_mpas_xarray_datasets(options.inputfilename)
Review comment: We are losing an option here in exchange for hard-coded values. Why aren't we just setting the defaults to new values? I think keeping the code more general is probably better long term.
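A minimal sketch of the alternative being raised here: build the reference date from keyword arguments and change only the defaults, so that the default result matches this PR's hard-coded datetime.datetime(yearoffset + 1, 1, 1). The helper name and parameter handling are illustrative only, and this sidesteps how yearoffset is also used in the xtime branch.

```python
import datetime

# Illustrative helper: keep month/day offsets as arguments with new defaults
# instead of hard-coding Jan 1.  With the defaults below the result is
# Jan 1, 1850, the same reference date this PR hard-codes.
def reference_date(yearoffset=1849, monthoffset=1, dayoffset=1):
    return datetime.datetime(yearoffset + 1, monthoffset, dayoffset)

print(reference_date())      # 1850-01-01 00:00:00
print(reference_date(1999))  # 2000-01-01 00:00:00
```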