Skip to content

Commit 81e5593

Browse files
Add get_srml iotools function; deprecate read_srml_month_from_solardat (#1779)
* Initial commit * Add exception for non-existing file names * Update error handling * Deprecate read_srml_month_from_solardat * Add fail_on_pvlib_version to tests * Apply suggestions from code review Co-authored-by: Kevin Anderson <[email protected]> * Address code review * Conform to code review * Add warning message * Use assert_frame_equal * Update pvlib/tests/iotools/test_srml.py Co-authored-by: Kevin Anderson <[email protected]> --------- Co-authored-by: Kevin Anderson <[email protected]>
1 parent fa9dc9b commit 81e5593

File tree

6 files changed

+172
-12
lines changed

6 files changed

+172
-12
lines changed

docs/sphinx/source/reference/iotools.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ of sources and file formats relevant to solar energy modeling.
1717
iotools.parse_epw
1818
iotools.read_srml
1919
iotools.read_srml_month_from_solardat
20+
iotools.get_srml
2021
iotools.read_surfrad
2122
iotools.read_midc
2223
iotools.read_midc_raw_data_from_nrel

docs/sphinx/source/whatsnew/v0.10.0.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ Enhancements
3636
the same Python type as the `effective_irradiance` and `temp_cell` parameters. (:issue:`1626`, :pull:`1700`)
3737
* Added `map_variables` parameter to :py:func:`pvlib.iotools.read_srml`
3838
and :py:func:`pvlib.iotools.read_srml_month_from_solardat` (:pull:`1773`)
39+
* Added :func:`pvlib.iotools.get_srml` that is similar to
40+
:func:`pvlib.iotools.read_srml_month_from_solardat` but is able to fetch multiple months
41+
of data using the `start` and `end` parameters.
42+
(:pull:`1779`)
3943
* Allow passing keyword arguments to :py:func:`scipy:scipy.optimize.brentq` and
4044
:py:func:`scipy:scipy.optimize.newton` solvers in
4145
:py:func:`~pvlib.singlediode.bishop88_mpp`,

docs/sphinx/source/whatsnew/v0.9.6.rst

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,11 +44,13 @@ Deprecations
4444
(data period 2003-2012). Instead, ECMWF recommends to use CAMS global
4545
reanalysis (EAC4) from the Atmosphere Data Store (ADS). See also :py:func:`pvlib.iotools.get_cams`.
4646
(:issue:`1691`, :pull:`1654`)
47-
4847
* The ``recolumn`` parameter in :py:func:`pvlib.iotools.read_tmy3`, which maps
4948
TMY3 column names to nonstandard alternatives, is now deprecated.
5049
We encourage using ``map_variables`` (which produces standard pvlib names) instead.
5150
(:issue:`1517`, :pull:`1623`)
51+
* :py:func:`pvlib.iotools.read_srml_month_from_solardat` is deprecated and replaced by
52+
:py:func:`pvlib.iotools.get_srml`. (:pull:`1779`)
53+
5254

5355
Enhancements
5456
~~~~~~~~~~~~

pvlib/iotools/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from pvlib.iotools.epw import read_epw, parse_epw # noqa: F401
33
from pvlib.iotools.srml import read_srml # noqa: F401
44
from pvlib.iotools.srml import read_srml_month_from_solardat # noqa: F401
5+
from pvlib.iotools.srml import get_srml # noqa: F401
56
from pvlib.iotools.surfrad import read_surfrad # noqa: F401
67
from pvlib.iotools.midc import read_midc # noqa: F401
78
from pvlib.iotools.midc import read_midc_raw_data_from_nrel # noqa: F401

pvlib/iotools/srml.py

Lines changed: 104 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"""
44
import numpy as np
55
import pandas as pd
6+
import urllib
7+
import warnings
68

9+
from pvlib._deprecation import deprecated
710

811
# VARIABLE_MAP is a dictionary mapping SRML data element numbers to their
912
# pvlib names. For most variables, only the first three digits are used,
@@ -26,8 +29,9 @@
2629

2730
def read_srml(filename, map_variables=True):
2831
"""
29-
Read University of Oregon SRML 1min .tsv file into pandas dataframe. The
30-
SRML is described in [1]_.
32+
Read University of Oregon SRML 1min .tsv file into pandas dataframe.
33+
34+
The SRML is described in [1]_.
3135
3236
Parameters
3337
----------
@@ -51,14 +55,14 @@ def read_srml(filename, map_variables=True):
5155
the time of the row until the time of the next row. This is consistent
5256
with pandas' default labeling behavior.
5357
54-
See SRML's `Archival Files`_ page for more information.
55-
56-
.. _Archival Files: http://solardat.uoregon.edu/ArchivalFiles.html
58+
See [2]_ for more information concerning the file format.
5759
5860
References
5961
----------
6062
.. [1] University of Oregon Solar Radiation Monitoring Laboratory
6163
`http://solardat.uoregon.edu/ <http://solardat.uoregon.edu/>`_
64+
.. [2] `Archival (short interval) data files
65+
<http://solardat.uoregon.edu/ArchivalFiles.html>`_
6266
"""
6367
tsv_data = pd.read_csv(filename, delimiter='\t')
6468
data = _format_index(tsv_data)
@@ -168,10 +172,12 @@ def _format_index(df):
168172
return df
169173

170174

175+
@deprecated('0.10.0', alternative='pvlib.iotools.get_srml', removal='0.11.0')
171176
def read_srml_month_from_solardat(station, year, month, filetype='PO',
172177
map_variables=True):
173-
"""Request a month of SRML data from solardat and read it into
174-
a Dataframe. The SRML is described in [1]_.
178+
"""Request a month of SRML data and read it into a Dataframe.
179+
180+
The SRML is described in [1]_.
175181
176182
Parameters
177183
----------
@@ -222,3 +228,94 @@ def read_srml_month_from_solardat(station, year, month, filetype='PO',
222228
url = "http://solardat.uoregon.edu/download/Archive/"
223229
data = read_srml(url + file_name, map_variables=map_variables)
224230
return data
231+
232+
233+
def get_srml(station, start, end, filetype='PO', map_variables=True,
             url="http://solardat.uoregon.edu/download/Archive/"):
    """Request data from UoO SRML and read it into a Dataframe.

    The University of Oregon Solar Radiation Monitoring Laboratory (SRML) is
    described in [1]_. A list of stations can be found in [2]_.

    Data is returned for the entire months between and including start and end.

    Parameters
    ----------
    station : str
        Two letter station abbreviation.
    start : datetime like
        First day of the requested period
    end : datetime like
        Last day of the requested period
    filetype : string, default: 'PO'
        SRML file type to gather. See notes for explanation.
    map_variables : bool, default: True
        When true, renames columns of the DataFrame to pvlib variable names
        where applicable. See variable :const:`VARIABLE_MAP`.
    url : str, default: 'http://solardat.uoregon.edu/download/Archive/'
        API endpoint URL

    Returns
    -------
    data : pd.DataFrame
        Dataframe with data from SRML.
    meta : dict
        Metadata with the keys ``'filetype'``, ``'station'`` and
        ``'filenames'``. ``'filenames'`` lists every monthly file that was
        requested, including files that could not be found.

    Raises
    ------
    ValueError
        If no data could be retrieved, i.e., `start` lies in a later month
        than `end` or none of the requested monthly files exist.

    Warns
    -----
    UserWarning
        For each requested monthly file that was not found on the server.

    Notes
    -----
    File types designate the time interval of a file and if it contains
    raw or processed data. For instance, `RO` designates raw, one minute
    data and `PO` designates processed one minute data. The availability
    of file types varies between sites. Below is a table of file types
    and their time intervals. See [1]_ for site information.

    ============= ============ ==================
    time interval raw filetype processed filetype
    ============= ============ ==================
    1 minute      RO           PO
    5 minute      RF           PF
    15 minute     RQ           PQ
    hourly        RH           PH
    ============= ============ ==================

    Warning
    -------
    SRML data has nighttime data prefilled with 0s through the end of the
    current month (i.e., values are provided for data in the future).

    References
    ----------
    .. [1] University of Oregon Solar Radiation Monitoring Laboratory
       `http://solardat.uoregon.edu/ <http://solardat.uoregon.edu/>`_
    .. [2] Station ID codes - Solar Radiation Monitoring Laboratory
       `http://solardat.uoregon.edu/StationIDCodes.html
       <http://solardat.uoregon.edu/StationIDCodes.html>`_
    """
    # A bare ``import urllib`` does not guarantee that the ``urllib.error``
    # submodule has been loaded, so import the exception class explicitly.
    from urllib.error import HTTPError

    # Use pd.to_datetime so that strings (e.g. '2021-01-01') are accepted
    start = pd.to_datetime(start)
    end = pd.to_datetime(end)

    # Enumerate every calendar month from the month of `start` through the
    # month of `end` (inclusive) with plain integer arithmetic. This avoids
    # the month-end frequency alias 'M' of pd.date_range, which is deprecated
    # in recent pandas versions.
    first_month = start.year * 12 + start.month - 1
    last_month = end.year * 12 + end.month - 1
    # SRML file names encode the month as two-digit year + two-digit month
    months_str = [f"{(m // 12) % 100:02d}{m % 12 + 1:02d}"
                  for m in range(first_month, last_month + 1)]

    # Generate list of filenames
    filenames = [f"{station}{filetype}{m}.txt" for m in months_str]

    dfs = []  # Initialize list of monthly dataframes
    for f in filenames:
        try:
            dfi = read_srml(url + f, map_variables=map_variables)
        except HTTPError:
            # A missing month should not discard the months that do exist
            warnings.warn(f"The following file was not found: {f}")
        else:
            dfs.append(dfi)

    if not dfs:
        # pd.concat would raise an opaque "No objects to concatenate"
        # ValueError here; raise the same exception type with context.
        raise ValueError(
            "No SRML data could be retrieved for station "
            f"{station!r} and filetype {filetype!r} between {start} and "
            f"{end}. Requested files: {filenames}")

    data = pd.concat(dfs, axis='rows')

    meta = {'filetype': filetype,
            'station': station,
            'filenames': filenames}

    return data, meta

pvlib/tests/iotools/test_srml.py

Lines changed: 59 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33
import pytest
44

55
from pvlib.iotools import srml
6-
from ..conftest import DATA_DIR, RERUNS, RERUNS_DELAY
6+
from ..conftest import (DATA_DIR, RERUNS, RERUNS_DELAY, assert_index_equal,
7+
assert_frame_equal, fail_on_pvlib_version)
8+
from pvlib._deprecation import pvlibDeprecationWarning
79

810
srml_testfile = DATA_DIR / 'SRML-day-EUPO1801.txt'
911

@@ -74,19 +76,33 @@ def test__map_columns(column, expected):
7476
assert srml._map_columns(column) == expected
7577

7678

79+
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml():
    """get_srml for a single month matches reading that month's file."""
    archive_file = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt'
    expected = srml.read_srml(archive_file)
    result = srml.get_srml(station='EU', start='2018-01-01',
                           end='2018-01-31')
    # get_srml returns (data, meta); only the data is compared here
    assert_frame_equal(expected, result[0])
87+
88+
89+
@fail_on_pvlib_version('0.11')
7790
@pytest.mark.remote_data
7891
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
7992
def test_read_srml_month_from_solardat():
8093
url = 'http://solardat.uoregon.edu/download/Archive/EUPO1801.txt'
8194
file_data = srml.read_srml(url)
82-
requested = srml.read_srml_month_from_solardat('EU', 2018, 1)
95+
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
96+
requested = srml.read_srml_month_from_solardat('EU', 2018, 1)
8397
assert file_data.equals(requested)
8498

8599

100+
@fail_on_pvlib_version('0.11')
86101
@pytest.mark.remote_data
87102
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
88103
def test_15_minute_dt_index():
89-
data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
104+
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
105+
data = srml.read_srml_month_from_solardat('TW', 2019, 4, 'RQ')
90106
start = pd.Timestamp('20190401 00:00')
91107
start = start.tz_localize('Etc/GMT+8')
92108
end = pd.Timestamp('20190430 23:45')
@@ -96,14 +112,53 @@ def test_15_minute_dt_index():
96112
assert (data.index[3::4].minute == 45).all()
97113

98114

115+
@fail_on_pvlib_version('0.11')
99116
@pytest.mark.remote_data
100117
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
101118
def test_hourly_dt_index():
102-
data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH')
119+
with pytest.warns(pvlibDeprecationWarning, match='get_srml instead'):
120+
data = srml.read_srml_month_from_solardat('CD', 1986, 4, 'PH')
103121
start = pd.Timestamp('19860401 00:00')
104122
start = start.tz_localize('Etc/GMT+8')
105123
end = pd.Timestamp('19860430 23:00')
106124
end = end.tz_localize('Etc/GMT+8')
107125
assert data.index[0] == start
108126
assert data.index[-1] == end
109127
assert (data.index.minute == 0).all()
128+
129+
130+
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_hourly():
    """Two months of hourly data yield one continuous hourly index."""
    # The metadata is not inspected in this test, hence the throwaway name
    data, _ = srml.get_srml(station='CD', start='1986-04-01',
                            end='1986-05-31', filetype='PH')
    expected_index = pd.date_range(start='1986-04-01', end='1986-05-31 23:59',
                                   freq='1h', tz='Etc/GMT+8')
    assert_index_equal(data.index, expected_index)
138+
139+
140+
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_minute():
    """One month of 1-minute data is complete and covers the local file."""
    local = srml.read_srml(srml_testfile)
    remote, meta = srml.get_srml(station='EU', start='2018-01-01',
                                 end='2018-01-31', filetype='PO')
    full_month = pd.date_range(start='2018-01-01', end='2018-01-31 23:59',
                               freq='1min', tz='Etc/GMT+8')
    assert_index_equal(remote.index, full_month)
    # Every column of the example file must exist in the remote file
    assert all(col in remote.columns for col in local.columns)
    # Check that all indices in example file are present in remote file
    assert local.index.isin(remote.index).all()
    assert meta['station'] == 'EU'
    assert meta['filetype'] == 'PO'
    assert meta['filenames'] == ['EUPO1801.txt']
155+
156+
157+
@pytest.mark.remote_data
@pytest.mark.flaky(reruns=RERUNS, reruns_delay=RERUNS_DELAY)
def test_get_srml_nonexisting_month_warning():
    """A missing monthly file triggers a warning instead of an error."""
    with pytest.warns(UserWarning, match='file was not found: EUPO0912.txt'):
        # Request data for a period where not all files exist
        # Eugene (EU) station started reporting 1-minute data in January 2010
        srml.get_srml(
            station='EU', start='2009-12-01', end='2010-01-31', filetype='PO')

0 commit comments

Comments
 (0)