2
2
"""
3
3
4
4
import pandas as pd
5
- import numpy as np
6
5
7
6
8
- HEADERS = (
9
- 'WBANNO UTC_DATE UTC_TIME LST_DATE LST_TIME CRX_VN LONGITUDE LATITUDE '
10
- 'AIR_TEMPERATURE PRECIPITATION SOLAR_RADIATION SR_FLAG '
11
- 'SURFACE_TEMPERATURE ST_TYPE ST_FLAG RELATIVE_HUMIDITY RH_FLAG '
12
- 'SOIL_MOISTURE_5 SOIL_TEMPERATURE_5 WETNESS WET_FLAG WIND_1_5 WIND_FLAG'
13
- )
7
+ HEADERS = [
8
+ 'WBANNO' , ' UTC_DATE' , ' UTC_TIME' , ' LST_DATE' , ' LST_TIME' , ' CRX_VN' ,
9
+ 'LONGITUDE' , 'LATITUDE' , 'AIR_TEMPERATURE' , 'PRECIPITATION' ,
10
+ 'SOLAR_RADIATION' , 'SR_FLAG' , ' SURFACE_TEMPERATURE' , ' ST_TYPE' , ' ST_FLAG' ,
11
+ 'RELATIVE_HUMIDITY' , 'RH_FLAG' , 'SOIL_MOISTURE_5' , 'SOIL_TEMPERATURE_5' ,
12
+ 'WETNESS' , 'WET_FLAG' , 'WIND_1_5' , 'WIND_FLAG' ]
14
13
15
14
VARIABLE_MAP = {
16
15
'LONGITUDE' : 'longitude' ,
24
23
'WIND_FLAG' : 'wind_speed_flag'
25
24
}
26
25
26
+ NAN_DICT = {
27
+ 'CRX_VN' : - 99999 ,
28
+ 'AIR_TEMPERATURE' : - 9999 ,
29
+ 'PRECIPITATION' : - 9999 ,
30
+ 'SOLAR_RADIATION' : - 99999 ,
31
+ 'SURFACE_TEMPERATURE' : - 9999 ,
32
+ 'RELATIVE_HUMIDITY' : - 9999 ,
33
+ 'SOIL_MOISTURE_5' : - 99 ,
34
+ 'SOIL_TEMPERATURE_5' : - 9999 ,
35
+ 'WETNESS' : - 9999 ,
36
+ 'WIND_1_5' : - 99 }
37
+
38
+ # Also treat a run of NUL characters as NaN for each column listed in NAN_DICT
39
+ NAN_DICT = {k : [v , '\x00 \x00 \x00 \x00 \x00 \x00 ' ] for k , v in NAN_DICT .items ()}
40
+
27
41
# as specified in CRN README.txt file. excludes 1 space between columns
28
42
WIDTHS = [5 , 8 , 4 , 8 , 4 , 6 , 7 , 7 , 7 , 7 , 6 , 1 , 7 , 1 , 1 , 5 , 1 , 7 , 7 , 5 , 1 , 6 , 1 ]
29
43
# add 1 to make fields contiguous (required by pandas.read_fwf)
40
54
]
41
55
42
56
43
- def read_crn (filename ):
44
- """
45
- Read a NOAA USCRN fixed-width file into pandas dataframe. The CRN is
46
- described in [1]_ and [2]_.
57
+ def read_crn (filename , map_variables = True ):
58
+ """Read a NOAA USCRN fixed-width file into a pandas dataframe.
59
+
60
+ The CRN network consists of over 100 meteorological stations covering the
61
+ U.S. and is described in [1]_ and [2]_. The primary goal of CRN is to
62
+ provide long-term measurements of temperature, precipitation, and soil
63
+ moisture and temperature. Additionally, global horizontal irradiance (GHI)
64
+ is measured at each site using a photodiode pyranometer.
47
65
48
66
Parameters
49
67
----------
50
68
filename: str, path object, or file-like
51
69
filepath or url to read for the fixed-width file.
70
+ map_variables: boolean, default: True
71
+ When true, renames columns of the DataFrame to pvlib variable names
72
+ where applicable. See variable :const:`VARIABLE_MAP`.
52
73
53
74
Returns
54
75
-------
@@ -60,12 +81,12 @@ def read_crn(filename):
60
81
-----
61
82
CRN files contain 5 minute averages labeled by the interval ending
62
83
time. Here, missing data is flagged as NaN, rather than the lowest
63
- possible integer for a field (e.g. -999 or -99). Air temperature in
64
- deg C. Wind speed in m/s at a height of 1.5 m above ground level.
84
+ possible integer for a field (e.g. -999 or -99). Air temperature is in
85
+ deg C and wind speed is in m/s at a height of 1.5 m above ground level.
65
86
66
- Variables corresponding to standard pvlib variables are renamed,
87
+ Variables corresponding to standard pvlib variables are by default renamed,
67
88
e.g. `SOLAR_RADIATION` becomes `ghi`. See the
68
- `pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.
89
+ :const:`pvlib.iotools.crn.VARIABLE_MAP` dict for the complete mapping.
69
90
70
91
CRN files occasionally have a set of null characters on a line
71
92
instead of valid data. This function drops those lines. Sometimes
@@ -85,16 +106,13 @@ def read_crn(filename):
85
106
Amer. Meteor. Soc., 94, 489-498. :doi:`10.1175/BAMS-D-12-00170.1`
86
107
"""
87
108
88
- # read in data. set fields with NUL characters to NaN
89
- data = pd .read_fwf (filename , header = None , names = HEADERS .split (' ' ),
90
- widths = WIDTHS , na_values = ['\x00 \x00 \x00 \x00 \x00 \x00 ' ])
91
- # at this point we only have NaNs from NUL characters, not -999 etc.
92
- # these bad rows need to be removed so that dtypes can be set.
93
- # NaNs require float dtype so we run into errors if we don't do this.
94
- data = data .dropna (axis = 0 )
95
- # loop here because dtype kwarg not supported in read_fwf until 0.20
96
- for (col , _dtype ) in zip (data .columns , DTYPES ):
97
- data [col ] = data [col ].astype (_dtype )
109
+ # read in data
110
+ data = pd .read_fwf (filename , header = None , names = HEADERS , widths = WIDTHS ,
111
+ na_values = NAN_DICT )
112
+ # Remove rows with all nans
113
+ data = data .dropna (axis = 0 , how = 'all' )
114
+ # set dtypes here because dtype kwarg not supported in read_fwf until 0.20
115
+ data = data .astype (dict (zip (HEADERS , DTYPES )))
98
116
99
117
# set index
100
118
# UTC_TIME does not have leading 0s, so must zfill(4) to comply
@@ -103,19 +121,8 @@ def read_crn(filename):
103
121
dtindex = pd .to_datetime (dts ['UTC_DATE' ] + dts ['UTC_TIME' ].str .zfill (4 ),
104
122
format = '%Y%m%d%H%M' , utc = True )
105
123
data = data .set_index (dtindex )
106
- try :
107
- # to_datetime(utc=True) does not work in older versions of pandas
108
- data = data .tz_localize ('UTC' )
109
- except TypeError :
110
- pass
111
-
112
- # Now we can set nans. This could be done a per column basis to be
113
- # safer, since in principle a real -99 value could occur in a -9999
114
- # column. Very unlikely to see that in the real world.
115
- for val in [- 99 , - 999 , - 9999 ]:
116
- # consider replacing with .replace([-99, -999, -9999])
117
- data = data .where (data != val , np .nan )
118
-
119
- data = data .rename (columns = VARIABLE_MAP )
124
+
125
+ if map_variables :
126
+ data = data .rename (columns = VARIABLE_MAP )
120
127
121
128
return data
0 commit comments