9
9
10
10
import numpy as np
11
11
import pandas as pd
12
+ from scipy .optimize import minimize_scalar
13
+ from scipy .linalg import hankel
12
14
13
15
from pvlib import atmosphere , tools
14
16
@@ -597,7 +599,118 @@ def _calc_d(aod700, p):
597
599
return d
598
600
599
601
600
- def detect_clearsky (measured , clearsky , times , window_length ,
602
+ def _calc_stats (data , samples_per_window , sample_interval , H ):
603
+ """ Calculates statistics for each window, used by Reno-style clear
604
+ sky detection functions. Does not return the line length statistic
605
+ which is provided by _calc_windowed_stat and _line_length
606
+
607
+ Parameters
608
+ ----------
609
+ data : Series
610
+ samples_per_window : int
611
+ Number of data points in each window
612
+ sample_interval : float
613
+ Time in minutes in each sample interval
614
+ H : 2D ndarray
615
+ Hankel matrix defining the indices for each window.
616
+
617
+ Returns
618
+ -------
619
+ data_mean : Series
620
+ mean of data in each window
621
+ data_max : Series
622
+ maximum of data in each window
623
+ data_slope_nstd : Series
624
+ standard deviation of difference between data points in each window
625
+ data_slope : Series
626
+ difference between successive data points
627
+ """
628
+
629
+ data_mean = data .values [H ].mean (axis = 0 )
630
+ data_mean = _to_centered_series (data_mean , data .index , samples_per_window )
631
+ data_max = data .values [H ].max (axis = 0 )
632
+ data_max = _to_centered_series (data_max , data .index , samples_per_window )
633
+ # shift to get forward difference, .diff() is backward difference instead
634
+ data_diff = data .diff ().shift (- 1 )
635
+ data_slope = data_diff / sample_interval
636
+ data_slope_nstd = _slope_nstd_windowed (data , H , samples_per_window )
637
+ data_slope_nstd = data_slope_nstd
638
+
639
+ return data_mean , data_max , data_slope_nstd , data_slope
640
+
641
+
642
+ def _slope_nstd_windowed (data , H , samples_per_window ):
643
+ with np .errstate (divide = 'ignore' , invalid = 'ignore' ):
644
+ raw = np .diff (data )
645
+ raw = raw [H [:- 1 , ]].std (ddof = 1 , axis = 0 ) / data .values [H ].mean (axis = 0 )
646
+ return _to_centered_series (raw , data .index , samples_per_window )
647
+
648
+
649
+ def _max_diff_windowed (data , H , samples_per_window ):
650
+ raw = np .diff (data )
651
+ raw = np .abs (raw [H [:- 1 , ]]).max (axis = 0 )
652
+ return _to_centered_series (raw , data .index , samples_per_window )
653
+
654
+
655
+ def _line_length_windowed (data , H , samples_per_window ,
656
+ sample_interval ):
657
+ raw = np .sqrt (np .diff (data )** 2. + sample_interval ** 2. )
658
+ raw = np .sum (raw [H [:- 1 , ]], axis = 0 )
659
+ return _to_centered_series (raw , data .index , samples_per_window )
660
+
661
+
662
+ def _to_centered_series (vals , idx , samples_per_window ):
663
+ vals = np .pad (vals , ((0 , len (idx ) - len (vals )),), mode = 'constant' ,
664
+ constant_values = np .nan )
665
+ shift = samples_per_window // 2 # align = 'center' only
666
+ return pd .Series (index = idx , data = vals ).shift (shift )
667
+
668
+
669
+ def _get_sample_intervals (times , win_length ):
670
+ """ Calculates time interval and samples per window for Reno-style clear
671
+ sky detection functions
672
+ """
673
+ deltas = np .diff (times .values ) / np .timedelta64 (1 , '60s' )
674
+
675
+ # determine if we can proceed
676
+ if times .inferred_freq and len (np .unique (deltas )) == 1 :
677
+ sample_interval = times [1 ] - times [0 ]
678
+ sample_interval = sample_interval .seconds / 60 # in minutes
679
+ samples_per_window = int (win_length / sample_interval )
680
+ return sample_interval , samples_per_window
681
+ else :
682
+ raise NotImplementedError ('algorithm does not yet support unequal '
683
+ 'times. consider resampling your data.' )
684
+
685
+
686
+ def _clear_sample_index (clear_windows , samples_per_window , align , H ):
687
+ """
688
+ Returns indices of clear samples in clear windows
689
+ """
690
+ # H contains indices for each window, e.g. indices for the first window
691
+ # are in first column of H.
692
+ # clear_windows contains one boolean for each window and is aligned
693
+ # by 'align', default to center
694
+ # shift clear_windows.index to be aligned left (e.g. first value in the
695
+ # left-most position) to line up with the first column of H.
696
+
697
+ # commented if/else block for future align='left', 'right' capability
698
+ # if align == 'right':
699
+ # shift = 1 - samples_per_window
700
+ # elif align == 'center':
701
+ # shift = - (samples_per_window // 2)
702
+ # else:
703
+ # shift = 0
704
+ shift = - (samples_per_window // 2 )
705
+ idx = clear_windows .shift (shift )
706
+ # drop rows at the end corresponding to windows past the end of data
707
+ idx = idx .drop (clear_windows .index [1 - samples_per_window :])
708
+ idx = idx .astype (bool ) # shift changed type to object
709
+ clear_samples = np .unique (H [:, idx ])
710
+ return clear_samples
711
+
712
+
713
+ def detect_clearsky (measured , clearsky , times = None , window_length = 10 ,
601
714
mean_diff = 75 , max_diff = 75 ,
602
715
lower_line_length = - 5 , upper_line_length = 10 ,
603
716
var_diff = 0.005 , slope_dev = 8 , max_iterations = 20 ,
@@ -623,24 +736,25 @@ def detect_clearsky(measured, clearsky, times, window_length,
623
736
Parameters
624
737
----------
625
738
measured : array or Series
626
- Time series of measured values.
739
+ Time series of measured GHI. [W/m2]
627
740
clearsky : array or Series
628
- Time series of the expected clearsky values.
629
- times : DatetimeIndex
630
- Times of measured and clearsky values.
631
- window_length : int
741
+ Time series of the expected clearsky GHI. [W/m2]
742
+ times : DatetimeIndex or None, default None.
743
+ Times of measured and clearsky values. If None the index of measured
744
+ will be used.
745
+ window_length : int, default 10
632
746
Length of sliding time window in minutes. Must be greater than 2
633
747
periods.
634
748
mean_diff : float, default 75
635
749
Threshold value for agreement between mean values of measured
636
- and clearsky in each interval, see Eq. 6 in [1].
750
+ and clearsky in each interval, see Eq. 6 in [1]. [W/m2]
637
751
max_diff : float, default 75
638
752
Threshold value for agreement between maxima of measured and
639
- clearsky values in each interval, see Eq. 7 in [1].
753
+ clearsky values in each interval, see Eq. 7 in [1]. [W/m2]
640
754
lower_line_length : float, default -5
641
755
Lower limit of line length criterion from Eq. 8 in [1].
642
- Criterion satisfied when
643
- lower_line_length < line length difference < upper_line_length
756
+ Criterion satisfied when lower_line_length < line length difference
757
+ < upper_line_length.
644
758
upper_line_length : float, default 10
645
759
Upper limit of line length criterion from Eq. 8 in [1].
646
760
var_diff : float, default 0.005
@@ -671,6 +785,13 @@ def detect_clearsky(measured, clearsky, times, window_length,
671
785
detected clear_samples. Only provided if return_components is
672
786
True.
673
787
788
+ Raises
789
+ ------
790
+ ValueError
791
+ If measured is not a Series and times is not provided
792
+ NotImplementedError
793
+ If timestamps are not equally spaced
794
+
674
795
References
675
796
----------
676
797
.. [1] Reno, M.J. and C.W. Hansen, "Identification of periods of clear
@@ -692,78 +813,81 @@ def detect_clearsky(measured, clearsky, times, window_length,
692
813
* parameters are controllable via keyword arguments
693
814
* option to return individual test components and clearsky scaling
694
815
parameter
816
+ * uses centered windows (Matlab function uses left-aligned windows)
695
817
"""
696
818
697
- # calculate deltas in units of minutes (matches input window_length units)
698
- deltas = np .diff (times .values ) / np .timedelta64 (1 , '60s' )
819
+ if times is None :
820
+ try :
821
+ times = measured .index
822
+ except AttributeError :
823
+ raise ValueError ("times is required when measured is not a Series" )
699
824
700
- # determine the unique deltas and if we can proceed
701
- unique_deltas = np .unique (deltas )
702
- if len (unique_deltas ) == 1 :
703
- sample_interval = unique_deltas [0 ]
825
+ # be polite about returning the same type as was input
826
+ ispandas = isinstance (measured , pd .Series )
827
+
828
+ # for internal use, need a Series
829
+ if not ispandas :
830
+ meas = pd .Series (measured , index = times )
704
831
else :
705
- raise NotImplementedError ('algorithm does not yet support unequal '
706
- 'times. consider resampling your data.' )
832
+ meas = measured
707
833
708
- intervals_per_window = int (window_length / sample_interval )
834
+ if not isinstance (clearsky , pd .Series ):
835
+ clear = pd .Series (clearsky , index = times )
836
+ else :
837
+ clear = clearsky
709
838
710
- # generate matrix of integers for creating windows with indexing
711
- from scipy .linalg import hankel
712
- H = hankel (np .arange (intervals_per_window ), # noqa: N806
713
- np .arange (intervals_per_window - 1 , len (times )))
839
+ sample_interval , samples_per_window = _get_sample_intervals (times ,
840
+ window_length )
714
841
715
- # convert pandas input to numpy array, but save knowledge of input state
716
- # so we can return a series if that's what was originally provided
717
- ispandas = isinstance (measured , pd .Series )
718
- measured = np .asarray (measured )
719
- clearsky = np .asarray (clearsky )
842
+ # generate matrix of integers for creating windows with indexing
843
+ H = hankel (np .arange (samples_per_window ),
844
+ np .arange (samples_per_window - 1 , len (times )))
720
845
721
846
# calculate measurement statistics
722
- meas_mean = np .mean (measured [H ], axis = 0 )
723
- meas_max = np .max (measured [H ], axis = 0 )
724
- meas_diff = np .diff (measured [H ], n = 1 , axis = 0 )
725
- meas_slope = np .diff (measured [H ], n = 1 , axis = 0 ) / sample_interval
726
- # matlab std function normalizes by N-1, so set ddof=1 here
727
- meas_slope_nstd = np .std (meas_slope , axis = 0 , ddof = 1 ) / meas_mean
728
- meas_line_length = np .sum (np .sqrt (
729
- meas_diff * meas_diff +
730
- sample_interval * sample_interval ), axis = 0 )
847
+ meas_mean , meas_max , meas_slope_nstd , meas_slope = _calc_stats (
848
+ meas , samples_per_window , sample_interval , H )
849
+ meas_line_length = _line_length_windowed (
850
+ meas , H , samples_per_window , sample_interval )
731
851
732
852
# calculate clear sky statistics
733
- clear_mean = np .mean (clearsky [H ], axis = 0 )
734
- clear_max = np .max (clearsky [H ], axis = 0 )
735
- clear_diff = np .diff (clearsky [H ], n = 1 , axis = 0 )
736
- clear_slope = np .diff (clearsky [H ], n = 1 , axis = 0 ) / sample_interval
737
-
738
- from scipy .optimize import minimize_scalar
739
-
853
+ clear_mean , clear_max , _ , clear_slope = _calc_stats (
854
+ clear , samples_per_window , sample_interval , H )
855
+
856
+ # find a scaling factor for the clear sky time series that minimizes the
857
+ # RMSE between the clear times identified in the measured data and the
858
+ # scaled clear sky time series. Optimization to determine the scaling
859
+ # factor considers all identified clear times, which is different from [1]
860
+ # where the scaling factor was determined from clear times on days with
861
+ # at least 50% of the day being identified as clear.
740
862
alpha = 1
741
863
for iteration in range (max_iterations ):
742
- clear_line_length = np . sum ( np . sqrt (
743
- alpha * alpha * clear_diff * clear_diff +
744
- sample_interval * sample_interval ), axis = 0 )
864
+ scaled_clear = alpha * clear
865
+ clear_line_length = _line_length_windowed (
866
+ scaled_clear , H , samples_per_window , sample_interval )
745
867
746
868
line_diff = meas_line_length - clear_line_length
747
-
869
+ slope_max_diff = _max_diff_windowed (
870
+ meas - scaled_clear , H , samples_per_window )
748
871
# evaluate comparison criteria
749
872
c1 = np .abs (meas_mean - alpha * clear_mean ) < mean_diff
750
873
c2 = np .abs (meas_max - alpha * clear_max ) < max_diff
751
874
c3 = (line_diff > lower_line_length ) & (line_diff < upper_line_length )
752
875
c4 = meas_slope_nstd < var_diff
753
- c5 = np .max (np .abs (meas_slope -
754
- alpha * clear_slope ), axis = 0 ) < slope_dev
876
+ c5 = slope_max_diff < slope_dev
755
877
c6 = (clear_mean != 0 ) & ~ np .isnan (clear_mean )
756
878
clear_windows = c1 & c2 & c3 & c4 & c5 & c6
757
879
758
880
# create array to return
759
- clear_samples = np .full_like (measured , False , dtype = 'bool' )
881
+ clear_samples = np .full_like (meas , False , dtype = 'bool' )
760
882
# find the samples contained in any window classified as clear
761
- clear_samples [np .unique (H [:, clear_windows ])] = True
883
+ idx = _clear_sample_index (clear_windows , samples_per_window , 'center' ,
884
+ H )
885
+ clear_samples [idx ] = True
762
886
763
887
# find a new alpha
764
888
previous_alpha = alpha
765
- clear_meas = measured [clear_samples ]
766
- clear_clear = clearsky [clear_samples ]
889
+ clear_meas = meas [clear_samples ]
890
+ clear_clear = clear [clear_samples ]
767
891
768
892
def rmse (alpha ):
769
893
return np .sqrt (np .mean ((clear_meas - alpha * clear_clear )** 2 ))
@@ -773,7 +897,7 @@ def rmse(alpha):
773
897
break
774
898
else :
775
899
import warnings
776
- warnings .warn ('failed to converge after %s iterations'
900
+ warnings .warn ('rescaling failed to converge after %s iterations'
777
901
% max_iterations , RuntimeWarning )
778
902
779
903
# be polite about returning the same type as was input
@@ -794,8 +918,7 @@ def rmse(alpha):
794
918
components ['max_diff' ] = np .abs (meas_max - alpha * clear_max )
795
919
components ['line_length' ] = meas_line_length - clear_line_length
796
920
components ['slope_nstd' ] = meas_slope_nstd
797
- components ['slope_max' ] = (np .max (
798
- meas_slope - alpha * clear_slope , axis = 0 ))
921
+ components ['slope_max' ] = slope_max_diff
799
922
800
923
return clear_samples , components , alpha
801
924
else :
0 commit comments