From a8d9287baf6a4ae8ae07e6552814bd3fd090ed85 Mon Sep 17 00:00:00 2001 From: Winand Date: Wed, 4 May 2016 16:42:13 +0300 Subject: [PATCH 1/2] handle date/datetime formats --- pandas/io/sas/sas7bdat.py | 12 +++++++++--- pandas/io/sas/sas_constants.py | 14 ++++++++++++++ pandas/io/tests/sas/data/dt.sas7bdat | Bin 0 -> 5120 bytes pandas/io/tests/sas/test_sas7bdat.py | 10 ++++++++++ 4 files changed, 33 insertions(+), 3 deletions(-) create mode 100644 pandas/io/tests/sas/data/dt.sas7bdat diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index b75f05cf9ed7e..08a88a5ffa52a 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -629,9 +629,15 @@ def _chunk_to_dataframe(self): rslt[name] = self._byte_chunk[jb, :].view( dtype=self.byte_order + 'd') rslt[name] = np.asarray(rslt[name], dtype=np.float64) - if self.convert_dates and (self.column_formats[j] == "MMDDYY"): - epoch = pd.datetime(1960, 1, 1) - rslt[name] = epoch + pd.to_timedelta(rslt[name], unit='d') + if self.convert_dates: + unit = None + if self.column_formats[j] in const.sas_date_formats: + unit = 'd' + elif self.column_formats[j] in const.sas_datetime_formats: + unit = 's' + if unit: + epoch = pd.datetime(1960, 1, 1) + rslt[name] = epoch + pd.to_timedelta(rslt[name], unit) jb += 1 elif self.column_types[j] == b's': rslt[name] = self._string_chunk[js, :] diff --git a/pandas/io/sas/sas_constants.py b/pandas/io/sas/sas_constants.py index 65ae1e9102cb2..8bbe0117f9a56 100644 --- a/pandas/io/sas/sas_constants.py +++ b/pandas/io/sas/sas_constants.py @@ -145,3 +145,17 @@ class index: b"\xFF\xFF\xFF\xFE": index.columnListIndex, b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": index.columnListIndex, b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": index.columnListIndex} + + +# List of frequently used SAS date and datetime formats +# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm +sas_date_formats = ("DATE", "DAY", "DDMMYY", "DOWNAME", "JULDAY", "JULIAN", + "MMDDYY", "MMYY", "MMYYC", "MMYYD", "MMYYP", "MMYYS", + "MMYYN", "MONNAME", "MONTH", "MONYY", "QTR", "QTRR", + "NENGO", "WEEKDATE", "WEEKDATX", "WEEKDAY", "WEEKV", + "WORDDATE", "WORDDATX", "YEAR", "YYMM", "YYMMC", "YYMMD", + "YYMMP", "YYMMS", "YYMMN", "YYMON", "YYMMDD", "YYQ", + "YYQC", "YYQD", "YYQP", "YYQS", "YYQN", "YYQR", "YYQRC", + "YYQRD", "YYQRP", "YYQRS", "YYQRN") + +sas_datetime_formats = "DATETIME", "DTWKDATX" diff --git a/pandas/io/tests/sas/data/dt.sas7bdat b/pandas/io/tests/sas/data/dt.sas7bdat new file mode 100644 index 0000000000000000000000000000000000000000..e33eaf4fafed335ded3201caa25363b5448e65a1 GIT binary patch literal 5120 zcmeHLzl#$=6n?w6YSi41K#CMmLr8Ij%HEpd3Xv>q4g#0EaJQ(}oKX-Ju2i%V?JTs> z*8jrB=0Fcb!9vl_>IxCD7R6s#zxOh;H`&WYK?H?)F!{ds=DnFWZ)QR!L|d});b!+i zvG8L3ZjSaoow;2)G*zfpDX2Pb;H7S3PD*zm%i(fmt~K9`;(FyzBnsn@;~ySefB*RU zoAAoL&F33HnYQ$#ks>EIMPw#6d(;p7pzWV(Eu85rEsE=~5A+W+r#7DZW9QW!-O>(4 zCc767WNlWr75HB&FkzhEBm0oGJ5G5(WrAo^U8o{B+gRR$_{x3_e2tCJBk;Q7>>Mfn zazFePzYi`x&@URiZg9$QJtQMNT5T}gC_fXVTd(_A>-7U0G=sN97&qtO8wF@lyS=hPB>FL; z_TmDoeLYqP|G3p|ro&^AT_{{BuH)}YUO97(_{qb&qmlDx&SPqXNo>6HY-8m8V9rBn z#F0zS9LyqLhuFRhv1$LmiJu_)RDFiL!(@|c!h-Ko1)SHSMf>u3lfCBy16e}D5=6xC zkA0YL-(RK1>*cjE^5tS2&*0#lM(0S$8Mm@%d>`b6mkrCPVLW8w4BRaCUL6%;ZXNvu DoS&%7 literal 0 HcmV?d00001 diff --git a/pandas/io/tests/sas/test_sas7bdat.py b/pandas/io/tests/sas/test_sas7bdat.py index 6661d9fee5df0..f50e19cd4c139 100644 --- a/pandas/io/tests/sas/test_sas7bdat.py +++ b/pandas/io/tests/sas/test_sas7bdat.py @@ -112,3 +112,13 @@ def test_airline(): df0 = pd.read_csv(fname) df0 = df0.astype(np.float64) tm.assert_frame_equal(df, df0, check_exact=False) + + +def test_date_time(): + dirpath = tm.get_data_path() + fname = os.path.join(dirpath, "dt.sas7bdat") + df = pd.read_sas(fname) + di = pd.to_datetime(["1960-05-03", "1993-10-19", "2014-10-04", + "1960-01-02 10:17:36"]) + df0 = pd.DataFrame(di, index=("D1", "D2", "D3", "DT")).T + tm.assert_frame_equal(df, df0) From 4c90494bb7f9ad6ec6285092f87f7193f5383d60 Mon Sep 17 00:00:00 2001 From: Winand Date: Thu, 5 May 2016 16:30:29 +0300 Subject: [PATCH 2/2] Use origin kw of to_datetime --- pandas/io/sas/sas7bdat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 08a88a5ffa52a..f4474b7480236 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -636,8 +636,8 @@ def _chunk_to_dataframe(self): elif self.column_formats[j] in const.sas_datetime_formats: unit = 's' if unit: - epoch = pd.datetime(1960, 1, 1) - rslt[name] = epoch + pd.to_timedelta(rslt[name], unit) + rslt[name] = pd.to_datetime(rslt[name], unit=unit, + origin="1960-01-01") jb += 1 elif self.column_types[j] == b's': rslt[name] = self._string_chunk[js, :]