@@ -34,7 +34,7 @@ class DateConversionError(Exception):
 Parameters
 ----------
 filepath_or_buffer : string or file handle / StringIO. The string could be
-    a URL. Valid URL schemes include http, ftp, and file. For file URLs, a host
+    a URL. Valid URL schemes include http, ftp, s3, and file. For file URLs, a host
     is expected. For instance, a local file could be
     file ://localhost/path/to/table.csv
 %s
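
With this docstring change, the shared parser parameters advertise s3:// URLs alongside http, ftp, and file. A minimal usage sketch, assuming this parameter block feeds read_csv, that credentials come from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables, and that the bucket and key names below are placeholders:

import pandas as pd

# Credentials are picked up from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY
# environment variables; 'my-bucket' and the key path are placeholders.
df = pd.read_csv('s3://my-bucket/path/to/table.csv')
print(df.head())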
@@ -188,6 +188,12 @@ def _is_url(url):
     except:
         return False
 
+def _is_s3_url(url):
+    """ Check for an s3 url """
+    try:
+        return urlparse.urlparse(url).scheme == 's3'
+    except:
+        return False
 
 def _read(filepath_or_buffer, kwds):
     "Generic reader of line files."
@@ -196,17 +202,32 @@ def _read(filepath_or_buffer, kwds):
     if skipfooter is not None:
         kwds['skip_footer'] = skipfooter
 
-    if isinstance(filepath_or_buffer, basestring) and _is_url(filepath_or_buffer):
-        from urllib2 import urlopen
-        filepath_or_buffer = urlopen(filepath_or_buffer)
-        if py3compat.PY3:  # pragma: no cover
-            if encoding:
-                errors = 'strict'
-            else:
-                errors = 'replace'
-                encoding = 'utf-8'
-            bytes = filepath_or_buffer.read()
-            filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
+    if isinstance(filepath_or_buffer, basestring):
+        if _is_url(filepath_or_buffer):
+            from urllib2 import urlopen
+            filepath_or_buffer = urlopen(filepath_or_buffer)
+            if py3compat.PY3:  # pragma: no cover
+                if encoding:
+                    errors = 'strict'
+                else:
+                    errors = 'replace'
+                    encoding = 'utf-8'
+                bytes = filepath_or_buffer.read()
+                filepath_or_buffer = StringIO(bytes.decode(encoding, errors))
+
+        if _is_s3_url(filepath_or_buffer):
+            try:
+                import boto
+            except:
+                raise ImportError("boto is required to handle s3 files")
+            # Assuming AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
+            # are environment variables
+            parsed_url = urlparse.urlparse(filepath_or_buffer)
+            conn = boto.connect_s3()
+            b = conn.get_bucket(parsed_url.netloc)
+            k = boto.s3.key.Key(b)
+            k.key = parsed_url.path
+            filepath_or_buffer = StringIO(k.get_contents_as_string())
 
     if kwds.get('date_parser', None) is not None:
         if isinstance(kwds['parse_dates'], bool):
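
For reference, the s3 branch above can be exercised on its own. A rough standalone sketch of the same boto logic, assuming credentials come from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables; read_s3_object and the bucket/key names are hypothetical and not part of the patch:

from StringIO import StringIO  # io.StringIO on Python 3
import urlparse                # urllib.parse on Python 3

import boto
from boto.s3.key import Key

def read_s3_object(url):
    # Mirrors the new block in _read: parse the s3:// URL, connect with boto
    # (credentials from the environment), and pull the object into an
    # in-memory buffer. parsed_url.path keeps its leading '/', as in the diff.
    parsed_url = urlparse.urlparse(url)
    conn = boto.connect_s3()
    bucket = conn.get_bucket(parsed_url.netloc)
    key = Key(bucket)
    key.key = parsed_url.path
    return StringIO(key.get_contents_as_string())

# buf = read_s3_object('s3://my-bucket/path/to/table.csv')  # placeholder names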