from .compatibility import FileNotFoundError, urlparse, ConnectionError
from .utils import read_block

import locket

# Module-wide file lock wrapped around every libhdfs3 C call below,
# presumably because the library is not safe under concurrent access —
# TODO confirm against libhdfs3 docs.
# NOTE(review): the lock file is created relative to the current working
# directory; processes started from different cwds will not exclude each
# other — confirm this is intended.
lock = locket.lock_file('.libhdfs3.lock')


logger = logging.getLogger(__name__)
@@ -132,9 +135,6 @@ class HDFileSystem(object):
132
135
133
136
>>> hdfs = HDFileSystem(host='127.0.0.1', port=8020) # doctest: +SKIP
134
137
"""
135
-
136
- CONNECT_RETRIES = 5
137
-
138
138
def __init__ (self , host = DEFAULT_HOST , port = DEFAULT_PORT , user = None ,
139
139
ticket_cache = None , token = None , pars = None , connect = True ):
140
140
"""
def connect(self):
    """ Connect to the name node via libhdfs3.

    No-op if a handle already exists.  The whole builder/connect sequence
    runs under the global ``lock``.

    Raises
    ------
    ConnectionError
        If ``hdfsBuilderConnect`` fails; the message from
        ``hdfsGetLastError()`` is included.
    """
    if self._handle:
        return

    with lock:
        o = _lib.hdfsNewBuilder()
        if self.port is not None:
            _lib.hdfsBuilderSetNameNodePort(o, self.port)
        _lib.hdfsBuilderSetNameNode(o, ensure_bytes(self.host))
        if self.user:
            _lib.hdfsBuilderSetUserName(o, ensure_bytes(self.user))

        if self.ticket_cache:
            _lib.hdfsBuilderSetKerbTicketCachePath(o, ensure_bytes(self.ticket_cache))

        if self.token:
            _lib.hdfsBuilderSetToken(o, ensure_bytes(self.token))

        # Extra configuration key/value pairs; a failed set is a warning,
        # not fatal.
        for par, val in self.pars.items():
            if not _lib.hdfsBuilderConfSetStr(o, ensure_bytes(par), ensure_bytes(val)) == 0:
                warnings.warn('Setting conf parameter %s failed' % par)

        fs = _lib.hdfsBuilderConnect(o)
        if fs:
            logger.debug("Connect to handle %d", fs.contents.filesystem)
            self._handle = fs
            #if self.token:   # TODO: find out what a delegation token is
            #    self._token = _lib.hdfsGetDelegationToken(self._handle,
            #                                              ensure_bytes(self.user))
        else:
            msg = ensure_string(_lib.hdfsGetLastError())
            raise ConnectionError('Connection Failed: {}'.format(msg))
220
216
221
217
def disconnect(self):
    """ Disconnect from name node """
    handle = self._handle
    if handle:
        logger.debug("Disconnect from handle %d", handle.contents.filesystem)
        # Only the C call itself needs the global libhdfs3 lock.
        with lock:
            _lib.hdfsDisconnect(handle)
    self._handle = None
227
224
228
225
def open (self , path , mode = 'rb' , replication = 0 , buff = 0 , block_size = 0 ):
@@ -276,8 +273,9 @@ def du(self, path, total=False, deep=False):
276
273
277
274
def df(self):
    """ Used/free disc space on the HDFS system """
    # Query both counters inside one critical section.
    with lock:
        capacity = _lib.hdfsGetCapacity(self._handle)
        used = _lib.hdfsGetUsed(self._handle)
    free = capacity - used
    return {'capacity': capacity, 'used': used,
            'percent-free': 100 * free / capacity}
282
280
283
281
def get_block_locations(self, path, start=0, length=0):
    """ Fetch physical locations of blocks for (a section of) a file.

    Parameters
    ----------
    path : str
        File to query.
    start, length : int
        Byte range of interest; ``length=0`` means the whole file
        (its size is looked up via ``info``).

    Returns
    -------
    list of dicts with keys ``hosts``, ``length`` and ``offset``.

    Raises
    ------
    IOError
        If the underlying C call fails (returns NULL).
    """
    start = int(start) or 0
    length = int(length) or self.info(path)['size']
    nblocks = ctypes.c_int(0)
    with lock:
        out = _lib.hdfsGetFileBlockLocations(self._handle, ensure_bytes(path),
                                             ctypes.c_int64(start),
                                             ctypes.c_int64(length),
                                             ctypes.byref(nblocks))
    if not out:
        # NULL means the call failed; surface the library error instead of
        # dereferencing a null pointer in the loop below.
        with lock:
            msg = ensure_string(_lib.hdfsGetLastError())
        raise IOError('Block location failed on %s %s' % (path, msg))
    locs = []
    for i in range(nblocks.value):
        block = out[i]
        # Distinct inner index: reusing ``i`` leaks out of the list
        # comprehension under Python 2 scoping and corrupts the outer loop
        # variable (this module supports py2 via .compatibility).
        hosts = [block.hosts[j] for j in range(block.numOfNodes)]
        locs.append({'hosts': hosts, 'length': block.length,
                     'offset': block.offset})
    # Free the C-allocated array once everything is copied out.
    with lock:
        _lib.hdfsFreeFileBlockLocations(out, nblocks)
    return locs
302
302
303
303
def info(self, path):
    """ File information (as a dict) """
    if not self.exists(path):
        raise FileNotFoundError(path)
    with lock:
        raw = _lib.hdfsGetPathInfo(self._handle, ensure_bytes(path)).contents
        # Copy the fields into a Python dict before freeing the C struct.
        result = info_to_dict(raw)
        _lib.hdfsFreeFileInfo(ctypes.byref(raw), 1)
    return ensure_string(result)
311
312
312
313
def walk (self , path ):
@@ -358,9 +359,10 @@ def ls(self, path, detail=True):
358
359
if not self .exists (path ):
359
360
raise FileNotFoundError (path )
360
361
num = ctypes .c_int (0 )
361
- fi = _lib .hdfsListDirectory (self ._handle , ensure_bytes (path ), ctypes .byref (num ))
362
- out = [ensure_string (info_to_dict (fi [i ])) for i in range (num .value )]
363
- _lib .hdfsFreeFileInfo (fi , num .value )
362
+ with lock :
363
+ fi = _lib .hdfsListDirectory (self ._handle , ensure_bytes (path ), ctypes .byref (num ))
364
+ out = [ensure_string (info_to_dict (fi [i ])) for i in range (num .value )]
365
+ _lib .hdfsFreeFileInfo (fi , num .value )
364
366
if detail :
365
367
return out
366
368
else :
@@ -376,10 +378,11 @@ def __del__(self):
376
378
377
379
def mkdir(self, path):
    """ Make directory at path """
    with lock:
        rc = _lib.hdfsCreateDirectory(self._handle, ensure_bytes(path))
        # Non-zero return signals failure; fetch the error text while the
        # lock is still held.
        if rc:
            err = ensure_string(_lib.hdfsGetLastError())
            raise IOError('Create directory failed: {}'.format(err))
383
386
384
387
def set_replication(self, path, replication):
    """ Instruct HDFS to set the replication for the given file.

    A ``replication`` of 0 requests the system default.
    """
    if replication < 0:
        raise ValueError('Replication must be positive, or 0 for system default')
    with lock:
        rc = _lib.hdfsSetReplication(self._handle, ensure_bytes(path),
                                     ctypes.c_int16(int(replication)))
        if rc:
            err = ensure_string(_lib.hdfsGetLastError())
            raise IOError('Set replication failed: {}'.format(err))
399
403
400
404
def mv(self, path1, path2):
    """ Move file at path1 to path2 """
    if not self.exists(path1):
        raise FileNotFoundError(path1)
    with lock:
        rc = _lib.hdfsRename(self._handle, ensure_bytes(path1), ensure_bytes(path2))
    # NOTE(review): unlike mkdir/rm this reports failure via the return
    # value rather than raising — inconsistent but relied upon by callers.
    return rc == 0
406
411
407
412
def rm(self, path, recursive=True):
    "Use recursive for `rm -r`, i.e., delete directory and contents"
    if not self.exists(path):
        raise FileNotFoundError(path)
    with lock:
        rc = _lib.hdfsDelete(self._handle, ensure_bytes(path), bool(recursive))
        if rc:
            err = ensure_string(_lib.hdfsGetLastError())
            raise IOError('Remove failed on %s %s' % (path, err))
415
421
416
422
def exists(self, path):
    """ Is there an entry at path? """
    with lock:
        rc = _lib.hdfsExists(self._handle, ensure_bytes(path))
    # Zero means the path exists; any non-zero value (missing path or
    # error) reads as "does not exist".
    return rc == 0
420
427
421
428
def chmod(self, path, mode):
    """ Change access control of given path

    ``mode`` is an integer permission bit pattern (e.g. ``0o755``),
    passed through to libhdfs3 as a C short.
    """
    if not self.exists(path):
        raise FileNotFoundError(path)
    with lock:
        rc = _lib.hdfsChmod(self._handle, ensure_bytes(path), ctypes.c_short(mode))
        if rc:
            err = ensure_string(_lib.hdfsGetLastError())
            raise IOError("chmod failed on %s %s" % (path, err))
448
456
449
457
def chown(self, path, owner, group):
    """ Change owner/group """
    if not self.exists(path):
        raise FileNotFoundError(path)
    with lock:
        rc = _lib.hdfsChown(self._handle, ensure_bytes(path),
                            ensure_bytes(owner), ensure_bytes(group))
        if rc:
            err = ensure_string(_lib.hdfsGetLastError())
            raise IOError("chown failed on %s %s" % (path, err))
458
467
459
468
def cat (self , path ):
460
469
""" Return contents of file """
0 commit comments