Skip to content

Commit dbe74e1

Browse files
authored
Merge pull request #552 from pauldmccarthy/indexed_gzip
Conditionally use indexed_gzip
2 parents 23539e8 + 790c037 commit dbe74e1

10 files changed

+572
-43
lines changed

.travis.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ cache:
1515
env:
1616
global:
1717
- DEPENDS="six numpy scipy matplotlib h5py pillow"
18+
- OPTIONAL_DEPENDS=""
1819
- PYDICOM=1
1920
- INSTALL_TYPE="setup"
2021
- EXTRA_WHEELS="https://5cf40426d9f06eb7461d-6fe47d9331aba7cd62fc36c7196769e4.ssl.cf2.rackcdn.com"
@@ -84,6 +85,13 @@ matrix:
8485
- python: 3.5
8586
env:
8687
- DOC_DOC_TEST=1
88+
# Run tests with indexed_gzip present
89+
- python: 2.7
90+
env:
91+
- OPTIONAL_DEPENDS="indexed_gzip"
92+
- python: 3.5
93+
env:
94+
- OPTIONAL_DEPENDS="indexed_gzip"
8795
before_install:
8896
- source tools/travis_tools.sh
8997
- python -m pip install --upgrade pip
@@ -93,7 +101,7 @@ before_install:
93101
- python --version # just to check
94102
- pip install -U pip wheel # needed at one point
95103
- retry pip install nose flake8 mock # always
96-
- pip install $EXTRA_PIP_FLAGS $DEPENDS
104+
- pip install $EXTRA_PIP_FLAGS $DEPENDS $OPTIONAL_DEPENDS
97105
# pydicom <= 0.9.8 doesn't install on python 3
98106
- if [ "${TRAVIS_PYTHON_VERSION:0:1}" == "2" ]; then
99107
if [ "$PYDICOM" == "1" ]; then

nibabel/analyze.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -934,8 +934,8 @@ def set_data_dtype(self, dtype):
934934

935935
@classmethod
936936
@kw_only_meth(1)
937-
def from_file_map(klass, file_map, mmap=True):
938-
''' class method to create image from mapping in `file_map ``
937+
def from_file_map(klass, file_map, mmap=True, keep_file_open=None):
938+
'''class method to create image from mapping in `file_map`
939939
940940
Parameters
941941
----------
@@ -950,6 +950,19 @@ def from_file_map(klass, file_map, mmap=True):
950950
`mmap` value of True gives the same behavior as ``mmap='c'``. If
951951
image data file cannot be memory-mapped, ignore `mmap` value and
952952
read array from file.
953+
keep_file_open : { None, 'auto', True, False }, optional, keyword only
954+
`keep_file_open` controls whether a new file handle is created
955+
every time the image is accessed, or a single file handle is
956+
created and used for the lifetime of this ``ArrayProxy``. If
957+
``True``, a single file handle is created and used. If ``False``,
958+
a new file handle is created every time the image is accessed. If
959+
``'auto'``, and the optional ``indexed_gzip`` dependency is
960+
present, a single file handle is created and persisted. If
961+
``indexed_gzip`` is not available, behaviour is the same as if
962+
``keep_file_open is False``. If ``file_map`` refers to an open
963+
file handle, this setting has no effect. The default value
964+
(``None``) will result in the value of
965+
``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used.
953966
954967
Returns
955968
-------
@@ -964,7 +977,8 @@ def from_file_map(klass, file_map, mmap=True):
964977
imgf = img_fh.fileobj
965978
if imgf is None:
966979
imgf = img_fh.filename
967-
data = klass.ImageArrayProxy(imgf, hdr_copy, mmap=mmap)
980+
data = klass.ImageArrayProxy(imgf, hdr_copy, mmap=mmap,
981+
keep_file_open=keep_file_open)
968982
# Initialize without affine to allow header to pass through unmodified
969983
img = klass(data, None, header, file_map=file_map)
970984
# set affine from header though
@@ -976,8 +990,8 @@ def from_file_map(klass, file_map, mmap=True):
976990

977991
@classmethod
978992
@kw_only_meth(1)
979-
def from_filename(klass, filename, mmap=True):
980-
''' class method to create image from filename `filename`
993+
def from_filename(klass, filename, mmap=True, keep_file_open=None):
994+
'''class method to create image from filename `filename`
981995
982996
Parameters
983997
----------
@@ -990,6 +1004,18 @@ def from_filename(klass, filename, mmap=True):
9901004
`mmap` value of True gives the same behavior as ``mmap='c'``. If
9911005
image data file cannot be memory-mapped, ignore `mmap` value and
9921006
read array from file.
1007+
keep_file_open : { None, 'auto', True, False }, optional, keyword only
1008+
`keep_file_open` controls whether a new file handle is created
1009+
every time the image is accessed, or a single file handle is
1010+
created and used for the lifetime of this ``ArrayProxy``. If
1011+
``True``, a single file handle is created and used. If ``False``,
1012+
a new file handle is created every time the image is accessed. If
1013+
``'auto'``, and the optional ``indexed_gzip`` dependency is
1014+
present, a single file handle is created and persisted. If
1015+
``indexed_gzip`` is not available, behaviour is the same as if
1016+
``keep_file_open is False``. The default value (``None``) will
1017+
result in the value of
1018+
``nibabel.arrayproxy.KEEP_FILE_OPEN_DEFAULT`` being used.
9931019
9941020
Returns
9951021
-------
@@ -998,7 +1024,8 @@ def from_filename(klass, filename, mmap=True):
9981024
if mmap not in (True, False, 'c', 'r'):
9991025
raise ValueError("mmap should be one of {True, False, 'c', 'r'}")
10001026
file_map = klass.filespec_to_file_map(filename)
1001-
return klass.from_file_map(file_map, mmap=mmap)
1027+
return klass.from_file_map(file_map, mmap=mmap,
1028+
keep_file_open=keep_file_open)
10021029

10031030
load = from_filename
10041031

nibabel/arrayproxy.py

Lines changed: 128 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,34 @@
2525
2626
See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks.
2727
"""
28+
from contextlib import contextmanager
29+
from threading import RLock
30+
2831
import numpy as np
2932

3033
from .deprecated import deprecate_with_version
3134
from .volumeutils import array_from_file, apply_read_scaling
3235
from .fileslice import fileslice
3336
from .keywordonly import kw_only_meth
34-
from .openers import ImageOpener
37+
from .openers import ImageOpener, HAVE_INDEXED_GZIP
38+
39+
40+
"""This flag controls whether a new file handle is created every time an image
41+
is accessed through an ``ArrayProxy``, or a single file handle is created and
42+
used for the lifetime of the ``ArrayProxy``. It should be set to one of
43+
``True``, ``False``, or ``'auto'``.
44+
45+
If ``True``, a single file handle is created and used. If ``False``, a new
46+
file handle is created every time the image is accessed. If ``'auto'``, and
47+
the optional ``indexed_gzip`` dependency is present, a single file handle is
48+
created and persisted. If ``indexed_gzip`` is not available, behaviour is the
49+
same as if ``keep_file_open is False``.
50+
51+
If this is set to any other value, attempts to create an ``ArrayProxy`` without
52+
specifying the ``keep_file_open`` flag will result in a ``ValueError`` being
53+
raised.
54+
"""
55+
KEEP_FILE_OPEN_DEFAULT = False
3556

3657

3758
class ArrayProxy(object):
@@ -69,8 +90,8 @@ class ArrayProxy(object):
6990
_header = None
7091

7192
@kw_only_meth(2)
72-
def __init__(self, file_like, spec, mmap=True):
73-
""" Initialize array proxy instance
93+
def __init__(self, file_like, spec, mmap=True, keep_file_open=None):
94+
"""Initialize array proxy instance
7495
7596
Parameters
7697
----------
@@ -99,8 +120,18 @@ def __init__(self, file_like, spec, mmap=True):
99120
True gives the same behavior as ``mmap='c'``. If `file_like`
100121
cannot be memory-mapped, ignore `mmap` value and read array from
101122
file.
102-
scaling : {'fp', 'dv'}, optional, keyword only
103-
Type of scaling to use - see header ``get_data_scaling`` method.
123+
keep_file_open : { None, 'auto', True, False }, optional, keyword only
124+
`keep_file_open` controls whether a new file handle is created
125+
every time the image is accessed, or a single file handle is
126+
created and used for the lifetime of this ``ArrayProxy``. If
127+
``True``, a single file handle is created and used. If ``False``,
128+
a new file handle is created every time the image is accessed. If
129+
``'auto'``, and the optional ``indexed_gzip`` dependency is
130+
present, a single file handle is created and persisted. If
131+
``indexed_gzip`` is not available, behaviour is the same as if
132+
``keep_file_open is False``. If ``file_like`` is an open file
133+
handle, this setting has no effect. The default value (``None``)
134+
will result in the value of ``KEEP_FILE_OPEN_DEFAULT`` being used.
104135
"""
105136
if mmap not in (True, False, 'c', 'r'):
106137
raise ValueError("mmap should be one of {True, False, 'c', 'r'}")
@@ -125,6 +156,70 @@ def __init__(self, file_like, spec, mmap=True):
125156
# Permit any specifier that can be interpreted as a numpy dtype
126157
self._dtype = np.dtype(self._dtype)
127158
self._mmap = mmap
159+
self._keep_file_open = self._should_keep_file_open(file_like,
160+
keep_file_open)
161+
self._lock = RLock()
162+
163+
def __del__(self):
164+
"""If this ``ArrayProxy`` was created with ``keep_file_open=True``,
165+
the open file object is closed if necessary.
166+
"""
167+
if hasattr(self, '_opener') and not self._opener.closed:
168+
self._opener.close_if_mine()
169+
self._opener = None
170+
171+
def __getstate__(self):
172+
"""Returns the state of this ``ArrayProxy`` during pickling. """
173+
state = self.__dict__.copy()
174+
state.pop('_lock', None)
175+
return state
176+
177+
def __setstate__(self, state):
178+
"""Sets the state of this ``ArrayProxy`` during unpickling. """
179+
self.__dict__.update(state)
180+
self._lock = RLock()
181+
182+
def _should_keep_file_open(self, file_like, keep_file_open):
183+
"""Called by ``__init__``, and used to determine the final value of
184+
``keep_file_open``.
185+
186+
The return value is derived from these rules:
187+
188+
- If ``keep_file_open`` is ``True`` or ``False``, it is returned as is.
189+
- If ``file_like`` is a file(-like) object, ``False`` is returned.
190+
- If ``file_like`` is a file name ending with ``'gz'``, and the
191+
``indexed_gzip`` library is available, ``True`` is returned.
192+
- Otherwise, ``False`` is returned.
193+
194+
Parameters
195+
----------
196+
197+
file_like : object
198+
File-like object or filename, as passed to ``__init__``.
199+
keep_file_open : { None, 'auto', True, False }
200+
Flag as passed to ``__init__``.
201+
202+
Returns
203+
-------
204+
205+
The value of ``keep_file_open`` that will be used by this
206+
``ArrayProxy``.
207+
"""
208+
if keep_file_open is None:
209+
keep_file_open = KEEP_FILE_OPEN_DEFAULT
210+
# if keep_file_open is True/False, we do what the user wants us to do
211+
if isinstance(keep_file_open, bool):
212+
return keep_file_open
213+
if keep_file_open != 'auto':
214+
raise ValueError('keep_file_open should be one of {None, '
215+
'\'auto\', True, False}')
216+
217+
# file_like is a handle - keep_file_open is irrelevant
218+
if hasattr(file_like, 'read') and hasattr(file_like, 'seek'):
219+
return False
220+
# Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set
221+
# keep_file_open to True, else we set it to False
222+
return HAVE_INDEXED_GZIP and file_like.endswith('gz')
128223

129224
@property
130225
@deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0')
@@ -155,12 +250,33 @@ def inter(self):
155250
def is_proxy(self):
156251
return True
157252

253+
@contextmanager
254+
def _get_fileobj(self):
255+
"""Create and return a new ``ImageOpener``, or return an existing one.
256+
257+
The specific behaviour depends on the value of the ``keep_file_open``
258+
flag that was passed to ``__init__``.
259+
260+
Yields
261+
------
262+
ImageOpener
263+
A newly created ``ImageOpener`` instance, or an existing one,
264+
which provides access to the file.
265+
"""
266+
if self._keep_file_open:
267+
if not hasattr(self, '_opener'):
268+
self._opener = ImageOpener(self.file_like)
269+
yield self._opener
270+
else:
271+
with ImageOpener(self.file_like) as opener:
272+
yield opener
273+
158274
def get_unscaled(self):
159-
''' Read of data from file
275+
""" Read data from file
160276
161277
This is an optional part of the proxy API
162-
'''
163-
with ImageOpener(self.file_like) as fileobj:
278+
"""
279+
with self._get_fileobj() as fileobj, self._lock:
164280
raw_data = array_from_file(self._shape,
165281
self._dtype,
166282
fileobj,
@@ -175,18 +291,19 @@ def __array__(self):
175291
return apply_read_scaling(raw_data, self._slope, self._inter)
176292

177293
def __getitem__(self, slicer):
178-
with ImageOpener(self.file_like) as fileobj:
294+
with self._get_fileobj() as fileobj:
179295
raw_data = fileslice(fileobj,
180296
slicer,
181297
self._shape,
182298
self._dtype,
183299
self._offset,
184-
order=self.order)
300+
order=self.order,
301+
lock=self._lock)
185302
# Upcast as necessary for big slopes, intercepts
186303
return apply_read_scaling(raw_data, self._slope, self._inter)
187304

188305
def reshape(self, shape):
189-
''' Return an ArrayProxy with a new shape, without modifying data '''
306+
""" Return an ArrayProxy with a new shape, without modifying data """
190307
size = np.prod(self._shape)
191308

192309
# Calculate new shape if not fully specified

0 commit comments

Comments
 (0)