Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow tiffs with npages not dividing pageCount to be loaded #279

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,6 @@ install:
script:
- export SPARK_HOME=`pwd`/spark-1.6.0-bin-hadoop2.6
- export PYTHONPATH=$SPARK_HOME/python:$PYTHONPATH
- export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.9-src.zip:$PYTHONPATH
- export PYTHONPATH=$SPARK_HOME/python/lib/py4j-0.8.2.1-src.zip:$PYTHONPATH
- py.test
- py.test --engine=spark
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
[![Latest Version](https://img.shields.io/pypi/v/thunder-python.svg?style=flat-square)](https://pypi.python.org/pypi/thunder-python)
[![Build Status](https://img.shields.io/travis/thunder-project/thunder/master.svg?style=flat-square)](https://travis-ci.org/thunder-project/thunder)
[![Gitter](https://img.shields.io/gitter/room/thunder-project/thunder.svg?style=flat-square)](https://gitter.im/thunder-project/thunder)
[![Binder](https://img.shields.io/badge/launch-binder-red.svg?style=flat-square)](http://mybinder.org/repo/thunder-project/thunder-docs)


> scalable analysis of image and time series data in python

Expand Down Expand Up @@ -85,6 +83,6 @@ Once you have a running cluster with a valid `SparkContext` — this is created

Thunder is a community effort! The codebase so far is due to the excellent work of the following individuals:

> Andrew Osheroff, Ben Poole, Chris Stock, Davis Bennett, Jascha Swisher, Jason Wittenbach, Jeremy Freeman, Josh Rosen, Kunal Lillaney, Logan Grosenick, Matt Conlen, Michael Broxton, Noah Young, Ognen Duzlevski, Richard Hofer, Owen Kahn, Ted Fujimoto, Tom Sainsbury, Uri Laserson, W J Liddy
> Andrew Osheroff, Ben Poole, Chris Stock, Davis Bennett, Jascha Swisher, Jason Wittenbach, Jeremy Freeman, Josh Rosen, Kunal Lillaney, Logan Grosenick, Matt Conlen, Michael Broxton, Noah Young, Ognen Duzlevski, Richard Hofer, Owen Kahn, Ted Fujimoto, Tom Sainsbury, Uri Laserson

If you run into a problem, have a feature request, or want to contribute, submit an issue or a pull request, or come talk to us in the [chatroom](https://gitter.im/thunder-project/thunder)!
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ numpy
scipy
scikit-image
boto
bolt-python >= 0.7.0
bolt-python >= 0.5.1
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from setuptools import setup

version = '1.2.0'
version = '1.0.0'

setup(
name='thunder-python',
Expand Down
25 changes: 2 additions & 23 deletions test/test_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from numpy import allclose, array, asarray, add, ndarray, generic
from numpy import allclose, array, asarray, add

from thunder import series, images

Expand Down Expand Up @@ -101,25 +101,4 @@ def test_map_with_keys(eng):
data = images.fromlist([array([[1, 1], [1, 1]]), array([[2, 2], [2, 2]])], engine=eng)
mapped = data.map(lambda kv: kv[0] + kv[1], with_keys=True)
assert allclose(mapped.shape, [2, 2, 2])
assert allclose(mapped.toarray(), [[[1, 1], [1, 1]], [[3, 3], [3, 3]]])


def test_repartition(eng):
if eng is not None:
data = images.fromlist([array([1, 1]), array([2, 2]), array([3, 3]), array([4, 4]),
array([5, 5]), array([6, 6]), array([7, 7]), array([8, 8]),
array([9, 9]), array([10, 10]), array([11, 11]), array([12, 12])],
engine=eng, npartitions=10)
assert allclose(data.first(), array([1, 1]))
assert isinstance(data.first(), (ndarray, generic))
data = data.repartition(3)
assert allclose(data.first(), array([1, 1]))

data = series.fromlist([array([1, 1]), array([2, 2]), array([3, 3]), array([4, 4]),
array([5, 5]), array([6, 6]), array([7, 7]), array([8, 8]),
array([9, 9]), array([10, 10]), array([11, 11]), array([12, 12])],
engine=eng, npartitions=10)
assert allclose(data.first(), array([1, 1]))
data = data.repartition(3)
assert allclose(data.first(), array([1, 1]))
assert isinstance(data.first(), (ndarray, generic))
assert allclose(mapped.toarray(), [[[1, 1], [1, 1]], [[3, 3], [3, 3]]])
77 changes: 28 additions & 49 deletions test/test_blocks.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,33 @@
import pytest
from numpy import arange, array, allclose, ones, float64, asarray
from numpy import arange, array, allclose, ones

from thunder.images.readers import fromlist

pytestmark = pytest.mark.usefixtures("eng")


def test_conversion(eng):
if eng is None:
return
a = arange(8).reshape((4, 2))
data = fromlist([a, a], engine=eng)
vals = data.toblocks((2, 2)).collect_blocks()
vals = data.toblocks((2, 2)).tordd().sortByKey().values().collect()
truth = [array([a[0:2, 0:2], a[0:2, 0:2]]), array([a[2:4, 0:2], a[2:4, 0:2]])]
assert allclose(vals, truth)


def test_full(eng):
if eng is None:
return
a = arange(8).reshape((4, 2))
data = fromlist([a, a], engine=eng)
vals = data.toblocks((4,2)).collect_blocks()
vals = data.toblocks((4, 2)).tordd().values().collect()
truth = [a, a]
assert allclose(vals, truth)


def test_blocksize(eng):
a = arange(100*100, dtype='int16').reshape((100, 100))
data = fromlist(10*[a], engine=eng)

blocks = data.toblocks((5, 5))
assert blocks.blockshape == (10, 5, 5)

blocks = data.toblocks('1')
assert blocks.blockshape == (10, 5, 100)


def test_padding(eng):
a = arange(30).reshape((5, 6))
data = fromlist([a, a], engine=eng)

blocks = data.toblocks((2, 3), padding=(1, 1))
vals = blocks.collect_blocks()
shapes = list(map(lambda x: x.shape, vals))
truth = [(2, 3, 4), (2, 3, 4), (2, 4, 4), (2, 4, 4), (2, 2, 4), (2, 2, 4)]
assert allclose(array(shapes), array(truth))

truth = data.toarray()
assert allclose(data.toblocks((2, 3), padding=1).toarray(), truth)
assert allclose(data.toblocks((2, 3), padding=(0, 1)).toarray(), truth)
assert allclose(data.toblocks((2, 3), padding=(1, 1)).toarray(), truth)


def test_count(eng):
if eng is None:
return
a = arange(8).reshape((2, 4))
data = fromlist([a], engine=eng)
assert data.toblocks((1, 1)).count() == 8
Expand All @@ -59,54 +37,55 @@ def test_count(eng):


def test_conversion_series(eng):
if eng is None:
return
a = arange(8).reshape((4, 2))
data = fromlist([a], engine=eng)
vals = data.toblocks((1, 2)).toseries().toarray()
assert allclose(vals, a)


def test_conversion_series_3d(eng):
if eng is None:
return
a = arange(24).reshape((2, 3, 4))
data = fromlist([a], engine=eng)
vals = data.toblocks((2, 3, 4)).toseries().toarray()
assert allclose(vals, a)


def test_roundtrip(eng):
if eng is None:
return
a = arange(8).reshape((4, 2))
data = fromlist([a, a], engine=eng)
vals = data.toblocks((2, 2)).toimages()
assert allclose(vals.toarray(), data.toarray())


def test_series_roundtrip_simple(eng):
if eng is None:
return
a = arange(8).reshape((4, 2))
data = fromlist([a, a], engine=eng)
vals = data.toseries().toimages()
assert allclose(vals.toarray(), data.toarray())


def test_shape(eng):
if eng is None:
return
data = fromlist([ones((30, 30)) for _ in range(0, 3)], engine=eng)
blocks = data.toblocks((10, 10))
values = blocks.collect_blocks()
values = [v for k, v in blocks.tordd().collect()]
assert blocks.blockshape == (3, 10, 10)
assert all([v.shape == (3, 10, 10) for v in values])


def test_map(eng):
a = arange(8).reshape((4, 2))
data = fromlist([a, a], engine=eng)
map1 = data.toblocks((4, 2)).map(lambda x: 1.0 * x, dtype=float64).toimages()
map2 = data.toblocks((4, 2)).map(lambda x: 1.0 * x).toimages()
assert map1.dtype == float64
assert map2.dtype == float64

def test_map_generic(eng):
a = arange(3*4).reshape((3, 4))
data = fromlist([a, a], engine=eng)
b = asarray(data.toblocks((2, 2)).map_generic(lambda x: [0, 1]))
assert b.shape == (2, 2)
assert b.dtype == object
truth = [v == [0, 1] for v in b.flatten()]
assert all(truth)
def test_local_mode(eng):
a = arange(64).reshape((8, 8))
data = fromlist([a, a])
if data.mode == 'local':
blocks = data.toblocks((4, 4))
assert allclose(blocks.values, data.values)
assert blocks.count() == 1
assert blocks.blockshape == (2, 8, 8)
15 changes: 7 additions & 8 deletions test/test_images.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from numpy import arange, allclose, array, mean, apply_along_axis, float64
from numpy import arange, allclose, array, mean, apply_along_axis

from thunder.images.readers import fromlist, fromarray
from thunder.images.images import Images
Expand All @@ -12,8 +12,6 @@
def test_map(eng):
data = fromlist([arange(6).reshape((2, 3))], engine=eng)
assert allclose(data.map(lambda x: x + 1).toarray(), [[1, 2, 3], [4, 5, 6]])
assert data.map(lambda x: 1.0*x, dtype=float64).dtype == float64
assert data.map(lambda x: 1.0*x).dtype == float64


def test_map_singleton(eng):
Expand Down Expand Up @@ -188,15 +186,16 @@ def test_map_as_series(eng):
def f(x):
return x - mean(x)
result = apply_along_axis(f, 0, data.toarray())
size = (2, 2)

assert allclose(data.map_as_series(f, chunk_size=size).toarray(), result)
assert allclose(data.map_as_series(f, chunk_size=size, value_size=5).toarray(), result)
assert allclose(data.map_as_series(f).toarray(), result)
assert allclose(data.map_as_series(f, value_size=5).toarray(), result)
assert allclose(data.map_as_series(f, block_size=(2, 2)).toarray(), result)

# function does change size of series
def f(x):
return x[:-1]
result = apply_along_axis(f, 0, data.toarray())

assert allclose(data.map_as_series(f, chunk_size=size).toarray(), result)
assert allclose(data.map_as_series(f, chunk_size=size, value_size=4).toarray(), result)
assert allclose(data.map_as_series(f).toarray(), result)
assert allclose(data.map_as_series(f, value_size=4).toarray(), result)
assert allclose(data.map_as_series(f, block_size=(2, 2)).toarray(), result)
18 changes: 4 additions & 14 deletions test/test_images_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ def test_from_list(eng):
a = arange(8).reshape((2, 4))
data = fromlist([a], engine=eng)
assert allclose(data.shape, (1,) + a.shape)
assert allclose(data.value_shape, a.shape)
assert allclose(data.dims, a.shape)
assert allclose(data.toarray(), a)


def test_from_array(eng):
a = arange(8).reshape((1, 2, 4))
data = fromarray(a, engine=eng)
assert allclose(data.shape, a.shape)
assert allclose(data.value_shape, a.shape[1:])
assert allclose(data.dims, a.shape[1:])
assert allclose(data.toarray(), a)


Expand All @@ -36,15 +36,15 @@ def test_from_array_bolt(eng):
b = barray(a)
data = fromarray(b)
assert allclose(data.shape, a.shape)
assert allclose(data.value_shape, a.shape[1:])
assert allclose(data.dims, a.shape[1:])
assert allclose(data.toarray(), a)


def test_from_array_single(eng):
a = arange(8).reshape((2, 4))
data = fromarray(a, engine=eng)
assert allclose(data.shape, (1,) + a.shape)
assert allclose(data.value_shape, a.shape)
assert allclose(data.dims, a.shape)
assert allclose(data.toarray(), a)


Expand Down Expand Up @@ -114,16 +114,6 @@ def test_from_tif_multi_planes(eng):
assert [x.sum() for x in data.toarray()] == [1140006, 1119161, 1098917]


def test_from_tif_multi_planes_discard_extra(eng):
path = os.path.join(resources, 'multilayer_tif', 'dotdotdot_lzw.tif')
data = fromtif(path, nplanes=2, engine=eng, discard_extra=True)
assert data.shape[0] == 1
assert data.shape[1] == 2
with pytest.raises(BaseException) as error_msg:
data = fromtif(path, nplanes=2, engine=eng, discard_extra=False)
assert 'nplanes' in str(error_msg.value)


def test_from_tif_multi_planes_many(eng):
path = os.path.join(resources, 'multilayer_tif', 'dotdotdot_lzw*.tif')
data = fromtif(path, nplanes=3, engine=eng)
Expand Down
4 changes: 1 addition & 3 deletions test/test_series.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import pytest
from numpy import allclose, arange, array, asarray, dot, cov, corrcoef, float64
from numpy import allclose, arange, array, asarray, dot, cov, corrcoef

from thunder.series.readers import fromlist, fromarray
from thunder.images.readers import fromlist as img_fromlist
Expand All @@ -10,8 +10,6 @@
def test_map(eng):
data = fromlist([array([1, 2]), array([3, 4])], engine=eng)
assert allclose(data.map(lambda x: x + 1).toarray(), [[2, 3], [4, 5]])
assert data.map(lambda x: 1.0*x, dtype=float64).dtype == float64
assert data.map(lambda x: 1.0*x).dtype == float64


def test_map_singletons(eng):
Expand Down
4 changes: 2 additions & 2 deletions test/test_series_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def test_to_binary_roundtrip(tmpdir, eng):
def test_to_binary_roundtrip_partitioned(tmpdir, eng):
a = arange(8, dtype='int16').reshape((4, 2))
p = str(tmpdir) + '/data'
data = fromarray([a, a], npartitions=4, engine=eng)
data = fromarray([a, a], npartitions=2, engine=eng)
data.tobinary(p)
loaded = frombinary(p)
assert allclose(data.toarray(), loaded.toarray())
Expand All @@ -137,4 +137,4 @@ def test_from_example(eng):
data = fromexample('mouse', engine=eng)
assert allclose(data.toarray().shape, (64, 64, 20))
data = fromexample('iris', engine=eng)
assert allclose(data.toarray().shape, (150, 4))
assert allclose(data.toarray().shape, (150, 4))
2 changes: 1 addition & 1 deletion thunder/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,4 @@ def _setup():

_setup()

__version__ = '1.2.0'
__version__ = '1.0.0'
Loading