Source code for diluvian.volumes

# -*- coding: utf-8 -*-
"""Volumes of raw image and labeled object data."""


from __future__ import division

from collections import namedtuple
import csv
import logging
import os
import re

import h5py
import math
import numpy as np
from PIL import Image
import pytoml as toml
import requests
from scipy import ndimage
import six
from six.moves import range as xrange

from .config import CONFIG
from .octrees import OctreeVolume
from .util import get_nonzero_aabb


DimOrder = namedtuple('DimOrder', ('X', 'Y', 'Z'))


[docs]def partition_volumes(volumes, downsample=True): """Paritition volumes into training and validation based on configuration. Uses the regexes mapping partition sizes and indices in diluvian.config.TrainingConfig by applying them to matching volumes based on name. Parameters ---------- volumes : dict Dictionary mapping volume name to diluvian.volumes.Volume. downsample : bool, optional Whether to downsample partitions automatically. Returns ------- training_volumes, validation_volumes : dict Dictionary mapping volume name to partitioned, downsampled volumes. """ def apply_partitioning(volumes, partitioning): partitioned = {} for name, vol in six.iteritems(volumes): partitions = [p for rgx, p in CONFIG.training.partitions.items() if re.match(rgx, name)] partition_index = [idx for rgx, idx in partitioning.items() if re.match(rgx, name)] if len(partitions) > 1 or len(partition_index) > 1: raise ValueError('Volume "{}" matches more than one partition specifier'.format(name)) elif len(partitions) == 1 and len(partition_index) == 1: v = vol.partition(partitions[0], partition_index[0]) if downsample: v = v.downsample(CONFIG.volume.resolution) partitioned[name] = v return partitioned training_volumes = apply_partitioning(volumes, CONFIG.training.training_partition) validation_volumes = apply_partitioning(volumes, CONFIG.training.validation_partition) return training_volumes, validation_volumes
[docs]class SubvolumeBounds(object): """Sufficient parameters to extract a subvolume from a volume.""" __slots__ = ('start', 'stop', 'seed', 'label_id', 'label_margin',) def __init__(self, start=None, stop=None, seed=None, label_id=None, label_margin=None): assert (start is not None and stop is not None) or seed is not None, "Bounds or seed must be provided" self.start = start self.stop = stop self.seed = seed self.label_id = label_id if label_margin is None: label_margin = np.zeros(3, dtype=np.int64) self.label_margin = label_margin @classmethod
[docs] def iterable_from_csv(cls, filename): bounds = [] with open(filename, 'r') as csvfile: reader = csv.DictReader(csvfile) for row in reader: for k, v in six.iteritems(row): if not v: row[k] = None elif v[0] == '[': row[k] = np.fromstring(v[1:-1], sep=' ', dtype=np.int64) else: row[k] = int(v) bounds.append(cls(**row)) return bounds
@classmethod
[docs] def iterable_to_csv(cls, bounds, filename): with open(filename, 'w') as csvfile: fieldnames = cls.__slots__ writer = csv.writer(csvfile) writer.writerow(fieldnames) for bound in bounds: writer.writerow([getattr(bound, f) for f in fieldnames])
[docs]class Subvolume(object): """A subvolume of image data and an optional ground truth object mask.""" __slots__ = ('image', 'label_mask', 'seed', 'label_id',) def __init__(self, image, label_mask, seed, label_id): self.image = image self.label_mask = label_mask self.seed = seed self.label_id = label_id
[docs] def f_a(self): """Calculate the mask filling fraction of this subvolume. Returns ------- float Fraction of the subvolume voxels in the object mask. """ return np.count_nonzero(self.label_mask) / float(self.label_mask.size)
[docs] def has_seed_in_mask(self): ctr = self.seed - (np.asarray(self.image.shape) - np.asarray(self.label_mask.shape)) // 2 return self.label_mask[tuple(ctr)]
[docs] def has_uniform_seed_margin(self, seed_margin=20.0): """Test if a subvolume has a margin of uniform label around its seed. Parameters ---------- seed_margin : float, optional The minimum acceptable margin of uniform target label around the seed voxel (in nm, default 20.0). Returns ------- bool True if the rectangular margin around the seed position is uniform. """ margin = np.ceil(np.reciprocal(np.array(CONFIG.volume.resolution), dtype=np.float64) * seed_margin).astype(np.int64) mask_target = self.label_mask # If data is unlabeled, can not test so always succeed. if mask_target is None: return True # Seed location in the mask accounting for offset of label from image. ctr = self.seed - (np.asarray(self.image.shape) - np.asarray(mask_target.shape)) // 2 seed_fov = (ctr - margin, ctr + margin + 1) seed_region = mask_target[seed_fov[0][0]:seed_fov[1][0], seed_fov[0][1]:seed_fov[1][1], seed_fov[0][2]:seed_fov[1][2]] return np.all(seed_region)
[docs]class SubvolumeGenerator(six.Iterator): """Combines a volume and a subvolume bounds generator into a generator. Parameters ---------- volume : Volume bounds_generator : SubvolumeBoundsGenerator """ def __init__(self, volume, bounds_generator): self.volume = volume self.bounds_generator = bounds_generator @property def shape(self): return self.bounds_generator.shape def __iter__(self): return self
[docs] def reset(self): self.bounds_generator.reset()
def __next__(self): return self.volume.get_subvolume(six.next(self.bounds_generator))
[docs]class ErodedMaskGenerator(six.Iterator): def __init__(self, subvolume_generator, erosion_px): self.subvolume_generator = subvolume_generator self.sel = np.ones(erosion_px * 2 + 1) @property def shape(self): return self.subvolume_generator.shape def __iter__(self): return self
[docs] def reset(self): self.subvolume_generator.reset()
def __next__(self): while True: subv = six.next(self.subvolume_generator) subv.label_mask = ndimage.binary_erosion(subv.label_mask, structure=self.sel, border_value=1) if subv.has_seed_in_mask(): return subv
[docs]class RelabelSeedComponentGenerator(six.Iterator): def __init__(self, subvolume_generator): self.subvolume_generator = subvolume_generator @property def shape(self): return self.subvolume_generator.shape def __iter__(self): return self
[docs] def reset(self): self.subvolume_generator.reset()
def __next__(self): subv = six.next(self.subvolume_generator) label_im, _ = ndimage.label(subv.label_mask) label_axis_margin = (np.array(subv.image.shape) - np.array(subv.label_mask.shape)) // 2 seed_label = label_im[tuple(subv.seed - label_axis_margin)] subv.label_mask = label_im == seed_label return subv
[docs]class SubvolumeAugmentGenerator(six.Iterator): """Base class for subvolume generator augmenters. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. """ def __init__(self, subvolume_generator, return_both): self.subvolume_generator = subvolume_generator self.return_both = return_both self.return_single_p = 0.5 self.subvolume = None @property def shape(self): return self.subvolume_generator.shape def __iter__(self): return self
[docs] def reset(self): self.subvolume = None self.subvolume_generator.reset()
def __next__(self): if self.return_both: if self.subvolume is None: self.subvolume = six.next(self.subvolume_generator) return self.subvolume else: subv = self.augment_subvolume() self.subvolume = None if subv is None: return six.next(self) else: return subv else: self.subvolume = six.next(self.subvolume_generator) if np.random.sample() < self.return_single_p: return self.subvolume else: subv = self.augment_subvolume() if subv is None: return self.subvolume else: return subv
[docs] def augment_subvolume(self): raise NotImplementedError('Subclasses must implement this method.')
[docs]class ClipSubvolumeImageGenerator(six.Iterator): """Clip subvolume image range (default between zero and one). Useful to apply after a sequence of augmentations. Parameters ---------- subvolume_generator : SubvolumeGenerator min_val, max_val : float, optional """ def __init__(self, subvolume_generator, min_val=0.0, max_val=1.0): self.subvolume_generator = subvolume_generator self.min_val = min_val self.max_val = max_val @property def shape(self): return self.subvolume_generator.shape def __iter__(self): return self
[docs] def reset(self): self.subvolume_generator.reset()
def __next__(self): subv = six.next(self.subvolume_generator) return Subvolume(np.clip(subv.image, self.min_val, self.max_val), subv.label_mask, subv.seed, subv.label_id)
[docs]class MirrorAugmentGenerator(SubvolumeAugmentGenerator): """Repeats subvolumes from a subvolume generator mirrored along an axis. For each subvolume in the original generator, this generator will yield two subvolumes: the original subvolume and the subvolume with the image, label mask, and seed mirrored along a given axis. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. axis : int """ def __init__(self, subvolume_generator, return_both, axis): super(MirrorAugmentGenerator, self).__init__(subvolume_generator, return_both) self.axis = axis
[docs] def augment_subvolume(self): subv = self.subvolume shape = subv.image.shape[self.axis] seed = subv.seed.copy() seed[self.axis] = shape - subv.seed[self.axis] - 1 subv = Subvolume(np.flip(subv.image, self.axis), np.flip(subv.label_mask, self.axis) if subv.label_mask is not None else None, seed, subv.label_id) return subv
[docs]class PermuteAxesAugmentGenerator(SubvolumeAugmentGenerator): """Repeats subvolumes from a subvolume generator with an axes permutation. For each subvolume in the original generator, this generator will yield two subvolumes: the original subvolume and the subvolume with the image, label mask, and seed axes permuted according to a given axes order. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. axes : sequence of int """ def __init__(self, subvolume_generator, return_both, axes): super(PermuteAxesAugmentGenerator, self).__init__(subvolume_generator, return_both) self.axes = list(axes)
[docs] def augment_subvolume(self): subv = self.subvolume subv = Subvolume(np.transpose(subv.image, self.axes), np.transpose(subv.label_mask, self.axes) if subv.label_mask is not None else None, subv.seed[self.axes], self.subvolume.label_id) return subv
[docs]class MissingDataAugmentGenerator(SubvolumeAugmentGenerator): """Repeats subvolumes from a subvolume generator with missing data planes. For each subvolume in the original generator, this generator will yield the original subvolume and may yield a subvolume with missing planes of image and/or label mask data. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. axis : int probability : float Independent probability that each plane of data along axis is missing. remove_label : bool Whether to also remove label mask data. """ def __init__(self, subvolume_generator, return_both, axis, probability, remove_label=False): super(MissingDataAugmentGenerator, self).__init__(subvolume_generator, return_both) self.axis = axis self.probability = probability self.remove_label = remove_label
[docs] def augment_subvolume(self): rolls = np.random.sample(self.shape[self.axis]) # Remove the seed plane from possibilities. rolls[self.subvolume.seed[self.axis]] = 1.1 missing_sections = np.where(rolls < self.probability) if missing_sections and missing_sections[0].size: subv = self.subvolume mask = subv.label_mask.copy() if subv.label_mask is not None and self.remove_label else subv.label_mask subv = Subvolume(subv.image.copy(), mask, subv.seed, subv.label_id) slices = [slice(None), slice(None), slice(None)] slices[self.axis] = missing_sections subv.image[slices] = 0 if self.remove_label: label_axis_margin = (subv.image.shape[self.axis] - subv.label_mask.shape[self.axis]) // 2 label_sections = missing_sections[0] - label_axis_margin label_sections = label_sections[(label_sections >= 0) & (label_sections < subv.label_mask.shape[self.axis])] slices[self.axis] = (label_sections,) subv.label_mask[slices] = False return subv else: # No augmentations to be made. Superclass will automatically return # next subvolume. return None
[docs]class GaussianNoiseAugmentGenerator(SubvolumeAugmentGenerator): """Repeats subvolumes from a subvolume generator with Gaussian noise. For each subvolume in the original generator, this generator will yield two subvolumes: the original subvolume and the subvolume with multiplicative and additive Gaussian noise applied to the image data. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. axis : int Axis along which noise will be applied independently. For example, 0 will apply different noise to each z-section. -1 will apply uniform noise to the entire subvolume. multiplicative : float Standard deviation for 1-mean Gaussian multiplicative noise. multiplicative : float Standard deviation for 0-mean Gaussian additive noise. """ def __init__(self, subvolume_generator, return_both, axis, multiplicative, additive): super(GaussianNoiseAugmentGenerator, self).__init__(subvolume_generator, return_both) self.axis = axis self.multiplicative = multiplicative self.additive = additive
[docs] def augment_subvolume(self): subv = self.subvolume # Generate a transformed shape that will apply vector addition # and multiplication along to correct axis. shape_xform = np.ones((1, 3), dtype=np.int32).ravel() shape_xform[self.axis] = -1 dim_size = 1 if self.axis == -1 else self.shape[self.axis] mul_noise = np.random.normal(1.0, self.multiplicative, dim_size).astype(subv.image.dtype) add_noise = np.random.normal(0.0, self.additive, dim_size).astype(subv.image.dtype) subv = Subvolume(subv.image * mul_noise.reshape(shape_xform) + add_noise.reshape(shape_xform), subv.label_mask, subv.seed, subv.label_id) return subv
[docs]class ContrastAugmentGenerator(SubvolumeAugmentGenerator): """Repeats subvolumes from a subvolume generator with altered contrast. For each subvolume in the original generator, this generator will yield the original subvolume and may yield a subvolume with image intensity contrast. Currently this augmentation performs simple rescaling of intensity values, not histogram based methods. This simple approach still yields results resembling TEM artifacts. A single rescaling is chosen for all selected sections in each subvolume, not independently per selected section. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. axis : int Axis along which contrast may be altered. For example, 0 will alter contrast by z-sections. probability : float Independent probability that each plane of data along axis is altered. scaling_mean, scaling_std, center_mean, center_std : float Normal distribution parameters for the rescaling of intensity values. """ def __init__(self, subvolume_generator, return_both, axis, probability, scaling_mean, scaling_std, center_mean, center_std): super(ContrastAugmentGenerator, self).__init__(subvolume_generator, return_both) self.axis = axis self.probability = probability self.scaling_mean = scaling_mean self.scaling_std = scaling_std self.center_mean = center_mean self.center_std = center_std
[docs] def augment_subvolume(self): rolls = np.random.sample(self.shape[self.axis]) sections = np.where(rolls < self.probability) if sections and sections[0].size: subv = self.subvolume subv = Subvolume(subv.image.copy(), subv.label_mask, subv.seed, subv.label_id) slices = [slice(None), slice(None), slice(None)] slices[self.axis] = sections data = subv.image[slices] old_min = data.min() old_max = data.max() scaling = np.random.normal(self.scaling_mean, self.scaling_std) center = np.random.normal(self.center_mean, self.center_std) data = scaling*(data - old_min) + 0.5*scaling*center*(old_max - old_min) + old_min subv.image[slices] = data return subv else: return None
[docs]class MaskedArtifactAugmentGenerator(SubvolumeAugmentGenerator): """Repeats subvolumes from a subvolume generator with artifact data added. For each subvolume in the original generator, this generator will yield the original subvolume and may yield a subvolume with planes of image mixed with artifact data from a separate volume. Parameters ---------- subvolume_generator : SubvolumeGenerator return_both : bool If true, return both the original and augmented volume in sequence. If false, return either with equal probability. axis : int probability : float Independent probability that each plane of data along axis has artifacts. artifact_volume_file : string Filename of an TOML descriptor of an HDF5 dataset with image and mask data channels. Only the dataset named 'Artifacts' from this descriptor will be used. Mask data should be a float that will be interpreted as an alpha for blending image data from this artifact file with the original subvolume image data. """ def __init__(self, subvolume_generator, return_both, axis, probability, artifact_volume_file, cache): super(MaskedArtifactAugmentGenerator, self).__init__(subvolume_generator, return_both) self.axis = axis self.probability = probability if 'artifacts' not in cache: vol = HDF5Volume.from_toml(artifact_volume_file)['Artifacts'] cache['mask'] = NdarrayVolume( vol.world_coord_to_local(vol.resolution), image_data=vol.world_mat_to_local(vol.mask_data[:])) vol.mask_data = None cache['artifacts'] = vol.to_memory_volume() self.mask = cache['mask'] self.artifacts = cache['artifacts'] artifact_shape = self.shape.copy() artifact_shape[self.axis] = 1 self.art_bounds_gen = self.artifacts.subvolume_bounds_generator(shape=artifact_shape)
[docs] def augment_subvolume(self): rolls = np.random.sample(self.shape[self.axis]) artifact_sections = np.where(rolls < self.probability) if artifact_sections and artifact_sections[0].size: subv = self.subvolume subv = Subvolume(subv.image.copy(), subv.label_mask, subv.seed, subv.label_id) slices = [slice(None), slice(None), slice(None)] for z in artifact_sections[0]: slices[self.axis] = z mask_found = False # Since artifact data is usually sparse, reject patches # that have all zero mask. while not mask_found: art_bounds = six.next(self.art_bounds_gen) mask = self.mask.get_subvolume(art_bounds).image if mask.max() == 0.0: continue mask_found = True art = self.artifacts.get_subvolume(art_bounds).image raw = subv.image[slices] subv.image[slices] = raw * (1.0 - mask) + art * mask return subv else: return None
[docs]class Volume(object): DIM = DimOrder(Z=0, Y=1, X=2) def __init__(self, resolution, image_data=None, label_data=None, mask_data=None): self.resolution = resolution self.image_data = image_data self.label_data = label_data self.mask_data = mask_data self._mask_bounds = None
[docs] def local_coord_to_world(self, a): return a
[docs] def world_coord_to_local(self, a): return a
[docs] def world_mat_to_local(self, m): return m
@property def mask_bounds(self): if self._mask_bounds is not None: return self._mask_bounds if self.mask_data is None: return None # Explicitly copy the channel to memory. 3x speedup for np ops. mask_data = self.mask_data[:] self._mask_bounds = get_nonzero_aabb(mask_data) return self._mask_bounds @property def shape(self): return tuple(self.world_coord_to_local(np.array(self.image_data.shape))) def _get_downsample_from_resolution(self, resolution): resolution = np.asarray(resolution) downsample = np.log2(np.true_divide(resolution, self.resolution)) if np.any(downsample < 0): raise ValueError('Requested resolution ({}) is higher than volume resolution ({}). ' 'Upsampling is not supported.'.format(resolution, self.resolution)) if not np.all(np.equal(np.mod(downsample, 1), 0)): raise ValueError('Requested resolution ({}) is not a power-of-2 downsample of ' 'volume resolution ({}). ' 'This is currently unsupported.'.format(resolution, self.resolution)) return downsample.astype(np.int64)
[docs] def downsample(self, resolution): downsample = self._get_downsample_from_resolution(resolution) if np.all(np.equal(downsample, 0)): return self return DownsampledVolume(self, downsample)
[docs] def partition(self, partitioning, partition_index): if np.array_equal(partitioning, np.ones(3)) and np.array_equal(partition_index, np.zeros(3)): return self return PartitionedVolume(self, partitioning, partition_index)
[docs] def sparse_wrapper(self, *args): return SparseWrappedVolume(self, *args)
[docs] def subvolume_bounds_generator(self, shape=None, label_margin=None): return self.SubvolumeBoundsGenerator(self, shape, label_margin)
[docs] def subvolume_generator(self, bounds_generator=None, **kwargs): if bounds_generator is None: if not kwargs: raise ValueError('Bounds generator arguments must be provided if no bounds generator is provided.') bounds_generator = self.subvolume_bounds_generator(**kwargs) return SubvolumeGenerator(self, bounds_generator)
[docs] def get_subvolume(self, bounds): if bounds.start is None or bounds.stop is None: raise ValueError('This volume does not support sparse subvolume access.') image_subvol = self.image_data[ bounds.start[0]:bounds.stop[0], bounds.start[1]:bounds.stop[1], bounds.start[2]:bounds.stop[2]] image_subvol = self.world_mat_to_local(image_subvol) if np.issubdtype(image_subvol.dtype, np.integer): image_subvol = image_subvol.astype(np.float32) / 256.0 seed = bounds.seed if seed is None: seed = np.array(image_subvol.shape, dtype=np.int64) // 2 if self.label_data is not None: label_start = bounds.start + bounds.label_margin label_stop = bounds.stop - bounds.label_margin label_subvol = self.label_data[ label_start[0]:label_stop[0], label_start[1]:label_stop[1], label_start[2]:label_stop[2]] label_subvol = self.world_mat_to_local(label_subvol) label_id = bounds.label_id if label_id is None: label_id = label_subvol[tuple(seed - bounds.label_margin)] label_mask = label_subvol == label_id else: label_mask = None label_id = None return Subvolume(image_subvol, label_mask, seed, label_id)
[docs] class SubvolumeBoundsGenerator(six.Iterator): def __init__(self, volume, shape, label_margin=None): self.volume = volume self.shape = shape self.margin = np.floor_divide(self.shape, 2).astype(np.int64) if label_margin is None: label_margin = np.zeros(3, dtype=np.int64) self.label_margin = label_margin self.skip_blank_sections = True self.ctr_min = self.margin self.ctr_max = (np.array(self.volume.shape) - self.margin - 1).astype(np.int64) self.random = np.random.RandomState(CONFIG.random_seed) # If the volume has a mask channel, further limit ctr_min and # ctr_max to lie inside a margin in the AABB of the mask. if self.volume.mask_data is not None: mask_min, mask_max = self.volume.mask_bounds mask_min = self.volume.world_coord_to_local(mask_min) mask_max = self.volume.world_coord_to_local(mask_max) self.ctr_min = np.maximum(self.ctr_min, mask_min + self.label_margin) self.ctr_max = np.minimum(self.ctr_max, mask_max - self.label_margin - 1) if np.any(self.ctr_min >= self.ctr_max): raise ValueError('Cannot generate subvolume bounds: bounds ({}, {}) too small for shape ({})'.format( np.array_str(self.ctr_min), np.array_str(self.ctr_max), np.array_str(self.shape))) def __iter__(self): return self
[docs] def reset(self): self.random.seed(0)
def __next__(self): while True: ctr = np.array([self.random.randint(self.ctr_min[n], self.ctr_max[n]) for n in range(3)]).astype(np.int64) start = ctr - self.margin stop = ctr + self.margin + np.mod(self.shape, 2).astype(np.int64) # If the volume has a mask channel, only accept subvolumes # entirely contained in it. if self.volume.mask_data is not None: start_local = self.volume.world_coord_to_local(start + self.label_margin) stop_local = self.volume.world_coord_to_local(stop - self.label_margin) mask = self.volume.mask_data[ start_local[0]:stop_local[0], start_local[1]:stop_local[1], start_local[2]:stop_local[2]] if not mask.all(): logging.debug('Skipping subvolume not entirely in mask.') continue # Skip subvolumes with seeds in blank sections. if self.skip_blank_sections and self.volume.image_data is not None: if self.volume.image_data[tuple(self.volume.local_coord_to_world(ctr))] == 0: logging.debug('Skipping subvolume with seed in blank section.') continue # Only accept subvolumes where the central seed voxel will be # of a uniform label after downsampling. For more stringent # seed region uniformity filtering, see has_uniform_seed_margin. if self.volume.label_data is None: label_id = None break seed_min = self.volume.local_coord_to_world(ctr) seed_max = self.volume.local_coord_to_world(ctr + 1) label_ids = self.volume.label_data[ seed_min[0]:seed_max[0], seed_min[1]:seed_max[1], seed_min[2]:seed_max[2]] if (label_ids == label_ids.item(0)).all(): label_id = label_ids.item(0) break return SubvolumeBounds(start, stop, label_id=label_id, label_margin=self.label_margin)
[docs]class NdarrayVolume(Volume): """A NumPy ndarray-backed volume. Since all volumes assume image and label data are ndarray-like, this class exists mostly as a bookkeeping convenience to make actual ndarray volumes explicit. """ def __init__(self, *args, **kwargs): super(NdarrayVolume, self).__init__(*args, **kwargs) self.image_data.flags.writeable = False if self.label_data is not None: self.label_data.flags.writeable = False
[docs]class VolumeView(Volume): def __init__(self, parent, *args, **kwargs): super(VolumeView, self).__init__(*args, **kwargs) self.parent = parent
[docs] def local_to_parent(self, a): return a
[docs] def local_coord_to_world(self, a): return self.parent.local_coord_to_world(self.local_to_parent(a))
[docs] def parent_to_local(self, a): return a
[docs] def world_coord_to_local(self, a): return self.parent_to_local(self.parent.world_coord_to_local(a))
[docs] def world_mat_to_local(self, m): return self.parent.world_mat_to_local(m)
@property def mask_bounds(self): return self.parent.mask_bounds @property def shape(self): return self.parent.shape
[docs] def get_subvolume(self, bounds): # assumes bounds given are in local coordinates parent_start = self.local_to_parent(bounds.start) if bounds.start is not None else None parent_stop = self.local_to_parent(bounds.stop) if bounds.stop is not None else None parent_seed = self.local_to_parent(bounds.seed) if bounds.seed is not None else None parent_bounds = SubvolumeBounds(start=parent_start, stop=parent_stop, seed=parent_seed, label_id=bounds.label_id, label_margin=bounds.label_margin) return self.parent.get_subvolume(parent_bounds)
[docs]class PartitionedVolume(VolumeView): """Wrap an existing volume for partitioned access. Subvolume accesses to this volume will be offset and clipped to a partition of the wrapped volume. Parameters ---------- parent : Volume The volume to wrap. partitioning : iterable of int Number of partitions along each axis. Only one axis should be greater than 1. partition_index : iterable of int Index of the partition which this volume will represent. """ def __init__(self, parent, partitioning, partition_index): super(PartitionedVolume, self).__init__( parent, parent.resolution, image_data=parent.image_data, label_data=parent.label_data, mask_data=parent.mask_data) self.partitioning = np.asarray(partitioning) self.partition_index = np.asarray(partition_index) partition_shape = np.floor_divide(np.array(self.parent.shape), self.partitioning) self.bounds = ((np.multiply(partition_shape, self.partition_index)).astype(np.int64), (np.multiply(partition_shape, self.partition_index + 1)).astype(np.int64))
[docs] def local_to_parent(self, a): return a + self.bounds[0]
[docs] def parent_to_local(self, a): return a - self.bounds[0]
@property def mask_bounds(self): if self.parent.mask_bounds is None: return None else: bound_min = np.maximum(self.parent.mask_bounds[0], self.bounds[0]) bound_max = np.minimum(self.parent.mask_bounds[1], self.bounds[1]) return bound_min, bound_max @property def shape(self): return tuple(self.bounds[1] - self.bounds[0])
[docs]class DownsampledVolume(VolumeView): """Wrap an existing volume for downsampled access. Subvolume accesses to this volume will be downsampled, but continue to use the wrapped volume and its data at the original resolution. Parameters ---------- parent : Volume The volume to wrap. downsample : iterable of int Integral zoom levels to downsample the wrapped volume. """ def __init__(self, parent, downsample): self.scale = np.exp2(downsample).astype(np.int64) super(DownsampledVolume, self).__init__( parent, np.multiply(parent.resolution, self.scale), image_data=parent.image_data, label_data=parent.label_data, mask_data=parent.mask_data)
[docs] def local_to_parent(self, a): return np.multiply(a, self.scale)
[docs] def parent_to_local(self, a): return np.floor_divide(a, self.scale)
@property def shape(self): return tuple(np.floor_divide(np.array(self.parent.shape), self.scale))
[docs] def get_subvolume(self, bounds): subvol_shape = bounds.stop - bounds.start label_shape = subvol_shape - 2 * bounds.label_margin parent_bounds = SubvolumeBounds(self.local_to_parent(bounds.start), self.local_to_parent(bounds.stop), label_margin=self.local_to_parent(bounds.label_margin)) subvol = self.parent.get_subvolume(parent_bounds) subvol.image = subvol.image.reshape( [subvol_shape[0], self.scale[0], subvol_shape[1], self.scale[1], subvol_shape[2], self.scale[2]]).mean(5).mean(3).mean(1) if subvol.label_mask is not None: # Downsample body mask by considering blocks where the majority # of voxels are in the body to be in the body. Alternatives are: # - Conjunction (tends to introduce false splits) # - Disjunction (tends to overdilate and merge) # - Mode label (computationally expensive) if CONFIG.volume.label_downsampling == 'conjunction': subvol.label_mask = subvol.label_mask.reshape( [label_shape[0], self.scale[0], label_shape[1], self.scale[1], label_shape[2], self.scale[2]]).all(5).all(3).all(1) else: subvol.label_mask = subvol.label_mask.reshape( [label_shape[0], self.scale[0], label_shape[1], self.scale[1], label_shape[2], self.scale[2]]).mean(5).mean(3).mean(1) > 0.5 # Note that this is not a coordinate xform to parent in the typical # sense, just a rescaling of the coordinate in the subvolume-local # coordinates. Hence no similar call in VolumeView.get_subvolume. subvol.seed = self.parent_to_local(subvol.seed) return subvol
[docs]class SparseWrappedVolume(VolumeView): """Wrap a existing volume for memory cached block sparse access.""" def __init__(self, parent, image_leaf_shape=None, label_leaf_shape=None): if image_leaf_shape is None: image_leaf_shape = list(CONFIG.model.input_fov_shape) if label_leaf_shape is None: label_leaf_shape = list(CONFIG.model.input_fov_shape) image_data = OctreeVolume(image_leaf_shape, (np.zeros(3), parent.image_data.shape), parent.image_data.dtype, populator=self.image_populator) label_data = OctreeVolume(label_leaf_shape, (np.zeros(3), parent.label_data.shape), parent.label_data.dtype, populator=self.label_populator) super(SparseWrappedVolume, self).__init__( parent, parent.resolution, image_data=image_data, label_data=label_data)
[docs] def image_populator(self, bounds): return self.parent.image_data[ bounds[0][0]:bounds[1][0], bounds[0][1]:bounds[1][1], bounds[0][2]:bounds[1][2]]
[docs] def label_populator(self, bounds): return self.parent.label_data[ bounds[0][0]:bounds[1][0], bounds[0][1]:bounds[1][1], bounds[0][2]:bounds[1][2]]
[docs]class HDF5Volume(Volume): """A volume backed by data views to HDF5 file arrays. Parameters ---------- orig_file : str Filename of the HDF5 file to load. image_dataaset : str Full dataset path including groups to the raw image data array. label_dataset : str Full dataset path including groups to the object label data array. """ @staticmethod
[docs] def from_toml(filename): from keras.utils.data_utils import get_file volumes = {} with open(filename, 'rb') as fin: datasets = toml.load(fin).get('dataset', []) for dataset in datasets: hdf5_file = dataset['hdf5_file'] if dataset.get('use_keras_cache', False): hdf5_file = get_file(hdf5_file, dataset['download_url'], md5_hash=dataset.get('download_md5', None)) image_dataset = dataset.get('image_dataset', None) label_dataset = dataset.get('label_dataset', None) mask_dataset = dataset.get('mask_dataset', None) mask_bounds = dataset.get('mask_bounds', None) resolution = dataset.get('resolution', None) hdf5_pathed_file = os.path.join(os.path.dirname(filename), hdf5_file) volume = HDF5Volume(hdf5_pathed_file, image_dataset, label_dataset, mask_dataset, mask_bounds=mask_bounds) # If the volume configuration specifies an explicit resolution, # override any provided in the HDF5 itself. if resolution: logging.info('Overriding resolution for volume "%s"', dataset['name']) volume.resolution = np.array(resolution) volumes[dataset['name']] = volume return volumes
@staticmethod
[docs] def write_file(filename, resolution, **kwargs): h5file = h5py.File(filename, 'w') config = {'hdf5_file': os.path.basename(filename)} channels = ['image', 'label', 'mask'] default_datasets = { 'image': 'volumes/raw', 'label': 'volumes/labels/neuron_ids', 'mask': 'volumes/labels/mask', } for channel in channels: data = kwargs.get('{}_data'.format(channel), None) dataset_name = kwargs.get('{}_dataset'.format(channel), default_datasets[channel]) if data is not None: dataset = h5file.create_dataset(dataset_name, data=data, dtype=data.dtype) dataset.attrs['resolution'] = resolution config['{}_dataset'.format(channel)] = dataset_name h5file.close() return config
def __init__(self, orig_file, image_dataset, label_dataset, mask_dataset, mask_bounds=None): logging.debug('Loading HDF5 file "{}"'.format(orig_file)) self.file = h5py.File(orig_file, 'r') self.resolution = None self._mask_bounds = tuple(map(np.asarray, mask_bounds)) if mask_bounds is not None else None if image_dataset is None and label_dataset is None: raise ValueError('HDF5 volume must have either an image or label dataset: {}'.format(orig_file)) if image_dataset is not None: self.image_data = self.file[image_dataset] if 'resolution' in self.file[image_dataset].attrs: self.resolution = np.array(self.file[image_dataset].attrs['resolution']) if label_dataset is not None: self.label_data = self.file[label_dataset] if 'resolution' in self.file[label_dataset].attrs: resolution = np.array(self.file[label_dataset].attrs['resolution']) if self.resolution is not None and not np.array_equal(self.resolution, resolution): logging.warning('HDF5 image and label dataset resolutions differ in %s: %s, %s', orig_file, self.resolution, resolution) else: self.resolution = resolution else: self.label_data = None if mask_dataset is not None: self.mask_data = self.file[mask_dataset] else: self.mask_data = None if image_dataset is None: self.image_data = np.full_like(self.label_data, np.NaN, dtype=np.float32) if self.resolution is None: self.resolution = np.ones(3)
[docs] def to_memory_volume(self): data = ['image_data', 'label_data', 'mask_data'] data = { k: self.world_mat_to_local(getattr(self, k)[:]) for k in data if getattr(self, k) is not None} return NdarrayVolume(self.world_coord_to_local(self.resolution), **data)
[docs]class ImageStackVolume(Volume): """A volume for block sparse access to image pyramids over HTTP. Coordinate Systems ---------- Real: Physical coordinates, generally measured in nanometers World: pixel coordinates, starts at (0,0,0) and accounts for pixel resolution often (4x4x40) nanometers per pixel Local: Downsampled pixel space Parameters ---------- bounds : iterable of int Shape of the stack at zoom level 0 in pixels. resolution : iterable of float Resolution of the stack at zoom level 0 in nm. tile_width, tile_height : int Size of tiles in pixels format_url : str Format string for building tile URLs from tile parameters. zoom_level : int, optional Zoom level to use for this volume. missing_z : iterable of int, optional Voxel z-indices where data is not available. image_leaf_shape : tuple of int or ndarray, optional Shape of image octree leaves in voxels. Defaults to 10 stacked tiles. label_leaf_shape : tuple of int or ndarray, optional Shape of label octree leaves in voxels. Defaults to FFN model FOV. """ @staticmethod
[docs] def from_catmaid_stack(stack_info, tile_source_parameters): # See https://catmaid.readthedocs.io/en/stable/tile_sources.html format_url = { 1: '{source_base_url}{{z}}/{{row}}_{{col}}_{{zoom_level}}.{file_extension}', 4: '{source_base_url}{{z}}/{{zoom_level}}/{{row}}_{{col}}.{file_extension}', 5: '{source_base_url}{{zoom_level}}/{{z}}/{{row}}/{{col}}.{file_extension}', 7: '{source_base_url}largeDataTileSource/{tile_width}/{tile_height}/' '{{zoom_level}}/{{z}}/{{row}}/{{col}}.{file_extension}', 9: '{source_base_url}{{z}}/{{row}}_{{col}}_{{zoom_level}}.{file_extension}', }[tile_source_parameters['tile_source_type']].format(**tile_source_parameters) bounds = np.flipud(np.array(stack_info['bounds'], dtype=np.int64)) resolution = np.flipud(np.array(stack_info['resolution'])) translation = np.flipud(np.array(stack_info['translation'])) tile_width = int(tile_source_parameters['tile_width']) tile_height = int(tile_source_parameters['tile_height']) return ImageStackVolume(bounds, resolution, translation, tile_width, tile_height, format_url, missing_z=stack_info.get("broken_slices", None))
[docs] def from_toml(filename): volumes = {} with open(filename, "rb") as fin: datasets = toml.load(fin).get("ImageStack", []) for dataset in datasets: # stack info si = [ "bounds", "resolution", "translation", "broken_slices", ] # tile stack parameters tsp = [ "source_base_url", "file_extension", "tile_width", "tile_height", "tile_source_type", ] volume = ImageStackVolume.from_catmaid_stack( {si[key]: dataset[key] for key in si}, {tsp[key]: dataset[key] for key in tsp}, ) volumes[dataset["title"]] = volume return volumes
def __init__(self, bounds, orig_resolution, translation, tile_width, tile_height, tile_format_url, zoom_level=0, missing_z=None, image_leaf_shape=None): self.orig_bounds = bounds self.orig_resolution = orig_resolution self.translation = translation self.tile_width = tile_width self.tile_height = tile_height self.tile_format_url = tile_format_url self.mask_data = None self.zoom_level = int(zoom_level) if missing_z is None: missing_z = [] self.missing_z = frozenset(missing_z) if image_leaf_shape is None: image_leaf_shape = [10, tile_height, tile_width] self.scale = np.exp2(np.array([0, self.zoom_level, self.zoom_level])).astype(np.int64) data_shape = (np.zeros(3), np.divide(bounds, self.scale).astype(np.int64)) self.image_data = OctreeVolume(image_leaf_shape, data_shape, 'float32', populator=self.image_populator) self.label_data = None
[docs] def local_coord_to_world(self, a): return np.multiply(a, self.scale)
[docs] def world_coord_to_local(self, a): return np.floor_divide(a, self.scale)
[docs] def real_coord_to_world(self, a): return np.floor_divide(a - self.translation, self.orig_resolution)
[docs] def world_coord_to_real(self, a): return np.multiply(a, self.orig_resolution) + self.translation
@property def resolution(self): return self.orig_resolution * np.exp2([0, self.zoom_level, self.zoom_level])
[docs] def downsample(self, resolution): downsample = self._get_downsample_from_resolution(resolution) zoom_level = np.min(downsample[[self.DIM.X, self.DIM.Y]]) if zoom_level > 0: return ImageStackVolume( self.orig_bounds, self.orig_resolution, self.translation, self.tile_width, self.tile_height, self.tile_format_url, zoom_level=self.zoom_level + zoom_level, missing_z=self.missing_z, image_leaf_shape=self.image_data.leaf_shape).downsample(resolution) if np.all(np.equal(downsample, 0)): return self return DownsampledVolume(self, downsample)
[docs] def subvolume_bounds_generator(self, sparse_margin=None, **kwargs): if sparse_margin is not None: if kwargs: raise ValueError('sparse_margin can not be combined with other arguments.') return self.SparseSubvolumeBoundsGenerator(self, sparse_margin) return super(ImageStackVolume, self).subvolume_bounds_generator(**kwargs)
[docs] def get_subvolume(self, bounds): if bounds.start is None or bounds.stop is None: image_subvol = self.image_data label_subvol = self.label_data else: image_subvol = self.image_data[ bounds.start[0]:bounds.stop[0], bounds.start[1]:bounds.stop[1], bounds.start[2]:bounds.stop[2]] label_subvol = None if np.issubdtype(image_subvol.dtype, np.integer): raise ValueError('Sparse volume access does not support image data coercion.') seed = bounds.seed if seed is None: seed = np.array(image_subvol.shape, dtype=np.int64) // 2 return Subvolume(image_subvol, label_subvol, seed, bounds.label_id)
[docs] def image_populator(self, bounds): image_subvol = np.zeros(tuple(bounds[1] - bounds[0]), dtype=np.float32) col_range = list(map(int, (math.floor(bounds[0][self.DIM.X] / self.tile_width), math.ceil(bounds[1][self.DIM.X] / self.tile_width)))) row_range = list(map(int, (math.floor(bounds[0][self.DIM.Y] / self.tile_height), math.ceil(bounds[1][self.DIM.Y] / self.tile_height)))) tile_size = np.array([1, self.tile_height, self.tile_width]).astype(np.int64) for z in xrange(bounds[0][self.DIM.Z], bounds[1][self.DIM.Z]): if z in self.missing_z: image_subvol[int(z - bounds[0][self.DIM.Z]), :, :] = 0 continue for r in xrange(*row_range): for c in xrange(*col_range): url = self.tile_format_url.format(zoom_level=self.zoom_level, z=z, row=r, col=c) try: im = np.array(Image.open(requests.get(url, stream=True).raw)) # If the image is multichannel, throw our hands up and # just use the first channel. if im.ndim > 2: im = im[:, :, 0].squeeze() im = im / 256.0 except IOError: logging.debug('Failed to load tile: %s', url) im = np.full((self.tile_height, self.tile_width), 0, dtype=np.float32) tile_coord = np.array([z, r, c]).astype(np.int64) tile_loc = np.multiply(tile_coord, tile_size) subvol = (np.maximum(np.zeros(3), tile_loc - bounds[0]).astype(np.int64), np.minimum(np.array(image_subvol.shape), tile_loc + tile_size - bounds[0]).astype(np.int64)) tile_sub = (np.maximum(np.zeros(3), bounds[0] - tile_loc).astype(np.int64), np.minimum(tile_size, bounds[1] - tile_loc).astype(np.int64)) image_subvol[subvol[0][self.DIM.Z], subvol[0][self.DIM.Y]:subvol[1][self.DIM.Y], subvol[0][self.DIM.X]:subvol[1][self.DIM.X]] = \ im[tile_sub[0][self.DIM.Y]:tile_sub[1][self.DIM.Y], tile_sub[0][self.DIM.X]:tile_sub[1][self.DIM.X]] return image_subvol
[docs] class SparseSubvolumeBoundsGenerator(six.Iterator): def __init__(self, volume, margin): self.volume = volume self.margin = np.asarray(margin).astype(np.int64) self.ctr_min = self.margin self.ctr_max = (np.array(self.volume.shape) - self.margin - 1).astype(np.int64) self.random = np.random.RandomState(CONFIG.random_seed) @property def shape(self): return self.volume.shape def __iter__(self): return self
[docs] def reset(self): self.random.seed(0)
def __next__(self): ctr = np.array([self.random.randint(self.ctr_min[n], self.ctr_max[n]) for n in range(3)]).astype(np.int64) return SubvolumeBounds(seed=ctr)