diff --git a/benchmarks/benchmarks.py b/benchmarks/benchmarks.py index 43c1e12a5..acc1e6efa 100644 --- a/benchmarks/benchmarks.py +++ b/benchmarks/benchmarks.py @@ -26,3 +26,32 @@ def time_many_small_reads(self): ds = self.f['a'] for i in range(10000): arr = ds[i * 10:(i + 1) * 10] + +class WritingTimeSuite: + """Based on example in GitHub issue 492: + https://github.com/h5py/h5py/issues/492 + """ + def setup(self): + self._td = TemporaryDirectory() + path = osp.join(self._td.name, 'test.h5') + self.f = h5py.File(path, 'w') + self.shape = shape = (128, 1024, 512) + self.f.create_dataset( + 'a', shape=shape, dtype=np.float32, chunks=(1, shape[1], 64) + ) + + def teardown(self): + self.f.close() + self._td.cleanup() + + def time_write_index_last_axis(self): + ds = self.f['a'] + data = np.zeros(self.shape[:2]) + for i in range(self.shape[2]): + ds[..., i] = data + + def time_write_slice_last_axis(self): + ds = self.f['a'] + data = np.zeros(self.shape[:2]) + for i in range(self.shape[2]): + ds[..., i:i+1] = data[..., np.newaxis] diff --git a/h5py/_hl/dataset.py b/h5py/_hl/dataset.py index c84460c8b..b94a48c7a 100644 --- a/h5py/_hl/dataset.py +++ b/h5py/_hl/dataset.py @@ -693,22 +693,16 @@ def __getitem__(self, args, new_dtype=None): selection = sel.select(self.shape, args, dsid=self.id) if selection.nselect == 0: - return numpy.ndarray(selection.mshape, dtype=new_dtype) + return numpy.ndarray(selection.array_shape, dtype=new_dtype) # Up-converting to (1,) so that numpy.ndarray correctly creates # np.void rows in case of multi-field dtype. 
(issue 135) - single_element = selection.mshape == () - mshape = (1,) if single_element else selection.mshape - arr = numpy.ndarray(mshape, new_dtype, order='C') - - # HDF5 has a bug where if the memory shape has a different rank - # than the dataset, the read is very slow - if len(mshape) < len(self.shape): - # pad with ones - mshape = (1,)*(len(self.shape)-len(mshape)) + mshape + single_element = selection.array_shape == () + arr_shape = (1,) if single_element else selection.array_shape + arr = numpy.ndarray(arr_shape, new_dtype, order='C') # Perform the actual read - mspace = h5s.create_simple(mshape) + mspace = h5s.create_simple(selection.mshape) fspace = selection.id self.id.read(mspace, fspace, arr, mtype, dxpl=self._dxpl) @@ -840,26 +834,20 @@ def __setitem__(self, args, val): # memory. In any case, if we cannot afford to create an intermediate # array of the same size as the dataset chunk size, the user program has # little hope to go much further. Solves h5py isue #1067 - if mshape == () and selection.mshape != (): + if mshape == () and selection.array_shape != (): if self.dtype.subdtype is not None: raise TypeError("Scalar broadcasting is not supported for array dtypes") - if self.chunks and (numpy.prod(self.chunks, dtype=numpy.float) >= \ - numpy.prod(selection.mshape, dtype=numpy.float)): - val2 = numpy.empty(selection.mshape, dtype=val.dtype) + if self.chunks and (numpy.prod(self.chunks, dtype=numpy.float) >= + numpy.prod(selection.array_shape, dtype=numpy.float)): + val2 = numpy.empty(selection.array_shape, dtype=val.dtype) else: - val2 = numpy.empty(selection.mshape[-1], dtype=val.dtype) + val2 = numpy.empty(selection.array_shape[-1], dtype=val.dtype) val2[...] 
= val val = val2 mshape = val.shape # Perform the write, with broadcasting - # Be careful to pad memory shape with ones to avoid HDF5 chunking - # glitch, which kicks in for mismatched memory/file selections - if len(mshape) < len(self.shape): - mshape_pad = (1,)*(len(self.shape)-len(mshape)) + mshape - else: - mshape_pad = mshape - mspace = h5s.create_simple(mshape_pad, (h5s.UNLIMITED,)*len(mshape_pad)) + mspace = h5s.create_simple(selection.expand_shape(mshape)) for fspace in selection.broadcast(mshape): self.id.write(mspace, fspace, val, mtype, dxpl=self._dxpl) diff --git a/h5py/_hl/selections.py b/h5py/_hl/selections.py index a730d3465..505ed2719 100644 --- a/h5py/_hl/selections.py +++ b/h5py/_hl/selections.py @@ -149,9 +149,20 @@ def mshape(self): """ Shape of selection (always 1-D for this class) """ return (self.nselect,) - def broadcast(self, target_shape): + @property + def array_shape(self): + """Shape of array to read/write (always 1-D for this class)""" + return self.mshape + + # expand_shape and broadcast only really make sense for SimpleSelection + def expand_shape(self, source_shape): + if product(source_shape) != self.nselect: + raise TypeError("Broadcasting is not supported for point-wise selections") + return source_shape + + def broadcast(self, source_shape): """ Get an iterable for broadcasting """ - if np.product(target_shape) != self.nselect: + if product(source_shape) != self.nselect: raise TypeError("Broadcasting is not supported for point-wise selections") yield self._id @@ -213,13 +224,17 @@ class SimpleSelection(Selection): @property def mshape(self): """ Shape of current selection """ - return self._mshape + return self._sel[1] + + @property + def array_shape(self): + return self._array_shape def __init__(self, shape, *args, **kwds): super(SimpleSelection, self).__init__(shape, *args, **kwds) rank = len(self.shape) self._sel = ((0,)*rank, self.shape, (1,)*rank, (False,)*rank) - self._mshape = self.shape + self._array_shape = self.shape 
def __getitem__(self, args): @@ -238,47 +253,67 @@ def __getitem__(self, args): self._sel = (start, count, step, scalar) - self._mshape = tuple(x for x, y in zip(count, scalar) if not y) + # array shape drops dimensions where a scalar index was selected + self._array_shape = tuple(x for x, y in zip(count, scalar) if not y) return self + def expand_shape(self, source_shape): + """Match the dimensions of an array to be broadcast to the selection - def broadcast(self, target_shape): - """ Return an iterator over target dataspaces for broadcasting. + The returned shape describes an array of the same size as the input + shape, but its dimensions are padded with length-1 axes so its rank matches the selection. - Follows the standard NumPy broadcasting rules against the current - selection shape (self.mshape). - """ - if self.shape == (): - if np.product(target_shape) != 1: - raise TypeError("Can't broadcast %s to scalar" % target_shape) - self._id.select_all() - yield self._id - return + E.g. with a dataset shape (10, 5, 4, 2), writing like this:: + + ds[..., 0] = np.ones((5, 4)) + The source shape (5, 4) will expand to (1, 5, 4, 1). + Then the broadcast method below repeats that chunk 10 + times to write to an effective shape of (10, 5, 4, 1).
+ """ start, count, step, scalar = self._sel rank = len(count) - target = list(target_shape) + remaining_src_dims = list(source_shape) - tshape = [] - for idx in range(1,rank+1): - if len(target) == 0 or scalar[-idx]: # Skip scalar axes - tshape.append(1) + eshape = [] + for idx in range(1, rank + 1): + if len(remaining_src_dims) == 0 or scalar[-idx]: # Skip scalar axes + eshape.append(1) else: - t = target.pop() + t = remaining_src_dims.pop() if t == 1 or count[-idx] == t: - tshape.append(t) + eshape.append(t) else: - raise TypeError("Can't broadcast %s -> %s" % (target_shape, self.mshape)) + raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape)) # array shape - if any([n > 1 for n in target]): + if any([n > 1 for n in remaining_src_dims]): # All dimensions from target_shape should either have been popped # to match the selection shape, or be 1. - raise TypeError("Can't broadcast %s -> %s" % (target_shape, self.mshape)) + raise TypeError("Can't broadcast %s -> %s" % (source_shape, self.array_shape)) # array shape + + # We have built eshape backwards, so now reverse it + return tuple(eshape[::-1]) - tshape.reverse() - tshape = tuple(tshape) + + def broadcast(self, source_shape): + """ Return an iterator over target dataspaces for broadcasting. + + Follows the standard NumPy broadcasting rules against the current + selection shape (self.mshape). 
+ """ + if self.shape == (): + if product(source_shape) != 1: + raise TypeError("Can't broadcast %s to scalar" % source_shape) + self._id.select_all() + yield self._id + return + + start, count, step, scalar = self._sel + + rank = len(count) + tshape = self.expand_shape(source_shape) chunks = tuple(x//y for x, y in zip(count, tshape)) nchunks = product(chunks) @@ -310,9 +345,13 @@ class FancySelection(Selection): def mshape(self): return self._mshape + @property + def array_shape(self): + return self._array_shape + def __init__(self, shape, *args, **kwds): super(FancySelection, self).__init__(shape, *args, **kwds) - self._mshape = self.shape + self._mshape = self._array_shape = self.shape def __getitem__(self, args): @@ -385,10 +424,16 @@ def __getitem__(self, args): elif scalar[idx]: mshape[idx] = -1 - self._mshape = tuple(x for x in mshape if x >= 0) + self._mshape = tuple(abs(x) for x in mshape) # Convert -1 back to 1 + self._array_shape = tuple(x for x in mshape if x >= 0) + + def expand_shape(self, source_shape): + if not source_shape == self.array_shape: + raise TypeError("Broadcasting is not supported for complex selections") + return source_shape - def broadcast(self, target_shape): - if not target_shape == self.mshape: + def broadcast(self, source_shape): + if not source_shape == self.array_shape: raise TypeError("Broadcasting is not supported for complex selections") yield self._id diff --git a/h5py/_hl/vds.py b/h5py/_hl/vds.py index f3b84ed0e..41e7264f0 100644 --- a/h5py/_hl/vds.py +++ b/h5py/_hl/vds.py @@ -97,7 +97,7 @@ def __init__(self, path_or_dataset, name=None, @property def shape(self): - return self.sel.mshape + return self.sel.array_shape def __getitem__(self, key): tmp = copy(self) diff --git a/news/select-mshape.rst b/news/select-mshape.rst new file mode 100644 index 000000000..1d7cbd457 --- /dev/null +++ b/news/select-mshape.rst @@ -0,0 +1,30 @@ +New features +------------ + +* + +Deprecations +------------ + +* + +Exposing HDF5 functions 
+----------------------- + +* + +Bug fixes +--------- + +* Fix pathologically slow reading/writing in certain conditions with integer + indexing (:issue:`492`). + +Building h5py +------------- + +* + +Development +----------- + +*