Merge pull request #1404 from MDAnalysis/issue-1159-analysis-deps

make scipy and matplotlib required dependencies (#1159)
MDAnalysis · Jun 22, 2017 · 853447c · 853447c
2 parents 87dbe0e + f2f45f8
commit 853447c
Show file tree

Hide file tree

Showing 20 changed files with 152 additions and 242 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -31,10 +31,10 @@ env:
     - MAIN_CMD="pytest ${PYTEST_LIST} ${PYTEST_FLAGS}; python ./testsuite/MDAnalysisTests/mda_nosetests ${NOSE_TEST_LIST} ${NOSE_FLAGS}"
     - SETUP_CMD=""
     - BUILD_CMD="pip install -v package/ && pip install testsuite/"
-    - CONDA_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer pytest=3.1.2 pytest-cov=2.5.1"
-    - CONDA_ALL_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib netcdf4 scikit-learn scipy seaborn coveralls clustalw=2.1 pytest=3.1.2 pytest-cov=2.5.1"
+    - CONDA_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib scipy griddataformats pytest=3.1.2 pytest-cov=2.5.1"
+    - CONDA_ALL_DEPENDENCIES="mmtf-python nose=1.3.7 mock six biopython networkx cython joblib nose-timer matplotlib netcdf4 scikit-learn scipy griddataformats seaborn coveralls clustalw=2.1 pytest=3.1.2 pytest-cov=2.5.1"
-    # Install griddataformats from PIP so that scipy is only installed in the full build (#1147)
+    # griddataformats is now installed from conda together with scipy (#1159)
-    - PIP_DEPENDENCIES='griddataformats'
+    - PIP_DEPENDENCIES=''
     - CONDA_CHANNELS='biobuilds conda-forge'
     - CONDA_CHANNEL_PRIORITY=True
     - NUMPY_VERSION=stable

diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -29,6 +29,9 @@ Fixes
   * Fixed dtype of numpy arrays to accomodate 32 bit architectures (Issue #1362)
   * Groups are hashable on python 3 (Issue #1397)
 
+Changes
+  * scipy and matplotlib are now required dependencies (Issue #1159)
+
 
 06/03/17 utkbansal, kain88-de, xiki-tempula, kaplajon, wouterboomsma,
          richardjgowers, Shtkddud123, QuantumEntangledAndy, orbeckst,

diff --git a/package/MDAnalysis/analysis/distances.py b/package/MDAnalysis/analysis/distances.py
@@ -42,6 +42,7 @@
            'contact_matrix', 'dist', 'between']
 
 import numpy as np
+import scipy.sparse
 
 from MDAnalysis.lib.distances import distance_array, self_distance_array
 from MDAnalysis.lib.c_distances import contact_matrix_no_pbc, contact_matrix_pbc
@@ -51,15 +52,6 @@
 import logging
 logger = logging.getLogger("MDAnalysis.analysis.distances")
 
-try:
-   from scipy import sparse
-except ImportError:
-   sparse = None
-   msg = "scipy.sparse could not be imported: some functionality will " \
-         "not be available in contact_matrix()"
-   warnings.warn(msg, category=ImportWarning)
-   logger.warn(msg)
-   del msg
 
 def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None):
     '''Calculates a matrix of contacts.
@@ -93,12 +85,6 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None):
        The contact matrix is returned in a format determined by the `returntype`
        keyword.
 
-
-    Note
-    ----
-    :mod:`scipy.sparse` is require for using *sparse* matrices; if it cannot
-    be imported then an `ImportError` is raised.
-
     See Also
     --------
     :mod:`MDAnalysis.analysis.contacts` for native contact analysis
@@ -112,14 +98,9 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None):
         adj = (distance_array(coord, coord, box=box) < cutoff)
         return adj
     elif returntype == "sparse":
-        if sparse is None:
-            # hack: if we are running with minimal dependencies then scipy was
-            #       not imported and we have to bail here (see scipy import at top)
-            raise ImportError("For sparse matrix functionality you need to "
-                              "import scipy.")
         # Initialize square List of Lists matrix of dimensions equal to number
         # of coordinates passed
-        sparse_contacts = sparse.lil_matrix((len(coord), len(coord)), dtype='bool')
+        sparse_contacts = scipy.sparse.lil_matrix((len(coord), len(coord)), dtype='bool')
         if box is not None:
             # with PBC
             contact_matrix_pbc(coord, sparse_contacts, box, cutoff)

diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py
@@ -172,21 +172,13 @@
 from __future__ import print_function, division, absolute_import
 from six.moves import range, zip
 
-import MDAnalysis as mda
-import numpy as np
 import warnings
 import logging
-try:
-    from scipy.stats import gaussian_kde
-except ImportError:
-    gaussian_kde = None
-    msg = "scipy.stats.gaussian_kde could not be imported. " \
-          "Dimensionality reduction ensemble comparisons will not " \
-          "be available."
-    warnings.warn(msg,
-                  category=ImportWarning)
-    logging.warn(msg)
-    del msg
+
+import numpy as np
+import scipy.stats
+
+import MDAnalysis as mda
 
 from ...coordinates.memory import MemoryReader
 from .confdistmatrix import get_distance_matrix
@@ -460,18 +452,11 @@ def gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles,
     embedded_ensembles = []
     resamples = []
 
-    if gaussian_kde is None:
-        # hack: if we are running with minimal dependencies then scipy was
-        # not imported and we have to bail here (see scipy import at top)
-        raise ImportError("For Kernel Density Estimation functionality you"
-                          "need to import scipy")
-
     for i in range(1, nensembles + 1):
         this_embedded = embedded_space.transpose()[
             np.where(np.array(ensemble_assignment) == i)].transpose()
         embedded_ensembles.append(this_embedded)
-        kdes.append(gaussian_kde(
-            this_embedded))
+        kdes.append(scipy.stats.gaussian_kde(this_embedded))
 
     # # Set number of samples
     # if not nsamples:
@@ -623,12 +608,6 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles,
 
     """
 
-    if gaussian_kde is None:
-        # hack: if we are running with minimal dependencies then scipy was
-        # not imported and we have to bail here (see scipy import at top)
-        raise ImportError("For Kernel Density Estimation functionality you"
-                          "need to import scipy")
-
     kdes = []
     embedded_ensembles = []
     resamples = []
@@ -639,8 +618,7 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles,
             np.logical_and(ensemble_assignment >= ens_id_min,
                               ensemble_assignment <= i))].transpose()
         embedded_ensembles.append(this_embedded)
-        kdes.append(
-            gaussian_kde(this_embedded))
+        kdes.append(scipy.stats.gaussian_kde(this_embedded))
 
     # Resample according to probability distributions
     for this_kde in kdes:

diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
@@ -155,14 +155,16 @@
 from __future__ import division, absolute_import
 from six.moves import zip
 import numpy as np
+import scipy.optimize
+
 import warnings
 
 from MDAnalysis.lib.log import ProgressMeter
 from MDAnalysis.lib.distances import distance_array, calc_angles, calc_bonds
 
 
 class HydrogenBondAutoCorrel(object):
-    """Perform a time autocorrelation of the hydrogen bonds in the system. 
+    """Perform a time autocorrelation of the hydrogen bonds in the system.
 
     Parameters
     ----------
@@ -421,8 +423,10 @@ def solve(self, p_guess=None):
             Initial guess for the leastsq fit, must match the shape of the
             expected coefficients
 
-        Continuous defition results are fitted to a double exponential,
-        intermittent definition are fit to a triple exponential.
+
+        Continuous definition results are fitted to a double exponential with
+        :func:`scipy.optimize.leastsq`; intermittent definition results are
+        fitted to a triple exponential.
 
         The results of this fitting procedure are saved into the *fit*,
         *tau* and *estimate* keywords in the solution dict.
@@ -434,14 +438,14 @@ def solve(self, p_guess=None):
          - *estimate* contains the estimate provided by the fit of the time
            autocorrelation function
 
-        In addition, the output of the leastsq function is saved into the
-        solution dict
+        In addition, the output of the :func:`~scipy.optimize.leastsq` function
+        is saved into the solution dict
 
          - *infodict*
          - *mesg*
          - *ier*
+
         """
-        from scipy.optimize import leastsq
 
         if self.solution['results'] is None:
             raise ValueError(
@@ -498,9 +502,8 @@ def triple(x, A1, A2, tau1, tau2, tau3):
             if p_guess is None:
                 p_guess = (0.5, 10 * self.sample_time, self.sample_time)
 
-            p, cov, infodict, mesg, ier = leastsq(err, p_guess,
-                                                  args=(time, results),
-                                                  full_output=True)
+            p, cov, infodict, mesg, ier = scipy.optimize.leastsq(
+                err, p_guess, args=(time, results), full_output=True)
             self.solution['fit'] = p
             A1, tau1, tau2 = p
             A2 = 1 - A1
@@ -512,9 +515,8 @@ def triple(x, A1, A2, tau1, tau2, tau3):
                 p_guess = (0.33, 0.33, 10 * self.sample_time,
                            self.sample_time, 0.1 * self.sample_time)
 
-            p, cov, infodict, mesg, ier = leastsq(err, p_guess,
-                                                  args=(time, results),
-                                                  full_output=True)
+            p, cov, infodict, mesg, ier = scipy.optimize.leastsq(
+                err, p_guess, args=(time, results), full_output=True)
             self.solution['fit'] = p
             A1, A2, tau1, tau2, tau3 = p
             A3 = 1 - A1 - A2

diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py
@@ -245,7 +245,6 @@
 from six.moves import zip, cPickle
 import six
 
-import numpy as np
 import glob
 import os
 import errno
@@ -258,6 +257,10 @@
 import logging
 from itertools import cycle
 
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+
 from MDAnalysis import Universe
 from MDAnalysis.exceptions import ApplicationError
 from MDAnalysis.lib.util import which, realpath, asiterable
@@ -370,8 +373,6 @@ def save(self, filename="hole.pickle"):
         cPickle.dump(self.profiles, open(filename, "wb"), cPickle.HIGHEST_PROTOCOL)
 
     def _process_plot_kwargs(self, kwargs):
-        import matplotlib.colors
-
         kw = {}
         frames = kwargs.pop('frames', None)
         if frames is None:
@@ -448,9 +449,6 @@ def plot(self, **kwargs):
            Returns ``ax``.
 
         """
-
-        import matplotlib.pyplot as plt
-
         kw, kwargs = self._process_plot_kwargs(kwargs)
 
         ax = kwargs.pop('ax', None)
@@ -517,8 +515,7 @@ def plot3D(self, **kwargs):
            Returns ``ax``.
 
         """
-
-        import matplotlib.pyplot as plt
+        # installed with matplotlib; imported here to enable 3D axes
         from mpl_toolkits.mplot3d import Axes3D
 
         kw, kwargs = self._process_plot_kwargs(kwargs)
@@ -540,8 +537,7 @@ def plot3D(self, **kwargs):
                 rxncoord = profile.rxncoord
             else:
                 # does not seem to work with masked arrays but with nan hack!
-                # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value
-                # -for-the-z-axis
+                # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value-for-the-z-axis
                 #radius = np.ma.masked_greater(profile.radius, rmax)
                 #rxncoord = np.ma.array(profile.rxncoord, mask=radius.mask)
                 rxncoord = profile.rxncoord

diff --git a/package/MDAnalysis/analysis/legacy/x3dna.py b/package/MDAnalysis/analysis/legacy/x3dna.py
@@ -132,13 +132,15 @@
 import errno
 import shutil
 import warnings
-import numpy as np
 import os.path
 import subprocess
 import tempfile
 import textwrap
 from collections import OrderedDict
 
+import numpy as np
+import matplotlib.pyplot as plt
+
 from MDAnalysis import ApplicationError
 from MDAnalysis.lib.util import which, realpath, asiterable
 
@@ -413,7 +415,6 @@ def plot(self, **kwargs):
              Provide `ax` to have all plots plotted in the same axes.
 
         """
-        import matplotlib.pyplot as plt
 
         na_avg, na_std = self.mean_std()
         for k in range(len(na_avg[0])):

diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py
@@ -106,6 +106,7 @@
 import warnings
 
 import numpy as np
+import scipy.integrate
 
 from MDAnalysis import Universe
 from MDAnalysis.analysis.align import _fit_to
@@ -357,9 +358,9 @@ def cosine_content(pca_space, i):
     .. [BerkHess1] Berk Hess. Convergence of sampling in protein simulations.
                    Phys. Rev. E 65, 031910 (2002).
     """
-    from scipy.integrate import simps
+
     t = np.arange(len(pca_space))
     T = len(pca_space)
     cos = np.cos(np.pi * t * (i + 1) / T)
-    return ((2.0 / T) * (simps(cos*pca_space[:, i])) ** 2 /
-            simps(pca_space[:, i] ** 2))
+    return ((2.0 / T) * (scipy.integrate.simps(cos*pca_space[:, i])) ** 2 /
+            scipy.integrate.simps(pca_space[:, i] ** 2))
diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py
@@ -36,6 +36,8 @@
 from six.moves import range
 
 import numpy as np
+import scipy.optimize
+
 import logging
 
 from .. import NoDataError
@@ -165,13 +167,10 @@ def fit_exponential_decay(x, y):
     -----
     This function assumes that data starts at 1.0 and decays to 0.0
 
-    Requires scipy
     """
-    from scipy.optimize import curve_fit
-
     def expfunc(x, a):
         return np.exp(-x/a)
 
-    a = curve_fit(expfunc, x, y)[0][0]
+    a = scipy.optimize.curve_fit(expfunc, x, y)[0][0]
 
     return a