PMDA with refactored _single_frame
#128
base: master
Changes to DensityAnalysis:
```diff
@@ -240,8 +240,8 @@ def __init__(self, atomgroup, delta=1.0, atomselection=None,
                  metadata=None, padding=2.0, updating=False,
                  parameters=None, gridcenter=None, xdim=None, ydim=None,
                  zdim=None):
-        u = atomgroup.universe
-        super(DensityAnalysis, self).__init__(u, (atomgroup, ))
+        universe = atomgroup.universe
+        super().__init__(universe)
         self._atomgroup = atomgroup
         self._delta = delta
         self._atomselection = atomselection
```
```diff
@@ -253,7 +253,7 @@ def __init__(self, atomgroup, delta=1.0, atomselection=None,
         self._xdim = xdim
         self._ydim = ydim
         self._zdim = zdim
-        self._trajectory = u.trajectory
+        self._trajectory = universe.trajectory
         if updating and atomselection is None:
             raise ValueError("updating=True requires a atomselection string")
         elif not updating and atomselection is not None:
```
```diff
@@ -289,20 +289,31 @@ def _prepare(self):
         grid, edges = np.histogramdd(np.zeros((1, 3)), bins=bins,
                                      range=arange, normed=False)
         grid *= 0.0
-        self._grid = grid
+
+        self._results = [grid] * self.n_frames
         self._edges = edges
         self._arange = arange
         self._bins = bins

-    def _single_frame(self, ts, atomgroups):
-        coord = self.current_coordinates(atomgroups[0], self._atomselection,
-                                         self._updating)
-        result = np.histogramdd(coord, bins=self._bins, range=self._arange,
-                                normed=False)
-        return result[0]
+    def _single_frame(self):
+        h, _ = np.histogramdd(self._atomgroup.positions,
+                              bins=self._bins, range=self._arange,
+                              normed=False)
+        # reduce (proposed change #2542 to match the parallel version in pmda.density)
+        # return self._reduce(self._grid, h)
+        #
+        # serial code can simply do
+        # the current timestep of the trajectory is self._ts
+        # self._results[self._frame_index][0] = self._ts.frame
+        # the actual trajectory is at self._trajectory
+        # self._results[self._frame_index][1] = self._trajectory.time
+        self._results[self._frame_index] = h

     def _conclude(self):
-        self._grid = self._results[:].sum(axis=0)
+        # sum both inside and among blocks.
+        self._grid = self._results[:].sum(axis=(0, 1))
         self._grid /= float(self.n_frames)
         metadata = self._metadata if self._metadata is not None else {}
         metadata['psf'] = self._atomgroup.universe.filename
```
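A quick way to see why `_conclude` changes from `sum(axis=0)` to `sum(axis=(0, 1))`: in the parallel layout, `_results` stacks per-block lists of per-frame histograms, so the array gains a leading (n_blocks, n_frames) pair of axes that both need to be summed away. A toy check with hypothetical shapes (not the PR's actual data):

```python
import numpy as np

# hypothetical: 2 blocks x 3 frames of 4x4x4 density histograms
results = np.ones((2, 3, 4, 4, 4))

per_frame = results.sum(axis=0)      # (3, 4, 4, 4): frames still separate
total = results.sum(axis=(0, 1))     # (4, 4, 4): summed over blocks AND frames
assert total[0, 0, 0] == 2 * 3       # every voxel saw 2 blocks * 3 frames
```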
```diff
@@ -322,14 +333,6 @@ def _conclude(self):
         density.make_density()
         self.density = density

-    @staticmethod
-    def _reduce(res, result_single_frame):
-        """ 'accumulate' action for a time series"""
-        if isinstance(res, list) and len(res) == 0:
-            res = result_single_frame
-        else:
-            res += result_single_frame
-        return res
-
     @staticmethod
     def current_coordinates(atomgroup, atomselection, updating):
```

Comment on lines -325 to -332:

I don't like the design that gets rid of reduce.
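For context, the removed `_reduce` is pmda's per-block accumulation hook: each worker folds one frame's histogram into a running value, so a block never holds more than one grid at a time. A minimal standalone sketch of that pattern (the `frame_histograms` generator is hypothetical, not pmda's driver code):

```python
import numpy as np

def reduce_histogram(res, result_single_frame):
    """'accumulate' action: fold one frame's histogram into the running sum."""
    if isinstance(res, list) and len(res) == 0:
        # first frame of the block: adopt the frame result as the accumulator
        res = result_single_frame
    else:
        res += result_single_frame
    return res

frame_histograms = (np.ones((4, 4, 4)) for _ in range(3))  # hypothetical frames
block_result = []                                          # empty accumulator
for h in frame_histograms:
    block_result = reduce_histogram(block_result, h)
assert block_result.sum() == 3 * 4 ** 3
```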
Changes to LeafletFinder:
```diff
@@ -76,7 +76,7 @@ def __init__(self, universe, atomgroups):

         super(LeafletFinder, self).__init__(universe, (atomgroups,))

-    def _find_connected_components(self, data, cutoff=15.0):
+    def _find_connected_components(self, data_list, cutoff=15.0):
         """Perform the Connected Components discovery for the atoms in data.

         Parameters
```
```diff
@@ -99,62 +99,66 @@ def _find_connected_components(self, data, cutoff=15.0):

         """
-        # pylint: disable=unsubscriptable-object
-        window, index = data[0]
-        num = window[0].shape[0]
-        i_index = index[0]
-        j_index = index[1]
-        graph = nx.Graph()
-
-        if i_index == j_index:
-            train = window[0]
-            test = window[1]
-        else:
-            train = np.vstack([window[0], window[1]])
-            test = np.vstack([window[0], window[1]])
-
-        tree = cKDTree(train, leafsize=40)
-        edges = tree.query_ball_point(test, cutoff)
-        edge_list = [list(zip(np.repeat(idx, len(dest_list)), dest_list))
-                     for idx, dest_list in enumerate(edges)]
-
-        edge_list_flat = np.array([list(item) for sublist in edge_list for
-                                   item in sublist])
-
-        if i_index == j_index:
-            res = edge_list_flat.transpose()
-            res[0] = res[0] + i_index - 1
-            res[1] = res[1] + j_index - 1
-        else:
-            removed_elements = list()
-            for i in range(edge_list_flat.shape[0]):
-                if (edge_list_flat[i, 0] >= 0 and
-                        edge_list_flat[i, 0] <= num - 1) and \
-                        (edge_list_flat[i, 1] >= 0 and
-                         edge_list_flat[i, 1] <= num - 1) or \
-                        (edge_list_flat[i, 0] >= num and
-                         edge_list_flat[i, 0] <= 2 * num - 1) and \
-                        (edge_list_flat[i, 1] >= num and
-                         edge_list_flat[i, 1] <= 2 * num - 1) or \
-                        (edge_list_flat[i, 0] >= num and
-                         edge_list_flat[i, 0] <= 2 * num - 1) and \
-                        (edge_list_flat[i, 1] >= 0 and
-                         edge_list_flat[i, 1] <= num - 1):
-                    removed_elements.append(i)
-            res = np.delete(edge_list_flat, removed_elements,
-                            axis=0).transpose()
-            res[0] = res[0] + i_index - 1
-            res[1] = res[1] - num + j_index - 1
-        if res.shape[1] == 0:
-            res = np.zeros((2, 1), dtype=np.int)
-
-        edges = [(res[0, k], res[1, k]) for k in range(0, res.shape[1])]
-        graph.add_edges_from(edges)
-
-        # partial connected components
-
-        subgraphs = nx.connected_components(graph)
-        comp = [g for g in subgraphs]
-        return comp
-        #raise TypeError(data)
+        comp_s = []
+        for data in data_list:
+            window, index = data
+            num = window[0].shape[0]
+            i_index = index[0]
+            j_index = index[1]
+            graph = nx.Graph()
+
+            if i_index == j_index:
+                train = window[0]
+                test = window[1]
+            else:
+                train = np.vstack([window[0], window[1]])
+                test = np.vstack([window[0], window[1]])
+
+            tree = cKDTree(train, leafsize=40)
+            edges = tree.query_ball_point(test, cutoff)
+            edge_list = [list(zip(np.repeat(idx, len(dest_list)), dest_list))
+                         for idx, dest_list in enumerate(edges)]
+
+            edge_list_flat = np.array([list(item) for sublist in edge_list for
+                                       item in sublist])
+
+            if i_index == j_index:
+                res = edge_list_flat.transpose()
+                res[0] = res[0] + i_index - 1
+                res[1] = res[1] + j_index - 1
+            else:
+                removed_elements = list()
+                for i in range(edge_list_flat.shape[0]):
+                    if (edge_list_flat[i, 0] >= 0 and
+                            edge_list_flat[i, 0] <= num - 1) and \
+                            (edge_list_flat[i, 1] >= 0 and
+                             edge_list_flat[i, 1] <= num - 1) or \
+                            (edge_list_flat[i, 0] >= num and
+                             edge_list_flat[i, 0] <= 2 * num - 1) and \
+                            (edge_list_flat[i, 1] >= num and
+                             edge_list_flat[i, 1] <= 2 * num - 1) or \
+                            (edge_list_flat[i, 0] >= num and
+                             edge_list_flat[i, 0] <= 2 * num - 1) and \
+                            (edge_list_flat[i, 1] >= 0 and
+                             edge_list_flat[i, 1] <= num - 1):
+                        removed_elements.append(i)
+                res = np.delete(edge_list_flat, removed_elements,
+                                axis=0).transpose()
+                res[0] = res[0] + i_index - 1
+                res[1] = res[1] - num + j_index - 1
+            if res.shape[1] == 0:
+                res = np.zeros((2, 1), dtype=np.int)
+
+            edges = [(res[0, k], res[1, k]) for k in range(0, res.shape[1])]
+            graph.add_edges_from(edges)
+
+            # partial connected components
+
+            subgraphs = nx.connected_components(graph)
+            comp = [g for g in subgraphs]
+            comp_s.append(comp)
+        return comp_s

     # pylint: disable=arguments-differ
     def _single_frame(self, ts, atomgroups, scheduler_kwargs, n_jobs,
```
Sorry, I think I packed too much into this PR. I intended to explore the possibility of parallelizing LeafletFinder both among frames and within a single frame, because for now it only starts to work on the next frame after all the jobs are done in the current one. So I made this more coarse-grained instead of passing hundreds of jobs per frame times hundreds of frames to the dask graph.

Problem is twofold (after …):
```python
import pickle

import MDAnalysis as mda
from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT

u = mda.Universe(GRO_MEMPROT, XTC_MEMPROT)
u.trajectory[4]  # last frame
pickle.loads(pickle.dumps(u.trajectory))
# EOFError: Trying to seek over max number of frames
```

The major problem is that `trajectory._xdr.current_frame == 5` (1-based). I might need to add an extra fix (and test?) to https://github.com/MDAnalysis/mdanalysis/pull/2723/files, or maybe in an individual PR, since the pickling is handled on its own.
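For reference, the behavior one would presumably want after such a fix is that a pickled reader round-trips at the same frame instead of raising. A hedged sketch of that expectation (assuming the serialization work in MDAnalysis PR #2723; the assertion states the intent, not a released API guarantee):

```python
import pickle

import MDAnalysis as mda
from MDAnalysisTests.datafiles import GRO_MEMPROT, XTC_MEMPROT

u = mda.Universe(GRO_MEMPROT, XTC_MEMPROT)
u.trajectory[4]                      # seek to the last frame
clone = pickle.loads(pickle.dumps(u.trajectory))

# a fixed reader should come back positioned on the same frame
assert clone.ts.frame == u.trajectory.ts.frame == 4
```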
The "last frame" thing is a real issue. Oops! Don't worry about LeafletFinder at the moment; it's not really your job to fix it, and it has lots of issues. (If you need it for your own research and have an interest in getting it working, then that's a bit different, but I'd still say: focus on the core serialization problem for now.)

Just pushed a fix for the "last frame" issue. Not … A solution I can think of is to let …

That's a good analysis of use cases, and it would be useful to write this down somewhere. With PMDA so far (except LeafletFinder) we have been focusing on the simple split-apply-combine because that can be put in a simple "framework". Beyond that it becomes difficult to do a "one size fits all", and it becomes a serious research project in CS. I would be happy if we had a library that allows users to easily write their own split/apply/combine type analysis, and where we provide a few additional parallelized analyses that might not fit into this scheme (such as LeafletFinder). An interesting idea that has been coming up repeatedly is to "stack" multiple analyses, i.e., run multiple … Finally, running one analysis on multiple universes seems to be a standard pleasingly parallel job that can make use of existing workflow management tools – I don't see what we can do directly to support it.
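The "stacking" idea could look roughly like the following sketch: several per-frame callbacks share one pass over the trajectory instead of each analysis iterating on its own. Purely illustrative (the `run_stacked` helper and the `analyses` callables are hypothetical, not an existing PMDA API):

```python
def run_stacked(universe, analyses):
    """Drive several per-frame callables over a single trajectory pass.

    `analyses` is a hypothetical list of callables taking the current
    timestep; a real design would presumably use analysis objects with
    _prepare/_single_frame/_conclude hooks instead.
    """
    results = [[] for _ in analyses]
    for ts in universe.trajectory:       # one pass over all frames
        for slot, analysis in zip(results, analyses):
            slot.append(analysis(ts))
    return results

# usage sketch:
# times, sizes = run_stacked(u, [lambda ts: ts.time,
#                                lambda ts: ts.n_atoms])
```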
```diff
@@ -200,12 +204,13 @@ def _single_frame(self, ts, atomgroups, scheduler_kwargs, n_jobs,
         # Distribute the data over the available cores, apply the map function
         # and execute.
         parAtoms = db.from_sequence(arranged_coord,
-                                    npartitions=len(arranged_coord))
+                                    npartitions=n_jobs)
         parAtomsMap = parAtoms.map_partitions(self._find_connected_components,
                                               cutoff=cutoff)
         Components = parAtomsMap.compute(**scheduler_kwargs)
+        Components = [item for sublist in Components for item in sublist]

         # Gather the results and start the reduction. TODO: think if it can go
         # to the private _reduce method of the based class.
         result = list(Components)
```

LeafletFinder is not parallelized over frames… I am not sure that choosing n_jobs is the correct choice here. Need to look at the original paper/algorithm.
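For reference on the partitioning question, `dask.bag.map_partitions` hands each worker a whole partition as a list, which is why the refactored `_find_connected_components` now consumes a `data_list`. A small self-contained sketch of those semantics (toy integers rather than the PR's pair-matrix windows):

```python
import dask.bag as db

def process_partition(items):
    # receives one partition as a list and returns a list of results,
    # mirroring how _find_connected_components consumes data_list
    return [x * x for x in items]

work = list(range(8))
bag = db.from_sequence(work, npartitions=2)   # 2 partitions -> 2 coarse tasks
squares = bag.map_partitions(process_partition).compute()
# partition outputs are concatenated back into one flat list
assert sorted(squares) == [x * x for x in work]
```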
Storing a full histogram for each frame is bad – you can easily run out of memory. I think it is important that the aggregation is done every step and not just in `_conclude`.

But isn't that what also happens with `_reduce`? It won't pass the full histogram back to the main process, but only the calculated frames in `_dask_helper`.

No, the density reduce:
pmda/pmda/density.py, lines 326 to 332 in 13fa3b5
pmda/pmda/density.py, lines 305 to 306 in 13fa3b5

Btw, the PMDA paper has a discussion on that topic.
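To make the memory argument concrete, a back-of-the-envelope sketch (hypothetical grid size and frame count, standalone code rather than pmda's classes): storing one float64 histogram per frame costs O(n_frames × grid) memory, while reducing every step keeps each worker at O(grid).

```python
import numpy as np

nx = ny = nz = 100                   # hypothetical grid resolution
n_frames = 10_000

per_frame_bytes = nx * ny * nz * 8   # one float64 histogram: ~8 MB
# keeping every frame: ~80 GB -- easy to run out of memory
print(n_frames * per_frame_bytes / 1e9, "GB if every frame is kept")

# reducing every step: memory stays at one grid per worker (~8 MB)
grid = np.zeros((nx, ny, nz))
for _ in range(n_frames):
    h = np.zeros((nx, ny, nz))       # stand-in for this frame's histogramdd
    grid += h                        # fold in immediately
```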