diff --git a/WrightSim/experiment/_scan.py b/WrightSim/experiment/_scan.py
index 24e10b0..318e449 100644
--- a/WrightSim/experiment/_scan.py
+++ b/WrightSim/experiment/_scan.py
@@ -80,10 +80,13 @@ def _gen_efp(self, indices=None):
         return efp
 
     kernel_cuda_source = """
-    __global__ void kernel(double time_start, double time_end, double dt, int nEFields, double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham, pycuda::complex<double>* out)
+    __global__ void kernel(double time_start, double time_end, double dt, int nEFields,
+                           double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham,
+                           pycuda::complex<double>* out)
     {
         int idx = threadIdx.x + blockIdx.x * blockDim.x;
-        runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx*5*nEFields), *(efparams + 2), phase_matching, n_recorded, *ham, out + (idx*ham->nRecorded*n_recorded));
+        runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx * 5 * nEFields),
+                    phase_matching, n_recorded, *ham, out + (idx * ham->nRecorded * n_recorded));
     }
     """
 
@@ -95,7 +98,7 @@ def run(self, mp='cpu', chunk=False):
         mp : {False, 'cpu', 'gpu'} (optional)
             Select multiprocessing: False (or '' or None) means single-threaded.
                                     'gpu' indicates to use the CUDA implementation
-                                    Any other value which evaluates to `True` indicates cpu multiprocessed.
+                                    Any other value which evaluates to ``True`` indicates cpu multiprocessed.
                                     Default is 'cpu'.
 
         Returns
@@ -124,10 +127,15 @@ def run(self, mp='cpu', chunk=False):
             start = np.min(self.efp[..., d_ind]) - self.early_buffer
             stop = np.max(self.efp[..., d_ind]) + self.late_buffer
 
-            mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source + propagate.muladd_cuda_source + propagate.dot_cuda_source + propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source + Scan.kernel_cuda_source)
+            mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source +
+                               propagate.muladd_cuda_source + propagate.dot_cuda_source +
+                               propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source +
+                               Scan.kernel_cuda_source)
 
             kernel = mod.get_function('kernel')
-            kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr, pmPtr, np.intp(self.iprime), hamPtr, sigPtr, grid=(self.array.size//256,1), block=(256,1,1))
+            kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr,
+                   pmPtr, np.intp(self.iprime), hamPtr, sigPtr,
+                   grid=(self.array.size//256,1), block=(256,1,1))
 
             cuda.memcpy_dtoh(self.sig, sigPtr)
         elif mp:
diff --git a/WrightSim/hamiltonian/default.py b/WrightSim/hamiltonian/default.py
index 7c491c2..129aa7b 100644
--- a/WrightSim/hamiltonian/default.py
+++ b/WrightSim/hamiltonian/default.py
@@ -31,6 +31,53 @@ def __init__(self, rho=None, tau=None, mu=None,
                         propagator=None, phase_cycle=False,
                         labels=['00','01 -2','10 2\'','10 1','20 1+2\'','11 1-2','11 2\'-2', '10 1-2+2\'', '21 1-2+2\''],
                         time_orderings=list(range(1,7)), recorded_indices = [7, 8]):
+        """Create a Hamiltonian object.
+
+        Parameters
+        ----------
+        rho : 1-D array <Complex> (optional)
+            The initial density vector, length defines N.
+            Default is 1. in the ground state (index 0), and 0. everywhere else.
+        tau : scalar or 1-D array (optional)
+            The lifetime of the states in femptoseconds.
+            For coherences, this is the pure dephasing time.
+            For populations, this is the population lifetime.
+            If tau is scalar, the same dephasing time is used for all coherences,
+                              Populations do not decay by default (inf lifetime).
+            This value is converted into a rate of decay, Gamma, by the multiplicative inverse.
+            Default is 50.0 fs dephasing for all coherences, infinite for populations.
+        mu : 1-D array <Complex> (optional)
+            The dipole moments used either in computing state densities or output intensities
+            Array like structures are converted to the proper data type.
+            Order matters, and meaning is dependent on the individual Hamiltonian.
+            Default is two values, both initially 1.0.
+        omega : 1-D array <float64> (optional)
+            The energies of various transitions.
+            The default uses w_central and coupling parameters to compute the appropriate
+                values for a TRIVE Hamiltonian
+        w_central : float (optional)
+            The cetral frequency of a resonance for a TRIVE Hamiltonian.
+            Used only when ``omega`` is ``None``.
+        coupling : float (optional)
+            The copuling of states for a TRIVE Hamiltonian.
+            Used only when ``omega`` is ``None``.
+        propagator : function (optional)
+            The evolution function to use to evolve the density matrix over time.
+            Default is ``runge_kutta``.
+        phase_cycle : bool (optional)
+            Whether or not to use phase cycling.
+            Not applicable to all Hamiltonians.
+            Default is ``False``
+        labels : list of string (optional)
+            Human readable labels for the states. Not used in computation, only to keep track.
+        time_orderings : list of int (optional)
+            Which time orderings to use in the simulation.
+            Default is all for a three interaction process: ``[1,2,3,4,5,6]``.
+        recorded_indices : list of int (optional)
+            Which density vector states to record through the simulation.
+            Default is [7, 8], the output of a TRIVE Hamiltonian.
+
+        """
         if rho is None:
             self.rho = np.zeros(len(labels), dtype=np.complex128)
             self.rho[0] = 1.
@@ -44,6 +91,7 @@ def __init__(self, rho=None, tau=None, mu=None,
         else:
             self.tau = tau
 
+        #TODO: Think about dictionaries or some other way of labeling Mu values
         if mu is None:
             self.mu = np.array([1., 1.], dtype=np.complex128)
         else:
@@ -55,7 +103,7 @@ def __init__(self, rho=None, tau=None, mu=None,
             w_2ag = 2*w_ag - coupling
             w_gg = 0.
             w_aa = w_gg
-            self.omega = np.array( [w_gg, -w_ag, w_ag, w_ag, w_2ag, w_aa, w_aa, w_ag, w_2aa] )
+            self.omega = np.array([w_gg, -w_ag, w_ag, w_ag, w_2ag, w_aa, w_aa, w_ag, w_2aa])
         else:
             self.omega = w_0
 
@@ -71,23 +119,33 @@ def __init__(self, rho=None, tau=None, mu=None,
         self.Gamma = 1./self.tau
 
     def to_device(self, pointer):
+        """Transfer the Hamiltonian to a C struct in CUDA device memory.
+
+        Currently expects a pointer to an already allocated chunk of memory.
+        """
         import pycuda.driver as cuda
+        #TODO: Reorganize to allocate here and return the pointer, this is more friendly
+        # Transfer the arrays which make up the hamiltonian
         rho = cuda.to_device(self.rho)
         mu = cuda.to_device(self.mu)
         omega = cuda.to_device(self.omega)
         Gamma = cuda.to_device(self.Gamma)
 
+        # Convert time orderings into a C boolean array of 1 and 0, offset by one
         tos = [1 if i in self.time_orderings else 0 for i in range(1,7)]
 
+        # Transfer time orderings and recorded indices
         time_orderings = cuda.to_device(np.array(tos, dtype=np.int8))
         recorded_indices = cuda.to_device(np.array(self.recorded_indices, dtype=np.int32))
 
+        # Transfer metadata about the lengths of feilds
         cuda.memcpy_htod(pointer, np.array([len(self.rho)], dtype=np.int32))
         cuda.memcpy_htod(int(pointer) + 4, np.array([len(self.mu)], dtype=np.int32))
         #TODO: generalize nTimeOrderings
         cuda.memcpy_htod(int(pointer) + 8, np.array([6], dtype=np.int32))
         cuda.memcpy_htod(int(pointer) + 12, np.array([len(self.recorded_indices)], dtype=np.int32))
 
+        # Set pointers in the struct
         cuda.memcpy_htod(int(pointer) + 16, np.intp(int(rho)))
         cuda.memcpy_htod(int(pointer) + 24, np.intp(int(mu)))
         cuda.memcpy_htod(int(pointer) + 32, np.intp(int(omega)))
@@ -98,13 +156,29 @@ def to_device(self, pointer):
         
 
     def matrix(self, efields, time):
+        """Generate the time dependant Hamiltonian Coupling Matrix.
+
+        Parameters
+        ----------
+        efields : ndarray<Complex>
+            Contains the time dependent electric fields.
+            Shape (M x T) where M is number of electric fields, and T is number of timesteps.
+        time : 1-D array <float64>
+            The time step values
+
+        Returns
+        -------
+        ndarray <Complex> 
+            Shape T x N x N array with the full Hamiltonian at each time step.
+            N is the number of states in the Density vector.
+        """
+        #TODO: Just put the body of this method in here, rather than calling _gen_matrix
         E1,E2,E3 = efields[0:3]
         return  self._gen_matrix(E1, E2, E3, time, self.omega)
 
     def _gen_matrix(self, E1, E2, E3, time, energies):
         """
         creates the coupling array given the input e-fields values for a specific time, t
-        w1first selects whether w1 or w2p is the first interacting positive field
         
         Currently neglecting pathways where w2 and w3 require different frequencies
         (all TRIVE space, or DOVE on diagonal)
@@ -112,12 +186,15 @@ def _gen_matrix(self, E1, E2, E3, time, energies):
         Matrix formulated such that dephasing/relaxation is accounted for 
         outside of the matrix
         """
+        # Define transition energies
         wag  = energies[1]
         w2aa = energies[-1]
         
+        # Define dipole moments
         mu_ag = self.mu[0]
         mu_2aa = self.mu[-1]
     
+        # Define helpful variables
         A_1 = 0.5j * mu_ag * E1 * np.exp(-1j * wag * time)
         A_2 = 0.5j * mu_ag * E2 * np.exp(1j * wag * time)
         A_2prime = 0.5j * mu_ag * E3 * np.exp(-1j * wag * time)
@@ -125,8 +202,10 @@ def _gen_matrix(self, E1, E2, E3, time, energies):
         B_2 = 0.5j * mu_2aa * E2 * np.exp(1j * w2aa * time)
         B_2prime = 0.5j * mu_2aa * E3 * np.exp(-1j * w2aa * time)
 
+        # Initailze the full array of all hamiltonians to zero
         out = np.zeros((len(time), len(energies), len(energies)), dtype=np.complex128)
 
+        # Add appropriate array elements, according to the time orderings
         if 3 in self.time_orderings or 5 in self.time_orderings:
             out[:,1,0] = -A_2
         if 4 in self.time_orderings or 6 in self.time_orderings:
@@ -155,30 +234,54 @@ def _gen_matrix(self, E1, E2, E3, time, energies):
             out[:,7,6] = -2 * A_1
             out[:,8,6] = B_1
 
+        # Add Gamma along the diagonal
         for i in range(len(self.Gamma)):
             out[:,i,i] = -1 * self.Gamma[i]
 
         #NOTE: NISE multiplied outputs by the approriate mu in here
         #      This mu factors out, remember to use it where needed later
-        #      Removed for clarity and aligning with Equation S15 of Kohler2016
+        #      Removed for clarity and aligning with Equation S15 of Kohler2017
 
         return out
 
     cuda_matrix_source = """
+    /**
+     *  Hamiltonian_matrix: Computes the Hamiltonian matrix for an indevidual time step.
+     *  NOTE: This differs from the Python implementation, which computes the full time 
+     *          dependant hamiltonian, this only computes for a single time step
+     *          (to conserve memory).
+     * 
+     *  Parameters
+     *  ----------
+     *  Hamiltonian ham: A struct which represents a hamiltonian,
+     *                   containing orrays omega, mu, and Gamma
+     *  cmplx* efields: A pointer to an array containg the complex valued
+     *                  electric fields to use for evaluation
+     *  double time: the current time step counter
+     *
+     *  Output
+     *  -------
+     *  cmplx* out: an N x N matrix containing the transition probabilities
+     *
+     */
     __device__ void Hamiltonian_matrix(Hamiltonian ham, pycuda::complex<double>* efields,
-                                                           double time, pycuda::complex<double>* out)
+                                       double time, pycuda::complex<double>* out)
     {
+        // Define state energies
         double wag = ham.omega[1];
         double w2aa = ham.omega[8];
 
+        // Define dipoles
+        //TODO: don't assume one, generalize
         pycuda::complex<double> mu_ag =  1.;//ham.mu[0];
         pycuda::complex<double> mu_2aa = 1.;//ham.mu[1];
 
+        // Define the electric field values
         pycuda::complex<double> E1 =  efields[0];
         pycuda::complex<double> E2 =  efields[1];
         pycuda::complex<double> E3 =  efields[2];
-        
 
+        // Define helpful variables
         pycuda::complex<double> A_1 = 0.5 * I * mu_ag * E1 * pycuda::exp(-1. * I * wag * time);
         pycuda::complex<double> A_2 = 0.5 * I * mu_ag * E2 * pycuda::exp(I * wag * time);
         pycuda::complex<double> A_2prime = 0.5 * I * mu_ag * E3 * pycuda::exp(-1. * I * wag * time);
@@ -186,8 +289,10 @@ def _gen_matrix(self, E1, E2, E3, time, energies):
         pycuda::complex<double> B_2 = 0.5 * I * mu_2aa * E2 * pycuda::exp(I * w2aa * time);
         pycuda::complex<double> B_2prime = 0.5 * I * mu_2aa * E3 * pycuda::exp(-1. * I * w2aa * time);
 
+        //TODO: zero once, take this loop out of the inner most loop
         for (int i=0; i<ham.nStates * ham.nStates; i++) out[i] = pycuda::complex<double>();
 
+        // Fill in appropriate matrix elements
         if(ham.time_orderings[2] || ham.time_orderings[4])
             out[1*ham.nStates + 0] = -1. * A_2;
         if(ham.time_orderings[3] || ham.time_orderings[5])
@@ -222,6 +327,7 @@ def _gen_matrix(self, E1, E2, E3, time, energies):
             out[8*ham.nStates + 6] = B_1;
         }
 
+        // Put Gamma along the diagonal
         for(int i=0; i<ham.nStates; i++) out[i*ham.nStates + i] = -1. * ham.Gamma[i];
     }
 """    
diff --git a/WrightSim/mixed/propagate.py b/WrightSim/mixed/propagate.py
index 88cafd2..ffa491d 100644
--- a/WrightSim/mixed/propagate.py
+++ b/WrightSim/mixed/propagate.py
@@ -1,18 +1,26 @@
 import numpy as np
 
 def runge_kutta(t, efields, n_recorded, hamiltonian):
-    """
-    Evolves the hamiltonian in time, given the three 1d array of E-field points 
-    for each E-field and the hamiltonian
-
-    H[pulse_permutations, t, ij_to, ij_from] is a rank 4 array of the hamiltonian 2D 
-    matrix for each time point and necessary pulse permutations
+    """ Evolves the hamiltonian in time using the runge_kutta method.
 
-    Uses Runge-Kutta method to integrate 
-        --> assumes eqaully spaced t-points (t_int is spacing)
+    Parameters
+    ----------
+    t : 1-D array of float
+        Time points, equally spaced array.
+        Shape T, number of timepoints
+    efields : ndarray <Complex>
+        Time dependant electric fields for all pulses.
+        SHape M x T where M is number of electric fields, T is number of time points.
+    n_recorded : int
+        Number of timesteps to record at the end of the simulation.
+    hamiltonian : Hamiltonian
+        The hamiltonian object which contains the inital conditions and the 
+            function to use to obtain the matrices.
 
-    Unlike earlier implementations, delays, phase, etc., must be incorporated
-    externally to the function.  Rotating wave may still be specified.
+    Returns
+    -------
+    ndarray : <Complex>
+        2-D array of recorded density vector elements for each time step in n_recorded.
     """
     # can only call on n_recorded and t after efield_object.E is called
     dt = np.abs(t[1]-t[0])
@@ -25,11 +33,10 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
     emitted_index = 0
     rho_i = hamiltonian.rho.copy()
     for k in range(len(t)-1):
-        # now sum over p equivalent pulse permutations (e.g. TRIVE near-
-        # diagonal has 2 permutations)
         # calculate delta rho based on previous rho values
         temp_delta_rho = np.dot(H[k], rho_i)
         temp_rho_i = rho_i + temp_delta_rho*dt
+        # second order term
         delta_rho = np.dot(H[k+1], temp_rho_i)
         rho_i += dt/2 * (temp_delta_rho + delta_rho)
         # if we are close enough to final coherence emission, start 
@@ -44,11 +51,20 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
     for rec,native in enumerate(hamiltonian.recorded_indices):
         rho_emitted[rec, emitted_index] = rho_i[native]
 
-   # rho_emitted[s,t], s is out_group index, t is time index
     return rho_emitted
 
 muladd_cuda_source = """
-__device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d, int len, pycuda::complex<double>* out)
+/*
+ * muladd: Linear combination of two vectors with two scalar multiples
+ * 
+ * computes a*b + c*d where a and c are vectors, b and d are scalars
+ * Values are stored in out.
+ * If out is the same as a or c, this is an in place operation.
+ * len defines the length to perform the computation.
+ * Generalizes to n-D array, given it is stored in contiguous memory.
+ */
+__device__ void muladd(pycuda::complex<double>* a, double b, pycuda::complex<double>* c, double d,
+                       int len, pycuda::complex<double>* out)
 {
     for (int i=0; i<len; i++)
     {
@@ -58,7 +74,15 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
 """
 
 dot_cuda_source = """
-__device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len, pycuda::complex<double>* out)
+/*
+ * dot: Matrix multiplied by a vector.
+ *
+ * Expects a square NxN matrix and an N-length column vector.
+ * Values are written to out. DO NOT use in place, invalid results will be returned.
+ *
+ */
+__device__ void dot(pycuda::complex<double>* mat, pycuda::complex<double>* vec, int len,
+                    pycuda::complex<double>* out)
 {
     for(int i=0; i<len; i++)
     {
@@ -75,79 +99,136 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
 pulse_cuda_source = """
 #include <math.h>
 
-__device__ void calc_efield_params(double* params, double mu_0, int n)
+/*
+ * calc_efield_params: convert efield params into appropriate values
+ * 
+ * Converts FWHM to standard deviation
+ * Converts frequency into the rotating frame
+ * Converts area of peak to height
+ *
+ * Performs calculaiton on N contiguous sets of paramters, operation in place.
+ *
+ */
+__device__ void calc_efield_params(double* params, int n)
 {
     for(int i=0; i < n; i++)
     {
-        //sigma
+        // FWHM to sigma
         params[1 + i*5] /= (2. * sqrt(log(2.)));
-        //mu
-        //params[2 + i*5] -= mu_0;
-        //freq
+        // Frequency to rotating frame
         params[3 + i*5] *= 2 * M_PI * 3e-5;
-        //area -> y
+        // area -> y
         params[0 + i*5] /= params[1 + i*5] * sqrt(2 * M_PI);
     }
 }
 
-__device__ void calc_efield(double* params, int* phase_matching,  double t, int n, pycuda::complex<double>* out)
+/*
+ * calc_efield: Convert parameters, phase matching, and time into an electric field
+ *
+ * Converts n electric fields at a time, places the complex electric
+ *      field value into out, in contiguous fashion.
+ * The length of the phase_matiching array must be at least n.
+ *
+ */
+__device__ void calc_efield(double* params, int* phase_matching,  double t, int n,
+                            pycuda::complex<double>* out)
 {
+    //TODO: ensure phase matching is done correctly for cases where
+    //      it is not equal to +/- 1 (or 0, though why would you have 0)
+    //      NISE took the sign, so far I have only taken the value
     for(int i=0; i < n; i++)
     {
-        out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) * (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
-        out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5])*(t-params[2 + i*5])/2./params[1 + i*5]/params[1 + i*5]);
+        // Complex phase and magnitude
+        out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) *
+                                        (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5])));
+        // Gaussian envelope
+        out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5]) * (t-params[2 + i*5])
+                                        / 2. / params[1 + i*5] / params[1 + i*5]);
     }
 }
 """
 
 
 runge_kutta_cuda_source = """
-__device__ pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt, 
-                                               const int nEFields, double* efparams, double mu_0, int* phase_matching,
-                                               const int n_recorded, Hamiltonian ham,
-                                               pycuda::complex<double> *out)
+/*
+ * runge_kutta: Propagate electric fields over time using Runge-Kutta integration
+ * 
+ * Parameters
+ * ----------
+ * time_start: inital simulation time
+ * time_end: final simulation time
+ * dt: time step
+ * nEFields: number of electric fields
+ * *efparams: pointer to array of parameters for those electric fields
+ * *phase_matiching: array of phase matching conditions
+ * n_recorded: number of output values to record
+ * ham: Hamiltonian struct containing inital values, passed to matrix generator.
+ * 
+ * Output:
+ * *out: array of recorded values. expects enough memory for n_recorded * ham.nRecorded
+ *          complex values
+ */
+__device__
+pycuda::complex<double>* runge_kutta(const double time_start, const double time_end, const double dt, 
+                                     const int nEFields, double* efparams, int* phase_matching,
+                                     const int n_recorded, Hamiltonian ham,
+                                     pycuda::complex<double> *out)
 {
+    // Allocate arrays and pointers for the Hamiltonians for the current and next step.
     //pycuda::complex<double> *H_cur = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
-   // pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
+    //pycuda::complex<double> *H_next = (pycuda::complex<double>*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex<double>));
+    //TODO: either figure out why dynamically allocated arrays weren't working, or use a #define to statically allocate
     pycuda::complex<double> buf1[81];
     pycuda::complex<double> buf2[81];
 
     pycuda::complex<double>* H_cur = buf1;
     pycuda::complex<double>* H_next = buf2;
 
+    // Track indices in arrays.
     int out_index = 0;
     int index=0;
     
+    // determine number of points.
     int npoints = (int)((time_end-time_start)/dt);
 
+    // Allocate vectors used in computation.
     pycuda::complex<double>* rho_i = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>));
     pycuda::complex<double>* temp_delta_rho = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>)); 
     pycuda::complex<double>* temp_rho_i = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>)); 
     pycuda::complex<double>* delta_rho = (pycuda::complex<double>*)malloc(ham.nStates * sizeof(pycuda::complex<double>)); 
     pycuda::complex<double>* efields = (pycuda::complex<double>*)malloc(nEFields * sizeof(pycuda::complex<double>)); 
 
+    // Inital rho vector.
+    //TODO: Use the inital condition from the hamiltonian
     rho_i[0] = 1.;
     for(int i=1; i<ham.nStates; i++) rho_i[i] = 0.;
 
-    calc_efield_params(efparams, mu_0, nEFields);
+    // Convert from given units to simulation units.
+    calc_efield_params(efparams, nEFields);
 
+    // Compute the first set of electric fields.
     calc_efield(efparams, phase_matching, time_start, nEFields, efields);
 
-
+    // Compute the inital matrix, stored in H_next, to be swapped
     Hamiltonian_matrix(ham, efields, time_start, H_next);
     for(double t = time_start; t < time_end; t += dt)
     {   
+        // Swap pointers to current and next hamiltonians
         pycuda::complex<double>* temp = H_cur;
         H_cur = H_next;
         H_next = temp;
+        
+        // First order
         calc_efield(efparams, phase_matching, t+dt, nEFields, efields);
         Hamiltonian_matrix(ham, efields, t+dt, H_next);
         dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
         muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, temp_rho_i);
+        // Second order
         dot(H_next, temp_rho_i, ham.nStates, delta_rho);
         muladd(temp_delta_rho, 1., delta_rho, 1., ham.nStates, delta_rho);
         muladd(rho_i, 1., delta_rho, dt/2., ham.nStates, rho_i);
 
+        // Record results if close enough to the end
         if(index > npoints - n_recorded)
         {
             for(int i=0; i < ham.nRecorded; i++)
@@ -159,6 +240,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
         index++;
     }
     
+    // Last point, only first order, recorded
     dot(H_cur, rho_i, ham.nStates, temp_delta_rho);
     muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, rho_i);
     for(int i=0; i < ham.nRecorded; i++)
@@ -171,6 +253,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian):
     free(temp_rho_i);
     free(delta_rho);
     free(efields);
+
     return out;
 }
 """