diff --git a/WrightSim/experiment/_scan.py b/WrightSim/experiment/_scan.py index 24e10b0..318e449 100644 --- a/WrightSim/experiment/_scan.py +++ b/WrightSim/experiment/_scan.py @@ -80,10 +80,13 @@ def _gen_efp(self, indices=None): return efp kernel_cuda_source = """ - __global__ void kernel(double time_start, double time_end, double dt, int nEFields, double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham, pycuda::complex* out) + __global__ void kernel(double time_start, double time_end, double dt, int nEFields, + double* efparams, int* phase_matching, int n_recorded, Hamiltonian* ham, + pycuda::complex* out) { int idx = threadIdx.x + blockIdx.x * blockDim.x; - runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx*5*nEFields), *(efparams + 2), phase_matching, n_recorded, *ham, out + (idx*ham->nRecorded*n_recorded)); + runge_kutta(time_start, time_end, dt, nEFields, efparams + (idx * 5 * nEFields), + phase_matching, n_recorded, *ham, out + (idx * ham->nRecorded * n_recorded)); } """ @@ -95,7 +98,7 @@ def run(self, mp='cpu', chunk=False): mp : {False, 'cpu', 'gpu'} (optional) Select multiprocessing: False (or '' or None) means single-threaded. 'gpu' indicates to use the CUDA implementation - Any other value which evaluates to `True` indicates cpu multiprocessed. + Any other value which evaluates to ``True`` indicates cpu multiprocessed. Default is 'cpu'. Returns @@ -124,10 +127,15 @@ def run(self, mp='cpu', chunk=False): start = np.min(self.efp[..., d_ind]) - self.early_buffer stop = np.max(self.efp[..., d_ind]) + self.late_buffer - mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source + propagate.muladd_cuda_source + propagate.dot_cuda_source + propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source + Scan.kernel_cuda_source) + mod = SourceModule(self.ham.cuda_struct + self.ham.cuda_matrix_source + + propagate.muladd_cuda_source + propagate.dot_cuda_source + + propagate.pulse_cuda_source + propagate.runge_kutta_cuda_source + + Scan.kernel_cuda_source) kernel = mod.get_function('kernel') - kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr, pmPtr, np.intp(self.iprime), hamPtr, sigPtr, grid=(self.array.size//256,1), block=(256,1,1)) + kernel(start, stop, np.float64(self.timestep), np.intp(3), efpPtr, + pmPtr, np.intp(self.iprime), hamPtr, sigPtr, + grid=(self.array.size//256,1), block=(256,1,1)) cuda.memcpy_dtoh(self.sig, sigPtr) elif mp: diff --git a/WrightSim/hamiltonian/default.py b/WrightSim/hamiltonian/default.py index 7c491c2..129aa7b 100644 --- a/WrightSim/hamiltonian/default.py +++ b/WrightSim/hamiltonian/default.py @@ -31,6 +31,53 @@ def __init__(self, rho=None, tau=None, mu=None, propagator=None, phase_cycle=False, labels=['00','01 -2','10 2\'','10 1','20 1+2\'','11 1-2','11 2\'-2', '10 1-2+2\'', '21 1-2+2\''], time_orderings=list(range(1,7)), recorded_indices = [7, 8]): + """Create a Hamiltonian object. + + Parameters + ---------- + rho : 1-D array (optional) + The initial density vector, length defines N. + Default is 1. in the ground state (index 0), and 0. everywhere else. + tau : scalar or 1-D array (optional) + The lifetime of the states in femptoseconds. + For coherences, this is the pure dephasing time. + For populations, this is the population lifetime. + If tau is scalar, the same dephasing time is used for all coherences, + Populations do not decay by default (inf lifetime). + This value is converted into a rate of decay, Gamma, by the multiplicative inverse. + Default is 50.0 fs dephasing for all coherences, infinite for populations. + mu : 1-D array (optional) + The dipole moments used either in computing state densities or output intensities + Array like structures are converted to the proper data type. + Order matters, and meaning is dependent on the individual Hamiltonian. + Default is two values, both initially 1.0. + omega : 1-D array (optional) + The energies of various transitions. + The default uses w_central and coupling parameters to compute the appropriate + values for a TRIVE Hamiltonian + w_central : float (optional) + The cetral frequency of a resonance for a TRIVE Hamiltonian. + Used only when ``omega`` is ``None``. + coupling : float (optional) + The copuling of states for a TRIVE Hamiltonian. + Used only when ``omega`` is ``None``. + propagator : function (optional) + The evolution function to use to evolve the density matrix over time. + Default is ``runge_kutta``. + phase_cycle : bool (optional) + Whether or not to use phase cycling. + Not applicable to all Hamiltonians. + Default is ``False`` + labels : list of string (optional) + Human readable labels for the states. Not used in computation, only to keep track. + time_orderings : list of int (optional) + Which time orderings to use in the simulation. + Default is all for a three interaction process: ``[1,2,3,4,5,6]``. + recorded_indices : list of int (optional) + Which density vector states to record through the simulation. + Default is [7, 8], the output of a TRIVE Hamiltonian. + + """ if rho is None: self.rho = np.zeros(len(labels), dtype=np.complex128) self.rho[0] = 1. @@ -44,6 +91,7 @@ def __init__(self, rho=None, tau=None, mu=None, else: self.tau = tau + #TODO: Think about dictionaries or some other way of labeling Mu values if mu is None: self.mu = np.array([1., 1.], dtype=np.complex128) else: @@ -55,7 +103,7 @@ def __init__(self, rho=None, tau=None, mu=None, w_2ag = 2*w_ag - coupling w_gg = 0. w_aa = w_gg - self.omega = np.array( [w_gg, -w_ag, w_ag, w_ag, w_2ag, w_aa, w_aa, w_ag, w_2aa] ) + self.omega = np.array([w_gg, -w_ag, w_ag, w_ag, w_2ag, w_aa, w_aa, w_ag, w_2aa]) else: self.omega = w_0 @@ -71,23 +119,33 @@ def __init__(self, rho=None, tau=None, mu=None, self.Gamma = 1./self.tau def to_device(self, pointer): + """Transfer the Hamiltonian to a C struct in CUDA device memory. + + Currently expects a pointer to an already allocated chunk of memory. + """ import pycuda.driver as cuda + #TODO: Reorganize to allocate here and return the pointer, this is more friendly + # Transfer the arrays which make up the hamiltonian rho = cuda.to_device(self.rho) mu = cuda.to_device(self.mu) omega = cuda.to_device(self.omega) Gamma = cuda.to_device(self.Gamma) + # Convert time orderings into a C boolean array of 1 and 0, offset by one tos = [1 if i in self.time_orderings else 0 for i in range(1,7)] + # Transfer time orderings and recorded indices time_orderings = cuda.to_device(np.array(tos, dtype=np.int8)) recorded_indices = cuda.to_device(np.array(self.recorded_indices, dtype=np.int32)) + # Transfer metadata about the lengths of feilds cuda.memcpy_htod(pointer, np.array([len(self.rho)], dtype=np.int32)) cuda.memcpy_htod(int(pointer) + 4, np.array([len(self.mu)], dtype=np.int32)) #TODO: generalize nTimeOrderings cuda.memcpy_htod(int(pointer) + 8, np.array([6], dtype=np.int32)) cuda.memcpy_htod(int(pointer) + 12, np.array([len(self.recorded_indices)], dtype=np.int32)) + # Set pointers in the struct cuda.memcpy_htod(int(pointer) + 16, np.intp(int(rho))) cuda.memcpy_htod(int(pointer) + 24, np.intp(int(mu))) cuda.memcpy_htod(int(pointer) + 32, np.intp(int(omega))) @@ -98,13 +156,29 @@ def to_device(self, pointer): def matrix(self, efields, time): + """Generate the time dependant Hamiltonian Coupling Matrix. + + Parameters + ---------- + efields : ndarray + Contains the time dependent electric fields. + Shape (M x T) where M is number of electric fields, and T is number of timesteps. + time : 1-D array + The time step values + + Returns + ------- + ndarray + Shape T x N x N array with the full Hamiltonian at each time step. + N is the number of states in the Density vector. + """ + #TODO: Just put the body of this method in here, rather than calling _gen_matrix E1,E2,E3 = efields[0:3] return self._gen_matrix(E1, E2, E3, time, self.omega) def _gen_matrix(self, E1, E2, E3, time, energies): """ creates the coupling array given the input e-fields values for a specific time, t - w1first selects whether w1 or w2p is the first interacting positive field Currently neglecting pathways where w2 and w3 require different frequencies (all TRIVE space, or DOVE on diagonal) @@ -112,12 +186,15 @@ def _gen_matrix(self, E1, E2, E3, time, energies): Matrix formulated such that dephasing/relaxation is accounted for outside of the matrix """ + # Define transition energies wag = energies[1] w2aa = energies[-1] + # Define dipole moments mu_ag = self.mu[0] mu_2aa = self.mu[-1] + # Define helpful variables A_1 = 0.5j * mu_ag * E1 * np.exp(-1j * wag * time) A_2 = 0.5j * mu_ag * E2 * np.exp(1j * wag * time) A_2prime = 0.5j * mu_ag * E3 * np.exp(-1j * wag * time) @@ -125,8 +202,10 @@ def _gen_matrix(self, E1, E2, E3, time, energies): B_2 = 0.5j * mu_2aa * E2 * np.exp(1j * w2aa * time) B_2prime = 0.5j * mu_2aa * E3 * np.exp(-1j * w2aa * time) + # Initailze the full array of all hamiltonians to zero out = np.zeros((len(time), len(energies), len(energies)), dtype=np.complex128) + # Add appropriate array elements, according to the time orderings if 3 in self.time_orderings or 5 in self.time_orderings: out[:,1,0] = -A_2 if 4 in self.time_orderings or 6 in self.time_orderings: @@ -155,30 +234,54 @@ def _gen_matrix(self, E1, E2, E3, time, energies): out[:,7,6] = -2 * A_1 out[:,8,6] = B_1 + # Add Gamma along the diagonal for i in range(len(self.Gamma)): out[:,i,i] = -1 * self.Gamma[i] #NOTE: NISE multiplied outputs by the approriate mu in here # This mu factors out, remember to use it where needed later - # Removed for clarity and aligning with Equation S15 of Kohler2016 + # Removed for clarity and aligning with Equation S15 of Kohler2017 return out cuda_matrix_source = """ + /** + * Hamiltonian_matrix: Computes the Hamiltonian matrix for an indevidual time step. + * NOTE: This differs from the Python implementation, which computes the full time + * dependant hamiltonian, this only computes for a single time step + * (to conserve memory). + * + * Parameters + * ---------- + * Hamiltonian ham: A struct which represents a hamiltonian, + * containing orrays omega, mu, and Gamma + * cmplx* efields: A pointer to an array containg the complex valued + * electric fields to use for evaluation + * double time: the current time step counter + * + * Output + * ------- + * cmplx* out: an N x N matrix containing the transition probabilities + * + */ __device__ void Hamiltonian_matrix(Hamiltonian ham, pycuda::complex* efields, - double time, pycuda::complex* out) + double time, pycuda::complex* out) { + // Define state energies double wag = ham.omega[1]; double w2aa = ham.omega[8]; + // Define dipoles + //TODO: don't assume one, generalize pycuda::complex mu_ag = 1.;//ham.mu[0]; pycuda::complex mu_2aa = 1.;//ham.mu[1]; + // Define the electric field values pycuda::complex E1 = efields[0]; pycuda::complex E2 = efields[1]; pycuda::complex E3 = efields[2]; - + // Define helpful variables pycuda::complex A_1 = 0.5 * I * mu_ag * E1 * pycuda::exp(-1. * I * wag * time); pycuda::complex A_2 = 0.5 * I * mu_ag * E2 * pycuda::exp(I * wag * time); pycuda::complex A_2prime = 0.5 * I * mu_ag * E3 * pycuda::exp(-1. * I * wag * time); @@ -186,8 +289,10 @@ def _gen_matrix(self, E1, E2, E3, time, energies): pycuda::complex B_2 = 0.5 * I * mu_2aa * E2 * pycuda::exp(I * w2aa * time); pycuda::complex B_2prime = 0.5 * I * mu_2aa * E3 * pycuda::exp(-1. * I * w2aa * time); + //TODO: zero once, take this loop out of the inner most loop for (int i=0; i(); + // Fill in appropriate matrix elements if(ham.time_orderings[2] || ham.time_orderings[4]) out[1*ham.nStates + 0] = -1. * A_2; if(ham.time_orderings[3] || ham.time_orderings[5]) @@ -222,6 +327,7 @@ def _gen_matrix(self, E1, E2, E3, time, energies): out[8*ham.nStates + 6] = B_1; } + // Put Gamma along the diagonal for(int i=0; i assumes eqaully spaced t-points (t_int is spacing) + Parameters + ---------- + t : 1-D array of float + Time points, equally spaced array. + Shape T, number of timepoints + efields : ndarray + Time dependant electric fields for all pulses. + SHape M x T where M is number of electric fields, T is number of time points. + n_recorded : int + Number of timesteps to record at the end of the simulation. + hamiltonian : Hamiltonian + The hamiltonian object which contains the inital conditions and the + function to use to obtain the matrices. - Unlike earlier implementations, delays, phase, etc., must be incorporated - externally to the function. Rotating wave may still be specified. + Returns + ------- + ndarray : + 2-D array of recorded density vector elements for each time step in n_recorded. """ # can only call on n_recorded and t after efield_object.E is called dt = np.abs(t[1]-t[0]) @@ -25,11 +33,10 @@ def runge_kutta(t, efields, n_recorded, hamiltonian): emitted_index = 0 rho_i = hamiltonian.rho.copy() for k in range(len(t)-1): - # now sum over p equivalent pulse permutations (e.g. TRIVE near- - # diagonal has 2 permutations) # calculate delta rho based on previous rho values temp_delta_rho = np.dot(H[k], rho_i) temp_rho_i = rho_i + temp_delta_rho*dt + # second order term delta_rho = np.dot(H[k+1], temp_rho_i) rho_i += dt/2 * (temp_delta_rho + delta_rho) # if we are close enough to final coherence emission, start @@ -44,11 +51,20 @@ def runge_kutta(t, efields, n_recorded, hamiltonian): for rec,native in enumerate(hamiltonian.recorded_indices): rho_emitted[rec, emitted_index] = rho_i[native] - # rho_emitted[s,t], s is out_group index, t is time index return rho_emitted muladd_cuda_source = """ -__device__ void muladd(pycuda::complex* a, double b, pycuda::complex* c, double d, int len, pycuda::complex* out) +/* + * muladd: Linear combination of two vectors with two scalar multiples + * + * computes a*b + c*d where a and c are vectors, b and d are scalars + * Values are stored in out. + * If out is the same as a or c, this is an in place operation. + * len defines the length to perform the computation. + * Generalizes to n-D array, given it is stored in contiguous memory. + */ +__device__ void muladd(pycuda::complex* a, double b, pycuda::complex* c, double d, + int len, pycuda::complex* out) { for (int i=0; i* mat, pycuda::complex* vec, int len, pycuda::complex* out) +/* + * dot: Matrix multiplied by a vector. + * + * Expects a square NxN matrix and an N-length column vector. + * Values are written to out. DO NOT use in place, invalid results will be returned. + * + */ +__device__ void dot(pycuda::complex* mat, pycuda::complex* vec, int len, + pycuda::complex* out) { for(int i=0; i -__device__ void calc_efield_params(double* params, double mu_0, int n) +/* + * calc_efield_params: convert efield params into appropriate values + * + * Converts FWHM to standard deviation + * Converts frequency into the rotating frame + * Converts area of peak to height + * + * Performs calculaiton on N contiguous sets of paramters, operation in place. + * + */ +__device__ void calc_efield_params(double* params, int n) { for(int i=0; i < n; i++) { - //sigma + // FWHM to sigma params[1 + i*5] /= (2. * sqrt(log(2.))); - //mu - //params[2 + i*5] -= mu_0; - //freq + // Frequency to rotating frame params[3 + i*5] *= 2 * M_PI * 3e-5; - //area -> y + // area -> y params[0 + i*5] /= params[1 + i*5] * sqrt(2 * M_PI); } } -__device__ void calc_efield(double* params, int* phase_matching, double t, int n, pycuda::complex* out) +/* + * calc_efield: Convert parameters, phase matching, and time into an electric field + * + * Converts n electric fields at a time, places the complex electric + * field value into out, in contiguous fashion. + * The length of the phase_matiching array must be at least n. + * + */ +__device__ void calc_efield(double* params, int* phase_matching, double t, int n, + pycuda::complex* out) { + //TODO: ensure phase matching is done correctly for cases where + // it is not equal to +/- 1 (or 0, though why would you have 0) + // NISE took the sign, so far I have only taken the value for(int i=0; i < n; i++) { - out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) * (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5]))); - out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5])*(t-params[2 + i*5])/2./params[1 + i*5]/params[1 + i*5]); + // Complex phase and magnitude + out[i] = pycuda::exp(-1. * I * ((double)(phase_matching[i]) * + (params[3 + i*5] * (t - params[2 + i*5]) + params[4 + i*5]))); + // Gaussian envelope + out[i] *= params[0 + i*5] * exp(-1 * (t-params[2 + i*5]) * (t-params[2 + i*5]) + / 2. / params[1 + i*5] / params[1 + i*5]); } } """ runge_kutta_cuda_source = """ -__device__ pycuda::complex* runge_kutta(const double time_start, const double time_end, const double dt, - const int nEFields, double* efparams, double mu_0, int* phase_matching, - const int n_recorded, Hamiltonian ham, - pycuda::complex *out) +/* + * runge_kutta: Propagate electric fields over time using Runge-Kutta integration + * + * Parameters + * ---------- + * time_start: inital simulation time + * time_end: final simulation time + * dt: time step + * nEFields: number of electric fields + * *efparams: pointer to array of parameters for those electric fields + * *phase_matiching: array of phase matching conditions + * n_recorded: number of output values to record + * ham: Hamiltonian struct containing inital values, passed to matrix generator. + * + * Output: + * *out: array of recorded values. expects enough memory for n_recorded * ham.nRecorded + * complex values + */ +__device__ +pycuda::complex* runge_kutta(const double time_start, const double time_end, const double dt, + const int nEFields, double* efparams, int* phase_matching, + const int n_recorded, Hamiltonian ham, + pycuda::complex *out) { + // Allocate arrays and pointers for the Hamiltonians for the current and next step. //pycuda::complex *H_cur = (pycuda::complex*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex)); - // pycuda::complex *H_next = (pycuda::complex*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex)); + //pycuda::complex *H_next = (pycuda::complex*)malloc(ham.nStates * ham.nStates * sizeof(pycuda::complex)); + //TODO: either figure out why dynamically allocated arrays weren't working, or use a #define to statically allocate pycuda::complex buf1[81]; pycuda::complex buf2[81]; pycuda::complex* H_cur = buf1; pycuda::complex* H_next = buf2; + // Track indices in arrays. int out_index = 0; int index=0; + // determine number of points. int npoints = (int)((time_end-time_start)/dt); + // Allocate vectors used in computation. pycuda::complex* rho_i = (pycuda::complex*)malloc(ham.nStates * sizeof(pycuda::complex)); pycuda::complex* temp_delta_rho = (pycuda::complex*)malloc(ham.nStates * sizeof(pycuda::complex)); pycuda::complex* temp_rho_i = (pycuda::complex*)malloc(ham.nStates * sizeof(pycuda::complex)); pycuda::complex* delta_rho = (pycuda::complex*)malloc(ham.nStates * sizeof(pycuda::complex)); pycuda::complex* efields = (pycuda::complex*)malloc(nEFields * sizeof(pycuda::complex)); + // Inital rho vector. + //TODO: Use the inital condition from the hamiltonian rho_i[0] = 1.; for(int i=1; i* temp = H_cur; H_cur = H_next; H_next = temp; + + // First order calc_efield(efparams, phase_matching, t+dt, nEFields, efields); Hamiltonian_matrix(ham, efields, t+dt, H_next); dot(H_cur, rho_i, ham.nStates, temp_delta_rho); muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, temp_rho_i); + // Second order dot(H_next, temp_rho_i, ham.nStates, delta_rho); muladd(temp_delta_rho, 1., delta_rho, 1., ham.nStates, delta_rho); muladd(rho_i, 1., delta_rho, dt/2., ham.nStates, rho_i); + // Record results if close enough to the end if(index > npoints - n_recorded) { for(int i=0; i < ham.nRecorded; i++) @@ -159,6 +240,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian): index++; } + // Last point, only first order, recorded dot(H_cur, rho_i, ham.nStates, temp_delta_rho); muladd(rho_i, 1., temp_delta_rho, dt, ham.nStates, rho_i); for(int i=0; i < ham.nRecorded; i++) @@ -171,6 +253,7 @@ def runge_kutta(t, efields, n_recorded, hamiltonian): free(temp_rho_i); free(delta_rho); free(efields); + return out; } """