diff --git a/pygsti/forwardsims/torchfwdsim.py b/pygsti/forwardsims/torchfwdsim.py
index 5d61c3f0d..06960d9d1 100644
--- a/pygsti/forwardsims/torchfwdsim.py
+++ b/pygsti/forwardsims/torchfwdsim.py
@@ -32,6 +32,18 @@
 try:
     import torch
     TORCH_ENABLED = True
+
+    def todevice_kwargs():
+        if torch.cuda.device_count() > 0:
+            return {'dtype': torch.float64, 'device': 'cuda:0'}
+        elif torch.mps.device_count() > 0:
+            return {'dtype': torch.float32, 'device': 'mps:0'}  # MPS has no float64 support.
+        elif torch.xpu.device_count() > 0:
+            return {'dtype': torch.float64, 'device': 'xpu:0'}
+        else:
+            return {'dtype': torch.float64, 'device': 'cpu'}  # CPU fallback.
+    DEVICE_KWARGS = todevice_kwargs()
+
 except ImportError:
     TORCH_ENABLED = False
     pass
@@ -71,10 +83,10 @@ class StatelessModel:
     the sophisticated machinery in TorchForwardSimulator's base class.
     """
 
-    def __init__(self, model: ExplicitOpModel, layout: CircuitOutcomeProbabilityArrayLayout):
+    def __init__(self, model: ExplicitOpModel, layout: CircuitOutcomeProbabilityArrayLayout, use_gpu: bool):
         circuits = []
         self.outcome_probs_dim = 0
-        #TODO: Refactor this to use the bulk_expand_instruments_and_separate_povm codepath
+        # TODO: Refactor this to use the bulk_expand_instruments_and_separate_povm codepath
         for _, circuit, outcomes in layout.iter_unique_circuits():
             expanded_circuits = model.expand_instruments_and_separate_povm(circuit, outcomes)
             if len(expanded_circuits) > 1:
@@ -84,12 +96,14 @@ def __init__(self, model: ExplicitOpModel, layout: CircuitOutcomeProbabilityArrayLayout, use_gpu: bool):
             circuits.append(c)
             self.outcome_probs_dim += c.outcome_probs_dim
         self.circuits = circuits
+        self.use_gpu = use_gpu
 
         # We need to verify assumptions on what layout.iter_unique_circuits() returns.
         # Looking at the implementation of that function, the assumptions can be
         # framed in terms of the "layout._element_indices" dict.
         eind = layout._element_indices
         assert isinstance(eind, dict)
+        assert len(eind) > 0
         items = iter(eind.items())
         k_prev, v_prev = next(items)
         assert k_prev == 0
@@ -105,13 +119,16 @@ def __init__(self, model: ExplicitOpModel, layout: CircuitOutcomeProbabilityArrayLayout, use_gpu: bool):
         for lbl, obj in model._iter_parameterized_objs():
             assert isinstance(obj, Torchable), f"{type(obj)} does not subclass {Torchable}."
             param_type = type(obj)
-            param_data = (lbl, param_type) + (obj.stateless_data(),)
+            sld = obj.stateless_data()
+            if self.use_gpu:
+                sld = tuple((i.to(**DEVICE_KWARGS) if isinstance(i, torch.Tensor) else i) for i in sld)
+            param_data = (lbl, param_type) + (sld,)
             self.param_metadata.append(param_data)
 
-        self.params_dim = None
-        # ^ That's set in get_free_params.
-        self.default_to_reverse_ad = None
-        # ^ That'll be set to a boolean the next time that get_free_params is called.
+        self.params_dim = 0
+        self.free_param_sizes = []
+        self.default_to_reverse_ad = False
+        # ^ These get their real values in get_free_params.
         return
 
@@ -123,6 +140,7 @@ def get_free_params(self, model: ExplicitOpModel) -> Tuple[torch.Tensor]:
         to StatelessModel.__init__(...). We raise an error if an inconsistency is detected.
         """
         free_params = []
+        free_param_sizes = []
         prev_idx = 0
         for i, (lbl, obj) in enumerate(model._iter_parameterized_objs()):
             gpind = obj.gpindices_as_array()
@@ -143,7 +161,11 @@
                 call to get_torch_bases would silently fail, so we're forced to raise an error here.
                 """
                 raise ValueError(message)
+            if self.use_gpu:
+                vec = vec.to(**DEVICE_KWARGS)
             free_params.append(vec)
+            free_param_sizes.append(vec_size)
+        self.free_param_sizes = free_param_sizes
         self.params_dim = prev_idx
         self.default_to_reverse_ad = self.outcome_probs_dim < self.params_dim
         return tuple(free_params)
@@ -157,7 +179,7 @@ def get_torch_bases(self, free_params: Tuple[torch.Tensor]) -> Dict[Label, torch.Tensor]:
         ----
         If you want to use the returned dict to build a PyTorch Tensor that supports
         the .backward() method, then you need to make sure that fp.requires_grad is True for all
-        fp in free_params. This can be done by calling fp._requires_grad(True) before calling
+        fp in free_params. This can be done by calling fp.requires_grad_(True) before calling
         this function.
         """
         assert len(free_params) == len(self.param_metadata)
@@ -202,8 +224,9 @@ def circuit_probs_from_free_params(self, *free_params: Tuple[torch.Tensor], enable_backward=False):
         """
         if enable_backward:
             for fp in free_params:
-                fp._requires_grad(True)
-            torch_bases = self.get_torch_bases(free_params)
+                fp.requires_grad_(True)
+
+        torch_bases = self.get_torch_bases(free_params)  # type: ignore
         probs = self.circuit_probs_from_torch_bases(torch_bases)
         return probs
@@ -215,15 +238,16 @@ class TorchForwardSimulator(ForwardSimulator):
 
     ENABLED = TORCH_ENABLED
 
-    def __init__(self, model : Optional[ExplicitOpModel] = None):
+    def __init__(self, model: Optional[ExplicitOpModel] = None, use_gpu=True):
         if not self.ENABLED:
             raise RuntimeError('PyTorch could not be imported.')
         self.model = model
+        self.use_gpu = use_gpu
         super(ForwardSimulator, self).__init__(model)
 
     def _bulk_fill_probs(self, array_to_fill, layout, split_model = None) -> None:
         if split_model is None:
-            slm = StatelessModel(self.model, layout)
+            slm = StatelessModel(self.model, layout, self.use_gpu)
             free_params = slm.get_free_params(self.model)
             torch_bases = slm.get_torch_bases(free_params)
         else:
@@ -234,7 +258,7 @@ def _bulk_fill_probs(self, array_to_fill, layout, split_model = None) -> None:
         return
 
     def _bulk_fill_dprobs(self, array_to_fill, layout, pr_array_to_fill) -> None:
-        slm = StatelessModel(self.model, layout)
+        slm = StatelessModel(self.model, layout, self.use_gpu)
         # ^ TODO: figure out how to safely recycle StatelessModel objects from one
         #   call to another. The current implementation is wasteful if we need to
         #   compute many jacobians without structural changes to layout or self.model.
@@ -247,18 +271,17 @@ def _bulk_fill_dprobs(self, array_to_fill, layout, pr_array_to_fill) -> None:
         argnums = tuple(range(len(slm.param_metadata)))
         if slm.default_to_reverse_ad:
-            # Then slm.circuit_probs_from_free_params will automatically construct the
-            # torch_base dict to support reverse-mode AD.
-            J_func = torch.func.jacrev(slm.circuit_probs_from_free_params, argnums=argnums)
+            J_func = torch.func.jacrev(slm.circuit_probs_from_free_params, argnums=argnums)  # type: ignore
         else:
-            # Then slm.circuit_probs_from_free_params will automatically skip the extra
-            # steps needed for torch_base to support reverse-mode AD.
-            J_func = torch.func.jacfwd(slm.circuit_probs_from_free_params, argnums=argnums)
+            J_func = torch.func.jacfwd(slm.circuit_probs_from_free_params, argnums=argnums)  # type: ignore
         # ^ Note that this _bulk_fill_dprobs function doesn't accept parameters that
         #   could be used to override the default behavior of the StatelessModel. If we
         #   have a need to override the default in the future then we'd need to override
         #   the ForwardSimulator function(s) that call self._bulk_fill_dprobs(...).
+        # if self.use_gpu:
+        #     J_func = make_fx(J_func, tracing_mode='fake')
+        #     ^ see https://github.com/pytorch/pytorch/issues/152701#issuecomment-2847838362
         J_val = J_func(*free_params)
         J_val = torch.column_stack(J_val)
         array_to_fill[:] = J_val.cpu().detach().numpy()
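The jacrev/jacfwd dispatch above follows the usual cost model: reverse-mode AD needs roughly one pass per output, forward-mode roughly one per input, which is why reverse mode is chosen exactly when outcome_probs_dim < params_dim. A self-contained sketch of the same dispatch on a toy function (the function and sizes here are illustrative, not pyGSTi code):

import torch

def toy_probs(a, b):  # stand-in for slm.circuit_probs_from_free_params
    return torch.stack([a.sum() * b.sum(), (a * a).sum()])

a = torch.ones(5, dtype=torch.float64)
b = torch.ones(3, dtype=torch.float64)
out_dim, params_dim = 2, a.numel() + b.numel()
if out_dim < params_dim:
    J_func = torch.func.jacrev(toy_probs, argnums=(0, 1))  # few outputs: reverse mode
else:
    J_func = torch.func.jacfwd(toy_probs, argnums=(0, 1))  # few inputs: forward mode
J = torch.column_stack(J_func(a, b))  # one Jacobian block per argument, as in _bulk_fill_dprobs
assert J.shape == (out_dim, params_dim)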
""" raise ValueError(message) + if self.use_gpu: + vec = vec.to(**DEVICE_KWARGS) free_params.append(vec) + free_param_sizes.append(vec_size) + self.free_param_sizes = free_param_sizes self.params_dim = prev_idx self.default_to_reverse_ad = self.outcome_probs_dim < self.params_dim return tuple(free_params) @@ -157,7 +179,7 @@ def get_torch_bases(self, free_params: Tuple[torch.Tensor]) -> Dict[Label, torch ---- If you want to use the returned dict to build a PyTorch Tensor that supports the .backward() method, then you need to make sure that fp.requires_grad is True for all - fp in free_params. This can be done by calling fp._requires_grad(True) before calling + fp in free_params. This can be done by calling fp.requires_grad_(True) before calling this function. """ assert len(free_params) == len(self.param_metadata) @@ -202,8 +224,9 @@ def circuit_probs_from_free_params(self, *free_params: Tuple[torch.Tensor], enab """ if enable_backward: for fp in free_params: - fp._requires_grad(True) - torch_bases = self.get_torch_bases(free_params) + fp.requires_grad_(True) + + torch_bases = self.get_torch_bases(free_params) # type: ignore probs = self.circuit_probs_from_torch_bases(torch_bases) return probs @@ -215,15 +238,16 @@ class TorchForwardSimulator(ForwardSimulator): ENABLED = TORCH_ENABLED - def __init__(self, model : Optional[ExplicitOpModel] = None): + def __init__(self, model : Optional[ExplicitOpModel] = None, use_gpu=True): if not self.ENABLED: raise RuntimeError('PyTorch could not be imported.') self.model = model + self.use_gpu = use_gpu super(ForwardSimulator, self).__init__(model) def _bulk_fill_probs(self, array_to_fill, layout, split_model = None) -> None: if split_model is None: - slm = StatelessModel(self.model, layout) + slm = StatelessModel(self.model, layout, self.use_gpu) free_params = slm.get_free_params(self.model) torch_bases = slm.get_torch_bases(free_params) else: @@ -234,7 +258,7 @@ def _bulk_fill_probs(self, array_to_fill, layout, split_model = None) -> None: return def _bulk_fill_dprobs(self, array_to_fill, layout, pr_array_to_fill) -> None: - slm = StatelessModel(self.model, layout) + slm = StatelessModel(self.model, layout, self.use_gpu) # ^ TODO: figure out how to safely recycle StatelessModel objects from one # call to another. The current implementation is wasteful if we need to # compute many jacobians without structural changes to layout or self.model. @@ -247,18 +271,17 @@ def _bulk_fill_dprobs(self, array_to_fill, layout, pr_array_to_fill) -> None: argnums = tuple(range(len(slm.param_metadata))) if slm.default_to_reverse_ad: - # Then slm.circuit_probs_from_free_params will automatically construct the - # torch_base dict to support reverse-mode AD. - J_func = torch.func.jacrev(slm.circuit_probs_from_free_params, argnums=argnums) + J_func = torch.func.jacrev(slm.circuit_probs_from_free_params, argnums=argnums) # type: ignore else: - # Then slm.circuit_probs_from_free_params will automatically skip the extra - # steps needed for torch_base to support reverse-mode AD. - J_func = torch.func.jacfwd(slm.circuit_probs_from_free_params, argnums=argnums) + J_func = torch.func.jacfwd(slm.circuit_probs_from_free_params, argnums=argnums) # type: ignore # ^ Note that this _bulk_fill_dprobs function doesn't accept parameters that # could be used to override the default behavior of the StatelessModel. If we # have a need to override the default in the future then we'd need to override # the ForwardSimulator function(s) that call self._bulk_fill_dprobs(...). 
diff --git a/pygsti/modelmembers/povms/conjugatedeffect.py b/pygsti/modelmembers/povms/conjugatedeffect.py
index dcfaad872..56a4993a4 100644
--- a/pygsti/modelmembers/povms/conjugatedeffect.py
+++ b/pygsti/modelmembers/povms/conjugatedeffect.py
@@ -81,6 +81,13 @@ def __setitem__(self, key, val):
         ret = self.columnvec.__setitem__(key, val)
         self._ptr_has_changed()
         return ret
+
+    def __getstate__(self):
+        # Explicit so pickling doesn't fall through to __getattr__ below.
+        return self.__dict__
+
+    def __setstate__(self, d):
+        self.__dict__.update(d)
 
     def __getattr__(self, attr):
         #use __dict__ so no chance for recursive __getattr__
diff --git a/pygsti/modelmembers/povms/tppovm.py b/pygsti/modelmembers/povms/tppovm.py
index c105210a7..3e2b4dab2 100644
--- a/pygsti/modelmembers/povms/tppovm.py
+++ b/pygsti/modelmembers/povms/tppovm.py
@@ -102,29 +102,28 @@ def to_vector(self):
         vec = _np.concatenate(effect_vecs)
         return vec
 
-    def stateless_data(self) -> Tuple[int, _np.ndarray]:
+    def stateless_data(self) -> Tuple[int, _torch.Tensor, int]:
         num_effects = len(self)
         complement_effect = self[self.complement_label]
         identity = complement_effect.identity.to_vector()
-        return (num_effects, identity)
-
-    @staticmethod
-    def torch_base(sd: Tuple[int, _np.ndarray], t_param: _torch.Tensor) -> _torch.Tensor:
-        num_effects, identity = sd
+        identity = identity.reshape((1, -1))  # make into a row vector
+        t_identity = _torch.from_numpy(identity)
+
         dim = identity.size
-
-        first_basis_vec = _np.zeros(dim)
-        first_basis_vec[0] = dim ** 0.25
+        first_basis_vec = _np.zeros((1, dim))
+        first_basis_vec[0, 0] = dim ** 0.25
         TOL = 1e-15 * _np.sqrt(dim)
         if _np.linalg.norm(first_basis_vec - identity) > TOL:
             # Don't error out. The documentation for the class
             # clearly indicates that the meaning of "identity"
             # can be nonstandard.
             warnings.warn('Unexpected normalization!')
+        return (num_effects, t_identity, dim)
 
-        identity = identity.reshape((1, -1))  # make into a row vector
-        t_identity = _torch.from_numpy(identity)
-        t_param_mat = t_param.reshape((num_effects - 1, dim))
+    @staticmethod
+    def torch_base(sd: Tuple[int, _torch.Tensor, int], t_param: _torch.Tensor) -> _torch.Tensor:
+        num_effects, t_identity, dim = sd
+        t_param_mat = t_param.view(num_effects - 1, dim)
         t_func = t_identity - t_param_mat.sum(axis=0, keepdim=True)
         t = _torch.row_stack((t_param_mat, t_func))
         return t
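The tppovm.py reshuffle keeps the same algebra but performs all numpy work (including the normalization check) once in stateless_data, leaving torch_base purely tensor-native. The complement effect is the identity row minus the sum of the parameterized effect rows, so the stacked effects always sum to the identity. A toy sketch with illustrative sizes:

import torch

num_effects, dim = 2, 4  # hypothetical two-outcome POVM on one qubit
t_identity = torch.zeros((1, dim), dtype=torch.double)
t_identity[0, 0] = dim ** 0.25  # the normalization stateless_data expects
t_param_mat = torch.rand(num_effects - 1, dim, dtype=torch.double)
t_func = t_identity - t_param_mat.sum(axis=0, keepdim=True)  # complement effect
t = torch.row_stack((t_param_mat, t_func))
assert torch.allclose(t.sum(axis=0, keepdim=True), t_identity)  # effects sum to identity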
diff --git a/pygsti/modelmembers/states/densestate.py b/pygsti/modelmembers/states/densestate.py
index 714b39913..0291d9159 100644
--- a/pygsti/modelmembers/states/densestate.py
+++ b/pygsti/modelmembers/states/densestate.py
@@ -101,6 +101,13 @@ def __setitem__(self, key, val):
         ret = self.columnvec.__setitem__(key, val)
         self._ptr_has_changed()
         return ret
+
+    def __getstate__(self):
+        # Explicit so pickling doesn't fall through to __getattr__ below.
+        return self.__dict__
+
+    def __setstate__(self, d):
+        self.__dict__.update(d)
 
     def __getattr__(self, attr):
         #use __dict__ so no chance for recursive __getattr__
diff --git a/pygsti/modelmembers/states/tpstate.py b/pygsti/modelmembers/states/tpstate.py
index 27de1c750..1762d0677 100644
--- a/pygsti/modelmembers/states/tpstate.py
+++ b/pygsti/modelmembers/states/tpstate.py
@@ -166,13 +166,14 @@ def from_vector(self, v, close=False, dirty_value=True):
         self._ptr_has_changed()
         self.dirty = dirty_value
 
-    def stateless_data(self) -> Tuple[int]:
-        return (self.dim,)
+    def stateless_data(self) -> Tuple[_torch.Tensor]:
+        dim = self.dim
+        t_const = (dim ** -0.25) * _torch.ones(1, dtype=_torch.double)
+        return (t_const,)
 
     @staticmethod
-    def torch_base(sd: Tuple[int], t_param: _torch.Tensor) -> _torch.Tensor:
-        dim = sd[0]
-        t_const = (dim ** -0.25) * _torch.ones(1, dtype=_torch.double)
+    def torch_base(sd: Tuple[_torch.Tensor], t_param: _torch.Tensor) -> _torch.Tensor:
+        t_const = sd[0]
         t = _torch.concat((t_const, t_param))
         return t
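The __getstate__/__setstate__ pairs added in conjugatedeffect.py and densestate.py look redundant, but they matter for classes that also define __getattr__: while unpickling, pickle probes the new, still-empty object for __setstate__, and without an explicit method that probe falls into __getattr__ before __dict__ exists. A minimal illustration with a toy class (not pyGSTi code):

import pickle

class Wrapper:  # toy stand-in for the dense state/effect wrapper classes
    def __init__(self):
        self.columnvec = [1.0, 0.0]

    def __getattr__(self, attr):
        # Only called for missing attributes; blows up while __dict__ is empty.
        return getattr(self.__dict__['columnvec'], attr)

    # With these defined on the class, pickle finds real methods and never
    # consults the __getattr__ fallback mid-unpickle.
    def __getstate__(self):
        return self.__dict__

    def __setstate__(self, d):
        self.__dict__.update(d)

w2 = pickle.loads(pickle.dumps(Wrapper()))
assert w2.columnvec == [1.0, 0.0]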
+ """ + # Create a profiler + profiler = cProfile.Profile() + profiler.enable() # Start profiling + + # Call the function with the provided arguments + func(*args, **kwargs) + + profiler.disable() # Stop profiling + + # Create a stream to hold the profiling results + s = io.StringIO() + ps = pstats.Stats(profiler, stream=s).sort_stats('cumulative') + ps.print_stats() # Print the profiling results + + # Parse the output to find the specific function's stats + function_name = subfunction_to_profile.__name__ + function_stats = {} + + for line in s.getvalue().splitlines(): + #print(line) + if function_name in line: + parts = line.split() + # Extract the relevant statistics + function_stats['ncalls'] = int(parts[0]) # Number of calls + function_stats['tottime'] = float(parts[2]) # Total time spent in the function + function_stats['percall'] = float(parts[2]) / int(parts[0]) if int(parts[0]) > 0 else 0 # Time per call + function_stats['cumtime'] = float(parts[3]) # Cumulative time spent in the function + break + + return function_stats diff --git a/temporary_demos/jacobians_2q.py b/temporary_demos/jacobians_2q.py new file mode 100644 index 000000000..010b68ca3 --- /dev/null +++ b/temporary_demos/jacobians_2q.py @@ -0,0 +1,137 @@ + +import numpy as np + +from pygsti.forwardsims import ForwardSimulator, MapForwardSimulator, TorchForwardSimulator +from pygsti.models import Model +from pygsti.layouts.copalayout import CircuitOutcomeProbabilityArrayLayout +from pygsti.circuits import Circuit +from pygsti.modelpacks import smq2Q_XYICNOT, ModelPack +import time + +from typing import Sequence + +BulkFillDProbsArgs = tuple[np.ndarray, CircuitOutcomeProbabilityArrayLayout] + + +def get_sim_name(sim): + tstr = str(type(sim)).lower() + if 'map' in tstr: + simname = 'Map ' + elif 'torch' in tstr: + simname = 'Torch' + elif 'matrix' in tstr: + simname = 'Mat ' + else: + raise ValueError() + if 'simple' in tstr: + raise ValueError() + return simname + + +class Benchmarker: + + def __init__(self, model: Model, sims: list[ForwardSimulator], circuits : Sequence[Circuit]): + assert isinstance(sims, list) + # assert isinstance(sims[0], ForwardSimulator) + self.sims = sims + self.base_model = model + self.models = [model.copy() for _ in range(len(self.sims))] + for i,m in enumerate(self.models): + m.sim = sims[i] + assert len(circuits) > 0 + assert isinstance(circuits[0], Circuit) + self.circuits = np.array(circuits, dtype=object) + return + + @staticmethod + def from_modelpack(modelpack: ModelPack, sims: list[ForwardSimulator], gst_maxlen: int): + model_ideal = modelpack.target_model() + model_ideal.convert_members_inplace(to_type='full TP') + # ^ TorchFowardSimulator can only work with TP modelmembers. + model_noisy = model_ideal.depolarize(op_noise=0.05, spam_noise=0.025) + exp_design = modelpack.create_gst_experiment_design(max_max_length=gst_maxlen, fpr=True) + circuits = exp_design.circuit_lists[-1]._circuits + fsth = Benchmarker(model_noisy, sims, circuits) + return fsth + + def prep_parallelizable_args(self, num_groups: int) -> list[list[BulkFillDProbsArgs]]: + # Step 1: naively divide circuits into num_groups blocks that could be processed + # in parallel by any given simulator. If MapForwardSimulator is run with MPI + # then it would figure out its own blocking in a clever way. 
diff --git a/temporary_demos/jacobians_2q.py b/temporary_demos/jacobians_2q.py
new file mode 100644
index 000000000..010b68ca3
--- /dev/null
+++ b/temporary_demos/jacobians_2q.py
@@ -0,0 +1,131 @@
+
+import numpy as np
+
+from pygsti.forwardsims import ForwardSimulator, MapForwardSimulator, TorchForwardSimulator
+from pygsti.models import Model
+from pygsti.layouts.copalayout import CircuitOutcomeProbabilityArrayLayout
+from pygsti.circuits import Circuit
+from pygsti.modelpacks import smq2Q_XYICNOT, ModelPack
+import time
+
+from typing import Sequence
+
+BulkFillDProbsArgs = tuple[np.ndarray, CircuitOutcomeProbabilityArrayLayout]
+
+
+def get_sim_name(sim):
+    tstr = str(type(sim)).lower()
+    if 'map' in tstr:
+        simname = 'Map  '
+    elif 'torch' in tstr:
+        simname = 'Torch'
+    elif 'matrix' in tstr:
+        simname = 'Mat  '
+    else:
+        raise ValueError()
+    if 'simple' in tstr:
+        raise ValueError()
+    return simname
+
+
+class Benchmarker:
+
+    def __init__(self, model: Model, sims: list[ForwardSimulator], circuits: Sequence[Circuit]):
+        assert isinstance(sims, list)
+        assert isinstance(sims[0], ForwardSimulator)
+        self.sims = sims
+        self.base_model = model
+        self.models = [model.copy() for _ in range(len(self.sims))]
+        for i, m in enumerate(self.models):
+            m.sim = sims[i]
+        assert len(circuits) > 0
+        assert isinstance(circuits[0], Circuit)
+        self.circuits = np.array(circuits, dtype=object)
+        return
+
+    @staticmethod
+    def from_modelpack(modelpack: ModelPack, sims: list[ForwardSimulator], gst_maxlen: int):
+        model_ideal = modelpack.target_model()
+        model_ideal.convert_members_inplace(to_type='full TP')
+        # ^ TorchForwardSimulator can only work with TP modelmembers.
+        model_noisy = model_ideal.depolarize(op_noise=0.05, spam_noise=0.025)
+        exp_design = modelpack.create_gst_experiment_design(max_max_length=gst_maxlen, fpr=True)
+        circuits = exp_design.circuit_lists[-1]._circuits
+        fsth = Benchmarker(model_noisy, sims, circuits)
+        return fsth
+
+    def prep_parallelizable_args(self, num_groups: int) -> list[list[BulkFillDProbsArgs]]:
+        # Step 1: naively divide circuits into num_groups blocks that could be processed
+        # in parallel by any given simulator. If MapForwardSimulator is run with MPI
+        # then it would figure out its own blocking in a clever way.
+        num_circuits = self.circuits.size
+        block_size = int(np.ceil(num_circuits / num_groups))
+        circuit_blocks: list[list[Circuit]] = []
+        for i in range(num_groups):
+            start = block_size*i
+            stop = min(block_size*(i + 1), num_circuits)
+            if start >= num_circuits or start == stop:
+                break
+            block = self.circuits[start:stop].tolist()
+            circuit_blocks.append(block)
+
+        # Step 2: for each model and block of circuits, prepare some datastructures
+        # that the model's simulator needs in order to evaluate bulk_fill_dprobs (the
+        # bottleneck function we'd like to benchmark and eventually accelerate).
+        argblocks_by_model = []
+        for m in self.models:
+            argblocks = []
+            for b in circuit_blocks:
+                layout = m.sim.create_layout(b, array_types=('ep',))  # type: ignore
+                array = layout.allocate_local_array('ep', 'd')
+                args = (array, layout)
+                argblocks.append(args)
+            argblocks_by_model.append(argblocks)
+
+        return argblocks_by_model
+
+    def time_parallelizable_dprobs(self, model, argblocks: list[BulkFillDProbsArgs]):
+        # The for-loop below is embarrassingly parallel.
+        tic = time.time()
+        for array, layout in argblocks:
+            model.sim.bulk_fill_dprobs(array, layout)
+        toc = time.time()
+        t = toc - tic
+        return t
+
+    def compare_dprob_times(self, chunks=1):
+        print('-'*80)
+        argblocks_by_model = self.prep_parallelizable_args(chunks)
+        print('-'*27 + f' {len(self.circuits)} circuits, {chunks} chunks ' + '-'*27)
+        times = np.zeros(len(self.models))
+        for i, (m, argblocks) in enumerate(zip(self.models, argblocks_by_model)):
+            t = self.time_parallelizable_dprobs(m, argblocks)
+            simname = get_sim_name(m.sim)
+            print(f'Time for {simname} : {t}')
+            times[i] = t
+        print('-'*80)
+        return times
+
+
+def demo_benchmarker_on_modelpack(torch_use_gpu: bool, gst_maxlen=32):
+    sims = [TorchForwardSimulator(use_gpu=torch_use_gpu), MapForwardSimulator()]
+    mp = smq2Q_XYICNOT
+    print()
+    b = Benchmarker.from_modelpack(mp, sims, gst_maxlen)
+    b.compare_dprob_times()
+    return
+
+
+if __name__ == '__main__':
+    trials = 2
+
+    print('\n' + '*'*80)
+    print(' '*30 + 'WITH GPU')
+    print('*'*80)
+    for i in range(trials):
+        demo_benchmarker_on_modelpack(True)
+
+    print('\n' + '*'*80)
+    print(' '*30 + 'WITHOUT GPU')
+    print('*'*80)
+    for i in range(trials):
+        demo_benchmarker_on_modelpack(False)
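For a quicker spot-check than the __main__ block, the benchmark can also be driven directly; the maxlen and chunk values below are illustrative, and the import assumes the script is run from the repository root:

from pygsti.forwardsims import MapForwardSimulator, TorchForwardSimulator
from pygsti.modelpacks import smq2Q_XYICNOT
from temporary_demos.jacobians_2q import Benchmarker

# CPU-only Torch simulator vs. the map simulator on the shortest GST circuit list.
sims = [TorchForwardSimulator(use_gpu=False), MapForwardSimulator()]
b = Benchmarker.from_modelpack(smq2Q_XYICNOT, sims, gst_maxlen=1)
b.compare_dprob_times(chunks=1)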