superduperdb.ext.torch package#

Submodules#

superduperdb.ext.torch.encoder module#

class superduperdb.ext.torch.encoder.DecodeTensor(dtype)[source]#

Bases: object

class superduperdb.ext.torch.encoder.EncodeTensor(dtype)[source]#

Bases: object

superduperdb.ext.torch.encoder.tensor(dtype, shape: Sequence)[source]#

Create an encoder for a tensor of a given dtype and shape.

Parameters:
  • dtype – The dtype of the tensor.

  • shape – The shape of the tensor.
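
A short usage sketch; the dtype and shape below are illustrative choices, not defaults:

import torch
from superduperdb.ext.torch.encoder import tensor

# Encoder for float32 vectors of length 32 (illustrative dtype and shape).
t32 = tensor(torch.float, shape=(32,))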

superduperdb.ext.torch.model module#

class superduperdb.ext.torch.model.BasicDataset(documents, transform)[source]#

Bases: Dataset

Basic dataset iterating over a list of documents and applying a transformation to each.

Parameters:
  • documents – the list of documents to iterate over

  • transform – a callable applied to each document
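
A minimal sketch, assuming the standard Dataset protocol (__len__/__getitem__); the documents and transform here are illustrative:

import torch
from superduperdb.ext.torch.model import BasicDataset

# Illustrative documents and transform; any callable works as the transform.
documents = [{'x': [1.0, 2.0]}, {'x': [3.0, 4.0]}]
dataset = BasicDataset(documents, transform=lambda d: torch.tensor(d['x']))

dataset[0]  # tensor([1., 2.]), assuming __getitem__ applies the transform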

class superduperdb.ext.torch.model.TorchModel(identifier: 'str', artifacts: 'dc.InitVar[t.Optional[t.Dict]]' = None, *, datatype: 'EncoderArg' = None, output_schema: 't.Optional[Schema]' = None, flatten: 'bool' = False, preprocess: 't.Optional[t.Callable]' = None, postprocess: 't.Optional[t.Callable]' = None, collate_fn: 't.Optional[t.Callable]' = None, batch_predict: 'bool' = False, takes_context: 'bool' = False, metrics: 't.Sequence[t.Union[str, Metric, None]]' = (), model_update_kwargs: 'dict' = <factory>, validation_sets: 't.Optional[t.Sequence[t.Union[str, Dataset]]]' = None, predict_X: 't.Optional[str]' = None, predict_select: 't.Optional[CompoundSelect]' = None, predict_max_chunk_size: 't.Optional[int]' = None, predict_kwargs: 't.Optional[t.Dict]' = None, object: 't.Any', model_to_device_method: 't.Optional[str]' = None, metric_values: 't.Optional[t.Dict]' = <factory>, predict_method: 't.Optional[str]' = None, device: 'str' = 'cpu', preferred_devices: 't.Union[None, t.Sequence[str]]' = ('cuda', 'mps', 'cpu'), training_configuration: 't.Union[str, _TrainingConfiguration, None]' = None, train_X: 't.Optional[str]' = None, train_y: 't.Optional[str]' = None, train_select: 't.Optional[CompoundSelect]' = None, num_directions: 'int' = 2, optimizer_state: 't.Optional[t.Any]' = None, forward_method: 'str' = '__call__', train_forward_method: 'str' = '__call__')[source]#

Bases: Model

compute_validation_objective(valid_dataloader)[source]#
eval()[source]#
evaluating()[source]#
extract_batch(batch)[source]#
extract_batch_key(batch, key: List[str] | str)[source]#
forward(X)[source]#
forward_method: str = '__call__'#
log(**kwargs)[source]#
num_directions: int = 2#
property optimizer#
optimizer_state: t.Optional[t.Any] = None#
property optimizers: List#
parameters()[source]#
save(database: Datalayer)[source]#
saving()[source]#
saving_criterion()[source]#
state_dict()[source]#
stopping_criterion(iteration)[source]#
take_step(batch, optimizers)[source]#
to(device)[source]#
train()[source]#
train_forward(X, y=None)[source]#
train_forward_method: str = '__call__'#
train_preprocess()[source]#
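
A hedged construction sketch: wrap any torch.nn.Module as a TorchModel. The network, identifier, datatype, and preprocess below are illustrative assumptions, not library defaults:

import torch
from superduperdb.ext.torch.model import TorchModel
from superduperdb.ext.torch.encoder import tensor

net = torch.nn.Sequential(          # any torch.nn.Module will do
    torch.nn.Linear(16, 8),
    torch.nn.ReLU(),
    torch.nn.Linear(8, 2),
)

model = TorchModel(
    'my-torch-model',                          # illustrative identifier
    object=net,                                # the wrapped module
    datatype=tensor(torch.float, shape=(2,)),  # encoder for the outputs
    preprocess=lambda r: torch.tensor(r, dtype=torch.float),
)
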
class superduperdb.ext.torch.model.TorchTrainerConfiguration(identifier: str, artifacts: dataclasses.InitVar[typing.Optional[typing.Dict]] = None, objective: ~typing.Callable | None = None, loader_kwargs: ~typing.Dict = <factory>, max_iterations: int = 10 ** 100, no_improve_then_stop: int = 5, splitter: ~typing.Callable | None = None, download: bool = False, validation_interval: int = 100, listen: str = 'objective', optimizer_cls: ~typing.Any = <factory>, optimizer_kwargs: ~typing.Dict = <factory>, target_preprocessors: ~typing.Dict | None = None, *, kwargs: ~typing.Dict | None = None)[source]#

Bases: _TrainingConfiguration

Configuration for the PyTorch trainer.

Parameters:
  • objective – Objective function

  • loader_kwargs – Kwargs for the dataloader

  • max_iterations – Maximum number of iterations

  • no_improve_then_stop – Number of iterations to wait for improvement before stopping

  • splitter – Splitter for the data

  • download – Whether to download the data

  • validation_interval – How often to validate

  • listen – Which metric to listen to for early stopping

  • optimizer_cls – Optimizer class

  • optimizer_kwargs – Kwargs for the optimizer

  • target_preprocessors – Preprocessors for the target

download: bool = False#
listen: str = 'objective'#
loader_kwargs: t.Dict#
max_iterations: int = 10 ** 100#
no_improve_then_stop: int = 5#
objective: t.Optional[t.Callable] = None#
optimizer_cls: t.Any#
optimizer_kwargs: t.Dict#
splitter: t.Optional[t.Callable] = None#
target_preprocessors: t.Optional[t.Dict] = None#
validation_interval: int = 100#
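
A hedged configuration sketch; the objective, batch size, and optimizer settings are illustrative choices:

import torch
from superduperdb.ext.torch.model import TorchTrainerConfiguration

def my_objective(output, target):              # illustrative objective
    return torch.nn.functional.cross_entropy(output, target)

config = TorchTrainerConfiguration(
    'my-training-config',                      # illustrative identifier
    objective=my_objective,
    loader_kwargs={'batch_size': 64},
    max_iterations=10_000,
    validation_interval=100,
    optimizer_cls=torch.optim.Adam,
    optimizer_kwargs={'lr': 1e-4},
)
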
superduperdb.ext.torch.model.create_batch(args)[source]#

Create a singleton batch in a manner similar to the PyTorch dataloader.

Parameters:

args – single data point for batching

>>> create_batch(3.).shape
torch.Size([1])
>>> x, y = create_batch([torch.randn(5), torch.randn(3, 7)])
>>> x.shape
torch.Size([1, 5])
>>> y.shape
torch.Size([1, 3, 7])
>>> d = create_batch({'a': torch.randn(4)})
>>> d['a'].shape
torch.Size([1, 4])
superduperdb.ext.torch.model.unpack_batch(args)[source]#

Unpack a batch into lines of tensor output.

Parameters:

args – a batch of model outputs

>>> unpack_batch(torch.randn(1, 10))[0].shape
torch.Size([10])
>>> out = unpack_batch([torch.randn(2, 10), torch.randn(2, 3, 5)])
>>> type(out)
<class 'list'>
>>> len(out)
2
>>> out = unpack_batch({'a': torch.randn(2, 10), 'b': torch.randn(2, 3, 5)})
>>> [type(x) for x in out]
[<class 'dict'>, <class 'dict'>]
>>> out[0]['a'].shape
torch.Size([10])
>>> out[0]['b'].shape
torch.Size([3, 5])
>>> out = unpack_batch({'a': {'b': torch.randn(2, 10)}})
>>> out[0]['a']['b'].shape
torch.Size([10])
>>> out[1]['a']['b'].shape
torch.Size([10])
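
The two helpers pair up naturally: batch a single datum, run a model, then unpack the output back to per-datum tensors. A minimal sketch (the linear model is illustrative):

import torch
from superduperdb.ext.torch.model import create_batch, unpack_batch

model = torch.nn.Linear(5, 3)       # illustrative model
x = torch.randn(5)                  # a single unbatched datum

batch = create_batch(x)             # shape (1, 5)
output = model(batch)               # shape (1, 3)
unpack_batch(output)[0].shape       # torch.Size([3])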

superduperdb.ext.torch.utils module#

superduperdb.ext.torch.utils.device_of(module: Module) → t.Union[_device, str][source]#

Get the device of a module.

Parameters:

module – PyTorch module

superduperdb.ext.torch.utils.eval(module: Module) → t.Iterator[None][source]#

Temporarily set a module to evaluation mode.

Parameters:

module – PyTorch module

superduperdb.ext.torch.utils.set_device(module: Module, device: _device)[source]#

Temporarily set the device of a module.

Parameters:
  • module – PyTorch module

  • device – Device to set

superduperdb.ext.torch.utils.to_device(item: t.Any, device: t.Union[str, _device]) → t.Any[source]#

Send the tensor leaves of a nested list/dictionary/tensor structure to a device.

Parameters:
  • item – tensor, or nested list/dictionary of tensors

  • device – device to send the tensors to
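
A combined usage sketch for these utilities, assuming eval behaves as a context manager (consistent with its iterator return type); the module below is illustrative:

import torch
from superduperdb.ext.torch.utils import device_of, eval, to_device

module = torch.nn.Linear(4, 2)      # illustrative module
device_of(module)                   # e.g. device(type='cpu')

# Move the tensor leaves of a nested structure to a device.
batch = to_device({'x': torch.randn(2, 4)}, 'cpu')

with eval(module):                  # temporarily in evaluation mode
    out = module(batch['x'])        # previous mode restored on exit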

Module contents#

class superduperdb.ext.torch.TorchModel(identifier: 'str', artifacts: 'dc.InitVar[t.Optional[t.Dict]]' = None, *, datatype: 'EncoderArg' = None, output_schema: 't.Optional[Schema]' = None, flatten: 'bool' = False, preprocess: 't.Optional[t.Callable]' = None, postprocess: 't.Optional[t.Callable]' = None, collate_fn: 't.Optional[t.Callable]' = None, batch_predict: 'bool' = False, takes_context: 'bool' = False, metrics: 't.Sequence[t.Union[str, Metric, None]]' = (), model_update_kwargs: 'dict' = <factory>, validation_sets: 't.Optional[t.Sequence[t.Union[str, Dataset]]]' = None, predict_X: 't.Optional[str]' = None, predict_select: 't.Optional[CompoundSelect]' = None, predict_max_chunk_size: 't.Optional[int]' = None, predict_kwargs: 't.Optional[t.Dict]' = None, object: 't.Any', model_to_device_method: 't.Optional[str]' = None, metric_values: 't.Optional[t.Dict]' = <factory>, predict_method: 't.Optional[str]' = None, device: 'str' = 'cpu', preferred_devices: 't.Union[None, t.Sequence[str]]' = ('cuda', 'mps', 'cpu'), training_configuration: 't.Union[str, _TrainingConfiguration, None]' = None, train_X: 't.Optional[str]' = None, train_y: 't.Optional[str]' = None, train_select: 't.Optional[CompoundSelect]' = None, num_directions: 'int' = 2, optimizer_state: 't.Optional[t.Any]' = None, forward_method: 'str' = '__call__', train_forward_method: 'str' = '__call__')[source]#

Bases: Model

compute_validation_objective(valid_dataloader)[source]#
eval()[source]#
evaluating()[source]#
extract_batch(batch)[source]#
extract_batch_key(batch, key: List[str] | str)[source]#
forward(X)[source]#
forward_method: str = '__call__'#
identifier: str#
log(**kwargs)[source]#
metric_values: t.Optional[t.Dict]#
model_update_kwargs: dict#
num_directions: int = 2#
object: t.Any#
property optimizer#
optimizer_state: t.Optional[t.Any] = None#
property optimizers: List#
parameters()[source]#
save(database: Datalayer)[source]#
saving()[source]#
saving_criterion()[source]#
state_dict()[source]#
stopping_criterion(iteration)[source]#
take_step(batch, optimizers)[source]#
to(device)[source]#
train()[source]#
train_forward(X, y=None)[source]#
train_forward_method: str = '__call__'#
train_preprocess()[source]#
class superduperdb.ext.torch.TorchTrainerConfiguration(identifier: str, artifacts: dataclasses.InitVar[typing.Optional[typing.Dict]] = None, objective: ~typing.Callable | None = None, loader_kwargs: ~typing.Dict = <factory>, max_iterations: int = 10 ** 100, no_improve_then_stop: int = 5, splitter: ~typing.Callable | None = None, download: bool = False, validation_interval: int = 100, listen: str = 'objective', optimizer_cls: ~typing.Any = <factory>, optimizer_kwargs: ~typing.Dict = <factory>, target_preprocessors: ~typing.Dict | None = None, *, kwargs: ~typing.Dict | None = None)[source]#

Bases: _TrainingConfiguration

Configuration for the PyTorch trainer.

Parameters:
  • objective – Objective function

  • loader_kwargs – Kwargs for the dataloader

  • max_iterations – Maximum number of iterations

  • no_improve_then_stop – Number of iterations to wait for improvement before stopping

  • splitter – Splitter for the data

  • download – Whether to download the data

  • validation_interval – How often to validate

  • listen – Which metric to listen to for early stopping

  • optimizer_cls – Optimizer class

  • optimizer_kwargs – Kwargs for the optimizer

  • target_preprocessors – Preprocessors for the target

download: bool = False#
identifier: str#
listen: str = 'objective'#
loader_kwargs: t.Dict#
max_iterations: int = 10 ** 100#
no_improve_then_stop: int = 5#
objective: t.Optional[t.Callable] = None#
optimizer_cls: t.Any#
optimizer_kwargs: t.Dict#
splitter: t.Optional[t.Callable] = None#
target_preprocessors: t.Optional[t.Dict] = None#
validation_interval: int = 100#
superduperdb.ext.torch.tensor(dtype, shape: Sequence)[source]#

Create an encoder for a tensor of a given dtype and shape.

Parameters:
  • dtype – The dtype of the tensor.

  • shape – The shape of the tensor.