superduperdb.ext.transformers package#

Submodules#

superduperdb.ext.transformers.model module#

class superduperdb.ext.transformers.model.Pipeline(identifier: str, artifacts: dc.InitVar[t.Optional[t.Dict]] = None, preprocess_type: str = 'tokenizer', preprocess_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, postprocess_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, task: str = 'text-classification', *, datatype: EncoderArg = None, output_schema: t.Optional[Schema] = None, flatten: bool = False, preprocess: t.Optional[t.Callable] = None, postprocess: t.Optional[t.Callable] = None, collate_fn: t.Optional[t.Callable] = None, batch_predict: bool = False, takes_context: bool = False, metrics: t.Sequence[t.Union[str, Metric, None]] = (), model_update_kwargs: dict = <factory>, validation_sets: t.Optional[t.Sequence[t.Union[str, Dataset]]] = None, predict_X: t.Optional[str] = None, predict_select: t.Optional[CompoundSelect] = None, predict_max_chunk_size: t.Optional[int] = None, predict_kwargs: t.Optional[t.Dict] = None, object: t.Any, model_to_device_method: t.Optional[str] = None, metric_values: t.Optional[t.Dict] = <factory>, predict_method: t.Optional[str] = None, device: str = 'cpu', preferred_devices: t.Union[None, t.Sequence[str]] = ('cuda', 'mps', 'cpu'), training_configuration: t.Union[str, _TrainingConfiguration, None] = None, train_X: t.Optional[str] = None, train_y: t.Optional[str] = None, train_select: t.Optional[CompoundSelect] = None)[source]#

Bases: Model

A wrapper for transformers.Pipeline

Parameters:
  • preprocess_type – The type of preprocessing to use; currently only 'tokenizer' is supported.

  • preprocess_kwargs – Keyword arguments passed to the preprocessor (the tokenizer).

  • postprocess_kwargs – Keyword arguments passed to the postprocessing step.

  • task – The transformers pipeline task, e.g. 'text-classification'.
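
A construction sketch may help orient readers. In the example below, the model name, the tokenizer passed via the inherited preprocess argument, and the use of object for the underlying Hugging Face model are illustrative assumptions, not details stated on this page:

from transformers import AutoModelForSequenceClassification, AutoTokenizer

from superduperdb.ext.transformers import Pipeline

# Illustrative model; any Hugging Face text-classification checkpoint would do.
model_name = 'distilbert-base-uncased-finetuned-sst-2-english'

sentiment = Pipeline(
    identifier='sentiment',                     # name the model is stored under
    task='text-classification',                 # forwarded to the transformers pipeline
    preprocess=AutoTokenizer.from_pretrained(model_name),   # assumed: tokenizer as preprocess
    preprocess_type='tokenizer',                # the only supported preprocess type
    preprocess_kwargs={'truncation': True, 'max_length': 256},
    postprocess_kwargs={},                      # forwarded to postprocessing
    object=AutoModelForSequenceClassification.from_pretrained(model_name),  # assumed: underlying model
    device='cpu',
)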

property pipeline#
postprocess_kwargs: Dict[str, Any]#
preprocess_kwargs: Dict[str, Any]#
preprocess_type: str = 'tokenizer'#
task: str = 'text-classification'#
property training_arguments#

Module contents#

class superduperdb.ext.transformers.Pipeline(identifier: str, artifacts: dc.InitVar[t.Optional[t.Dict]] = None, preprocess_type: str = 'tokenizer', preprocess_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, postprocess_kwargs: ~typing.Dict[str, ~typing.Any] = <factory>, task: str = 'text-classification', *, datatype: EncoderArg = None, output_schema: t.Optional[Schema] = None, flatten: bool = False, preprocess: t.Optional[t.Callable] = None, postprocess: t.Optional[t.Callable] = None, collate_fn: t.Optional[t.Callable] = None, batch_predict: bool = False, takes_context: bool = False, metrics: t.Sequence[t.Union[str, Metric, None]] = (), model_update_kwargs: dict = <factory>, validation_sets: t.Optional[t.Sequence[t.Union[str, Dataset]]] = None, predict_X: t.Optional[str] = None, predict_select: t.Optional[CompoundSelect] = None, predict_max_chunk_size: t.Optional[int] = None, predict_kwargs: t.Optional[t.Dict] = None, object: t.Any, model_to_device_method: t.Optional[str] = None, metric_values: t.Optional[t.Dict] = <factory>, predict_method: t.Optional[str] = None, device: str = 'cpu', preferred_devices: t.Union[None, t.Sequence[str]] = ('cuda', 'mps', 'cpu'), training_configuration: t.Union[str, _TrainingConfiguration, None] = None, train_X: t.Optional[str] = None, train_y: t.Optional[str] = None, train_select: t.Optional[CompoundSelect] = None)[source]#

Bases: Model

A wrapper for transformers.Pipeline

Parameters:
  • preprocess_type – The type of preprocessing to use; currently only 'tokenizer' is supported.

  • preprocess_kwargs – Keyword arguments passed to the preprocessor (the tokenizer).

  • postprocess_kwargs – Keyword arguments passed to the postprocessing step.

  • task – The transformers pipeline task, e.g. 'text-classification'.

identifier: str#
metric_values: t.Optional[t.Dict]#
model_update_kwargs: dict#
object: t.Any#
property pipeline#
postprocess_kwargs: Dict[str, Any]#
preprocess_kwargs: Dict[str, Any]#
preprocess_type: str = 'tokenizer'#
task: str = 'text-classification'#
property training_arguments#
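
For orientation on the predict_X and predict_select fields in the signature above, the sketch below shows how the wrapper would typically be applied over a datalayer query. The connection string, collection name, and the db.add / predict calls are assumptions based on the generic superduperdb Model API rather than anything stated on this page:

from superduperdb import superduper
from superduperdb.backends.mongodb import Collection

# Hypothetical datalayer connection and collection; adjust to your deployment.
db = superduper('mongodb://localhost:27017/documents')
reviews = Collection('reviews')

db.add(sentiment)                    # register the Pipeline instance constructed earlier

sentiment.predict(
    X='text',                        # document field fed to the pipeline (cf. predict_X)
    db=db,
    select=reviews.find(),           # documents to compute outputs for (cf. predict_select)
)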