superduperdb.ext.jina package#

Submodules#

superduperdb.ext.jina.client module#

class superduperdb.ext.jina.client.JinaAPIClient(api_key: str | None = None, model_name: str = 'jina-embeddings-v2-base-en')[source]#

Bases: object

Create a JinaAPIClient that provides an interface for encoding text with the Jina Embedding platform, both synchronously and asynchronously.

Parameters:
  • api_key – The Jina API key. It can be explicitly provided or automatically read from the environment variable JINA_API_KEY (recommended).

  • model_name – The name of the Jina model to use. Check the list of available models on https://jina.ai/embeddings/

__init__(api_key: str | None = None, model_name: str = 'jina-embeddings-v2-base-en')[source]#
Create a JinaAPIClient that provides an interface for encoding text with the Jina Embedding platform, both synchronously and asynchronously.

Parameters:
  • api_key – The Jina API key. It can be explicitly provided or automatically read from the environment variable JINA_API_KEY (recommended).

  • model_name – The name of the Jina model to use. Check the list of available models on https://jina.ai/embeddings/

async aencode_batch(texts: List[str]) → List[List[float]][source]#
encode_batch(texts: List[str]) → List[List[float]][source]#

superduperdb.ext.jina.model module#

class superduperdb.ext.jina.model.Jina(identifier: str, artifacts: dc.InitVar[t.Optional[t.Dict]] = None, *, datatype: EncoderArg = None, output_schema: t.Optional[Schema] = None, flatten: bool = False, preprocess: t.Optional[t.Callable] = None, postprocess: t.Optional[t.Callable] = None, collate_fn: t.Optional[t.Callable] = None, batch_predict: bool = False, takes_context: bool = False, metrics: t.Sequence[t.Union[str, Metric, None]] = (), model_update_kwargs: t.Dict = <factory>, validation_sets: t.Optional[t.Sequence[t.Union[str, Dataset]]] = None, predict_X: t.Optional[str] = None, predict_select: t.Optional[CompoundSelect] = None, predict_max_chunk_size: t.Optional[int] = None, predict_kwargs: t.Optional[t.Dict] = None, model: t.Optional[str] = None, api_key: str | None = None)[source]#

Bases: APIModel

Jina predictor

api_key: str | None = None#
class superduperdb.ext.jina.model.JinaEmbedding(identifier: str, artifacts: dc.InitVar[t.Optional[t.Dict]] = None, *, datatype: EncoderArg = None, output_schema: t.Optional[Schema] = None, flatten: bool = False, preprocess: t.Optional[t.Callable] = None, postprocess: t.Optional[t.Callable] = None, collate_fn: t.Optional[t.Callable] = None, batch_predict: bool = False, takes_context: bool = False, metrics: t.Sequence[t.Union[str, Metric, None]] = (), model_update_kwargs: t.Dict = <factory>, validation_sets: t.Optional[t.Sequence[t.Union[str, Dataset]]] = None, predict_X: t.Optional[str] = None, predict_select: t.Optional[CompoundSelect] = None, predict_max_chunk_size: t.Optional[int] = None, predict_kwargs: t.Optional[t.Dict] = None, model: t.Optional[str] = None, api_key: str | None = None, shape: ~typing.Sequence[int] | None = None)[source]#

Bases: Jina

Jina embedding predictor

Parameters:

shape – The shape of the embedding as a tuple. If not provided, it will be obtained by sending a simple query to the API.

pre_create(db)[source]#

Called the first time this component is created

Parameters:

db – the db that creates the component

shape: Sequence[int] | None = None#

Module contents#

class superduperdb.ext.jina.JinaEmbedding(identifier: str, artifacts: dc.InitVar[t.Optional[t.Dict]] = None, *, datatype: EncoderArg = None, output_schema: t.Optional[Schema] = None, flatten: bool = False, preprocess: t.Optional[t.Callable] = None, postprocess: t.Optional[t.Callable] = None, collate_fn: t.Optional[t.Callable] = None, batch_predict: bool = False, takes_context: bool = False, metrics: t.Sequence[t.Union[str, Metric, None]] = (), model_update_kwargs: t.Dict = <factory>, validation_sets: t.Optional[t.Sequence[t.Union[str, Dataset]]] = None, predict_X: t.Optional[str] = None, predict_select: t.Optional[CompoundSelect] = None, predict_max_chunk_size: t.Optional[int] = None, predict_kwargs: t.Optional[t.Dict] = None, model: t.Optional[str] = None, api_key: str | None = None, shape: ~typing.Sequence[int] | None = None)[source]#

Bases: Jina

Jina embedding predictor

Parameters:

shape – The shape of the embedding as a tuple. If not provided, it will be obtained by sending a simple query to the API.

identifier: str#
model_update_kwargs: t.Dict#
pre_create(db)[source]#

Called the first time this component is created

Parameters:

db – the db that creates the component

shape: Sequence[int] | None = None#