superduperdb.vector_search package#

Subpackages#

Submodules#

superduperdb.vector_search.atlas module#

class superduperdb.vector_search.atlas.MongoAtlasVectorSearcher(identifier: str, collection: str, dimensions: int | None = None, measure: str | None = None, output_path: str | None = None, **kwargs)[source]#

Bases: BaseVectorSearcher

Implementation of Atlas vector search.

Parameters:

identifier – Unique string identifier of index

_create_index(collection: str, output_path: str)[source]#

Create a vector index in the data backend if it is an Atlas deployment.

Parameters:

vector_index – vector index to create

add(items)[source]#

Add items to the index.

Parameters:

items – t.Sequence of VectorItems

delete(items)[source]#

Remove items from the index

Parameters:

items – t.Sequence of ids of vectors.

find_nearest_from_array(h, n=100, within_ids=None)[source]#

Find the nearest vectors to the given vector.

Parameters:
  • h – vector

  • n – number of nearest vectors to return

find_nearest_from_id(id: str, n=100, within_ids=None)[source]#

Find the nearest vectors to the vector with the given id.

Parameters:
  • id – id of the vector

  • n – number of nearest vectors to return

classmethod from_component(vi: VectorIndex)[source]#
property index#

superduperdb.vector_search.base module#

class superduperdb.vector_search.base.BaseVectorSearcher(identifier: str, dimensions: int, h: ndarray | None = None, index: List[str] | None = None, measure: str | None = None)[source]#

Bases: ABC

abstract add(items: Sequence[VectorItem]) None[source]#

Add items to the index.

Parameters:

items – t.Sequence of VectorItems

abstract delete(ids: Sequence[str]) None[source]#

Remove items from the index

Parameters:

ids – t.Sequence of ids of vectors.

abstract find_nearest_from_array(h: _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | bool | int | float | complex | str | bytes | _NestedSequence[bool | int | float | complex | str | bytes], n: int = 100, within_ids: Sequence[str] = ()) Tuple[List[str], List[float]][source]#

Find the nearest vectors to the given vector.

Parameters:
  • h – vector

  • n – number of nearest vectors to return

abstract find_nearest_from_id(_id, n: int = 100, within_ids: Sequence[str] = ()) Tuple[List[str], List[float]][source]#

Find the nearest vectors to the vector with the given id.

Parameters:
  • _id – id of the vector

  • n – number of nearest vectors to return

classmethod from_component(vi: VectorIndex)[source]#
static to_list(h)[source]#
static to_numpy(h)[source]#
class superduperdb.vector_search.base.VectorIndexMeasureType(value, names=None, *, module=None, qualname=None, type=None, start=1, boundary=None)[source]#

Bases: str, Enum

_generate_next_value_(start, count, last_values)#

Generate the next value when not given.

name: the name of the member; start: the initial start value or None; count: the number of existing members; last_values: the list of values assigned.

cosine = 'cosine'#
css = 'css'#
dot = 'dot'#
l2 = 'l2'#
class superduperdb.vector_search.base.VectorItem(id: str, vector: ndarray)[source]#

Bases: object

Class for representing a vector in vector search with id and vector.

classmethod create(*, id: str, vector: _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | bool | int | float | complex | str | bytes | _NestedSequence[bool | int | float | complex | str | bytes]) VectorItem[source]#
id: str#
to_dict() Dict[source]#
vector: ndarray#
class superduperdb.vector_search.base.VectorSearchConfig(id: str, dimensions: int, measure: ~superduperdb.vector_search.base.VectorIndexMeasureType = VectorIndexMeasureType.l2, parameters: ~typing.Mapping[str, ~typing.Any] = <factory>)[source]#

Bases: object

Represents search config which helps initiate a vector searcher class.

dimensions: int#
id: str#
measure: VectorIndexMeasureType = 'l2'#
parameters: Mapping[str, Any]#
class superduperdb.vector_search.base.VectorSearchResult(id: str, score: float)[source]#

Bases: object

Dataclass for representing vector search results with id and score.

id: str#
score: float#
superduperdb.vector_search.base.cosine(x, y)[source]#

Cosine similarity function for vector search

superduperdb.vector_search.base.dot(x, y)[source]#

Dot function for vector similarity search

superduperdb.vector_search.base.l2(x, y)[source]#

L2 function for vector similarity search

superduperdb.vector_search.in_memory module#

class superduperdb.vector_search.in_memory.InMemoryVectorSearcher(identifier: str, dimensions: int, h: ndarray | None = None, index: List[str] | None = None, measure: str | Callable = 'cosine')[source]#

Bases: BaseVectorSearcher

Simple hash-set for looking up with vector similarity.

Parameters:
  • identifier – Unique string identifier of index

  • h – array/tensor of vectors

  • index – list of IDs

  • measure – measure to assess similarity

add(items: Sequence[VectorItem]) None[source]#

Add items to the index.

Parameters:

items – t.Sequence of VectorItems

delete(ids)[source]#

Remove items from the index

Parameters:

ids – t.Sequence of ids of vectors.

find_nearest_from_array(h, n=100, within_ids=None)[source]#

Find the nearest vectors to the given vector.

Parameters:
  • h – vector

  • n – number of nearest vectors to return

find_nearest_from_id(_id, n=100)[source]#

Find the nearest vectors to the vector with the given id.

Parameters:
  • _id – id of the vector

  • n – number of nearest vectors to return

name = 'vanilla'#

superduperdb.vector_search.interface module#

class superduperdb.vector_search.interface.FastVectorSearcher(db: Datalayer, vector_searcher, vector_index: str)[source]#

Bases: BaseVectorSearcher

add(items: Sequence[VectorItem]) None[source]#

Add items to the index.

Parameters:

items – t.Sequence of VectorItems

delete(ids: Sequence[str]) None[source]#

Remove items from the index

Parameters:

ids – t.Sequence of ids of vectors.

find_nearest_from_array(h: _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | bool | int | float | complex | str | bytes | _NestedSequence[bool | int | float | complex | str | bytes], n: int = 100, within_ids: Sequence[str] = ()) Tuple[List[str], List[float]][source]#

Find the nearest vectors to the given vector.

Parameters:
  • h – vector

  • n – number of nearest vectors to return

find_nearest_from_id(_id, n: int = 100, within_ids: Sequence[str] = ()) Tuple[List[str], List[float]][source]#

Find the nearest vectors to the vector with the given id.

Parameters:
  • _id – id of the vector

  • n – number of nearest vectors to return

superduperdb.vector_search.lance module#

class superduperdb.vector_search.lance.LanceVectorSearcher(identifier: str, dimensions: int, h: ndarray | None = None, index: List[str] | None = None, measure: str | None = None)[source]#

Bases: BaseVectorSearcher

Implementation of a vector index using the lance library.

Parameters:
  • identifier – Unique string identifier of index

  • dimensions – Dimension of the vector embeddings in the Lance dataset

  • h – Seed vectors numpy.ndarray

  • index – list of IDs

  • measure – measure to assess similarity

add(items: Sequence[VectorItem]) None[source]#

Add items to the index.

Parameters:

items – t.Sequence of VectorItems

property dataset#
delete(ids: Sequence[str]) None[source]#

Remove items from the index

Parameters:

ids – t.Sequence of ids of vectors.

find_nearest_from_array(h: _SupportsArray[dtype[Any]] | _NestedSequence[_SupportsArray[dtype[Any]]] | bool | int | float | complex | str | bytes | _NestedSequence[bool | int | float | complex | str | bytes], n: int = 100, within_ids: Sequence[str] = ()) Tuple[List[str], List[float]][source]#

Find the nearest vectors to the given vector.

Parameters:
  • h – vector

  • n – number of nearest vectors to return

find_nearest_from_id(_id, n: int = 100, within_ids: Sequence[str] = ()) Tuple[List[str], List[float]][source]#

Find the nearest vectors to the vector with the given id.

Parameters:
  • _id – id of the vector

  • n – number of nearest vectors to return

superduperdb.vector_search.update_tasks module#

superduperdb.vector_search.update_tasks.copy_vectors(vector_index: str, query: Dict | CompoundSelect, ids: Sequence[str], db=None)[source]#

A helper function to copy vectors of a VectorIndex component from the data backend to the fast_vector_search backend.

Parameters:
  • vector_index – An identifier of the vector index.

  • query – A query which was used by db._build_task_workflow method

  • ids – List of ids which were observed as added/updated documents.

  • db – A DB instance.

superduperdb.vector_search.update_tasks.delete_vectors(vector_index: str, ids: Sequence[str], db=None)[source]#

A helper function to delete vectors of a VectorIndex component in the fast_vector_search backend.

Parameters:
  • vector_index – An identifier of the vector index.

  • ids – List of ids which were observed as deleted documents.

  • db – A DB instance.