
embeddings

generate text embeddings

openai embeddings

import { embed } from "@threaded/ai";

const vector = await embed("openai/text-embedding-3-small", "hello world");

returns an array of numbers (the embedding vector)

with dimensions

const vector = await embed(
  "openai/text-embedding-3-small",
  "hello world",
  { dimensions: 256 }
);

pass dimensions to reduce the vector size; smaller vectors are cheaper to store and compare
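shortened embeddings of this kind are typically produced by truncating the full vector and re-normalizing it to unit length. a toy sketch of that scheme (the truncateEmbedding helper is illustrative, not part of @threaded/ai):

```javascript
// illustrative helper, not part of @threaded/ai:
// shorten a vector by truncating it, then re-normalize to unit length
const truncateEmbedding = (vector, dimensions) => {
  const slice = vector.slice(0, dimensions);
  const norm = Math.sqrt(slice.reduce((sum, v) => sum + v * v, 0));
  return slice.map(v => v / norm);
};

const full = [0.6, 0.8, 0.0, 0.0]; // stand-in for a real embedding
const reduced = truncateEmbedding(full, 2);
// reduced has length 2 and is still unit length: [0.6, 0.8]
```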

huggingface models

const vector = await embed("Xenova/all-MiniLM-L6-v2", "hello world");

uses @huggingface/transformers for local inference

requires system dependencies (onnx runtime)

semantic search example

import { embed } from "@threaded/ai";

const documents = [
  "the cat sat on the mat",
  "dogs are great pets",
  "javascript is a programming language",
];

// embed all documents in parallel
const docVectors = await Promise.all(
  documents.map(doc => embed("openai/text-embedding-3-small", doc))
);

const query = "pets and animals";
const queryVector = await embed("openai/text-embedding-3-small", query);

// cosine similarity: dot product divided by the product of magnitudes
const cosineSimilarity = (a, b) => {
  const dot = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dot / (magA * magB);
};

const scores = docVectors.map((vec, i) => ({
  document: documents[i],
  score: cosineSimilarity(queryVector, vec),
}));

scores.sort((a, b) => b.score - a.score);
console.log(scores);

logs the documents ranked by similarity to the query, most similar first
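as a quick sanity check, the cosineSimilarity helper from the example behaves as expected on toy vectors:

```javascript
// same helper as in the semantic search example
const cosineSimilarity = (a, b) => {
  const dot = a.reduce((sum, val, i) => sum + val * b[i], 0);
  const magA = Math.sqrt(a.reduce((sum, val) => sum + val * val, 0));
  const magB = Math.sqrt(b.reduce((sum, val) => sum + val * val, 0));
  return dot / (magA * magB);
};

const same = cosineSimilarity([1, 0], [2, 0]);       // 1: same direction
const orthogonal = cosineSimilarity([1, 0], [0, 1]); // 0: unrelated
const opposite = cosineSimilarity([1, 0], [-1, 0]);  // -1: opposite direction
```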

caching

huggingface models are cached after first load

const vector1 = await embed("Xenova/all-MiniLM-L6-v2", "text 1");
const vector2 = await embed("Xenova/all-MiniLM-L6-v2", "text 2");

the second call reuses the already-loaded model instead of loading it again
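a minimal sketch of the load-once pattern, assuming an in-memory map keyed by model name (illustrative only, not the actual @threaded/ai internals):

```javascript
// illustrative sketch of a load-once model cache, not @threaded/ai internals
const modelCache = new Map();
let loadCount = 0;

const getModel = (name) => {
  if (!modelCache.has(name)) {
    loadCount++; // the expensive load runs only on a cache miss
    modelCache.set(name, { name });
  }
  return modelCache.get(name);
};

getModel("Xenova/all-MiniLM-L6-v2");
getModel("Xenova/all-MiniLM-L6-v2"); // cache hit; loadCount stays at 1
```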

next: image generation