Skip to main content

lindorm_ai

Lindorm Embedding

This notebook covers how to get started with the Lindorm AI embedding and rerank models.

import environs
from lindormai.model_manager import ModelManager

env = environs.Env()
env.read_env(".env")


class Config:
    """Connection settings and default model names for the Lindorm AI examples.

    The connection values are read through ``env``; the second argument of
    each ``env.str`` call is the fallback used when the variable is not set.
    The angle-bracket fallbacks are placeholders and must be replaced with
    real values (for example via the ``.env`` file) before running.
    """

    # Endpoint of the Lindorm AI service.
    AI_EMB_ENDPOINT = env.str("AI_EMB_ENDPOINT", "<EMB_ENDPOINT>")
    # Account name; falls back to "root".
    AI_USERNAME = env.str("AI_USERNAME", "root")
    # Account password; keep the real value out of source control.
    AI_PWD = env.str("AI_PWD", "<PASSWORD>")

    # Default model names.
    AI_DEFAULT_RERANK_MODEL = "rerank_bge_large"
    AI_DEFAULT_EMBEDDING_MODEL = "bge-large-zh-v1.5"
    AI_DEFAULT_XIAOBU2_EMBEDDING_MODEL = "xiaobu2"


# Module-level aliases for the connection settings held on Config.
LDAI_EMB_ENDPOINT, LDAI_EMB_USERNAME, LDAI_EMB_PWD = (
    Config.AI_EMB_ENDPOINT,
    Config.AI_USERNAME,
    Config.AI_PWD,
)

Define helper functions

def check_model_exist(model_mgr, model_name: str) -> bool:
    """Return True if ``model_name`` is listed by ``model_mgr`` with status READY.

    Args:
        model_mgr: Object whose ``list()`` method returns an iterable of
            mappings that each carry ``"name"`` and ``"status"`` entries.
        model_name: Name of the model to look for.

    Returns:
        True when an entry has a matching name and a status of ``"READY"``;
        False otherwise, including when the list is empty or the model is
        present but in another status.
    """
    return any(
        model_name == model["name"] and "READY" == model["status"]
        for model in model_mgr.list()
    )


def create_emb_model(model_mgr, model_name, path, algo):
    """Create a model with task ``FEATURE_EXTRACTION`` through ``model_mgr``.

    Args:
        model_mgr: Object exposing ``create(name=..., task=..., path=..., algo=...)``.
        model_name: Name to register the model under.
        path: Location of the model, forwarded unchanged as ``path``.
        algo: Algorithm identifier, forwarded unchanged as ``algo``.

    Returns:
        Whatever ``model_mgr.create`` returns.
    """
    return model_mgr.create(
        name=model_name,
        task="FEATURE_EXTRACTION",
        path=path,
        algo=algo,
    )


def create_rerank_model(model_mgr, model_name, path, algo):
    """Create a model with task ``SEMANTIC_SIMILARITY`` through ``model_mgr``.

    Args:
        model_mgr: Object exposing ``create(name=..., task=..., path=..., algo=...)``.
        model_name: Name to register the model under.
        path: Location of the model, forwarded unchanged as ``path``.
        algo: Algorithm identifier, forwarded unchanged as ``algo``.

    Returns:
        Whatever ``model_mgr.create`` returns.
    """
    return model_mgr.create(
        name=model_name,
        task="SEMANTIC_SIMILARITY",
        path=path,
        algo=algo,
    )

Create & Deploy Embedding Model

# Model-management client built from the configured endpoint and credentials.
ldai_model_mgr = ModelManager(LDAI_EMB_ENDPOINT, LDAI_EMB_USERNAME, LDAI_EMB_PWD)

emb_model_name = "bge_model"
emb_model_path = "huggingface://BAAI/bge-large-zh-v1.5"
emb_model_algo = "BGE_LARGE_ZH"

# Create the embedding model only when it is not already listed as READY.
# NOTE(review): nothing here waits for a newly created model to reach READY
# before the later cells use it -- confirm whether create() blocks.
if not check_model_exist(ldai_model_mgr, emb_model_name):
    print("model not exist! will create")
    create_emb_model(ldai_model_mgr, emb_model_name, emb_model_path, emb_model_algo)
else:
    print(f"model {emb_model_name} exist!")

Init LindormAIEmbeddings

from langchain_community.embeddings.lindorm_embedding import LindormAIEmbeddings

# Embedding client bound to the model named by emb_model_name, using the same
# endpoint and credentials as the model manager.
ldai_emb = LindormAIEmbeddings(
    endpoint=LDAI_EMB_ENDPOINT,
    username=LDAI_EMB_USERNAME,
    password=LDAI_EMB_PWD,
    model_name=emb_model_name,
)
API Reference: LindormAIEmbeddings

Embed single query

# Embed a single query string and print whatever embed_query returns.
query = "辛弃疾"
response = ldai_emb.embed_query(query)
print(f"emb result: {response}")

Embed multiple documents

import random
import string

# Ten sample documents, each a random 10-character string drawn from ASCII
# letters and digits.
docs = [
    "".join(random.choices(string.ascii_letters + string.digits, k=10))
    for _ in range(10)
]

# Embed all documents in one call; only the first result is printed to keep
# the output short.
response = ldai_emb.embed_documents(docs)
print(f"emb result: {response[0]}")

Create & Deploy Rerank Model

rerank_model_name = "rerank_bge_large"
rerank_model_path = "huggingface://BAAI/bge-reranker-large"
rerank_model_algo = "BGE_RERANKER_LARGE"

# Create the rerank model only when it is not already listed as READY.
# NOTE(review): nothing here waits for a newly created model to reach READY
# before the later cells use it -- confirm whether create() blocks.
if not check_model_exist(ldai_model_mgr, rerank_model_name):
    print("model not exist! will create")
    create_rerank_model(
        ldai_model_mgr, rerank_model_name, rerank_model_path, rerank_model_algo
    )
else:
    print(f"model {rerank_model_name} exist!")

Init LindormAIRerank

from langchain_community.document_compressors.lindormai_rerank import LindormAIRerank

# Rerank client bound to the model named by rerank_model_name, using the same
# endpoint and credentials as the model manager.
ldai_rerank = LindormAIRerank(
    endpoint=LDAI_EMB_ENDPOINT,
    username=LDAI_EMB_USERNAME,
    password=LDAI_EMB_PWD,
    model_name=rerank_model_name,
)
API Reference: LindormAIRerank

Rerank documents

from langchain_core.documents import Document

# Two candidate documents to be reranked against the query below.
doc1 = Document("一只小狗")
doc2 = Document("一个滑滑梯")
docs = [doc1, doc2]

# Pass the candidates and the query text to the reranker and show its output.
result = ldai_rerank.compress_documents(docs, "两只小狗")
print(result)
API Reference: Document

Was this page helpful?