lindorm_ai
Lindorm Embedding
This notebook covers how to get started with the Lindorm AI embedding and rerank models.
import environs
from lindormai.model_manager import ModelManager
# Create an environs reader and load variables from the local ".env" file so
# that the connection settings can be supplied without editing this notebook.
env = environs.Env()
env.read_env(".env")
class Config:
    """Connection settings and default model names for Lindorm AI.

    The endpoint, username and password are read through ``env``; each call
    passes a literal fallback that is used when the variable is not set.
    """

    # Service endpoint; the fallback is a placeholder, not a usable address.
    AI_EMB_ENDPOINT = env.str("AI_EMB_ENDPOINT", "<EMB_ENDPOINT>")
    AI_USERNAME = env.str("AI_USERNAME", "root")
    # Password; the fallback is a placeholder, not a usable credential.
    AI_PWD = env.str("AI_PWD", "<PASSWORD>")

    # Default model names.
    AI_DEFAULT_RERANK_MODEL = "rerank_bge_large"
    AI_DEFAULT_EMBEDDING_MODEL = "bge-large-zh-v1.5"
    AI_DEFAULT_XIAOBU2_EMBEDDING_MODEL = "xiaobu2"
# Module-level aliases for the connection settings held on Config; the model
# manager, embedding client and rerank client are all built from these values.
LDAI_EMB_ENDPOINT = Config.AI_EMB_ENDPOINT
LDAI_EMB_USERNAME = Config.AI_USERNAME
LDAI_EMB_PWD = Config.AI_PWD
Define Helper functions
def check_model_exist(model_mgr, model_name):
    """Return True if a model named *model_name* is listed with status READY.

    Args:
        model_mgr: Object whose ``list()`` method returns an iterable of
            mappings that carry ``"name"`` and ``"status"`` keys.
        model_name: Name of the model to look for.

    Returns:
        True when at least one listed entry has the given name and the
        status ``"READY"``; False otherwise, including when the model is
        listed under a different status or nothing is listed at all.
    """
    # The name is compared first, so "status" is only read for matching
    # entries.
    return any(
        model["name"] == model_name and model["status"] == "READY"
        for model in model_mgr.list()
    )
def create_emb_model(model_mgr, model_name, path, algo):
    """Create an embedding model through *model_mgr*.

    Args:
        model_mgr: Object exposing ``create(name=, task=, path=, algo=)``.
        model_name: Name to register the model under.
        path: Location of the model weights, forwarded unchanged.
        algo: Algorithm identifier, forwarded unchanged.

    Returns:
        Whatever ``model_mgr.create`` returns.
    """
    # Embedding models are always created with the FEATURE_EXTRACTION task.
    return model_mgr.create(
        name=model_name, task="FEATURE_EXTRACTION", path=path, algo=algo
    )
def create_rerank_model(model_mgr, model_name, path, algo):
    """Create a rerank model through *model_mgr*.

    Args:
        model_mgr: Object exposing ``create(name=, task=, path=, algo=)``.
        model_name: Name to register the model under.
        path: Location of the model weights, forwarded unchanged.
        algo: Algorithm identifier, forwarded unchanged.

    Returns:
        Whatever ``model_mgr.create`` returns.
    """
    # Rerank models are always created with the SEMANTIC_SIMILARITY task.
    return model_mgr.create(
        name=model_name, task="SEMANTIC_SIMILARITY", path=path, algo=algo
    )
Create & Deploy Embedding Model
# Model manager client, built from the shared endpoint and credentials.
ldai_model_mgr = ModelManager(LDAI_EMB_ENDPOINT, LDAI_EMB_USERNAME, LDAI_EMB_PWD)

emb_model_name = "bge_model"
emb_model_path = "huggingface://BAAI/bge-large-zh-v1.5"
emb_model_algo = "BGE_LARGE_ZH"

# Create the embedding model only when it is not already listed as READY.
# NOTE(review): check_model_exist is also False for a model that is listed
# under another status, so this branch may call create for a name that is
# already registered - confirm how ModelManager.create handles that case.
if not check_model_exist(ldai_model_mgr, emb_model_name):
    print("model not exist! will create")
    create_emb_model(ldai_model_mgr, emb_model_name, emb_model_path, emb_model_algo)
else:
    print(f"model {emb_model_name} exist!")
Init LindormAIEmbeddings
from langchain_community.embeddings.lindorm_embedding import LindormAIEmbeddings
# Embedding client: same endpoint and credentials as the model manager,
# pointed at the model name stored in emb_model_name.
ldai_emb = LindormAIEmbeddings(
endpoint=LDAI_EMB_ENDPOINT,
username=LDAI_EMB_USERNAME,
password=LDAI_EMB_PWD,
model_name=emb_model_name,
)
API Reference: LindormAIEmbeddings
Embed single query
# Embed one short Chinese-language query string and print the raw result.
query = "辛弃疾"
response = ldai_emb.embed_query(query)
print(f"emb result: {response}")
Embed multiple documents
import random
import string
# Build ten random 10-character strings (ASCII letters and digits) to use as
# sample documents.
docs = [
    "".join(random.choices(string.ascii_letters + string.digits, k=10))
    for _ in range(10)
]
response = ldai_emb.embed_documents(docs)
# Only the entry for the first document is printed.
print(f"emb result: {response[0]}")
Create & Deploy Rerank Model
rerank_model_name = "rerank_bge_large"
rerank_model_path = "huggingface://BAAI/bge-reranker-large"
rerank_model_algo = "BGE_RERANKER_LARGE"

# Create the rerank model only when it is not already listed as READY.
# NOTE(review): check_model_exist is also False for a model that is listed
# under another status, so this branch may call create for a name that is
# already registered - confirm how ModelManager.create handles that case.
if not check_model_exist(ldai_model_mgr, rerank_model_name):
    print("model not exist! will create")
    create_rerank_model(
        ldai_model_mgr, rerank_model_name, rerank_model_path, rerank_model_algo
    )
else:
    print(f"model {rerank_model_name} exist!")
Init LindormAIRerank
from langchain_community.document_compressors.lindormai_rerank import LindormAIRerank
# Rerank client: reuses the LDAI_EMB_* endpoint and credentials, pointed at
# the model name stored in rerank_model_name.
ldai_rerank = LindormAIRerank(
endpoint=LDAI_EMB_ENDPOINT,
username=LDAI_EMB_USERNAME,
password=LDAI_EMB_PWD,
model_name=rerank_model_name,
)
API Reference: LindormAIRerank
Rerank documents
from langchain_core.documents import Document
# Two candidate documents; the texts mean "a puppy" and "a slide".
docs = []
doc1 = Document("一只小狗")
doc2 = Document("一个滑滑梯")
docs.append(doc1)
docs.append(doc2)
# Pass the candidates and the query ("two puppies") to the rerank client and
# print whatever it returns.
result = ldai_rerank.compress_documents(docs, "两只小狗")
print(result)
API Reference: Document
Related
- Embedding model conceptual guide
- Embedding model how-to guides