世界杯来了，Towhee 带你多语言「以文搜球」！开发者社区 ...

世界杯来了，Towhee 带你多语言「以文搜球」！

python -m pip install towhee pymilvus

from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection, utility
# Milvus server address; used by connections.connect() and to build the
# tcp:// URIs handed to the towhee Milvus operators.
HOST = 'localhost'
PORT = '19530'
# Name of the collection holding the image embeddings and their file paths.
COLLECTION_NAME = 'text_image_search'
# Index type and metric type passed to create_index(); the metric type is
# also passed to the search operator.
INDEX_TYPE = 'IVF_FLAT'
METRIC_TYPE = 'IP'
# Dimension of the 'embedding' vector field.
DIM = 512
# Number of hits requested per query (passed as `limit` to the search operator).
TOPK = 3
def create_milvus(exist_ok=False):
    """Connect to Milvus and return the collection used for text-image search.

    Args:
        exist_ok: When True and a collection named COLLECTION_NAME already
            exists, return it as-is. When False (default), drop any existing
            collection of that name and build a fresh one.

    Returns:
        The pymilvus ``Collection`` named COLLECTION_NAME. A newly created
        collection has an auto-id INT64 primary key, a FLOAT_VECTOR
        ``embedding`` field of dimension DIM, a VARCHAR ``path`` field, and
        an index on ``embedding``.

    Raises:
        RuntimeError: If the connection to HOST:PORT cannot be established.
    """
    try:
        connections.connect(host=HOST, port=PORT)
    except Exception as err:
        # Chain the original error so the real cause is not lost.
        raise RuntimeError(f'Fail to connect Milvus with {HOST}:{PORT}') from err

    # has_collection must be *called*; the bare function object is always truthy.
    if utility.has_collection(COLLECTION_NAME):
        collection = Collection(COLLECTION_NAME)
        if exist_ok:
            print(f'Using existed collection: {COLLECTION_NAME}.')
            return collection
        print('Deleting previous collection...')
        collection.drop()

    # Create collection
    print('Creating collection...')
    fields = [
        FieldSchema(name='id', dtype=DataType.INT64, description='embedding ids', is_primary=True, auto_id=True),
        FieldSchema(name='embedding', dtype=DataType.FLOAT_VECTOR, description='image embeddings', dim=DIM),
        FieldSchema(name='path', dtype=DataType.VARCHAR, description='image path', max_length=500),
    ]
    schema = CollectionSchema(fields=fields, description='text image search')
    collection = Collection(name=COLLECTION_NAME, schema=schema)

    # Create index
    print('Creating index...')
    index_params = {
        'metric_type': METRIC_TYPE,
        'index_type': INDEX_TYPE,
        'params': {"nlist": 2048},
    }
    collection.create_index(field_name='embedding', index_params=index_params)
    print(f'Milvus collection is ready: {COLLECTION_NAME} ({INDEX_TYPE}, {METRIC_TYPE}).')
    return collection


# Module-level handle used by the snippets below (e.g. collection.num_entities).
collection = create_milvus()

import towhee
# Insert: decode every image matched by the glob, embed it with the Taiyi
# image encoder, and write (vector, path) into the Milvus collection.
insert = (
    towhee.glob['path']('path/to/soccer_ball/*.JPEG')
          .image_decode['path', 'image']()
          .image_text_embedding.taiyi['image', 'vec'](
                   model_name='taiyi-clip-roberta-102m-chinese',
                   modality='image')
          .ann_insert.milvus[('vec', 'path'), 'milvus_res'](
                   uri=f'tcp://{HOST}:{PORT}/{COLLECTION_NAME}')
          # Uncomment the two stages below to preview the inserted rows:
          # .select['path', 'image', 'milvus_res']()
          # .show()
)  # closing parenthesis was missing, which made the snippet a syntax error
print(f'Total vectors in collection: {collection.num_entities}')

import towhee
# Query: embed the text with the Taiyi text encoder, search Milvus for the
# TOPK closest image vectors, then decode and display the matching images.
query = (
    towhee.dc['text'](['输入查询语句'])
          .image_text_embedding.taiyi['text', 'vec'](
                   model_name='taiyi-clip-roberta-102m-chinese',
                   modality='text')
          .ann_search.milvus['vec', 'milvus_res'](
                   uri=f'tcp://{HOST}:{PORT}/{COLLECTION_NAME}',
                   metric_type=METRIC_TYPE,
                   limit=TOPK,
                   output_fields=['path'])
          # One row per search hit instead of one row per query.
          .flatten('milvus_res')
          .runas_op['milvus_res', ('image_path', 'score')](lambda x: (x.path, x.score))
          .image_decode['image_path', 'image']()
          .select['text', 'image', 'score']()
          .show()
)  # closing parenthesis was missing, which made the snippet a syntax error

from towhee.models.clip import create_model
from torchvision import transforms
from PIL import Image
import numpy
# Build the pretrained 'clip_vit_b32' model on CPU; encode_text() and
# encode_image() below both read this module-level instance.
model = create_model('clip_vit_b32', pretrained=True, device='cpu')
def encode_text(x):
    """Return the embedding of text *x* from the module-level CLIP model.

    The text is encoded with ``multilingual=True``; the leading axis is
    squeezed away and the result is returned as a NumPy array.
    """
    text_embedding = model.encode_text(x, multilingual=True)
    squeezed = text_embedding.squeeze(0)
    return squeezed.detach().cpu().numpy()
def encode_image(x):
    """Return the embedding of the image file at path *x* from the CLIP model.

    The image is resized (bicubic) and center-cropped to 224, converted to a
    tensor, normalized with the per-channel mean/std below, encoded by the
    module-level ``model``, and returned as a NumPy array with the batch axis
    squeezed away.

    Args:
        x: Path (or file object) accepted by ``PIL.Image.open``.
    """
    tfms = transforms.Compose([
        transforms.Resize(224, interpolation=transforms.InterpolationMode.BICUBIC),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(
            (0.48145466, 0.4578275, 0.40821073), (0.26862954, 0.26130258, 0.27577711)),
    ])
    # Open via a context manager so the file handle is released, and force
    # RGB so grayscale / RGBA / CMYK files match the 3-channel Normalize.
    with Image.open(x) as img:
        batch = tfms(img.convert('RGB')).unsqueeze(0)
    features = model.encode_image(batch).squeeze(0).detach().cpu().numpy()
    return features

import towhee
# Insert: embed every image matched by the glob with encode_image() and
# write (vector, path) into the Milvus collection, then show the result.
insert = (
    towhee.glob['path']('path/to/soccer_ball/*.JPEG')
          .runas_op['path', 'vec'](func=encode_image)
          .ann_insert.milvus[('vec', 'path'), 'milvus_res'](uri=f'tcp://{HOST}:{PORT}/{COLLECTION_NAME}')
          .select['path', 'milvus_res']()
          .show()
)  # closing parenthesis was missing, which made the snippet a syntax error
print(f'Total vectors in collection: {collection.num_entities}')

import towhee
# Query: embed the text with encode_text(), search Milvus for the TOPK
# closest image vectors, then decode and display the matching images.
query = (
    towhee.dc['text'](['输入查询语句'])
          .runas_op['text', 'vec'](func=encode_text)
          .ann_search.milvus['vec', 'milvus_res'](
                   uri=f'tcp://{HOST}:{PORT}/{COLLECTION_NAME}',
                   metric_type=METRIC_TYPE,
                   limit=TOPK,
                   output_fields=['path'])
          # One row per search hit instead of one row per query.
          .flatten('milvus_res')
          # NOTE(review): the snippet was cut off after `.flatten`; the stages
          # below are restored from the Taiyi-based query earlier in this
          # file, which uses the same collection and output field — confirm.
          .runas_op['milvus_res', ('image_path', 'score')](lambda x: (x.path, x.score))
          .image_decode['image_path', 'image']()
          .select['text', 'image', 'score']()
          .show()
)

世界杯来了，Towhee 带你多语言「以文搜球」！

世界杯来了，Towhee 带你多语言「以文搜球」！

中文版搜球

1. 准备工作

2. 插入数据

3. 检索测试

多语言版搜球