AliNLP分词插件（analysis-aliws）- 检索分析服务Elasticsearch版（ES）

AliNLP分词插件（analysis-aliws）是阿里云Elasticsearch（ES）自带的一个系统默认插件。安装该插件后会在阿里云ES中集成对应的分析器和分词器，可用于文档的分析和检索。您还可以通过该插件的词库配置功能实现词典的热更新。

插件介绍

安装analysis-aliws插件后，阿里云ES默认会集成以下分析器和分词器。您可以使用这些分析器和分词器查询文档，也可以通过词库配置功能自定义更新分词词库。

分析器：aliws（不会截取虚词、虚词短语、符号）
分词器：aliws_tokenizer

```
PUT /index
{
    "mappings": {
        "fulltext": {
            "properties": {
                "content": {
                    "type": "text",
                    "analyzer": "aliws"
                }
            }
        }
    }
}
```

```
PUT /index
{
  "mappings": {
    "properties": {
      "content": {
        "type": "text",
        "analyzer": "aliws"
      }
    }
  }
}
```

```
{
  "acknowledged": true,
  "shards_acknowledged": true,
  "index": "index"
}
```

```
POST /index/fulltext/1
{
  "content": "I like go to school."
}
```
```
{
  "_index": "index",
  "_type": "fulltext",
  "_id": "1",
  "_version": 1,
  "result": "created",
  "_shards": {
    "total": 2,
    "successful": 2,
    "failed": 0
  },
  "_seq_no": 0,
  "_primary_term": 1
}
```

```
GET /index/fulltext/_search
{
  "query": {
    "match": {
      "content": "school"
    }
  }
}
```

```
{
  "took": 5,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "skipped": 0,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.2876821,
    "hits": [
      {
        "_index": "index",
        "_type": "fulltext",
        "_id": "1",
        "_score": 0.2876821,
        "_source": {
          "content": "I like go to school."
        }
      }
    ]
  }
}
```

```
GET _analyze
{
  "text": "I like go to school.",
  "analyzer": "aliws"
}
```

```
{
  "tokens" : [
    {
      "token" : "i",
      "start_offset" : 0,
      "end_offset" : 1,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : "like",
      "start_offset" : 2,
      "end_offset" : 6,
      "type" : "word",
      "position" : 2
    },
    {
      "token" : "go",
      "start_offset" : 7,
      "end_offset" : 9,
      "type" : "word",
      "position" : 4
    },
    {
      "token" : "school",
      "start_offset" : 13,
      "end_offset" : 19,
      "type" : "word",
      "position" : 8
    }
  ]
}
```

```
GET _analyze
{
  "text": "I like go to school.",
  "tokenizer": "aliws_tokenizer"
}
```

```
{
  "tokens" : [
    {
      "token" : "I",
      "start_offset" : 0,
      "end_offset" : 1,
      "type" : "word",
      "position" : 0
    },
    {
      "token" : " ",
      "start_offset" : 1,
      "end_offset" : 2,
      "type" : "word",
      "position" : 1
    },
    {
      "token" : "like",
      "start_offset" : 2,
      "end_offset" : 6,
      "type" : "word",
      "position" : 2
    },
    {
      "token" : " ",
      "start_offset" : 6,
      "end_offset" : 7,
      "type" : "word",
      "position" : 3
    },
    {
      "token" : "go",
      "start_offset" : 7,
      "end_offset" : 9,
      "type" : "word",
      "position" : 4
    },
    {
      "token" : " ",
      "start_offset" : 9,
      "end_offset" : 10,
      "type" : "word",
      "position" : 5
    },
    {
      "token" : "to",
      "start_offset" : 10,
      "end_offset" : 12,
      "type" : "word",
      "position" : 6
    },
    {
      "token" : " ",
      "start_offset" : 12,
      "end_offset" : 13,
      "type" : "word",
      "position" : 7
    },
    {
      "token" : "school",
      "start_offset" : 13,
      "end_offset" : 19,
      "type" : "word",
      "position" : 8
    },
    {
      "token" : ".",
      "start_offset" : 19,
      "end_offset" : 20,
      "type" : "word",
      "position" : 9
    }
  ]
}
```

```
PUT my-index-000001
  "settings": {
    "analysis": {
     "filter": {
      "my_stop": {
       "type": "stop",
       "stopwords": [
     "analyzer": {
      "my_custom_analyzer": {
       "type": "custom",
       "tokenizer": "aliws_tokenizer",
       "filter": [
        "lowercase",
        "porter_stem",
        "my_stop"
}
```

- ```
PUT my-index1
{
    "settings": {
        "number_of_shards": 1,
        "analysis": {
            "analyzer": {
                "my_custom_analyzer": {
                    "type": "custom",
                    "tokenizer": "aliws_tokenizer"
                }
            }
        }
    }
}
```
- ```
GET my-index1/_analyze
{
    "analyzer": "my_custom_analyzer",
    "text": ["iphone"]
}
```

使用AliNLP分词插件（analysis-aliws）

插件介绍

前提条件

使用限制

使用aliws分析器查询文档

配置词库

测试分析器

测试分词器

自定义分词器构造说明

常见问题

相关文档