AlisProject · keillera · Sep 12, 2018 · Sep 1, 2018 · Sep 1, 2018 · Sep 1, 2018
diff --git a/api-template.yaml b/api-template.yaml
@@ -242,6 +242,17 @@ Resources:
                 type: array
                 items:
                   type: string
+          Tag:
+            type: object
+            properties:
+              name:
+                type: string
+              name_with_analyzer:
+                type: string
+              count:
+                type: integer
+              created_at:
+                type: integer
         paths:
           /search/articles:
             get:
@@ -313,6 +324,41 @@ Resources:
                 passthroughBehavior: when_no_templates
                 httpMethod: POST
                 type: aws_proxy
+          /search/tags:
+            get:
+              description: "タグ検索"
+              parameters:
+              - name: "query"
+                in: "query"
+                description: "検索ワード"
+                required: true
+                type: "string"
+              - name: "limit"
+                in: "query"
+                description: "取得件数"
+                required: false
+                type: "integer"
+                minimum: 1
+              - name: "page"
+                in: "query"
+                description: "ページ"
+                required: false
+                type: "integer"
+              responses:
+                "200":
+                  description: "タグ一覧"
+                  schema:
+                    type: array
+                    items:
+                      $ref: '#/definitions/Tag'
+              x-amazon-apigateway-integration:
+                responses:
+                  default:
+                    statusCode: "200"
+                uri: !Sub arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/${ElasticSearchSearchUsers.Arn}/invocations
+                passthroughBehavior: when_no_templates
+                httpMethod: POST
+                type: aws_proxy
           /articles/recent:
             get:
               description: "最新記事一覧情報を取得"
@@ -1827,6 +1873,22 @@ Resources:
             Path: /search/users
             Method: get
             RestApiId: !Ref RestApi
+  ElasticSearchSearchTag:
+    Type: AWS::Serverless::Function
+    Properties:
+      Handler: handler.lambda_handler
+      Role: !GetAtt LambdaRole.Arn
+      CodeUri: ./deploy/search_tags.zip
+      Environment:
+        Variables:
+          ELASTIC_SEARCH_ENDPOINT: !Ref ElasticSearchEndpoint
+      Events:
+        Api:
+          Type: Api
+          Properties:
+            Path: /search/tags
+            Method: get
+            RestApiId: !Ref RestApi
   ElasticSearchService:
     Type: "AWS::Elasticsearch::Domain"
     Properties: 

diff --git a/elasticsearch-setup.py b/elasticsearch-setup.py
@@ -161,6 +161,34 @@ def create_index(self, index, setting):
 }
 create_index_list.append({"name": "users", "setting": users_setting})
 
+tag_settings = {
+    'settings': {
+        'analysis': {
+            'normalizer': {
+                'lowercase_normalizer': {
+                    'type': 'custom',
+                    'char_filter': [],
+                    'filter': ['lowercase']
+                }
+            }
+        }
+    },
+    'mappings': {
+        'tag': {
+            'properties': {
+                'name': {
+                    'type': 'keyword',
+                    'normalizer': 'lowercase_normalizer'
+                },
+                'created_at': {
+                    'type': 'integer'
+                }
+            }
+        }
+    }
+}
+create_index_list.append({"name": "tags", "setting": tag_settings})
+
 for index in create_index_list:
     name = index["name"]
     if esconfig.check_index_exists(name):

diff --git a/src/common/__pycache__/settings.cpython-36.pyc b/src/common/__pycache__/settings.cpython-36.pyc
diff --git a/src/common/__pycache__/tag_util.cpython-36.pyc b/src/common/__pycache__/tag_util.cpython-36.pyc
diff --git a/src/common/es_util.py b/src/common/es_util.py
@@ -1,7 +1,38 @@
 # -*- coding: utf-8 -*-
+class ESUtil:
 
+    @staticmethod
+    def search_tag(elasticsearch, word, limit, page):
+        body = {
+            'query': {
+                'bool': {
+                    'must': [
+                        {
+                            'match': {
+                                'name_with_analyzer': {
+                                    'query': word.lower(),
+                                    'analyzer': 'keyword'
+                                }
+                            }
+                        }
+                    ]
+                }
+            },
+            'sort': [
+                {'count': 'desc'}
+            ],
+            'from': limit * (page - 1),
+            'size': limit
+        }
 
-class ESUtil:
+        response = elasticsearch.search(
+            index='tags',
+            body=body
+        )
+
+        tags = [item['_source'] for item in response['hits']['hits']]
+
+        return tags
 
     @staticmethod
     def search_article(elasticsearch, word, limit, page):

diff --git a/src/common/settings.py b/src/common/settings.py
@@ -119,6 +119,7 @@
 USERS_ARTICLE_INDEX_DEFAULT_LIMIT = 10
 NOTIFICATION_INDEX_DEFAULT_LIMIT = 10
 COMMENT_INDEX_DEFAULT_LIMIT = 10
+TAG_SEARCH_DEFAULT_LIMIT = 100
 
 article_id_length = 12
 COMMENT_ID_LENGTH = 12

diff --git a/src/common/tag_util.py b/src/common/tag_util.py
@@ -1,4 +1,3 @@
-import os
 import re
 import time
 
@@ -8,9 +7,7 @@
 
 class TagUtil:
     @classmethod
-    def create_and_count(cls, dynamodb, before_tag_names, after_tag_names):
-        tag_table = dynamodb.Table(os.environ['TAG_TABLE_NAME'])
-
+    def create_and_count(cls, elasticsearch, before_tag_names, after_tag_names):
         if before_tag_names is None:
             before_tag_names = []
 
@@ -19,34 +16,80 @@ def create_and_count(cls, dynamodb, before_tag_names, after_tag_names):
 
         for tag_name in after_tag_names:
             if before_tag_names is None or tag_name not in before_tag_names:
-                if tag_table.get_item(Key={'name': tag_name}).get('Item'):
+
+                # 大文字小文字区別せずに存在チェックを行いDB(ES)にすでに存在する値を取得する
+                tag = cls.__get_item_case_insensitive(elasticsearch, tag_name)
+
+                if tag:
                     # タグが追加された場合カウントを+1する
-                    TagUtil.update_count(tag_table, tag_name, 1)
-                # タグがDBに存在しない場合は新規作成する
+                    cls.update_count(elasticsearch, tag['name'], 1)
+                # タグがDB(ES)に存在しない場合は新規作成する
                 else:
-                    tag_table.put_item(
-                        Item={
-                            'name': tag_name,
-                            'count': 1,
-                            'created_at': int(time.time())
-                        }
-                    )
+                    cls.create_tag(elasticsearch, tag_name)
 
         # タグが外された場合カウントを-1する
         for tag_name in before_tag_names:
             if tag_name not in after_tag_names:
-                if tag_table.get_item(Key={'name': tag_name}).get('Item'):
-                    cls.update_count(tag_table, tag_name, -1)
+                tag = cls.__get_item_case_insensitive(elasticsearch, tag_name)
+                if tag:
+                    cls.update_count(elasticsearch, tag['name'], -1)
+
+    @classmethod
+    def update_count(cls, elasticsearch, tag_name, num):
+        update_script = {
+            'script': {
+                'source': 'ctx._source.count += params.count',
+                'lang': 'painless',
+                'params': {
+                    'count': num
+                }
+            }
+        }
 
+        elasticsearch.update(index='tags', doc_type='tag', id=tag_name, body=update_script)
+
+    """
+    ここで作成されたtagが検索対象になるまで(__get_item_case_insensitiveの条件として引っかかってくるまで)1sほどかかる
+    これはESのセグメントマージという仕様によるものでどうしても回避したい場合は `elasticsearch.indices.refresh(index='tags')` をcreate後に行う必要がある
+    しかし、ESのデフォルト挙動を無理やり変えることになり、返ってパフォーマンス低下が起きる可能性もあるので特に何もしていない
+    """
     @classmethod
-    def update_count(cls, table, tag_name, num):
-        table.update_item(
-            Key={'name': tag_name},
-            UpdateExpression='set #attr = #attr + :increment',
-            ExpressionAttributeNames={'#attr': 'count'},
-            ExpressionAttributeValues={':increment': num}
+    def create_tag(cls, elasticsearch, tag_name):
+        tag = {
+            'name': tag_name,
+            'name_with_analyzer': tag_name,
+            'count': 1,
+            'created_at': int(time.time())
+        }
+
+        elasticsearch.index(
+            index='tags',
+            doc_type='tag',
+            id=tag['name'],
+            body=tag
         )
 
+    """
+    与えられたタグ名をElasticSearchに問い合わせ(大文字小文字区別せず)
+    すでに存在する場合はElasticSearchに存在する文字列に完全一致する形に変換し、タグ名の配列を返却する
+    """
+    @classmethod
+    def get_tags_with_name_collation(cls, elasticsearch, tag_names):
+        if not tag_names:
+            return tag_names
+
+        results = []
+
+        for tag_name in tag_names:
+            tag = cls.__get_item_case_insensitive(elasticsearch, tag_name)
+
+            if tag:
+                results.append(tag['name'])
+            else:
+                results.append(tag_name)
+
+        return results
+
     @staticmethod
     def validate_format(tags):
         pattern = re.compile(settings.TAG_DENIED_SYMBOL_PATTERN)
@@ -60,3 +103,28 @@ def validate_format(tags):
             for symbol in settings.TAG_ALLOWED_SYMBOLS:
                 if tag[0] == symbol or tag[-1] == symbol:
                     raise ValidationError("tags don't support {str} with start and end of character".format(str=symbol))
+
+    @classmethod
+    def __get_item_case_insensitive(cls, elasticsearch, tag_name):
+        body = {
+            'query': {
+                'bool': {
+                    'must': [
+                        {'term': {'name': tag_name}}
+                    ]
+                }
+            }
+        }
+
+        res = elasticsearch.search(
+            index='tags',
+            doc_type='tag',
+            body=body
+        )
+
+        tags = [item['_source'] for item in res['hits']['hits']]
+
+        if not tags:
+            return None
+
+        return tags[0]
diff --git a/src/handlers/me/articles/drafts/publish/handler.py b/src/handlers/me/articles/drafts/publish/handler.py
@@ -1,10 +1,29 @@
 # -*- coding: utf-8 -*-
+import os
+
 import boto3
+from elasticsearch import Elasticsearch, RequestsHttpConnection
+from requests_aws4auth import AWS4Auth
+
 from me_articles_drafts_publish import MeArticlesDraftsPublish
 
 dynamodb = boto3.resource('dynamodb')
+awsauth = AWS4Auth(
+    os.environ['AWS_ACCESS_KEY_ID'],
+    os.environ['AWS_SECRET_ACCESS_KEY'],
+    os.environ['AWS_REGION'],
+    'es',
+    session_token=os.environ['AWS_SESSION_TOKEN']
+)
+elasticsearch = Elasticsearch(
+    hosts=[{'host': os.environ['ELASTIC_SEARCH_ENDPOINT'], 'port': 443}],
+    http_auth=awsauth,
+    use_ssl=True,
+    verify_certs=True,
+    connection_class=RequestsHttpConnection
+)
 
 
 def lambda_handler(event, context):
-    me_articles_drafts_publish = MeArticlesDraftsPublish(event, context, dynamodb)
+    me_articles_drafts_publish = MeArticlesDraftsPublish(event, context, dynamodb=dynamodb, elasticsearch=elasticsearch)
     return me_articles_drafts_publish.main()
diff --git a/src/handlers/me/articles/drafts/publish/me_articles_drafts_publish.py b/src/handlers/me/articles/drafts/publish/me_articles_drafts_publish.py
@@ -60,12 +60,12 @@ def exec_main_proc(self):
                 ':article_status': 'public',
                 ':one': 1,
                 ':topic': self.params['topic'],
-                ':tags': self.params.get('tags')
+                ':tags': TagUtil.get_tags_with_name_collation(self.elasticsearch, self.params.get('tags'))
             }
         )
 
         try:
-            TagUtil.create_and_count(self.dynamodb, article_info_before.get('tags'), self.params.get('tags'))
+            TagUtil.create_and_count(self.elasticsearch, article_info_before.get('tags'), self.params.get('tags'))
         except Exception as e:
             logging.fatal(e)
             traceback.print_exc()

diff --git a/src/handlers/me/articles/public/republish/handler.py b/src/handlers/me/articles/public/republish/handler.py
@@ -1,8 +1,27 @@
 # -*- coding: utf-8 -*-
+import os
+
 import boto3
+from elasticsearch import Elasticsearch, RequestsHttpConnection
+from requests_aws4auth import AWS4Auth
+
 from me_articles_public_republish import MeArticlesPublicRepublish
 
 dynamodb = boto3.resource('dynamodb')
+awsauth = AWS4Auth(
+    os.environ['AWS_ACCESS_KEY_ID'],
+    os.environ['AWS_SECRET_ACCESS_KEY'],
+    os.environ['AWS_REGION'],
+    'es',
+    session_token=os.environ['AWS_SESSION_TOKEN']
+)
+elasticsearch = Elasticsearch(
+    hosts=[{'host': os.environ['ELASTIC_SEARCH_ENDPOINT'], 'port': 443}],
+    http_auth=awsauth,
+    use_ssl=True,
+    verify_certs=True,
+    connection_class=RequestsHttpConnection
+)
 
 
 def lambda_handler(event, context):

diff --git a/src/handlers/me/articles/public/republish/me_articles_public_republish.py b/src/handlers/me/articles/public/republish/me_articles_public_republish.py
@@ -84,7 +84,7 @@ def __update_article_info(self, article_content_edit):
                 ':eye_catch_url': article_content_edit['eye_catch_url'],
                 ':sync_elasticsearch': 1,
                 ':topic': self.params['topic'],
-                ':tags': self.params.get('tags')
+                ':tags': TagUtil.get_tags_with_name_collation(self.elasticsearch, self.params.get('tags'))
             }
         )