Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions api-template.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,17 @@ Resources:
type: array
items:
type: string
Tag:
type: object
properties:
name:
type: string
name_with_analyzer:
type: string
count:
type: integer
created_at:
type: integer
paths:
/search/articles:
get:
Expand Down Expand Up @@ -313,6 +324,41 @@ Resources:
passthroughBehavior: when_no_templates
httpMethod: POST
type: aws_proxy
/search/tags:
get:
description: "タグ検索"
parameters:
- name: "query"
in: "query"
description: "検索ワード"
required: true
type: "string"
- name: "limit"
in: "query"
description: "取得件数"
required: false
type: "integer"
minimum: 1
- name: "page"
in: "query"
description: "ページ"
required: false
type: "integer"
responses:
"200":
description: "タグ一覧"
schema:
type: array
items:
$ref: '#/definitions/Tag'
x-amazon-apigateway-integration:
responses:
default:
statusCode: "200"
uri: !Sub arn:aws:apigateway:${AWS::Region}:lambda:path/2015-03-31/functions/${ElasticSearchSearchUsers.Arn}/invocations
passthroughBehavior: when_no_templates
httpMethod: POST
type: aws_proxy
/articles/recent:
get:
description: "最新記事一覧情報を取得"
Expand Down Expand Up @@ -1827,6 +1873,22 @@ Resources:
Path: /search/users
Method: get
RestApiId: !Ref RestApi
ElasticSearchSearchTag:
Type: AWS::Serverless::Function
Properties:
Handler: handler.lambda_handler
Role: !GetAtt LambdaRole.Arn
CodeUri: ./deploy/search_tags.zip
Environment:
Variables:
ELASTIC_SEARCH_ENDPOINT: !Ref ElasticSearchEndpoint
Events:
Api:
Type: Api
Properties:
Path: /search/tags
Method: get
RestApiId: !Ref RestApi
ElasticSearchService:
Type: "AWS::Elasticsearch::Domain"
Properties:
Expand Down
28 changes: 28 additions & 0 deletions elasticsearch-setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,34 @@ def create_index(self, index, setting):
}
create_index_list.append({"name": "users", "setting": users_setting})

tag_settings = {
'settings': {
'analysis': {
'normalizer': {
'lowercase_normalizer': {
'type': 'custom',
'char_filter': [],
'filter': ['lowercase']
}
}
}
},
'mappings': {
'tag': {
'properties': {
'name': {
'type': 'keyword',
'normalizer': 'lowercase_normalizer'
},
'created_at': {
'type': 'integer'
}
}
}
}
}
create_index_list.append({"name": "tags", "setting": tag_settings})

for index in create_index_list:
name = index["name"]
if esconfig.check_index_exists(name):
Expand Down
Binary file added src/common/__pycache__/settings.cpython-36.pyc
Binary file not shown.
Binary file added src/common/__pycache__/tag_util.cpython-36.pyc
Binary file not shown.
33 changes: 32 additions & 1 deletion src/common/es_util.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,38 @@
# -*- coding: utf-8 -*-
class ESUtil:

@staticmethod
def search_tag(elasticsearch, word, limit, page):
body = {
'query': {
'bool': {
'must': [
{
'match': {
'name_with_analyzer': {
'query': word.lower(),
'analyzer': 'keyword'
}
}
}
]
}
},
'sort': [
{'count': 'desc'}
],
'from': limit * (page - 1),
'size': limit
}

class ESUtil:
response = elasticsearch.search(
index='tags',
body=body
)

tags = [item['_source'] for item in response['hits']['hits']]

return tags

@staticmethod
def search_article(elasticsearch, word, limit, page):
Expand Down
1 change: 1 addition & 0 deletions src/common/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@
USERS_ARTICLE_INDEX_DEFAULT_LIMIT = 10
NOTIFICATION_INDEX_DEFAULT_LIMIT = 10
COMMENT_INDEX_DEFAULT_LIMIT = 10
TAG_SEARCH_DEFAULT_LIMIT = 100

article_id_length = 12
COMMENT_ID_LENGTH = 12
Expand Down
112 changes: 90 additions & 22 deletions src/common/tag_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import os
import re
import time

Expand All @@ -8,9 +7,7 @@

class TagUtil:
@classmethod
def create_and_count(cls, dynamodb, before_tag_names, after_tag_names):
tag_table = dynamodb.Table(os.environ['TAG_TABLE_NAME'])

def create_and_count(cls, elasticsearch, before_tag_names, after_tag_names):
if before_tag_names is None:
before_tag_names = []

Expand All @@ -19,34 +16,80 @@ def create_and_count(cls, dynamodb, before_tag_names, after_tag_names):

for tag_name in after_tag_names:
if before_tag_names is None or tag_name not in before_tag_names:
if tag_table.get_item(Key={'name': tag_name}).get('Item'):

# 大文字小文字区別せずに存在チェックを行いDB(ES)にすでに存在する値を取得する
tag = cls.__get_item_case_insensitive(elasticsearch, tag_name)

if tag:
# タグが追加された場合カウントを+1する
TagUtil.update_count(tag_table, tag_name, 1)
# タグがDBに存在しない場合は新規作成する
cls.update_count(elasticsearch, tag['name'], 1)
# タグがDB(ES)に存在しない場合は新規作成する
else:
tag_table.put_item(
Item={
'name': tag_name,
'count': 1,
'created_at': int(time.time())
}
)
cls.create_tag(elasticsearch, tag_name)

# タグが外された場合カウントを-1する
for tag_name in before_tag_names:
if tag_name not in after_tag_names:
if tag_table.get_item(Key={'name': tag_name}).get('Item'):
cls.update_count(tag_table, tag_name, -1)
tag = cls.__get_item_case_insensitive(elasticsearch, tag_name)
if tag:
cls.update_count(elasticsearch, tag['name'], -1)

@classmethod
def update_count(cls, elasticsearch, tag_name, num):
update_script = {
'script': {
'source': 'ctx._source.count += params.count',
'lang': 'painless',
'params': {
'count': num
}
}
}

elasticsearch.update(index='tags', doc_type='tag', id=tag_name, body=update_script)

"""
ここで作成されたtagが検索対象になるまで(__get_item_case_insensitiveの条件として引っかかってくるまで)1sほどかかる
これはESのセグメントマージという仕様によるものでどうしても回避したい場合は `elasticsearch.indices.refresh(index='tags')` をcreate後に行う必要がある
しかし、ESのデフォルト挙動を無理やり変えることになり、返ってパフォーマンス低下が起きる可能性もあるので特に何もしていない
"""
@classmethod
def update_count(cls, table, tag_name, num):
table.update_item(
Key={'name': tag_name},
UpdateExpression='set #attr = #attr + :increment',
ExpressionAttributeNames={'#attr': 'count'},
ExpressionAttributeValues={':increment': num}
def create_tag(cls, elasticsearch, tag_name):
tag = {
'name': tag_name,
'name_with_analyzer': tag_name,
'count': 1,
'created_at': int(time.time())
}

elasticsearch.index(
index='tags',
doc_type='tag',
id=tag['name'],
body=tag
)

"""
与えられたタグ名をElasticSearchに問い合わせ(大文字小文字区別せず)
すでに存在する場合はElasticSearchに存在する文字列に完全一致する形に変換し、タグ名の配列を返却する
"""
@classmethod
def get_tags_with_name_collation(cls, elasticsearch, tag_names):
if not tag_names:
return tag_names

results = []

for tag_name in tag_names:
tag = cls.__get_item_case_insensitive(elasticsearch, tag_name)

if tag:
results.append(tag['name'])
else:
results.append(tag_name)

return results

@staticmethod
def validate_format(tags):
pattern = re.compile(settings.TAG_DENIED_SYMBOL_PATTERN)
Expand All @@ -60,3 +103,28 @@ def validate_format(tags):
for symbol in settings.TAG_ALLOWED_SYMBOLS:
if tag[0] == symbol or tag[-1] == symbol:
raise ValidationError("tags don't support {str} with start and end of character".format(str=symbol))

@classmethod
def __get_item_case_insensitive(cls, elasticsearch, tag_name):
body = {
'query': {
'bool': {
'must': [
{'term': {'name': tag_name}}
]
}
}
}

res = elasticsearch.search(
index='tags',
doc_type='tag',
body=body
)

tags = [item['_source'] for item in res['hits']['hits']]

if not tags:
return None

return tags[0]
21 changes: 20 additions & 1 deletion src/handlers/me/articles/drafts/publish/handler.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,29 @@
# -*- coding: utf-8 -*-
import os

import boto3
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth

from me_articles_drafts_publish import MeArticlesDraftsPublish

dynamodb = boto3.resource('dynamodb')
awsauth = AWS4Auth(
os.environ['AWS_ACCESS_KEY_ID'],
os.environ['AWS_SECRET_ACCESS_KEY'],
os.environ['AWS_REGION'],
'es',
session_token=os.environ['AWS_SESSION_TOKEN']
)
elasticsearch = Elasticsearch(
hosts=[{'host': os.environ['ELASTIC_SEARCH_ENDPOINT'], 'port': 443}],
http_auth=awsauth,
use_ssl=True,
verify_certs=True,
connection_class=RequestsHttpConnection
)


def lambda_handler(event, context):
me_articles_drafts_publish = MeArticlesDraftsPublish(event, context, dynamodb)
me_articles_drafts_publish = MeArticlesDraftsPublish(event, context, dynamodb=dynamodb, elasticsearch=elasticsearch)
return me_articles_drafts_publish.main()
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,12 @@ def exec_main_proc(self):
':article_status': 'public',
':one': 1,
':topic': self.params['topic'],
':tags': self.params.get('tags')
':tags': TagUtil.get_tags_with_name_collation(self.elasticsearch, self.params.get('tags'))
}
)

try:
TagUtil.create_and_count(self.dynamodb, article_info_before.get('tags'), self.params.get('tags'))
TagUtil.create_and_count(self.elasticsearch, article_info_before.get('tags'), self.params.get('tags'))
except Exception as e:
logging.fatal(e)
traceback.print_exc()
Expand Down
19 changes: 19 additions & 0 deletions src/handlers/me/articles/public/republish/handler.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,27 @@
# -*- coding: utf-8 -*-
import os

import boto3
from elasticsearch import Elasticsearch, RequestsHttpConnection
from requests_aws4auth import AWS4Auth

from me_articles_public_republish import MeArticlesPublicRepublish

dynamodb = boto3.resource('dynamodb')
awsauth = AWS4Auth(
os.environ['AWS_ACCESS_KEY_ID'],
os.environ['AWS_SECRET_ACCESS_KEY'],
os.environ['AWS_REGION'],
'es',
session_token=os.environ['AWS_SESSION_TOKEN']
)
elasticsearch = Elasticsearch(
hosts=[{'host': os.environ['ELASTIC_SEARCH_ENDPOINT'], 'port': 443}],
http_auth=awsauth,
use_ssl=True,
verify_certs=True,
connection_class=RequestsHttpConnection
)


def lambda_handler(event, context):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __update_article_info(self, article_content_edit):
':eye_catch_url': article_content_edit['eye_catch_url'],
':sync_elasticsearch': 1,
':topic': self.params['topic'],
':tags': self.params.get('tags')
':tags': TagUtil.get_tags_with_name_collation(self.elasticsearch, self.params.get('tags'))
}
)

Expand Down
Loading