おでかけスポット検索のむずかしさ - Holidayを支える検索技術
- 30. 地名の判別 - 辞書の強化
{
"tokens" : [ {
"token" : "中",
"start_offset" : 0,
"end_offset" : 1,
"type" : "word",
"position" : 1
}, {
"token" : "中目黒",
"start_offset" : 0,
"end_offset" : 3,
"type" : "word",
"position" : 1
}, {
"token" : "目黒",
"start_offset" : 1,
"end_offset" : 3,
"type" : "word",
"position" : 2
}, {
"token" : "駅",
"start_offset" : 3,
"end_offset" : 4,
"type" : "word",
"position" : 3
} ]
}
{
"tokens" : [ {
"token" : "中",
"start_offset" : 0,
"end_offset" : 1,
"type" : "word",
"position" : 1
}, {
"token" : "中目黒駅",
"start_offset" : 0,
"end_offset" : 4,
"type" : "word",
"position" : 1
}, {
"token" : "目黒駅",
"start_offset" : 1,
"end_offset" : 4,
"type" : "word",
"position" : 2
} ]
}
with NEologd（上記2番目の出力：「中目黒駅」を1語として認識）
$ curl -XGET 'localhost:9200/index/_analyze' -d '中目黒駅'
kuromoji (Default)（上記1番目の出力：中 / 中目黒 / 目黒 / 駅）
- 31. 地名の判別 - 辞書の強化
kuromoji (Default)（以下1番目の出力：奥 / 渋谷）
{
"tokens" : [ {
"token" : "奥",
"start_offset" : 0,
"end_offset" : 1,
"type" : "word",
"position" : 1
}, {
"token" : "渋谷",
"start_offset" : 1,
"end_offset" : 3,
"type" : "word",
"position" : 2
} ]
}
{
"tokens" : [ {
"token" : "奥",
"start_offset" : 0,
"end_offset" : 1,
"type" : "word",
"position" : 1
}, {
"token" : "奥渋谷",
"start_offset" : 0,
"end_offset" : 3,
"type" : "word",
"position" : 1
}, {
"token" : "渋谷",
"start_offset" : 1,
"end_offset" : 3,
"type" : "word",
"position" : 2
} ]
}
with NEologd（上記2番目の出力：「奥渋谷」を1語として認識）
$ curl -XGET 'localhost:9200/index/_analyze' -d '奥渋谷'