实战场景
一、电商搜索
1.1 业务场景
需求:
- 商品标题/描述全文检索
- 多条件筛选(品牌、价格、类目)
- 相关性排序(销量、评分、新品)
- 搜索建议(自动补全)
- 拼写纠错
1.2 Mapping设计
PUT /products
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"analysis": {
"analyzer": {
"ik_pinyin": {
"type": "custom",
"tokenizer": "ik_max_word",
"filter": ["lowercase", "pinyin_filter"]
},
"path_hierarchy_analyzer": {
"type": "custom",
"tokenizer": "path_hierarchy"
}
},
"filter": {
"pinyin_filter": {
"type": "pinyin",
"keep_first_letter": true,
"keep_separate_first_letter": false,
"keep_full_pinyin": true,
"keep_original": true,
"limit_first_letter_length": 16,
"lowercase": true
}
}
}
},
"mappings": {
"properties": {
"id": {
"type": "keyword"
},
"title": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
},
"pinyin": {
"type": "text",
"analyzer": "ik_pinyin"
}
}
},
"description": {
"type": "text",
"analyzer": "ik_max_word"
},
"brand": {
"type": "keyword"
},
"category": {
"type": "keyword"
},
"category_path": {
"type": "text",
"analyzer": "path_hierarchy_analyzer"
},
"price": {
"type": "scaled_float",
"scaling_factor": 100
},
"sales": {
"type": "integer"
},
"rating": {
"type": "half_float"
},
"stock": {
"type": "integer"
},
"tags": {
"type": "keyword"
},
"images": {
"type": "keyword",
"index": false
},
"created_at": {
"type": "date"
},
"updated_at": {
"type": "date"
},
"suggest": {
"type": "completion",
"analyzer": "ik_max_word",
"contexts": [
{
"name": "category",
"type": "category"
}
]
}
}
}
}
1.3 综合搜索查询
GET /products/_search
{
"query": {
"function_score": {
"query": {
"bool": {
"must": [
{
"multi_match": {
"query": "苹果手机",
"fields": [
"title^3", // 标题权重最高
"title.pinyin^2", // 拼音搜索
"description",
"brand^2"
],
"type": "best_fields",
"minimum_should_match": "75%",
"fuzziness": "AUTO" // 拼写纠错
}
}
],
"filter": [
{
"term": { "category": "手机数码" }
},
{
"range": {
"price": {
"gte": 3000,
"lte": 8000
}
}
},
{
"terms": { "brand": ["Apple", "华为", "小米"] }
},
{
"range": {
"stock": { "gt": 0 } // 有货
}
}
],
"should": [
{
"term": {
"tags": {
"value": "5G",
"boost": 1.5
}
}
}
]
}
},
"functions": [
{
"filter": { "term": { "brand": "Apple" } },
"weight": 1.3
},
{
"field_value_factor": {
"field": "sales",
"factor": 0.01,
"modifier": "log1p",
"missing": 0
}
},
{
"field_value_factor": {
"field": "rating",
"factor": 1.0,
"modifier": "sqrt",
"missing": 3.0
}
},
{
"gauss": {
"created_at": {
"origin": "now",
"scale": "90d",
"offset": "7d",
"decay": 0.5
}
}
}
],
"score_mode": "sum",
"boost_mode": "multiply",
"min_score": 1
}
},
"from": 0,
"size": 20,
"sort": [
{ "_score": "desc" },
{ "sales": "desc" }
],
"highlight": {
"fields": {
"title": {
"pre_tags": ["<em>"],
"post_tags": ["</em>"]
},
"description": {}
}
},
"aggs": {
"brand_agg": {
"terms": {
"field": "brand",
"size": 10
}
},
"price_range_agg": {
"range": {
"field": "price",
"ranges": [
{ "key": "0-1000", "to": 1000 },
{ "key": "1000-3000", "from": 1000, "to": 3000 },
{ "key": "3000-5000", "from": 3000, "to": 5000 },
{ "key": "5000+", "from": 5000 }
]
}
},
"category_agg": {
"terms": {
"field": "category",
"size": 20
}
}
}
}
1.4 自动补全
// 写入数据时构建suggest字段
PUT /products/_doc/1
{
"title": "iPhone 15 Pro Max 256GB",
"price": 9999,
"suggest": {
"input": ["iPhone", "iPhone 15", "iPhone 15 Pro", "苹果手机"],
"contexts": {
"category": ["手机数码"]
}
}
}
// 自动补全查询
GET /products/_search
{
"suggest": {
"product-suggest": {
"prefix": "iph",
"completion": {
"field": "suggest",
"size": 10,
"contexts": {
"category": ["手机数码"]
},
"fuzzy": {
"fuzziness": "AUTO"
}
}
}
}
}
// 响应
{
"suggest": {
"product-suggest": [
{
"text": "iph",
"offset": 0,
"length": 3,
"options": [
{
"text": "iPhone 15 Pro",
"_score": 10.0,
"_source": {
"title": "iPhone 15 Pro Max 256GB",
"price": 9999
}
}
]
}
]
}
}
1.5 Did You Mean(拼写纠错)
GET /products/_search
{
"query": {
"match": {
"title": "ipone" // 拼写错误
}
},
"suggest": {
"text": "ipone",
"title-suggest": {
"term": {
"field": "title",
"suggest_mode": "popular", // missing/popular/always
"min_word_length": 3
}
}
}
}
// 响应建议: "iphone"
1.6 个性化推荐
GET /products/_search
{
"query": {
"function_score": {
"query": {
"match": { "category": "手机数码" }
},
"functions": [
{
"script_score": {
"script": {
"source": """
// 基于用户历史行为的个性化算分
def userPreferBrands = params.user_prefer_brands;
def score = _score;
if (userPreferBrands.contains(doc['brand'].value)) {
score = score * 1.5;
}
// 价格偏好
def userAvgPrice = params.user_avg_price;
def priceDiff = Math.abs(doc['price'].value - userAvgPrice);
// 系数下限截断为0:差价超过均价5倍时原式为负,而script_score返回负分会导致查询报错
score = score * Math.max(0.0, 1 - priceDiff / userAvgPrice * 0.2);
return score;
""",
"params": {
"user_prefer_brands": ["Apple", "华为"],
"user_avg_price": 5000
}
}
}
}
]
}
}
}
1.7 Java实现示例
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.index.query.*;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
/**
 * Runs the storefront product search against the {@code products} index.
 *
 * <p>The request is a {@code function_score} query wrapped around a {@code bool} query: the
 * keyword is matched with a weighted {@code multi_match}, the facets selected by the user are
 * applied as non-scoring filters, and sales, rating and recency functions are summed and then
 * multiplied into the text relevance score.
 */
public class ProductSearchService {

    private RestHighLevelClient client;

    /**
     * Kept so existing no-argument construction keeps compiling. A service created this way
     * cannot search until a client is supplied by other means.
     */
    public ProductSearchService() {
    }

    /**
     * @param client the Elasticsearch client used to execute searches
     */
    public ProductSearchService(RestHighLevelClient client) {
        this.client = client;
    }

    /**
     * Searches products by keyword and facet filters.
     *
     * @param keyword free-text keyword; {@code null} or blank means "browse by filters only"
     * @param query   facet filters (category, price range, brands) and paging
     * @return the raw Elasticsearch response, including highlights and the brand aggregation
     * @throws IllegalStateException if no client has been configured
     * @throws RuntimeException      if the request to Elasticsearch fails with an I/O error
     */
    public SearchResponse searchProducts(String keyword, ProductSearchQuery query) {
        if (client == null) {
            throw new IllegalStateException("Elasticsearch client has not been configured");
        }

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(applyBusinessScoring(buildBoolQuery(keyword, query)));
        sourceBuilder.from(query.getFrom());
        sourceBuilder.size(query.getSize());

        // Highlight matched terms in the fields shown on the result card.
        sourceBuilder.highlighter(
            new HighlightBuilder()
                .field("title")
                .field("description")
                .preTags("<em>")
                .postTags("</em>")
        );

        // Brand facet for the filter sidebar.
        sourceBuilder.aggregation(
            AggregationBuilders.terms("brand_agg").field("brand").size(10)
        );

        SearchRequest request = new SearchRequest("products");
        request.source(sourceBuilder);
        try {
            return client.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            throw new RuntimeException("Search failed", e);
        }
    }

    /** Builds the bool query: keyword relevance in {@code must}, facets in {@code filter}. */
    private BoolQueryBuilder buildBoolQuery(String keyword, ProductSearchQuery query) {
        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();

        if (keyword != null && !keyword.trim().isEmpty()) {
            MultiMatchQueryBuilder multiMatch = QueryBuilders.multiMatchQuery(keyword)
                .field("title", 3.0f)
                .field("title.pinyin", 2.0f)
                .field("description")
                .field("brand", 2.0f)
                .type(MultiMatchQueryBuilder.Type.BEST_FIELDS)
                .minimumShouldMatch("75%")
                .fuzziness(Fuzziness.AUTO);
            boolQuery.must(multiMatch);
        } else {
            // A bool query with only filter clauses scores every hit 0, and boost_mode=multiply
            // would then zero out the business functions. match_all gives a neutral base of 1.
            boolQuery.must(QueryBuilders.matchAllQuery());
        }

        if (query.getCategory() != null) {
            boolQuery.filter(QueryBuilders.termQuery("category", query.getCategory()));
        }

        if (query.getMinPrice() != null || query.getMaxPrice() != null) {
            RangeQueryBuilder priceRange = QueryBuilders.rangeQuery("price");
            if (query.getMinPrice() != null) {
                priceRange.gte(query.getMinPrice());
            }
            if (query.getMaxPrice() != null) {
                priceRange.lte(query.getMaxPrice());
            }
            boolQuery.filter(priceRange);
        }

        if (query.getBrands() != null && !query.getBrands().isEmpty()) {
            boolQuery.filter(QueryBuilders.termsQuery("brand", query.getBrands()));
        }

        // Only products that are in stock.
        boolQuery.filter(QueryBuilders.rangeQuery("stock").gt(0));
        return boolQuery;
    }

    /** Wraps the base query with the sales, rating and recency score functions. */
    private FunctionScoreQueryBuilder applyBusinessScoring(BoolQueryBuilder baseQuery) {
        FunctionScoreQueryBuilder.FilterFunctionBuilder[] functions =
            new FunctionScoreQueryBuilder.FilterFunctionBuilder[]{
                // Sales volume. "missing" keeps documents without the field from failing
                // the whole request (same defaults as the JSON query in section 1.3).
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    ScoreFunctionBuilders.fieldValueFactorFunction("sales")
                        .factor(0.01f)
                        .modifier(FieldValueFactorFunction.Modifier.LOG1P)
                        .missing(0)
                ),
                // Customer rating; unrated products are treated as 3.0.
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    ScoreFunctionBuilders.fieldValueFactorFunction("rating")
                        .factor(1.0f)
                        .modifier(FieldValueFactorFunction.Modifier.SQRT)
                        .missing(3.0)
                ),
                // New-product boost. Offset and decay are factory arguments; the decay
                // builder has no setOffset/setDecay setters.
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    ScoreFunctionBuilders.gaussDecayFunction("created_at", "now", "90d", "7d", 0.5)
                )
            };

        return QueryBuilders.functionScoreQuery(baseQuery, functions)
            .scoreMode(FunctionScoreQuery.ScoreMode.SUM)
            .boostMode(CombineFunction.MULTIPLY);
    }
}
二、日志分析
2.1 业务场景
需求:
- 海量日志存储(TB级)
- 实时搜索(秒级延迟)
- 时间范围查询
- 聚合统计(错误率、QPS)
- 自动过期删除
2.2 索引设计
按天创建索引:
// 使用Index Template
PUT /_index_template/logs_template
{
"index_patterns": ["logs-*"],
"data_stream": {},
"template": {
"settings": {
"number_of_shards": 3,
"number_of_replicas": 1,
"refresh_interval": "10s", // 降低实时性,提升性能
"codec": "best_compression", // 压缩存储
"index.lifecycle.name": "logs_policy"
},
"mappings": {
"properties": {
"@timestamp": {
"type": "date",
"format": "yyyy-MM-dd HH:mm:ss||epoch_millis"
},
"level": {
"type": "keyword"
},
"logger": {
"type": "keyword"
},
"message": {
"type": "text",
"analyzer": "standard",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
},
"thread": {
"type": "keyword"
},
"exception": {
"type": "text"
},
"trace_id": {
"type": "keyword"
},
"user_id": {
"type": "keyword"
},
"ip": {
"type": "ip"
},
"duration": {
"type": "integer"
},
"status": {
"type": "keyword"
},
"url": {
"type": "keyword"
},
"method": {
"type": "keyword"
}
}
}
}
}
2.3 ILM策略
PUT /_ilm/policy/logs_policy
{
"policy": {
"phases": {
"hot": {
"actions": {
"rollover": {
"max_size": "50GB",
"max_age": "1d"
},
"set_priority": {
"priority": 100
}
}
},
"warm": {
"min_age": "7d",
"actions": {
"forcemerge": {
"max_num_segments": 1
},
"shrink": {
"number_of_shards": 1
},
"set_priority": {
"priority": 50
}
}
},
"delete": {
"min_age": "30d",
"actions": {
"delete": {}
}
}
}
}
}
2.4 日志查询
错误日志查询:
GET /logs-*/_search
{
"query": {
"bool": {
"must": [
{
"term": { "level": "ERROR" }
},
{
"range": {
"@timestamp": {
"gte": "now-1h"
}
}
}
]
}
},
"sort": [
{ "@timestamp": "desc" }
]
}
链路追踪:
GET /logs-*/_search
{
"query": {
"term": { "trace_id": "abc123" }
},
"sort": [
{ "@timestamp": "asc" }
]
}
慢请求分析:
GET /logs-*/_search
{
"query": {
"bool": {
"filter": [
{ "range": { "@timestamp": { "gte": "now-1h" } } },
{ "range": { "duration": { "gte": 1000 } } } // >1秒
]
}
},
"aggs": {
"slow_urls": {
"terms": {
"field": "url",
"size": 10,
"order": { "avg_duration": "desc" }
},
"aggs": {
"avg_duration": {
"avg": { "field": "duration" }
}
}
}
}
}
2.5 实时统计
QPS统计:
GET /logs-*/_search
{
"size": 0,
"query": {
"range": {
"@timestamp": {
"gte": "now-1h"
}
}
},
"aggs": {
"qps_per_minute": {
"date_histogram": {
"field": "@timestamp",
"fixed_interval": "1m"
}
}
}
}
错误率统计:
GET /logs-*/_search
{
"size": 0,
"query": {
"range": { "@timestamp": { "gte": "now-1h" } }
},
"aggs": {
"total": {
"value_count": { "field": "@timestamp" }
},
"errors": {
"filter": {
"term": { "level": "ERROR" }
}
}
}
}
// 计算错误率: errors.doc_count / total.value
TOP IP统计:
GET /logs-*/_search
{
"size": 0,
"aggs": {
"top_ips": {
"terms": {
"field": "ip",
"size": 10
}
}
}
}
2.6 Logstash配置
# logstash.conf
input {
file {
path => "/var/log/app/*.log"
start_position => "beginning"
codec => multiline {
pattern => "^\d{4}-\d{2}-\d{2}"
negate => true
what => "previous"
}
}
}
filter {
# 解析日志
grok {
match => {
"message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{JAVACLASS:logger} - %{GREEDYDATA:log_message}"
}
}
# 时间解析
date {
match => ["timestamp", "yyyy-MM-dd HH:mm:ss"]
target => "@timestamp"
}
# IP地理位置
geoip {
source => "ip"
target => "geoip"
}
# 删除临时字段
mutate {
remove_field => ["timestamp"]
}
}
output {
elasticsearch {
hosts => ["http://localhost:9200"]
index => "logs-%{+YYYY.MM.dd}"
template_name => "logs_template"
}
}
三、地理位置搜索
3.1 业务场景
需求:
- 附近的人/商家/服务
- 范围搜索(圆形/矩形)
- 距离排序
- 地理聚合(按区域统计)
3.2 Mapping设计
PUT /locations
{
"mappings": {
"properties": {
"name": {
"type": "text",
"fields": {
"keyword": { "type": "keyword" }
}
},
"location": {
"type": "geo_point" // 地理位置
},
"address": {
"type": "text"
},
"category": {
"type": "keyword"
},
"rating": {
"type": "half_float"
}
}
}
}
3.3 写入数据
// 方式1:对象格式
PUT /locations/_doc/1
{
"name": "星巴克(国贸店)",
"location": {
"lat": 39.9042,
"lon": 116.4074
},
"category": "咖啡厅"
}
// 方式2:字符串格式
PUT /locations/_doc/2
{
"name": "肯德基(朝阳门店)",
"location": "39.9289,116.4372", // "lat,lon"
"category": "快餐"
}
// 方式3:数组格式
PUT /locations/_doc/3
{
"name": "必胜客(三里屯店)",
"location": [116.4551, 39.9376], // [lon, lat]
"category": "西餐"
}
// 方式4:GeoHash
PUT /locations/_doc/4
{
"name": "麦当劳(望京店)",
"location": "wx4g0e6y",
"category": "快餐"
}
3.4 地理查询
圆形范围搜索:
GET /locations/_search
{
"query": {
"bool": {
"must": {
"match_all": {}
},
"filter": {
"geo_distance": {
"distance": "5km",
"location": {
"lat": 39.9042,
"lon": 116.4074
}
}
}
}
},
"sort": [
{
"_geo_distance": {
"location": {
"lat": 39.9042,
"lon": 116.4074
},
"order": "asc",
"unit": "km"
}
}
]
}
矩形范围搜索:
GET /locations/_search
{
"query": {
"geo_bounding_box": {
"location": {
"top_left": {
"lat": 40.0,
"lon": 116.0
},
"bottom_right": {
"lat": 39.0,
"lon": 117.0
}
}
}
}
}
多边形范围搜索:
GET /locations/_search
{
"query": {
"geo_polygon": {
"location": {
"points": [
{ "lat": 40.0, "lon": 116.0 },
{ "lat": 40.0, "lon": 117.0 },
{ "lat": 39.0, "lon": 117.0 },
{ "lat": 39.0, "lon": 116.0 }
]
}
}
}
}
3.5 距离衰减排序
GET /locations/_search
{
"query": {
"function_score": {
"query": {
"match": { "category": "咖啡厅" }
},
"functions": [
{
"gauss": {
"location": {
"origin": "39.9042,116.4074", // 用户位置
"scale": "2km", // 超出offset后再经过2km(即距origin 2.5km处)衰减到0.5
"offset": "500m", // 500米内不衰减
"decay": 0.5
}
}
},
{
"field_value_factor": {
"field": "rating",
"modifier": "sqrt",
"missing": 1 // 文档缺少rating字段时的默认值,否则查询会报错
}
}
],
"score_mode": "multiply"
}
}
}
3.6 地理聚合
按区域聚合:
GET /locations/_search
{
"size": 0,
"aggs": {
"grid": {
"geohash_grid": {
"field": "location",
"precision": 5 // GeoHash精度(1-12)
}
}
}
}
按距离分组:
GET /locations/_search
{
"size": 0,
"aggs": {
"distance_ranges": {
"geo_distance": {
"field": "location",
"origin": "39.9042,116.4074",
"unit": "km",
"ranges": [
{ "key": "0-1km", "to": 1 },
{ "key": "1-3km", "from": 1, "to": 3 },
{ "key": "3-5km", "from": 3, "to": 5 },
{ "key": "5km+", "from": 5 }
]
}
}
}
}
3.7 地理边界聚合
GET /locations/_search
{
"size": 0,
"aggs": {
"viewport": {
"geo_bounds": {
"field": "location",
"wrap_longitude": true
}
}
}
}
// 响应:返回所有点的边界框
{
"aggregations": {
"viewport": {
"bounds": {
"top_left": { "lat": 40.0, "lon": 116.0 },
"bottom_right": { "lat": 39.0, "lon": 117.0 }
}
}
}
}
四、数据同步
4.1 业务场景
需求:
- MySQL数据实时同步到ES
- 增量更新
- 数据一致性保证
- 高性能(>1万TPS)
4.2 方案对比
| 方案 | 实时性 | 性能 | 复杂度 | 适用场景 |
|---|---|---|---|---|
| Logstash JDBC | 分钟级 | 低 | 低 | 小数据量,定时同步 |
| Canal | 秒级 | 高 | 中 | 实时同步,阿里系 |
| Debezium | 秒级 | 高 | 中 | 实时同步,通用 |
| 应用双写 | 实时 | 最高 | 高 | 强一致性要求 |
4.3 Logstash JDBC同步
配置:
# jdbc.conf
input {
jdbc {
jdbc_driver_library => "/path/to/mysql-connector-java.jar"
jdbc_driver_class => "com.mysql.jdbc.Driver"
jdbc_connection_string => "jdbc:mysql://localhost:3306/shop"
jdbc_user => "root"
jdbc_password => "password"
# 定时执行(每分钟)
schedule => "* * * * *"
# 增量同步SQL
statement => "SELECT * FROM products WHERE updated_at > :sql_last_value ORDER BY updated_at ASC"
# 记录最后更新时间
use_column_value => true
tracking_column => "updated_at"
tracking_column_type => "timestamp"
# 是否清除上次运行记录的状态(false:保留sql_last_value,用于增量同步)
clean_run => false
}
}
filter {
mutate {
remove_field => ["@version", "@timestamp"]
}
}
output {
elasticsearch {
hosts => ["http://localhost:9200"]
index => "products"
document_id => "%{id}" # 使用MySQL主键
}
stdout {
codec => json_lines
}
}
优缺点:
- 优点:简单,无需修改应用
- 缺点:非实时,删除操作无法同步
4.4 Canal实时同步
架构:
MySQL Binlog → Canal Server → Canal Client → Elasticsearch
配置MySQL:
-- 设置binlog格式(binlog本身需在my.cnf中通过log-bin开启,无法用SET GLOBAL动态开启)
SET GLOBAL binlog_format = 'ROW';
SET GLOBAL binlog_row_image = 'FULL';
-- 创建Canal账号
CREATE USER 'canal'@'%' IDENTIFIED BY 'canal';
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%';
FLUSH PRIVILEGES;
Canal Server配置:
# instance.properties
canal.instance.master.address=127.0.0.1:3306
canal.instance.dbUsername=canal
canal.instance.dbPassword=canal
canal.instance.filter.regex=shop\\..*
Java Client:
import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.protocol.Message;
import com.alibaba.otter.canal.protocol.CanalEntry.*;
/**
 * Tails MySQL row changes of {@code shop.products} from a Canal server and mirrors them into the
 * {@code products} Elasticsearch index.
 *
 * <p>A batch is acknowledged only after every row in it has been applied to Elasticsearch. If
 * anything fails the batch is rolled back so Canal delivers it again; index-by-id and
 * delete-by-id are safe to replay.
 */
public class CanalElasticsearchSyncClient {

    private static final java.util.logging.Logger LOG =
        java.util.logging.Logger.getLogger(CanalElasticsearchSyncClient.class.getName());

    /** Pause between polls when there is nothing to read or the last batch failed. */
    private static final long BACK_OFF_MILLIS = 1000L;

    private CanalConnector connector;
    // NOTE(review): never assigned in this snippet; it must be provided before start() is called.
    private RestHighLevelClient esClient;

    /**
     * Connects to Canal and processes batches until the calling thread is interrupted.
     * The connection is always released on exit.
     */
    public void start() {
        connector = CanalConnectors.newSingleConnector(
            new InetSocketAddress("127.0.0.1", 11111),
            "example",
            "",
            ""
        );
        connector.connect();
        try {
            connector.subscribe("shop\\.products");
            // Re-deliver anything fetched but not acknowledged by a previous run.
            connector.rollback();

            while (!Thread.currentThread().isInterrupted()) {
                Message message = connector.getWithoutAck(100);
                long batchId = message.getId();
                if (batchId == -1 || message.getEntries().isEmpty()) {
                    backOff();
                    continue;
                }
                try {
                    processEntries(message.getEntries());
                    connector.ack(batchId);
                } catch (IOException | RuntimeException e) {
                    // Do not ack: acknowledging a failed batch would silently drop the changes.
                    LOG.log(java.util.logging.Level.SEVERE,
                        "Failed to apply Canal batch " + batchId + ", rolling back", e);
                    connector.rollback(batchId);
                    backOff();
                }
            }
        } finally {
            connector.disconnect();
        }
    }

    /** Sleeps for the back-off interval, restoring the interrupt flag if interrupted. */
    private static void backOff() {
        try {
            Thread.sleep(BACK_OFF_MILLIS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Applies every row change in the batch to Elasticsearch.
     *
     * @throws IOException if an entry cannot be decoded or an Elasticsearch call fails
     */
    private void processEntries(List<Entry> entries) throws IOException {
        for (Entry entry : entries) {
            // Skip transaction begin/end markers and other non-row entries.
            if (entry.getEntryType() != EntryType.ROWDATA) {
                continue;
            }
            // parseFrom reports malformed payloads with an IOException subtype.
            RowChange rowChange = RowChange.parseFrom(entry.getStoreValue());
            EventType eventType = rowChange.getEventType();
            for (RowData rowData : rowChange.getRowDatasList()) {
                if (eventType == EventType.INSERT || eventType == EventType.UPDATE) {
                    syncToES(rowData.getAfterColumnsList());
                } else if (eventType == EventType.DELETE) {
                    deleteFromES(rowData.getBeforeColumnsList());
                }
            }
        }
    }

    /** Indexes the row image as a document whose _id is the row's {@code id} column. */
    private void syncToES(List<Column> columns) throws IOException {
        Map<String, Object> doc = new HashMap<>();
        for (Column column : columns) {
            // Canal reports SQL NULL as an empty string plus a flag; keep it a real null.
            doc.put(column.getName(), column.getIsNull() ? null : column.getValue());
        }
        IndexRequest request = new IndexRequest("products")
            .id(requireId(columns))
            .source(doc);
        esClient.index(request, RequestOptions.DEFAULT);
    }

    /** Deletes the document whose _id is the row's {@code id} column. */
    private void deleteFromES(List<Column> columns) throws IOException {
        DeleteRequest request = new DeleteRequest("products", requireId(columns));
        esClient.delete(request, RequestOptions.DEFAULT);
    }

    /**
     * Returns the value of the {@code id} column.
     *
     * @throws IllegalStateException if the row has no {@code id} column; without it the change
     *                               cannot be mapped to a document
     */
    private static String requireId(List<Column> columns) {
        for (Column column : columns) {
            if ("id".equals(column.getName())) {
                return column.getValue();
            }
        }
        throw new IllegalStateException("Row change has no 'id' column");
    }
}
4.5 应用双写方案
事务保证:
@Service
public class ProductService {
@Autowired
private ProductMapper productMapper;
@Autowired
private ElasticsearchClient esClient;
@Transactional
public void createProduct(Product product) {
// 1. 写入MySQL
productMapper.insert(product);
// 2. 异步写入ES(避免阻塞主流程)
CompletableFuture.runAsync(() -> {
try {
IndexRequest request = new IndexRequest("products")
.id(product.getId().toString())
.source(objectMapper.writeValueAsString(product), XContentType.JSON);
esClient.index(request, RequestOptions.DEFAULT);
} catch (Exception e) {
// 写入失败,记录到失败队列
failureQueue.offer(product);
log.error("Sync to ES failed", e);
}
});
}
// 定时任务:重试失败数据
@Scheduled(fixedDelay = 60000)
public void retryFailedSync() {
Product product;
while ((product = failureQueue.poll()) != null) {
try {
syncToES(product);
} catch (Exception e) {
failureQueue.offer(product);
}
}
}
}
最终一致性:
/**
 * Scheduled full reconciliation between MySQL and the {@code products} index (daily at 02:00).
 *
 * <p>Products present in MySQL but missing from Elasticsearch are re-indexed; documents present
 * in Elasticsearch but absent from MySQL are deleted. Every candidate is re-read from MySQL
 * before acting, because rows can be created or deleted while the two id sets are collected.
 *
 * @throws java.io.UncheckedIOException if reading ids from Elasticsearch fails
 */
@Scheduled(cron = "0 0 2 * * ?")
public void syncCheck() {
    // 1. Snapshot of all ids in MySQL.
    Set<Long> mysqlIds = new HashSet<>(productMapper.selectAllIds());
    // 2. All ids in ES.
    Set<Long> esIds = fetchAllEsIds();

    // 3a. In MySQL but not in ES: index.
    Set<Long> missingInEs = new HashSet<>(mysqlIds);
    missingInEs.removeAll(esIds);
    for (Long id : missingInEs) {
        Product product = productMapper.selectById(id);
        // The row may have been deleted since the snapshot.
        if (product != null) {
            syncToES(product);
        }
    }

    // 3b. In ES but not in MySQL: delete.
    Set<Long> orphanedInEs = new HashSet<>(esIds);
    orphanedInEs.removeAll(mysqlIds);
    for (Long id : orphanedInEs) {
        // A product created after the MySQL snapshot is not an orphan; confirm before deleting.
        if (productMapper.selectById(id) == null) {
            deleteFromES(id);
        }
    }
}

/**
 * Collects the _id of every document in the {@code products} index.
 *
 * <p>Pages with search_after, because one search returns at most its {@code size} hits and a
 * single page of 10000 would truncate larger catalogs.
 * NOTE(review): assumes every document carries a unique value in the keyword field
 * {@code id}, which is used as the paging sort key — confirm against the write path.
 */
private Set<Long> fetchAllEsIds() {
    final int pageSize = 1000;
    SearchSourceBuilder source = new SearchSourceBuilder()
        .query(QueryBuilders.matchAllQuery())
        .fetchSource(false)
        .size(pageSize)
        .sort("id");

    Set<Long> ids = new HashSet<>();
    try {
        while (true) {
            SearchRequest request = new SearchRequest("products");
            request.source(source);
            SearchResponse response = esClient.search(request, RequestOptions.DEFAULT);
            SearchHit[] hits = response.getHits().getHits();
            if (hits.length == 0) {
                break;
            }
            for (SearchHit hit : hits) {
                ids.add(Long.parseLong(hit.getId()));
            }
            // Continue after the last hit of this page.
            source.searchAfter(hits[hits.length - 1].getSortValues());
        }
    } catch (java.io.IOException e) {
        throw new java.io.UncheckedIOException("Failed to read product ids from Elasticsearch", e);
    }
    return ids;
}
五、高频面试题
如何实现拼写纠错?
答案:
- Fuzziness:模糊查询
{
"query": {
"match": {
"title": {
"query": "ipone",
"fuzziness": "AUTO"
}
}
}
}
- Suggest API:
{
"suggest": {
"text": "ipone",
"title-suggest": {
"term": {
"field": "title"
}
}
}
}
- Phrase Suggest:
{
"suggest": {
"text": "ipone 15 pro",
"title-suggest": {
"phrase": {
"field": "title"
}
}
}
}
地理位置搜索如何优化性能?
答案:
使用geo_point而非geo_shape:
- geo_point:点位置,性能高
- geo_shape:多边形,性能低
降低GeoHash精度:
{
"aggs": {
"grid": {
"geohash_grid": {
"field": "location",
"precision": 5 // 不要设太高
}
}
}
}
- 使用geo_distance filter:
{
"query": {
"bool": {
"filter": { // filter会缓存
"geo_distance": {
"distance": "5km",
"location": "39.9,116.4"
}
}
}
}
}
MySQL到ES同步如何保证一致性?
答案:
实时同步(Canal/Debezium):
- 解析Binlog,准实时同步
- 支持增删改操作
- 秒级延迟
定时对账:
- 每天全量对比MySQL和ES
- 修复不一致数据
失败重试:
- 写入ES失败记录到队列
- 定时重试
版本控制:
PUT /products/_doc/1?version=5&version_type=external
{
"title": "iPhone 15",
"updated_at": 1234567890
}
日志索引如何设计?
答案:
按时间分索引:
- 按天:logs-2024.01.15
- 按月:logs-2024.01(数据量小)
使用ILM:
- Hot(7天):频繁查询
- Warm(30天):偶尔查询
- Delete(90天):删除
优化设置:
{
"settings": {
"refresh_interval": "10s", // 降低实时性
"number_of_shards": 3,
"codec": "best_compression" // 压缩存储
}
}
- 合理Mapping:
{
"message": {
"type": "text",
"index": false // 不需要搜索的字段禁用索引
}
}
电商搜索如何排序?
答案: 综合考虑多个因素:
- 文本相关性(BM25):
{
"query": {
"multi_match": {
"query": "手机",
"fields": ["title^3", "description"]
}
}
}
业务因素(Function Score):
- 销量:销量高排前
- 评分:评分高排前
- 新品:新品优先
- 品牌:优质品牌加权
个性化:
- 用户历史偏好
- 地理位置
- 价格偏好
完整查询见1.3节示例。