HiHuo
首页
博客
手册
工具
关于
首页
博客
手册
工具
关于
  • 技术面试完全指南

    • 技术面试完全指南
    • 8年面试官告诉你:90%的简历在第一轮就被刷掉了
    • 刷了500道LeetCode,终于明白大厂算法面试到底考什么
    • 高频算法题精讲-双指针与滑动窗口
    • 03-高频算法题精讲-二分查找与排序
    • 04-高频算法题精讲-树与递归
    • 05-高频算法题精讲-图与拓扑排序
    • 06-高频算法题精讲-动态规划
    • Go面试必问:一道GMP问题,干掉90%的候选人
    • 08-数据库面试高频题
    • 09-分布式系统面试题
    • 10-Kubernetes与云原生面试题
    • 11-系统设计面试方法论
    • 前端面试高频题
    • AI 与机器学习面试题
    • 行为面试与软技能

实战场景

一、电商搜索

1.1 业务场景

需求:

  • 商品标题/描述全文检索
  • 多条件筛选(品牌、价格、类目)
  • 相关性排序(销量、评分、新品)
  • 搜索建议(自动补全)
  • 拼写纠错

1.2 Mapping设计

// Create the product index: custom analyzers plus the field mapping used by
// full-text search, filtering, sorting and completion suggestions.
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1,
    "analysis": {
      "analyzer": {
        // IK tokenization followed by pinyin expansion; used by title.pinyin
        "ik_pinyin": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "pinyin_filter"]
        },
        // Referenced by category_path; it must be defined here, otherwise
        // index creation fails because the analyzer cannot be resolved
        "path_hierarchy_analyzer": {
          "type": "custom",
          "tokenizer": "path_hierarchy_tokenizer"
        }
      },
      "tokenizer": {
        // Emits every ancestor of a "/"-separated path: a/b/c -> a, a/b, a/b/c
        "path_hierarchy_tokenizer": {
          "type": "path_hierarchy",
          "delimiter": "/"
        }
      },
      "filter": {
        "pinyin_filter": {
          "type": "pinyin",
          "keep_first_letter": true,
          "keep_separate_first_letter": false,
          "keep_full_pinyin": true,
          "keep_original": true,
          "limit_first_letter_length": 16,
          "lowercase": true
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "id": {
        "type": "keyword"
      },
      "title": {
        "type": "text",
        "analyzer": "ik_max_word",
        "search_analyzer": "ik_smart",
        "fields": {
          // Exact-value sub-field for sorting and aggregations
          "keyword": {
            "type": "keyword",
            "ignore_above": 256
          },
          // Pinyin sub-field so that romanized input can match the title
          "pinyin": {
            "type": "text",
            "analyzer": "ik_pinyin"
          }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "ik_max_word"
      },
      "brand": {
        "type": "keyword"
      },
      "category": {
        "type": "keyword"
      },
      "category_path": {
        "type": "text",
        "analyzer": "path_hierarchy_analyzer",
        // Search with the whole path as one term so that a query for a parent
        // path matches every product stored underneath it
        "search_analyzer": "keyword"
      },
      "price": {
        // Stored as a long after multiplying by 100 (two decimal places)
        "type": "scaled_float",
        "scaling_factor": 100
      },
      "sales": {
        "type": "integer"
      },
      "rating": {
        "type": "half_float"
      },
      "stock": {
        "type": "integer"
      },
      "tags": {
        "type": "keyword"
      },
      "images": {
        // Returned with the document but not searchable
        "type": "keyword",
        "index": false
      },
      "created_at": {
        "type": "date"
      },
      "updated_at": {
        "type": "date"
      },
      "suggest": {
        "type": "completion",
        "analyzer": "ik_max_word",
        // Lets completion requests be restricted to a category
        "contexts": [
          {
            "name": "category",
            "type": "category"
          }
        ]
      }
    }
  }
}

1.3 综合搜索查询

// Keyword search with filters, business-signal re-scoring, highlighting and facets.
GET /products/_search
{
  "query": {
    "function_score": {
      "query": {
        "bool": {
          "must": [
            {
              "multi_match": {
                "query": "苹果手机",
                "fields": [
                  "title^3",           // title carries the highest weight
                  "title.pinyin^2",    // pinyin matching
                  "description",
                  "brand^2"
                ],
                "type": "best_fields",
                "minimum_should_match": "75%",
                "fuzziness": "AUTO"    // tolerate typos
              }
            }
          ],
          "filter": [
            {
              "term": { "category": "手机数码" }
            },
            {
              "range": {
                "price": {
                  "gte": 3000,
                  "lte": 8000
                }
              }
            },
            {
              "terms": { "brand": ["Apple", "华为", "小米"] }
            },
            {
              "range": {
                "stock": { "gt": 0 }  // in stock only
              }
            }
          ],
          "should": [
            {
              // A term query accepts exactly one field; the boost has to live
              // inside that field's object next to "value"
              "term": {
                "tags": {
                  "value": "5G",
                  "boost": 1.5
                }
              }
            }
          ]
        }
      },
      "functions": [
        {
          // Fixed weight for one brand
          "filter": { "term": { "brand": "Apple" } },
          "weight": 1.3
        },
        {
          // Sales volume, dampened with log1p
          "field_value_factor": {
            "field": "sales",
            "factor": 0.01,
            "modifier": "log1p",
            "missing": 0
          }
        },
        {
          // Rating, dampened with sqrt; unrated products count as 3.0
          "field_value_factor": {
            "field": "rating",
            "factor": 1.0,
            "modifier": "sqrt",
            "missing": 3.0
          }
        },
        {
          // Freshness: no decay for the first 7 days, then a gaussian falloff
          "gauss": {
            "created_at": {
              "origin": "now",
              "scale": "90d",
              "offset": "7d",
              "decay": 0.5
            }
          }
        }
      ],
      // Function results are summed, then multiplied with the query score
      "score_mode": "sum",
      "boost_mode": "multiply",
      "min_score": 1
    }
  },
  "from": 0,
  "size": 20,
  "sort": [
    { "_score": "desc" },
    { "sales": "desc" }
  ],
  "highlight": {
    "fields": {
      "title": {
        "pre_tags": ["<em>"],
        "post_tags": ["</em>"]
      },
      "description": {}
    }
  },
  "aggs": {
    "brand_agg": {
      "terms": {
        "field": "brand",
        "size": 10
      }
    },
    "price_range_agg": {
      "range": {
        "field": "price",
        "ranges": [
          { "key": "0-1000", "to": 1000 },
          { "key": "1000-3000", "from": 1000, "to": 3000 },
          { "key": "3000-5000", "from": 3000, "to": 5000 },
          { "key": "5000+", "from": 5000 }
        ]
      }
    },
    "category_agg": {
      "terms": {
        "field": "category",
        "size": 20
      }
    }
  }
}

1.4 自动补全

// Populate the suggest field when the document is indexed
PUT /products/_doc/1
{
  "title": "iPhone 15 Pro Max 256GB",
  "price": 9999,
  "suggest": {
    // Phrases offered as completions for this product
    "input": ["iPhone", "iPhone 15", "iPhone 15 Pro", "苹果手机"],
    // Category context value that completion requests can filter on
    "contexts": {
      "category": ["手机数码"]
    }
  }
}

// Autocomplete request
GET /products/_search
{
  "suggest": {
    "product-suggest": {
      // Text typed so far
      "prefix": "iph",
      "completion": {
        "field": "suggest",
        "size": 10,
        // Only return suggestions stored under this category context
        "contexts": {
          "category": ["手机数码"]
        },
        // Allow small typos in the prefix
        "fuzzy": {
          "fuzziness": "AUTO"
        }
      }
    }
  }
}

// Example response
{
  "suggest": {
    "product-suggest": [
      {
        // The prefix that was submitted, with its position and length
        "text": "iph",
        "offset": 0,
        "length": 3,
        "options": [
          {
            // Matched suggestion text plus the source of the owning document
            "text": "iPhone 15 Pro",
            "_score": 10.0,
            "_source": {
              "title": "iPhone 15 Pro Max 256GB",
              "price": 9999
            }
          }
        ]
      }
    ]
  }
}

1.5 Did You Mean(拼写纠错)

// Run the search and ask the term suggester for corrections in one request
GET /products/_search
{
  "query": {
    "match": {
      "title": "ipone"  // misspelled input
    }
  },
  "suggest": {
    // Text shared by every suggester in this request
    "text": "ipone",
    "title-suggest": {
      "term": {
        "field": "title",
        "suggest_mode": "popular",  // missing/popular/always
        "min_word_length": 3
      }
    }
  }
}

// 响应建议: "iphone"

1.6 个性化推荐

// Re-score category results with per-user brand and price preferences.
GET /products/_search
{
  "query": {
    "function_score": {
      "query": {
        "match": { "category": "手机数码" }
      },
      "functions": [
        {
          "script_score": {
            "script": {
              "source": """
                // Personalized score derived from the user's past behaviour
                def userPreferBrands = params.user_prefer_brands;
                def score = _score;
                // Guard against documents that have no brand value
                if (doc['brand'].size() != 0 && userPreferBrands.contains(doc['brand'].value)) {
                  score = score * 1.5;
                }
                // Price preference: the further from the user's average, the lower the score
                def userAvgPrice = params.user_avg_price;
                if (userAvgPrice > 0 && doc['price'].size() != 0) {
                  def priceDiff = Math.abs(doc['price'].value - userAvgPrice);
                  // Clamp at 0: a negative score makes the whole request fail
                  score = score * Math.max(0.0, 1 - priceDiff / userAvgPrice * 0.2);
                }
                return score;
              """,
              "params": {
                "user_prefer_brands": ["Apple", "华为"],
                "user_avg_price": 5000
              }
            }
          }
        }
      ],
      // The script already starts from _score, so use its result as the final
      // score instead of multiplying it with the query score a second time
      "boost_mode": "replace"
    }
  }
}

1.7 Java实现示例

import java.io.IOException;

import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.lucene.search.function.CombineFunction;
import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;

/**
 * Runs product searches against the "products" index: keyword matching,
 * optional filters, business-signal scoring, highlighting and a brand facet.
 */
public class ProductSearchService {

    private RestHighLevelClient client;

    /**
     * Searches products.
     *
     * @param keyword free-text input; when null or empty only the filters apply
     * @param query   filter and paging options (category, price bounds, brands, from, size)
     * @return the raw Elasticsearch response
     * @throws RuntimeException wrapping the IOException when the request fails
     */
    public SearchResponse searchProducts(String keyword, ProductSearchQuery query) {
        SearchRequest request = new SearchRequest("products");
        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();

        BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();

        // Scoring clause: only added when the user actually typed something
        if (keyword != null && !keyword.isEmpty()) {
            MultiMatchQueryBuilder multiMatch = QueryBuilders.multiMatchQuery(keyword)
                .field("title", 3.0f)
                .field("title.pinyin", 2.0f)
                .field("description")
                .field("brand", 2.0f)
                .type(MultiMatchQueryBuilder.Type.BEST_FIELDS)
                .minimumShouldMatch("75%")
                .fuzziness(Fuzziness.AUTO);
            boolQuery.must(multiMatch);
        }

        // Non-scoring filters
        if (query.getCategory() != null) {
            boolQuery.filter(QueryBuilders.termQuery("category", query.getCategory()));
        }

        if (query.getMinPrice() != null || query.getMaxPrice() != null) {
            RangeQueryBuilder rangeQuery = QueryBuilders.rangeQuery("price");
            if (query.getMinPrice() != null) {
                rangeQuery.gte(query.getMinPrice());
            }
            if (query.getMaxPrice() != null) {
                rangeQuery.lte(query.getMaxPrice());
            }
            boolQuery.filter(rangeQuery);
        }

        if (query.getBrands() != null && !query.getBrands().isEmpty()) {
            boolQuery.filter(QueryBuilders.termsQuery("brand", query.getBrands()));
        }

        // In stock only
        boolQuery.filter(QueryBuilders.rangeQuery("stock").gt(0));

        // Business signals; the values mirror the JSON version of this query
        FunctionScoreQueryBuilder functionScore = QueryBuilders.functionScoreQuery(
            boolQuery,
            new FunctionScoreQueryBuilder.FilterFunctionBuilder[]{
                // Sales volume; missing(0) keeps documents without the field
                // from failing the whole request
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    ScoreFunctionBuilders.fieldValueFactorFunction("sales")
                        .factor(0.01f)
                        .modifier(FieldValueFactorFunction.Modifier.LOG1P)
                        .missing(0)
                ),
                // Rating; unrated products are treated as 3.0
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    ScoreFunctionBuilders.fieldValueFactorFunction("rating")
                        .factor(1.0f)
                        .modifier(FieldValueFactorFunction.Modifier.SQRT)
                        .missing(3.0)
                ),
                // Freshness: offset and decay are factory arguments, the decay
                // builder has no setters for them
                new FunctionScoreQueryBuilder.FilterFunctionBuilder(
                    ScoreFunctionBuilders.gaussDecayFunction("created_at", "now", "90d", "7d", 0.5)
                )
            }
        ).scoreMode(FunctionScoreQuery.ScoreMode.SUM)
         .boostMode(CombineFunction.MULTIPLY);

        sourceBuilder.query(functionScore);
        sourceBuilder.from(query.getFrom());
        sourceBuilder.size(query.getSize());

        // Highlighting
        sourceBuilder.highlighter(
            new HighlightBuilder()
                .field("title")
                .field("description")
                .preTags("<em>")
                .postTags("</em>")
        );

        // Brand facet
        sourceBuilder.aggregation(
            AggregationBuilders.terms("brand_agg").field("brand").size(10)
        );

        request.source(sourceBuilder);

        try {
            return client.search(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            throw new RuntimeException("Search failed", e);
        }
    }
}

二、日志分析

2.1 业务场景

需求:

  • 海量日志存储(TB级)
  • 实时搜索(秒级延迟)
  • 时间范围查询
  • 聚合统计(错误率、QPS)
  • 自动过期删除

2.2 索引设计

按天创建索引:

// Index template applied to everything matching logs-*
PUT /_index_template/logs_template
{
  "index_patterns": ["logs-*"],
  "data_stream": {},
  "template": {
    "settings": {
      "number_of_shards": 3,
      "number_of_replicas": 1,
      "refresh_interval": "10s",  // trade search freshness for indexing throughput
      "codec": "best_compression",  // smaller segments on disk
      "index.lifecycle.name": "logs_policy"
    },
    "mappings": {
      "properties": {
        "@timestamp": {
          "type": "date",
          // strict_date_optional_time accepts ISO-8601 values such as
          // 2024-01-15T10:00:00.000Z, which is what Logstash sends for
          // @timestamp; without it those documents are rejected
          "format": "strict_date_optional_time||yyyy-MM-dd HH:mm:ss||epoch_millis"
        },
        "level": {
          "type": "keyword"
        },
        "logger": {
          "type": "keyword"
        },
        "message": {
          "type": "text",
          "analyzer": "standard",
          "fields": {
            "keyword": {
              "type": "keyword",
              "ignore_above": 256
            }
          }
        },
        "thread": {
          "type": "keyword"
        },
        "exception": {
          "type": "text"
        },
        "trace_id": {
          "type": "keyword"
        },
        "user_id": {
          "type": "keyword"
        },
        "ip": {
          "type": "ip"
        },
        "duration": {
          "type": "integer"
        },
        "status": {
          "type": "keyword"
        },
        "url": {
          "type": "keyword"
        },
        "method": {
          "type": "keyword"
        }
      }
    }
  }
}

2.3 ILM策略

// Lifecycle policy with three phases: hot -> warm -> delete
PUT /_ilm/policy/logs_policy
{
  "policy": {
    "phases": {
      "hot": {
        "actions": {
          // Roll over when either limit is reached
          "rollover": {
            "max_size": "50GB",
            "max_age": "1d"
          },
          "set_priority": {
            "priority": 100
          }
        }
      },
      "warm": {
        "min_age": "7d",
        "actions": {
          // Merge down to a single segment
          "forcemerge": {
            "max_num_segments": 1
          },
          // Reduce to a single primary shard
          "shrink": {
            "number_of_shards": 1
          },
          "set_priority": {
            "priority": 50
          }
        }
      },
      "delete": {
        "min_age": "30d",
        "actions": {
          "delete": {}
        }
      }
    }
  }
}

2.4 日志查询

错误日志查询:

// ERROR-level entries from the last hour, newest first
GET /logs-*/_search
{
  "query": {
    "bool": {
      // Both clauses are yes/no conditions and the result is sorted by time,
      // so they belong in filter context: no scoring work and cacheable
      "filter": [
        {
          "term": { "level": "ERROR" }
        },
        {
          "range": {
            "@timestamp": {
              "gte": "now-1h"
            }
          }
        }
      ]
    }
  },
  "sort": [
    { "@timestamp": "desc" }
  ]
}

链路追踪:

// Every log line that carries the given trace id, oldest first
GET /logs-*/_search
{
  "query": {
    "term": { "trace_id": "abc123" }
  },
  "sort": [
    { "@timestamp": "asc" }
  ]
}

慢请求分析:

// Slow requests of the last hour, grouped by URL and ordered by average duration
GET /logs-*/_search
{
  "query": {
    "bool": {
      "filter": [
        { "range": { "@timestamp": { "gte": "now-1h" } } },
        { "range": { "duration": { "gte": 1000 } } }  // 1 second or more (duration presumably in milliseconds)
      ]
    }
  },
  "aggs": {
    "slow_urls": {
      "terms": {
        "field": "url",
        "size": 10,
        // Order the buckets by the avg_duration sub-aggregation below
        "order": { "avg_duration": "desc" }
      },
      "aggs": {
        "avg_duration": {
          "avg": { "field": "duration" }
        }
      }
    }
  }
}

2.5 实时统计

QPS统计:

// Request volume over the last hour in one-minute buckets.
// Each bucket's doc_count is requests per minute; divide by 60 for QPS.
GET /logs-*/_search
{
  "size": 0,
  "query": {
    "range": {
      "@timestamp": {
        "gte": "now-1h"
      }
    }
  },
  "aggs": {
    "qps_per_minute": {
      "date_histogram": {
        "field": "@timestamp",
        "fixed_interval": "1m"
      }
    }
  }
}

错误率统计:

// Total entries and ERROR entries of the last hour in a single request
GET /logs-*/_search
{
  "size": 0,
  "query": {
    "range": { "@timestamp": { "gte": "now-1h" } }
  },
  "aggs": {
    // Counts every document that has a @timestamp value
    "total": {
      "value_count": { "field": "@timestamp" }
    },
    // doc_count of this bucket is the number of ERROR entries
    "errors": {
      "filter": {
        "term": { "level": "ERROR" }
      }
    }
  }
}

// 计算错误率: errors.doc_count / total.value

TOP IP统计:

// The ten client IPs with the most log entries (no time filter is applied)
GET /logs-*/_search
{
  "size": 0,
  "aggs": {
    "top_ips": {
      "terms": {
        "field": "ip",
        "size": 10
      }
    }
  }
}

2.6 Logstash配置

# logstash.conf
# Reads application log files, parses each event and ships it to Elasticsearch.
input {
  file {
    path => "/var/log/app/*.log"
    start_position => "beginning"
    # Lines that do not start with a date (stack traces) are appended to the
    # previous event
    codec => multiline {
      pattern => "^\d{4}-\d{2}-\d{2}"
      negate => true
      what => "previous"
    }
  }
}

filter {
  # Parse the log line
  grok {
    match => {
      "message" => "%{TIMESTAMP_ISO8601:timestamp} %{LOGLEVEL:level} %{JAVACLASS:logger} - %{GREEDYDATA:log_message}"
    }
  }

  # Parse the event time. TIMESTAMP_ISO8601 also matches values with
  # milliseconds or a "T" separator, so list those layouts as well; a
  # seconds-only pattern would tag such events with _dateparsefailure
  date {
    match => ["timestamp", "yyyy-MM-dd HH:mm:ss", "yyyy-MM-dd HH:mm:ss,SSS", "yyyy-MM-dd HH:mm:ss.SSS", "ISO8601"]
    target => "@timestamp"
  }

  # IP geolocation
  geoip {
    source => "ip"
    target => "geoip"
  }

  # Drop the temporary field
  mutate {
    remove_field => ["timestamp"]
  }
}

output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    # logs_template declares "data_stream": {}, so anything matching logs-* is
    # a data stream. Data streams only accept op_type=create, and rollover is
    # handled by the ILM policy, so write to one stable name instead of a
    # date-stamped index
    index => "logs-app"
    action => "create"
    # The template is created by hand; do not let Logstash install its own
    manage_template => false
  }
}

三、地理位置搜索

3.1 业务场景

需求:

  • 附近的人/商家/服务
  • 范围搜索(圆形/矩形)
  • 距离排序
  • 地理聚合(按区域统计)

3.2 Mapping设计

// Index for places that are searched by position
PUT /locations
{
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "fields": {
          "keyword": { "type": "keyword" }
        }
      },
      "location": {
        "type": "geo_point"  // latitude/longitude point
      },
      "address": {
        "type": "text"
      },
      "category": {
        "type": "keyword"
      },
      "rating": {
        "type": "half_float"
      }
    }
  }
}

3.3 写入数据

// Format 1: object with lat/lon keys
PUT /locations/_doc/1
{
  "name": "星巴克(国贸店)",
  "location": {
    "lat": 39.9042,
    "lon": 116.4074
  },
  "category": "咖啡厅"
}

// Format 2: string
PUT /locations/_doc/2
{
  "name": "肯德基(朝阳门店)",
  "location": "39.9289,116.4372",  // "lat,lon"
  "category": "快餐"
}

// Format 3: array (note the order is reversed compared to the string form)
PUT /locations/_doc/3
{
  "name": "必胜客(三里屯店)",
  "location": [116.4551, 39.9376],  // [lon, lat]
  "category": "西餐"
}

// Format 4: geohash
PUT /locations/_doc/4
{
  "name": "麦当劳(望京店)",
  "location": "wx4g0e6y",
  "category": "快餐"
}

3.4 地理查询

圆形范围搜索:

// Everything within 5km of a point, nearest first
GET /locations/_search
{
  "query": {
    "bool": {
      "must": {
        "match_all": {}
      },
      "filter": {
        "geo_distance": {
          "distance": "5km",
          "location": {
            "lat": 39.9042,
            "lon": 116.4074
          }
        }
      }
    }
  },
  "sort": [
    {
      // Sort by distance from the same point; sort values are reported in km
      "_geo_distance": {
        "location": {
          "lat": 39.9042,
          "lon": 116.4074
        },
        "order": "asc",
        "unit": "km"
      }
    }
  ]
}

矩形范围搜索:

// Everything inside the rectangle spanned by the two corners
GET /locations/_search
{
  "query": {
    "geo_bounding_box": {
      "location": {
        "top_left": {
          "lat": 40.0,
          "lon": 116.0
        },
        "bottom_right": {
          "lat": 39.0,
          "lon": 117.0
        }
      }
    }
  }
}

多边形范围搜索:

// Everything inside the polygon described by the listed vertices.
// NOTE(review): geo_polygon is reportedly deprecated in recent Elasticsearch
// releases in favour of geo_shape — confirm against the target version.
GET /locations/_search
{
  "query": {
    "geo_polygon": {
      "location": {
        "points": [
          { "lat": 40.0, "lon": 116.0 },
          { "lat": 40.0, "lon": 117.0 },
          { "lat": 39.0, "lon": 117.0 },
          { "lat": 39.0, "lon": 116.0 }
        ]
      }
    }
  }
}

3.5 距离衰减排序

// Rank matching places by a combination of distance and rating
GET /locations/_search
{
  "query": {
    "function_score": {
      "query": {
        "match": { "category": "咖啡厅" }
      },
      "functions": [
        {
          "gauss": {
            "location": {
              "origin": "39.9042,116.4074",  // user position
              "scale": "2km",   // factor reaches 0.5 at offset + scale (2.5km) from the origin
              "offset": "500m", // no decay within 500m
              "decay": 0.5
            }
          }
        },
        {
          "field_value_factor": {
            "field": "rating",
            "modifier": "sqrt",
            // Places without a rating would otherwise fail the request;
            // 1 is neutral because the function results are multiplied
            "missing": 1
          }
        }
      ],
      "score_mode": "multiply"
    }
  }
}

3.6 地理聚合

按区域聚合:

// Bucket all points into geohash cells
GET /locations/_search
{
  "size": 0,
  "aggs": {
    "grid": {
      "geohash_grid": {
        "field": "location",
        "precision": 5  // geohash precision (1-12)
      }
    }
  }
}

按距离分组:

// Count points per distance ring around the origin; range bounds are in km
GET /locations/_search
{
  "size": 0,
  "aggs": {
    "distance_ranges": {
      "geo_distance": {
        "field": "location",
        "origin": "39.9042,116.4074",
        "unit": "km",
        "ranges": [
          { "key": "0-1km", "to": 1 },
          { "key": "1-3km", "from": 1, "to": 3 },
          { "key": "3-5km", "from": 3, "to": 5 },
          { "key": "5km+", "from": 5 }
        ]
      }
    }
  }
}

3.7 地理边界聚合

// Compute the bounding box that encloses every matching point
GET /locations/_search
{
  "size": 0,
  "aggs": {
    "viewport": {
      "geo_bounds": {
        "field": "location",
        "wrap_longitude": true
      }
    }
  }
}

// Response: the bounding box of all points
{
  "aggregations": {
    "viewport": {
      "bounds": {
        "top_left": { "lat": 40.0, "lon": 116.0 },
        "bottom_right": { "lat": 39.0, "lon": 117.0 }
      }
    }
  }
}

四、数据同步

4.1 业务场景

需求:

  • MySQL数据实时同步到ES
  • 增量更新
  • 数据一致性保证
  • 高性能(>1万TPS)

4.2 方案对比

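| 方案 | 实时性 | 性能 | 复杂度 | 适用场景 |
| --- | --- | --- | --- | --- |
| Logstash JDBC | 分钟级 | 低 | 低 | 小数据量,定时同步 |
| Canal | 秒级 | 高 | 中 | 实时同步,阿里系 |
| Debezium | 秒级 | 高 | 中 | 实时同步,通用 |
| 应用双写 | 实时 | 最高 | 高 | 强一致性要求 |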

4.3 Logstash JDBC同步

配置:

# jdbc.conf
# Polls MySQL once a minute and copies changed product rows into Elasticsearch.
input {
  jdbc {
    jdbc_driver_library => "/path/to/mysql-connector-java.jar"
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => "jdbc:mysql://localhost:3306/shop"
    jdbc_user => "root"
    jdbc_password => "password"

    # Run on a schedule (every minute)
    schedule => "* * * * *"

    # Incremental sync: only rows changed since the last run
    statement => "SELECT * FROM products WHERE updated_at > :sql_last_value ORDER BY updated_at ASC"

    # Track the last seen updated_at value as :sql_last_value
    use_column_value => true
    tracking_column => "updated_at"
    tracking_column_type => "timestamp"

    # Keep the stored :sql_last_value between runs (true would reset it and
    # trigger a full re-sync)
    clean_run => false
  }
}

filter {
  mutate {
    remove_field => ["@version", "@timestamp"]
  }
}

output {
  elasticsearch {
    hosts => ["http://localhost:9200"]
    index => "products"
    # Use the MySQL primary key as the document id so re-syncs overwrite.
    # (Logstash comments start with "#"; "//" is a syntax error.)
    document_id => "%{id}"
  }

  stdout {
    codec => json_lines
  }
}

优缺点:

  • 优点:简单,无需修改应用
  • 缺点:非实时,删除操作无法同步

4.4 Canal实时同步

架构:

MySQL Binlog → Canal Server → Canal Client → Elasticsearch

配置MySQL:

-- Configure the binlog: row-based events that carry the full row image.
-- These statements only set the format; binary logging itself has to be
-- enabled in the server configuration already.
SET GLOBAL binlog_format = 'ROW';
SET GLOBAL binlog_row_image = 'FULL';

-- Create the account Canal connects with (replication privileges)
CREATE USER 'canal'@'%' IDENTIFIED BY 'canal';
GRANT SELECT, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'canal'@'%';
FLUSH PRIVILEGES;

Canal Server配置:

# instance.properties
# MySQL server to read the binlog from, and the account to use
canal.instance.master.address=127.0.0.1:3306
canal.instance.dbUsername=canal
canal.instance.dbPassword=canal
# Only capture tables of the "shop" schema
canal.instance.filter.regex=shop\\..*

Java Client:

import com.alibaba.otter.canal.client.CanalConnector;
import com.alibaba.otter.canal.client.CanalConnectors;
import com.alibaba.otter.canal.protocol.Message;
import com.alibaba.otter.canal.protocol.CanalEntry.*;

/**
 * Consumes row changes of shop.products from a Canal server and mirrors them
 * into the "products" index: inserts/updates are indexed, deletes are removed.
 */
public class CanalElasticsearchSyncClient {

    private CanalConnector connector;
    private RestHighLevelClient esClient;

    /**
     * Connects to Canal and processes batches until the calling thread is
     * interrupted. A batch is acknowledged only after every row in it reached
     * Elasticsearch; on an I/O failure the batch is rolled back and fetched
     * again, so no change is lost. Any other failure is rethrown after the
     * rollback. The connection is always closed on exit.
     */
    public void start() {
        connector = CanalConnectors.newSingleConnector(
            new InetSocketAddress("127.0.0.1", 11111),
            "example",
            "",
            ""
        );

        try {
            connector.connect();
            connector.subscribe("shop\\.products");
            // Re-deliver anything that was fetched but never acknowledged
            connector.rollback();

            while (!Thread.currentThread().isInterrupted()) {
                Message message = connector.getWithoutAck(100);
                long batchId = message.getId();

                if (batchId == -1 || message.getEntries().isEmpty()) {
                    if (!pause()) {
                        break;
                    }
                    continue;
                }

                try {
                    processEntries(message.getEntries());
                    connector.ack(batchId);
                } catch (RuntimeException e) {
                    // Never ack a batch that was not fully applied
                    connector.rollback(batchId);
                    if (!(e instanceof java.io.UncheckedIOException)) {
                        // Not a transient I/O problem: retrying would loop forever
                        throw e;
                    }
                    e.printStackTrace();
                    if (!pause()) {
                        break;
                    }
                }
            }
        } finally {
            connector.disconnect();
        }
    }

    /** Sleeps one second; returns false (and restores the flag) when interrupted. */
    private boolean pause() {
        try {
            Thread.sleep(1000);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Applies every row change contained in the given binlog entries. */
    private void processEntries(List<Entry> entries) {
        for (Entry entry : entries) {
            // Skip transaction begin/end markers
            if (entry.getEntryType() != EntryType.ROWDATA) {
                continue;
            }

            RowChange rowChange;
            try {
                rowChange = RowChange.parseFrom(entry.getStoreValue());
            } catch (Exception e) {
                throw new IllegalStateException("Failed to parse binlog entry", e);
            }
            EventType eventType = rowChange.getEventType();

            for (RowData rowData : rowChange.getRowDatasList()) {
                if (eventType == EventType.INSERT || eventType == EventType.UPDATE) {
                    syncToES(rowData.getAfterColumnsList(), eventType);
                } else if (eventType == EventType.DELETE) {
                    deleteFromES(rowData.getBeforeColumnsList());
                }
            }
        }
    }

    /**
     * Indexes one row, using its "id" column as the document id.
     *
     * @throws java.io.UncheckedIOException when the write to Elasticsearch fails
     * @throws IllegalStateException when the row has no "id" column
     */
    private void syncToES(List<Column> columns, EventType eventType) {
        Map<String, Object> doc = new HashMap<>();
        String id = null;

        for (Column column : columns) {
            if (column.getName().equals("id")) {
                id = column.getValue();
            }
            // Keep SQL NULL as null instead of indexing it as a string
            doc.put(column.getName(), column.getIsNull() ? null : column.getValue());
        }

        if (id == null) {
            // Indexing without an id would create a new document on every change
            throw new IllegalStateException("Row has no id column, cannot index it");
        }

        IndexRequest request = new IndexRequest("products")
            .id(id)
            .source(doc);

        try {
            esClient.index(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Failed to index product " + id, e);
        }
    }

    /**
     * Deletes the document that belongs to a removed row.
     *
     * @throws java.io.UncheckedIOException when the delete request fails
     * @throws IllegalStateException when the row has no "id" column
     */
    private void deleteFromES(List<Column> columns) {
        String id = null;
        for (Column column : columns) {
            if (column.getName().equals("id")) {
                id = column.getValue();
                break;
            }
        }

        if (id == null) {
            throw new IllegalStateException("Row has no id column, cannot delete it");
        }

        DeleteRequest request = new DeleteRequest("products", id);
        try {
            esClient.delete(request, RequestOptions.DEFAULT);
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Failed to delete product " + id, e);
        }
    }
}

4.5 应用双写方案

事务保证:

@Service
public class ProductService {

    @Autowired
    private ProductMapper productMapper;

    @Autowired
    private ElasticsearchClient esClient;

    @Transactional
    public void createProduct(Product product) {
        // 1. 写入MySQL
        productMapper.insert(product);

        // 2. 异步写入ES(避免阻塞主流程)
        CompletableFuture.runAsync(() -> {
            try {
                IndexRequest request = new IndexRequest("products")
                    .id(product.getId().toString())
                    .source(objectMapper.writeValueAsString(product), XContentType.JSON);
                esClient.index(request, RequestOptions.DEFAULT);
            } catch (Exception e) {
                // 写入失败,记录到失败队列
                failureQueue.offer(product);
                log.error("Sync to ES failed", e);
            }
        });
    }

    // 定时任务:重试失败数据
    @Scheduled(fixedDelay = 60000)
    public void retryFailedSync() {
        Product product;
        while ((product = failureQueue.poll()) != null) {
            try {
                syncToES(product);
            } catch (Exception e) {
                failureQueue.offer(product);
            }
        }
    }
}

最终一致性:

/**
 * Nightly reconciliation: re-indexes products that exist only in MySQL and
 * deletes documents that exist only in Elasticsearch.
 *
 * Document ids are read with the scroll API. A single search returns at most
 * 10,000 hits, so with a larger index the id set would be truncated and the
 * comparison would be wrong.
 *
 * @throws java.io.UncheckedIOException when reading from Elasticsearch fails
 */
@Scheduled(cron = "0 0 2 * * ?")  // every day at 02:00
public void syncCheck() {
    // 1. All ids known to MySQL
    List<Long> mysqlIds = productMapper.selectAllIds();
    Set<Long> mysqlSet = new HashSet<>(mysqlIds);

    // 2. All ids known to Elasticsearch, fetched page by page
    SearchRequest request = new SearchRequest("products");
    request.source(new SearchSourceBuilder()
        .query(QueryBuilders.matchAllQuery())
        .fetchSource(false)
        .size(1000)
    );
    request.scroll("1m");

    Set<Long> esSet = new HashSet<>();
    String scrollId = null;
    try {
        SearchResponse response = esClient.search(request, RequestOptions.DEFAULT);
        while (true) {
            scrollId = response.getScrollId();
            SearchHit[] hits = response.getHits().getHits();
            if (hits.length == 0) {
                break;
            }
            for (SearchHit hit : hits) {
                esSet.add(Long.parseLong(hit.getId()));
            }
            response = esClient.scroll(
                new org.elasticsearch.action.search.SearchScrollRequest(scrollId).scroll("1m"),
                RequestOptions.DEFAULT
            );
        }
    } catch (java.io.IOException e) {
        throw new java.io.UncheckedIOException("Failed to read product ids from Elasticsearch", e);
    } finally {
        if (scrollId != null) {
            try {
                org.elasticsearch.action.search.ClearScrollRequest clear =
                    new org.elasticsearch.action.search.ClearScrollRequest();
                clear.addScrollId(scrollId);
                esClient.clearScroll(clear, RequestOptions.DEFAULT);
            } catch (java.io.IOException ignored) {
                // Best effort: the scroll context expires after its keep-alive
            }
        }
    }

    // 3. In MySQL but not in Elasticsearch: index it
    for (Long id : mysqlSet) {
        if (!esSet.contains(id)) {
            Product product = productMapper.selectById(id);
            // The row may have been deleted since the id list was read
            if (product != null) {
                syncToES(product);
            }
        }
    }

    // 4. In Elasticsearch but not in MySQL: delete it
    for (Long id : esSet) {
        if (!mysqlSet.contains(id)) {
            deleteFromES(id);
        }
    }
}

五、高频面试题

如何实现拼写纠错?

答案:

  1. Fuzziness:模糊查询
// Match query that tolerates a small number of character edits
{
  "query": {
    "match": {
      "title": {
        "query": "ipone",
        "fuzziness": "AUTO"
      }
    }
  }
}
  2. Suggest API:
// Term suggester: proposes corrections word by word
{
  "suggest": {
    "text": "ipone",
    "title-suggest": {
      "term": {
        "field": "title"
      }
    }
  }
}
  3. Phrase Suggest:
// Phrase suggester: proposes a correction for the phrase as a whole
{
  "suggest": {
    "text": "ipone 15 pro",
    "title-suggest": {
      "phrase": {
        "field": "title"
      }
    }
  }
}

地理位置搜索如何优化性能?

答案:

  1. 使用geo_point而非geo_shape:

    • geo_point:点位置,性能高
    • geo_shape:多边形,性能低
  2. 降低GeoHash精度:

{
  "aggs": {
    "grid": {
      "geohash_grid": {
        "field": "location",
        "precision": 5  // keep this low; higher precision means many more buckets
      }
    }
  }
}
  3. 使用geo_distance filter:
{
  "query": {
    "bool": {
      "filter": {  // filter context: no scoring, results can be cached
        "geo_distance": {
          "distance": "5km",
          "location": "39.9,116.4"
        }
      }
    }
  }
}

MySQL到ES同步如何保证一致性?

答案:

  1. 实时同步(Canal/Debezium):

    • 解析Binlog,准实时同步
    • 支持增删改操作
    • 秒级延迟
  2. 定时对账:

    • 每天全量对比MySQL和ES
    • 修复不一致数据
  3. 失败重试:

    • 写入ES失败记录到队列
    • 定时重试
  4. 版本控制:

// External versioning: the caller supplies the version number (5 here), so an
// out-of-order write carrying an older version is rejected instead of applied
PUT /products/_doc/1?version=5&version_type=external
{
  "title": "iPhone 15",
  "updated_at": 1234567890
}

日志索引如何设计?

答案:

  1. 按时间分索引:

    • 按天:logs-2024.01.15
    • 按月:logs-2024.01(数据量小)
  2. 使用ILM:

    • Hot(7天):频繁查询
    • Warm(30天):偶尔查询
    • Delete(90天):删除
  3. 优化设置:

{
  "settings": {
    "refresh_interval": "10s",  // trade search freshness for indexing throughput
    "number_of_shards": 3,
    "codec": "best_compression"  // smaller segments on disk
  }
}
  4. 合理Mapping:
{
  "message": {
    "type": "text",
    "index": false  // disable indexing for fields that are never searched
  }
}

电商搜索如何排序?

答案: 综合考虑多个因素:

  1. 文本相关性(BM25):
// Text relevance only: title matches count three times as much as description
{
  "query": {
    "multi_match": {
      "query": "手机",
      "fields": ["title^3", "description"]
    }
  }
}
  2. 业务因素(Function Score):

    • 销量:销量高排前
    • 评分:评分高排前
    • 新品:新品优先
    • 品牌:优质品牌加权
  3. 个性化:

    • 用户历史偏好
    • 地理位置
    • 价格偏好

完整查询见1.3节示例。