CREATE TABLE `employees` (`id` int(11) NOT NULL AUTO_INCREMENT,`name` varchar(24) NOT NULL DEFAULT '' COMMENT '姓名',`age` int(11) NOT NULL DEFAULT '0' COMMENT '年龄',`position` varchar(20) NOT NULL DEFAULT '' COMMENT '职位',`hire_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT '入职时间',PRIMARY KEY (`id`),KEY `idx_name_age_position` (`name`,`age`,`position`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8 COMMENT='员工记录表';INSERT INTO employees(name,age,position,hire_time) VALUES('LiLei',22,'manager',NOW());
INSERT INTO employees(name,age,position,hire_time) VALUES('HanMeimei', 23,'dev',NOW());
INSERT INTO employees(name,age,position,hire_time) VALUES('Lucy',23,'dev',NOW());-- 插入一些示例数据
drop procedure if exists insert_emp;
delimiter //
create procedure insert_emp()
begindeclare i int; set i=1; while(i<=100000)do insert into employees(name,age,position) values(CONCAT('zhuge',i),i,'dev'); set i=i+1; end while;
end //
delimiter ;
call insert_emp();
如果上面的存储过程报错,那就不用理会了,用navicat工具自动生成10w+的数据就行。在下面的的案例中,表的数据量,也会影响MySQL底层对于索引策略的选择!

表字段

索引情况

EXPLAIN SELECT * FROM employees
WHERE name > 'LiLei' AND age = 22 AND position ='manager';
联合索引第一个字段就用范围查找不会走索引,MySQL内部可能觉得第一个字段就用范围,结果集应该很大,回表效率不高,还不如就全表扫描。
我们不妨做个实验
EXPLAIN SELECT * FROM employees
WHERE name = 'LiLei' AND age > 22 AND position ='manager';EXPLAIN SELECT * FROM employees
WHERE name = 'LiLei' AND age = 22 AND position >'manager';
用了两个字段索引

用了三个字段索引

EXPLAIN SELECT * FROM employees force index(idx_name_age_position)
WHERE name > 'LiLei' AND age = 22 AND position ='manager';
虽然使用了强制走索引让联合索引第一个字段范围查找也走索引,扫描的行rows看上去也少了点,但是最终查找效率不一定比全表扫描高,因为回表效率不高。
不妨看看下面的这个例子
如果是MySQL8.0以下的版本,MySQL还有缓存,所以要先将其关闭!
MySQL8.0以上的可以忽略!
-- 关闭查询缓存,8.0以上版本没有这个东西了!
set global query_cache_size=0;
set global query_cache_type=0;
执行结果如下,反而走全表扫描的查询效率更高!(尽管扫描行数更多)
-- 执行时间0.081s,不走索引,扫描行数:99977
SELECT * FROM employees WHERE name > 'LiLei';-- 执行时间0.176s,强制走索引,扫描行数:49988
SELECT * FROM employees force index(idx_name_age_position) WHERE name > 'LiLei';-- 如果使用覆盖索引,我们会发现执行效率更快!!!
-- 执行时间0.036s,扫描行数:49988
SELECT name,age,position FROM employees WHERE name > 'LiLei'
EXPLAIN SELECT name,age,position FROM employees
WHERE name > 'LiLei' AND age = 22 AND position ='manager';

所以一般情况下,我们遇到上述的这些问题,直接使用覆盖索引就好了!这也是为什么阿里巴巴手册上写着,慎用 “ SELECT * ” 的原因了!
EXPLAIN SELECT * FROM employees
WHERE name in ('LiLei','HanMeimei','Lucy')
AND age = 22 AND position ='manager';
大体的的意思就是数据量太大了,走索引的效率会高一点;但是如果数据量很小,那直接全表扫描可能效率更高。
我们之前有提到过这样的一张表

-- ALL
EXPLAIN SELECT * FROM employees
WHERE name > 'LiLei' AND age = 22 AND position ='manager';-- range (不管数据量大小都会走索引)
EXPLAIN SELECT * FROM employees
WHERE name like 'LiLei%' AND age = 22 AND position ='manager';
EXPLAIN SELECT * FROM employees_copy
WHERE name like 'LiLei%' AND age = 22 AND position ='manager';
这里主要是因为发生了索引下推!
索引下推(Index Condition Pushdown,ICP), 是MySQL5.6版本之后才有的东西。like KK%其实就是用到了索引下推优化。
对于辅助的联合索引(name,age,position),正常情况按照最左前缀原则
SELECT * FROM employees
WHERE name like 'LiLei%' AND age = 22 AND position ='manager';
这种情况只会走name字段索引,因为根据name字段过滤完,得到的索引行里的age和position是无序的,无法很好的利用索引。
在MySQL5.6之前的版本,这个查询只能在联合索引里匹配到名字是 'LiLei' 开头的索引,然后拿这些索引对应的主键逐个回表,到主键索引上找出相应的记录,再比对age和position这两个字段的值是否符合。
MySQL 5.6引入了索引下推优化,可以在索引遍历过程中,对索引中包含的所有字段先做判断,过滤掉不符合条件的记录之后再回表,可以有效的减少回表次数!!!
使用了索引下推优化后,上面那个查询在联合索引里匹配到名字是 'LiLei' 开头的索引之后,同时还会在索引里过滤age和position这两个字段,拿着过滤完剩下的索引对应的主键id再回表查整行数据。
所以这里的key_len是140,表示这3个字段的索引都有用到!!!

索引下推会减少回表次数,对于innodb引擎的表索引下推只能用于二级索引,innodb的主键索引(聚簇索引)树叶子节点上保存的是全行数据,所以这个时候索引下推并不会起到减少查询全行数据的效果。
估计应该是MySQL认为范围查找过滤的结果集过大,like KK% 在绝大多数情况来看,过滤后的结果集比较小,所以这里MySQL选择给 like KK% 用了索引下推优化(不管数据量是大还是小!)
我们先来看看下面的这几条SQL
-- 执行时间 0.121s ALL
EXPLAIN SELECT * FROM employees WHERE name > 'a';

-- 执行时间 0.070s range
EXPLAIN SELECT name,age,position FROM employees WHERE name > 'a';

如果用name索引需要遍历name字段联合索引树,然后还需要根据遍历出来的主键值(如果符合条件的值很多),再去主键索引树里再去查出最终数据。这样做的成本比全表扫描还高,可以用覆盖索引优化,这样只需要遍历name字段的联合索引树就能拿到所有结果。
从两者的执行时间上来看,也可以发现走覆盖索引的效率要高得多!
再看看这一条SQL
不难发现它与第一条SQL相比,只是判断条件从“ name > 'a' ” 变成了 “ name > 'z' ”,但是后者居然就可以走索引了!
-- 执行时间 0.021s range
EXPLAIN SELECT * FROM employees WHERE name > 'zzz';

对于上面这两种 name>'a' 和 name>'zzz' 的执行结果,mysql最终如何选择索引,我们可以用trace工具来查看!
我们先要开启trace
-- 开启trace
set session optimizer_trace="enabled=on",end_markers_in_json=on; -- 关闭trace
set session optimizer_trace="enabled=off";
将下面的两条语句选中,一起执行!
select * from employees where name > 'a' order by position;
SELECT * FROM information_schema.OPTIMIZER_TRACE;

使用trace工具,我们可以清晰的看到SQL执行的三个阶段。为了方便阅读,我们将这一整个JSON拆成3个片段。
第一阶段:SQL准备阶段,格式化sql
第二阶段:SQL优化阶段
第三阶段:SQL执行阶段
"join_preparation": { // 第一阶段:SQL准备阶段,格式化sql"select#": 1,"steps": [{"expanded_query": "/* select#1 */ select `employees`.`id` AS `id`,`employees`.`name` AS `name`,`employees`.`age` AS `age`,`employees`.`position` AS `position`,`employees`.`hire_time` AS `hire_time` from `employees` where (`employees`.`name` > 'a') order by `employees`.`position`"}] /* steps */
} /* join_preparation */
"join_optimization": { // 第二阶段:SQL优化阶段"select#": 1,"steps": [{"condition_processing": { // 条件处理"condition": "WHERE","original_condition": "(`employees`.`name` > 'a')","steps": [{"transformation": "equality_propagation","resulting_condition": "(`employees`.`name` > 'a')"},{"transformation": "constant_propagation","resulting_condition": "(`employees`.`name` > 'a')"},{"transformation": "trivial_condition_removal","resulting_condition": "(`employees`.`name` > 'a')"}] /* steps */} /* condition_processing */},{"substitute_generated_columns": {} /* substitute_generated_columns */},{"table_dependencies": [ // 表依赖详情{"table": "`employees`","row_may_be_null": false,"map_bit": 0,"depends_on_map_bits": [] /* depends_on_map_bits */}] /* table_dependencies */},{"ref_optimizer_key_uses": [] /* ref_optimizer_key_uses */},{"rows_estimation": [ // 预估表的访问成本{"table": "`employees`", "range_analysis": {"table_scan": { // 全表扫描情况"rows": 99977, // 扫描行数"cost": 10104.1 // 查询成本} /* table_scan */,"potential_range_indexes": [ // 查询可能使用的索引{"index": "PRIMARY", // 主键索引"usable": false,"cause": "not_applicable"},{"index": "idx_name_age_position", // 辅助索引"usable": true,"key_parts": ["name","age","position","id"] /* key_parts */}] /* potential_range_indexes */,"setup_range_conditions": [] /* setup_range_conditions */,"group_index_range": {"chosen": false,"cause": "not_group_by_or_distinct"} /* group_index_range */,"skip_scan_range": {"potential_skip_scan_indexes": [{"index": "idx_name_age_position","usable": false,"cause": "query_references_nonkey_column"}] /* potential_skip_scan_indexes */} /* skip_scan_range */,"analyzing_range_alternatives": { // 分析各个索引使用成本"range_scan_alternatives": [{"index": "idx_name_age_position","ranges": ["'a' < name" // 索引使用范围] /* ranges */,"index_dives_for_eq_ranges": true,"rowid_ordered": false, // 使用该索引获取的记录是否按照主键排序"using_mrr": false, "index_only": false, // 是否使用覆盖索引"in_memory": 1, "rows": 49988, // 索引扫描行数"cost": 17496.1, // 索引使用成本"chosen": false, // 是否选择该索引"cause": "cost"}] /* range_scan_alternatives */,"analyzing_roworder_intersect": {"usable": false,"cause": "too_few_roworder_scans"} /* analyzing_roworder_intersect */} /* analyzing_range_alternatives */} /* range_analysis */}] /* rows_estimation */},{"considered_execution_plans": [{"plan_prefix": [] /* plan_prefix */,"table": "`employees`","best_access_path": { // 最优访问路径"considered_access_paths": [ // 最终选择的访问路径{"rows_to_scan": 99977,"access_type": "scan", // 访问类型:为scan,全表扫描"resulting_rows": 99977,"cost": 10102,"chosen": true, // "use_tmp_table": true}] /* considered_access_paths */} /* best_access_path */,"condition_filtering_pct": 100,"rows_for_plan": 99977,"cost_for_plan": 10102,"sort_cost": 99977,"new_cost_for_plan": 110079,"chosen": true}] /* considered_execution_plans */},{"attaching_conditions_to_tables": {"original_condition": "(`employees`.`name` > 'a')","attached_conditions_computation": [] /* attached_conditions_computation */,"attached_conditions_summary": [{"table": "`employees`","attached": "(`employees`.`name` > 'a')"}] /* attached_conditions_summary */} /* attaching_conditions_to_tables */},{"optimizing_distinct_group_by_order_by": {"simplifying_order_by": {"original_clause": "`employees`.`position`","items": [{"item": "`employees`.`position`"}] /* items */,"resulting_clause_is_simple": true,"resulting_clause": "`employees`.`position`"} /* simplifying_order_by */} /* optimizing_distinct_group_by_order_by */},{"reconsidering_access_paths_for_index_ordering": {"clause": "ORDER BY","steps": [] /* steps */,"index_order_summary": {"table": "`employees`","index_provides_order": false,"order_direction": "undefined","index": "unknown","plan_changed": false} /* index_order_summary */} /* reconsidering_access_paths_for_index_ordering */},{"finalizing_table_conditions": [{"table": "`employees`","original_table_condition": "(`employees`.`name` > 'a')","final_table_condition ": "(`employees`.`name` > 'a')"}] /* finalizing_table_conditions */},{"refine_plan": [{"table": "`employees`"}] /* refine_plan */},{"considering_tmp_tables": [{"adding_sort_to_table": "employees"} /* filesort */] /* considering_tmp_tables */}] /* steps */
} /* join_optimization */
"join_execution": {"select#": 1,"steps": [{"sorting_table": "employees","filesort_information": [{"direction": "asc","expression": "`employees`.`position`"}] /* filesort_information */,"filesort_priority_queue_optimization": {"usable": false,"cause": "not applicable (no LIMIT)"} /* filesort_priority_queue_optimization */,"filesort_execution": [] /* filesort_execution */,"filesort_summary": {"memory_available": 262144,"key_size": 40,"row_size": 190,"max_rows_per_buffer": 1379,"num_rows_estimate": 99977,"num_rows_found": 100003,"num_initial_chunks_spilled_to_disk": 35,"peak_memory_used": 271736,"sort_algorithm": "std::stable_sort","sort_mode": ""} /* filesort_summary */}] /* steps */
} /* join_execution */
所以我们这边得出的结论:
全表扫描的成本低于索引扫描,所以mysql最终选择全表扫描
通过上面的JSON数据,我们就可以知道MySQL底层计算出来的策略。
有一些SQL语句,我们可能觉得它应该要走索引,但是实际上并没有,我们就可以通过trace,这个工具来看看SQL底层的执行逻辑,从而对其做出优化策略!