Spark SQL Source Code Walkthrough, Part 4 — Optimizer: analyzed logical plan -> optimized logical plan
The Optimizer takes the resolved logical plan produced by the Analyzer and rewrites it batch by batch into an equivalent but cheaper plan, transforming both operators (LogicalPlan nodes) and expressions (Expression trees). Like the Analyzer, it is organized as a list of batches, where each batch is a sequence of Rule[LogicalPlan] objects applied repeatedly until the plan stops changing or an iteration cap is reached. The version walked through here is Spark SQL 1.4.1.
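All of the examples below run against a Parquet-backed table registered as `test`. A minimal Spark 1.4-style harness for reproducing the logged plans follows; the app name, master URL, and Parquet path are placeholders, not taken from the original post:

```scala
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

// Placeholder setup; adjust the master and path for your environment.
val sc = new SparkContext(new SparkConf().setAppName("optimizer-walkthrough").setMaster("local[2]"))
val sqlContext = new SQLContext(sc)
sqlContext.read.parquet("/path/to/test.parquet").registerTempTable("test")

val df = sqlContext.sql("select id from (select id from test where id > 100) a where id > 80")
println(df.queryExecution.analyzed)       // resolved logical plan: the optimizer's input
println(df.queryExecution.optimizedPlan)  // optimized logical plan: the optimizer's output
df.explain(true)                          // parsed, analyzed, optimized and physical plans
```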
The default optimizer, DefaultOptimizer, wires these rules into five batches:

```scala
object DefaultOptimizer extends Optimizer {
  val batches =
    // SubQueries are only needed for analysis and can be removed before execution.
    Batch("Remove SubQueries", FixedPoint(100),
      EliminateSubQueries) ::
    Batch("Operator Reordering", FixedPoint(100),
      UnionPushdown,
      // Filter pushdown: merge adjacent filters
      CombineFilters,
      // push a filter below a projection, e.g.
      //   select * from (select a,b from table) where a=1
      //   => select * from (select a,b from table where a=1)
      PushPredicateThroughProject,
      // push single-side predicates into the join inputs, e.g.
      //   select ... from x join (select * from y where y.b > 0) on x.id = y.id where x.a > 0
      PushPredicateThroughJoin,
      PushPredicateThroughGenerate,
      // prune columns that no parent operator reads
      ColumnPruning,
      // collapse adjacent projections into one
      ProjectCollapsing,
      // merge adjacent limits, e.g.
      //   select * from (select * from c_picrecord limit 100) a limit 10
      //   => Limit if ((100 < 10)) 100 else 10
      CombineLimits) ::
    Batch("ConstantFolding", FixedPoint(100),
      // rewrite expressions containing NULL literals at planning time,
      // following SQL's NULL semantics
      NullPropagation,
      // rewrite IN over a literal list into a hash-set lookup
      OptimizeIn,
      // evaluate foldable expressions once, at planning time, e.g.
      //   1 in (1,2) folds directly to true, and
      //   Project [(((1 + 2) + 3) + 4) AS c0#46]  =>  Project [10 AS c0#46]
      ConstantFolding,
      // rewrite LIKE patterns of the shapes '%demo%', '%demo', 'demo%', 'demo'
      // into Contains / EndsWith / StartsWith / EqualTo
      LikeSimplification,
      // e.g. true and score > 0  =>  score > 0
      BooleanSimplification,
      // drop always-true filters (where 1=1); always-false ones (where 1=2)
      // are replaced by an empty relation
      SimplifyFilters,
      // drop casts whose input already has the target type
      SimplifyCasts,
      // e.g. Upper(Upper('a')) => Upper('a')
      SimplifyCaseConversionExpressions) ::
    Batch("Decimal Optimizations", FixedPoint(100),
      DecimalAggregates) ::
    Batch("LocalRelation", FixedPoint(100),
      ConvertToLocalRelation) :: Nil
}
```
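Each batch is executed by Catalyst's RuleExecutor with a FixedPoint(100) strategy: the batch's rules are applied in order, pass after pass, until a full pass no longer changes the plan or 100 iterations have run. A self-contained toy sketch of that execution loop (not the real Spark class; `Plan` here is just a list of operator names):

```scala
// Self-contained sketch of Catalyst's fixed-point batch execution strategy.
object FixedPointSketch {
  type Plan = List[String]               // toy stand-in for a LogicalPlan
  type Rule = Plan => Plan

  def runBatch(plan: Plan, rules: Seq[Rule], maxIterations: Int): Plan = {
    var current = plan
    var iteration = 0
    var changed = true
    while (changed && iteration < maxIterations) {
      val next = rules.foldLeft(current)((p, rule) => rule(p))
      changed = next != current          // stop once a full pass is a no-op
      current = next
      iteration += 1
    }
    current
  }

  def main(args: Array[String]): Unit = {
    // toy rule: drop adjacent duplicate operators, like CombineFilters/CombineLimits
    val dedup: Rule = p => p.foldRight(List.empty[String]) {
      case (op, head :: tail) if op == head => head :: tail
      case (op, acc)                        => op :: acc
    }
    println(runBatch(List("Filter", "Filter", "Project", "Limit", "Limit"), Seq(dedup), 100))
    // => List(Filter, Project, Limit)
  }
}
```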
Each entry in these batches is a Rule[LogicalPlan]; the rest of this article walks through them one by one.
4.1 EliminateSubQueries
Removes subqueries:

```scala
object EliminateSubQueries extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case Subquery(_, child) => child
  }
}
```
In effect, the rule simply deletes the Subquery node and replaces it with its child.
For example, with the following SQL statement:

```java
String sql = "SELECT id,dev_chnid,dev_chnname,car_num,car_speed,car_direct from test";
```
The log output is as follows:

```text
plan->
Project [id#0L,dev_chnid#26,dev_chnname#4,car_num#5,car_speed#8,car_direct#12]
 Subquery test
  Relation[id#0L,dev_id#1,dev_chnnum#2L,dev_name#3,dev_chnname#4,car_num#5,car_numtype#6,car_numcolor#7,car_speed#8,car_type#9,car_color#10,car_length#11L,car_direct#12,car_way_code#13,cap_time#14L,cap_date#15L,inf_note#16,max_speed#17,min_speed#18,car_img_url#19,car_img1_url#20,car_img2_url#21,car_img3_url#22,car_img4_url#23,car_img5_url#24,rec_stat#25,dev_chnid#26,car_img_count#27,save_flag#28,dc_cleanflag#29,pic_id#30,car_img_plate_top#31L,car_img_plate_left#32L,car_img_plate_bottom#33L,car_img_plate_right#34L,car_brand#35L,issafetybelt#36,isvisor#37,bind_stat#38,car_num_pic#39,combined_pic_url#40,verify_memo#41,rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [id#0L,dev_chnid#26,dev_chnname#4,car_num#5,car_speed#8,car_direct#12]   // the Subquery node is gone
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.analysis.EliminateSubQueries$@7032a1b4
```
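The rule can also be exercised in isolation, in the style of Spark's own Catalyst test suites; this is a sketch, and the single `id` column is a stand-in for the real schema:

```scala
import org.apache.spark.sql.catalyst.analysis.EliminateSubQueries
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, Subquery}

val id = 'id.long                    // AttributeReference built via the Catalyst DSL
val relation = LocalRelation(id)     // stand-in for the Parquet relation

// Subquery("test", Project(id, relation)): what the analyzer produces for "... from test"
val plan = Subquery("test", relation.select(id))

println(EliminateSubQueries(plan))   // Project over the relation; the Subquery is gone
```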
4.2 UnionPushdown
Pushes operators sitting on top of a Union down into both branches of the Union:

```scala
object UnionPushdown extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // Push down filter into union
    case Filter(condition, u @ Union(left, right)) =>
      val rewrites = buildRewrites(u)
      Union(
        Filter(condition, left),
        Filter(pushToRight(condition, rewrites), right))

    // Push down projection into union
    case Project(projectList, u @ Union(left, right)) =>
      val rewrites = buildRewrites(u)
      Union(
        Project(projectList, left),
        Project(projectList.map(pushToRight(_, rewrites)), right))
  }
}
```
For example, with the following SQL statement:

```java
String sql = "select * from (select id from test union all select id from test)aa";
```
The log output is as follows:

```text
plan->
Project [id#0L]
 Union
  Project [id#0L]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
  Project [id#0L]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Union                 // the outer Project has been pushed into both branches of the Union
 Project [id#0L]
  Project [id#0L]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
 Project [id#0L]
  Project [id#0L]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.UnionPushdown$@1173be4f
```
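`buildRewrites` and `pushToRight` are not shown in the excerpt; in essence they pair each attribute of the union's left output with the corresponding attribute of the right output, so a condition phrased against the left side can be re-expressed for the right side. A standalone sketch of that idea, with attributes modeled as plain strings:

```scala
// Standalone sketch of the attribute-rewriting idea behind buildRewrites/pushToRight.
object UnionRewriteSketch {
  def buildRewrites(leftOutput: Seq[String], rightOutput: Seq[String]): Map[String, String] =
    leftOutput.zip(rightOutput).toMap   // left attr -> corresponding right attr

  def pushToRight(condition: String, rewrites: Map[String, String]): String =
    rewrites.foldLeft(condition) { case (c, (l, r)) => c.replace(l, r) }

  def main(args: Array[String]): Unit = {
    val rewrites = buildRewrites(Seq("id#0L"), Seq("id#43L"))
    println(pushToRight("id#0L > 100", rewrites))   // => id#43L > 100
  }
}
```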
4.3 CombineFilters
Merges two adjacent Filter operators into one:

```scala
object CombineFilters extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // merge the two adjacent filters into a single conjunction
    case ff @ Filter(fc, nf @ Filter(nc, grandChild)) => Filter(And(nc, fc), grandChild)
  }
}
```
For example, with the following SQL statement:

```java
String sql = "select id from (select id from test where id >100)a where id > 80";
```
The log output is as follows:

```text
16-07-22 16:12:16,201 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project
 Filter (id#0L > CAST(80, LongType))
  Filter (id#0L > CAST(100, LongType))
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project
 Filter ((id#0L > CAST(100, LongType)) && (id#0L > CAST(80, LongType)))   // the two filters merged into one conjunction
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.CombineFilters$@99569a7
```
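The same test-suite style used for EliminateSubQueries above shows the merge directly (again a sketch over a one-column stand-in relation):

```scala
import org.apache.spark.sql.catalyst.dsl.expressions._
import org.apache.spark.sql.catalyst.dsl.plans._
import org.apache.spark.sql.catalyst.optimizer.CombineFilters
import org.apache.spark.sql.catalyst.plans.logical.LocalRelation

val id = 'id.long
val relation = LocalRelation(id)

// Filter(id > 80, Filter(id > 100, relation)): two stacked filters
val plan = relation.where(id > 100).where(id > 80)

println(CombineFilters(plan))
// Filter ((id > 100) && (id > 80)) directly over the relation
```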
4.4 PushPredicateThroughProject
Pushes a Filter below a Project, so that rows are filtered before the projection is computed:

```scala
object PushPredicateThroughProject extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case filter @ Filter(condition, project @ Project(fields, grandChild)) =>
      val sourceAliases = fields.collect { case a @ Alias(c, _) =>
        (a.toAttribute: Attribute) -> c
      }.toMap
      // rebuild as Project(Filter(...)): the Filter now runs below the Project
      project.copy(child = filter.copy(
        replaceAlias(condition, sourceAliases),
        grandChild))
  }

  def replaceAlias(condition: Expression, sourceAliases: Map[Attribute, Expression]): Expression = {
    condition transform {
      case a: AttributeReference => sourceAliases.getOrElse(a, a)
    }
  }
}
```
For example, with the following SQL statement:

```java
String sql = "select * from (select id,car_speed from test)aa where id=1";
```
The log output is as follows:

```text
16-07-22 16:28:16,850 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project [id#0L,car_speed#8]
 Filter (id#0L = CAST(1, LongType))
  Project [id#0L,car_speed#8]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [id#0L,car_speed#8]
 Project [id#0L,car_speed#8]
  Filter (id#0L = CAST(1, LongType))   // the Filter now sits below the Project
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.PushPredicateThroughProject$@7678c6b4
```
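`replaceAlias` matters when the filter references a column that the inner Project aliased: the predicate must be rewritten in terms of the underlying expression before it can move below the Project. A self-contained sketch of the substitution over a toy expression tree:

```scala
// Standalone sketch of replaceAlias: rewrite a predicate over aliases
// into a predicate over the aliased source expressions.
object ReplaceAliasSketch {
  sealed trait Expr
  case class Attr(name: String)            extends Expr
  case class Add(l: Expr, r: Expr)         extends Expr
  case class GreaterThan(l: Expr, r: Expr) extends Expr
  case class Lit(v: Int)                   extends Expr

  def replaceAlias(e: Expr, aliases: Map[String, Expr]): Expr = e match {
    case Attr(n)           => aliases.getOrElse(n, Attr(n))
    case Add(l, r)         => Add(replaceAlias(l, aliases), replaceAlias(r, aliases))
    case GreaterThan(l, r) => GreaterThan(replaceAlias(l, aliases), replaceAlias(r, aliases))
    case lit: Lit          => lit
  }

  def main(args: Array[String]): Unit = {
    // SELECT ... FROM (SELECT a + b AS c FROM t) WHERE c > 1
    val aliases = Map("c" -> Add(Attr("a"), Attr("b")))
    println(replaceAlias(GreaterThan(Attr("c"), Lit(1)), aliases))
    // => GreaterThan(Add(Attr(a),Attr(b)),Lit(1)), which is pushable below the Project
  }
}
```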
4.5 PushPredicateThroughJoin
Pushes parts of a filter condition into the left and right inputs of a Join:

```scala
object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper {
  /**
   * Splits join condition expressions into three categories based on the attributes required
   * to evaluate them.
   * @return (canEvaluateInLeft, canEvaluateInRight, haveToEvaluateInBoth)
   */
  private def split(condition: Seq[Expression], left: LogicalPlan, right: LogicalPlan) = {
    val (leftEvaluateCondition, rest) =
      condition.partition(_.references subsetOf left.outputSet)
    val (rightEvaluateCondition, commonCondition) =
      rest.partition(_.references subsetOf right.outputSet)
    (leftEvaluateCondition, rightEvaluateCondition, commonCondition)
  }

  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // push the where condition down into join filter
    case f @ Join(left, right, joinType, joinCondition) =>
      val (leftJoinConditions, rightJoinConditions, commonJoinCondition) =
        split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right)

      joinType match {
        case _ @ (Inner | LeftSemi) =>
          // push down the single side only join filter for both sides sub queries
          val newLeft = leftJoinConditions.
            reduceLeftOption(And).map(Filter(_, left)).getOrElse(left)
          val newRight = rightJoinConditions.
            reduceLeftOption(And).map(Filter(_, right)).getOrElse(right)
          val newJoinCond = commonJoinCondition.reduceLeftOption(And)

          Join(newLeft, newRight, joinType, newJoinCond)
        // …… (handling for the remaining join types elided in the original)
      }
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select a.id , b.id from test a join test b on a.id = b.id and a.id > 0 and b.id > 0";
```
The log output is as follows:

```text
16-07-22 17:07:21,526 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project [id#0L,id#43L]
 Join Inner, Some((((id#0L = id#43L) && (id#0L > CAST(0, LongType))) && (id#43L > CAST(0, LongType))))
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
  Relation[id#43L, ..., rec_stat_tmp#85] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Aggregate [COUNT(1) AS count#86L]
 Project [id#0L,id#43L]
  Join Inner, Some((id#0L = id#43L))
   Filter (id#0L > CAST(0, LongType))    // left-only predicate pushed below the join
    Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
   Filter (id#43L > CAST(0, LongType))   // right-only predicate pushed below the join
    Relation[id#43L, ..., rec_stat_tmp#85] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.PushPredicateThroughJoin$@6593bce2
```
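The key step is `split`, which partitions the conjuncts of the join condition by which side can evaluate them. A self-contained sketch, modeling predicates as text plus the set of attributes they reference:

```scala
// Standalone sketch of split(): partition join-condition conjuncts by the
// attributes they reference.
object SplitSketch {
  case class Pred(text: String, references: Set[String])

  def split(conds: Seq[Pred], leftOutput: Set[String], rightOutput: Set[String]) = {
    val (leftOnly, rest)    = conds.partition(_.references subsetOf leftOutput)
    val (rightOnly, common) = rest.partition(_.references subsetOf rightOutput)
    (leftOnly, rightOnly, common)
  }

  def main(args: Array[String]): Unit = {
    val conds = Seq(
      Pred("a.id = b.id", Set("a.id", "b.id")),
      Pred("a.id > 0",    Set("a.id")),
      Pred("b.id > 0",    Set("b.id")))
    val (l, r, c) = split(conds, Set("a.id"), Set("b.id"))
    println(s"left-only:  $l")   // a.id > 0  -> becomes a Filter on the left input
    println(s"right-only: $r")   // b.id > 0  -> becomes a Filter on the right input
    println(s"common:     $c")   // a.id = b.id stays as the join condition
  }
}
```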
4.6 ColumnPruning
Prunes columns that no parent operator needs:

```scala
object ColumnPruning extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // Eliminate attributes that are not needed to calculate the specified aggregates.
    // e.g. select 1+1 as a1, dev_chnid from (select dev_chnid, id from test) a group by dev_chnid
    case a @ Aggregate(_, _, child) if (child.outputSet -- a.references).nonEmpty =>
      // the child produces attributes the aggregate never reads; insert a pruning Project
      a.copy(child = Project(a.references.toSeq, child))

    // Drop aggregate expressions the outer Project never references.
    // e.g. select dev_chnid from (select dev_chnid, MAX(id) from test group by dev_chnid) aa
    case p @ Project(projectList, a @ Aggregate(groupingExpressions, aggregateExpressions, child))
        if (a.outputSet -- p.references).nonEmpty =>
      Project(
        projectList,
        Aggregate(
          groupingExpressions,
          aggregateExpressions.filter(e => p.references.contains(e)),
          child))

    // Eliminate unneeded attributes from either side of a Join.
    // e.g. select ai,bi from (select a.id as ai,a.car_speed,b.id as bi,b.car_speed
    //        from test a join test b on a.id = b.id) aa
    case Project(projectList, Join(left, right, joinType, condition)) =>
      // Collect the list of all references required either above or to evaluate the condition.
      val allReferences: AttributeSet =
        AttributeSet(
          projectList.flatMap(_.references.iterator)) ++
          condition.map(_.references).getOrElse(AttributeSet(Seq.empty))

      /** Applies a projection only when the child is producing unnecessary attributes */
      def pruneJoinChild(c: LogicalPlan): LogicalPlan = prunedChild(c, allReferences)

      Project(projectList, Join(pruneJoinChild(left), pruneJoinChild(right), joinType, condition))

    // Eliminate unneeded attributes from right side of a LeftSemiJoin.
    case Join(left, right, LeftSemi, condition) =>
      // Collect the list of all references required to evaluate the condition.
      val allReferences: AttributeSet =
        condition.map(_.references).getOrElse(AttributeSet(Seq.empty))

      Join(left, prunedChild(right, allReferences), LeftSemi, condition)

    // Push the projection below a Limit.
    // e.g. select id from (select * from test limit 10) aa
    case Project(projectList, Limit(exp, child)) =>
      Limit(exp, Project(projectList, child))

    // Push down project if possible when the child is sort.
    // e.g. select * from (select * from test order by id) aa
    case p @ Project(projectList, s @ Sort(_, _, grandChild))
        if s.references.subsetOf(p.outputSet) =>
      s.copy(child = Project(projectList, grandChild))

    // Eliminate no-op Projects
    case Project(projectList, child) if child.output == projectList => child
  }
}
```
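The excerpt relies on a private helper `prunedChild` that the post does not show; it wraps a child in a pruning Project only when the child actually produces attributes nothing above needs. A sketch consistent with the 1.4 source:

```scala
/** Applies a projection only when the child is producing unnecessary attributes */
private def prunedChild(c: LogicalPlan, allReferences: AttributeSet): LogicalPlan =
  if ((c.outputSet -- allReferences.filter(c.outputSet.contains)).nonEmpty) {
    // keep only the attributes of c that something above actually references
    Project(allReferences.filter(c.outputSet.contains).toSeq, c)
  } else {
    c
  }
```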
For example, with the following SQL statement:

```java
String sql = "SELECT 1+1 as a1, dev_chnid from (select dev_chnid, id from test)a group by dev_chnid";
```
The log output is as follows:

```text
16-07-23 13:56:17,721 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Aggregate [dev_chnid#26], [(1 + 1) AS a1#43,dev_chnid#26]
 Project [dev_chnid#26,id#0L]
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Aggregate [dev_chnid#26], [(1 + 1) AS a1#43,dev_chnid#26]
 Project [dev_chnid#26]   // pruning Project inserted: the aggregate never reads id
  Project [dev_chnid#26,id#0L]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ColumnPruning$@58b18d61
```
Another example:

```java
String sql = " select dev_chnid from (select dev_chnid,MAX(id) from test group by dev_chnid)aa";
```
The log output is as follows:

```text
16-07-23 14:26:37,430 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project [dev_chnid#26]
 Aggregate [dev_chnid#26], [dev_chnid#26,MAX(id#0L) AS c1#46L]
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [dev_chnid#26]
 Aggregate [dev_chnid#26], [dev_chnid#26]   // MAX(id) dropped: the outer Project never references it
  Project [dev_chnid#26]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ColumnPruning$@70b908c4
```
Another example, this time over a join:

```java
String sql = "select ai,bi from (select a.id as ai,a.car_speed,b.id as bi,b.car_speed from test a join test b on a.id = b.id)aa";
```
The log output is as follows:

```text
16-07-23 14:51:05,757 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project [id#0L AS ai#49L,id#51L AS bi#50L]
 Join Inner, Some((id#0L = id#51L))
  Project [id#0L,car_speed#8]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
  Project [id#51L,car_speed#59]
   Relation[id#51L, ..., rec_stat_tmp#93] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [id#0L AS ai#49L,id#51L AS bi#50L]
 Join Inner, Some((id#0L = id#51L))
  Project [id#0L]    // car_speed pruned from the left input
   Project [id#0L,car_speed#8]
    Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
  Project [id#51L]   // car_speed pruned from the right input
   Project [id#51L,car_speed#59]
    Relation[id#51L, ..., rec_stat_tmp#93] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ColumnPruning$@3b244fea
```
Another example, with an outer join:

```java
String sql = " select a.*,b.id from test a left join test b on a.id=b.id";
```
The log output is as follows:

```text
16-07-23 15:13:23,208 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project [id#0L,dev_id#1, ..., rec_stat_tmp#42,id#43L]
 Join LeftOuter, Some((id#0L = id#43L))
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
  Relation[id#43L, ..., rec_stat_tmp#85] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [id#0L,dev_id#1, ..., rec_stat_tmp#42,id#43L]
 Join LeftOuter, Some((id#0L = id#43L))
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010
  Project [id#43L]   // only b.id is needed from the right side of the join
   Relation[id#43L, ..., rec_stat_tmp#85] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ColumnPruning$@354ebd72
```
Another example, with a limit:

```java
String sql = "select id from (select * from test limit 10)aa";
```
The log output is as follows:

```text
16-07-23 15:21:26,190 INFO org.apache.spark.sql.catalyst.optimizer.DefaultOptimizer(Logging.scala:59) ##
plan->
Project [id#0L]
 Limit 10
  Project [id#0L,dev_id#1, ..., rec_stat_tmp#42]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Limit 10           // the projection was pushed below the Limit;
 Project [id#0L]   // the inner no-op Project was removed by the rule's last case
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ColumnPruning$@20a8022
```
Another example, with a sort:

```java
String sql = " select * from (select * from test order by id)aa";
```
The log output is as follows:

```text
plan->
Project [id#0L,dev_id#1, ..., rec_stat_tmp#42]
 Sort [id#0L ASC], true
  Project [id#0L,dev_id#1, ..., rec_stat_tmp#42]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Sort [id#0L ASC], true   // Sort only references id, so the Project could be pushed below it
 Project [id#0L,dev_id#1, ..., rec_stat_tmp#42]
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ColumnPruning$@1aa7d923
```
4.7 ProjectCollapsing
As the name suggests, collapses two adjacent Project operators into one:

```scala
object ProjectCollapsing extends Rule[LogicalPlan] {

  /** Returns true if any expression in projectList is non-deterministic. */
  private def hasNondeterministic(projectList: Seq[NamedExpression]): Boolean = {
    projectList.exists(expr => expr.find(!_.deterministic).isDefined)
  }

  def apply(plan: LogicalPlan): LogicalPlan = plan transformUp {
    // We only collapse these two Projects if the child Project's expressions are all
    // deterministic.
    case Project(projectList1, Project(projectList2, child))
        if !hasNondeterministic(projectList2) =>
      // Create a map of Aliases to their values from the child projection.
      // e.g., 'SELECT ... FROM (SELECT a + b AS c, d ...)' produces Map(c -> Alias(a + b, c)).
      val aliasMap = AttributeMap(projectList2.collect {
        case a @ Alias(e, _) => (a.toAttribute, a)
      })

      // Substitute any attributes that are produced by the child projection, so that we safely
      // eliminate it.
      // e.g., 'SELECT c + 1 FROM (SELECT a + b AS C ...' produces 'SELECT a + b + 1 ...'
      // TODO: Fix TransformBase to avoid the cast below.
      val substitutedProjection = projectList1.map(_.transform {
        case a: Attribute if aliasMap.contains(a) => aliasMap(a)
      }).asInstanceOf[Seq[NamedExpression]]

      // collapse the two Projects into one
      Project(substitutedProjection, child)
  }
}
```
For example, with the following SQL statement:

```java
String sql = "select id from (select id,dev_chnid from test)aa";
```
The log output is as follows:

```text
plan->
Project [id#0L]
 Project [id#0L,dev_chnid#26]
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [id#0L]   // the two adjacent Projects collapsed into one
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ProjectCollapsing$@6fe94a75
```
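The `hasNondeterministic` guard exists because collapsing is only safe when the inner projection is deterministic. A toy illustration of what would go wrong otherwise, with scala.util.Random standing in for SQL's rand():

```scala
// Why ProjectCollapsing skips non-deterministic child projections: collapsing
// 'SELECT r + r FROM (SELECT rand() AS r)' into 'SELECT rand() + rand()' changes results.
object NondeterministicCollapse {
  def main(args: Array[String]): Unit = {
    val rng = new scala.util.Random(42)

    // before collapsing: rand() is evaluated once, then reused
    val r = rng.nextDouble()
    val before = r + r

    // after (incorrect) collapsing: rand() is evaluated twice
    val after = rng.nextDouble() + rng.nextDouble()

    println(s"before=$before after=$after")   // almost surely different values
  }
}
```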
4.8 CombineLimits
Merges two adjacent Limit operators:

```scala
object CombineLimits extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // ll is the outer Limit with expression le; nl is its child Limit
    // with expression ne over grandChild
    case ll @ Limit(le, nl @ Limit(ne, grandChild)) =>
      // keep the smaller limit: if ne < le take ne, otherwise le
      Limit(If(LessThan(ne, le), ne, le), grandChild)
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select id from (select id,dev_chnid from test limit 100)aa limit 10";
```
The log output is as follows:

```text
plan->
Limit 10
 Limit 100
  Project [id#0L]
   Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Limit if ((100 < 10)) 100 else 10   // the two adjacent Limits merged into one
 Project [id#0L]
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.CombineLimits$@35fb4cf5
```
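Both limit expressions are literals here, so the surviving `if ((100 < 10)) 100 else 10` is itself foldable; when the ConstantFolding batch runs afterwards, the plan reduces to a plain `Limit 10`. A one-liner sketch of that fold:

```scala
// Sketch: the combined limit expression contains only literals, so the
// ConstantFolding batch can evaluate it once at planning time.
object FoldCombinedLimit {
  def main(args: Array[String]): Unit = {
    val ne = 100                             // inner limit
    val le = 10                              // outer limit
    val folded = if (ne < le) ne else le     // If(LessThan(ne, le), ne, le)
    println(s"Limit $folded")                // => Limit 10
  }
}
```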
4.9 NullPropagation
Rewrites expressions involving NULL at planning time:

```scala
object NullPropagation extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case q: LogicalPlan => q transformExpressionsUp {
      // count(null) is rewritten to the constant 0
      case e @ Count(Literal(null, _)) => Cast(Literal(0L), e.dataType)
      // …… (many more NULL rewrites elided in the original)
    }
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select COUNT(null) from test";
```
The log output is as follows:

```text
plan->
Aggregate [COUNT(null) AS c0#43L]
 Project
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Aggregate [CAST(0, LongType) AS c0#43L]   // COUNT(null) replaced by the constant 0
 Project
  Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.NullPropagation$@44b3ac1f
```
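The rewrite is just SQL's NULL semantics applied at planning time: COUNT ignores NULLs, so COUNT(null) is always 0, and arithmetic on NULL yields NULL. A toy sketch of those semantics, modeling nullable values as Option:

```scala
// Sketch of the SQL NULL semantics that NullPropagation bakes in at plan time.
object NullSemantics {
  def add(l: Option[Int], r: Option[Int]): Option[Int] =
    for (a <- l; b <- r) yield a + b            // NULL + x => NULL

  def count(values: Seq[Option[Int]]): Long =
    values.count(_.isDefined)                   // COUNT ignores NULLs

  def main(args: Array[String]): Unit = {
    println(add(None, Some(1)))                 // None: null + 1 IS NULL
    println(count(Seq(None, None, None)))       // 0: COUNT(null) = 0
  }
}
```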
4.10 OptimizeIn
Optimizes the In operator:

```scala
object OptimizeIn extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case q: LogicalPlan => q transformExpressionsDown {
      // only rewrite when every element of the IN list is a literal
      case In(v, list) if !list.exists(!_.isInstanceOf[Literal]) =>
        val hSet = list.map(e => e.eval(null))
        InSet(v, HashSet() ++ hSet)
    }
  }
}
```
For example, with the following SQL statement:

```java
String sql = "select * from test where id IN(1,2)";
```
The log output is as follows:

```text
plan->
Filter id#0L IN (1,2)
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Filter id#0L INSET (1,2)   // IN over a literal list rewritten to INSET
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.OptimizeIn$@33944f6b
```
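The point of the rewrite is per-row cost: In scans the literal list linearly for every row, while InSet does a hash lookup against a set built once at planning time. A standalone sketch:

```scala
// Sketch of why InSet beats In for large literal lists:
// HashSet membership is O(1) per row, the list scan is O(n).
import scala.collection.immutable.HashSet

object InSetSketch {
  def main(args: Array[String]): Unit = {
    val list = (1L to 10000L).toSeq
    val hSet = HashSet() ++ list      // built once, at planning time

    val id = 9999L
    println(list.contains(id))        // O(n) per row: what In would do
    println(hSet.contains(id))        // O(1) per row: what InSet does
  }
}
```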
4.11 ConstantFolding
Folds constant expressions:

```scala
object ConstantFolding extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {     // transform the plan
    case q: LogicalPlan => q transformExpressionsDown {            // then each expression
      // Skip redundant folding of literals. This rule is technically not necessary. Placing this
      // here avoids running the next rule for Literal values, which would create a new Literal
      // object and running eval unnecessarily.
      case l: Literal => l

      // Fold expressions that are foldable.
      // e.g. e = (((1 + 2) + 3) + 4) folds to Literal(10)
      case e if e.foldable => Literal.create(e.eval(null), e.dataType)

      // Fold "literal in (item1, item2, ..., literal, ...)" into true directly.
      case In(Literal(v, _), list) if list.exists {
          case Literal(candidate, _) if candidate == v => true
          case _ => false
        } => Literal.create(true, BooleanType)
    }
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select 1+2+3+id from test";
```
The log output is as follows:

```text
plan->
Project [(CAST(((1 + 2) + 3), LongType) + id#0L) AS c0#43L]
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [(6 + id#0L) AS c0#43L]   // 1+2+3 folded to the constant 6 at planning time
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.ConstantFolding$@4e8a45cd
```
4.12 LikeSimplification
Simplifies Like expressions:

```scala
object LikeSimplification extends Rule[LogicalPlan] {
  // if guards below protect from escapes on trailing %.
  // Cases like "something\%" are not optimized, but this does not affect correctness.
  val startsWith = "([^_%]+)%".r
  val endsWith = "%([^_%]+)".r
  val contains = "%([^_%]+)%".r
  val equalTo = "([^_%]*)".r

  def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
    case Like(l, Literal(utf, StringType)) =>
      utf.toString match {
        case startsWith(pattern) if !pattern.endsWith("\\") =>
          StartsWith(l, Literal(pattern))
        case endsWith(pattern) =>
          EndsWith(l, Literal(pattern))
        case contains(pattern) if !pattern.endsWith("\\") =>
          Contains(l, Literal(pattern))
        case equalTo(pattern) =>
          EqualTo(l, Literal(pattern))
        case _ =>
          Like(l, Literal.create(utf, StringType))
      }
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select * from test where car_num LIKE 'N%'";
```
The log output is as follows:

```text
plan->
Filter (car_num#5 LIKE N%)
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Filter StartsWith(car_num#5, N)   // LIKE 'N%' rewritten to StartsWith
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.LikeSimplification$@37638d4f
```
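A standalone sketch of the four pattern shapes the rule recognizes, reusing its regexes (the backslash-escape guards are omitted here; patterns containing `_` fall through and stay as LIKE):

```scala
// Standalone sketch of LikeSimplification's pattern classification.
object LikePatternSketch {
  val startsWith = "([^_%]+)%".r
  val endsWith   = "%([^_%]+)".r
  val contains   = "%([^_%]+)%".r
  val equalTo    = "([^_%]*)".r

  def classify(pattern: String): String = pattern match {
    case startsWith(p) => s"StartsWith($p)"   // 'demo%'
    case endsWith(p)   => s"EndsWith($p)"     // '%demo'
    case contains(p)   => s"Contains($p)"     // '%demo%'
    case equalTo(p)    => s"EqualTo($p)"      // 'demo'
    case _             => s"Like($pattern)"   // e.g. 'de_mo' stays as LIKE
  }

  def main(args: Array[String]): Unit = {
    Seq("N%", "%N", "%demo%", "demo", "de_mo").foreach(p => println(s"$p -> ${classify(p)}"))
  }
}
```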
4.13 BooleanSimplification
Simplifies boolean expressions: when part of a boolean expression can be decided statically, the rest need not be evaluated.

```scala
object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case q: LogicalPlan => q transformExpressionsUp {
      case and @ And(left, right) =>
        (left, right) match {
          // true && r => r
          case (Literal(true, BooleanType), r) => r
          // l && true => l
          case (l, Literal(true, BooleanType)) => l
          // false && r => false: one side is false, the whole And is false
          case (Literal(false, BooleanType), _) => Literal(false)
          // l && false => false
          case (_, Literal(false, BooleanType)) => Literal(false)
          // a && a => a: identical conjuncts collapse to one
          case (l, r) if l fastEquals r => l
          // (a || b) && (a || c) => a || (b && c)
          case _ => ……
        }
      // …… (Or and Not cases elided in the original)
    }
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select * from test where id > 0 and id > 0 and 1 > 0";
```
The log output is as follows:

```text
plan->
Filter (((id#0L > 0) && (id#0L > 0)) && true)
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Filter (id#0L > 0)   // the duplicate conjunct and the literal true are both eliminated
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.BooleanSimplification$@48c9076b
```
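A self-contained sketch of the And-simplification table above, over a toy expression type:

```scala
// Standalone sketch of BooleanSimplification's And cases.
object BooleanSimplifySketch {
  sealed trait Expr
  case class Lit(b: Boolean)       extends Expr
  case class Pred(name: String)    extends Expr
  case class And(l: Expr, r: Expr) extends Expr

  def simplify(e: Expr): Expr = e match {
    case And(l, r) => (simplify(l), simplify(r)) match {
      case (Lit(true), rr)      => rr           // true && r  => r
      case (ll, Lit(true))      => ll           // l && true  => l
      case (Lit(false), _)      => Lit(false)   // false && _ => false
      case (_, Lit(false))      => Lit(false)   // _ && false => false
      case (ll, rr) if ll == rr => ll           // a && a     => a
      case (ll, rr)             => And(ll, rr)
    }
    case other => other
  }

  def main(args: Array[String]): Unit = {
    val p = Pred("id > 0")
    println(simplify(And(And(p, p), Lit(true))))   // => Pred(id > 0)
  }
}
```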
4.14 SimplifyFilters
Simplifies Filter operators whose condition is a constant:

```scala
object SimplifyFilters extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    // If the filter condition always evaluate to true, remove the filter.
    case Filter(Literal(true, BooleanType), child) => child

    // If the filter condition always evaluate to null or false,
    // replace the input with an empty relation.
    case Filter(Literal(null, _), child) =>
      LocalRelation(child.output, data = Seq.empty)
    case Filter(Literal(false, BooleanType), child) =>
      LocalRelation(child.output, data = Seq.empty)
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select * from test where 1 > 0";
```
The log output is as follows:

```text
plan->
Filter true
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->   // the always-true Filter is removed entirely
Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.SimplifyFilters$@47fce61b
```
4.15 SimplifyCasts
Simplifies casts: if the expression already has the target data type, the Cast is removed.

```scala
object SimplifyCasts extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions {
    // the child already has the target type, so the Cast is a no-op
    case Cast(e, dataType) if e.dataType == dataType => e
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select CAST(dev_chnid as String) from test";
```
The log output is as follows:

```text
plan->
Project [CAST(dev_chnid#26, StringType) AS c0#43]
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [dev_chnid#26 AS c0#43]   // dev_chnid is already StringType, so the Cast is dropped
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.SimplifyCasts$@2b287e24
```
4.16 SimplifyCaseConversionExpressions
Simplifies nested case-conversion expressions; only the outermost conversion matters:

```scala
object SimplifyCaseConversionExpressions extends Rule[LogicalPlan] {
  def apply(plan: LogicalPlan): LogicalPlan = plan transform {
    case q: LogicalPlan => q transformExpressionsUp {
      case Upper(Upper(child)) => Upper(child)
      case Upper(Lower(child)) => Upper(child)
      case Lower(Upper(child)) => Lower(child)
      case Lower(Lower(child)) => Lower(child)
    }
  }
}
```
For example, with the following SQL statement:

```java
String sql = " select UPPER(LOWER(dev_chnid)) from test ";
```
The log output is as follows:

```text
plan->
Project [Upper(Lower(dev_chnid#26)) AS c0#43]
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

result->
Project [Upper(dev_chnid#26) AS c0#43]   // the inner Lower is redundant and is removed
 Relation[id#0L, ..., rec_stat_tmp#42] org.apache.spark.sql.parquet.ParquetRelation2@2a400010

rule-> org.apache.spark.sql.catalyst.optimizer.SimplifyCaseConversionExpressions$@1a09681
```