华为云服务器GaussDB(DWS)性能调优:大表关联(结果集较小)引起的性能瓶颈问题案例_云淘科技
1、【问题描述】
CNG-IT去HANA性能优化过程中,存在部分SQL语句大表关联慢的情况,并且关联后结果集数据行数降低幅度大。
2、【原始SQL】
SELECT PERIOD_MONTH,
PERIOD_ID,
AATP_BIG_MODEL_EXTERNAL,
LAG(SUM(CURR_AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL ORDER BY PERIOD_ID) AS CURR_AATP_QTY,
SUM(AATP_QTY) AS AATP_QTY,
SUM(SHIPMENT_QTY) AS SHIPMENT_QTY,
SUM(INV_QTY) AS INV_QTY,
SUM(WIP_QTY) AS WIP_QTY,
SUM(SUM(AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS AATP_CUM_QTY,
SUM(SUM(SHIPMENT_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS SHIPMENT_CUM_QTY,
SUM(SUM(INV_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS INV_CUM_QTY,
LAG(SUM(AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS AATP_LAG_QTY,
LAG(SUM(SHIPMENT_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL,PERIOD_MONTH ORDER BY PERIOD_ID) AS SHIPMENT_LAG_QTY,
LAG(SUM(AATP_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL ORDER BY PERIOD_ID) AS AATP_HIS_LAG_QTY,
LAG(SUM(SHIPMENT_QTY)) OVER(PARTITION BY AATP_BIG_MODEL_EXTERNAL ORDER BY PERIOD_ID) AS SHIPMENT_HIS_LAG_QTY
FROM (
SELECT PERIOD_MONTH,PERIOD_ID,
PR.BIG_MODEL_EXTERNAL AS AATP_BIG_MODEL_EXTERNAL,
PR.PROD_BIG_MODEL AS AATP_BIG_MODEL,
PR.PROD_EN_NAME AS AATP_PROD_EN_NAME,
PR.PROD_SPEC AS AATP_PROD_SPEC,
PR.PROD_COORDINATOR AS AATP_PROD_COORDINATOR,
PR.PS_SKU_NUMBER AS AATP_PS_SKU_NUMBER,
PR.SKU_NAME AS AATP_SKU_NAME,
PR.PROD_COLOUR AS AATP_PROD_COLOUR,
SUM(CURR_AATP_QTY) AS CURR_AATP_QTY,
SUM(AATP_QTY) AS AATP_QTY,
SUM(SHIPMENT_QTY) AS SHIPMENT_QTY,
SUM(INV_QTY) AS INV_QTY,
SUM(WIP_QTY) AS WIP_QTY,
SUM(AATP_QTY_HIS) AS AATP_QTY_HIS,
SUM(SHIPMENT_QTY_HIS) AS SHIPMENT_QTY_HIS
FROM (
SELECT CAST(TO_CHAR(ADDDATE(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),3),'YYYYMM') AS DECIMAL(28,0)) AS PERIOD_MONTH,
CASE WHEN 'WK' = 'DY' AND T.RN = 1
THEN CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,6-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
WHEN 'WK' = 'DY' AND T.RN > 1
THEN CAST(TO_CHAR(R.BUCKET_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0)) END AS PERIOD_ID,
R.PS_SKU_NUMBER,
0 AS CURR_AATP_QTY,
SUM(REGION_AATP_QTY) AS AATP_QTY,
SUM(SUM(CASE WHEN R.BUCKET_DATE <= ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)) THEN REGION_AATP_QTY ELSE 0 END))
OVER(PARTITION BY PS_SKU_NUMBER) AS AATP_QTY_HIS,
0 AS SHIPMENT_QTY,
0 AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
0 AS WIP_QTY
FROM DMISC.DM_PLAN_FCST_AATP_GAP_F R
INNER JOIN DMISC.DM_DIM_PLAN_LOOKUP_D D
ON R.IPMT_CODE = D.LOOKUP_CODE
INNER JOIN (
SELECT CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') = ADDDATE(TO_DATE(20190429,'YYYYMMDD'),-7)
AND D.PERIOD_ST_DATE <= ADDDATE(TO_DATE(20190528,'YYYYMMDD'),6)
AND D.DAY_OF_WEEK = 4
GROUP BY D.PERIOD_ST_DATE,
CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-2),'YYYYMMDD'))
ELSE 20231117
END
) T
ON R.DATA_VERSION = T.PLAN_ID
WHERE PLAN_DATE_TYPE = 'DY'
AND R.REGION_CN_NAME = '大中华终端业务部'
AND R.PS_SKU_NUMBER IS NOT NULL
--AND R.PS_SKU_NUMBER ''
AND R.BUCKET_DATE >= TO_DATE(T.BUCKET_DATE_START,'YYYYMMDD')
AND R.BUCKET_DATE 1
THEN CAST(TO_CHAR(R.BUCKET_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.BUCKET_DATE,-WEEKDAY(R.BUCKET_DATE)),'YYYYMMDD') AS DECIMAL(28,0)) END AS PERIOD_ID,
R.PS_SKU_NUMBER,
SUM(REGION_AATP_QTY) AS CURR_AATP_QTY,
0 AS AATP_QTY,
0 AS AATP_QTY_HIS,
0 AS SHIPMENT_QTY,
0 AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
0 AS WIP_QTY
FROM DMISC.DM_PLAN_FCST_AATP_GAP_F R
INNER JOIN DMISC.DM_DIM_PLAN_LOOKUP_D D
ON R.IPMT_CODE = D.LOOKUP_CODE
INNER JOIN (
SELECT CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') = ADDDATE(TO_DATE('20190429'),0)
AND D.PERIOD_ST_DATE <= ADDDATE(TO_DATE('20190528'),6)
AND D.DAY_OF_WEEK = 4
GROUP BY D.PERIOD_ST_DATE,
CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
ELSE 20231117
END
) T
ON R.DATA_VERSION = T.PLAN_ID
WHERE PLAN_DATE_TYPE = 'DY'
AND R.REGION_CN_NAME = '大中华终端业务部'
AND R.PS_SKU_NUMBER IS NOT NULL
--AND R.PS_SKU_NUMBER ''
AND R.BUCKET_DATE >= TO_DATE(T.BUCKET_DATE_START)
AND R.BUCKET_DATE = ADDDATE(TO_DATE(20190429),-7)
AND R.ACTUAL_SHIPMENT_DATE = TO_DATE(20190429)
THEN CAST(TO_CHAR(R.ACTUAL_SHIPMENT_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.ACTUAL_SHIPMENT_DATE,-WEEKDAY(R.ACTUAL_SHIPMENT_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
END AS PERIOD_ID,
PS.PS_SKU_NUMBER,
SHIPMENT_QTY
FROM DMISC.DM_OM_SHIPMENT_DTL_F R
INNER JOIN DMISC.DM_DIM_CBG_PS_SKU_REL_R PS
ON R.ITEM_CODE = PS.ITEM_NUMBER
WHERE R.ACTUAL_SHIPMENT_DATE >= ADDDATE(TO_DATE(20190429,'YYYYMMDD'),-7)
AND R.ACTUAL_SHIPMENT_DATE 0 )
GROUP BY PERIOD_MONTH,
PERIOD_ID,
PS_SKU_NUMBER
UNION ALL
SELECT CAST(TO_CHAR(ADDDATE(ADDDATE(R.TRANSACTION_DATE,-WEEKDAY(R.TRANSACTION_DATE)),3),'YYYYMM') AS DECIMAL(28,0)) AS PERIOD_MONTH,
CASE WHEN 'WK' = 'DY'
THEN CAST(TO_CHAR(R.TRANSACTION_DATE,'YYYYMMDD') AS DECIMAL(28,0))
ELSE CAST(TO_CHAR(ADDDATE(R.TRANSACTION_DATE,-WEEKDAY(R.TRANSACTION_DATE)),'YYYYMMDD') AS DECIMAL(28,0))
END AS PERIOD_ID,
PS.PS_SKU_NUMBER,
0 AS CURR_AATP_QTY,
0 AS AATP_QTY,
0 AS AATP_QTY_HIS,
0 AS SHIPMENT_QTY,
0 AS SHIPMENT_QTY_HIS,
0 AS INV_QTY,
SUM(TRANSACTION_QTY) AS WIP_QTY
FROM DMISC.DM_MFG_WIP_EXECUTION_DTL_F R
INNER JOIN DMISC.DM_DIM_CBG_PS_SKU_REL_R PS
ON R.ITEM_CODE = PS.ITEM_NUMBER
INNER JOIN (
SELECT DISTINCT R.PS_SKU_NUMBER
FROM DMISC.DM_PLAN_FCST_AATP_GAP_F R
INNER JOIN DMISC.DM_DIM_PLAN_LOOKUP_D D
ON R.IPMT_CODE = D.LOOKUP_CODE
INNER JOIN (
SELECT CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') = ADDDATE(TO_DATE(20190429,'YYYYMMDD'),-7)
AND D.PERIOD_ST_DATE <= ADDDATE(TO_DATE(20190528,'YYYYMMDD'),6)
AND D.DAY_OF_WEEK = 4
GROUP BY D.PERIOD_ST_DATE,
CASE WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD') < TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9),'YYYYMMDD'))
WHEN TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-9)) = TO_CHAR(ADDDATE(CURRENT_DATE,-WEEKDAY(CURRENT_DATE)),'YYYYMMDD')
THEN TO_NUMBER(TO_CHAR(ADDDATE(D.PERIOD_ST_DATE,-2),'YYYYMMDD'))
ELSE 20231117
END
) T
ON R.DATA_VERSION = T.PLAN_ID
WHERE PLAN_DATE_TYPE = 'DY'
AND R.REGION_CN_NAME = '大中华终端业务部'
AND R.PS_SKU_NUMBER IS NOT NULL
--AND R.PS_SKU_NUMBER ''
AND R.BUCKET_DATE >= TO_DATE(T.BUCKET_DATE_START)
AND R.BUCKET_DATE = TO_DATE(20190429)
AND R.TRANSACTION_DATE 0)
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',PR.PROD_COLOUR||',')>0)
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',R.PS_SKU_NUMBER||',')>0)
AND ('ALL' = 'ALL' OR INSTR('ALL'||',',PR.PROD_COORDINATOR||',')>0)
AND 1=1
AND (1=1)
AND (1=1)
AND 1=1 AND NVL(BOM_TYPE,'@@') 'ACC'
GROUP BY PERIOD_MONTH,PERIOD_ID,
PR.BIG_MODEL_EXTERNAL,
PR.PROD_BIG_MODEL,
PR.PROD_EN_NAME,
PR.PROD_SPEC,
PR.PROD_COORDINATOR,
PR.PS_SKU_NUMBER,
PR.SKU_NAME,
PR.PROD_COLOUR
)
WHERE AATP_BIG_MODEL_EXTERNAL IS NOT NULL
GROUP BY PERIOD_MONTH,PERIOD_ID,
AATP_BIG_MODEL_EXTERNAL
3、【性能分析】
上图是原始SQL语句的performance执行计划(具体计划放在附件一),从中可以看出,该计划存在两个问题:
SQL语句未向量化执行
r表与d表关联慢
问题1可以通过落临时表来降低不可向量化的范围(create temp table)https://support.huaweicloud.com/sqlreference-dws/dws_06_0177.html
本篇博文重点介绍如何解决问题2,从计划中可以看出,r表作为大表与d表关联后结果集数据量不大,此时可以借助Bloom Filter,利用d表关联条件对r表进行提前过滤,从而减少关联时r表的数据量,降低关联时耗费的时间。
Bloom Filter使用方法(对大表操作)
关联条件中若存在NUMERIC数据类型,改为int/bigint
关联条件中数据类型若为text,则需要先重新创建该表,创建时在该关联条件后加上COLLATE “C”
数据类型没问题后,再将该关联条件设置为PCK,能够加速过滤
开启Bloom Filter的标志
优化后,执行计划如下所示(完整performance放在附件二中):
performance-优化前.txt
1.68MB
下载次数:1次
performance-优化后.txt
1.47MB
下载次数:1次
附件下载
performance-优化前.txt
1.68MB
下载次数:1次
performance-优化后.txt
1.47MB
下载次数:1次文章来源:华为云社区